feat(govdoc): 新增内部公文模块全链路（后端58+前端11文件）

2026-05-13 14:37:12 +08:00
parent 99699e20e1
commit 5d777599bf
63 changed files with 7608 additions and 0 deletions
@@ -0,0 +1,24 @@
+"""Check 原语注册中心：通过 register 装饰器收集，runner 通过 get 查找。"""
+
+from __future__ import annotations
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase
+
+# Maps a check name to its class; written by ``register`` and read by
+# ``get_check`` / ``all_checks``.
+_REGISTRY: dict[str, type[CheckBase]] = {}
+
+
+def register(name: str):
+    def deco(cls):
+        cls.name = name
+        _REGISTRY[name] = cls
+        return cls
+    return deco
+
+
+def get_check(name: str) -> type[CheckBase]:
+    if name not in _REGISTRY:
+        raise KeyError(f"unknown check: {name}; known: {list(_REGISTRY)}")
+    return _REGISTRY[name]
+
+
+def all_checks() -> list[str]:
+    return list(_REGISTRY.keys())
@@ -0,0 +1,151 @@
+"""LLM 语义检查。三级输出：pass / warn / fail。"""
+
+import logging
+import re
+from typing import Any
+from pydantic import BaseModel
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import (
+    CheckBase, CheckContext, CheckHit, CheckResult,
+)
+from fastapi_modules.fastapi_leaudit.govdoc_engine.llm.client import LlmClient, LlmJsonError, _format_exc
+
+# Module-level logger named after this module.
+_log = logging.getLogger(__name__)
+
+
+# Output-format instruction appended to every prompt by ``AiCheck``; it asks
+# (in Chinese) for a JSON object with ``result``, ``reason`` and
+# ``suggestion`` keys.
+_OUT_FORMAT = """
+请以 JSON 输出：
+{"result": "pass|warn|fail", "reason": "<简短理由>", "suggestion": "<改进建议；pass 时填空>"}
+"""
+
+# Matches a ``{{ key }}`` placeholder; group 1 is the key without the
+# surrounding whitespace.
+_VAR_RE = re.compile(r"\{\{\s*([^}]+?)\s*\}\}")
+
+
+def _resolve_dot_path(root: Any, path: str) -> str:
+    """点语法属性访问：title.style.font_eastasia → entities['title'].style.font_eastasia。"""
+    cur: Any = root
+    for seg in path.split("."):
+        if cur is None:
+            return ""
+        if isinstance(cur, dict):
+            cur = cur.get(seg)
+        elif isinstance(cur, BaseModel):
+            cur = getattr(cur, seg, None)
+        else:
+            cur = getattr(cur, seg, None)
+    if cur is None:
+        return ""
+    if isinstance(cur, (dict, list)):
+        return str(cur)
+    return str(cur)
+
+
+def _interpolate(
+    template: str,
+    paragraphs: list,
+    entities: dict | None = None,
+    target: Any = None,
+) -> str:
+    """渲染顺序：① paragraphs[N] ② entities 点语法
+    ③ target 隐式（无前缀时视为 target.<key>）。"""
+    entities = entities or {}
+
+    def repl(m):
+        key = m.group(1).strip()
+        # ① paragraphs[N] 索引
+        if key.startswith("paragraphs["):
+            try:
+                idx = int(key[len("paragraphs["):].rstrip("]"))
+                return paragraphs[idx].text
+            except (ValueError, IndexError):
+                return ""
+        # ② entities 点语法：title.text / title.style.font_eastasia
+        head, _, rest = key.partition(".")
+        if head in entities:
+            entity = entities[head]
+            if entity is None:
+                return ""
+            return _resolve_dot_path(entity, rest) if rest else entity.text
+        # ③ target 隐式：未带前缀且 target 存在
+        if target is not None:
+            v = _resolve_dot_path(target, key)
+            if v:
+                return v
+        return ""
+
+    return _VAR_RE.sub(repl, template)
+
+
+@register("ai")
+class AiCheck(CheckBase):
+    def __init__(self, llm_client: LlmClient | None = None):
+        self.client = llm_client or LlmClient()
+
+    def _build_prompt(self, ctx: CheckContext) -> str:
+        prompt = _interpolate(
+            ctx.stage.prompt or "",
+            ctx.paragraphs,
+            ctx.entities,
+            ctx.target,
+        )
+        return prompt + "\n\n" + _OUT_FORMAT
+
+    def _interpret(self, ctx: CheckContext, resp: dict) -> CheckResult:
+        result = resp.get("result", "fail")
+        reason = resp.get("reason", "")
+        suggestion = resp.get("suggestion", "")
+        if result == "pass":
+            return CheckResult(passed=True, hits=[])
+        target_p = ctx.paragraphs[0] if ctx.paragraphs else None
+        confidence = 0.95 if result == "fail" else 0.7
+        return CheckResult(passed=False, hits=[CheckHit(
+            paragraph=target_p,
+            char_start=0,
+            char_end=len(target_p.text) if target_p else 0,
+            actual={"llm_reason": reason, "llm_suggestion": suggestion},
+            expected={},
+            message=reason or "LLM 判定不通过",
+            confidence=confidence,
+        )])
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        label = f"ai_{ctx.rule_id or 'unknown'}"
+        try:
+            resp = self.client.chat_json(
+                [{"role": "user", "content": self._build_prompt(ctx)}],
+                label=label,
+            )
+        except LlmJsonError as e:
+            _log.warning("AI check skipped (LLM JSON error): %s", _format_exc(e))
+            return CheckResult(
+                passed=True, hits=[], skipped=True,
+                skip_reason=f"LLM 返回内容无法解析为 JSON：{e}",
+            )
+        except Exception as e:
+            _log.warning("AI check skipped (LLM error): %s", _format_exc(e))
+            return CheckResult(
+                passed=True, hits=[], skipped=True,
+                skip_reason=f"LLM 调用失败：{e}",
+            )
+        return self._interpret(ctx, resp)
+
+    async def run_async(self, ctx: CheckContext) -> CheckResult:
+        label = f"ai_{ctx.rule_id or 'unknown'}"
+        try:
+            resp = await self.client.chat_json_async(
+                [{"role": "user", "content": self._build_prompt(ctx)}],
+                label=label,
+            )
+        except LlmJsonError as e:
+            _log.warning("AI check skipped (LLM JSON error): %s", _format_exc(e))
+            return CheckResult(
+                passed=True, hits=[], skipped=True,
+                skip_reason=f"LLM 返回内容无法解析为 JSON：{e}",
+            )
+        except Exception as e:
+            _log.warning("AI check skipped (LLM error): %s", _format_exc(e))
+            return CheckResult(
+                passed=True, hits=[], skipped=True,
+                skip_reason=f"LLM 调用失败：{e}",
+            )
+        return self._interpret(ctx, resp)
@@ -0,0 +1,48 @@
+"""Check 原语基类与上下文。"""
+
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, TYPE_CHECKING
+from fastapi_modules.fastapi_leaudit.govdoc_engine.models import Document, Paragraph
+from fastapi_modules.fastapi_leaudit.govdoc_engine.dsl.schema import RuleStage
+
+if TYPE_CHECKING:
+    from fastapi_modules.fastapi_leaudit.govdoc_engine.parser.entities import SemanticEntity
+
+
+@dataclass
+class CheckContext:
+    """Everything a check receives for one rule stage."""
+
+    # Whole document; cross-paragraph checks read ``document.paragraphs``.
+    document: Document
+    # Paragraphs selected for this check; most checks iterate over these.
+    paragraphs: list[Paragraph]
+    # Rule stage carrying the check's settings (e.g. expect, pattern, phrases).
+    stage: RuleStage
+    # Named entities; a value of ``None`` is allowed for an entry.
+    entities: dict[str, "SemanticEntity | None"] = field(default_factory=dict)
+    # Entity the rule targets, if any.
+    target: "SemanticEntity | None" = None
+    # Rule identifier; the AI check uses it to label its LLM call.
+    rule_id: str = ""
+
+
+@dataclass
+class CheckHit:
+    """One finding produced by a check."""
+
+    # Paragraph the finding is attached to; ``None`` when there is no anchor.
+    paragraph: Paragraph | None
+    # Start/end offsets of the flagged span; the checks fill these with
+    # positions inside the paragraph text (end exclusive).
+    char_start: int = 0
+    char_end: int = 0
+    # What was found / what was wanted, as free-form dicts.
+    actual: dict[str, Any] | None = None
+    expected: dict[str, Any] | None = None
+    # Human-readable description of the finding.
+    message: str | None = None
+    # Certainty of the finding; defaults to 1.0.
+    confidence: float = 1.0
+
+
+@dataclass
+class CheckResult:
+    """Outcome of running one check."""
+
+    # True when the check found nothing to report.
+    passed: bool
+    # Findings; the checks return an empty list when ``passed`` is True.
+    hits: list[CheckHit] = field(default_factory=list)
+    # True when the check could not be evaluated (e.g. the LLM call failed).
+    skipped: bool = False
+    # Explanation accompanying ``skipped``.
+    skip_reason: str = ""
+
+
+class CheckBase:
+    """Abstract base class of every check primitive."""
+
+    # Registry name of the check; empty until a subclass is given one.
+    name: str = ""
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Evaluate the check against ``ctx``; subclasses must override.
+
+        Raises:
+            NotImplementedError: always, in this base implementation.
+        """
+        raise NotImplementedError
@@ -0,0 +1,34 @@
+"""易混淆词对（字面 + 正则）。"""
+
+import re
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
+
+
+@register("confused_pair")
+class ConfusedPairCheck(CheckBase):
+    def run(self, ctx: CheckContext) -> CheckResult:
+        pairs = ctx.stage.pairs or []
+        hits: list[CheckHit] = []
+        for p in ctx.paragraphs:
+            for pair in pairs:
+                wrong = pair.get("wrong")
+                wrong_pat = pair.get("wrong_pattern")
+                correct = pair.get("correct") or pair.get("suggest", "")
+                reason = pair.get("reason", "")
+                if wrong and wrong in p.text:
+                    start = p.text.find(wrong)
+                    hits.append(CheckHit(
+                        paragraph=p, char_start=start, char_end=start + len(wrong),
+                        actual={"text": wrong}, expected={"text": correct},
+                        message=f"\"{wrong}\" 应为 \"{correct}\"。{reason}",
+                    ))
+                elif wrong_pat:
+                    for m in re.finditer(wrong_pat, p.text):
+                        hits.append(CheckHit(
+                            paragraph=p, char_start=m.start(), char_end=m.end(),
+                            actual={"text": m.group(0)},
+                            expected={"text": correct},
+                            message=f"\"{m.group(0)}\" 应为 \"{correct}\"。{reason}",
+                        ))
+        return CheckResult(passed=not hits, hits=hits)
@@ -0,0 +1,69 @@
+"""跨段关系 check：例如二级标题以句号结尾后又新起一段。"""
+
+import re
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
+
+
+# One attachment item followed by punctuation: a number plus dot, then the
+# content, then a trailing punctuation mark.
+_ATTACH_ITEM_TRAIL_PUNCT = re.compile(r"\d+[\.．][^\d；。，;,.]+?[；。，;,.]")
+# A whole paragraph that is a single attachment item ending in punctuation.
+_ATTACH_ITEM_LINE = re.compile(r"^\d+[\.．].+[；。，;,.]\s*$")
+
+
+@register("cross_role")
+class CrossRoleCheck(CheckBase):
+    def run(self, ctx: CheckContext) -> CheckResult:
+        rules = ctx.stage.rules or []
+        paras = ctx.document.paragraphs
+        hits: list[CheckHit] = []
+        for r in rules:
+            t = r.get("type")
+            if t == "h2_no_period_then_break":
+                for i, p in enumerate(paras):
+                    if p.role == "heading_2" and p.text.rstrip().endswith(("。", ".")):
+                        if i + 1 < len(paras) and paras[i + 1].text.strip():
+                            hits.append(CheckHit(
+                                paragraph=p,
+                                char_start=len(p.text) - 1, char_end=len(p.text),
+                                actual={"text": p.text},
+                                message="二级标题在换行分段时不应使用句号；如使用句号则应紧接正文",
+                            ))
+            elif t == "attachment_item_no_trailing_punct":
+                hits.extend(_attachment_item_hits(paras))
+        return CheckResult(passed=not hits, hits=hits)
+
+
+def _attachment_item_hits(paras):
+    """从 attachment_marker 起扫描附件区块，找末尾带标点的附件项。"""
+    hits: list[CheckHit] = []
+    in_attachment = False
+    for p in paras:
+        text = p.text.strip()
+        if not text:
+            continue
+
+        if p.role == "attachment_marker":
+            in_attachment = True
+            # 同段内可能出现 "附件：1.xxx；2.yyy。" 多项一行
+            for m in _ATTACH_ITEM_TRAIL_PUNCT.finditer(text):
+                hits.append(CheckHit(
+                    paragraph=p,
+                    char_start=m.start(), char_end=m.end(),
+                    actual={"snippet": m.group(0)},
+                    message=f'附件项末尾不应有标点："{m.group(0)}"',
+                ))
+            continue
+
+        if p.role in ("signature", "date", "heading_1"):
+            in_attachment = False
+            continue
+
+        if in_attachment and _ATTACH_ITEM_LINE.match(text):
+            hits.append(CheckHit(
+                paragraph=p,
+                char_start=len(p.text) - 1, char_end=len(p.text),
+                actual={"text": p.text},
+                message=f'附件项末尾不应有标点："{text}"',
+            ))
+    return hits
@@ -0,0 +1,162 @@
+"""字体/字号/复合样式/行距 check。"""
+
+import re
+
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
+from fastapi_modules.fastapi_leaudit.govdoc_engine.models import Paragraph, ParagraphStyle
+
+
+def _font_match(actual: str | None, expect: str) -> bool:
+    if not actual:
+        return False
+    return expect in actual or actual in expect
+
+
+def _size_match(actual: float | None, expect: float, tol: float = 0.5) -> bool:
+    if actual is None:
+        return False
+    return abs(actual - expect) <= tol
+
+
+def _style_matches(style: ParagraphStyle, expect: dict) -> bool:
+    if "eastasia" in expect and not _font_match(style.font_eastasia, expect["eastasia"]):
+        return False
+    if "size_pt" in expect and not _size_match(
+        style.font_size_pt, float(expect["size_pt"])
+    ):
+        return False
+    if "bold" in expect and bool(style.bold) != bool(expect["bold"]):
+        return False
+    return True
+
+
+@register("font")
+class FontCheck(CheckBase):
+    def run(self, ctx: CheckContext) -> CheckResult:
+        expect = ctx.stage.expect or {}
+        hits: list[CheckHit] = []
+        for p in ctx.paragraphs:
+            ok = True
+            actual = {
+                "font": p.style.font_eastasia,
+                "size": p.style.font_size_pt,
+            }
+            if "eastasia" in expect and not _font_match(p.style.font_eastasia, expect["eastasia"]):
+                ok = False
+            if "size_pt" in expect and not _size_match(
+                p.style.font_size_pt, float(expect["size_pt"])
+            ):
+                ok = False
+            if not ok:
+                hits.append(CheckHit(
+                    paragraph=p, char_start=0, char_end=len(p.text),
+                    actual=actual, expected=expect,
+                    message=f"字体或字号不符合（实际 {actual['font']} {actual['size']}pt，期望 {expect}）",
+                ))
+        return CheckResult(passed=not hits, hits=hits)
+
+
+@register("style_match")
+class StyleMatchCheck(CheckBase):
+    def run(self, ctx: CheckContext) -> CheckResult:
+        expect = ctx.stage.expect or {}
+        hits: list[CheckHit] = []
+        for p in ctx.paragraphs:
+            ok = True
+            actual = {
+                "font": p.style.font_eastasia,
+                "size": p.style.font_size_pt,
+                "bold": p.style.bold,
+                "italic": p.style.italic,
+                "alignment": p.style.alignment,
+            }
+            if "eastasia" in expect and not _font_match(p.style.font_eastasia, expect["eastasia"]):
+                ok = False
+            if "size_pt" in expect and not _size_match(
+                p.style.font_size_pt, float(expect["size_pt"])
+            ):
+                ok = False
+            if "bold" in expect and bool(p.style.bold) != bool(expect["bold"]):
+                ok = False
+            if "alignment" in expect and p.style.alignment != expect["alignment"]:
+                ok = False
+            if not ok:
+                hits.append(CheckHit(
+                    paragraph=p, char_start=0, char_end=len(p.text),
+                    actual=actual, expected=expect, message="样式不符合",
+                ))
+        return CheckResult(passed=not hits, hits=hits)
+
+
+# Matches an attachment marker at the start of a paragraph (after optional
+# whitespace); group 1 is the marker itself: "附件" followed by a half- or
+# full-width colon, or "附件" followed by digits.
+_ATTACHMENT_MARKER_RE = re.compile(r"^\s*(附件[:：]|附件\d+)")
+
+
+@register("attachment_marker_style")
+class AttachmentMarkerStyleCheck(CheckBase):
+    """只校验“附件：”或“附件1”等标记本身，不校验后续附件名称。"""
+
+    DEFAULT_EXPECT = {"eastasia": "黑体", "size_pt": 16, "bold": False}
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        expect = ctx.stage.expect or self.DEFAULT_EXPECT
+        hits: list[CheckHit] = []
+        for p in ctx.paragraphs:
+            match = _ATTACHMENT_MARKER_RE.match(p.text)
+            if not match:
+                continue
+            marker_end = match.end(1)
+            marker_styles = _marker_run_styles(p, marker_end)
+            if not marker_styles:
+                marker_styles = [p.style]
+            bad_style = next(
+                (style for style in marker_styles if not _style_matches(style, expect)),
+                None,
+            )
+            if bad_style is not None:
+                hits.append(CheckHit(
+                    paragraph=p,
+                    char_start=match.start(1),
+                    char_end=marker_end,
+                    actual={
+                        "font": bad_style.font_eastasia,
+                        "size": bad_style.font_size_pt,
+                        "bold": bad_style.bold,
+                    },
+                    expected=expect,
+                    message="附件标记样式不符合",
+                ))
+        return CheckResult(passed=not hits, hits=hits)
+
+
+def _marker_run_styles(p: Paragraph, marker_end: int) -> list[ParagraphStyle]:
+    styles: list[ParagraphStyle] = []
+    cursor = 0
+    for run in p.runs:
+        run_start = cursor
+        run_end = cursor + len(run.text)
+        cursor = run_end
+        if run_end <= 0 or run_start >= marker_end:
+            continue
+        if run.text.strip():
+            styles.append(run.style)
+    return styles
+
+
+@register("line_spacing")
+class LineSpacingCheck(CheckBase):
+    def run(self, ctx: CheckContext) -> CheckResult:
+        expect = ctx.stage.expect or {}
+        target = float(expect.get("multiple", 1.5))
+        tol = float(expect.get("tol", 0.05))
+        hits: list[CheckHit] = []
+        for p in ctx.paragraphs:
+            actual = p.style.line_spacing
+            if actual is None or abs(actual - target) > tol:
+                hits.append(CheckHit(
+                    paragraph=p, char_start=0, char_end=len(p.text),
+                    actual={"line_spacing": actual},
+                    expected={"line_spacing": target},
+                    message=f"行距应为 {target}，实际 {actual}",
+                ))
+        return CheckResult(passed=not hits, hits=hits)
@@ -0,0 +1,42 @@
+"""短语/字符黑名单。"""
+
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
+
+
+@register("forbid_phrase")
+class ForbidPhraseCheck(CheckBase):
+    def run(self, ctx: CheckContext) -> CheckResult:
+        phrases = ctx.stage.phrases or []
+        hits: list[CheckHit] = []
+        for p in ctx.paragraphs:
+            for phr in phrases:
+                start = p.text.find(phr)
+                if start >= 0:
+                    hits.append(CheckHit(
+                        paragraph=p, char_start=start, char_end=start + len(phr),
+                        actual={"text": phr}, expected={"forbid": phr},
+                        message=f"出现禁用短语 \"{phr}\"",
+                    ))
+        return CheckResult(passed=not hits, hits=hits)
+
+
+@register("forbid_chars")
+class ForbidCharsCheck(CheckBase):
+    def run(self, ctx: CheckContext) -> CheckResult:
+        chars = ctx.stage.chars or []
+        hits: list[CheckHit] = []
+        for p in ctx.paragraphs:
+            for c in chars:
+                start = 0
+                while True:
+                    idx = p.text.find(c, start)
+                    if idx < 0:
+                        break
+                    hits.append(CheckHit(
+                        paragraph=p, char_start=idx, char_end=idx + len(c),
+                        actual={"char": c}, expected={"forbid": c},
+                        message=f"禁用字符 \"{c}\" 出现在 idx {idx}",
+                    ))
+                    start = idx + len(c)
+        return CheckResult(passed=not hits, hits=hits)
@@ -0,0 +1,29 @@
+"""层级序号格式 check。"""
+
+import re
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
+
+
+@register("hierarchy")
+class HierarchyCheck(CheckBase):
+    """检查层级序号格式：
+    - expected_order: 各级允许的模式（正向白名单，按 level 升序）
+    - forbid_patterns: 禁用模式（黑名单，命中即报错）
+    """
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        forbid = [re.compile(p) for p in (ctx.stage.forbid_patterns or [])]
+        hits: list[CheckHit] = []
+        for p in ctx.paragraphs:
+            text = p.text.strip()
+            for f in forbid:
+                m = f.search(text)
+                if m:
+                    hits.append(CheckHit(
+                        paragraph=p, char_start=m.start(), char_end=m.end(),
+                        actual={"text": m.group(0)},
+                        expected={"forbid_pattern": f.pattern},
+                        message=f"层级序号格式错误：命中禁用模式 {f.pattern}",
+                    ))
+        return CheckResult(passed=not hits, hits=hits)
@@ -0,0 +1,46 @@
+"""标点符号专项规则。"""
+
+import re
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
+
+
+# Adjacent quoted phrases or book titles must not be separated by the
+# enumeration comma "、": matches a closing mark, "、", then an opening mark.
+_QUOTE_DUNHAO_RE = re.compile(r"([”》])、([“《])")
+# An in-sentence parenthesis should not end with punctuation (question mark,
+# exclamation mark and ellipsis excepted).
+_PAREN_PUNCT_RE = re.compile(r"[（(][^）)]*?[，。；：、][）)]")
+# Nested quotes: a double-quoted span containing a single-quoted emphasised
+# phrase.
+_NESTED_QUOTE_RE = re.compile(r"“[^“”]*?‘[^‘’]+’[^“”]*?”")
+
+
+@register("punctuation")
+class PunctuationCheck(CheckBase):
+    def run(self, ctx: CheckContext) -> CheckResult:
+        rules = ctx.stage.rules or []
+        hits: list[CheckHit] = []
+        for p in ctx.paragraphs:
+            for r in rules:
+                t = r.get("type")
+                if t == "no_dunhao_between_quotes":
+                    for m in _QUOTE_DUNHAO_RE.finditer(p.text):
+                        hits.append(CheckHit(
+                            paragraph=p, char_start=m.start(), char_end=m.end(),
+                            actual={"text": m.group(0)},
+                            expected={"text": m.group(0).replace("、", "")},
+                            message="多个引号/书名号并列不应用顿号分隔",
+                        ))
+                elif t == "no_punct_inside_inline_paren":
+                    for m in _PAREN_PUNCT_RE.finditer(p.text):
+                        hits.append(CheckHit(
+                            paragraph=p, char_start=m.start(), char_end=m.end(),
+                            actual={"text": m.group(0)},
+                            message="句内括号末尾通常不应含标点",
+                        ))
+                elif t == "no_outer_quote_when_inner_quote":
+                    for m in _NESTED_QUOTE_RE.finditer(p.text):
+                        hits.append(CheckHit(
+                            paragraph=p, char_start=m.start(), char_end=m.end(),
+                            actual={"text": m.group(0)},
+                            message="双引号内已含单引号强调时，外层不应再加双引号",
+                        ))
+        return CheckResult(passed=not hits, hits=hits)
@@ -0,0 +1,36 @@
+"""regex_require / regex_forbid。"""
+
+import re
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
+
+
+@register("regex_require")
+class RegexRequireCheck(CheckBase):
+    def run(self, ctx: CheckContext) -> CheckResult:
+        pat = re.compile(ctx.stage.pattern or "")
+        hits: list[CheckHit] = []
+        for p in ctx.paragraphs:
+            if not pat.search(p.text):
+                hits.append(CheckHit(
+                    paragraph=p, char_start=0, char_end=len(p.text),
+                    actual={"text": p.text}, expected={"pattern": ctx.stage.pattern},
+                    message=f"未匹配模式 {ctx.stage.pattern}",
+                ))
+        return CheckResult(passed=not hits, hits=hits)
+
+
+@register("regex_forbid")
+class RegexForbidCheck(CheckBase):
+    def run(self, ctx: CheckContext) -> CheckResult:
+        pat = re.compile(ctx.stage.pattern or "")
+        hits: list[CheckHit] = []
+        for p in ctx.paragraphs:
+            for m in pat.finditer(p.text):
+                hits.append(CheckHit(
+                    paragraph=p, char_start=m.start(), char_end=m.end(),
+                    actual={"text": m.group(0)},
+                    expected={"forbid_pattern": ctx.stage.pattern},
+                    message=f"出现禁止模式 {ctx.stage.pattern}（命中 \"{m.group(0)}\"）",
+                ))
+        return CheckResult(passed=not hits, hits=hits)
@@ -0,0 +1,28 @@
+"""required check：目标实体或选中段落必须有非空文本。"""
+
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import (
+    CheckBase, CheckContext, CheckHit, CheckResult,
+)
+
+
+@register("required")
+class RequiredCheck(CheckBase):
+    def run(self, ctx: CheckContext) -> CheckResult:
+        # target 通道：检查实体 text 是否非空
+        if ctx.target is not None:
+            if ctx.target.text and ctx.target.text.strip():
+                return CheckResult(passed=True, hits=[])
+            anchor = ctx.paragraphs[0] if ctx.paragraphs else None
+            return CheckResult(passed=False, hits=[
+                CheckHit(paragraph=anchor, message=f"实体 {ctx.target.name} 缺失或为空")
+            ])
+
+        # applies_to 通道：所有段落必须非空
+        empty = [p for p in ctx.paragraphs if not p.text.strip()]
+        if empty:
+            return CheckResult(
+                passed=False,
+                hits=[CheckHit(paragraph=p, message="段落为空") for p in empty],
+            )
+        return CheckResult(passed=True, hits=[])
@@ -0,0 +1,42 @@
+"""文种白名单（15 种法定公文文种）。"""
+
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import (
+    CheckBase, CheckContext, CheckHit, CheckResult,
+)
+
+
+# Accepted document-type names. The set holds 16 literals although the module
+# speaks of 15 statutory kinds.
+# NOTE(review): presumably "命令" and "令" are two spellings of one kind; confirm.
+LEGAL_WENZHONG = {
+    "决议", "决定", "命令", "令", "公报", "公告", "通告",
+    "意见", "通知", "通报", "报告", "请示", "批复",
+    "议案", "函", "纪要",
+}
+
+
+@register("wenzhong_whitelist")
+class WenzhongWhitelistCheck(CheckBase):
+    """检查文种是否在 15 种法定文种白名单内。
+
+    数据来源：
+    1. ctx.entities["wenzhong"].text   ← 推荐
+    2. ctx.target.text  （当 rule.target = wenzhong 时）
+    """
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        wz = ""
+        wz_entity = ctx.entities.get("wenzhong") if ctx.entities else None
+        if wz_entity is not None:
+            wz = (wz_entity.text or "").strip()
+        elif ctx.target is not None and ctx.target.name == "wenzhong":
+            wz = (ctx.target.text or "").strip()
+
+        if not wz:
+            return CheckResult(passed=True, hits=[])
+        if wz in LEGAL_WENZHONG:
+            return CheckResult(passed=True, hits=[])
+        return CheckResult(passed=False, hits=[CheckHit(
+            paragraph=None,
+            actual={"wenzhong": wz},
+            expected={"wenzhong_whitelist": sorted(LEGAL_WENZHONG)},
+            message=f"非法定文种 \"{wz}\"，应为 15 种法定公文文种之一",
+        )])