feat(govdoc): 新增内部公文模块全链路（后端58+前端11文件）

2026-05-13 14:37:12 +08:00
parent 99699e20e1
commit 5d777599bf
63 changed files with 7608 additions and 0 deletions
@@ -0,0 +1,93 @@
+"""从 Document 派生出 structure（按 role 分类统计）+ outline（heading 层级树）。"""
+
+from __future__ import annotations
+from collections import Counter
+from fastapi_modules.fastapi_leaudit.govdoc_engine.models import Document, Paragraph
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.result import OutlineNode, StructureItem
+
+
+# Display table for paragraph roles, consumed by build_structure in this order.
+# A role flagged as required is always reported, even with zero paragraphs;
+# an optional role is reported only when at least one paragraph carries it.
+_ROLE_LABELS: list[tuple[str, str, bool]] = [
+    # (role, Chinese display label, required in a regular official document?)
+    ("title", "标题", True),
+    ("doc_number", "发文字号", True),
+    ("recipient", "主送机关", True),
+    ("heading_1", "一级标题", False),
+    ("heading_2", "二级标题", False),
+    ("heading_3", "三级标题", False),
+    ("heading_4", "四级标题", False),
+    ("body", "正文", True),
+    ("attachment_marker", "附件标记", False),
+    ("attachment_title", "附件标题", False),
+    ("signature", "署名", True),
+    ("date", "成文日期", True),
+    ("no_text_marker", "(此页无正文)", False),
+    ("unknown", "未识别", False),
+]
+
+# Maps each heading role to its outline level (1 is the shallowest).
+# build_outline uses the keys to pick out heading paragraphs and the values
+# to decide how the resulting nodes nest.
+_HEADING_LEVELS = {
+    "heading_1": 1,
+    "heading_2": 2,
+    "heading_3": 3,
+    "heading_4": 4,
+}
+
+
+def _dominant_style(paragraphs: list[Paragraph]) -> tuple[str | None, float | None, bool]:
+    """返回 (字体众数, 字号众数, 是否所有段落样式一致)。"""
+    if not paragraphs:
+        return None, None, True
+    fonts = Counter(p.style.font_eastasia for p in paragraphs if p.style.font_eastasia)
+    sizes = Counter(p.style.font_size_pt for p in paragraphs if p.style.font_size_pt is not None)
+    dom_font = fonts.most_common(1)[0][0] if fonts else None
+    dom_size = sizes.most_common(1)[0][0] if sizes else None
+    uniform = len(fonts) <= 1 and len(sizes) <= 1
+    return dom_font, dom_size, uniform
+
+
+def build_structure(doc: Document) -> list[StructureItem]:
+    items: list[StructureItem] = []
+    for role, label, expected in _ROLE_LABELS:
+        paragraphs = [p for p in doc.paragraphs if p.role == role]
+        if not paragraphs and not expected:
+            # 非必备 role 没出现就不展示，保持面板紧凑
+            continue
+        samples = [p.text[:60] for p in paragraphs[:3]]
+        font, size, uniform = _dominant_style(paragraphs)
+        items.append(StructureItem(
+            role=role,
+            label=label,
+            count=len(paragraphs),
+            expected=expected,
+            paragraph_indices=[p.index for p in paragraphs],
+            samples=samples,
+            char_total=sum(len(p.text) for p in paragraphs),
+            dominant_font=font,
+            dominant_size_pt=size,
+            style_uniform=uniform,
+        ))
+    return items
+
+
+def build_outline(doc: Document) -> list[OutlineNode]:
+    """按段落顺序 + heading 层级生成树。"""
+    headings = [
+        (p.index, _HEADING_LEVELS[p.role], p.text)
+        for p in doc.paragraphs
+        if p.role in _HEADING_LEVELS
+    ]
+    if not headings:
+        return []
+
+    roots: list[OutlineNode] = []
+    stack: list[OutlineNode] = []
+    for idx, level, text in headings:
+        node = OutlineNode(paragraph_index=idx, level=level, text=text)
+        # 弹出比当前 level 更深的祖先
+        while stack and stack[-1].level >= level:
+            stack.pop()
+        if stack:
+            stack[-1].children.append(node)
+        else:
+            roots.append(node)
+        stack.append(node)
+    return roots