feat(govdoc): 新增内部公文模块全链路(后端58+前端11文件)

This commit is contained in:
wren
2026-05-13 14:37:12 +08:00
parent 99699e20e1
commit 5d777599bf
63 changed files with 7608 additions and 0 deletions
@@ -0,0 +1,93 @@
"""Derive from a Document its structure (per-role statistics) and its outline (heading hierarchy tree)."""
from __future__ import annotations
from collections import Counter
from fastapi_modules.fastapi_leaudit.govdoc_engine.models import Document, Paragraph
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.result import OutlineNode, StructureItem
# Display table for paragraph roles. build_structure walks this list in order,
# so the order here is the order in which roles are reported.
# Each entry is (role, Chinese display label, expected flag); optional roles
# (flag False) are omitted from the report when no paragraph carries them.
_ROLE_LABELS: list[tuple[str, str, bool]] = [
    # (role, Chinese label, whether a regular official document must have it)
    ("title", "标题", True),
    ("doc_number", "发文字号", True),
    ("recipient", "主送机关", True),
    ("heading_1", "一级标题", False),
    ("heading_2", "二级标题", False),
    ("heading_3", "三级标题", False),
    ("heading_4", "四级标题", False),
    ("body", "正文", True),
    ("attachment_marker", "附件标记", False),
    ("attachment_title", "附件标题", False),
    ("signature", "署名", True),
    ("date", "成文日期", True),
    ("no_text_marker", "(此页无正文)", False),
    ("unknown", "未识别", False),
]
# Maps each heading role to its numeric outline level (1 is the shallowest).
# build_outline treats a paragraph as a heading only if its role is a key here.
_HEADING_LEVELS = {
    "heading_1": 1,
    "heading_2": 2,
    "heading_3": 3,
    "heading_4": 4,
}
def _dominant_style(paragraphs: list[Paragraph]) -> tuple[str | None, float | None, bool]:
    """Summarize the prevailing East Asian font and font size of *paragraphs*.

    Paragraphs whose font is falsy are left out of the font tally, and
    paragraphs whose size is ``None`` are left out of the size tally.

    Returns:
        A ``(font, size_pt, uniform)`` triple. ``font`` and ``size_pt`` are
        the most frequent tallied values (``None`` when nothing was tallied;
        on a tie the value seen first wins). ``uniform`` is ``True`` when at
        most one distinct font and at most one distinct size were tallied.
        An empty input yields ``(None, None, True)``.
    """
    if not paragraphs:
        return None, None, True

    font_tally = Counter()
    size_tally = Counter()
    for para in paragraphs:
        style = para.style
        if style.font_eastasia:
            font_tally[style.font_eastasia] += 1
        if style.font_size_pt is not None:
            size_tally[style.font_size_pt] += 1

    top_font = None
    if font_tally:
        ((top_font, _font_hits),) = font_tally.most_common(1)

    top_size = None
    if size_tally:
        ((top_size, _size_hits),) = size_tally.most_common(1)

    # Uniform means no second distinct font and no second distinct size.
    mixed = len(font_tally) > 1 or len(size_tally) > 1
    return top_font, top_size, not mixed
def build_structure(doc: Document) -> list[StructureItem]:
    """Summarize the document's paragraphs per role, in ``_ROLE_LABELS`` order.

    A role with no matching paragraph is still reported (with ``count=0``)
    when it is flagged as expected; optional roles that do not occur are left
    out. Paragraphs whose role is not listed in ``_ROLE_LABELS`` do not appear
    in the result.

    Args:
        doc: Document whose ``paragraphs`` are grouped by their ``role``.

    Returns:
        One ``StructureItem`` per reported role, carrying the paragraph count
        and indices, up to three 60-character text samples, the total
        character count, and the dominant font/size summary.
    """
    structure: list[StructureItem] = []
    for role, label, expected in _ROLE_LABELS:
        matched = [para for para in doc.paragraphs if para.role == role]
        if not (matched or expected):
            # Keep the panel compact: skip optional roles that never occur.
            continue

        dominant_font, dominant_size, is_uniform = _dominant_style(matched)
        entry = StructureItem(
            role=role,
            label=label,
            count=len(matched),
            expected=expected,
            paragraph_indices=[para.index for para in matched],
            # Preview: the first 60 characters of at most three paragraphs.
            samples=[para.text[:60] for para in matched[:3]],
            char_total=sum(len(para.text) for para in matched),
            dominant_font=dominant_font,
            dominant_size_pt=dominant_size,
            style_uniform=is_uniform,
        )
        structure.append(entry)
    return structure
def build_outline(doc: Document) -> list[OutlineNode]:
    """Build the heading tree from the document's paragraphs, in order.

    Only paragraphs whose role is a key of ``_HEADING_LEVELS`` take part.
    Each heading is attached to the nearest preceding heading with a strictly
    smaller level; a heading with no such predecessor becomes a root. Levels
    may be skipped (a level-3 heading directly after a level-1 heading becomes
    that heading's child).

    Args:
        doc: Document whose ``paragraphs`` are scanned for heading roles.

    Returns:
        The root nodes in document order; an empty list when the document has
        no heading paragraphs.
    """
    # NOTE(review): relies on OutlineNode() starting with an empty, per-node
    # ``children`` list - confirm in the result module.
    roots: list[OutlineNode] = []
    open_path: list[OutlineNode] = []  # chain of headings from a root to the latest node
    for para in doc.paragraphs:
        level = _HEADING_LEVELS.get(para.role)
        if level is None:
            continue  # not a heading paragraph

        node = OutlineNode(paragraph_index=para.index, level=level, text=para.text)
        # Drop entries at the same or a deeper level; whatever is left on top
        # is the nearest shallower heading, i.e. this node's parent.
        while open_path and open_path[-1].level >= level:
            open_path.pop()

        siblings = open_path[-1].children if open_path else roots
        siblings.append(node)
        open_path.append(node)
    return roots