Files
leaudit-platform-backend/fastapi_modules/fastapi_leaudit/govdoc_engine/parser/entities.py
T

28 lines
1.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""语义实体:把段落 + 字段值 + 样式合在一起。"""
from __future__ import annotations
from typing import Any, Literal
from pydantic import BaseModel, Field
from fastapi_modules.fastapi_leaudit.govdoc_engine.models import ParagraphStyle
# Allowed provenance tags for an entity; used as the type of
# SemanticEntity.source ("structural", "llm", or "derived").
EntitySource = Literal["structural", "llm", "derived"]
class SemanticEntity(BaseModel):
    """One semantic unit of an official document.

    Examples of units: title / document number / primary recipient / ...

    The ``source`` field records how the entity was obtained:

    - ``structural``: ``name`` maps one-to-one to a role;
      ``paragraph_indices`` is non-empty and ``style`` is available.
    - ``derived``: inferred from another entity (e.g. ``wenzhong`` taken
      from the end of ``title``); ``paragraph_indices`` borrows the source
      entity's paragraphs.
    - ``llm``: used only when both the structural and derived paths fail;
      ``paragraph_indices`` may be empty.
    """

    # Entity name; the only required field.
    name: str
    # Text value of the entity; empty string when not supplied.
    text: str = Field(default="")
    # Indices of the paragraphs backing this entity; a fresh list per instance.
    paragraph_indices: list[int] = Field(default_factory=list)
    # NOTE(review): presumably the role of the main backing paragraph — confirm.
    primary_role: str | None = Field(default=None)
    # Paragraph style attached to the entity, when one is available.
    style: ParagraphStyle | None = Field(default=None)
    # Free-form additional data; a fresh dict per instance.
    extra: dict[str, Any] = Field(default_factory=dict)
    # How the entity was obtained; defaults to the structural path.
    source: EntitySource = Field(default="structural")
    # Confidence score; defaults to 1.0 (no range constraint is declared here).
    confidence: float = Field(default=1.0)