28 lines
1.0 KiB
Python
28 lines
1.0 KiB
Python
"""语义实体:把段落 + 字段值 + 样式合在一起。"""
|
||
|
||
from __future__ import annotations
|
||
from typing import Any, Literal
|
||
from pydantic import BaseModel, Field
|
||
from fastapi_modules.fastapi_leaudit.govdoc_engine.models import ParagraphStyle
|
||
|
||
|
||
# Provenance tag for an entity: matched structurally against a role,
# derived from another entity, or produced by the LLM fallback path.
EntitySource = Literal["structural", "llm", "derived"]
|
||
|
||
|
||
class SemanticEntity(BaseModel):
    """One semantic unit of an official document (title, document number, primary recipient, ...).

    How an entity is populated depends on its ``source``:

    - ``structural``: ``name`` maps one-to-one to a role, ``paragraph_indices``
      is non-empty, and ``style`` is available.
    - ``derived``: inferred from another entity (e.g. ``wenzhong`` taken from
      the tail of ``title``); ``paragraph_indices`` are borrowed from the
      source paragraphs.
    - ``llm``: used only when both the structural and the derived paths fail;
      ``paragraph_indices`` may be empty.
    """

    # Identifier of the entity; the only field without a default.
    name: str
    # Text value of the entity; empty string when not set.
    text: str = Field(default="")
    # Indices of the paragraphs backing this entity (fresh list per instance).
    paragraph_indices: list[int] = Field(default_factory=list)
    # Role associated with the entity, if any.
    primary_role: str | None = Field(default=None)
    # Paragraph style attached to the entity, if any.
    style: ParagraphStyle | None = Field(default=None)
    # Free-form additional data (fresh dict per instance).
    extra: dict[str, Any] = Field(default_factory=dict)
    # Which path produced the entity; defaults to the structural path.
    source: EntitySource = Field(default="structural")
    # NOTE(review): presumably a score in [0, 1]; no range is enforced here.
    confidence: float = Field(default=1.0)
|