# leaudit-platform-backend/fastapi_modules/fastapi_leaudit/govdoc_engine/models.py

"""公文审查的核心数据模型。"""

from __future__ import annotations
from typing import Any, Literal
from pydantic import BaseModel, Field


# Closed set of role labels accepted by ``Paragraph.role`` and
# ``Location.role``. The first three rows name structural parts of a
# document; the last row holds special-purpose labels.
# NOTE(review): "any" looks like a wildcard for rule matching and "unknown"
# like a classifier fallback -- confirm against the code that consumes them.
Role = Literal[
    "title", "doc_number", "recipient",
    "heading_1", "heading_2", "heading_3", "heading_4",
    "body", "attachment_marker", "attachment_title", "signature", "date",
    "no_text_marker", "unknown", "any",
]
# Allowed values for ``Finding.severity``.
Severity = Literal["error", "warning", "info"]


class ParagraphStyle(BaseModel):
    """Formatting attributes carried by both ``Paragraph.style`` and ``Run.style``.

    Every field has a default, so an instance can be built with no arguments.
    """

    # Font names for East Asian and ASCII text; None when not set.
    # NOTE(review): the split mirrors Word's eastAsia/ascii font slots -- confirm.
    font_eastasia: str | None = None
    font_ascii: str | None = None
    # Font size; presumably in points, per the ``_pt`` suffix. None when not set.
    font_size_pt: float | None = None
    bold: bool = False
    italic: bool = False
    # Line spacing value and the rule that qualifies it.
    # NOTE(review): units and the set of valid rule strings are not defined
    # in this module -- confirm with the parser that fills these in.
    line_spacing: float | None = None
    line_spacing_rule: str | None = None
    # Free-form alignment name; defaults to "left". Not constrained here.
    alignment: str = "left"
    # First-line indent; presumably in points, per the ``_pt`` suffix.
    first_line_indent_pt: float = 0.0


class Run(BaseModel):
    """A piece of text paired with its own style; ``Paragraph.runs`` holds a list of these."""

    # Both fields are required.
    text: str
    style: ParagraphStyle


class Paragraph(BaseModel):
    """One paragraph of a document: its text, runs, style and classification.

    ``index``, ``text``, ``runs`` and ``style`` are required; the remaining
    fields have defaults.
    """

    # Presumably the paragraph's position within the document -- confirm
    # whether it is 0- or 1-based with the code that builds these.
    index: int
    text: str
    runs: list[Run]
    style: ParagraphStyle
    # Role label for this paragraph; None by default (presumably meaning
    # "not classified yet").
    role: Role | None = None
    # Confidence attached to ``role``; defaults to 1.0.
    # NOTE(review): presumably in [0, 1], but no range is enforced here.
    role_confidence: float = 1.0
    # Flags for where the paragraph sits: table, page header, page footer.
    in_table: bool = False
    in_header: bool = False
    in_footer: bool = False


class Table(BaseModel):
    """A table reduced to plain strings."""

    # Presumably the table's position within the document -- confirm.
    index: int
    # Nested list of strings; presumably one inner list per row with one
    # string per cell.
    rows: list[list[str]]


class Document(BaseModel):
    """Top-level container for the paragraphs and tables of one document.

    Only ``paragraphs`` is required.
    """

    # Arbitrary metadata with no schema imposed here. default_factory gives
    # each instance its own dict.
    meta: dict[str, Any] = Field(default_factory=dict)
    paragraphs: list[Paragraph]
    # Defaults to a fresh empty list per instance.
    tables: list[Table] = Field(default_factory=list)


class Location(BaseModel):
    """Position that a ``Finding`` points at.

    Only ``paragraph_index`` is required.
    """

    # Presumably matches ``Paragraph.index`` of the paragraph concerned.
    paragraph_index: int
    role: Role | None = None
    # Character offsets; both default to 0.
    # NOTE(review): presumably offsets into the paragraph text. Whether
    # char_end is inclusive or exclusive is not defined here -- confirm.
    char_start: int = 0
    char_end: int = 0
    # Free-form context string; empty by default.
    context: str = ""


class Finding(BaseModel):
    """A single issue record: which rule it relates to, where, and what was observed.

    ``finding_id``, ``rule_id``, ``rule_name``, ``severity``, ``category``,
    ``location`` and ``message`` are required; the rest have defaults.
    """

    finding_id: str
    # Identifier and display name of the associated rule.
    rule_id: str
    rule_name: str
    # One of "error", "warning", "info".
    severity: Severity
    # Free-form category string; valid values are not defined in this module.
    category: str
    location: Location
    # Observed and expected values as free-form dicts with no schema imposed
    # here. Each instance gets its own empty dict by default.
    actual: dict[str, Any] = Field(default_factory=dict)
    expected: dict[str, Any] = Field(default_factory=dict)
    message: str
    # Optional free text; both default to the empty string.
    suggestion: str = ""
    evidence: str = ""
    # Defaults to 1.0.
    # NOTE(review): presumably in [0, 1], but no range is enforced here.
    confidence: float = 1.0