"""Core data models for official-document review."""

from __future__ import annotations

from typing import Any, Literal

from pydantic import BaseModel, Field


# Closed set of role labels. It is the type of ``Paragraph.role`` and
# ``Location.role`` in this module.
# NOTE(review): "any" and "unknown" look like wildcard / fallback labels
# rather than real document parts -- confirm against the code that uses them.
Role = Literal[
    "title", "doc_number", "recipient",
    "heading_1", "heading_2", "heading_3", "heading_4",
    "body", "attachment_marker", "attachment_title", "signature", "date",
    "no_text_marker", "unknown", "any",
]

# Allowed severity levels; this is the type of ``Finding.severity``.
Severity = Literal["error", "warning", "info"]


class ParagraphStyle(BaseModel):
    """Formatting attributes used as the ``style`` of a paragraph or a run.

    Every field has a default, so ``ParagraphStyle()`` is valid. The
    optional fields default to ``None``.
    """

    # Font names. Presumably the East-Asian and ASCII font slots of the
    # source document -- confirm against the code that fills them.
    font_eastasia: str | None = None
    font_ascii: str | None = None
    # Presumably in points, going by the ``_pt`` suffix.
    font_size_pt: float | None = None
    bold: bool = False
    italic: bool = False
    line_spacing: float | None = None
    # Free-form string; how it qualifies ``line_spacing`` is not defined here.
    line_spacing_rule: str | None = None
    # Plain string: the model does not restrict it to a fixed set of values.
    alignment: str = "left"
    first_line_indent_pt: float = 0.0


class Run(BaseModel):
    """A piece of text paired with its own ``ParagraphStyle``.

    Both fields are required. ``Paragraph.runs`` holds a list of these.
    """

    text: str
    style: ParagraphStyle


class Paragraph(BaseModel):
    """One paragraph: its text, runs, style, optional role and placement flags.

    ``index``, ``text``, ``runs`` and ``style`` are required. The remaining
    fields have defaults.
    """

    index: int
    text: str
    runs: list[Run]
    style: ParagraphStyle
    # Role label; None by default.
    role: Role | None = None
    # NOTE(review): no range is enforced here -- presumably 0.0 to 1.0.
    role_confidence: float = 1.0
    # Placement flags, all False by default. Presumably they mark paragraphs
    # taken from a table, a page header or a page footer -- confirm.
    in_table: bool = False
    in_header: bool = False
    in_footer: bool = False


class Table(BaseModel):
    """A table held as a list of lists of strings, plus an integer index."""

    index: int
    # Presumably one inner list per row and one string per cell -- confirm.
    rows: list[list[str]]


class Document(BaseModel):
    """Top-level container: metadata, paragraphs and tables.

    Only ``paragraphs`` is required. ``meta`` and ``tables`` use
    ``default_factory``, so the defaults are an empty dict and an empty list.
    """

    # Free-form metadata; no key schema is defined in this module.
    meta: dict[str, Any] = Field(default_factory=dict)
    paragraphs: list[Paragraph]
    tables: list[Table] = Field(default_factory=list)


class Location(BaseModel):
    """Position information carried by a ``Finding``.

    Only ``paragraph_index`` is required.
    """

    # Presumably matches ``Paragraph.index`` -- confirm against the producer.
    paragraph_index: int
    role: Role | None = None
    # Both offsets default to 0. NOTE(review): presumably a character span
    # inside the paragraph text; whether ``char_end`` is inclusive is not
    # specified here.
    char_start: int = 0
    char_end: int = 0
    # Free-form string, empty by default.
    context: str = ""


class Finding(BaseModel):
    """A single review finding: which rule, how severe, where, and why.

    ``finding_id``, ``rule_id``, ``rule_name``, ``severity``, ``category``,
    ``location`` and ``message`` are required. The other fields have defaults.
    """

    finding_id: str
    rule_id: str
    rule_name: str
    # One of "error", "warning" or "info" (see ``Severity``).
    severity: Severity
    category: str
    location: Location
    # Free-form dicts, empty by default. Presumably the observed and the
    # required values for the rule -- no key schema is defined here.
    actual: dict[str, Any] = Field(default_factory=dict)
    expected: dict[str, Any] = Field(default_factory=dict)
    message: str
    suggestion: str = ""
    evidence: str = ""
    # NOTE(review): no range is enforced here -- presumably 0.0 to 1.0.
    confidence: float = 1.0