From 1bacfe41b7ce9ada6a61d02e87f852e7f5b1e6a7 Mon Sep 17 00:00:00 2001 From: wren <“porlong@qq.com”> Date: Mon, 18 May 2026 14:35:25 +0800 Subject: [PATCH] feat: integrate govdoc platform updates --- docs/内部公文模块/内部公文前端拆分实施清单.md | 513 ++++++++++++++ docs/内部公文模块/报告UI样例.html | 654 ++++++++++++++++++ .../fastapi_leaudit/domian/vo/ragChatVo.py | 1 + .../govdoc_engine/reporter/html_renderer.py | 640 +++++++++++++++-- .../services/impl/govdocServiceImpl.py | 33 +- .../services/impl/ragChatServiceImpl.py | 76 +- .../services/impl/ragDatasetServiceImpl.py | 2 +- leaudit.sh | 24 +- legal-platform-frontend | 2 +- scripts/regenerate_govdoc_html_report.py | 298 ++++++++ 10 files changed, 2151 insertions(+), 92 deletions(-) create mode 100644 docs/内部公文模块/内部公文前端拆分实施清单.md create mode 100644 docs/内部公文模块/报告UI样例.html create mode 100644 scripts/regenerate_govdoc_html_report.py diff --git a/docs/内部公文模块/内部公文前端拆分实施清单.md b/docs/内部公文模块/内部公文前端拆分实施清单.md new file mode 100644 index 0000000..b71ba2a --- /dev/null +++ b/docs/内部公文模块/内部公文前端拆分实施清单.md @@ -0,0 +1,513 @@ +# 内部公文前端拆分实施清单 + +## 1. 文档目的 + +本文档只解决一个问题: + +- 在不改变“内部公文”业务语义的前提下,如何把当前前端实现拆成一套与“交叉评查”同级的独立页面架构 + +本文档关注的是: + +- 页面编排边界 +- 组件职责边界 +- `govdoc` 与 `reviews / cross-checking` 的复用边界 +- 分阶段实施顺序 + +本文档不做以下事情: + +- 不改后端业务语义 +- 不要求照搬旧项目代码 +- 不把 `Collabora` 当成整个中栏预览架构 + +--- + +## 2. 结论先行 + +内部公文前端应按以下原则重构: + +> **像交叉评查一样独立成页,但复用 reviews 的定位型预览能力。** + +准确解释如下: + +- 内部公文应有自己独立的页面 orchestrator +- 内部公文应有自己独立的业务组件层 +- 内部公文应有自己独立的 TS service / adapter 层 +- 中栏 PDF / DOCX 预览不应重新发明一套,而应优先复用 `reviews` 已有能力 +- `Collabora` 只应作为 DOCX viewer,不应承担“问题定位主架构” + +因此,目标不是: + +- 把当前 `govdoc-audit` 页面继续补丁式扩写 + +而是: + +- 把内部公文前端收敛为“独立页面编排 + 统一预览协议 + 独立业务壳”的平台化实现 + +--- + +## 3. 
当前实现现状 + +## 3.1 当前内部公文前端入口 + +当前内部公文详情页主入口为: + +- [components/govdoc-audit/audit.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/components/govdoc-audit/audit.tsx:1) + +当前内部公文列表页主入口为: + +- [components/govdoc-audit/audits.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/components/govdoc-audit/audits.tsx:1) + +当前路由入口为: + +- [app/(audit)/govdoc/audits/page.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/app/(audit)/govdoc/audits/page.tsx:1) +- [app/(audit)/govdoc/detail/[documentId]/page.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/app/(audit)/govdoc/detail/[documentId]/page.tsx:1) + +当前详情页已经具备: + +- 顶部摘要与报告下载操作 +- 评查 / 结构 / 大纲 / 实体 tab +- 中栏文档视图 +- 右栏 findings / checked rules 展示 + +问题不在于“没有功能”,而在于“页面职责混装”。 + +--- + +## 3.2 当前详情页耦合点 + +当前 [audit.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/components/govdoc-audit/audit.tsx:1) 同时承担了以下职责: + +- 页面数据加载 +- 顶部操作区渲染 +- tab 状态切换 +- 结果统计条渲染 +- 中栏文档视图调度 +- 右栏问题面板调度 +- 规则弹窗调度 + +这会带来三个问题: + +- 页面 orchestrator 和业务组件未分层 +- 中栏预览协议没有向平台现有 `reviews` 能力对齐 +- 右栏问题区与 `reviews / cross-checking` 的定位交互无法复用 + +--- + +## 3.3 当前中栏预览为什么不应继续沿现状扩写 + +当前内部公文中栏主要使用: + +- [components/govdoc-audit/doc-view.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/components/govdoc-audit/doc-view.tsx:1) + +而平台现有成熟的“定位型预览”能力在: + +- [components/reviews/previewComponents/PdfPreviewTest.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/components/reviews/previewComponents/PdfPreviewTest.tsx:1) +- [components/reviews/previewComponents/DocxPreviewTest.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/components/reviews/previewComponents/DocxPreviewTest.tsx:1) +- [app/(audit)/reviews-test/ReviewsTestClient.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/app/(audit)/reviews-test/ReviewsTestClient.tsx:1) +- 
[app/(audit)/cross-checking/result/CrossCheckingResultClient.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/app/(audit)/cross-checking/result/CrossCheckingResultClient.tsx:1) + +必须明确: + +- PDF 中栏定位主能力不是 `Collabora` +- DOCX 中栏当前虽然使用 `CollaboraViewer`,但它承担的是文档渲染,不是完整的问题定位架构 + +如果内部公文要做到: + +- 点击问题点后定位到对应页 +- 对问题字段/段落做高亮 +- 为后续“问题行定位”保留升级空间 + +则中栏必须对齐现有平台预览输入协议,而不是继续把 `DocView` 做成一套孤岛实现。 + +--- + +## 4. 目标架构 + +## 4.1 总体原则 + +前端目标架构应满足以下四条: + +- 内部公文页面独立编排 +- 中栏预览能力平台复用 +- 业务面板 govdoc 自治 +- 数据适配集中在 adapter 层 + +可以概括为: + +> **govdoc 自己负责业务壳,platform 负责通用预览能力。** + +--- + +## 4.2 目标目录结构 + +建议拆分为以下结构: + +```text +legal-platform-frontend/ + app/(audit)/govdoc/ + audits/page.tsx + detail/[documentId]/page.tsx + + components/govdoc-audit/ + GovdocAuditListPage.tsx + GovdocAuditResultPage.tsx + GovdocSummaryHeader.tsx + GovdocFindingPanel.tsx + GovdocStructurePanel.tsx + GovdocOutlinePanel.tsx + GovdocEntityPanel.tsx + GovdocReportActions.tsx + + lib/api/govdoc-audit/ + api.ts + types.ts + adapters.ts + govdoc-routes.ts +``` + +说明如下: + +- `page.tsx` 只保留路由入口职责 +- `GovdocAuditResultPage.tsx` 负责详情页 orchestrator +- `GovdocAuditListPage.tsx` 负责列表页 orchestrator +- `Govdoc*Panel` 负责内部公文独有业务视图 +- `adapters.ts` 负责把 govdoc 后端返回结果转成前端视图模型 + +--- + +## 4.3 页面编排职责 + +### 详情页 orchestrator + +建议新增: + +- `components/govdoc-audit/GovdocAuditResultPage.tsx` + +该组件只负责: + +- 读取 `documentId / runId` +- 调用 govdoc API +- 维护 tab 状态 +- 维护当前激活问题点 +- 维护当前预览定位目标 +- 组装中栏与右栏 + +它不应承担: + +- 具体 finding 卡片渲染细节 +- 实体/结构/大纲具体 UI 细节 +- 预览底层渲染逻辑 + +这部分应当参照: + +- [CrossCheckingResultClient.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/app/(audit)/cross-checking/result/CrossCheckingResultClient.tsx:1) + +--- + +### 列表页 orchestrator + +建议新增: + +- `components/govdoc-audit/GovdocAuditListPage.tsx` + +该组件负责: + +- 列表数据加载 +- 筛选状态 +- 批量操作状态 +- 导出与删除 +- 跳转详情页 + +它应继续保持内部公文自己的筛选语义,但 UI 节奏应向平台文档列表页靠拢。 + +列表页设计参照: + +- 
[DocumentsListClient.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/app/(audit)/documents/list/DocumentsListClient.tsx:1) + +--- + +## 5. 与 reviews / cross-checking 的复用边界 + +## 5.1 应复用的能力 + +内部公文应复用以下能力: + +- PDF 中栏预览组件 +- DOCX 中栏预览组件 +- 问题点点击后的预览定位协议 +- 页码跳转、高亮、bbox / charPositions 定位能力 + +优先复用对象: + +- [PdfPreviewTest.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/components/reviews/previewComponents/PdfPreviewTest.tsx:1) +- [DocxPreviewTest.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/components/reviews/previewComponents/DocxPreviewTest.tsx:1) + +复用的是: + +- 预览能力 +- 定位协议 +- 用户交互模型 + +不是: + +- 合同业务语义 +- 卷宗业务命名 +- 旧页面外壳 + +--- + +## 5.2 不应复用的部分 + +以下部分不应直接复用: + +- `reviews-test` 自身的业务标题、业务字段命名 +- 合同/卷宗专有的右栏业务解释 +- `cross-checking` 的评分协同、提议投票、交叉意见面板 + +原因是: + +- 这些属于业务壳,而不是平台通用能力 + +内部公文应保留自己的: + +- findings 口径 +- checked rules 口径 +- 结构 / 大纲 / 实体口径 +- 报告下载口径 + +--- + +## 5.3 Collabora 的正确定位 + +`CollaboraViewer` 的边界必须明确: + +- 它是 DOCX viewer +- 它可以承担跳页、文本高亮、编辑/只读查看 +- 它不是内部公文详情页的业务 orchestrator +- 它也不是“问题行精确定位”的完整方案 + +因此: + +- `Collabora` 只能留在 `DocxPreviewTest` 这一层 +- 不应让 govdoc 页面继续直接围绕 `Collabora` 自己长出一套完整详情页体系 + +--- + +## 6. 必须新增的 adapter 层 + +## 6.1 为什么必须有 adapters.ts + +当前 `lib/api/govdoc-audit` 下已有: + +- `api.ts` +- `types.ts` +- `govdoc-routes.ts` + +但还缺一层: + +- `adapters.ts` + +这层必须存在,因为它承担的是“业务结果语义 -> 预览与页面视图语义”的转换。 + +如果没有这层,后果会是: + +- govdoc 页面自己维护一套 findings 展示模型 +- reviews 页面自己维护一套 preview target 模型 +- 同类定位交互会出现两套不兼容实现 + +--- + +## 6.2 adapters.ts 建议职责 + +`adapters.ts` 建议至少提供以下能力: + +- 将 `govdoc` 结果对象转换为结果页 view model +- 将 `finding / checked_rule` 转换为右栏展示项 +- 将 `finding / paragraph / entity` 转换为中栏跳转目标 +- 根据文件类型产出统一 preview target +- 将后端报告产物状态转换为按钮展示状态 + +建议输出的数据语义包括: + +- `previewKind` +- `previewPath` +- `activeTarget` +- `findingItems` +- `summaryCards` +- `reportActions` +- `structureItems` +- `outlineItems` +- `entityItems` + +这样后续页面层只编排,不解释后端字段细节。 + +--- + +## 7. 
分阶段实施顺序 + +## 7.1 第一阶段:补 adapter,不改页面语义 + +目标: + +- 先把数据适配层补齐 + +动作: + +- 新增 `lib/api/govdoc-audit/adapters.ts` +- 收敛 `audit.tsx` 里对原始接口字段的直接解释 +- 把 preview target 语义统一为: + - `page` + - `highlightValue` + - `bboxHighlight` + - `charPositions` + +本阶段收益: + +- 不改用户可见业务逻辑 +- 为后续替换中栏和右栏做稳定基础 + +--- + +## 7.2 第二阶段:拆详情页 orchestrator + +目标: + +- 让 govdoc 详情页像 `cross-checking` 一样拥有独立 orchestrator + +动作: + +- 新增 `GovdocAuditResultPage.tsx` +- 将现有 `audit.tsx` 逻辑迁入新组件 +- 路由入口改为挂载新组件 +- 顶部摘要、下载操作、tab 切换拆成子组件 + +本阶段收益: + +- 页面职责清晰 +- 后续中栏和右栏可以独立演进 + +--- + +## 7.3 第三阶段:切换中栏到定位型预览 + +目标: + +- 内部公文详情页中栏不再以 `DocView` 为核心 + +动作: + +- 根据文件类型切换到 `PdfPreviewTest / DocxPreviewTest` +- 从 govdoc adapter 输出统一 preview target +- 让右栏点击直接驱动中栏定位 + +本阶段注意: + +- PDF 定位优先支持 `bboxHighlight / charPositions` +- DOCX 优先支持 `targetPage + highlightValue` +- 不承诺此阶段立即做到“DOCX 行级精确定位” + +--- + +## 7.4 第四阶段:重构右栏与 tab 业务壳 + +目标: + +- 保留 govdoc 自己的业务面板,但交互模型对齐平台 + +动作: + +- 将当前 `RightPanel` 重构为 `GovdocFindingPanel` +- 将结构、大纲、实体分面板组件化 +- 收敛旧的孤立交互状态 + +本阶段收益: + +- govdoc 保持业务独立 +- 同时具备平台统一的交互体验 + +--- + +## 7.5 第五阶段:样式与布局收口 + +目标: + +- govdoc 页面在视觉上向平台现有绿色主题和通用 panel 节奏靠齐 + +动作: + +- 减少 `.govdoc-audit-scope` 中重复定义 +- 优先复用 `layout-primitives.css` +- 保留必要的 govdoc 业务样式命名空间 + +本阶段原则: + +- 先统一布局和交互节奏 +- 再减少样式重复 +- 不先做“大改视觉” + +--- + +## 8. 风险点与前置条件 + +## 8.1 最大风险不在前端组件,而在定位数据颗粒度 + +内部公文要实现“定位到哪一行有问题”,前端只是承载层,真正决定上限的是后端给的数据。 + +前端能稳定消费的数据类型分为两类: + +- PDF: + - `page` + - `bbox` + - `page_box` + - `char_positions` +- DOCX: + - `targetPage` + - `highlightValue` + - 未来如果需要更高精度,还需要更细粒度锚点 + +如果后端只给: + +- 问题描述 +- 规则结果 + +而不给定位数据,那么前端最多只能做到: + +- 页级定位 +- 文本关键字高亮 + +不能承诺做到稳定的“行级定位”。 + +--- + +## 8.2 不应在这一阶段做的事情 + +以下动作不建议和本次拆分同时进行: + +- 重写 govdoc 全部视觉设计 +- 把 govdoc 规则语义改造成合同/卷宗语义 +- 试图把所有 `reviews` 业务组件直接搬进 govdoc +- 在没有 adapter 的情况下直接大规模替换页面 + +原因很简单: + +- 这些动作会把“前端分层重构”和“业务改动”混在一起,增加回归风险 + +--- + +## 9. 
最终边界结论 + +内部公文前端的正确实现边界应锁定为: + +- **像交叉评查一样,独立成页** +- **像 reviews 一样,复用定位型预览能力** +- **像平台模块一样,数据解释集中在 adapter 层** +- **像内部公文自己一样,保留 findings / checked rules / structure / outline / entities 的业务语义** + +更直白地说: + +- `Govdoc 页面` 负责业务编排 +- `reviews 预览组件` 负责中栏定位能力 +- `Collabora` 只负责 DOCX 渲染 +- `adapters.ts` 负责把 govdoc 后端结果翻译成前端可复用语义 + +这就是内部公文前端后续实施的固定边界。 diff --git a/docs/内部公文模块/报告UI样例.html b/docs/内部公文模块/报告UI样例.html new file mode 100644 index 0000000..3e148d9 --- /dev/null +++ b/docs/内部公文模块/报告UI样例.html @@ -0,0 +1,654 @@ + + + + + + 内部公文报告 UI 样例 + + + +
+
+
+
+
+ 0 + 综合得分 +
+
+
当前样例沿用你提供的实际报告数据,不改业务语义
+
+ +
+
+
统一报告样式锚点
+

公文格式审核报告

+
买卖合同 (1).docx · 共 123 项问题 · 样例用于确认 UI / 配色方向
+
+ +
+
+
错误项
+
31error
+
+
+
警告项
+
92warning
+
+
+
提示项
+
0info
+
+
+
问题类别
+
4标题 / 发文 / 格式 / 其他
+
+
+ +
+ 错误 31 + 警告 92 + 提示 0 +
+
+
+ +
+ + +
+
+
+

问题明细

+ 保留当前报告语义,只收敛版式和视觉层级 +
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
编号规则严重度类别位置说明
F-c0dfd361 + GW-T-001 + 标题文种合规性 + error标题P-1 () +
目标实体「title」未识别到
+
原文:未识别到标题内容,无法继续执行标题文种合规校验。
+
建议:补全标题并确保标题文种符合规则要求。
+
F-a896eaa4 + GW-N-001 + 发文字号必须用六角括号 + error发文P-1 () +
目标实体「doc_number」未识别到
+
原文:未识别到发文字号,年份括号规则无法匹配。
+
建议:发文字号年份应用六角括号〔〕,不得使用方括号或圆括号。
+
F-087a4841 + GW-F-003 + 二级标题用楷体三号 + error格式P35 (heading_2) +
字体或字号不符合(实际 仿宋 字号未识别,期望 楷体 16pt)
+
原文:(一)甲方从乙方处购买:
+
建议:二级标题应使用楷体三号,保持同级标题样式一致。
+
F-37b4bb81 + GW-F-003 + 二级标题用楷体三号 + error格式P39 (heading_2) +
字体或字号不符合(实际 仿宋 字号未识别,期望 楷体 16pt)
+
原文:(二)质量要求:
+
建议:这一类同级标题问题可在正式版中按同一规则折叠、聚合展示。
+
F-b2140a78 + GW-F-003 + 二级标题用楷体三号 + warning格式P62 (heading_2) +
格式接近但未完全满足规则要求
+
原文:(一)交付时间: 。
+
建议:保留原有规则说明内容,只把告警与错误的视觉层级拉开。
+
+
+
+
+ + diff --git a/fastapi_modules/fastapi_leaudit/domian/vo/ragChatVo.py b/fastapi_modules/fastapi_leaudit/domian/vo/ragChatVo.py index ddf8022..e020a16 100644 --- a/fastapi_modules/fastapi_leaudit/domian/vo/ragChatVo.py +++ b/fastapi_modules/fastapi_leaudit/domian/vo/ragChatVo.py @@ -34,6 +34,7 @@ class RagMessageItemVO(BaseModel): answer: str = Field(...) feedback: dict | None = Field(None) retrieverResources: list[dict] | None = Field(None) + suggestedQuestions: list[str] = Field(default_factory=list) createdAt: int = Field(0) diff --git a/fastapi_modules/fastapi_leaudit/govdoc_engine/reporter/html_renderer.py b/fastapi_modules/fastapi_leaudit/govdoc_engine/reporter/html_renderer.py index 809442b..e591029 100644 --- a/fastapi_modules/fastapi_leaudit/govdoc_engine/reporter/html_renderer.py +++ b/fastapi_modules/fastapi_leaudit/govdoc_engine/reporter/html_renderer.py @@ -1,76 +1,594 @@ """把 AuditResult 渲染成单文件 HTML 报告。""" from __future__ import annotations + +from collections import Counter from html import escape + from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.result import AuditResult _CSS = """ -body { font-family: -apple-system, "PingFang SC", sans-serif; margin: 0; padding: 24px; - background: #f7f7f9; color: #1a1a1a; } -.header { display: flex; align-items: center; gap: 16px; margin-bottom: 24px; } -.score { width: 96px; height: 96px; border-radius: 50%; - background: conic-gradient(#22c55e var(--p), #e5e7eb var(--p)); - display: grid; place-items: center; font-weight: 700; font-size: 22px; color: #111; } -.score-inner { background: white; width: 76px; height: 76px; border-radius: 50%; - display: grid; place-items: center; } -.tag { padding: 2px 8px; border-radius: 999px; font-size: 12px; } -.error { background: #fee2e2; color: #b91c1c; } -.warning { background: #fef9c3; color: #a16207; } -.info { background: #dbeafe; color: #1d4ed8; } -table { width: 100%; border-collapse: collapse; background: white; border-radius: 8px; - overflow: hidden; 
box-shadow: 0 1px 3px rgba(0,0,0,0.06); } -th, td { padding: 10px 12px; text-align: left; border-bottom: 1px solid #f1f5f9; vertical-align: top; } -th { background: #f8fafc; font-size: 13px; } -td.msg { max-width: 480px; } -.context { color: #64748b; font-size: 12px; margin-top: 4px; } +* { box-sizing: border-box; } +html, body { margin: 0; padding: 0; } +body { + font-family: -apple-system, "PingFang SC", "Microsoft YaHei", sans-serif; + background: #f3f6f5; + color: #0f172a; +} +a { color: inherit; } +.page { + width: 100%; + padding: 20px 24px 32px; +} +.stack { + display: flex; + flex-direction: column; + gap: 20px; +} +.card { + background: #ffffff; + border: 1px solid #e2e8f0; + border-radius: 12px; + box-shadow: 0 1px 3px rgba(15, 23, 42, 0.08); + overflow: hidden; +} +.card-head { + height: 48px; + display: flex; + align-items: center; + justify-content: space-between; + gap: 12px; + padding: 0 20px; + border-bottom: 1px solid #e2e8f0; + background: #fcfdfd; +} +.card-title { + font-size: 14px; + font-weight: 600; + color: #1e293b; +} +.card-subtitle { + font-size: 12px; + color: #64748b; +} +.summary-grid { + display: grid; + grid-template-columns: 220px minmax(0, 1fr); + gap: 20px; + padding: 20px; +} +.score-box { + border: 1px solid #cfe4dc; + background: #f7fbf9; + border-radius: 10px; + padding: 20px; +} +.score-label { + font-size: 12px; + font-weight: 500; + color: #475569; +} +.score-value { + margin-top: 12px; + font-size: 42px; + line-height: 1; + font-weight: 600; + letter-spacing: -0.05em; + color: #0f172a; +} +.score-track { + margin-top: 16px; + height: 8px; + background: #dbe8e3; + border-radius: 999px; + overflow: hidden; +} +.score-fill { + height: 100%; + background: #00684a; +} +.score-note { + margin-top: 16px; + font-size: 12px; + line-height: 1.75; + color: #475569; +} +.summary-main { + min-width: 0; +} +.eyebrow { + display: inline-flex; + align-items: center; + height: 28px; + padding: 0 12px; + border: 1px solid #cfe4dc; + 
border-radius: 6px; + background: #e8f3ef; + color: #00684a; + font-size: 12px; + font-weight: 500; +} +.report-title { + margin: 12px 0 0; + font-size: 32px; + line-height: 1.25; + letter-spacing: -0.03em; + font-weight: 600; + color: #0f172a; +} +.report-meta { + margin-top: 8px; + font-size: 15px; + color: #475569; +} +.metrics { + margin-top: 20px; + display: grid; + grid-template-columns: repeat(4, minmax(0, 1fr)); + gap: 16px; +} +.metric { + border: 1px solid #e2e8f0; + border-radius: 10px; + background: #fcfdfd; + padding: 16px 20px; +} +.metric-label { + font-size: 13px; + font-weight: 500; + color: #64748b; +} +.metric-value { + margin-top: 12px; + display: flex; + align-items: baseline; + gap: 8px; +} +.metric-value strong { + font-size: 30px; + line-height: 1; + letter-spacing: -0.04em; + font-weight: 600; + color: #0f172a; +} +.metric-value span { + font-size: 13px; + color: #64748b; +} +.chips { + margin-top: 20px; + display: flex; + flex-wrap: wrap; + gap: 12px; +} +.chip, +.severity-tag { + display: inline-flex; + align-items: center; + border: 1px solid transparent; + border-radius: 6px; + font-weight: 600; +} +.chip { + height: 32px; + padding: 0 12px; + font-size: 12px; +} +.severity-tag { + height: 32px; + padding: 0 12px; + font-size: 12px; + text-transform: uppercase; +} +.error { + border-color: #fecaca; + background: #fef2f2; + color: #b91c1c; +} +.warning { + border-color: #fde68a; + background: #fffbeb; + color: #b45309; +} +.info { + border-color: #bfdbfe; + background: #eff6ff; + color: #1d4ed8; +} +.content-grid { + display: grid; + grid-template-columns: 340px minmax(0, 1fr); + gap: 20px; +} +.sidebar-body { + padding: 16px; + display: flex; + flex-direction: column; + gap: 16px; +} +.summary-row { + border: 1px solid #e2e8f0; + border-radius: 10px; + background: #fcfdfd; + padding: 16px; +} +.summary-row-label { + font-size: 12px; + font-weight: 500; + color: #64748b; +} +.summary-row-value { + margin-top: 8px; + font-size: 22px; + 
line-height: 1; + letter-spacing: -0.03em; + font-weight: 600; + color: #0f172a; +} +.summary-row-desc { + margin-top: 12px; + font-size: 13px; + line-height: 1.75; + color: #475569; +} +.table-toolbar { + height: 48px; + display: flex; + align-items: center; + justify-content: space-between; + gap: 12px; + padding: 0 20px; + border-bottom: 1px solid #e2e8f0; + background: #fcfdfd; +} +.toolbar-left { + min-width: 0; +} +.toolbar-title { + font-size: 14px; + font-weight: 600; + color: #1e293b; +} +.toolbar-desc { + margin-top: 2px; + font-size: 12px; + color: #64748b; +} +.toolbar-filters { + display: flex; + gap: 8px; +} +.filter { + display: inline-flex; + align-items: center; + height: 32px; + padding: 0 12px; + border: 1px solid #e2e8f0; + border-radius: 6px; + background: #ffffff; + color: #64748b; + font-size: 12px; + font-weight: 500; +} +.filter.active { + border-color: rgba(0, 104, 74, 0.2); + background: #e8f3ef; + color: #00684a; +} +.table-wrap { + overflow-x: auto; +} +table { + width: 100%; + min-width: 1320px; + border-collapse: collapse; +} +thead tr { + background: #f8fafc; + color: #475569; + font-size: 13px; + font-weight: 500; +} +th { + padding: 16px 20px; + text-align: left; + border-bottom: 1px solid #e2e8f0; + white-space: nowrap; +} +td { + padding: 20px; + vertical-align: top; + border-bottom: 1px solid #f1f5f9; +} +tbody tr:hover { + background: #f8fafc; +} +.mono { + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; +} +.id-cell { + font-size: 13px; + color: #64748b; +} +.rule-id { + font-size: 15px; + font-weight: 600; + color: #1e293b; +} +.rule-name { + margin-top: 4px; + font-size: 13px; + color: #64748b; +} +.category-cell { + font-size: 14px; + color: #334155; +} +.location-cell { + font-size: 13px; + color: #334155; +} +.message-cell { + min-width: 560px; +} +.message-main { + font-size: 15px; + line-height: 1.8; + color: #0f172a; +} +.context-box, +.suggestion-box { + margin-top: 12px; + border-radius: 6px; + padding: 
12px 16px; + font-size: 13px; + line-height: 1.8; +} +.context-box { + border: 1px solid #e2e8f0; + background: #f8fafc; + color: #475569; +} +.suggestion-box { + border: 1px solid #cfe4dc; + background: #f4faf7; + color: #0d6b4d; +} +.empty { + padding: 24px 20px; + text-align: center; + color: #64748b; + font-size: 14px; +} +@media (max-width: 1200px) { + .summary-grid, + .content-grid { + grid-template-columns: 1fr; + } + .metrics { + grid-template-columns: repeat(2, minmax(0, 1fr)); + } +} +@media (max-width: 720px) { + .page { + padding: 16px; + } + .metrics { + grid-template-columns: 1fr; + } + .table-toolbar, + .card-head { + height: auto; + min-height: 48px; + padding-top: 12px; + padding-bottom: 12px; + align-items: flex-start; + flex-direction: column; + } +} """ def render_html(result: AuditResult) -> str: - s = result.summary - score = s.score - pct = f"{score}%" - rows = [] - for f in result.findings: - loc = f.location - suggest = ( - f'
建议: {escape(f.suggestion)}
' - if f.suggestion else "" - ) - rows.append(f""" - - {escape(f.finding_id)} - {escape(f.rule_id)}
{escape(f.rule_name)} - {f.severity} - {escape(f.category)} - P{loc.paragraph_index} ({escape(loc.role or '')}) - {escape(f.message)} -
原文: {escape((loc.context or '')[:80])}
- {suggest} - -""") + summary = result.summary + score = int(summary.score or 0) + score_pct = max(0, min(score, 100)) + severity_counts = _severity_counts(result) + category_count = len([key for key, value in (summary.by_category or {}).items() if key and value]) + filename = escape(str(result.document.get("filename", ""))) + top_rule_id, top_rule_count = _top_rule(result) + line_range = _line_range(result) + entity_summary = _entity_summary(result) - body = f""" -公文审核报告 - -
-
{score}
-
-

公文格式审核报告

-
{escape(result.document.get('filename', ''))} · 共 {s.total_findings} 项
-
- 错误 {s.by_severity.get('error', 0)} - 警告 {s.by_severity.get('warning', 0)} - 提示 {s.by_severity.get('info', 0)} + rows = [] + for finding in result.findings: + location_label = _format_location(finding.location.paragraph_index) + context = escape((finding.location.context or "").strip()) + message = escape(finding.message) + suggestion = escape(finding.suggestion) if finding.suggestion else "按规则要求修正对应内容。" + + rows.append( + f""" + + {escape(finding.finding_id)} + +
{escape(finding.rule_id)}
+
{escape(finding.rule_name)}
+ + {escape(finding.severity)} + {escape(finding.category)} + {location_label} + +
{message}
+
原文:{context or "未提取到上下文"}
+
建议:{suggestion}
+ +""" + ) + + return f""" + + + + + 公文审核报告 + + + +
+
+
+
+
报告摘要
+
不改报告语义,仅收敛样式、配色与信息层级
+
+
+
+
综合得分
+
{score}
+
+
这份正式 HTML 报告沿用平台工作台的版式语言,突出摘要、明细和建议三层信息。
+
+ +
+
正式报告样式方向
+

公文格式审核报告

+
{filename} · 共 {summary.total_findings} 项问题 · 用作正式 HTML 报告输出
+ +
+
+
错误项
+
{severity_counts["error"]}error
+
+
+
警告项
+
{severity_counts["warning"]}warning
+
+
+
提示项
+
{severity_counts["info"]}info
+
+
+
问题类别
+
{category_count}标题 / 发文 / 格式 / 其他
+
+
+ +
+ 错误 {severity_counts["error"]} + 警告 {severity_counts["warning"]} + 提示 {severity_counts["info"]} +
+
+
+
+ +
+ + +
+
+
+
问题明细
+
保留当前报告语义,只收敛版式、层级和配色。
+
+
+ 全部 + 错误 + 警告 +
+
+
+ + + + + + + + + + + + + {''.join(rows) or ''} + +
编号规则严重度类别位置说明
未发现问题
+
+
+
-
- - - - - {''.join(rows) or ''} -
编号规则严重度类别位置说明
未发现问题
-""" - return body + +""" + + +def _severity_counts(result: AuditResult) -> dict[str, int]: + counts = Counter(finding.severity for finding in result.findings) + return { + "error": counts.get("error", 0), + "warning": counts.get("warning", 0), + "info": counts.get("info", 0), + } + + +def _top_rule(result: AuditResult) -> tuple[str, int]: + counter = Counter(finding.rule_id for finding in result.findings if finding.rule_id) + if not counter: + return "无", 0 + rule_id, count = counter.most_common(1)[0] + return rule_id, count + + +def _line_range(result: AuditResult) -> str: + indices = sorted( + { + int(finding.location.paragraph_index) + 1 + for finding in result.findings + if finding.location.paragraph_index is not None + } + ) + if not indices: + return "未定位" + if len(indices) == 1: + return f"第 {indices[0]} 行" + return f"第 {indices[0]} 行 - 第 {indices[-1]} 行" + + +def _entity_summary(result: AuditResult) -> str: + expected = ["title", "doc_number", "recipient", "date"] + missing = [key for key in expected if not result.entities.get(key)] + if not missing: + return "核心实体齐全" + if len(missing) == len(expected): + return "标题 / 发文" + return "缺少 " + " / ".join(missing[:2]) + + +def _format_location(paragraph_index: int | None) -> str: + if paragraph_index is None: + return "未定位" + return f"第 {int(paragraph_index) + 1} 行" diff --git a/fastapi_modules/fastapi_leaudit/services/impl/govdocServiceImpl.py b/fastapi_modules/fastapi_leaudit/services/impl/govdocServiceImpl.py index 2e658e4..5ba5d9f 100644 --- a/fastapi_modules/fastapi_leaudit/services/impl/govdocServiceImpl.py +++ b/fastapi_modules/fastapi_leaudit/services/impl/govdocServiceImpl.py @@ -7,7 +7,7 @@ import json import mimetypes import time from dataclasses import dataclass -from datetime import datetime +from datetime import date, datetime from pathlib import Path from typing import Any @@ -60,6 +60,20 @@ class GovdocServiceImpl(IGovdocService): self.OssService = OssService or OssServiceImpl() self.Storage = 
StorageAdapter() + def _parse_date_filter(self, value: str | None, field_name: str) -> date | None: + if value is None: + return None + normalized = value.strip() + if not normalized: + return None + try: + return date.fromisoformat(normalized) + except ValueError as exc: + raise LeauditException( + StatusCodeEnum.HTTP_400_BAD_REQUEST, + f"{field_name} 格式非法,应为 YYYY-MM-DD", + ) from exc + # ── 文档 ────────────────────────────────────────────── async def UploadDocument( @@ -250,12 +264,14 @@ class GovdocServiceImpl(IGovdocService): if resultStatus: filters.append("COALESCE(gr.result_status, '') = :result_status") params["result_status"] = resultStatus.strip() - if dateFrom: - filters.append("d.created_at >= CAST(:date_from AS date)") - params["date_from"] = dateFrom.strip() - if dateTo: - filters.append("d.created_at < (CAST(:date_to AS date) + INTERVAL '1 day')") - params["date_to"] = dateTo.strip() + parsedDateFrom = self._parse_date_filter(dateFrom, "dateFrom") + parsedDateTo = self._parse_date_filter(dateTo, "dateTo") + if parsedDateFrom: + filters.append("d.created_at::date >= :date_from") + params["date_from"] = parsedDateFrom + if parsedDateTo: + filters.append("d.created_at::date <= :date_to") + params["date_to"] = parsedDateTo whereClause = " AND ".join(filters) @@ -901,9 +917,10 @@ class GovdocServiceImpl(IGovdocService): artifact = await self._get_report_artifact(runId, "html_report") if not artifact: return {"runId": runId, "htmlUrl": ""} + content = await self.OssService.DownloadBytes(str(artifact["oss_url"])) return { "runId": runId, - "htmlUrl": await self.OssService.PresignGetUrl(str(artifact["oss_url"])), + "html": content.decode("utf-8"), } async def GetReportDocx(self, runId: int) -> dict[str, Any]: diff --git a/fastapi_modules/fastapi_leaudit/services/impl/ragChatServiceImpl.py b/fastapi_modules/fastapi_leaudit/services/impl/ragChatServiceImpl.py index f97faf1..33cf0bb 100644 --- a/fastapi_modules/fastapi_leaudit/services/impl/ragChatServiceImpl.py 
+++ b/fastapi_modules/fastapi_leaudit/services/impl/ragChatServiceImpl.py @@ -4,6 +4,7 @@ import json import uuid from typing import AsyncGenerator +import httpx from sqlalchemy import text from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession @@ -25,6 +26,7 @@ from fastapi_modules.fastapi_leaudit.domian.vo.ragChatVo import ( RagMessagePageVO, RagOperationResultVO, ) +from fastapi_modules.fastapi_leaudit.rag_engine.config import RAG_CONFIG from fastapi_modules.fastapi_leaudit.rag_engine.generator import generate_stream from fastapi_modules.fastapi_leaudit.rag_engine.question_chains import generate_followups from fastapi_modules.fastapi_leaudit.services.ragChatService import IRagChatService @@ -194,7 +196,7 @@ class RagChatServiceImpl(IRagChatService): await session.execute( text( """ - SELECT message_id, role, content, sources, feedback, created_at + SELECT message_id, role, content, sources, metadata, feedback, created_at FROM rag_message WHERE conversation_id = :conversation_id ORDER BY created_at ASC @@ -216,6 +218,11 @@ class RagChatServiceImpl(IRagChatService): row = items[idx] if row["role"] == "user": answer = items[idx + 1] if idx + 1 < len(items) and items[idx + 1]["role"] == "assistant" else None + answer_sources = self._parse_json_field(answer.get("sources")) if answer else [] + answer_metadata = self._parse_json_field(answer.get("metadata")) if answer else {} + suggested_questions = answer_metadata.get("suggested_questions") if isinstance(answer_metadata, dict) else [] + if not isinstance(suggested_questions, list): + suggested_questions = [] data.append( RagMessageItemVO( id=(answer["message_id"] if answer else row["message_id"]), @@ -223,7 +230,8 @@ class RagChatServiceImpl(IRagChatService): query=row["content"], answer=answer["content"] if answer else "", feedback=({"rating": answer["feedback"]} if answer and answer.get("feedback") else None), - retrieverResources=(answer.get("sources") if answer else None), + 
retrieverResources=answer_sources or None, + suggestedQuestions=[str(item) for item in suggested_questions], createdAt=int(row["created_at"].timestamp()) if row.get("created_at") else 0, ) ) @@ -392,6 +400,18 @@ class RagChatServiceImpl(IRagChatService): area = row.get("area") or "" return area in ("", "省级", user_area or "") or bool(row.get("dataset_public")) + def _parse_json_field(self, value): + if value is None: + return {} + if isinstance(value, (dict, list)): + return value + if isinstance(value, str): + try: + return json.loads(value) + except Exception: + return {} + return {} + async def _ensure_conversation(self, user_id: int, conversation_id: str | None, app_id: int | None) -> str: if conversation_id and conversation_id != "-1": async with GetAsyncSession() as session: @@ -450,7 +470,7 @@ class RagChatServiceImpl(IRagChatService): await session.execute( text( """ - SELECT id, name, collection_name, retrieval_model + SELECT id, name, collection_name, retrieval_model, embedding_model FROM rag_dataset WHERE id = :dataset_id AND deleted_at IS NULL LIMIT 1 @@ -475,7 +495,12 @@ class RagChatServiceImpl(IRagChatService): return [], dataset.get("name") or "" try: collection = get_chroma().get_or_create_collection(dataset["collection_name"]) - result = collection.query(query_texts=[query], n_results=max(top_k, 1)) + query_embedding = await self._embed_texts([query], dataset.get("embedding_model") or "") + result = collection.query( + query_embeddings=query_embedding, + n_results=max(top_k, 1), + include=["documents", "metadatas", "distances"], + ) docs = (result.get("documents") or [[]])[0] metas = (result.get("metadatas") or [[]])[0] distances = (result.get("distances") or [[]])[0] @@ -483,7 +508,8 @@ class RagChatServiceImpl(IRagChatService): for idx, doc in enumerate(docs): meta = metas[idx] if idx < len(metas) else {} dist = distances[idx] if idx < len(distances) else 0.0 - score = 1 - float(dist or 0.0) + distance = max(0.0, float(dist or 0.0)) + score = 1.0 
/ (1.0 + distance) if score_threshold is not None and score < score_threshold: continue chunks.append( @@ -501,6 +527,46 @@ class RagChatServiceImpl(IRagChatService): except Exception: return [], dataset.get("name") or "" + async def _embed_texts(self, texts: list[str], model_name: str) -> list[list[float]]: + embed_url = (RAG_CONFIG.get("EMBED_URL") or "").strip() or f"{RAG_CONFIG['LLM_BASE_URL'].rstrip('/')}/embeddings" + embed_key = (RAG_CONFIG.get("EMBED_KEY") or "").strip() or RAG_CONFIG["LLM_API_KEY"] + embed_model = model_name or (RAG_CONFIG.get("EMBED_MODEL") or "").strip() or "text-embedding-v4" + batch_size = max(1, int(RAG_CONFIG.get("EMBED_BATCH_SIZE") or 10)) + if not embed_url or not embed_key: + raise LeauditException(StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR, "未配置可用的向量化服务") + + embeddings: list[list[float]] = [] + async with httpx.AsyncClient(timeout=120.0) as client: + for start in range(0, len(texts), batch_size): + batch_texts = texts[start:start + batch_size] + try: + response = await client.post( + embed_url, + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {embed_key}", + }, + json={"model": embed_model, "input": batch_texts}, + ) + response.raise_for_status() + except httpx.HTTPStatusError as exc: + error_message = exc.response.text.strip() or f"{exc.response.status_code} {exc.response.reason_phrase}" + raise LeauditException( + StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR, + f"向量化服务调用失败: {error_message[:300]}", + ) from exc + + payload = response.json() + rows = payload.get("data") or [] + batch_embeddings = [row.get("embedding") for row in rows if isinstance(row, dict) and row.get("embedding")] + if len(batch_embeddings) != len(batch_texts): + raise LeauditException(StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR, "向量化结果数量异常") + embeddings.extend(batch_embeddings) + + if len(embeddings) != len(texts): + raise LeauditException(StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR, "向量化结果数量异常") + return embeddings + 
def _build_sources(self, context_chunks: list[dict], dataset_name: str) -> list[dict]: return [ { diff --git a/fastapi_modules/fastapi_leaudit/services/impl/ragDatasetServiceImpl.py b/fastapi_modules/fastapi_leaudit/services/impl/ragDatasetServiceImpl.py index 893290d..360988e 100644 --- a/fastapi_modules/fastapi_leaudit/services/impl/ragDatasetServiceImpl.py +++ b/fastapi_modules/fastapi_leaudit/services/impl/ragDatasetServiceImpl.py @@ -1186,7 +1186,7 @@ class RagDatasetServiceImpl(IRagDatasetService): content = documents[index] if index < len(documents) else "" metadata = metadatas[index] if index < len(metadatas) and isinstance(metadatas[index], dict) else {} distance = float(distances[index]) if index < len(distances) and distances[index] is not None else 1.0 - score = max(0.0, min(1.0, 1.0 - distance)) + score = max(0.0, min(1.0, 1.0 / (1.0 + max(0.0, distance)))) if score_threshold_enabled and score_threshold is not None and score < score_threshold: continue diff --git a/leaudit.sh b/leaudit.sh index 32dbc2d..3c329c6 100755 --- a/leaudit.sh +++ b/leaudit.sh @@ -167,10 +167,8 @@ start_backend() { log_info "启动后端服务 (端口: $BACKEND_PORT)..." : > "$BACKEND_LOG" - ( - cd "$BACKEND_DIR" - exec "$BACKEND_PYTHON" run.py - ) >> "$BACKEND_LOG" 2>&1 & + nohup bash -lc "cd \"$BACKEND_DIR\" && exec \"$BACKEND_PYTHON\" run.py" \ + >> "$BACKEND_LOG" 2>&1 < /dev/null & pid=$! sleep 2 @@ -201,10 +199,8 @@ start_frontend() { log_info "启动前端开发服务 (端口: $FRONTEND_DEV_PORT,代理入口: $FRONTEND_PUBLIC_PORT)..." : > "$FRONTEND_LOG" - ( - cd "$FRONTEND_DIR" - exec npm run dev:dev - ) >> "$FRONTEND_LOG" 2>&1 & + nohup bash -lc "cd \"$FRONTEND_DIR\" && exec npm run dev:dev" \ + >> "$FRONTEND_LOG" 2>&1 < /dev/null & pid=$! sleep 4 @@ -238,10 +234,8 @@ start_worker() { log_info "启动 Worker 服务..." 
: > "$WORKER_LOG" - ( - cd "$PROJECT_DIR" - exec "$WORKER_SCRIPT" - ) >> "$WORKER_LOG" 2>&1 & + nohup bash -lc "cd \"$PROJECT_DIR\" && exec \"$WORKER_SCRIPT\"" \ + >> "$WORKER_LOG" 2>&1 < /dev/null & pid=$! echo "$pid" > "$WORKER_PID_FILE" sleep 2 @@ -273,10 +267,8 @@ start_beat() { log_info "启动 Beat 调度服务..." : > "$BEAT_LOG" - ( - cd "$PROJECT_DIR" - exec "$BEAT_SCRIPT" - ) >> "$BEAT_LOG" 2>&1 & + nohup bash -lc "cd \"$PROJECT_DIR\" && exec \"$BEAT_SCRIPT\"" \ + >> "$BEAT_LOG" 2>&1 < /dev/null & pid=$! echo "$pid" > "$BEAT_PID_FILE" sleep 2 diff --git a/legal-platform-frontend b/legal-platform-frontend index f6bb4aa..d541eb7 160000 --- a/legal-platform-frontend +++ b/legal-platform-frontend @@ -1 +1 @@ -Subproject commit f6bb4aa5524ee4325bdd871c5f7a21b1543f8d80 +Subproject commit d541eb74aee5d6668ac3bd6720db4a3b0eeb896e diff --git a/scripts/regenerate_govdoc_html_report.py b/scripts/regenerate_govdoc_html_report.py new file mode 100644 index 0000000..5c307c4 --- /dev/null +++ b/scripts/regenerate_govdoc_html_report.py @@ -0,0 +1,298 @@ +#!/usr/bin/env python3 +"""按已有 govdoc run 重生成 HTML 报告并覆盖 OSS 产物。""" + +from __future__ import annotations + +import argparse +import asyncio +import hashlib +import json +from typing import Any + +from sqlalchemy import text + +from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession +from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.result import ( + AuditResult, + AuditSummary, + CheckedRule, + OutlineNode, + StructureItem, +) +from fastapi_modules.fastapi_leaudit.govdoc_engine.models import Finding, Location +from fastapi_modules.fastapi_leaudit.govdoc_engine.parser.entities import SemanticEntity +from fastapi_modules.fastapi_leaudit.govdoc_engine.reporter.html_renderer import render_html +from fastapi_modules.fastapi_leaudit.services.impl.ossServiceImpl import OssServiceImpl + + +def _parse_json(raw: Any) -> Any: + if raw is None or raw == "": + return None + if isinstance(raw, (dict, list)): + 
return raw + try: + return json.loads(raw) + except Exception: + return None + + +def _build_checked_rules(rule_rows: list[dict[str, Any]]) -> list[CheckedRule]: + checked_rules: list[CheckedRule] = [] + seen_rule_ids: set[str] = set() + for row in rule_rows: + rule_id = str(row["rule_id"]) + if rule_id in seen_rule_ids: + continue + seen_rule_ids.add(rule_id) + status = str(row.get("result") or "pass") + checked_rules.append( + CheckedRule( + rule_id=rule_id, + name=row.get("rule_name") or rule_id, + severity=row.get("severity") or "info", + category=row.get("category") or "", + status=status if status in {"pass", "fail", "skipped"} else "pass", + skip_reason=row.get("skip_reason") or "", + ) + ) + return checked_rules + + +def _build_findings(rule_rows: list[dict[str, Any]]) -> list[Finding]: + findings: list[Finding] = [] + for index, row in enumerate(rule_rows): + if row.get("result") != "fail": + continue + paragraph_index = int(row.get("paragraph_index") or 0) + findings.append( + Finding( + finding_id=f"{row['rule_id']}-{paragraph_index or index}", + rule_id=str(row["rule_id"]), + rule_name=row.get("rule_name") or str(row["rule_id"]), + severity=row.get("severity") or "info", + category=row.get("category") or "", + location=Location( + paragraph_index=paragraph_index, + role=row.get("location_path"), + char_start=0, + char_end=0, + context=row.get("paragraph_text") or "", + ), + actual=_parse_json(row.get("actual")) or {}, + expected=_parse_json(row.get("expected")) or {}, + message=row.get("message") or "", + suggestion=row.get("suggestion") or "", + evidence=str(row.get("evidence") or ""), + confidence=1.0, + ) + ) + return findings + + +def _build_summary(run_row: dict[str, Any], findings: list[Finding]) -> AuditSummary: + severity_stats: dict[str, int] = {} + category_stats: dict[str, int] = {} + for finding in findings: + severity_stats[finding.severity] = severity_stats.get(finding.severity, 0) + 1 + if finding.category: + 
category_stats[finding.category] = category_stats.get(finding.category, 0) + 1 + return AuditSummary( + score=int(float(run_row.get("total_score") or 0)), + total_findings=len(findings), + by_severity=severity_stats, + by_category=category_stats, + passed_count=int(run_row.get("passed_count") or 0), + failed_count=int(run_row.get("failed_count") or 0), + skipped_count=int(run_row.get("skipped_count") or 0), + ) + + +def _normalize_structure_item(item: dict[str, Any]) -> dict[str, Any]: + return { + "role": item.get("role"), + "label": item.get("label") or "", + "count": item.get("count") or 0, + "expected": bool(item.get("expected", False)), + "paragraph_indices": item.get("paragraphIndices") or item.get("paragraph_indices") or [], + "samples": item.get("samples") or [], + "char_total": item.get("charTotal") or item.get("char_total") or 0, + "dominant_font": item.get("dominantFont") or item.get("dominant_font"), + "dominant_size_pt": item.get("dominantSizePt") or item.get("dominant_size_pt"), + "style_uniform": bool(item.get("styleUniform", item.get("style_uniform", True))), + } + + +def _normalize_outline_node(item: dict[str, Any]) -> dict[str, Any]: + return { + "paragraph_index": item.get("paragraphIndex") or item.get("paragraph_index") or 0, + "level": item.get("level") or 0, + "text": item.get("text") or "", + "children": [_normalize_outline_node(child) for child in (item.get("children") or [])], + } + + +async def regenerate_html_report(run_id: int) -> None: + oss_service = OssServiceImpl() + + async with GetAsyncSession() as session: + run_row = ( + await session.execute( + text( + """ + SELECT + gr.id, + gr.document_id, + gr.total_score, + gr.passed_count, + gr.failed_count, + gr.skipped_count, + gr.result_summary_json, + d.region, + f.file_name + FROM govdoc_runs gr + JOIN leaudit_documents d + ON d.id = gr.document_id + AND d.deleted_at IS NULL + JOIN leaudit_document_files f + ON f.document_id = d.id + AND f.file_role = 'original' + AND f.is_active = 
true + AND f.deleted_at IS NULL + WHERE gr.id = :run_id + AND gr.deleted_at IS NULL + LIMIT 1 + """ + ), + {"run_id": run_id}, + ) + ).mappings().first() + if not run_row: + raise RuntimeError(f"run {run_id} 不存在") + + rule_rows = ( + await session.execute( + text( + """ + SELECT + rule_id, + rule_name, + severity, + category, + result, + skip_reason, + message, + suggestion, + actual, + expected, + evidence, + paragraph_index, + paragraph_text, + location_path + FROM govdoc_rule_results + WHERE run_id = :run_id + AND deleted_at IS NULL + ORDER BY id ASC + """ + ), + {"run_id": run_id}, + ) + ).mappings().all() + + artifact_row = ( + await session.execute( + text( + """ + SELECT id, file_name, oss_url + FROM govdoc_report_artifacts + WHERE run_id = :run_id + AND artifact_type = 'html_report' + AND deleted_at IS NULL + ORDER BY id DESC + LIMIT 1 + """ + ), + {"run_id": run_id}, + ) + ).mappings().first() + if not artifact_row: + raise RuntimeError(f"run {run_id} 没有 html_report 产物记录") + + aux = _parse_json(run_row.get("result_summary_json")) or {} + findings = _build_findings(rule_rows) + result = AuditResult( + audit_id=str(run_id), + document={ + "documentId": int(run_row["document_id"]), + "filename": run_row.get("file_name") or "", + }, + summary=_build_summary(run_row, findings), + findings=findings, + checked_rules=_build_checked_rules(rule_rows), + structure=[ + StructureItem.model_validate(_normalize_structure_item(item)) + for item in aux.get("structure", []) + ], + outline=[ + OutlineNode.model_validate(_normalize_outline_node(item)) + for item in aux.get("outline", []) + ], + entities={ + name: SemanticEntity.model_validate(value) + for name, value in (aux.get("entities") or {}).items() + if value is not None + }, + ) + + html = render_html(result) + html_bytes = html.encode("utf-8") + sha256 = hashlib.sha256(html_bytes).hexdigest() + + await oss_service.UploadText( + ObjectKey=str(artifact_row["oss_url"]), + Content=html, + ContentType="text/html; 
charset=utf-8", + ) + + await session.execute( + text( + """ + UPDATE govdoc_report_artifacts + SET file_size = :file_size, + sha256 = :sha256, + mime_type = 'text/html; charset=utf-8', + updated_at = now() + WHERE id = :artifact_id + """ + ), + { + "artifact_id": int(artifact_row["id"]), + "file_size": len(html_bytes), + "sha256": sha256, + }, + ) + await session.commit() + + print( + json.dumps( + { + "runId": run_id, + "documentId": int(run_row["document_id"]), + "fileName": run_row.get("file_name") or "", + "artifactOssKey": artifact_row["oss_url"], + "htmlBytes": len(html_bytes), + "sha256": sha256, + }, + ensure_ascii=False, + ) + ) + + +def main() -> None: + parser = argparse.ArgumentParser(description="重生成 govdoc HTML 报告") + parser.add_argument("run_id", type=int, help="govdoc run id") + args = parser.parse_args() + asyncio.run(regenerate_html_report(args.run_id)) + + +if __name__ == "__main__": + main()