diff --git a/app.toml b/app.toml index c2c87d7..dfb0181 100644 --- a/app.toml +++ b/app.toml @@ -33,12 +33,12 @@ BUCKET = "leaudit" REGION = "" [LLM] -BASE_URL = "https://hub.leke.run/qwen/v1/chat/completions" +BASE_URL = "https://hub.leke.run/qwen/v1" MODEL = "qwen3.5-35b-a3b" API_KEY = "sk-6c7466b543b947ffadc50a5d79135712" [VLM] -BASE_URL = "https://hub.leke.run/qwen/v1/chat/completions" +BASE_URL = "https://hub.leke.run/qwen/v1" MODEL = "qwen3.5-35b-a3b" API_KEY = "sk-6c7466b543b947ffadc50a5d79135712" diff --git a/docs/内部公文模块/内部公文前端拆分实施清单.md b/docs/内部公文模块/内部公文前端拆分实施清单.md new file mode 100644 index 0000000..b71ba2a --- /dev/null +++ b/docs/内部公文模块/内部公文前端拆分实施清单.md @@ -0,0 +1,513 @@ +# 内部公文前端拆分实施清单 + +## 1. 文档目的 + +本文档只解决一个问题: + +- 在不改变“内部公文”业务语义的前提下,如何把当前前端实现拆成一套与“交叉评查”同级的独立页面架构 + +本文档关注的是: + +- 页面编排边界 +- 组件职责边界 +- `govdoc` 与 `reviews / cross-checking` 的复用边界 +- 分阶段实施顺序 + +本文档不做以下事情: + +- 不改后端业务语义 +- 不要求照搬旧项目代码 +- 不把 `Collabora` 当成整个中栏预览架构 + +--- + +## 2. 结论先行 + +内部公文前端应按以下原则重构: + +> **像交叉评查一样独立成页,但复用 reviews 的定位型预览能力。** + +准确解释如下: + +- 内部公文应有自己独立的页面 orchestrator +- 内部公文应有自己独立的业务组件层 +- 内部公文应有自己独立的 TS service / adapter 层 +- 中栏 PDF / DOCX 预览不应重新发明一套,而应优先复用 `reviews` 已有能力 +- `Collabora` 只应作为 DOCX viewer,不应承担“问题定位主架构” + +因此,目标不是: + +- 把当前 `govdoc-audit` 页面继续补丁式扩写 + +而是: + +- 把内部公文前端收敛为“独立页面编排 + 统一预览协议 + 独立业务壳”的平台化实现 + +--- + +## 3. 当前实现现状 + +## 3.1 当前内部公文前端入口 + +当前内部公文详情页主入口为: + +- [components/govdoc-audit/audit.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/components/govdoc-audit/audit.tsx:1) + +当前内部公文列表页主入口为: + +- [components/govdoc-audit/audits.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/components/govdoc-audit/audits.tsx:1) + +当前路由入口为: + +- [app/(audit)/govdoc/audits/page.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/app/(audit)/govdoc/audits/page.tsx:1) +- [app/(audit)/govdoc/detail/[documentId]/page.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/app/(audit)/govdoc/detail/[documentId]/page.tsx:1) + +当前详情页已经具备: + +- 顶部摘要与报告下载操作 +- 评查 / 结构 / 大纲 / 实体 tab +- 中栏文档视图 +- 右栏 findings / checked rules 展示 + +问题不在于“没有功能”,而在于“页面职责混装”。 + +--- + +## 3.2 当前详情页耦合点 + +当前 [audit.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/components/govdoc-audit/audit.tsx:1) 同时承担了以下职责: + +- 页面数据加载 +- 顶部操作区渲染 +- tab 状态切换 +- 结果统计条渲染 +- 中栏文档视图调度 +- 右栏问题面板调度 +- 规则弹窗调度 + +这会带来三个问题: + +- 页面 orchestrator 和业务组件未分层 +- 中栏预览协议没有向平台现有 `reviews` 能力对齐 +- 右栏问题区与 `reviews / cross-checking` 的定位交互无法复用 + +--- + +## 3.3 当前中栏预览为什么不应继续沿现状扩写 + +当前内部公文中栏主要使用: + +- [components/govdoc-audit/doc-view.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/components/govdoc-audit/doc-view.tsx:1) + +而平台现有成熟的“定位型预览”能力在: + +- [components/reviews/previewComponents/PdfPreviewTest.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/components/reviews/previewComponents/PdfPreviewTest.tsx:1) +- [components/reviews/previewComponents/DocxPreviewTest.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/components/reviews/previewComponents/DocxPreviewTest.tsx:1) +- [app/(audit)/reviews-test/ReviewsTestClient.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/app/(audit)/reviews-test/ReviewsTestClient.tsx:1) +- [app/(audit)/cross-checking/result/CrossCheckingResultClient.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/app/(audit)/cross-checking/result/CrossCheckingResultClient.tsx:1) + +必须明确: + +- PDF 中栏定位主能力不是 `Collabora` +- DOCX 中栏当前虽然使用 `CollaboraViewer`,但它承担的是文档渲染,不是完整的问题定位架构 + +如果内部公文要做到: + +- 点击问题点后定位到对应页 +- 对问题字段/段落做高亮 +- 为后续“问题行定位”保留升级空间 + +则中栏必须对齐现有平台预览输入协议,而不是继续把 `DocView` 做成一套孤岛实现。 + +--- + +## 4. 目标架构 + +## 4.1 总体原则 + +前端目标架构应满足以下四条: + +- 内部公文页面独立编排 +- 中栏预览能力平台复用 +- 业务面板 govdoc 自治 +- 数据适配集中在 adapter 层 + +可以概括为: + +> **govdoc 自己负责业务壳,platform 负责通用预览能力。** + +--- + +## 4.2 目标目录结构 + +建议拆分为以下结构: + +```text +legal-platform-frontend/ + app/(audit)/govdoc/ + audits/page.tsx + detail/[documentId]/page.tsx + + components/govdoc-audit/ + GovdocAuditListPage.tsx + GovdocAuditResultPage.tsx + GovdocSummaryHeader.tsx + GovdocFindingPanel.tsx + GovdocStructurePanel.tsx + GovdocOutlinePanel.tsx + GovdocEntityPanel.tsx + GovdocReportActions.tsx + + lib/api/govdoc-audit/ + api.ts + types.ts + adapters.ts + govdoc-routes.ts +``` + +说明如下: + +- `page.tsx` 只保留路由入口职责 +- `GovdocAuditResultPage.tsx` 负责详情页 orchestrator +- `GovdocAuditListPage.tsx` 负责列表页 orchestrator +- `Govdoc*Panel` 负责内部公文独有业务视图 +- `adapters.ts` 负责把 govdoc 后端返回结果转成前端视图模型 + +--- + +## 4.3 页面编排职责 + +### 详情页 orchestrator + +建议新增: + +- `components/govdoc-audit/GovdocAuditResultPage.tsx` + +该组件只负责: + +- 读取 `documentId / runId` +- 调用 govdoc API +- 维护 tab 状态 +- 维护当前激活问题点 +- 维护当前预览定位目标 +- 组装中栏与右栏 + +它不应承担: + +- 具体 finding 卡片渲染细节 +- 实体/结构/大纲具体 UI 细节 +- 预览底层渲染逻辑 + +这部分应当参照: + +- [CrossCheckingResultClient.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/app/(audit)/cross-checking/result/CrossCheckingResultClient.tsx:1) + +--- + +### 列表页 orchestrator + +建议新增: + +- `components/govdoc-audit/GovdocAuditListPage.tsx` + +该组件负责: + +- 列表数据加载 +- 筛选状态 +- 批量操作状态 +- 导出与删除 +- 跳转详情页 + +它应继续保持内部公文自己的筛选语义,但 UI 节奏应向平台文档列表页靠拢。 + +列表页设计参照: + +- [DocumentsListClient.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/app/(audit)/documents/list/DocumentsListClient.tsx:1) + +--- + +## 5. 与 reviews / cross-checking 的复用边界 + +## 5.1 应复用的能力 + +内部公文应复用以下能力: + +- PDF 中栏预览组件 +- DOCX 中栏预览组件 +- 问题点点击后的预览定位协议 +- 页码跳转、高亮、bbox / charPositions 定位能力 + +优先复用对象: + +- [PdfPreviewTest.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/components/reviews/previewComponents/PdfPreviewTest.tsx:1) +- [DocxPreviewTest.tsx](/home/wren-dev/Porject/leaudit-platform/legal-platform-frontend/components/reviews/previewComponents/DocxPreviewTest.tsx:1) + +复用的是: + +- 预览能力 +- 定位协议 +- 用户交互模型 + +不是: + +- 合同业务语义 +- 卷宗业务命名 +- 旧页面外壳 + +--- + +## 5.2 不应复用的部分 + +以下部分不应直接复用: + +- `reviews-test` 自身的业务标题、业务字段命名 +- 合同/卷宗专有的右栏业务解释 +- `cross-checking` 的评分协同、提议投票、交叉意见面板 + +原因是: + +- 这些属于业务壳,而不是平台通用能力 + +内部公文应保留自己的: + +- findings 口径 +- checked rules 口径 +- 结构 / 大纲 / 实体口径 +- 报告下载口径 + +--- + +## 5.3 Collabora 的正确定位 + +`CollaboraViewer` 的边界必须明确: + +- 它是 DOCX viewer +- 它可以承担跳页、文本高亮、编辑/只读查看 +- 它不是内部公文详情页的业务 orchestrator +- 它也不是“问题行精确定位”的完整方案 + +因此: + +- `Collabora` 只能留在 `DocxPreviewTest` 这一层 +- 不应让 govdoc 页面继续直接围绕 `Collabora` 自己长出一套完整详情页体系 + +--- + +## 6. 必须新增的 adapter 层 + +## 6.1 为什么必须有 adapters.ts + +当前 `lib/api/govdoc-audit` 下已有: + +- `api.ts` +- `types.ts` +- `govdoc-routes.ts` + +但还缺一层: + +- `adapters.ts` + +这层必须存在,因为它承担的是“业务结果语义 -> 预览与页面视图语义”的转换。 + +如果没有这层,后果会是: + +- govdoc 页面自己维护一套 findings 展示模型 +- reviews 页面自己维护一套 preview target 模型 +- 同类定位交互会出现两套不兼容实现 + +--- + +## 6.2 adapters.ts 建议职责 + +`adapters.ts` 建议至少提供以下能力: + +- 将 `govdoc` 结果对象转换为结果页 view model +- 将 `finding / checked_rule` 转换为右栏展示项 +- 将 `finding / paragraph / entity` 转换为中栏跳转目标 +- 根据文件类型产出统一 preview target +- 将后端报告产物状态转换为按钮展示状态 + +建议输出的数据语义包括: + +- `previewKind` +- `previewPath` +- `activeTarget` +- `findingItems` +- `summaryCards` +- `reportActions` +- `structureItems` +- `outlineItems` +- `entityItems` + +这样后续页面层只编排,不解释后端字段细节。 + +--- + +## 7. 分阶段实施顺序 + +## 7.1 第一阶段:补 adapter,不改页面语义 + +目标: + +- 先把数据适配层补齐 + +动作: + +- 新增 `lib/api/govdoc-audit/adapters.ts` +- 收敛 `audit.tsx` 里对原始接口字段的直接解释 +- 把 preview target 语义统一为: + - `page` + - `highlightValue` + - `bboxHighlight` + - `charPositions` + +本阶段收益: + +- 不改用户可见业务逻辑 +- 为后续替换中栏和右栏做稳定基础 + +--- + +## 7.2 第二阶段:拆详情页 orchestrator + +目标: + +- 让 govdoc 详情页像 `cross-checking` 一样拥有独立 orchestrator + +动作: + +- 新增 `GovdocAuditResultPage.tsx` +- 将现有 `audit.tsx` 逻辑迁入新组件 +- 路由入口改为挂载新组件 +- 顶部摘要、下载操作、tab 切换拆成子组件 + +本阶段收益: + +- 页面职责清晰 +- 后续中栏和右栏可以独立演进 + +--- + +## 7.3 第三阶段:切换中栏到定位型预览 + +目标: + +- 内部公文详情页中栏不再以 `DocView` 为核心 + +动作: + +- 根据文件类型切换到 `PdfPreviewTest / DocxPreviewTest` +- 从 govdoc adapter 输出统一 preview target +- 让右栏点击直接驱动中栏定位 + +本阶段注意: + +- PDF 定位优先支持 `bboxHighlight / charPositions` +- DOCX 优先支持 `targetPage + highlightValue` +- 不承诺此阶段立即做到“DOCX 行级精确定位” + +--- + +## 7.4 第四阶段:重构右栏与 tab 业务壳 + +目标: + +- 保留 govdoc 自己的业务面板,但交互模型对齐平台 + +动作: + +- 将当前 `RightPanel` 重构为 `GovdocFindingPanel` +- 将结构、大纲、实体分面板组件化 +- 收敛旧的孤立交互状态 + +本阶段收益: + +- govdoc 保持业务独立 +- 同时具备平台统一的交互体验 + +--- + +## 7.5 第五阶段:样式与布局收口 + +目标: + +- govdoc 页面在视觉上向平台现有绿色主题和通用 panel 节奏靠齐 + +动作: + +- 减少 `.govdoc-audit-scope` 中重复定义 +- 优先复用 `layout-primitives.css` +- 保留必要的 govdoc 业务样式命名空间 + +本阶段原则: + +- 先统一布局和交互节奏 +- 再减少样式重复 +- 不先做“大改视觉” + +--- + +## 8. 风险点与前置条件 + +## 8.1 最大风险不在前端组件,而在定位数据颗粒度 + +内部公文要实现“定位到哪一行有问题”,前端只是承载层,真正决定上限的是后端给的数据。 + +前端能稳定消费的数据类型分为两类: + +- PDF: + - `page` + - `bbox` + - `page_box` + - `char_positions` +- DOCX: + - `targetPage` + - `highlightValue` + - 未来如果需要更高精度,还需要更细粒度锚点 + +如果后端只给: + +- 问题描述 +- 规则结果 + +而不给定位数据,那么前端最多只能做到: + +- 页级定位 +- 文本关键字高亮 + +不能承诺做到稳定的“行级定位”。 + +--- + +## 8.2 不应在这一阶段做的事情 + +以下动作不建议和本次拆分同时进行: + +- 重写 govdoc 全部视觉设计 +- 把 govdoc 规则语义改造成合同/卷宗语义 +- 试图把所有 `reviews` 业务组件直接搬进 govdoc +- 在没有 adapter 的情况下直接大规模替换页面 + +原因很简单: + +- 这些动作会把“前端分层重构”和“业务改动”混在一起,增加回归风险 + +--- + +## 9. 最终边界结论 + +内部公文前端的正确实现边界应锁定为: + +- **像交叉评查一样,独立成页** +- **像 reviews 一样,复用定位型预览能力** +- **像平台模块一样,数据解释集中在 adapter 层** +- **像内部公文自己一样,保留 findings / checked rules / structure / outline / entities 的业务语义** + +更直白地说: + +- `Govdoc 页面` 负责业务编排 +- `reviews 预览组件` 负责中栏定位能力 +- `Collabora` 只负责 DOCX 渲染 +- `adapters.ts` 负责把 govdoc 后端结果翻译成前端可复用语义 + +这就是内部公文前端后续实施的固定边界。 diff --git a/docs/内部公文模块/报告UI样例.html b/docs/内部公文模块/报告UI样例.html new file mode 100644 index 0000000..3e148d9 --- /dev/null +++ b/docs/内部公文模块/报告UI样例.html @@ -0,0 +1,654 @@ + + + + + + 内部公文报告 UI 样例 + + + +
+
+
+
+
+ 0 + 综合得分 +
+
+
当前样例沿用你提供的实际报告数据,不改业务语义
+
+ +
+
+
统一报告样式锚点
+

公文格式审核报告

+
买卖合同 (1).docx · 共 123 项问题 · 样例用于确认 UI / 配色方向
+
+ +
+
+
错误项
+
31error
+
+
+
警告项
+
92warning
+
+
+
提示项
+
0info
+
+
+
问题类别
+
4标题 / 发文 / 格式 / 其他
+
+
+ +
+ 错误 31 + 警告 92 + 提示 0 +
+
+
+ +
+ + +
+
+
+

问题明细

+ 保留当前报告语义,只收敛版式和视觉层级 +
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
编号规则严重度类别位置说明
F-c0dfd361 + GW-T-001 + 标题文种合规性 + error标题P-1 () +
目标实体「title」未识别到
+
原文:未识别到标题内容,无法继续执行标题文种合规校验。
+
建议:补全标题并确保标题文种符合规则要求。
+
F-a896eaa4 + GW-N-001 + 发文字号必须用六角括号 + error发文P-1 () +
目标实体「doc_number」未识别到
+
原文:未识别到发文字号,年份括号规则无法匹配。
+
建议:发文字号年份应用六角括号〔〕,不得使用方括号或圆括号。
+
F-087a4841 + GW-F-003 + 二级标题用楷体三号 + error格式P35 (heading_2) +
字体或字号不符合(实际 仿宋 Nonept,期望 楷体 16pt)
+
原文:(一)甲方从乙方处购买:
+
建议:二级标题应使用楷体三号,保持同级标题样式一致。
+
F-37b4bb81 + GW-F-003 + 二级标题用楷体三号 + error格式P39 (heading_2) +
字体或字号不符合(实际 仿宋 Nonept,期望 楷体 16pt)
+
原文:(二)质量要求:
+
建议:这一类同级标题可在正式版中支持折叠聚合同规则项。
+
F-b2140a78 + GW-F-003 + 二级标题用楷体三号 + warning格式P62 (heading_2) +
格式接近但未完全满足规则要求
+
原文:(一)交付时间: 。
+
建议:保留原有规则说明内容,只把告警与错误的视觉层级拉开。
+
+
+
+
+ + diff --git a/fastapi_modules/fastapi_leaudit/govdoc_engine/reporter/html_renderer.py b/fastapi_modules/fastapi_leaudit/govdoc_engine/reporter/html_renderer.py index 809442b..f19f59d 100644 --- a/fastapi_modules/fastapi_leaudit/govdoc_engine/reporter/html_renderer.py +++ b/fastapi_modules/fastapi_leaudit/govdoc_engine/reporter/html_renderer.py @@ -1,76 +1,616 @@ """把 AuditResult 渲染成单文件 HTML 报告。""" from __future__ import annotations + +from collections import Counter from html import escape + from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.result import AuditResult _CSS = """ -body { font-family: -apple-system, "PingFang SC", sans-serif; margin: 0; padding: 24px; - background: #f7f7f9; color: #1a1a1a; } -.header { display: flex; align-items: center; gap: 16px; margin-bottom: 24px; } -.score { width: 96px; height: 96px; border-radius: 50%; - background: conic-gradient(#22c55e var(--p), #e5e7eb var(--p)); - display: grid; place-items: center; font-weight: 700; font-size: 22px; color: #111; } -.score-inner { background: white; width: 76px; height: 76px; border-radius: 50%; - display: grid; place-items: center; } -.tag { padding: 2px 8px; border-radius: 999px; font-size: 12px; } -.error { background: #fee2e2; color: #b91c1c; } -.warning { background: #fef9c3; color: #a16207; } -.info { background: #dbeafe; color: #1d4ed8; } -table { width: 100%; border-collapse: collapse; background: white; border-radius: 8px; - overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.06); } -th, td { padding: 10px 12px; text-align: left; border-bottom: 1px solid #f1f5f9; vertical-align: top; } -th { background: #f8fafc; font-size: 13px; } -td.msg { max-width: 480px; } -.context { color: #64748b; font-size: 12px; margin-top: 4px; } +* { box-sizing: border-box; } +html, body { margin: 0; padding: 0; } +body { + font-family: -apple-system, "PingFang SC", "Microsoft YaHei", sans-serif; + background: #f3f6f5; + color: #0f172a; +} +a { color: inherit; } +.page { + width: 100%; + padding: 20px 24px 32px; +} +.stack { + display: flex; + flex-direction: column; + gap: 20px; +} +.card { + background: #ffffff; + border: 1px solid #e2e8f0; + border-radius: 12px; + box-shadow: 0 1px 3px rgba(15, 23, 42, 0.08); + overflow: hidden; +} +.card-head { + height: 48px; + display: flex; + align-items: center; + justify-content: space-between; + gap: 12px; + padding: 0 20px; + border-bottom: 1px solid #e2e8f0; + background: #fcfdfd; +} +.card-title { + font-size: 14px; + font-weight: 600; + color: #1e293b; +} +.card-subtitle { + font-size: 12px; + color: #64748b; +} +.summary-grid { + display: grid; + grid-template-columns: 220px minmax(0, 1fr); + gap: 20px; + padding: 20px; +} +.score-box { + border: 1px solid #cfe4dc; + background: #f7fbf9; + border-radius: 10px; + padding: 20px; +} +.score-label { + font-size: 12px; + font-weight: 500; + color: #475569; +} +.score-value { + margin-top: 12px; + font-size: 42px; + line-height: 1; + font-weight: 600; + letter-spacing: -0.05em; + color: #0f172a; +} +.score-track { + margin-top: 16px; + height: 8px; + background: #dbe8e3; + border-radius: 999px; + overflow: hidden; +} +.score-fill { + height: 100%; + background: #00684a; +} +.score-note { + margin-top: 16px; + font-size: 12px; + line-height: 1.75; + color: #475569; +} +.summary-main { + min-width: 0; +} +.eyebrow { + display: inline-flex; + align-items: center; + height: 28px; + padding: 0 12px; + border: 1px solid #cfe4dc; + border-radius: 6px; + background: #e8f3ef; + color: #00684a; + font-size: 12px; + font-weight: 500; +} +.report-title { + margin: 12px 0 0; + font-size: 32px; + line-height: 1.25; + letter-spacing: -0.03em; + font-weight: 600; + color: #0f172a; +} +.report-meta { + margin-top: 8px; + font-size: 15px; + color: #475569; +} +.metrics { + margin-top: 20px; + display: grid; + grid-template-columns: repeat(4, minmax(0, 1fr)); + gap: 16px; +} +.metric { + border: 1px solid #e2e8f0; + border-radius: 10px; + background: #fcfdfd; + padding: 16px 20px; +} +.metric-label { + font-size: 13px; + font-weight: 500; + color: #64748b; +} +.metric-value { + margin-top: 12px; + display: flex; + align-items: baseline; + gap: 8px; +} +.metric-value strong { + font-size: 30px; + line-height: 1; + letter-spacing: -0.04em; + font-weight: 600; + color: #0f172a; +} +.metric-value span { + font-size: 13px; + color: #64748b; +} +.chips { + margin-top: 20px; + display: flex; + flex-wrap: wrap; + gap: 12px; +} +.chip, +.severity-tag { + display: inline-flex; + align-items: center; + border: 1px solid transparent; + border-radius: 6px; + font-weight: 600; +} +.chip { + height: 32px; + padding: 0 12px; + font-size: 12px; +} +.severity-tag { + height: 32px; + padding: 0 12px; + font-size: 12px; + text-transform: uppercase; +} +.error { + border-color: #fecaca; + background: #fef2f2; + color: #b91c1c; +} +.warning { + border-color: #fde68a; + background: #fffbeb; + color: #b45309; +} +.info { + border-color: #bfdbfe; + background: #eff6ff; + color: #1d4ed8; +} +.content-grid { + display: grid; + grid-template-columns: 340px minmax(0, 1fr); + gap: 20px; +} +.sidebar-body { + padding: 16px; + display: flex; + flex-direction: column; + gap: 16px; +} +.summary-row { + border: 1px solid #e2e8f0; + border-radius: 10px; + background: #fcfdfd; + padding: 16px; +} +.summary-row-label { + font-size: 12px; + font-weight: 500; + color: #64748b; +} +.summary-row-value { + margin-top: 8px; + font-size: 22px; + line-height: 1; + letter-spacing: -0.03em; + font-weight: 600; + color: #0f172a; +} +.summary-row-desc { + margin-top: 12px; + font-size: 13px; + line-height: 1.75; + color: #475569; +} +.table-toolbar { + height: 48px; + display: flex; + align-items: center; + justify-content: space-between; + gap: 12px; + padding: 0 20px; + border-bottom: 1px solid #e2e8f0; + background: #fcfdfd; +} +.toolbar-left { + min-width: 0; +} +.toolbar-title { + font-size: 14px; + font-weight: 600; + color: #1e293b; +} +.toolbar-desc { + margin-top: 2px; + font-size: 12px; + color: #64748b; +} +.toolbar-filters { + display: flex; + gap: 8px; +} +.filter { + display: inline-flex; + align-items: center; + height: 32px; + padding: 0 12px; + border: 1px solid #e2e8f0; + border-radius: 6px; + background: #ffffff; + color: #64748b; + font-size: 12px; + font-weight: 500; +} +.filter.active { + border-color: rgba(0, 104, 74, 0.2); + background: #e8f3ef; + color: #00684a; +} +.table-wrap { + overflow-x: auto; +} +table { + width: 100%; + min-width: 1320px; + border-collapse: collapse; +} +thead tr { + background: #f8fafc; + color: #475569; + font-size: 13px; + font-weight: 500; +} +th { + padding: 16px 20px; + text-align: left; + border-bottom: 1px solid #e2e8f0; + white-space: nowrap; +} +td { + padding: 20px; + vertical-align: top; + border-bottom: 1px solid #f1f5f9; +} +tbody tr:hover { + background: #f8fafc; +} +.mono { + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; +} +.id-cell { + font-size: 13px; + color: #64748b; +} +.rule-id { + font-size: 15px; + font-weight: 600; + color: #1e293b; +} +.rule-name { + margin-top: 4px; + font-size: 13px; + color: #64748b; +} +.category-cell { + font-size: 14px; + color: #334155; +} +.location-cell { + font-size: 13px; + color: #334155; +} +.message-cell { + min-width: 560px; +} +.message-main { + font-size: 15px; + line-height: 1.8; + color: #0f172a; +} +.context-box, +.suggestion-box { + margin-top: 12px; + border-radius: 6px; + padding: 12px 16px; + font-size: 13px; + line-height: 1.8; +} +.context-box { + border: 1px solid #e2e8f0; + background: #f8fafc; + color: #475569; +} +.suggestion-box { + border: 1px solid #cfe4dc; + background: #f4faf7; + color: #0d6b4d; +} +.empty { + padding: 24px 20px; + text-align: center; + color: #64748b; + font-size: 14px; +} +@media (max-width: 1200px) { + .summary-grid, + .content-grid { + grid-template-columns: 1fr; + } + .metrics { + grid-template-columns: repeat(2, minmax(0, 1fr)); + } +} +@media (max-width: 720px) { + .page { + padding: 16px; + } + .metrics { + grid-template-columns: 1fr; + } + .table-toolbar, + .card-head { + height: auto; + min-height: 48px; + padding-top: 12px; + padding-bottom: 12px; + align-items: flex-start; + flex-direction: column; + } +} """ -def render_html(result: AuditResult) -> str: - s = result.summary - score = s.score - pct = f"{score}%" - rows = [] - for f in result.findings: - loc = f.location - suggest = ( - f'
建议: {escape(f.suggestion)}
' - if f.suggestion else "" - ) - rows.append(f""" - - {escape(f.finding_id)} - {escape(f.rule_id)}
{escape(f.rule_name)} - {f.severity} - {escape(f.category)} - P{loc.paragraph_index} ({escape(loc.role or '')}) - {escape(f.message)} -
原文: {escape((loc.context or '')[:80])}
- {suggest} - -""") +def _summary_score(summary: object) -> int: + for attr in ("score", "totalScore", "total_score"): + value = getattr(summary, attr, None) + if value is None and isinstance(summary, dict): + value = summary.get(attr) + if value is None: + continue + try: + return int(float(value)) + except (TypeError, ValueError): + continue + return 0 - body = f""" -公文审核报告 - -
-
{score}
-
-

公文格式审核报告

-
{escape(result.document.get('filename', ''))} · 共 {s.total_findings} 项
-
- 错误 {s.by_severity.get('error', 0)} - 警告 {s.by_severity.get('warning', 0)} - 提示 {s.by_severity.get('info', 0)} + +def _severity_label(severity: str) -> str: + return { + "error": "错误", + "warning": "警告", + "info": "提示", + }.get(severity, severity) + + +def render_html(result: AuditResult) -> str: + summary = result.summary + score = _summary_score(summary) + score_pct = max(0, min(score, 100)) + severity_counts = _severity_counts(result) + category_count = len([key for key, value in (summary.by_category or {}).items() if key and value]) + filename = escape(str(result.document.get("filename", ""))) + top_rule_id, top_rule_count = _top_rule(result) + line_range = _line_range(result) + entity_summary = _entity_summary(result) + + rows = [] + for finding in result.findings: + location_label = _format_location(finding.location.paragraph_index) + context = escape((finding.location.context or "").strip()) + message = escape(finding.message) + suggestion = escape(finding.suggestion) if finding.suggestion else "按规则要求修正对应内容。" + severity = escape(finding.severity) + severity_label = escape(_severity_label(finding.severity)) + + rows.append( + f""" + + {escape(finding.finding_id)} + +
{escape(finding.rule_id)}
+
{escape(finding.rule_name)}
+ + {severity_label} + {escape(finding.category)} + {location_label} + +
{message}
+
原文:{context or "未提取到上下文"}
+
建议:{suggestion}
+ +""" + ) + + return f""" + + + + + 公文审核报告 + + + +
+
+
+
+
报告摘要
+
+
+
+
综合得分
+
{score}
+
+
综合反映本次公文审核结果,可结合下方问题明细逐项核对修正。
+
+ +
+
审核结果
+

公文格式审核报告

+
{filename} · 共 {summary.total_findings} 项问题
+ +
+
+
错误项
+
{severity_counts["error"]}错误
+
+
+
警告项
+
{severity_counts["warning"]}警告
+
+
+
提示项
+
{severity_counts["info"]}提示
+
+
+
问题类别
+
{category_count}标题 / 发文 / 格式 / 其他
+
+
+ +
+ 错误 {severity_counts["error"]} + 警告 {severity_counts["warning"]} + 提示 {severity_counts["info"]} +
+
+
+
+ +
+ + +
+
+
+
问题明细
+
+
+ 全部 + 错误 + 警告 +
+
+
+ + + + + + + + + + + + + {''.join(rows) or ''} + +
编号规则严重度类别位置说明
未发现问题
+
+
+
-
- - - - - {''.join(rows) or ''} -
编号规则严重度类别位置说明
未发现问题
-""" - return body + +""" + + +def _severity_counts(result: AuditResult) -> dict[str, int]: + counts = Counter(finding.severity for finding in result.findings) + return { + "error": counts.get("error", 0), + "warning": counts.get("warning", 0), + "info": counts.get("info", 0), + } + + +def _top_rule(result: AuditResult) -> tuple[str, int]: + counter = Counter(finding.rule_id for finding in result.findings if finding.rule_id) + if not counter: + return "无", 0 + rule_id, count = counter.most_common(1)[0] + return rule_id, count + + +def _line_range(result: AuditResult) -> str: + indices = sorted( + { + int(finding.location.paragraph_index) + 1 + for finding in result.findings + if finding.location.paragraph_index is not None + } + ) + if not indices: + return "未定位" + if len(indices) == 1: + return f"第 {indices[0]} 行" + return f"第 {indices[0]} 行 - 第 {indices[-1]} 行" + + +def _entity_summary(result: AuditResult) -> str: + expected = ["title", "doc_number", "recipient", "date"] + missing = [key for key in expected if not result.entities.get(key)] + if not missing: + return "核心实体齐全" + if len(missing) == len(expected): + return "标题 / 发文" + return "缺少 " + " / ".join(missing[:2]) + + +def _format_location(paragraph_index: int | None) -> str: + if paragraph_index is None: + return "未定位" + return f"第 {int(paragraph_index) + 1} 行" diff --git a/fastapi_modules/fastapi_leaudit/services/impl/govdocServiceImpl.py b/fastapi_modules/fastapi_leaudit/services/impl/govdocServiceImpl.py index 626f0ca..2a3af9e 100644 --- a/fastapi_modules/fastapi_leaudit/services/impl/govdocServiceImpl.py +++ b/fastapi_modules/fastapi_leaudit/services/impl/govdocServiceImpl.py @@ -8,12 +8,12 @@ import mimetypes import time import uuid from dataclasses import dataclass -from datetime import datetime +from datetime import date, datetime from pathlib import Path from typing import Any from fastapi import UploadFile -from sqlalchemy import text +from sqlalchemy import bindparam, text from fastapi_common.fastapi_common_logger import logger from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession @@ -22,12 +22,14 @@ from fastapi_common.fastapi_common_web.domain.responses import StatusCodeEnum from fastapi_common.fastapi_common_web.exception.LeauditException import LeauditException from fastapi_modules.fastapi_leaudit.govdoc_bridge.storage_adapter import StorageAdapter +from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.result import AuditResult, AuditSummary, CheckedRule, OutlineNode, StructureItem from fastapi_modules.fastapi_leaudit.govdoc_bridge.tasks import dispatch_govdoc_task from fastapi_modules.fastapi_leaudit.govdoc_engine.parser.docx_parser import parse_docx +from fastapi_modules.fastapi_leaudit.govdoc_engine.models import Finding, Location from fastapi_modules.fastapi_leaudit.govdoc_engine.reporter.html_paragraph import paragraphs_to_html +from fastapi_modules.fastapi_leaudit.govdoc_engine.reporter.html_renderer import render_html from fastapi_modules.fastapi_leaudit.models import LeauditDocument, LeauditDocumentFile from fastapi_modules.fastapi_leaudit.services import IGovdocService, IOssService -from fastapi_modules.fastapi_leaudit.services.impl.documentServiceImpl import _find_latest_version_candidate from fastapi_modules.fastapi_leaudit.services.impl.ossServiceImpl import OssServiceImpl @@ -39,9 +41,11 @@ class _GovdocDocumentRow: currentRunId: int | None versionGroupKey: str | None versionNo: int - rootVersionId: int | None + totalVersions: int previousVersionId: int | None + rootVersionId: int | None isLatestVersion: bool + normalizedName: str | None createdAt: Any updatedAt: Any fileId: int @@ -56,7 +60,11 @@ class _GovdocDocumentRow: passedCount: int | None failedCount: int | None skippedCount: int | None - resultSummaryJson: Any + findingCount: int + errorCount: int + warningCount: int + infoCount: int + rulesPath: str | None hasHtmlReport: bool hasDocxReport: bool @@ -68,6 +76,20 @@ class GovdocServiceImpl(IGovdocService): self.OssService = OssService or OssServiceImpl() self.Storage = StorageAdapter() + def _parse_date_filter(self, value: str | None, field_name: str) -> date | None: + if value is None: + return None + normalized = value.strip() + if not normalized: + return None + try: + return date.fromisoformat(normalized) + except ValueError as exc: + raise LeauditException( + StatusCodeEnum.HTTP_400_BAD_REQUEST, + f"{field_name} 格式非法,应为 YYYY-MM-DD", + ) from exc + # ── 文档 ────────────────────────────────────────────── async def UploadDocument( @@ -104,26 +126,32 @@ class GovdocServiceImpl(IGovdocService): async with GetAsyncSession() as session: await self._ensureGovdocSchema(session) + await self._backfill_missing_version_groups(session) currentUser = await self._getCurrentUserContext(createdBy) resolvedRegion = self._resolve_upload_region(currentUser, normalizedRegion) + latestCandidate = await self._find_latest_version_candidate( + session, + region=resolvedRegion, + normalizedName=normalizedName, + fileExt=fileExt, + ) + if latestCandidate and not latestCandidate.get("version_group_key"): + latestCandidate = await self._backfill_legacy_version_chain( + session, + region=resolvedRegion, + normalizedName=normalizedName, + fileExt=fileExt, + ) previousVersionId: int | None = None rootVersionId: int | None = None versionGroupKey: str | None = None versionNo = 1 - latestCandidate = await self._find_govdoc_latest_version_candidate( - session, - typeId=typeId, - region=resolvedRegion, - normalizedName=normalizedName, - fileExt=fileExt, - ) - if latestCandidate: previousVersionId = int(latestCandidate["document_id"]) rootVersionId = int(latestCandidate["root_version_id"] or latestCandidate["document_id"]) versionGroupKey = str(latestCandidate["version_group_key"] or "") - versionNo = int(latestCandidate["version_no"] or 1) + 1 + versionNo = int(latestCandidate["version_no"] or 0) + 1 previousDocument = await session.get(LeauditDocument, previousVersionId) if previousDocument is not None: previousDocument.isLatestVersion = False @@ -154,7 +182,7 @@ class GovdocServiceImpl(IGovdocService): Region=resolvedRegion, TypeCode="govdoc", DocumentId=document.Id, - Version=f"v{document.versionNo or 1}", + Version=f"v{document.versionNo}", FileRole="original", FileName=fileName, Year=uploadedAt.year, @@ -256,7 +284,6 @@ class GovdocServiceImpl(IGovdocService): "f.is_active = true", "f.file_role = 'original'", "COALESCE(d.engine_type, 'leaudit') = 'govdoc'", - "COALESCE(d.is_latest_version, true) = true", ] filters.extend( self._buildDocumentScopeFilters( @@ -283,79 +310,161 @@ class GovdocServiceImpl(IGovdocService): if resultStatus: filters.append("COALESCE(gr.result_status, '') = :result_status") params["result_status"] = resultStatus.strip() - if dateFrom: - filters.append("d.created_at >= CAST(:date_from AS date)") - params["date_from"] = dateFrom.strip() - if dateTo: - filters.append("d.created_at < (CAST(:date_to AS date) + INTERVAL '1 day')") - params["date_to"] = dateTo.strip() + parsedDateFrom = self._parse_date_filter(dateFrom, "dateFrom") + parsedDateTo = self._parse_date_filter(dateTo, "dateTo") + if parsedDateFrom: + filters.append("d.created_at::date >= :date_from") + params["date_from"] = parsedDateFrom + if parsedDateTo: + filters.append("d.created_at::date <= :date_to") + params["date_to"] = parsedDateTo whereClause = " AND ".join(filters) async with GetAsyncSession() as session: + await self._backfill_missing_version_groups(session) + baseSelect = f""" + WITH effective_docs AS ( + SELECT + d.id AS document_id, + COALESCE(d.region, 'default') AS region, + COALESCE(d.processing_status, 'waiting') AS processing_status, + d.current_run_id, + COALESCE(NULLIF(d.version_group_key, ''), fallback_vc.derived_version_group_key, '') AS version_group_key, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_version_no, 1) + ELSE COALESCE(NULLIF(d.version_no, 0), 1) + END AS version_no, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN fallback_vc.derived_previous_version_id + ELSE d.previous_version_id + END AS previous_version_id, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_root_version_id, d.id) + ELSE COALESCE(d.root_version_id, d.id) + END AS root_version_id, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_is_latest_version, COALESCE(d.is_latest_version, true)) + ELSE COALESCE(d.is_latest_version, true) + END AS is_latest_version, + COALESCE(d.normalized_name, '') AS normalized_name, + COALESCE(vc.total_versions, fallback_vc.total_versions, 1) AS total_versions, + fallback_vc.derived_version_no, + d.created_at, + d.updated_at, + f.id AS file_id, + f.file_name, + f.file_ext, + f.mime_type, + f.file_size, + f.oss_url, + f.created_by, + gr.result_status, + gr.total_score, + gr.passed_count, + gr.failed_count, + gr.skipped_count, + gr.rules_path, + COALESCE(fc.finding_count, 0) AS finding_count, + COALESCE(fc.error_count, 0) AS error_count, + COALESCE(fc.warning_count, 0) AS warning_count, + COALESCE(fc.info_count, 0) AS info_count, + EXISTS( + SELECT 1 + FROM govdoc_report_artifacts gra + WHERE gra.run_id = d.current_run_id + AND gra.artifact_type = 'html_report' + AND gra.deleted_at IS NULL + ) AS has_html_report, + EXISTS( + SELECT 1 + FROM govdoc_report_artifacts gra + WHERE gra.run_id = d.current_run_id + AND gra.artifact_type = 'annotated_docx' + AND gra.deleted_at IS NULL + ) AS has_docx_report + FROM leaudit_documents d + JOIN leaudit_document_files f + ON f.document_id = d.id + AND f.is_active = true + AND f.file_role = 'original' + AND f.deleted_at IS NULL + LEFT JOIN govdoc_runs gr + ON gr.id = d.current_run_id + LEFT JOIN ( + SELECT + run_id, + COUNT(*) FILTER (WHERE result = 'fail') AS finding_count, + COUNT(*) FILTER (WHERE result = 'fail' AND severity = 'error') AS error_count, + COUNT(*) FILTER (WHERE result = 'fail' AND severity = 'warning') AS warning_count, + COUNT(*) FILTER (WHERE result = 'fail' AND severity = 'info') AS info_count + FROM govdoc_rule_results + WHERE deleted_at IS NULL + GROUP BY run_id + ) fc + ON fc.run_id = d.current_run_id + LEFT JOIN ( + SELECT version_group_key, COUNT(*) AS total_versions + FROM leaudit_documents + WHERE deleted_at IS NULL + AND COALESCE(version_group_key, '') <> '' + GROUP BY version_group_key + ) vc + ON vc.version_group_key = d.version_group_key + LEFT JOIN ( + SELECT + d2.id AS document_id, + COUNT(*) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ) AS total_versions, + ROW_NUMBER() OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_version_no, + LAG(d2.id) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_previous_version_id, + FIRST_VALUE(d2.id) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_root_version_id, + CASE + WHEN ROW_NUMBER() OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at DESC, d2.id DESC + ) = 1 THEN true + ELSE false + END AS derived_is_latest_version, + md5(CONCAT_WS('|', d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, ''))) AS derived_version_group_key + FROM leaudit_documents d2 + JOIN leaudit_document_files f2 + ON f2.document_id = d2.id + AND f2.is_active = true + AND f2.file_role = 'original' + AND f2.deleted_at IS NULL + WHERE d2.deleted_at IS NULL + AND d2.review_scope = 'govdoc' + AND COALESCE(d2.engine_type, 'leaudit') = 'govdoc' + ) fallback_vc + ON fallback_vc.document_id = d.id + WHERE {whereClause} + ) + """ + rows = ( await session.execute( text( f""" - SELECT - d.id AS document_id, - COALESCE(d.region, 'default') AS region, - COALESCE(d.processing_status, 'waiting') AS processing_status, - d.current_run_id, - d.version_group_key, - COALESCE(d.version_no, 1) AS version_no, - d.root_version_id, - d.previous_version_id, - COALESCE(d.is_latest_version, true) AS is_latest_version, - d.created_at, - d.updated_at, - f.id AS file_id, - f.file_name, - f.file_ext, - f.mime_type, - f.file_size, - f.oss_url, - f.created_by, - gr.result_status, - gr.total_score, - gr.passed_count, - gr.failed_count, - gr.skipped_count, - gr.result_summary_json, - COALESCE(vc.total_versions, 1) AS total_versions, - EXISTS( - SELECT 1 - FROM govdoc_report_artifacts gra - WHERE gra.run_id = d.current_run_id - AND gra.artifact_type = 'html_report' - AND gra.deleted_at IS NULL - ) AS has_html_report, - EXISTS( - SELECT 1 - FROM govdoc_report_artifacts gra - WHERE gra.run_id = d.current_run_id - AND gra.artifact_type = 'annotated_docx' - AND gra.deleted_at IS NULL - ) AS has_docx_report - FROM leaudit_documents d - JOIN leaudit_document_files f - ON f.document_id = d.id - AND f.is_active = true - AND f.file_role = 'original' - AND f.deleted_at IS NULL - LEFT JOIN govdoc_runs gr - ON gr.id = d.current_run_id - LEFT JOIN ( - SELECT version_group_key, COUNT(*) AS total_versions - FROM leaudit_documents - WHERE deleted_at IS NULL - AND COALESCE(engine_type, 'leaudit') = 'govdoc' - AND COALESCE(version_group_key, '') <> '' - GROUP BY version_group_key - ) vc - ON vc.version_group_key = d.version_group_key - WHERE {whereClause} - ORDER BY d.created_at DESC + {baseSelect} + SELECT * + FROM effective_docs + WHERE is_latest_version = true + ORDER BY created_at DESC, document_id DESC LIMIT :limit OFFSET :offset """ ), @@ -368,16 +477,10 @@ class GovdocServiceImpl(IGovdocService): await session.execute( text( f""" + {baseSelect} SELECT COUNT(1) - FROM leaudit_documents d - JOIN leaudit_document_files f - ON f.document_id = d.id - AND f.is_active = true - AND f.file_role = 'original' - AND f.deleted_at IS NULL - LEFT JOIN govdoc_runs gr - ON gr.id = d.current_run_id - WHERE {whereClause} + FROM effective_docs + WHERE is_latest_version = true """ ), params, @@ -385,98 +488,46 @@ class GovdocServiceImpl(IGovdocService): ).scalar_one() ) - history_by_group: dict[str, list[dict[str, Any]]] = {} - total_versions_by_group = { - str(row["version_group_key"]): int(row.get("total_versions") or 1) + historyRowsByGroup: dict[str, list[dict[str, Any]]] = {} + versionGroupKeys = [ + str(row["version_group_key"]) for row in rows - if row.get("version_group_key") - } - group_keys = [str(row["version_group_key"]) for row in rows if row.get("version_group_key")] - if group_keys: - history_rows = ( + if row.get("version_group_key") and int(row.get("total_versions") or 1) > 1 + ] + if versionGroupKeys: + historyRows = ( await session.execute( text( + f""" + {baseSelect} + SELECT * + FROM effective_docs + WHERE version_group_key IN :version_group_keys + AND is_latest_version = false + ORDER BY created_at DESC, document_id DESC """ - SELECT - d.id AS document_id, - COALESCE(d.region, 'default') AS region, - COALESCE(d.processing_status, 'waiting') AS processing_status, - d.current_run_id, - d.version_group_key, - COALESCE(d.version_no, 1) AS version_no, - d.root_version_id, - d.previous_version_id, - COALESCE(d.is_latest_version, false) AS is_latest_version, - d.created_at, - d.updated_at, - f.id AS file_id, - f.file_name, - f.file_ext, - f.mime_type, - f.file_size, - f.oss_url, - f.created_by, - gr.result_status, - gr.total_score, - gr.passed_count, - gr.failed_count, - gr.skipped_count, - gr.result_summary_json, - EXISTS( - SELECT 1 - FROM govdoc_report_artifacts gra - WHERE gra.run_id = d.current_run_id - AND gra.artifact_type = 'html_report' - AND gra.deleted_at IS NULL - ) AS has_html_report, - EXISTS( - SELECT 1 - FROM govdoc_report_artifacts gra - WHERE gra.run_id = d.current_run_id - AND gra.artifact_type = 'annotated_docx' - AND gra.deleted_at IS NULL - ) AS has_docx_report - FROM leaudit_documents d - JOIN leaudit_document_files f - ON f.document_id = d.id - AND f.is_active = true - AND f.file_role = 'original' - AND f.deleted_at IS NULL - LEFT JOIN govdoc_runs gr - ON gr.id = d.current_run_id - WHERE d.deleted_at IS NULL - AND COALESCE(d.engine_type, 'leaudit') = 'govdoc' - AND d.version_group_key = ANY(:group_keys) - AND COALESCE(d.is_latest_version, false) = false - ORDER BY d.version_group_key, COALESCE(d.version_no, 1) DESC, d.id DESC - """ - ), - {"group_keys": group_keys}, + ).bindparams(bindparam("version_group_keys", expanding=True)), + {"version_group_keys": versionGroupKeys, **params}, ) ).mappings().all() - for history_row in history_rows: - group_key = str(history_row["version_group_key"] or "") - history_by_group.setdefault(group_key, []).append( - self._serialize_list_item_row( - self._map_document_row(history_row), - totalVersions=total_versions_by_group.get(group_key, 1), - historyVersions=[], - ) - ) + for historyRow in historyRows: + groupKey = str(historyRow.get("version_group_key") or "") + if not groupKey: + continue + historyRowsByGroup.setdefault(groupKey, []).append(dict(historyRow)) items = [] for row in rows: mapped = self._map_document_row(row) - group_key = str(mapped.versionGroupKey or "") - total_versions = int(row.get("total_versions") or 1) - history_versions = history_by_group.get(group_key, []) if group_key else [] - items.append( - self._serialize_list_item_row( - mapped, - totalVersions=total_versions, - historyVersions=history_versions, - ) - ) + item = await self._build_document_list_item(mapped) + groupKey = mapped.versionGroupKey or "" + historyItems = [ + await self._build_document_list_item(self._map_document_row(historyRow)) + for historyRow in historyRowsByGroup.get(groupKey, []) + ] + item["historyCount"] = len(historyItems) + item["historyVersions"] = historyItems + items.append(item) return {"items": items, "total": total, "page": page, "pageSize": pageSize} @@ -507,6 +558,7 @@ class GovdocServiceImpl(IGovdocService): async with GetAsyncSession() as session: await self._ensureGovdocSchema(session) + await self._backfill_missing_version_groups(session) row = ( await session.execute( text( @@ -516,6 +568,30 @@ class GovdocServiceImpl(IGovdocService): COALESCE(d.region, 'default') AS region, COALESCE(d.processing_status, 'waiting') AS processing_status, d.current_run_id, + COALESCE(NULLIF(d.version_group_key, ''), fallback_vc.derived_version_group_key, '') AS version_group_key, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_version_no, 1) + ELSE COALESCE(NULLIF(d.version_no, 0), 1) + END AS version_no, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN fallback_vc.derived_previous_version_id + ELSE d.previous_version_id + END AS previous_version_id, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_root_version_id, d.id) + ELSE COALESCE(d.root_version_id, d.id) + END AS root_version_id, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_is_latest_version, COALESCE(d.is_latest_version, true)) + ELSE COALESCE(d.is_latest_version, true) + END AS is_latest_version, + COALESCE(d.normalized_name, '') AS normalized_name, + COALESCE(vc.total_versions, fallback_vc.total_versions, 1) AS total_versions, + fallback_vc.derived_version_no, d.created_at, d.updated_at, f.id AS file_id, @@ -529,7 +605,26 @@ class GovdocServiceImpl(IGovdocService): gr.total_score, gr.passed_count, gr.failed_count, - gr.skipped_count + gr.skipped_count, + gr.rules_path, + COALESCE(fc.finding_count, 0) AS finding_count, + COALESCE(fc.error_count, 0) AS error_count, + COALESCE(fc.warning_count, 0) AS warning_count, + COALESCE(fc.info_count, 0) AS info_count, + EXISTS( + SELECT 1 + FROM govdoc_report_artifacts gra + WHERE gra.run_id = d.current_run_id + AND gra.artifact_type = 'html_report' + AND gra.deleted_at IS NULL + ) AS has_html_report, + EXISTS( + SELECT 1 + FROM govdoc_report_artifacts gra + WHERE gra.run_id = d.current_run_id + AND gra.artifact_type = 'annotated_docx' + AND gra.deleted_at IS NULL + ) AS has_docx_report FROM leaudit_documents d JOIN leaudit_document_files f ON f.document_id = d.id @@ -538,6 +633,63 @@ class GovdocServiceImpl(IGovdocService): AND f.deleted_at IS NULL LEFT JOIN govdoc_runs gr ON gr.id = d.current_run_id + LEFT JOIN ( + SELECT + run_id, + COUNT(*) FILTER (WHERE result = 'fail') AS finding_count, + COUNT(*) FILTER (WHERE result = 'fail' AND severity = 'error') AS error_count, + COUNT(*) FILTER (WHERE result = 'fail' AND severity = 'warning') AS warning_count, + COUNT(*) FILTER (WHERE result = 'fail' AND severity = 'info') AS info_count + FROM govdoc_rule_results + WHERE deleted_at IS NULL + GROUP BY run_id + ) fc + ON fc.run_id = d.current_run_id + LEFT JOIN ( + SELECT version_group_key, COUNT(*) AS total_versions + FROM leaudit_documents + WHERE deleted_at IS NULL + AND COALESCE(version_group_key, '') <> '' + GROUP BY version_group_key + ) vc + ON vc.version_group_key = d.version_group_key + LEFT JOIN ( + SELECT + d2.id AS document_id, + COUNT(*) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ) AS total_versions, + ROW_NUMBER() OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_version_no, + LAG(d2.id) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_previous_version_id, + FIRST_VALUE(d2.id) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_root_version_id, + CASE + WHEN ROW_NUMBER() OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at DESC, d2.id DESC + ) = 1 THEN true + ELSE false + END AS derived_is_latest_version, + md5(CONCAT_WS('|', d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, ''))) AS derived_version_group_key + FROM leaudit_documents d2 + JOIN leaudit_document_files f2 + ON f2.document_id = d2.id + AND f2.is_active = true + AND f2.file_role = 'original' + AND f2.deleted_at IS NULL + WHERE d2.deleted_at IS NULL + AND d2.review_scope = 'govdoc' + AND COALESCE(d2.engine_type, 'leaudit') = 'govdoc' + ) fallback_vc + ON fallback_vc.document_id = d.id WHERE {whereClause} LIMIT 1 """ @@ -616,6 +768,12 @@ class GovdocServiceImpl(IGovdocService): "fileSize": mapped.fileSize, "region": mapped.region, "processingStatus": mapped.processingStatus, + "versionGroupKey": mapped.versionGroupKey, + "versionNo": mapped.versionNo, + "totalVersions": mapped.totalVersions, + "previousVersionId": mapped.previousVersionId, + "rootVersionId": mapped.rootVersionId, + "isLatestVersion": mapped.isLatestVersion, "createdAt": self._iso(mapped.createdAt), "updatedAt": self._iso(mapped.updatedAt), }, @@ -1001,13 +1159,9 @@ class GovdocServiceImpl(IGovdocService): return {"runId": runId, "outline": result.get("outline", [])} async def GetReportHtml(self, runId: int) -> dict[str, Any]: - artifact = await self._get_report_artifact(runId, "html_report") - if not artifact: - return {"runId": runId, "htmlUrl": ""} - return { - "runId": runId, - "htmlUrl": await self.OssService.PresignGetUrl(str(artifact["oss_url"])), - } + result = await self.GetRunResult(runId) + html = render_html(self._build_audit_result_from_run_result(result)) + return {"runId": runId, "html": html} async def GetReportDocx(self, runId: int) -> dict[str, Any]: artifact = await self._get_report_artifact(runId, "annotated_docx") @@ -1221,6 +1375,7 @@ class GovdocServiceImpl(IGovdocService): async def _getCurrentUserContext(self, CurrentUserId: int) -> dict[str, Any]: async with GetAsyncSession() as session: + await self._backfill_missing_version_groups(session) row = ( await session.execute( text( @@ -1338,6 +1493,7 @@ class GovdocServiceImpl(IGovdocService): ) whereClause = " AND ".join(filters) async with GetAsyncSession() as session: + await self._backfill_missing_version_groups(session) row = ( await session.execute( text( @@ -1347,6 +1503,30 @@ class GovdocServiceImpl(IGovdocService): COALESCE(d.region, 'default') AS region, COALESCE(d.processing_status, 'waiting') AS processing_status, d.current_run_id, + COALESCE(NULLIF(d.version_group_key, ''), fallback_vc.derived_version_group_key, '') AS version_group_key, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_version_no, 1) + ELSE COALESCE(NULLIF(d.version_no, 0), 1) + END AS version_no, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN fallback_vc.derived_previous_version_id + ELSE d.previous_version_id + END AS previous_version_id, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_root_version_id, d.id) + ELSE COALESCE(d.root_version_id, d.id) + END AS root_version_id, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_is_latest_version, COALESCE(d.is_latest_version, true)) + ELSE COALESCE(d.is_latest_version, true) + END AS is_latest_version, + COALESCE(d.normalized_name, '') AS normalized_name, + COALESCE(vc.total_versions, fallback_vc.total_versions, 1) AS total_versions, + fallback_vc.derived_version_no, d.created_at, d.updated_at, f.id AS file_id, @@ -1360,7 +1540,14 @@ class GovdocServiceImpl(IGovdocService): gr.total_score, gr.passed_count, gr.failed_count, - gr.skipped_count + gr.skipped_count, + NULL::VARCHAR AS rules_path, + 0 AS finding_count, + 0 AS error_count, + 0 AS warning_count, + 0 AS info_count, + false AS has_html_report, + false AS has_docx_report FROM leaudit_documents d JOIN leaudit_document_files f ON f.document_id = d.id @@ -1369,6 +1556,51 @@ class GovdocServiceImpl(IGovdocService): AND f.deleted_at IS NULL LEFT JOIN govdoc_runs gr ON gr.id = d.current_run_id + LEFT JOIN ( + SELECT version_group_key, COUNT(*) AS total_versions + FROM leaudit_documents + WHERE deleted_at IS NULL + AND COALESCE(version_group_key, '') <> '' + GROUP BY version_group_key + ) vc + ON vc.version_group_key = d.version_group_key + LEFT JOIN ( + SELECT + d2.id AS document_id, + COUNT(*) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ) AS total_versions, + ROW_NUMBER() OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_version_no, + LAG(d2.id) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_previous_version_id, + FIRST_VALUE(d2.id) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_root_version_id, + CASE + WHEN ROW_NUMBER() OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at DESC, d2.id DESC + ) = 1 THEN true + ELSE false + END AS derived_is_latest_version, + md5(CONCAT_WS('|', d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, ''))) AS derived_version_group_key + FROM leaudit_documents d2 + JOIN leaudit_document_files f2 + ON f2.document_id = d2.id + AND f2.is_active = true + AND f2.file_role = 'original' + AND f2.deleted_at IS NULL + WHERE d2.deleted_at IS NULL + AND d2.review_scope = 'govdoc' + AND COALESCE(d2.engine_type, 'leaudit') = 'govdoc' + ) fallback_vc + ON fallback_vc.document_id = d.id WHERE {whereClause} LIMIT 1 """ @@ -1391,6 +1623,30 @@ class GovdocServiceImpl(IGovdocService): COALESCE(d.region, 'default') AS region, COALESCE(d.processing_status, 'waiting') AS processing_status, d.current_run_id, + COALESCE(NULLIF(d.version_group_key, ''), fallback_vc.derived_version_group_key, '') AS version_group_key, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_version_no, 1) + ELSE COALESCE(NULLIF(d.version_no, 0), 1) + END AS version_no, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN fallback_vc.derived_previous_version_id + ELSE d.previous_version_id + END AS previous_version_id, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_root_version_id, d.id) + ELSE COALESCE(d.root_version_id, d.id) + END AS root_version_id, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_is_latest_version, COALESCE(d.is_latest_version, true)) + ELSE COALESCE(d.is_latest_version, true) + END AS is_latest_version, + COALESCE(d.normalized_name, '') AS normalized_name, + COALESCE(vc.total_versions, fallback_vc.total_versions, 1) AS total_versions, + fallback_vc.derived_version_no, d.created_at, d.updated_at, f.id AS file_id, @@ -1404,7 +1660,14 @@ class GovdocServiceImpl(IGovdocService): gr.total_score, gr.passed_count, gr.failed_count, - gr.skipped_count + gr.skipped_count, + NULL::VARCHAR AS rules_path, + 0 AS finding_count, + 0 AS error_count, + 0 AS warning_count, + 0 AS info_count, + false AS has_html_report, + false AS has_docx_report FROM leaudit_documents d JOIN leaudit_document_files f ON f.document_id = d.id @@ -1413,6 +1676,51 @@ class GovdocServiceImpl(IGovdocService): AND f.deleted_at IS NULL LEFT JOIN govdoc_runs gr ON gr.id = d.current_run_id + LEFT JOIN ( + SELECT version_group_key, COUNT(*) AS total_versions + FROM leaudit_documents + WHERE deleted_at IS NULL + AND COALESCE(version_group_key, '') <> '' + GROUP BY version_group_key + ) vc + ON vc.version_group_key = d.version_group_key + LEFT JOIN ( + SELECT + d2.id AS document_id, + COUNT(*) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ) AS total_versions, + ROW_NUMBER() OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_version_no, + LAG(d2.id) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_previous_version_id, + FIRST_VALUE(d2.id) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_root_version_id, + CASE + WHEN ROW_NUMBER() OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at DESC, d2.id DESC + ) = 1 THEN true + ELSE false + END AS derived_is_latest_version, + md5(CONCAT_WS('|', d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, ''))) AS derived_version_group_key + FROM leaudit_documents d2 + JOIN leaudit_document_files f2 + ON f2.document_id = d2.id + AND f2.is_active = true + AND f2.file_role = 'original' + AND f2.deleted_at IS NULL + WHERE d2.deleted_at IS NULL + AND d2.review_scope = 'govdoc' + AND COALESCE(d2.engine_type, 'leaudit') = 'govdoc' + ) fallback_vc + ON fallback_vc.document_id = d.id WHERE d.id = :document_id AND d.deleted_at IS NULL AND COALESCE(d.engine_type, 'leaudit') = 'govdoc' @@ -1498,78 +1806,20 @@ class GovdocServiceImpl(IGovdocService): passedCount: Any, failedCount: Any, skippedCount: Any, + totalFindings: Any | None = None, + bySeverity: dict[str, int] | None = None, + byCategory: dict[str, int] | None = None, ) -> dict[str, Any]: return { "score": float(totalScore or 0), - "total_findings": int(failedCount or 0), - "by_severity": {}, - "by_category": {}, + "total_findings": int(totalFindings if totalFindings is not None else (failedCount or 0)), + "by_severity": bySeverity or {}, + "by_category": byCategory or {}, "passed_count": int(passedCount or 0), "failed_count": int(failedCount or 0), "skipped_count": int(skippedCount or 0), } - async def _find_govdoc_latest_version_candidate( - self, - session, - *, - typeId: int | None, - region: str, - normalizedName: str, - fileExt: str | None, - ) -> dict[str, Any] | None: - if typeId is not None: - candidate = await _find_latest_version_candidate( - session, - type_id=int(typeId), - root_group_id=None, - region=region, - normalized_name=normalizedName, - file_ext=fileExt, - ) - if candidate: - return candidate - - ext_clause = "" - params: dict[str, Any] = { - "region": region, - "normalized_name": normalizedName, - } - if fileExt: - ext_clause = " AND LOWER(COALESCE(f.file_ext, '')) = :file_ext" - params["file_ext"] = fileExt.lower() - - row = ( - await session.execute( - text( - f""" - SELECT - d.id AS document_id, - d.version_group_key, - d.version_no, - d.root_version_id, - f.id AS file_id, - f.sha256 - FROM leaudit_documents d - JOIN leaudit_document_files f - ON f.document_id = d.id - AND f.is_active = true - AND f.file_role = 'original' - AND f.deleted_at IS NULL - WHERE d.region = :region - AND d.normalized_name = :normalized_name - AND COALESCE(d.engine_type, 'leaudit') = 'govdoc' - AND COALESCE(d.is_latest_version, true) = true - AND d.deleted_at IS NULL{ext_clause} - ORDER BY COALESCE(d.version_no, 1) DESC, d.id DESC - LIMIT 1 - """ - ), - params, - ) - ).mappings().first() - return dict(row) if row else None - def _group_artifacts_by_run(self, rows: list[Any]) -> dict[int, dict[str, Any]]: grouped: dict[int, dict[str, Any]] = {} artifactTypeMap = { @@ -1609,76 +1859,23 @@ class GovdocServiceImpl(IGovdocService): ) return grouped - def _build_list_summary_payload(self, row: _GovdocDocumentRow) -> dict[str, Any]: - summary = self._parse_json(row.resultSummaryJson) or {} - if not isinstance(summary, dict): - summary = {} - bySeverity = summary.get("by_severity") - byCategory = summary.get("by_category") - return { - "score": float(summary.get("score") or row.totalScore or 0), - "total_findings": int(summary.get("total_findings") or row.failedCount or 0), - "by_severity": bySeverity if isinstance(bySeverity, dict) else {}, - "by_category": byCategory if isinstance(byCategory, dict) else {}, - "passed_count": int(summary.get("passed_count") or row.passedCount or 0), - "failed_count": int(summary.get("failed_count") or row.failedCount or 0), - "skipped_count": int(summary.get("skipped_count") or row.skippedCount or 0), - } - - def _serialize_list_item_row( - self, - row: _GovdocDocumentRow, - *, - totalVersions: int | None, - historyVersions: list[dict[str, Any]], - ) -> dict[str, Any]: - summary = self._build_list_summary_payload(row) - return { - "documentId": row.documentId, - "fileId": row.fileId, - "fileName": row.fileName, - "fileExt": row.fileExt, - "mimeType": row.mimeType, - "fileSize": row.fileSize, - "region": row.region, - "processingStatus": row.processingStatus, - "currentRunId": row.currentRunId, - "latestRunId": row.currentRunId, - "resultStatus": row.resultStatus, - "score": float(row.totalScore) if row.totalScore is not None else None, - "passedCount": row.passedCount or 0, - "failedCount": row.failedCount or 0, - "skippedCount": row.skippedCount or 0, - "versionGroupKey": row.versionGroupKey or "", - "versionNo": int(row.versionNo or 1), - "rootVersionId": int(row.rootVersionId or row.documentId), - "previousVersionId": int(row.previousVersionId) if row.previousVersionId is not None else None, - "totalVersions": int(totalVersions or max(1, len(historyVersions) + 1)), - "historyCount": len(historyVersions), - "historyVersions": historyVersions, - "latestRun": { - "runId": row.currentRunId, - "summary": summary, - } if row.currentRunId else None, - "reports": { - "hasHtmlReport": row.hasHtmlReport, - "hasDocxReport": row.hasDocxReport, - }, - "createdAt": self._iso(row.createdAt), - "updatedAt": self._iso(row.updatedAt), - } - def _map_document_row(self, row: Any) -> _GovdocDocumentRow: + versionNoValue = row.get("derived_version_no") + if versionNoValue is None: + versionNoValue = row.get("version_no") + return _GovdocDocumentRow( documentId=int(row["document_id"]), region=str(row["region"] or "default"), processingStatus=str(row["processing_status"] or "waiting"), currentRunId=int(row["current_run_id"]) if row.get("current_run_id") is not None else None, versionGroupKey=str(row["version_group_key"]) if row.get("version_group_key") else None, - versionNo=int(row.get("version_no") or 1), - rootVersionId=int(row["root_version_id"]) if row.get("root_version_id") is not None else None, + versionNo=int(versionNoValue or 1), + totalVersions=int(row.get("total_versions") or 1), previousVersionId=int(row["previous_version_id"]) if row.get("previous_version_id") is not None else None, + rootVersionId=int(row["root_version_id"]) if row.get("root_version_id") is not None else None, isLatestVersion=bool(row.get("is_latest_version", True)), + normalizedName=str(row["normalized_name"]) if row.get("normalized_name") else None, createdAt=row.get("created_at"), updatedAt=row.get("updated_at"), fileId=int(row["file_id"]), @@ -1693,11 +1890,292 @@ class GovdocServiceImpl(IGovdocService): passedCount=int(row["passed_count"]) if row.get("passed_count") is not None else None, failedCount=int(row["failed_count"]) if row.get("failed_count") is not None else None, skippedCount=int(row["skipped_count"]) if row.get("skipped_count") is not None else None, - resultSummaryJson=row.get("result_summary_json"), + findingCount=int(row.get("finding_count") or 0), + errorCount=int(row.get("error_count") or 0), + warningCount=int(row.get("warning_count") or 0), + infoCount=int(row.get("info_count") or 0), + rulesPath=str(row["rules_path"]) if row.get("rules_path") else None, hasHtmlReport=bool(row.get("has_html_report")), hasDocxReport=bool(row.get("has_docx_report")), ) + async def _build_document_list_item(self, mapped: _GovdocDocumentRow) -> dict[str, Any]: + summary = self._build_summary_payload( + mapped.totalScore, + mapped.passedCount, + mapped.failedCount, + mapped.skippedCount, + totalFindings=mapped.findingCount, + bySeverity={ + "error": mapped.errorCount, + "warning": mapped.warningCount, + "info": mapped.infoCount, + }, + ) + rulesetMeta = await self._resolve_ruleset_metadata(mapped.rulesPath) + return { + "documentId": mapped.documentId, + "fileId": mapped.fileId, + "fileName": mapped.fileName, + "fileExt": mapped.fileExt, + "mimeType": mapped.mimeType, + "fileSize": mapped.fileSize, + "region": mapped.region, + "processingStatus": mapped.processingStatus, + "currentRunId": mapped.currentRunId, + "latestRunId": mapped.currentRunId, + "resultStatus": mapped.resultStatus, + "score": float(mapped.totalScore) if mapped.totalScore is not None else None, + "versionGroupKey": mapped.versionGroupKey, + "versionNo": mapped.versionNo, + "totalVersions": mapped.totalVersions, + "previousVersionId": mapped.previousVersionId, + "rootVersionId": mapped.rootVersionId, + "isLatestVersion": mapped.isLatestVersion, + "rulesetId": rulesetMeta["typeId"], + "rulesetName": rulesetMeta["name"], + "rulesetVersion": rulesetMeta["version"], + "passedCount": mapped.passedCount or 0, + "failedCount": mapped.failedCount or 0, + "skippedCount": mapped.skippedCount or 0, + "latestRun": { + "runId": mapped.currentRunId, + "summary": summary, + } if mapped.currentRunId else None, + "reports": { + "hasHtmlReport": mapped.hasHtmlReport, + "hasDocxReport": mapped.hasDocxReport, + }, + "createdAt": self._iso(mapped.createdAt), + "updatedAt": self._iso(mapped.updatedAt), + } + + async def _find_latest_version_candidate( + self, + session, + *, + region: str, + normalizedName: str, + fileExt: str | None, + ) -> dict[str, Any] | None: + extClause = "" + params: dict[str, Any] = { + "region": region, + "normalized_name": normalizedName, + } + if fileExt: + extClause = " AND f.file_ext = :file_ext" + params["file_ext"] = fileExt + + row = ( + await session.execute( + text( + f""" + SELECT + d.id AS document_id, + d.version_group_key, + d.version_no, + d.root_version_id + FROM leaudit_documents d + JOIN leaudit_document_files f + ON f.document_id = d.id + AND f.is_active = true + AND f.file_role = 'original' + AND f.deleted_at IS NULL + WHERE d.deleted_at IS NULL + AND d.review_scope = 'govdoc' + AND COALESCE(d.engine_type, 'leaudit') = 'govdoc' + AND d.region = :region + AND COALESCE(d.normalized_name, '') = :normalized_name + AND COALESCE(d.is_latest_version, true) = true{extClause} + ORDER BY d.version_no DESC, d.id DESC + LIMIT 1 + """ + ), + params, + ) + ).mappings().first() + return dict(row) if row else None + + async def _backfill_legacy_version_chain( + self, + session, + *, + region: str, + normalizedName: str, + fileExt: str | None, + ) -> dict[str, Any] | None: + extClause = "" + params: dict[str, Any] = { + "region": region, + "normalized_name": normalizedName, + } + if fileExt: + extClause = " AND f.file_ext = :file_ext" + params["file_ext"] = fileExt + + rows = ( + await session.execute( + text( + f""" + SELECT d.id AS document_id + FROM leaudit_documents d + JOIN leaudit_document_files f + ON f.document_id = d.id + AND f.is_active = true + AND f.file_role = 'original' + AND f.deleted_at IS NULL + WHERE d.deleted_at IS NULL + AND d.review_scope = 'govdoc' + AND COALESCE(d.engine_type, 'leaudit') = 'govdoc' + AND d.region = :region + AND COALESCE(d.normalized_name, '') = :normalized_name{extClause} + ORDER BY d.created_at ASC, d.id ASC + """ + ), + params, + ) + ).mappings().all() + + if not rows: + return None + + groupKey = uuid.uuid4().hex + rootId = int(rows[0]["document_id"]) + previousId: int | None = None + + for index, row in enumerate(rows, start=1): + documentId = int(row["document_id"]) + isLatest = index == len(rows) + await session.execute( + text( + """ + UPDATE leaudit_documents + SET version_group_key = :version_group_key, + version_no = :version_no, + previous_version_id = :previous_version_id, + root_version_id = :root_version_id, + is_latest_version = :is_latest_version, + updated_at = NOW() + WHERE id = :document_id + """ + ), + { + "version_group_key": groupKey, + "version_no": index, + "previous_version_id": previousId, + "root_version_id": rootId, + "is_latest_version": isLatest, + "document_id": documentId, + }, + ) + previousId = documentId + + latestId = int(rows[-1]["document_id"]) + return { + "document_id": latestId, + "version_group_key": groupKey, + "version_no": len(rows), + "root_version_id": rootId, + } + + async def _backfill_missing_version_groups(self, session) -> None: + groups = ( + await session.execute( + text( + """ + SELECT + d.region, + COALESCE(d.normalized_name, '') AS normalized_name, + COALESCE(f.file_ext, '') AS file_ext, + ARRAY_AGG(d.id ORDER BY d.created_at ASC, d.id ASC) AS document_ids, + MIN(NULLIF(d.version_group_key, '')) AS existing_version_group_key + FROM leaudit_documents d + JOIN leaudit_document_files f + ON f.document_id = d.id + AND f.is_active = true + AND f.file_role = 'original' + AND f.deleted_at IS NULL + WHERE d.deleted_at IS NULL + AND d.review_scope = 'govdoc' + AND COALESCE(d.engine_type, 'leaudit') = 'govdoc' + GROUP BY d.region, COALESCE(d.normalized_name, ''), COALESCE(f.file_ext, '') + HAVING BOOL_OR(COALESCE(d.version_group_key, '') = '') + """ + ) + ) + ).mappings().all() + + if not groups: + return + + for group in groups: + region = str(group["region"] or "default") + normalizedName = str(group["normalized_name"] or "") + fileExt = str(group["file_ext"] or "") + documentIds = [int(value) for value in (group["document_ids"] or [])] + if not documentIds: + continue + + versionGroupKey = str(group["existing_version_group_key"] or "").strip() or self._derive_version_group_key( + region=region, + normalizedName=normalizedName, + fileExt=fileExt or None, + ) + rootId = documentIds[0] + previousId: int | None = None + + for index, documentId in enumerate(documentIds, start=1): + isLatest = index == len(documentIds) + await session.execute( + text( + """ + UPDATE leaudit_documents + SET version_group_key = :version_group_key, + version_no = :version_no, + previous_version_id = :previous_version_id, + root_version_id = :root_version_id, + is_latest_version = :is_latest_version, + updated_at = NOW() + WHERE id = :document_id + """ + ), + { + "version_group_key": versionGroupKey, + "version_no": index, + "previous_version_id": previousId, + "root_version_id": rootId, + "is_latest_version": isLatest, + "document_id": documentId, + }, + ) + previousId = documentId + + await session.commit() + + def _derive_version_group_key(self, *, region: str, normalizedName: str, fileExt: str | None) -> str: + raw = f"{region}|{normalizedName}|{fileExt or ''}" + return hashlib.md5(raw.encode("utf-8")).hexdigest() + + async def _resolve_ruleset_metadata(self, rulesPath: str | None) -> dict[str, str]: + ruleset = await self._load_ruleset(rulesPath) + if ruleset is not None: + return { + "typeId": str(ruleset.metadata.type_id or ""), + "name": str(ruleset.metadata.name or ""), + "version": str(ruleset.metadata.version or ""), + } + + resolved = await self._resolve_rules_path(rulesPath) + if not resolved: + return {"typeId": "", "name": "", "version": ""} + path = Path(resolved) + return { + "typeId": path.stem, + "name": path.parent.name, + "version": "", + } + async def _get_report_artifact(self, runId: int, artifactType: str) -> Any | None: async with GetAsyncSession() as session: await self._ensureGovdocSchema(session) @@ -1735,3 +2213,23 @@ class GovdocServiceImpl(IGovdocService): if isinstance(value, datetime): return value.isoformat() return str(value) + + def _build_audit_result_from_run_result(self, payload: dict[str, Any]) -> AuditResult: + summaryPayload = payload.get("summary") or {} + findingsPayload = payload.get("findings") or [] + checkedRulesPayload = payload.get("checkedRules") or [] + structurePayload = payload.get("structure") or [] + outlinePayload = payload.get("outline") or [] + entitiesPayload = payload.get("entities") or {} + documentPayload = payload.get("document") or {} + + return AuditResult( + audit_id=str(payload.get("runId") or ""), + document=documentPayload, + summary=AuditSummary.model_validate(summaryPayload), + findings=[Finding.model_validate(item) for item in findingsPayload], + checked_rules=[CheckedRule.model_validate(item) for item in checkedRulesPayload], + structure=[StructureItem.model_validate(item) for item in structurePayload], + outline=[OutlineNode.model_validate(item) for item in outlinePayload], + entities=entitiesPayload, + ) diff --git a/fastapi_modules/fastapi_leaudit/services/impl/ragDatasetServiceImpl.py b/fastapi_modules/fastapi_leaudit/services/impl/ragDatasetServiceImpl.py index 2077a0b..62b7615 100644 --- a/fastapi_modules/fastapi_leaudit/services/impl/ragDatasetServiceImpl.py +++ b/fastapi_modules/fastapi_leaudit/services/impl/ragDatasetServiceImpl.py @@ -1186,7 +1186,7 @@ class RagDatasetServiceImpl(IRagDatasetService): content = documents[index] if index < len(documents) else "" metadata = metadatas[index] if index < len(metadatas) and isinstance(metadatas[index], dict) else {} distance = float(distances[index]) if index < len(distances) and distances[index] is not None else 1.0 - score = max(0.0, min(1.0, 1.0 - distance)) + score = max(0.0, min(1.0, 1.0 / (1.0 + max(0.0, distance)))) if score_threshold_enabled and score_threshold is not None and score < score_threshold: continue diff --git a/leaudit.sh b/leaudit.sh index 32dbc2d..3c329c6 100755 --- a/leaudit.sh +++ b/leaudit.sh @@ -167,10 +167,8 @@ start_backend() { log_info "启动后端服务 (端口: $BACKEND_PORT)..." : > "$BACKEND_LOG" - ( - cd "$BACKEND_DIR" - exec "$BACKEND_PYTHON" run.py - ) >> "$BACKEND_LOG" 2>&1 & + nohup bash -lc "cd \"$BACKEND_DIR\" && exec \"$BACKEND_PYTHON\" run.py" \ + >> "$BACKEND_LOG" 2>&1 < /dev/null & pid=$! sleep 2 @@ -201,10 +199,8 @@ start_frontend() { log_info "启动前端开发服务 (端口: $FRONTEND_DEV_PORT,代理入口: $FRONTEND_PUBLIC_PORT)..." : > "$FRONTEND_LOG" - ( - cd "$FRONTEND_DIR" - exec npm run dev:dev - ) >> "$FRONTEND_LOG" 2>&1 & + nohup bash -lc "cd \"$FRONTEND_DIR\" && exec npm run dev:dev" \ + >> "$FRONTEND_LOG" 2>&1 < /dev/null & pid=$! sleep 4 @@ -238,10 +234,8 @@ start_worker() { log_info "启动 Worker 服务..." : > "$WORKER_LOG" - ( - cd "$PROJECT_DIR" - exec "$WORKER_SCRIPT" - ) >> "$WORKER_LOG" 2>&1 & + nohup bash -lc "cd \"$PROJECT_DIR\" && exec \"$WORKER_SCRIPT\"" \ + >> "$WORKER_LOG" 2>&1 < /dev/null & pid=$! echo "$pid" > "$WORKER_PID_FILE" sleep 2 @@ -273,10 +267,8 @@ start_beat() { log_info "启动 Beat 调度服务..." : > "$BEAT_LOG" - ( - cd "$PROJECT_DIR" - exec "$BEAT_SCRIPT" - ) >> "$BEAT_LOG" 2>&1 & + nohup bash -lc "cd \"$PROJECT_DIR\" && exec \"$BEAT_SCRIPT\"" \ + >> "$BEAT_LOG" 2>&1 < /dev/null & pid=$! echo "$pid" > "$BEAT_PID_FILE" sleep 2 diff --git a/legal-platform-frontend b/legal-platform-frontend index f6bb4aa..c41ddc8 160000 --- a/legal-platform-frontend +++ b/legal-platform-frontend @@ -1 +1 @@ -Subproject commit f6bb4aa5524ee4325bdd871c5f7a21b1543f8d80 +Subproject commit c41ddc844ce04ad2ae5ee2679cc2155cc25d44bf diff --git a/pyproject.toml b/pyproject.toml index 1c79224..fbc3d5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ dependencies = [ "tomli>=2.2.0", "python-multipart>=0.0.18", "pyjwt>=2.10.0", + "openai>=1.30.0", "pillow>=11.0.0", "pyyaml>=6.0", "minio>=7.2.8", diff --git a/scripts/regenerate_govdoc_html_report.py b/scripts/regenerate_govdoc_html_report.py new file mode 100644 index 0000000..5c307c4 --- /dev/null +++ b/scripts/regenerate_govdoc_html_report.py @@ -0,0 +1,298 @@ +#!/usr/bin/env python3 +"""按已有 govdoc run 重生成 HTML 报告并覆盖 OSS 产物。""" + +from __future__ import annotations + +import argparse +import asyncio +import hashlib +import json +from typing import Any + +from sqlalchemy import text + +from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession +from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.result import ( + AuditResult, + AuditSummary, + CheckedRule, + OutlineNode, + StructureItem, +) +from fastapi_modules.fastapi_leaudit.govdoc_engine.models import Finding, Location +from fastapi_modules.fastapi_leaudit.govdoc_engine.parser.entities import SemanticEntity +from fastapi_modules.fastapi_leaudit.govdoc_engine.reporter.html_renderer import render_html +from fastapi_modules.fastapi_leaudit.services.impl.ossServiceImpl import OssServiceImpl + + +def _parse_json(raw: Any) -> Any: + if raw is None or raw == "": + return None + if isinstance(raw, (dict, list)): + return raw + try: + return json.loads(raw) + except Exception: + return None + + +def _build_checked_rules(rule_rows: list[dict[str, Any]]) -> list[CheckedRule]: + checked_rules: list[CheckedRule] = [] + seen_rule_ids: set[str] = set() + for row in rule_rows: + rule_id = str(row["rule_id"]) + if rule_id in seen_rule_ids: + continue + seen_rule_ids.add(rule_id) + status = str(row.get("result") or "pass") + checked_rules.append( + CheckedRule( + rule_id=rule_id, + name=row.get("rule_name") or rule_id, + severity=row.get("severity") or "info", + category=row.get("category") or "", + status=status if status in {"pass", "fail", "skipped"} else "pass", + skip_reason=row.get("skip_reason") or "", + ) + ) + return checked_rules + + +def _build_findings(rule_rows: list[dict[str, Any]]) -> list[Finding]: + findings: list[Finding] = [] + for index, row in enumerate(rule_rows): + if row.get("result") != "fail": + continue + paragraph_index = int(row.get("paragraph_index") or 0) + findings.append( + Finding( + finding_id=f"{row['rule_id']}-{paragraph_index or index}", + rule_id=str(row["rule_id"]), + rule_name=row.get("rule_name") or str(row["rule_id"]), + severity=row.get("severity") or "info", + category=row.get("category") or "", + location=Location( + paragraph_index=paragraph_index, + role=row.get("location_path"), + char_start=0, + char_end=0, + context=row.get("paragraph_text") or "", + ), + actual=_parse_json(row.get("actual")) or {}, + expected=_parse_json(row.get("expected")) or {}, + message=row.get("message") or "", + suggestion=row.get("suggestion") or "", + evidence=str(row.get("evidence") or ""), + confidence=1.0, + ) + ) + return findings + + +def _build_summary(run_row: dict[str, Any], findings: list[Finding]) -> AuditSummary: + severity_stats: dict[str, int] = {} + category_stats: dict[str, int] = {} + for finding in findings: + severity_stats[finding.severity] = severity_stats.get(finding.severity, 0) + 1 + if finding.category: + category_stats[finding.category] = category_stats.get(finding.category, 0) + 1 + return AuditSummary( + score=int(float(run_row.get("total_score") or 0)), + total_findings=len(findings), + by_severity=severity_stats, + by_category=category_stats, + passed_count=int(run_row.get("passed_count") or 0), + failed_count=int(run_row.get("failed_count") or 0), + skipped_count=int(run_row.get("skipped_count") or 0), + ) + + +def _normalize_structure_item(item: dict[str, Any]) -> dict[str, Any]: + return { + "role": item.get("role"), + "label": item.get("label") or "", + "count": item.get("count") or 0, + "expected": bool(item.get("expected", False)), + "paragraph_indices": item.get("paragraphIndices") or item.get("paragraph_indices") or [], + "samples": item.get("samples") or [], + "char_total": item.get("charTotal") or item.get("char_total") or 0, + "dominant_font": item.get("dominantFont") or item.get("dominant_font"), + "dominant_size_pt": item.get("dominantSizePt") or item.get("dominant_size_pt"), + "style_uniform": bool(item.get("styleUniform", item.get("style_uniform", True))), + } + + +def _normalize_outline_node(item: dict[str, Any]) -> dict[str, Any]: + return { + "paragraph_index": item.get("paragraphIndex") or item.get("paragraph_index") or 0, + "level": item.get("level") or 0, + "text": item.get("text") or "", + "children": [_normalize_outline_node(child) for child in (item.get("children") or [])], + } + + +async def regenerate_html_report(run_id: int) -> None: + oss_service = OssServiceImpl() + + async with GetAsyncSession() as session: + run_row = ( + await session.execute( + text( + """ + SELECT + gr.id, + gr.document_id, + gr.total_score, + gr.passed_count, + gr.failed_count, + gr.skipped_count, + gr.result_summary_json, + d.region, + f.file_name + FROM govdoc_runs gr + JOIN leaudit_documents d + ON d.id = gr.document_id + AND d.deleted_at IS NULL + JOIN leaudit_document_files f + ON f.document_id = d.id + AND f.file_role = 'original' + AND f.is_active = true + AND f.deleted_at IS NULL + WHERE gr.id = :run_id + AND gr.deleted_at IS NULL + LIMIT 1 + """ + ), + {"run_id": run_id}, + ) + ).mappings().first() + if not run_row: + raise RuntimeError(f"run {run_id} 不存在") + + rule_rows = ( + await session.execute( + text( + """ + SELECT + rule_id, + rule_name, + severity, + category, + result, + skip_reason, + message, + suggestion, + actual, + expected, + evidence, + paragraph_index, + paragraph_text, + location_path + FROM govdoc_rule_results + WHERE run_id = :run_id + AND deleted_at IS NULL + ORDER BY id ASC + """ + ), + {"run_id": run_id}, + ) + ).mappings().all() + + artifact_row = ( + await session.execute( + text( + """ + SELECT id, file_name, oss_url + FROM govdoc_report_artifacts + WHERE run_id = :run_id + AND artifact_type = 'html_report' + AND deleted_at IS NULL + ORDER BY id DESC + LIMIT 1 + """ + ), + {"run_id": run_id}, + ) + ).mappings().first() + if not artifact_row: + raise RuntimeError(f"run {run_id} 没有 html_report 产物记录") + + aux = _parse_json(run_row.get("result_summary_json")) or {} + findings = _build_findings(rule_rows) + result = AuditResult( + audit_id=str(run_id), + document={ + "documentId": int(run_row["document_id"]), + "filename": run_row.get("file_name") or "", + }, + summary=_build_summary(run_row, findings), + findings=findings, + checked_rules=_build_checked_rules(rule_rows), + structure=[ + StructureItem.model_validate(_normalize_structure_item(item)) + for item in aux.get("structure", []) + ], + outline=[ + OutlineNode.model_validate(_normalize_outline_node(item)) + for item in aux.get("outline", []) + ], + entities={ + name: SemanticEntity.model_validate(value) + for name, value in (aux.get("entities") or {}).items() + if value is not None + }, + ) + + html = render_html(result) + html_bytes = html.encode("utf-8") + sha256 = hashlib.sha256(html_bytes).hexdigest() + + await oss_service.UploadText( + ObjectKey=str(artifact_row["oss_url"]), + Content=html, + ContentType="text/html; charset=utf-8", + ) + + await session.execute( + text( + """ + UPDATE govdoc_report_artifacts + SET file_size = :file_size, + sha256 = :sha256, + mime_type = 'text/html; charset=utf-8', + updated_at = now() + WHERE id = :artifact_id + """ + ), + { + "artifact_id": int(artifact_row["id"]), + "file_size": len(html_bytes), + "sha256": sha256, + }, + ) + await session.commit() + + print( + json.dumps( + { + "runId": run_id, + "documentId": int(run_row["document_id"]), + "fileName": run_row.get("file_name") or "", + "artifactOssKey": artifact_row["oss_url"], + "htmlBytes": len(html_bytes), + "sha256": sha256, + }, + ensure_ascii=False, + ) + ) + + +def main() -> None: + parser = argparse.ArgumentParser(description="重生成 govdoc HTML 报告") + parser.add_argument("run_id", type=int, help="govdoc run id") + args = parser.parse_args() + asyncio.run(regenerate_html_report(args.run_id)) + + +if __name__ == "__main__": + main()