From ff6665e936cb31b0cd406e4ba0eba52ab3ca9fa4 Mon Sep 17 00:00:00 2001 From: wren <“porlong@qq.com”> Date: Mon, 18 May 2026 16:08:40 +0800 Subject: [PATCH] fix: improve govdoc reports and versioned document list --- .../govdoc_engine/reporter/html_renderer.py | 50 +- .../services/impl/govdocServiceImpl.py | 926 ++++++++++++++++-- legal-platform-frontend | 2 +- pyproject.toml | 1 + 4 files changed, 856 insertions(+), 123 deletions(-) diff --git a/fastapi_modules/fastapi_leaudit/govdoc_engine/reporter/html_renderer.py b/fastapi_modules/fastapi_leaudit/govdoc_engine/reporter/html_renderer.py index e591029..f19f59d 100644 --- a/fastapi_modules/fastapi_leaudit/govdoc_engine/reporter/html_renderer.py +++ b/fastapi_modules/fastapi_leaudit/govdoc_engine/reporter/html_renderer.py @@ -390,9 +390,31 @@ tbody tr:hover { """ +def _summary_score(summary: object) -> int: + for attr in ("score", "totalScore", "total_score"): + value = getattr(summary, attr, None) + if value is None and isinstance(summary, dict): + value = summary.get(attr) + if value is None: + continue + try: + return int(float(value)) + except (TypeError, ValueError): + continue + return 0 + + +def _severity_label(severity: str) -> str: + return { + "error": "错误", + "warning": "警告", + "info": "提示", + }.get(severity, severity) + + def render_html(result: AuditResult) -> str: summary = result.summary - score = int(summary.score or 0) + score = _summary_score(summary) score_pct = max(0, min(score, 100)) severity_counts = _severity_counts(result) category_count = len([key for key, value in (summary.by_category or {}).items() if key and value]) @@ -407,6 +429,8 @@ def render_html(result: AuditResult) -> str: context = escape((finding.location.context or "").strip()) message = escape(finding.message) suggestion = escape(finding.suggestion) if finding.suggestion else "按规则要求修正对应内容。" + severity = escape(finding.severity) + severity_label = escape(_severity_label(finding.severity)) rows.append( f""" @@ -416,7 +440,7 @@ def render_html(result: AuditResult) -> str:
{escape(finding.rule_id)}
{escape(finding.rule_name)}
- {escape(finding.severity)} + {severity_label} {escape(finding.category)} {location_label} @@ -441,33 +465,32 @@ def render_html(result: AuditResult) -> str:
报告摘要
-
不改报告语义,仅收敛样式、配色与信息层级
综合得分
{score}
-
这份正式 HTML 报告沿用平台工作台的版式语言,突出摘要、明细和建议三层信息。
+
综合反映本次公文审核结果,可结合下方问题明细逐项核对修正。
-
正式报告样式方向
+
审核结果

公文格式审核报告

-
{filename} · 共 {summary.total_findings} 项问题 · 用作正式 HTML 报告输出
+
{filename} · 共 {summary.total_findings} 项问题
错误项
-
{severity_counts["error"]}error
+
{severity_counts["error"]}错误
警告项
-
{severity_counts["warning"]}warning
+
{severity_counts["warning"]}警告
提示项
-
{severity_counts["info"]}info
+
{severity_counts["info"]}提示
问题类别
@@ -488,23 +511,23 @@ def render_html(result: AuditResult) -> str: @@ -513,7 +536,6 @@ def render_html(result: AuditResult) -> str:
问题明细
-
保留当前报告语义,只收敛版式、层级和配色。
全部 diff --git a/fastapi_modules/fastapi_leaudit/services/impl/govdocServiceImpl.py b/fastapi_modules/fastapi_leaudit/services/impl/govdocServiceImpl.py index 5ba5d9f..2a3af9e 100644 --- a/fastapi_modules/fastapi_leaudit/services/impl/govdocServiceImpl.py +++ b/fastapi_modules/fastapi_leaudit/services/impl/govdocServiceImpl.py @@ -6,13 +6,14 @@ import hashlib import json import mimetypes import time +import uuid from dataclasses import dataclass from datetime import date, datetime from pathlib import Path from typing import Any from fastapi import UploadFile -from sqlalchemy import text +from sqlalchemy import bindparam, text from fastapi_common.fastapi_common_logger import logger from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession @@ -21,9 +22,12 @@ from fastapi_common.fastapi_common_web.domain.responses import StatusCodeEnum from fastapi_common.fastapi_common_web.exception.LeauditException import LeauditException from fastapi_modules.fastapi_leaudit.govdoc_bridge.storage_adapter import StorageAdapter +from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.result import AuditResult, AuditSummary, CheckedRule, OutlineNode, StructureItem from fastapi_modules.fastapi_leaudit.govdoc_bridge.tasks import dispatch_govdoc_task from fastapi_modules.fastapi_leaudit.govdoc_engine.parser.docx_parser import parse_docx +from fastapi_modules.fastapi_leaudit.govdoc_engine.models import Finding, Location from fastapi_modules.fastapi_leaudit.govdoc_engine.reporter.html_paragraph import paragraphs_to_html +from fastapi_modules.fastapi_leaudit.govdoc_engine.reporter.html_renderer import render_html from fastapi_modules.fastapi_leaudit.models import LeauditDocument, LeauditDocumentFile from fastapi_modules.fastapi_leaudit.services import IGovdocService, IOssService from fastapi_modules.fastapi_leaudit.services.impl.ossServiceImpl import OssServiceImpl @@ -35,6 +39,13 @@ class _GovdocDocumentRow: region: str processingStatus: str currentRunId: int | None + versionGroupKey: str | None + versionNo: int + totalVersions: int + previousVersionId: int | None + rootVersionId: int | None + isLatestVersion: bool + normalizedName: str | None createdAt: Any updatedAt: Any fileId: int @@ -49,6 +60,11 @@ class _GovdocDocumentRow: passedCount: int | None failedCount: int | None skippedCount: int | None + findingCount: int + errorCount: int + warningCount: int + infoCount: int + rulesPath: str | None hasHtmlReport: bool hasDocxReport: bool @@ -110,8 +126,37 @@ class GovdocServiceImpl(IGovdocService): async with GetAsyncSession() as session: await self._ensureGovdocSchema(session) + await self._backfill_missing_version_groups(session) currentUser = await self._getCurrentUserContext(createdBy) resolvedRegion = self._resolve_upload_region(currentUser, normalizedRegion) + latestCandidate = await self._find_latest_version_candidate( + session, + region=resolvedRegion, + normalizedName=normalizedName, + fileExt=fileExt, + ) + if latestCandidate and not latestCandidate.get("version_group_key"): + latestCandidate = await self._backfill_legacy_version_chain( + session, + region=resolvedRegion, + normalizedName=normalizedName, + fileExt=fileExt, + ) + previousVersionId: int | None = None + rootVersionId: int | None = None + versionGroupKey: str | None = None + versionNo = 1 + + if latestCandidate: + previousVersionId = int(latestCandidate["document_id"]) + rootVersionId = int(latestCandidate["root_version_id"] or latestCandidate["document_id"]) + versionGroupKey = str(latestCandidate["version_group_key"] or "") + versionNo = int(latestCandidate["version_no"] or 0) + 1 + previousDocument = await session.get(LeauditDocument, previousVersionId) + if previousDocument is not None: + previousDocument.isLatestVersion = False + else: + versionGroupKey = uuid.uuid4().hex document = await LeauditDocument.create_new( session, @@ -121,22 +166,23 @@ class GovdocServiceImpl(IGovdocService): region=resolvedRegion, processingStatus="waiting", currentRunId=None, - versionGroupKey=None, - versionNo=1, - previousVersionId=None, - rootVersionId=None, + versionGroupKey=versionGroupKey, + versionNo=versionNo, + previousVersionId=previousVersionId, + rootVersionId=rootVersionId, isLatestVersion=True, normalizedName=normalizedName, reviewScope="govdoc", ) - document.rootVersionId = document.Id + if document.rootVersionId is None: + document.rootVersionId = document.Id await session.flush() objectKey = OssPathUtils.BuildBusinessDocKey( Region=resolvedRegion, TypeCode="govdoc", DocumentId=document.Id, - Version="v1", + Version=f"v{document.versionNo}", FileRole="original", FileName=fileName, Year=uploadedAt.year, @@ -276,53 +322,149 @@ class GovdocServiceImpl(IGovdocService): whereClause = " AND ".join(filters) async with GetAsyncSession() as session: + await self._backfill_missing_version_groups(session) + baseSelect = f""" + WITH effective_docs AS ( + SELECT + d.id AS document_id, + COALESCE(d.region, 'default') AS region, + COALESCE(d.processing_status, 'waiting') AS processing_status, + d.current_run_id, + COALESCE(NULLIF(d.version_group_key, ''), fallback_vc.derived_version_group_key, '') AS version_group_key, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_version_no, 1) + ELSE COALESCE(NULLIF(d.version_no, 0), 1) + END AS version_no, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN fallback_vc.derived_previous_version_id + ELSE d.previous_version_id + END AS previous_version_id, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_root_version_id, d.id) + ELSE COALESCE(d.root_version_id, d.id) + END AS root_version_id, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_is_latest_version, COALESCE(d.is_latest_version, true)) + ELSE COALESCE(d.is_latest_version, true) + END AS is_latest_version, + COALESCE(d.normalized_name, '') AS normalized_name, + COALESCE(vc.total_versions, fallback_vc.total_versions, 1) AS total_versions, + fallback_vc.derived_version_no, + d.created_at, + d.updated_at, + f.id AS file_id, + f.file_name, + f.file_ext, + f.mime_type, + f.file_size, + f.oss_url, + f.created_by, + gr.result_status, + gr.total_score, + gr.passed_count, + gr.failed_count, + gr.skipped_count, + gr.rules_path, + COALESCE(fc.finding_count, 0) AS finding_count, + COALESCE(fc.error_count, 0) AS error_count, + COALESCE(fc.warning_count, 0) AS warning_count, + COALESCE(fc.info_count, 0) AS info_count, + EXISTS( + SELECT 1 + FROM govdoc_report_artifacts gra + WHERE gra.run_id = d.current_run_id + AND gra.artifact_type = 'html_report' + AND gra.deleted_at IS NULL + ) AS has_html_report, + EXISTS( + SELECT 1 + FROM govdoc_report_artifacts gra + WHERE gra.run_id = d.current_run_id + AND gra.artifact_type = 'annotated_docx' + AND gra.deleted_at IS NULL + ) AS has_docx_report + FROM leaudit_documents d + JOIN leaudit_document_files f + ON f.document_id = d.id + AND f.is_active = true + AND f.file_role = 'original' + AND f.deleted_at IS NULL + LEFT JOIN govdoc_runs gr + ON gr.id = d.current_run_id + LEFT JOIN ( + SELECT + run_id, + COUNT(*) FILTER (WHERE result = 'fail') AS finding_count, + COUNT(*) FILTER (WHERE result = 'fail' AND severity = 'error') AS error_count, + COUNT(*) FILTER (WHERE result = 'fail' AND severity = 'warning') AS warning_count, + COUNT(*) FILTER (WHERE result = 'fail' AND severity = 'info') AS info_count + FROM govdoc_rule_results + WHERE deleted_at IS NULL + GROUP BY run_id + ) fc + ON fc.run_id = d.current_run_id + LEFT JOIN ( + SELECT version_group_key, COUNT(*) AS total_versions + FROM leaudit_documents + WHERE deleted_at IS NULL + AND COALESCE(version_group_key, '') <> '' + GROUP BY version_group_key + ) vc + ON vc.version_group_key = d.version_group_key + LEFT JOIN ( + SELECT + d2.id AS document_id, + COUNT(*) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ) AS total_versions, + ROW_NUMBER() OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_version_no, + LAG(d2.id) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_previous_version_id, + FIRST_VALUE(d2.id) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_root_version_id, + CASE + WHEN ROW_NUMBER() OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at DESC, d2.id DESC + ) = 1 THEN true + ELSE false + END AS derived_is_latest_version, + md5(CONCAT_WS('|', d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, ''))) AS derived_version_group_key + FROM leaudit_documents d2 + JOIN leaudit_document_files f2 + ON f2.document_id = d2.id + AND f2.is_active = true + AND f2.file_role = 'original' + AND f2.deleted_at IS NULL + WHERE d2.deleted_at IS NULL + AND d2.review_scope = 'govdoc' + AND COALESCE(d2.engine_type, 'leaudit') = 'govdoc' + ) fallback_vc + ON fallback_vc.document_id = d.id + WHERE {whereClause} + ) + """ + rows = ( await session.execute( text( f""" - SELECT - d.id AS document_id, - COALESCE(d.region, 'default') AS region, - COALESCE(d.processing_status, 'waiting') AS processing_status, - d.current_run_id, - d.created_at, - d.updated_at, - f.id AS file_id, - f.file_name, - f.file_ext, - f.mime_type, - f.file_size, - f.oss_url, - f.created_by, - gr.result_status, - gr.total_score, - gr.passed_count, - gr.failed_count, - gr.skipped_count, - EXISTS( - SELECT 1 - FROM govdoc_report_artifacts gra - WHERE gra.run_id = d.current_run_id - AND gra.artifact_type = 'html_report' - AND gra.deleted_at IS NULL - ) AS has_html_report, - EXISTS( - SELECT 1 - FROM govdoc_report_artifacts gra - WHERE gra.run_id = d.current_run_id - AND gra.artifact_type = 'annotated_docx' - AND gra.deleted_at IS NULL - ) AS has_docx_report - FROM leaudit_documents d - JOIN leaudit_document_files f - ON f.document_id = d.id - AND f.is_active = true - AND f.file_role = 'original' - AND f.deleted_at IS NULL - LEFT JOIN govdoc_runs gr - ON gr.id = d.current_run_id - WHERE {whereClause} - ORDER BY d.created_at DESC + {baseSelect} + SELECT * + FROM effective_docs + WHERE is_latest_version = true + ORDER BY created_at DESC, document_id DESC LIMIT :limit OFFSET :offset """ ), @@ -335,16 +477,10 @@ class GovdocServiceImpl(IGovdocService): await session.execute( text( f""" + {baseSelect} SELECT COUNT(1) - FROM leaudit_documents d - JOIN leaudit_document_files f - ON f.document_id = d.id - AND f.is_active = true - AND f.file_role = 'original' - AND f.deleted_at IS NULL - LEFT JOIN govdoc_runs gr - ON gr.id = d.current_run_id - WHERE {whereClause} + FROM effective_docs + WHERE is_latest_version = true """ ), params, @@ -352,44 +488,46 @@ class GovdocServiceImpl(IGovdocService): ).scalar_one() ) + historyRowsByGroup: dict[str, list[dict[str, Any]]] = {} + versionGroupKeys = [ + str(row["version_group_key"]) + for row in rows + if row.get("version_group_key") and int(row.get("total_versions") or 1) > 1 + ] + if versionGroupKeys: + historyRows = ( + await session.execute( + text( + f""" + {baseSelect} + SELECT * + FROM effective_docs + WHERE version_group_key IN :version_group_keys + AND is_latest_version = false + ORDER BY created_at DESC, document_id DESC + """ + ).bindparams(bindparam("version_group_keys", expanding=True)), + {"version_group_keys": versionGroupKeys, **params}, + ) + ).mappings().all() + for historyRow in historyRows: + groupKey = str(historyRow.get("version_group_key") or "") + if not groupKey: + continue + historyRowsByGroup.setdefault(groupKey, []).append(dict(historyRow)) + items = [] for row in rows: mapped = self._map_document_row(row) - summary = self._build_summary_payload( - mapped.totalScore, - mapped.passedCount, - mapped.failedCount, - mapped.skippedCount, - ) - items.append( - { - "documentId": mapped.documentId, - "fileId": mapped.fileId, - "fileName": mapped.fileName, - "fileExt": mapped.fileExt, - "mimeType": mapped.mimeType, - "fileSize": mapped.fileSize, - "region": mapped.region, - "processingStatus": mapped.processingStatus, - "currentRunId": mapped.currentRunId, - "latestRunId": mapped.currentRunId, - "resultStatus": mapped.resultStatus, - "score": float(mapped.totalScore) if mapped.totalScore is not None else None, - "passedCount": mapped.passedCount or 0, - "failedCount": mapped.failedCount or 0, - "skippedCount": mapped.skippedCount or 0, - "latestRun": { - "runId": mapped.currentRunId, - "summary": summary, - } if mapped.currentRunId else None, - "reports": { - "hasHtmlReport": mapped.hasHtmlReport, - "hasDocxReport": mapped.hasDocxReport, - }, - "createdAt": self._iso(mapped.createdAt), - "updatedAt": self._iso(mapped.updatedAt), - } - ) + item = await self._build_document_list_item(mapped) + groupKey = mapped.versionGroupKey or "" + historyItems = [ + await self._build_document_list_item(self._map_document_row(historyRow)) + for historyRow in historyRowsByGroup.get(groupKey, []) + ] + item["historyCount"] = len(historyItems) + item["historyVersions"] = historyItems + items.append(item) return {"items": items, "total": total, "page": page, "pageSize": pageSize} @@ -420,6 +558,7 @@ class GovdocServiceImpl(IGovdocService): async with GetAsyncSession() as session: await self._ensureGovdocSchema(session) + await self._backfill_missing_version_groups(session) row = ( await session.execute( text( @@ -429,6 +568,30 @@ class GovdocServiceImpl(IGovdocService): COALESCE(d.region, 'default') AS region, COALESCE(d.processing_status, 'waiting') AS processing_status, d.current_run_id, + COALESCE(NULLIF(d.version_group_key, ''), fallback_vc.derived_version_group_key, '') AS version_group_key, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_version_no, 1) + ELSE COALESCE(NULLIF(d.version_no, 0), 1) + END AS version_no, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN fallback_vc.derived_previous_version_id + ELSE d.previous_version_id + END AS previous_version_id, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_root_version_id, d.id) + ELSE COALESCE(d.root_version_id, d.id) + END AS root_version_id, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_is_latest_version, COALESCE(d.is_latest_version, true)) + ELSE COALESCE(d.is_latest_version, true) + END AS is_latest_version, + COALESCE(d.normalized_name, '') AS normalized_name, + COALESCE(vc.total_versions, fallback_vc.total_versions, 1) AS total_versions, + fallback_vc.derived_version_no, d.created_at, d.updated_at, f.id AS file_id, @@ -442,7 +605,26 @@ class GovdocServiceImpl(IGovdocService): gr.total_score, gr.passed_count, gr.failed_count, - gr.skipped_count + gr.skipped_count, + gr.rules_path, + COALESCE(fc.finding_count, 0) AS finding_count, + COALESCE(fc.error_count, 0) AS error_count, + COALESCE(fc.warning_count, 0) AS warning_count, + COALESCE(fc.info_count, 0) AS info_count, + EXISTS( + SELECT 1 + FROM govdoc_report_artifacts gra + WHERE gra.run_id = d.current_run_id + AND gra.artifact_type = 'html_report' + AND gra.deleted_at IS NULL + ) AS has_html_report, + EXISTS( + SELECT 1 + FROM govdoc_report_artifacts gra + WHERE gra.run_id = d.current_run_id + AND gra.artifact_type = 'annotated_docx' + AND gra.deleted_at IS NULL + ) AS has_docx_report FROM leaudit_documents d JOIN leaudit_document_files f ON f.document_id = d.id @@ -451,6 +633,63 @@ class GovdocServiceImpl(IGovdocService): AND f.deleted_at IS NULL LEFT JOIN govdoc_runs gr ON gr.id = d.current_run_id + LEFT JOIN ( + SELECT + run_id, + COUNT(*) FILTER (WHERE result = 'fail') AS finding_count, + COUNT(*) FILTER (WHERE result = 'fail' AND severity = 'error') AS error_count, + COUNT(*) FILTER (WHERE result = 'fail' AND severity = 'warning') AS warning_count, + COUNT(*) FILTER (WHERE result = 'fail' AND severity = 'info') AS info_count + FROM govdoc_rule_results + WHERE deleted_at IS NULL + GROUP BY run_id + ) fc + ON fc.run_id = d.current_run_id + LEFT JOIN ( + SELECT version_group_key, COUNT(*) AS total_versions + FROM leaudit_documents + WHERE deleted_at IS NULL + AND COALESCE(version_group_key, '') <> '' + GROUP BY version_group_key + ) vc + ON vc.version_group_key = d.version_group_key + LEFT JOIN ( + SELECT + d2.id AS document_id, + COUNT(*) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ) AS total_versions, + ROW_NUMBER() OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_version_no, + LAG(d2.id) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_previous_version_id, + FIRST_VALUE(d2.id) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_root_version_id, + CASE + WHEN ROW_NUMBER() OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at DESC, d2.id DESC + ) = 1 THEN true + ELSE false + END AS derived_is_latest_version, + md5(CONCAT_WS('|', d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, ''))) AS derived_version_group_key + FROM leaudit_documents d2 + JOIN leaudit_document_files f2 + ON f2.document_id = d2.id + AND f2.is_active = true + AND f2.file_role = 'original' + AND f2.deleted_at IS NULL + WHERE d2.deleted_at IS NULL + AND d2.review_scope = 'govdoc' + AND COALESCE(d2.engine_type, 'leaudit') = 'govdoc' + ) fallback_vc + ON fallback_vc.document_id = d.id WHERE {whereClause} LIMIT 1 """ @@ -529,6 +768,12 @@ class GovdocServiceImpl(IGovdocService): "fileSize": mapped.fileSize, "region": mapped.region, "processingStatus": mapped.processingStatus, + "versionGroupKey": mapped.versionGroupKey, + "versionNo": mapped.versionNo, + "totalVersions": mapped.totalVersions, + "previousVersionId": mapped.previousVersionId, + "rootVersionId": mapped.rootVersionId, + "isLatestVersion": mapped.isLatestVersion, "createdAt": self._iso(mapped.createdAt), "updatedAt": self._iso(mapped.updatedAt), }, @@ -914,14 +1159,9 @@ class GovdocServiceImpl(IGovdocService): return {"runId": runId, "outline": result.get("outline", [])} async def GetReportHtml(self, runId: int) -> dict[str, Any]: - artifact = await self._get_report_artifact(runId, "html_report") - if not artifact: - return {"runId": runId, "htmlUrl": ""} - content = await self.OssService.DownloadBytes(str(artifact["oss_url"])) - return { - "runId": runId, - "html": content.decode("utf-8"), - } + result = await self.GetRunResult(runId) + html = render_html(self._build_audit_result_from_run_result(result)) + return {"runId": runId, "html": html} async def GetReportDocx(self, runId: int) -> dict[str, Any]: artifact = await self._get_report_artifact(runId, "annotated_docx") @@ -1135,6 +1375,7 @@ class GovdocServiceImpl(IGovdocService): async def _getCurrentUserContext(self, CurrentUserId: int) -> dict[str, Any]: async with GetAsyncSession() as session: + await self._backfill_missing_version_groups(session) row = ( await session.execute( text( @@ -1252,6 +1493,7 @@ class GovdocServiceImpl(IGovdocService): ) whereClause = " AND ".join(filters) async with GetAsyncSession() as session: + await self._backfill_missing_version_groups(session) row = ( await session.execute( text( @@ -1261,6 +1503,30 @@ class GovdocServiceImpl(IGovdocService): COALESCE(d.region, 'default') AS region, COALESCE(d.processing_status, 'waiting') AS processing_status, d.current_run_id, + COALESCE(NULLIF(d.version_group_key, ''), fallback_vc.derived_version_group_key, '') AS version_group_key, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_version_no, 1) + ELSE COALESCE(NULLIF(d.version_no, 0), 1) + END AS version_no, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN fallback_vc.derived_previous_version_id + ELSE d.previous_version_id + END AS previous_version_id, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_root_version_id, d.id) + ELSE COALESCE(d.root_version_id, d.id) + END AS root_version_id, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_is_latest_version, COALESCE(d.is_latest_version, true)) + ELSE COALESCE(d.is_latest_version, true) + END AS is_latest_version, + COALESCE(d.normalized_name, '') AS normalized_name, + COALESCE(vc.total_versions, fallback_vc.total_versions, 1) AS total_versions, + fallback_vc.derived_version_no, d.created_at, d.updated_at, f.id AS file_id, @@ -1274,7 +1540,14 @@ class GovdocServiceImpl(IGovdocService): gr.total_score, gr.passed_count, gr.failed_count, - gr.skipped_count + gr.skipped_count, + NULL::VARCHAR AS rules_path, + 0 AS finding_count, + 0 AS error_count, + 0 AS warning_count, + 0 AS info_count, + false AS has_html_report, + false AS has_docx_report FROM leaudit_documents d JOIN leaudit_document_files f ON f.document_id = d.id @@ -1283,6 +1556,51 @@ class GovdocServiceImpl(IGovdocService): AND f.deleted_at IS NULL LEFT JOIN govdoc_runs gr ON gr.id = d.current_run_id + LEFT JOIN ( + SELECT version_group_key, COUNT(*) AS total_versions + FROM leaudit_documents + WHERE deleted_at IS NULL + AND COALESCE(version_group_key, '') <> '' + GROUP BY version_group_key + ) vc + ON vc.version_group_key = d.version_group_key + LEFT JOIN ( + SELECT + d2.id AS document_id, + COUNT(*) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ) AS total_versions, + ROW_NUMBER() OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_version_no, + LAG(d2.id) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_previous_version_id, + FIRST_VALUE(d2.id) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_root_version_id, + CASE + WHEN ROW_NUMBER() OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at DESC, d2.id DESC + ) = 1 THEN true + ELSE false + END AS derived_is_latest_version, + md5(CONCAT_WS('|', d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, ''))) AS derived_version_group_key + FROM leaudit_documents d2 + JOIN leaudit_document_files f2 + ON f2.document_id = d2.id + AND f2.is_active = true + AND f2.file_role = 'original' + AND f2.deleted_at IS NULL + WHERE d2.deleted_at IS NULL + AND d2.review_scope = 'govdoc' + AND COALESCE(d2.engine_type, 'leaudit') = 'govdoc' + ) fallback_vc + ON fallback_vc.document_id = d.id WHERE {whereClause} LIMIT 1 """ @@ -1305,6 +1623,30 @@ class GovdocServiceImpl(IGovdocService): COALESCE(d.region, 'default') AS region, COALESCE(d.processing_status, 'waiting') AS processing_status, d.current_run_id, + COALESCE(NULLIF(d.version_group_key, ''), fallback_vc.derived_version_group_key, '') AS version_group_key, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_version_no, 1) + ELSE COALESCE(NULLIF(d.version_no, 0), 1) + END AS version_no, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN fallback_vc.derived_previous_version_id + ELSE d.previous_version_id + END AS previous_version_id, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_root_version_id, d.id) + ELSE COALESCE(d.root_version_id, d.id) + END AS root_version_id, + CASE + WHEN COALESCE(d.version_group_key, '') = '' + THEN COALESCE(fallback_vc.derived_is_latest_version, COALESCE(d.is_latest_version, true)) + ELSE COALESCE(d.is_latest_version, true) + END AS is_latest_version, + COALESCE(d.normalized_name, '') AS normalized_name, + COALESCE(vc.total_versions, fallback_vc.total_versions, 1) AS total_versions, + fallback_vc.derived_version_no, d.created_at, d.updated_at, f.id AS file_id, @@ -1318,7 +1660,14 @@ class GovdocServiceImpl(IGovdocService): gr.total_score, gr.passed_count, gr.failed_count, - gr.skipped_count + gr.skipped_count, + NULL::VARCHAR AS rules_path, + 0 AS finding_count, + 0 AS error_count, + 0 AS warning_count, + 0 AS info_count, + false AS has_html_report, + false AS has_docx_report FROM leaudit_documents d JOIN leaudit_document_files f ON f.document_id = d.id @@ -1327,6 +1676,51 @@ class GovdocServiceImpl(IGovdocService): AND f.deleted_at IS NULL LEFT JOIN govdoc_runs gr ON gr.id = d.current_run_id + LEFT JOIN ( + SELECT version_group_key, COUNT(*) AS total_versions + FROM leaudit_documents + WHERE deleted_at IS NULL + AND COALESCE(version_group_key, '') <> '' + GROUP BY version_group_key + ) vc + ON vc.version_group_key = d.version_group_key + LEFT JOIN ( + SELECT + d2.id AS document_id, + COUNT(*) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ) AS total_versions, + ROW_NUMBER() OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_version_no, + LAG(d2.id) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_previous_version_id, + FIRST_VALUE(d2.id) OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at ASC, d2.id ASC + ) AS derived_root_version_id, + CASE + WHEN ROW_NUMBER() OVER ( + PARTITION BY d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, '') + ORDER BY d2.created_at DESC, d2.id DESC + ) = 1 THEN true + ELSE false + END AS derived_is_latest_version, + md5(CONCAT_WS('|', d2.region, COALESCE(d2.normalized_name, ''), COALESCE(f2.file_ext, ''))) AS derived_version_group_key + FROM leaudit_documents d2 + JOIN leaudit_document_files f2 + ON f2.document_id = d2.id + AND f2.is_active = true + AND f2.file_role = 'original' + AND f2.deleted_at IS NULL + WHERE d2.deleted_at IS NULL + AND d2.review_scope = 'govdoc' + AND COALESCE(d2.engine_type, 'leaudit') = 'govdoc' + ) fallback_vc + ON fallback_vc.document_id = d.id WHERE d.id = :document_id AND d.deleted_at IS NULL AND COALESCE(d.engine_type, 'leaudit') = 'govdoc' @@ -1412,12 +1806,15 @@ class GovdocServiceImpl(IGovdocService): passedCount: Any, failedCount: Any, skippedCount: Any, + totalFindings: Any | None = None, + bySeverity: dict[str, int] | None = None, + byCategory: dict[str, int] | None = None, ) -> dict[str, Any]: return { "score": float(totalScore or 0), - "total_findings": int(failedCount or 0), - "by_severity": {}, - "by_category": {}, + "total_findings": int(totalFindings if totalFindings is not None else (failedCount or 0)), + "by_severity": bySeverity or {}, + "by_category": byCategory or {}, "passed_count": int(passedCount or 0), "failed_count": int(failedCount or 0), "skipped_count": int(skippedCount or 0), @@ -1463,11 +1860,22 @@ class GovdocServiceImpl(IGovdocService): return grouped def _map_document_row(self, row: Any) -> _GovdocDocumentRow: + versionNoValue = row.get("derived_version_no") + if versionNoValue is None: + versionNoValue = row.get("version_no") + return _GovdocDocumentRow( documentId=int(row["document_id"]), region=str(row["region"] or "default"), processingStatus=str(row["processing_status"] or "waiting"), currentRunId=int(row["current_run_id"]) if row.get("current_run_id") is not None else None, + versionGroupKey=str(row["version_group_key"]) if row.get("version_group_key") else None, + versionNo=int(versionNoValue or 1), + totalVersions=int(row.get("total_versions") or 1), + previousVersionId=int(row["previous_version_id"]) if row.get("previous_version_id") is not None else None, + rootVersionId=int(row["root_version_id"]) if row.get("root_version_id") is not None else None, + isLatestVersion=bool(row.get("is_latest_version", True)), + normalizedName=str(row["normalized_name"]) if row.get("normalized_name") else None, createdAt=row.get("created_at"), updatedAt=row.get("updated_at"), fileId=int(row["file_id"]), @@ -1482,10 +1890,292 @@ class GovdocServiceImpl(IGovdocService): passedCount=int(row["passed_count"]) if row.get("passed_count") is not None else None, failedCount=int(row["failed_count"]) if row.get("failed_count") is not None else None, skippedCount=int(row["skipped_count"]) if row.get("skipped_count") is not None else None, + findingCount=int(row.get("finding_count") or 0), + errorCount=int(row.get("error_count") or 0), + warningCount=int(row.get("warning_count") or 0), + infoCount=int(row.get("info_count") or 0), + rulesPath=str(row["rules_path"]) if row.get("rules_path") else None, hasHtmlReport=bool(row.get("has_html_report")), hasDocxReport=bool(row.get("has_docx_report")), ) + async def _build_document_list_item(self, mapped: _GovdocDocumentRow) -> dict[str, Any]: + summary = self._build_summary_payload( + mapped.totalScore, + mapped.passedCount, + mapped.failedCount, + mapped.skippedCount, + totalFindings=mapped.findingCount, + bySeverity={ + "error": mapped.errorCount, + "warning": mapped.warningCount, + "info": mapped.infoCount, + }, + ) + rulesetMeta = await self._resolve_ruleset_metadata(mapped.rulesPath) + return { + "documentId": mapped.documentId, + "fileId": mapped.fileId, + "fileName": mapped.fileName, + "fileExt": mapped.fileExt, + "mimeType": mapped.mimeType, + "fileSize": mapped.fileSize, + "region": mapped.region, + "processingStatus": mapped.processingStatus, + "currentRunId": mapped.currentRunId, + "latestRunId": mapped.currentRunId, + "resultStatus": mapped.resultStatus, + "score": float(mapped.totalScore) if mapped.totalScore is not None else None, + "versionGroupKey": mapped.versionGroupKey, + "versionNo": mapped.versionNo, + "totalVersions": mapped.totalVersions, + "previousVersionId": mapped.previousVersionId, + "rootVersionId": mapped.rootVersionId, + "isLatestVersion": mapped.isLatestVersion, + "rulesetId": rulesetMeta["typeId"], + "rulesetName": rulesetMeta["name"], + "rulesetVersion": rulesetMeta["version"], + "passedCount": mapped.passedCount or 0, + "failedCount": mapped.failedCount or 0, + "skippedCount": mapped.skippedCount or 0, + "latestRun": { + "runId": mapped.currentRunId, + "summary": summary, + } if mapped.currentRunId else None, + "reports": { + "hasHtmlReport": mapped.hasHtmlReport, + "hasDocxReport": mapped.hasDocxReport, + }, + "createdAt": self._iso(mapped.createdAt), + "updatedAt": self._iso(mapped.updatedAt), + } + + async def _find_latest_version_candidate( + self, + session, + *, + region: str, + normalizedName: str, + fileExt: str | None, + ) -> dict[str, Any] | None: + extClause = "" + params: dict[str, Any] = { + "region": region, + "normalized_name": normalizedName, + } + if fileExt: + extClause = " AND f.file_ext = :file_ext" + params["file_ext"] = fileExt + + row = ( + await session.execute( + text( + f""" + SELECT + d.id AS document_id, + d.version_group_key, + d.version_no, + d.root_version_id + FROM leaudit_documents d + JOIN leaudit_document_files f + ON f.document_id = d.id + AND f.is_active = true + AND f.file_role = 'original' + AND f.deleted_at IS NULL + WHERE d.deleted_at IS NULL + AND d.review_scope = 'govdoc' + AND COALESCE(d.engine_type, 'leaudit') = 'govdoc' + AND d.region = :region + AND COALESCE(d.normalized_name, '') = :normalized_name + AND COALESCE(d.is_latest_version, true) = true{extClause} + ORDER BY d.version_no DESC, d.id DESC + LIMIT 1 + """ + ), + params, + ) + ).mappings().first() + return dict(row) if row else None + + async def _backfill_legacy_version_chain( + self, + session, + *, + region: str, + normalizedName: str, + fileExt: str | None, + ) -> dict[str, Any] | None: + extClause = "" + params: dict[str, Any] = { + "region": region, + "normalized_name": normalizedName, + } + if fileExt: + extClause = " AND f.file_ext = :file_ext" + params["file_ext"] = fileExt + + rows = ( + await session.execute( + text( + f""" + SELECT d.id AS document_id + FROM leaudit_documents d + JOIN leaudit_document_files f + ON f.document_id = d.id + AND f.is_active = true + AND f.file_role = 'original' + AND f.deleted_at IS NULL + WHERE d.deleted_at IS NULL + AND d.review_scope = 'govdoc' + AND COALESCE(d.engine_type, 'leaudit') = 'govdoc' + AND d.region = :region + AND COALESCE(d.normalized_name, '') = :normalized_name{extClause} + ORDER BY d.created_at ASC, d.id ASC + """ + ), + params, + ) + ).mappings().all() + + if not rows: + return None + + groupKey = uuid.uuid4().hex + rootId = int(rows[0]["document_id"]) + previousId: int | None = None + + for index, row in enumerate(rows, start=1): + documentId = int(row["document_id"]) + isLatest = index == len(rows) + await session.execute( + text( + """ + UPDATE leaudit_documents + SET version_group_key = :version_group_key, + version_no = :version_no, + previous_version_id = :previous_version_id, + root_version_id = :root_version_id, + is_latest_version = :is_latest_version, + updated_at = NOW() + WHERE id = :document_id + """ + ), + { + "version_group_key": groupKey, + "version_no": index, + "previous_version_id": previousId, + "root_version_id": rootId, + "is_latest_version": isLatest, + "document_id": documentId, + }, + ) + previousId = documentId + + latestId = int(rows[-1]["document_id"]) + return { + "document_id": latestId, + "version_group_key": groupKey, + "version_no": len(rows), + "root_version_id": rootId, + } + + async def _backfill_missing_version_groups(self, session) -> None: + groups = ( + await session.execute( + text( + """ + SELECT + d.region, + COALESCE(d.normalized_name, '') AS normalized_name, + COALESCE(f.file_ext, '') AS file_ext, + ARRAY_AGG(d.id ORDER BY d.created_at ASC, d.id ASC) AS document_ids, + MIN(NULLIF(d.version_group_key, '')) AS existing_version_group_key + FROM leaudit_documents d + JOIN leaudit_document_files f + ON f.document_id = d.id + AND f.is_active = true + AND f.file_role = 'original' + AND f.deleted_at IS NULL + WHERE d.deleted_at IS NULL + AND d.review_scope = 'govdoc' + AND COALESCE(d.engine_type, 'leaudit') = 'govdoc' + GROUP BY d.region, COALESCE(d.normalized_name, ''), COALESCE(f.file_ext, '') + HAVING BOOL_OR(COALESCE(d.version_group_key, '') = '') + """ + ) + ) + ).mappings().all() + + if not groups: + return + + for group in groups: + region = str(group["region"] or "default") + normalizedName = str(group["normalized_name"] or "") + fileExt = str(group["file_ext"] or "") + documentIds = [int(value) for value in (group["document_ids"] or [])] + if not documentIds: + continue + + versionGroupKey = str(group["existing_version_group_key"] or "").strip() or self._derive_version_group_key( + region=region, + normalizedName=normalizedName, + fileExt=fileExt or None, + ) + rootId = documentIds[0] + previousId: int | None = None + + for index, documentId in enumerate(documentIds, start=1): + isLatest = index == len(documentIds) + await session.execute( + text( + """ + UPDATE leaudit_documents + SET version_group_key = :version_group_key, + version_no = :version_no, + previous_version_id = :previous_version_id, + root_version_id = :root_version_id, + is_latest_version = :is_latest_version, + updated_at = NOW() + WHERE id = :document_id + """ + ), + { + "version_group_key": versionGroupKey, + "version_no": index, + "previous_version_id": previousId, + "root_version_id": rootId, + "is_latest_version": isLatest, + "document_id": documentId, + }, + ) + previousId = documentId + + await session.commit() + + def _derive_version_group_key(self, *, region: str, normalizedName: str, fileExt: str | None) -> str: + raw = f"{region}|{normalizedName}|{fileExt or ''}" + return hashlib.md5(raw.encode("utf-8")).hexdigest() + + async def _resolve_ruleset_metadata(self, rulesPath: str | None) -> dict[str, str]: + ruleset = await self._load_ruleset(rulesPath) + if ruleset is not None: + return { + "typeId": str(ruleset.metadata.type_id or ""), + "name": str(ruleset.metadata.name or ""), + "version": str(ruleset.metadata.version or ""), + } + + resolved = await self._resolve_rules_path(rulesPath) + if not resolved: + return {"typeId": "", "name": "", "version": ""} + path = Path(resolved) + return { + "typeId": path.stem, + "name": path.parent.name, + "version": "", + } + async def _get_report_artifact(self, runId: int, artifactType: str) -> Any | None: async with GetAsyncSession() as session: await self._ensureGovdocSchema(session) @@ -1523,3 +2213,23 @@ class GovdocServiceImpl(IGovdocService): if isinstance(value, datetime): return value.isoformat() return str(value) + + def _build_audit_result_from_run_result(self, payload: dict[str, Any]) -> AuditResult: + summaryPayload = payload.get("summary") or {} + findingsPayload = payload.get("findings") or [] + checkedRulesPayload = payload.get("checkedRules") or [] + structurePayload = payload.get("structure") or [] + outlinePayload = payload.get("outline") or [] + entitiesPayload = payload.get("entities") or {} + documentPayload = payload.get("document") or {} + + return AuditResult( + audit_id=str(payload.get("runId") or ""), + document=documentPayload, + summary=AuditSummary.model_validate(summaryPayload), + findings=[Finding.model_validate(item) for item in findingsPayload], + checked_rules=[CheckedRule.model_validate(item) for item in checkedRulesPayload], + structure=[StructureItem.model_validate(item) for item in structurePayload], + outline=[OutlineNode.model_validate(item) for item in outlinePayload], + entities=entitiesPayload, + ) diff --git a/legal-platform-frontend b/legal-platform-frontend index d541eb7..c41ddc8 160000 --- a/legal-platform-frontend +++ b/legal-platform-frontend @@ -1 +1 @@ -Subproject commit d541eb74aee5d6668ac3bd6720db4a3b0eeb896e +Subproject commit c41ddc844ce04ad2ae5ee2679cc2155cc25d44bf diff --git a/pyproject.toml b/pyproject.toml index 1c79224..fbc3d5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ dependencies = [ "tomli>=2.2.0", "python-multipart>=0.0.18", "pyjwt>=2.10.0", + "openai>=1.30.0", "pillow>=11.0.0", "pyyaml>=6.0", "minio>=7.2.8",