feat: integrate govdoc platform updates
This commit is contained in:
@@ -34,6 +34,7 @@ class RagMessageItemVO(BaseModel):
|
||||
answer: str = Field(...)
|
||||
feedback: dict | None = Field(None)
|
||||
retrieverResources: list[dict] | None = Field(None)
|
||||
suggestedQuestions: list[str] = Field(default_factory=list)
|
||||
createdAt: int = Field(0)
|
||||
|
||||
|
||||
|
||||
@@ -1,76 +1,594 @@
|
||||
"""把 AuditResult 渲染成单文件 HTML 报告。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import Counter
|
||||
from html import escape
|
||||
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.result import AuditResult
|
||||
|
||||
|
||||
_CSS = """
|
||||
body { font-family: -apple-system, "PingFang SC", sans-serif; margin: 0; padding: 24px;
|
||||
background: #f7f7f9; color: #1a1a1a; }
|
||||
.header { display: flex; align-items: center; gap: 16px; margin-bottom: 24px; }
|
||||
.score { width: 96px; height: 96px; border-radius: 50%;
|
||||
background: conic-gradient(#22c55e var(--p), #e5e7eb var(--p));
|
||||
display: grid; place-items: center; font-weight: 700; font-size: 22px; color: #111; }
|
||||
.score-inner { background: white; width: 76px; height: 76px; border-radius: 50%;
|
||||
display: grid; place-items: center; }
|
||||
.tag { padding: 2px 8px; border-radius: 999px; font-size: 12px; }
|
||||
.error { background: #fee2e2; color: #b91c1c; }
|
||||
.warning { background: #fef9c3; color: #a16207; }
|
||||
.info { background: #dbeafe; color: #1d4ed8; }
|
||||
table { width: 100%; border-collapse: collapse; background: white; border-radius: 8px;
|
||||
overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.06); }
|
||||
th, td { padding: 10px 12px; text-align: left; border-bottom: 1px solid #f1f5f9; vertical-align: top; }
|
||||
th { background: #f8fafc; font-size: 13px; }
|
||||
td.msg { max-width: 480px; }
|
||||
.context { color: #64748b; font-size: 12px; margin-top: 4px; }
|
||||
* { box-sizing: border-box; }
|
||||
html, body { margin: 0; padding: 0; }
|
||||
body {
|
||||
font-family: -apple-system, "PingFang SC", "Microsoft YaHei", sans-serif;
|
||||
background: #f3f6f5;
|
||||
color: #0f172a;
|
||||
}
|
||||
a { color: inherit; }
|
||||
.page {
|
||||
width: 100%;
|
||||
padding: 20px 24px 32px;
|
||||
}
|
||||
.stack {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 20px;
|
||||
}
|
||||
.card {
|
||||
background: #ffffff;
|
||||
border: 1px solid #e2e8f0;
|
||||
border-radius: 12px;
|
||||
box-shadow: 0 1px 3px rgba(15, 23, 42, 0.08);
|
||||
overflow: hidden;
|
||||
}
|
||||
.card-head {
|
||||
height: 48px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 12px;
|
||||
padding: 0 20px;
|
||||
border-bottom: 1px solid #e2e8f0;
|
||||
background: #fcfdfd;
|
||||
}
|
||||
.card-title {
|
||||
font-size: 14px;
|
||||
font-weight: 600;
|
||||
color: #1e293b;
|
||||
}
|
||||
.card-subtitle {
|
||||
font-size: 12px;
|
||||
color: #64748b;
|
||||
}
|
||||
.summary-grid {
|
||||
display: grid;
|
||||
grid-template-columns: 220px minmax(0, 1fr);
|
||||
gap: 20px;
|
||||
padding: 20px;
|
||||
}
|
||||
.score-box {
|
||||
border: 1px solid #cfe4dc;
|
||||
background: #f7fbf9;
|
||||
border-radius: 10px;
|
||||
padding: 20px;
|
||||
}
|
||||
.score-label {
|
||||
font-size: 12px;
|
||||
font-weight: 500;
|
||||
color: #475569;
|
||||
}
|
||||
.score-value {
|
||||
margin-top: 12px;
|
||||
font-size: 42px;
|
||||
line-height: 1;
|
||||
font-weight: 600;
|
||||
letter-spacing: -0.05em;
|
||||
color: #0f172a;
|
||||
}
|
||||
.score-track {
|
||||
margin-top: 16px;
|
||||
height: 8px;
|
||||
background: #dbe8e3;
|
||||
border-radius: 999px;
|
||||
overflow: hidden;
|
||||
}
|
||||
.score-fill {
|
||||
height: 100%;
|
||||
background: #00684a;
|
||||
}
|
||||
.score-note {
|
||||
margin-top: 16px;
|
||||
font-size: 12px;
|
||||
line-height: 1.75;
|
||||
color: #475569;
|
||||
}
|
||||
.summary-main {
|
||||
min-width: 0;
|
||||
}
|
||||
.eyebrow {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
height: 28px;
|
||||
padding: 0 12px;
|
||||
border: 1px solid #cfe4dc;
|
||||
border-radius: 6px;
|
||||
background: #e8f3ef;
|
||||
color: #00684a;
|
||||
font-size: 12px;
|
||||
font-weight: 500;
|
||||
}
|
||||
.report-title {
|
||||
margin: 12px 0 0;
|
||||
font-size: 32px;
|
||||
line-height: 1.25;
|
||||
letter-spacing: -0.03em;
|
||||
font-weight: 600;
|
||||
color: #0f172a;
|
||||
}
|
||||
.report-meta {
|
||||
margin-top: 8px;
|
||||
font-size: 15px;
|
||||
color: #475569;
|
||||
}
|
||||
.metrics {
|
||||
margin-top: 20px;
|
||||
display: grid;
|
||||
grid-template-columns: repeat(4, minmax(0, 1fr));
|
||||
gap: 16px;
|
||||
}
|
||||
.metric {
|
||||
border: 1px solid #e2e8f0;
|
||||
border-radius: 10px;
|
||||
background: #fcfdfd;
|
||||
padding: 16px 20px;
|
||||
}
|
||||
.metric-label {
|
||||
font-size: 13px;
|
||||
font-weight: 500;
|
||||
color: #64748b;
|
||||
}
|
||||
.metric-value {
|
||||
margin-top: 12px;
|
||||
display: flex;
|
||||
align-items: baseline;
|
||||
gap: 8px;
|
||||
}
|
||||
.metric-value strong {
|
||||
font-size: 30px;
|
||||
line-height: 1;
|
||||
letter-spacing: -0.04em;
|
||||
font-weight: 600;
|
||||
color: #0f172a;
|
||||
}
|
||||
.metric-value span {
|
||||
font-size: 13px;
|
||||
color: #64748b;
|
||||
}
|
||||
.chips {
|
||||
margin-top: 20px;
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 12px;
|
||||
}
|
||||
.chip,
|
||||
.severity-tag {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
border: 1px solid transparent;
|
||||
border-radius: 6px;
|
||||
font-weight: 600;
|
||||
}
|
||||
.chip {
|
||||
height: 32px;
|
||||
padding: 0 12px;
|
||||
font-size: 12px;
|
||||
}
|
||||
.severity-tag {
|
||||
height: 32px;
|
||||
padding: 0 12px;
|
||||
font-size: 12px;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
.error {
|
||||
border-color: #fecaca;
|
||||
background: #fef2f2;
|
||||
color: #b91c1c;
|
||||
}
|
||||
.warning {
|
||||
border-color: #fde68a;
|
||||
background: #fffbeb;
|
||||
color: #b45309;
|
||||
}
|
||||
.info {
|
||||
border-color: #bfdbfe;
|
||||
background: #eff6ff;
|
||||
color: #1d4ed8;
|
||||
}
|
||||
.content-grid {
|
||||
display: grid;
|
||||
grid-template-columns: 340px minmax(0, 1fr);
|
||||
gap: 20px;
|
||||
}
|
||||
.sidebar-body {
|
||||
padding: 16px;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 16px;
|
||||
}
|
||||
.summary-row {
|
||||
border: 1px solid #e2e8f0;
|
||||
border-radius: 10px;
|
||||
background: #fcfdfd;
|
||||
padding: 16px;
|
||||
}
|
||||
.summary-row-label {
|
||||
font-size: 12px;
|
||||
font-weight: 500;
|
||||
color: #64748b;
|
||||
}
|
||||
.summary-row-value {
|
||||
margin-top: 8px;
|
||||
font-size: 22px;
|
||||
line-height: 1;
|
||||
letter-spacing: -0.03em;
|
||||
font-weight: 600;
|
||||
color: #0f172a;
|
||||
}
|
||||
.summary-row-desc {
|
||||
margin-top: 12px;
|
||||
font-size: 13px;
|
||||
line-height: 1.75;
|
||||
color: #475569;
|
||||
}
|
||||
.table-toolbar {
|
||||
height: 48px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 12px;
|
||||
padding: 0 20px;
|
||||
border-bottom: 1px solid #e2e8f0;
|
||||
background: #fcfdfd;
|
||||
}
|
||||
.toolbar-left {
|
||||
min-width: 0;
|
||||
}
|
||||
.toolbar-title {
|
||||
font-size: 14px;
|
||||
font-weight: 600;
|
||||
color: #1e293b;
|
||||
}
|
||||
.toolbar-desc {
|
||||
margin-top: 2px;
|
||||
font-size: 12px;
|
||||
color: #64748b;
|
||||
}
|
||||
.toolbar-filters {
|
||||
display: flex;
|
||||
gap: 8px;
|
||||
}
|
||||
.filter {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
height: 32px;
|
||||
padding: 0 12px;
|
||||
border: 1px solid #e2e8f0;
|
||||
border-radius: 6px;
|
||||
background: #ffffff;
|
||||
color: #64748b;
|
||||
font-size: 12px;
|
||||
font-weight: 500;
|
||||
}
|
||||
.filter.active {
|
||||
border-color: rgba(0, 104, 74, 0.2);
|
||||
background: #e8f3ef;
|
||||
color: #00684a;
|
||||
}
|
||||
.table-wrap {
|
||||
overflow-x: auto;
|
||||
}
|
||||
table {
|
||||
width: 100%;
|
||||
min-width: 1320px;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
thead tr {
|
||||
background: #f8fafc;
|
||||
color: #475569;
|
||||
font-size: 13px;
|
||||
font-weight: 500;
|
||||
}
|
||||
th {
|
||||
padding: 16px 20px;
|
||||
text-align: left;
|
||||
border-bottom: 1px solid #e2e8f0;
|
||||
white-space: nowrap;
|
||||
}
|
||||
td {
|
||||
padding: 20px;
|
||||
vertical-align: top;
|
||||
border-bottom: 1px solid #f1f5f9;
|
||||
}
|
||||
tbody tr:hover {
|
||||
background: #f8fafc;
|
||||
}
|
||||
.mono {
|
||||
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
|
||||
}
|
||||
.id-cell {
|
||||
font-size: 13px;
|
||||
color: #64748b;
|
||||
}
|
||||
.rule-id {
|
||||
font-size: 15px;
|
||||
font-weight: 600;
|
||||
color: #1e293b;
|
||||
}
|
||||
.rule-name {
|
||||
margin-top: 4px;
|
||||
font-size: 13px;
|
||||
color: #64748b;
|
||||
}
|
||||
.category-cell {
|
||||
font-size: 14px;
|
||||
color: #334155;
|
||||
}
|
||||
.location-cell {
|
||||
font-size: 13px;
|
||||
color: #334155;
|
||||
}
|
||||
.message-cell {
|
||||
min-width: 560px;
|
||||
}
|
||||
.message-main {
|
||||
font-size: 15px;
|
||||
line-height: 1.8;
|
||||
color: #0f172a;
|
||||
}
|
||||
.context-box,
|
||||
.suggestion-box {
|
||||
margin-top: 12px;
|
||||
border-radius: 6px;
|
||||
padding: 12px 16px;
|
||||
font-size: 13px;
|
||||
line-height: 1.8;
|
||||
}
|
||||
.context-box {
|
||||
border: 1px solid #e2e8f0;
|
||||
background: #f8fafc;
|
||||
color: #475569;
|
||||
}
|
||||
.suggestion-box {
|
||||
border: 1px solid #cfe4dc;
|
||||
background: #f4faf7;
|
||||
color: #0d6b4d;
|
||||
}
|
||||
.empty {
|
||||
padding: 24px 20px;
|
||||
text-align: center;
|
||||
color: #64748b;
|
||||
font-size: 14px;
|
||||
}
|
||||
@media (max-width: 1200px) {
|
||||
.summary-grid,
|
||||
.content-grid {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
.metrics {
|
||||
grid-template-columns: repeat(2, minmax(0, 1fr));
|
||||
}
|
||||
}
|
||||
@media (max-width: 720px) {
|
||||
.page {
|
||||
padding: 16px;
|
||||
}
|
||||
.metrics {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
.table-toolbar,
|
||||
.card-head {
|
||||
height: auto;
|
||||
min-height: 48px;
|
||||
padding-top: 12px;
|
||||
padding-bottom: 12px;
|
||||
align-items: flex-start;
|
||||
flex-direction: column;
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
def render_html(result: AuditResult) -> str:
|
||||
s = result.summary
|
||||
score = s.score
|
||||
pct = f"{score}%"
|
||||
rows = []
|
||||
for f in result.findings:
|
||||
loc = f.location
|
||||
suggest = (
|
||||
f'<div style="color:#0369a1">建议: {escape(f.suggestion)}</div>'
|
||||
if f.suggestion else ""
|
||||
)
|
||||
rows.append(f"""
|
||||
<tr>
|
||||
<td>{escape(f.finding_id)}</td>
|
||||
<td>{escape(f.rule_id)}<br><span style="color:#64748b;font-size:12px">{escape(f.rule_name)}</span></td>
|
||||
<td><span class="tag {f.severity}">{f.severity}</span></td>
|
||||
<td>{escape(f.category)}</td>
|
||||
<td>P{loc.paragraph_index} ({escape(loc.role or '')})</td>
|
||||
<td class="msg">{escape(f.message)}
|
||||
<div class="context">原文: {escape((loc.context or '')[:80])}</div>
|
||||
{suggest}
|
||||
</td>
|
||||
</tr>""")
|
||||
summary = result.summary
|
||||
score = int(summary.score or 0)
|
||||
score_pct = max(0, min(score, 100))
|
||||
severity_counts = _severity_counts(result)
|
||||
category_count = len([key for key, value in (summary.by_category or {}).items() if key and value])
|
||||
filename = escape(str(result.document.get("filename", "")))
|
||||
top_rule_id, top_rule_count = _top_rule(result)
|
||||
line_range = _line_range(result)
|
||||
entity_summary = _entity_summary(result)
|
||||
|
||||
body = f"""<!doctype html>
|
||||
<html lang="zh"><head><meta charset="utf-8"><title>公文审核报告</title>
|
||||
<style>{_CSS}</style></head><body>
|
||||
<div class="header">
|
||||
<div class="score" style="--p:{pct}"><div class="score-inner">{score}</div></div>
|
||||
<div>
|
||||
<h1 style="margin:0">公文格式审核报告</h1>
|
||||
<div style="color:#64748b">{escape(result.document.get('filename', ''))} · 共 {s.total_findings} 项</div>
|
||||
<div style="margin-top:6px">
|
||||
<span class="tag error">错误 {s.by_severity.get('error', 0)}</span>
|
||||
<span class="tag warning">警告 {s.by_severity.get('warning', 0)}</span>
|
||||
<span class="tag info">提示 {s.by_severity.get('info', 0)}</span>
|
||||
rows = []
|
||||
for finding in result.findings:
|
||||
location_label = _format_location(finding.location.paragraph_index)
|
||||
context = escape((finding.location.context or "").strip())
|
||||
message = escape(finding.message)
|
||||
suggestion = escape(finding.suggestion) if finding.suggestion else "按规则要求修正对应内容。"
|
||||
|
||||
rows.append(
|
||||
f"""
|
||||
<tr>
|
||||
<td class="mono id-cell">{escape(finding.finding_id)}</td>
|
||||
<td>
|
||||
<div class="rule-id">{escape(finding.rule_id)}</div>
|
||||
<div class="rule-name">{escape(finding.rule_name)}</div>
|
||||
</td>
|
||||
<td><span class="severity-tag {escape(finding.severity)}">{escape(finding.severity)}</span></td>
|
||||
<td class="category-cell">{escape(finding.category)}</td>
|
||||
<td class="mono location-cell">{location_label}</td>
|
||||
<td class="message-cell">
|
||||
<div class="message-main">{message}</div>
|
||||
<div class="context-box">原文:{context or "未提取到上下文"}</div>
|
||||
<div class="suggestion-box">建议:{suggestion}</div>
|
||||
</td>
|
||||
</tr>"""
|
||||
)
|
||||
|
||||
return f"""<!doctype html>
|
||||
<html lang="zh">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>公文审核报告</title>
|
||||
<style>{_CSS}</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="page">
|
||||
<div class="stack">
|
||||
<section class="card">
|
||||
<div class="card-head">
|
||||
<div class="card-title">报告摘要</div>
|
||||
<div class="card-subtitle">不改报告语义,仅收敛样式、配色与信息层级</div>
|
||||
</div>
|
||||
<div class="summary-grid">
|
||||
<div class="score-box">
|
||||
<div class="score-label">综合得分</div>
|
||||
<div class="score-value">{score}</div>
|
||||
<div class="score-track"><div class="score-fill" style="width:{score_pct}%"></div></div>
|
||||
<div class="score-note">这份正式 HTML 报告沿用平台工作台的版式语言,突出摘要、明细和建议三层信息。</div>
|
||||
</div>
|
||||
|
||||
<div class="summary-main">
|
||||
<div class="eyebrow">正式报告样式方向</div>
|
||||
<h1 class="report-title">公文格式审核报告</h1>
|
||||
<div class="report-meta">{filename} · 共 {summary.total_findings} 项问题 · 用作正式 HTML 报告输出</div>
|
||||
|
||||
<div class="metrics">
|
||||
<div class="metric">
|
||||
<div class="metric-label">错误项</div>
|
||||
<div class="metric-value"><strong>{severity_counts["error"]}</strong><span>error</span></div>
|
||||
</div>
|
||||
<div class="metric">
|
||||
<div class="metric-label">警告项</div>
|
||||
<div class="metric-value"><strong>{severity_counts["warning"]}</strong><span>warning</span></div>
|
||||
</div>
|
||||
<div class="metric">
|
||||
<div class="metric-label">提示项</div>
|
||||
<div class="metric-value"><strong>{severity_counts["info"]}</strong><span>info</span></div>
|
||||
</div>
|
||||
<div class="metric">
|
||||
<div class="metric-label">问题类别</div>
|
||||
<div class="metric-value"><strong>{category_count}</strong><span>标题 / 发文 / 格式 / 其他</span></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="chips">
|
||||
<span class="chip error">错误 {severity_counts["error"]}</span>
|
||||
<span class="chip warning">警告 {severity_counts["warning"]}</span>
|
||||
<span class="chip info">提示 {severity_counts["info"]}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="content-grid">
|
||||
<aside class="card">
|
||||
<div class="card-head">
|
||||
<div class="card-title">侧边摘要</div>
|
||||
<div class="card-subtitle">工作台侧栏语义</div>
|
||||
</div>
|
||||
<div class="sidebar-body">
|
||||
<article class="summary-row">
|
||||
<div class="summary-row-label">命中最多规则</div>
|
||||
<div class="summary-row-value">{escape(top_rule_id)}</div>
|
||||
<div class="summary-row-desc">当前命中最多的规则共 {top_rule_count} 项,适合在正式版中作为摘要提示保留。</div>
|
||||
</article>
|
||||
<article class="summary-row">
|
||||
<div class="summary-row-label">集中行号</div>
|
||||
<div class="summary-row-value">{escape(line_range)}</div>
|
||||
<div class="summary-row-desc">问题主要集中在这一段行号范围,便于阅读者快速判断问题分布区域。</div>
|
||||
</article>
|
||||
<article class="summary-row">
|
||||
<div class="summary-row-label">实体状态</div>
|
||||
<div class="summary-row-value">{escape(entity_summary)}</div>
|
||||
<div class="summary-row-desc">按现有识别结果汇总实体抽取状态,用于辅助理解顶部结构类问题。</div>
|
||||
</article>
|
||||
</div>
|
||||
</aside>
|
||||
|
||||
<article class="card">
|
||||
<div class="table-toolbar">
|
||||
<div class="toolbar-left">
|
||||
<div class="toolbar-title">问题明细</div>
|
||||
<div class="toolbar-desc">保留当前报告语义,只收敛版式、层级和配色。</div>
|
||||
</div>
|
||||
<div class="toolbar-filters">
|
||||
<span class="filter active">全部</span>
|
||||
<span class="filter">错误</span>
|
||||
<span class="filter">警告</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="table-wrap">
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>编号</th>
|
||||
<th>规则</th>
|
||||
<th>严重度</th>
|
||||
<th>类别</th>
|
||||
<th>位置</th>
|
||||
<th>说明</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{''.join(rows) or '<tr><td colspan="6" class="empty">未发现问题</td></tr>'}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</article>
|
||||
</section>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<table>
|
||||
<thead><tr>
|
||||
<th>编号</th><th>规则</th><th>严重度</th><th>类别</th><th>位置</th><th>说明</th>
|
||||
</tr></thead>
|
||||
<tbody>{''.join(rows) or '<tr><td colspan=6>未发现问题</td></tr>'}</tbody>
|
||||
</table>
|
||||
</body></html>"""
|
||||
return body
|
||||
</body>
|
||||
</html>"""
|
||||
|
||||
|
||||
def _severity_counts(result: AuditResult) -> dict[str, int]:
    """Tally findings by severity for the three levels the report displays.

    Only ``error``, ``warning`` and ``info`` appear in the returned mapping
    (in that order). A level without findings maps to ``0``; findings whose
    severity is anything else are not reported.
    """
    tally = Counter(item.severity for item in result.findings)
    # Indexing a Counter with an unseen key yields 0 without inserting it.
    return {level: tally[level] for level in ("error", "warning", "info")}
|
||||
|
||||
|
||||
def _top_rule(result: AuditResult) -> tuple[str, int]:
    """Return the rule id that occurs most often and how many times it occurs.

    Findings with a falsy ``rule_id`` are left out of the count. When no
    finding carries a rule id, the placeholder pair ``("无", 0)`` is returned.
    """
    hits = Counter()
    for item in result.findings:
        if item.rule_id:
            hits[item.rule_id] += 1
    leaders = hits.most_common(1)
    return leaders[0] if leaders else ("无", 0)
|
||||
|
||||
|
||||
def _line_range(result: AuditResult) -> str:
    """Describe the span of located findings as a display label.

    Each non-``None`` ``location.paragraph_index`` is converted with ``int``
    and shifted by one before being shown. The label covers the smallest and
    largest of those numbers, collapses to a single entry when they coincide,
    and is ``"未定位"`` when no finding has a paragraph index.
    """
    numbers = set()
    for item in result.findings:
        paragraph = item.location.paragraph_index
        if paragraph is not None:
            numbers.add(int(paragraph) + 1)

    if not numbers:
        return "未定位"

    first, last = min(numbers), max(numbers)
    if first == last:
        return f"第 {first} 行"
    return f"第 {first} 行 - 第 {last} 行"
|
||||
|
||||
|
||||
def _entity_summary(result: AuditResult) -> str:
    """Summarise which of the core document entities were extracted.

    The four keys checked are ``title``, ``doc_number``, ``recipient`` and
    ``date``; a key counts as missing when its value in ``result.entities``
    is absent or falsy.

    Returns:
        ``"核心实体齐全"`` when nothing is missing, ``"标题 / 发文"`` when all
        four are missing, otherwise ``"缺少 "`` followed by at most the first
        two missing keys joined with ``" / "``.
    """
    # Treat a missing/None entity mapping like an empty one instead of
    # failing with AttributeError while rendering the report.
    entities = result.entities or {}
    expected = ("title", "doc_number", "recipient", "date")
    missing = [key for key in expected if not entities.get(key)]

    if not missing:
        return "核心实体齐全"
    if len(missing) == len(expected):
        # NOTE(review): unlike the partial case this label has no "缺少"
        # prefix; kept as-is because the intended wording is not visible
        # here -- confirm with the report design.
        return "标题 / 发文"
    # Only the first two missing keys are shown to keep the label short.
    return "缺少 " + " / ".join(missing[:2])
|
||||
|
||||
|
||||
def _format_location(paragraph_index: int | None) -> str:
    """Render a paragraph index as the location label used in the table.

    ``None`` yields ``"未定位"``; any other value is converted with ``int``
    and displayed shifted by one.
    """
    return "未定位" if paragraph_index is None else f"第 {int(paragraph_index) + 1} 行"
|
||||
|
||||
@@ -7,7 +7,7 @@ import json
|
||||
import mimetypes
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from datetime import date, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
@@ -60,6 +60,20 @@ class GovdocServiceImpl(IGovdocService):
|
||||
self.OssService = OssService or OssServiceImpl()
|
||||
self.Storage = StorageAdapter()
|
||||
|
||||
def _parse_date_filter(self, value: str | None, field_name: str) -> date | None:
    """Turn an optional date filter string into a ``date``.

    Args:
        value: Raw filter text; surrounding whitespace is ignored.
        field_name: Name of the filter, used only in the error message.

    Returns:
        ``None`` when ``value`` is ``None`` or blank, otherwise the date
        parsed by ``date.fromisoformat``.

    Raises:
        LeauditException: With ``HTTP_400_BAD_REQUEST`` when the text is not
            accepted by ``date.fromisoformat``.
    """
    candidate = value.strip() if value is not None else ""
    if candidate == "":
        return None
    try:
        parsed = date.fromisoformat(candidate)
    except ValueError as exc:
        raise LeauditException(
            StatusCodeEnum.HTTP_400_BAD_REQUEST,
            f"{field_name} 格式非法,应为 YYYY-MM-DD",
        ) from exc
    return parsed
|
||||
|
||||
# ── 文档 ──────────────────────────────────────────────
|
||||
|
||||
async def UploadDocument(
|
||||
@@ -250,12 +264,14 @@ class GovdocServiceImpl(IGovdocService):
|
||||
if resultStatus:
|
||||
filters.append("COALESCE(gr.result_status, '') = :result_status")
|
||||
params["result_status"] = resultStatus.strip()
|
||||
if dateFrom:
|
||||
filters.append("d.created_at >= CAST(:date_from AS date)")
|
||||
params["date_from"] = dateFrom.strip()
|
||||
if dateTo:
|
||||
filters.append("d.created_at < (CAST(:date_to AS date) + INTERVAL '1 day')")
|
||||
params["date_to"] = dateTo.strip()
|
||||
parsedDateFrom = self._parse_date_filter(dateFrom, "dateFrom")
|
||||
parsedDateTo = self._parse_date_filter(dateTo, "dateTo")
|
||||
if parsedDateFrom:
|
||||
filters.append("d.created_at::date >= :date_from")
|
||||
params["date_from"] = parsedDateFrom
|
||||
if parsedDateTo:
|
||||
filters.append("d.created_at::date <= :date_to")
|
||||
params["date_to"] = parsedDateTo
|
||||
|
||||
whereClause = " AND ".join(filters)
|
||||
|
||||
@@ -901,9 +917,10 @@ class GovdocServiceImpl(IGovdocService):
|
||||
artifact = await self._get_report_artifact(runId, "html_report")
|
||||
if not artifact:
|
||||
return {"runId": runId, "htmlUrl": ""}
|
||||
content = await self.OssService.DownloadBytes(str(artifact["oss_url"]))
|
||||
return {
|
||||
"runId": runId,
|
||||
"htmlUrl": await self.OssService.PresignGetUrl(str(artifact["oss_url"])),
|
||||
"html": content.decode("utf-8"),
|
||||
}
|
||||
|
||||
async def GetReportDocx(self, runId: int) -> dict[str, Any]:
|
||||
|
||||
@@ -4,6 +4,7 @@ import json
|
||||
import uuid
|
||||
from typing import AsyncGenerator
|
||||
|
||||
import httpx
|
||||
from sqlalchemy import text
|
||||
|
||||
from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession
|
||||
@@ -25,6 +26,7 @@ from fastapi_modules.fastapi_leaudit.domian.vo.ragChatVo import (
|
||||
RagMessagePageVO,
|
||||
RagOperationResultVO,
|
||||
)
|
||||
from fastapi_modules.fastapi_leaudit.rag_engine.config import RAG_CONFIG
|
||||
from fastapi_modules.fastapi_leaudit.rag_engine.generator import generate_stream
|
||||
from fastapi_modules.fastapi_leaudit.rag_engine.question_chains import generate_followups
|
||||
from fastapi_modules.fastapi_leaudit.services.ragChatService import IRagChatService
|
||||
@@ -194,7 +196,7 @@ class RagChatServiceImpl(IRagChatService):
|
||||
await session.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT message_id, role, content, sources, feedback, created_at
|
||||
SELECT message_id, role, content, sources, metadata, feedback, created_at
|
||||
FROM rag_message
|
||||
WHERE conversation_id = :conversation_id
|
||||
ORDER BY created_at ASC
|
||||
@@ -216,6 +218,11 @@ class RagChatServiceImpl(IRagChatService):
|
||||
row = items[idx]
|
||||
if row["role"] == "user":
|
||||
answer = items[idx + 1] if idx + 1 < len(items) and items[idx + 1]["role"] == "assistant" else None
|
||||
answer_sources = self._parse_json_field(answer.get("sources")) if answer else []
|
||||
answer_metadata = self._parse_json_field(answer.get("metadata")) if answer else {}
|
||||
suggested_questions = answer_metadata.get("suggested_questions") if isinstance(answer_metadata, dict) else []
|
||||
if not isinstance(suggested_questions, list):
|
||||
suggested_questions = []
|
||||
data.append(
|
||||
RagMessageItemVO(
|
||||
id=(answer["message_id"] if answer else row["message_id"]),
|
||||
@@ -223,7 +230,8 @@ class RagChatServiceImpl(IRagChatService):
|
||||
query=row["content"],
|
||||
answer=answer["content"] if answer else "",
|
||||
feedback=({"rating": answer["feedback"]} if answer and answer.get("feedback") else None),
|
||||
retrieverResources=(answer.get("sources") if answer else None),
|
||||
retrieverResources=answer_sources or None,
|
||||
suggestedQuestions=[str(item) for item in suggested_questions],
|
||||
createdAt=int(row["created_at"].timestamp()) if row.get("created_at") else 0,
|
||||
)
|
||||
)
|
||||
@@ -392,6 +400,18 @@ class RagChatServiceImpl(IRagChatService):
|
||||
area = row.get("area") or ""
|
||||
return area in ("", "省级", user_area or "") or bool(row.get("dataset_public"))
|
||||
|
||||
def _parse_json_field(self, value):
    """Normalise a stored JSON field into a ``dict`` or ``list``.

    Args:
        value: The raw column value: an already-decoded ``dict``/``list``,
            a JSON string, or ``None``.

    Returns:
        The ``dict``/``list`` itself when already decoded, or the decoded
        container when ``value`` is a JSON string holding an object or array.
        Everything else (``None``, other types, invalid JSON, or JSON that
        decodes to a scalar or ``null``) yields an empty ``dict``.
    """
    if isinstance(value, (dict, list)):
        return value
    if not isinstance(value, str):
        return {}
    try:
        parsed = json.loads(value)
    except (ValueError, RecursionError):
        # Malformed or pathologically nested JSON: keep the best-effort
        # behaviour and fall back to an empty mapping.
        return {}
    # Valid JSON can still be a bare number/string/null; callers only handle
    # containers, so do not leak scalars to them.
    return parsed if isinstance(parsed, (dict, list)) else {}
|
||||
|
||||
async def _ensure_conversation(self, user_id: int, conversation_id: str | None, app_id: int | None) -> str:
|
||||
if conversation_id and conversation_id != "-1":
|
||||
async with GetAsyncSession() as session:
|
||||
@@ -450,7 +470,7 @@ class RagChatServiceImpl(IRagChatService):
|
||||
await session.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT id, name, collection_name, retrieval_model
|
||||
SELECT id, name, collection_name, retrieval_model, embedding_model
|
||||
FROM rag_dataset
|
||||
WHERE id = :dataset_id AND deleted_at IS NULL
|
||||
LIMIT 1
|
||||
@@ -475,7 +495,12 @@ class RagChatServiceImpl(IRagChatService):
|
||||
return [], dataset.get("name") or ""
|
||||
try:
|
||||
collection = get_chroma().get_or_create_collection(dataset["collection_name"])
|
||||
result = collection.query(query_texts=[query], n_results=max(top_k, 1))
|
||||
query_embedding = await self._embed_texts([query], dataset.get("embedding_model") or "")
|
||||
result = collection.query(
|
||||
query_embeddings=query_embedding,
|
||||
n_results=max(top_k, 1),
|
||||
include=["documents", "metadatas", "distances"],
|
||||
)
|
||||
docs = (result.get("documents") or [[]])[0]
|
||||
metas = (result.get("metadatas") or [[]])[0]
|
||||
distances = (result.get("distances") or [[]])[0]
|
||||
@@ -483,7 +508,8 @@ class RagChatServiceImpl(IRagChatService):
|
||||
for idx, doc in enumerate(docs):
|
||||
meta = metas[idx] if idx < len(metas) else {}
|
||||
dist = distances[idx] if idx < len(distances) else 0.0
|
||||
score = 1 - float(dist or 0.0)
|
||||
distance = max(0.0, float(dist or 0.0))
|
||||
score = 1.0 / (1.0 + distance)
|
||||
if score_threshold is not None and score < score_threshold:
|
||||
continue
|
||||
chunks.append(
|
||||
@@ -501,6 +527,46 @@ class RagChatServiceImpl(IRagChatService):
|
||||
except Exception:
|
||||
return [], dataset.get("name") or ""
|
||||
|
||||
async def _embed_texts(self, texts: list[str], model_name: str) -> list[list[float]]:
    """Embed ``texts`` by calling the configured HTTP embedding endpoint.

    The endpoint is ``RAG_CONFIG["EMBED_URL"]`` or, when that is blank,
    ``LLM_BASE_URL`` with ``/embeddings`` appended; the bearer token is
    ``EMBED_KEY`` or ``LLM_API_KEY``. Texts are posted in batches of
    ``EMBED_BATCH_SIZE`` (default 10, minimum 1) as
    ``{"model": ..., "input": [...]}`` and embeddings are read from the
    ``data[*].embedding`` entries of each JSON response.

    Args:
        texts: Strings to embed.
        model_name: Model to request; when empty, ``EMBED_MODEL`` from the
            config is used, then ``"text-embedding-v4"``.

    Returns:
        The embeddings collected batch by batch, one per input text.

    Raises:
        LeauditException: With ``HTTP_500_INTERNAL_SERVER_ERROR`` when the
            service is not configured, the request fails (HTTP error status,
            transport error, or a body that is not JSON), or the number of
            returned embeddings does not match the number of texts sent.
    """
    embed_url = (RAG_CONFIG.get("EMBED_URL") or "").strip() or f"{RAG_CONFIG['LLM_BASE_URL'].rstrip('/')}/embeddings"
    embed_key = (RAG_CONFIG.get("EMBED_KEY") or "").strip() or RAG_CONFIG["LLM_API_KEY"]
    embed_model = model_name or (RAG_CONFIG.get("EMBED_MODEL") or "").strip() or "text-embedding-v4"
    batch_size = max(1, int(RAG_CONFIG.get("EMBED_BATCH_SIZE") or 10))
    if not embed_url or not embed_key:
        raise LeauditException(StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR, "未配置可用的向量化服务")

    # Nothing to embed: skip opening an HTTP client at all.
    if not texts:
        return []

    embeddings: list[list[float]] = []
    async with httpx.AsyncClient(timeout=120.0) as client:
        for start in range(0, len(texts), batch_size):
            batch_texts = texts[start:start + batch_size]
            try:
                response = await client.post(
                    embed_url,
                    headers={
                        "Content-Type": "application/json",
                        "Authorization": f"Bearer {embed_key}",
                    },
                    json={"model": embed_model, "input": batch_texts},
                )
                response.raise_for_status()
            except httpx.HTTPStatusError as exc:
                error_message = exc.response.text.strip() or f"{exc.response.status_code} {exc.response.reason_phrase}"
                raise LeauditException(
                    StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR,
                    f"向量化服务调用失败: {error_message[:300]}",
                ) from exc
            except httpx.RequestError as exc:
                # Timeouts / connection failures never produce a response;
                # report them the same way as HTTP status failures.
                raise LeauditException(
                    StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR,
                    f"向量化服务调用失败: {str(exc)[:300]}",
                ) from exc

            try:
                payload = response.json()
            except ValueError as exc:
                # A 2xx response whose body is not JSON (e.g. a proxy page).
                raise LeauditException(
                    StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR,
                    "向量化服务调用失败: 响应不是合法 JSON",
                ) from exc

            # A non-object payload has no "data"; let the count check below
            # reject it instead of failing on ``.get``.
            rows = (payload.get("data") if isinstance(payload, dict) else None) or []
            # NOTE(review): rows are taken in response order; confirm the
            # service returns them in input order.
            batch_embeddings = [row.get("embedding") for row in rows if isinstance(row, dict) and row.get("embedding")]
            if len(batch_embeddings) != len(batch_texts):
                raise LeauditException(StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR, "向量化结果数量异常")
            embeddings.extend(batch_embeddings)

    if len(embeddings) != len(texts):
        raise LeauditException(StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR, "向量化结果数量异常")
    return embeddings
|
||||
|
||||
def _build_sources(self, context_chunks: list[dict], dataset_name: str) -> list[dict]:
|
||||
return [
|
||||
{
|
||||
|
||||
@@ -1186,7 +1186,7 @@ class RagDatasetServiceImpl(IRagDatasetService):
|
||||
content = documents[index] if index < len(documents) else ""
|
||||
metadata = metadatas[index] if index < len(metadatas) and isinstance(metadatas[index], dict) else {}
|
||||
distance = float(distances[index]) if index < len(distances) and distances[index] is not None else 1.0
|
||||
score = max(0.0, min(1.0, 1.0 - distance))
|
||||
score = max(0.0, min(1.0, 1.0 / (1.0 + max(0.0, distance))))
|
||||
if score_threshold_enabled and score_threshold is not None and score < score_threshold:
|
||||
continue
|
||||
|
||||
|
||||
Reference in New Issue
Block a user