feat: integrate govdoc platform updates

2026-05-18 14:35:25 +08:00
parent a73826dc1d
commit 1bacfe41b7
10 changed files with 2151 additions and 92 deletions
@@ -34,6 +34,7 @@ class RagMessageItemVO(BaseModel):
    answer: str = Field(...)
    feedback: dict | None = Field(None)
    retrieverResources: list[dict] | None = Field(None)
+    suggestedQuestions: list[str] = Field(default_factory=list)
    createdAt: int = Field(0)


@@ -1,76 +1,594 @@
 """把 AuditResult 渲染成单文件 HTML 报告。"""

 from __future__ import annotations
+
+from collections import Counter
 from html import escape
+
 from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.result import AuditResult


+# Stylesheet for the report page. render_html() inlines it into the page's
+# <style> element. The media queries collapse the summary/content grids to a
+# single column at <=1200px and tighten padding and stack headers at <=720px.
 _CSS = """
-body { font-family: -apple-system, "PingFang SC", sans-serif; margin: 0; padding: 24px;
-       background: #f7f7f9; color: #1a1a1a; }
-.header { display: flex; align-items: center; gap: 16px; margin-bottom: 24px; }
-.score { width: 96px; height: 96px; border-radius: 50%;
-         background: conic-gradient(#22c55e var(--p), #e5e7eb var(--p));
-         display: grid; place-items: center; font-weight: 700; font-size: 22px; color: #111; }
-.score-inner { background: white; width: 76px; height: 76px; border-radius: 50%;
-               display: grid; place-items: center; }
-.tag { padding: 2px 8px; border-radius: 999px; font-size: 12px; }
-.error { background: #fee2e2; color: #b91c1c; }
-.warning { background: #fef9c3; color: #a16207; }
-.info { background: #dbeafe; color: #1d4ed8; }
-table { width: 100%; border-collapse: collapse; background: white; border-radius: 8px;
-        overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.06); }
-th, td { padding: 10px 12px; text-align: left; border-bottom: 1px solid #f1f5f9; vertical-align: top; }
-th { background: #f8fafc; font-size: 13px; }
-td.msg { max-width: 480px; }
-.context { color: #64748b; font-size: 12px; margin-top: 4px; }
+* { box-sizing: border-box; }
+html, body { margin: 0; padding: 0; }
+body {
+  font-family: -apple-system, "PingFang SC", "Microsoft YaHei", sans-serif;
+  background: #f3f6f5;
+  color: #0f172a;
+}
+a { color: inherit; }
+.page {
+  width: 100%;
+  padding: 20px 24px 32px;
+}
+.stack {
+  display: flex;
+  flex-direction: column;
+  gap: 20px;
+}
+.card {
+  background: #ffffff;
+  border: 1px solid #e2e8f0;
+  border-radius: 12px;
+  box-shadow: 0 1px 3px rgba(15, 23, 42, 0.08);
+  overflow: hidden;
+}
+.card-head {
+  height: 48px;
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+  padding: 0 20px;
+  border-bottom: 1px solid #e2e8f0;
+  background: #fcfdfd;
+}
+.card-title {
+  font-size: 14px;
+  font-weight: 600;
+  color: #1e293b;
+}
+.card-subtitle {
+  font-size: 12px;
+  color: #64748b;
+}
+.summary-grid {
+  display: grid;
+  grid-template-columns: 220px minmax(0, 1fr);
+  gap: 20px;
+  padding: 20px;
+}
+.score-box {
+  border: 1px solid #cfe4dc;
+  background: #f7fbf9;
+  border-radius: 10px;
+  padding: 20px;
+}
+.score-label {
+  font-size: 12px;
+  font-weight: 500;
+  color: #475569;
+}
+.score-value {
+  margin-top: 12px;
+  font-size: 42px;
+  line-height: 1;
+  font-weight: 600;
+  letter-spacing: -0.05em;
+  color: #0f172a;
+}
+.score-track {
+  margin-top: 16px;
+  height: 8px;
+  background: #dbe8e3;
+  border-radius: 999px;
+  overflow: hidden;
+}
+.score-fill {
+  height: 100%;
+  background: #00684a;
+}
+.score-note {
+  margin-top: 16px;
+  font-size: 12px;
+  line-height: 1.75;
+  color: #475569;
+}
+.summary-main {
+  min-width: 0;
+}
+.eyebrow {
+  display: inline-flex;
+  align-items: center;
+  height: 28px;
+  padding: 0 12px;
+  border: 1px solid #cfe4dc;
+  border-radius: 6px;
+  background: #e8f3ef;
+  color: #00684a;
+  font-size: 12px;
+  font-weight: 500;
+}
+.report-title {
+  margin: 12px 0 0;
+  font-size: 32px;
+  line-height: 1.25;
+  letter-spacing: -0.03em;
+  font-weight: 600;
+  color: #0f172a;
+}
+.report-meta {
+  margin-top: 8px;
+  font-size: 15px;
+  color: #475569;
+}
+.metrics {
+  margin-top: 20px;
+  display: grid;
+  grid-template-columns: repeat(4, minmax(0, 1fr));
+  gap: 16px;
+}
+.metric {
+  border: 1px solid #e2e8f0;
+  border-radius: 10px;
+  background: #fcfdfd;
+  padding: 16px 20px;
+}
+.metric-label {
+  font-size: 13px;
+  font-weight: 500;
+  color: #64748b;
+}
+.metric-value {
+  margin-top: 12px;
+  display: flex;
+  align-items: baseline;
+  gap: 8px;
+}
+.metric-value strong {
+  font-size: 30px;
+  line-height: 1;
+  letter-spacing: -0.04em;
+  font-weight: 600;
+  color: #0f172a;
+}
+.metric-value span {
+  font-size: 13px;
+  color: #64748b;
+}
+.chips {
+  margin-top: 20px;
+  display: flex;
+  flex-wrap: wrap;
+  gap: 12px;
+}
+.chip,
+.severity-tag {
+  display: inline-flex;
+  align-items: center;
+  border: 1px solid transparent;
+  border-radius: 6px;
+  font-weight: 600;
+}
+.chip {
+  height: 32px;
+  padding: 0 12px;
+  font-size: 12px;
+}
+.severity-tag {
+  height: 32px;
+  padding: 0 12px;
+  font-size: 12px;
+  text-transform: uppercase;
+}
+.error {
+  border-color: #fecaca;
+  background: #fef2f2;
+  color: #b91c1c;
+}
+.warning {
+  border-color: #fde68a;
+  background: #fffbeb;
+  color: #b45309;
+}
+.info {
+  border-color: #bfdbfe;
+  background: #eff6ff;
+  color: #1d4ed8;
+}
+.content-grid {
+  display: grid;
+  grid-template-columns: 340px minmax(0, 1fr);
+  gap: 20px;
+}
+.sidebar-body {
+  padding: 16px;
+  display: flex;
+  flex-direction: column;
+  gap: 16px;
+}
+.summary-row {
+  border: 1px solid #e2e8f0;
+  border-radius: 10px;
+  background: #fcfdfd;
+  padding: 16px;
+}
+.summary-row-label {
+  font-size: 12px;
+  font-weight: 500;
+  color: #64748b;
+}
+.summary-row-value {
+  margin-top: 8px;
+  font-size: 22px;
+  line-height: 1;
+  letter-spacing: -0.03em;
+  font-weight: 600;
+  color: #0f172a;
+}
+.summary-row-desc {
+  margin-top: 12px;
+  font-size: 13px;
+  line-height: 1.75;
+  color: #475569;
+}
+.table-toolbar {
+  height: 48px;
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+  padding: 0 20px;
+  border-bottom: 1px solid #e2e8f0;
+  background: #fcfdfd;
+}
+.toolbar-left {
+  min-width: 0;
+}
+.toolbar-title {
+  font-size: 14px;
+  font-weight: 600;
+  color: #1e293b;
+}
+.toolbar-desc {
+  margin-top: 2px;
+  font-size: 12px;
+  color: #64748b;
+}
+.toolbar-filters {
+  display: flex;
+  gap: 8px;
+}
+.filter {
+  display: inline-flex;
+  align-items: center;
+  height: 32px;
+  padding: 0 12px;
+  border: 1px solid #e2e8f0;
+  border-radius: 6px;
+  background: #ffffff;
+  color: #64748b;
+  font-size: 12px;
+  font-weight: 500;
+}
+.filter.active {
+  border-color: rgba(0, 104, 74, 0.2);
+  background: #e8f3ef;
+  color: #00684a;
+}
+.table-wrap {
+  overflow-x: auto;
+}
+table {
+  width: 100%;
+  min-width: 1320px;
+  border-collapse: collapse;
+}
+thead tr {
+  background: #f8fafc;
+  color: #475569;
+  font-size: 13px;
+  font-weight: 500;
+}
+th {
+  padding: 16px 20px;
+  text-align: left;
+  border-bottom: 1px solid #e2e8f0;
+  white-space: nowrap;
+}
+td {
+  padding: 20px;
+  vertical-align: top;
+  border-bottom: 1px solid #f1f5f9;
+}
+tbody tr:hover {
+  background: #f8fafc;
+}
+.mono {
+  font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
+}
+.id-cell {
+  font-size: 13px;
+  color: #64748b;
+}
+.rule-id {
+  font-size: 15px;
+  font-weight: 600;
+  color: #1e293b;
+}
+.rule-name {
+  margin-top: 4px;
+  font-size: 13px;
+  color: #64748b;
+}
+.category-cell {
+  font-size: 14px;
+  color: #334155;
+}
+.location-cell {
+  font-size: 13px;
+  color: #334155;
+}
+.message-cell {
+  min-width: 560px;
+}
+.message-main {
+  font-size: 15px;
+  line-height: 1.8;
+  color: #0f172a;
+}
+.context-box,
+.suggestion-box {
+  margin-top: 12px;
+  border-radius: 6px;
+  padding: 12px 16px;
+  font-size: 13px;
+  line-height: 1.8;
+}
+.context-box {
+  border: 1px solid #e2e8f0;
+  background: #f8fafc;
+  color: #475569;
+}
+.suggestion-box {
+  border: 1px solid #cfe4dc;
+  background: #f4faf7;
+  color: #0d6b4d;
+}
+.empty {
+  padding: 24px 20px;
+  text-align: center;
+  color: #64748b;
+  font-size: 14px;
+}
+@media (max-width: 1200px) {
+  .summary-grid,
+  .content-grid {
+    grid-template-columns: 1fr;
+  }
+  .metrics {
+    grid-template-columns: repeat(2, minmax(0, 1fr));
+  }
+}
+@media (max-width: 720px) {
+  .page {
+    padding: 16px;
+  }
+  .metrics {
+    grid-template-columns: 1fr;
+  }
+  .table-toolbar,
+  .card-head {
+    height: auto;
+    min-height: 48px;
+    padding-top: 12px;
+    padding-bottom: 12px;
+    align-items: flex-start;
+    flex-direction: column;
+  }
+}
 """


 def render_html(result: AuditResult) -> str:
-    s = result.summary
-    score = s.score
-    pct = f"{score}%"
-    rows = []
-    for f in result.findings:
-        loc = f.location
-        suggest = (
-            f'<div style="color:#0369a1">建议: {escape(f.suggestion)}</div>'
-            if f.suggestion else ""
-        )
-        rows.append(f"""
-<tr>
-  <td>{escape(f.finding_id)}</td>
-  <td>{escape(f.rule_id)}<br><span style="color:#64748b;font-size:12px">{escape(f.rule_name)}</span></td>
-  <td><span class="tag {f.severity}">{f.severity}</span></td>
-  <td>{escape(f.category)}</td>
-  <td>P{loc.paragraph_index} ({escape(loc.role or '')})</td>
-  <td class="msg">{escape(f.message)}
-    <div class="context">原文: {escape((loc.context or '')[:80])}</div>
-    {suggest}
-  </td>
-</tr>""")
+    """Render an audit result as a complete HTML page.
+
+    Reads ``result.summary`` (score, total_findings, by_category),
+    ``result.document["filename"]`` and ``result.findings``. The sidebar
+    values come from the module helpers ``_top_rule``, ``_line_range`` and
+    ``_entity_summary``. Each finding becomes one table row; a placeholder
+    row is emitted when there are no findings.
+
+    Args:
+        result: The audit result to render.
+
+    Returns:
+        The HTML document as a string, with ``_CSS`` inlined in ``<style>``.
+    """
+    summary = result.summary
+    score = int(summary.score or 0)
+    # Clamp so the progress-bar width below is always a valid 0-100 percentage.
+    score_pct = max(0, min(score, 100))
+    severity_counts = _severity_counts(result)
+    # Number of categories that have a non-empty name and a non-zero count.
+    category_count = len([key for key, value in (summary.by_category or {}).items() if key and value])
+    filename = escape(str(result.document.get("filename", "")))
+    top_rule_id, top_rule_count = _top_rule(result)
+    line_range = _line_range(result)
+    entity_summary = _entity_summary(result)

-    body = f"""<!doctype html>
-<html lang="zh"><head><meta charset="utf-8"><title>公文审核报告</title>
-<style>{_CSS}</style></head><body>
-<div class="header">
-  <div class="score" style="--p:{pct}"><div class="score-inner">{score}</div></div>
-  <div>
-    <h1 style="margin:0">公文格式审核报告</h1>
-    <div style="color:#64748b">{escape(result.document.get('filename', ''))} · 共 {s.total_findings} 项</div>
-    <div style="margin-top:6px">
-      <span class="tag error">错误 {s.by_severity.get('error', 0)}</span>
-      <span class="tag warning">警告 {s.by_severity.get('warning', 0)}</span>
-      <span class="tag info">提示 {s.by_severity.get('info', 0)}</span>
+    rows = []
+    for finding in result.findings:
+        location_label = _format_location(finding.location.paragraph_index)
+        context = escape((finding.location.context or "").strip())
+        message = escape(finding.message)
+        # Findings without a suggestion fall back to a generic instruction.
+        suggestion = escape(finding.suggestion) if finding.suggestion else "按规则要求修正对应内容。"
+
+        rows.append(
+            f"""
+<tr>
+  <td class="mono id-cell">{escape(finding.finding_id)}</td>
+  <td>
+    <div class="rule-id">{escape(finding.rule_id)}</div>
+    <div class="rule-name">{escape(finding.rule_name)}</div>
+  </td>
+  <td><span class="severity-tag {escape(finding.severity)}">{escape(finding.severity)}</span></td>
+  <td class="category-cell">{escape(finding.category)}</td>
+  <td class="mono location-cell">{location_label}</td>
+  <td class="message-cell">
+    <div class="message-main">{message}</div>
+    <div class="context-box">原文：{context or "未提取到上下文"}</div>
+    <div class="suggestion-box">建议：{suggestion}</div>
+  </td>
+</tr>"""
+        )
+
+    return f"""<!doctype html>
+<html lang="zh">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>公文审核报告</title>
+  <style>{_CSS}</style>
+</head>
+<body>
+  <div class="page">
+    <div class="stack">
+      <section class="card">
+        <div class="card-head">
+          <div class="card-title">报告摘要</div>
+          <div class="card-subtitle">不改报告语义，仅收敛样式、配色与信息层级</div>
+        </div>
+        <div class="summary-grid">
+          <div class="score-box">
+            <div class="score-label">综合得分</div>
+            <div class="score-value">{score}</div>
+            <div class="score-track"><div class="score-fill" style="width:{score_pct}%"></div></div>
+            <div class="score-note">这份正式 HTML 报告沿用平台工作台的版式语言，突出摘要、明细和建议三层信息。</div>
+          </div>
+
+          <div class="summary-main">
+            <div class="eyebrow">正式报告样式方向</div>
+            <h1 class="report-title">公文格式审核报告</h1>
+            <div class="report-meta">{filename} · 共 {summary.total_findings} 项问题 · 用作正式 HTML 报告输出</div>
+
+            <div class="metrics">
+              <div class="metric">
+                <div class="metric-label">错误项</div>
+                <div class="metric-value"><strong>{severity_counts["error"]}</strong><span>error</span></div>
+              </div>
+              <div class="metric">
+                <div class="metric-label">警告项</div>
+                <div class="metric-value"><strong>{severity_counts["warning"]}</strong><span>warning</span></div>
+              </div>
+              <div class="metric">
+                <div class="metric-label">提示项</div>
+                <div class="metric-value"><strong>{severity_counts["info"]}</strong><span>info</span></div>
+              </div>
+              <div class="metric">
+                <div class="metric-label">问题类别</div>
+                <div class="metric-value"><strong>{category_count}</strong><span>标题 / 发文 / 格式 / 其他</span></div>
+              </div>
+            </div>
+
+            <div class="chips">
+              <span class="chip error">错误 {severity_counts["error"]}</span>
+              <span class="chip warning">警告 {severity_counts["warning"]}</span>
+              <span class="chip info">提示 {severity_counts["info"]}</span>
+            </div>
+          </div>
+        </div>
+      </section>
+
+      <section class="content-grid">
+        <aside class="card">
+          <div class="card-head">
+            <div class="card-title">侧边摘要</div>
+            <div class="card-subtitle">工作台侧栏语义</div>
+          </div>
+          <div class="sidebar-body">
+            <article class="summary-row">
+              <div class="summary-row-label">命中最多规则</div>
+              <div class="summary-row-value">{escape(top_rule_id)}</div>
+              <div class="summary-row-desc">当前命中最多的规则共 {top_rule_count} 项，适合在正式版中作为摘要提示保留。</div>
+            </article>
+            <article class="summary-row">
+              <div class="summary-row-label">集中行号</div>
+              <div class="summary-row-value">{escape(line_range)}</div>
+              <div class="summary-row-desc">问题主要集中在这一段行号范围，便于阅读者快速判断问题分布区域。</div>
+            </article>
+            <article class="summary-row">
+              <div class="summary-row-label">实体状态</div>
+              <div class="summary-row-value">{escape(entity_summary)}</div>
+              <div class="summary-row-desc">按现有识别结果汇总实体抽取状态，用于辅助理解顶部结构类问题。</div>
+            </article>
+          </div>
+        </aside>
+
+        <article class="card">
+          <div class="table-toolbar">
+            <div class="toolbar-left">
+              <div class="toolbar-title">问题明细</div>
+              <div class="toolbar-desc">保留当前报告语义，只收敛版式、层级和配色。</div>
+            </div>
+            <div class="toolbar-filters">
+              <span class="filter active">全部</span>
+              <span class="filter">错误</span>
+              <span class="filter">警告</span>
+            </div>
+          </div>
+          <div class="table-wrap">
+            <table>
+              <thead>
+                <tr>
+                  <th>编号</th>
+                  <th>规则</th>
+                  <th>严重度</th>
+                  <th>类别</th>
+                  <th>位置</th>
+                  <th>说明</th>
+                </tr>
+              </thead>
+              <tbody>
+                {''.join(rows) or '<tr><td colspan="6" class="empty">未发现问题</td></tr>'}
+              </tbody>
+            </table>
+          </div>
+        </article>
+      </section>
    </div>
  </div>
-</div>
-<table>
-  <thead><tr>
-    <th>编号</th><th>规则</th><th>严重度</th><th>类别</th><th>位置</th><th>说明</th>
-  </tr></thead>
-  <tbody>{''.join(rows) or '<tr><td colspan=6>未发现问题</td></tr>'}</tbody>
-</table>
-</body></html>"""
-    return body
+</body>
+</html>"""
+
+
+def _severity_counts(result: AuditResult) -> dict[str, int]:
+    """Tally findings per severity level.
+
+    Always returns the keys "error", "warning" and "info"; a level with no
+    findings maps to 0. Severities outside these three are not reported.
+    """
+    tally = Counter(item.severity for item in result.findings)
+    # Counter yields 0 for absent keys, so plain indexing needs no default.
+    return {level: tally[level] for level in ("error", "warning", "info")}
+
+
+def _top_rule(result: AuditResult) -> tuple[str, int]:
+    """Return the most frequently hit rule id together with its hit count.
+
+    Findings with an empty rule id are ignored. When nothing is left to
+    count, the placeholder pair ("无", 0) is returned.
+    """
+    hits = Counter(item.rule_id for item in result.findings if item.rule_id)
+    leaders = hits.most_common(1)
+    if leaders:
+        # most_common yields (rule_id, count) pairs, already in return shape.
+        return leaders[0]
+    return "无", 0
+
+
+def _line_range(result: AuditResult) -> str:
+    """Describe the span of positions covered by the findings.
+
+    Each finding's ``location.paragraph_index`` is converted to ``int`` and
+    shifted by one for display (presumably because the index is 0-based —
+    confirm). Findings without an index are skipped.
+
+    Returns:
+        "未定位" when no finding carries an index, a single position when
+        all findings share one, otherwise a "first - last" range.
+    """
+    positions = {
+        int(item.location.paragraph_index) + 1
+        for item in result.findings
+        if item.location.paragraph_index is not None
+    }
+    if not positions:
+        return "未定位"
+    first, last = min(positions), max(positions)
+    if first == last:
+        return f"第 {first} 行"
+    return f"第 {first} 行 - 第 {last} 行"
+
+
+def _entity_summary(result: AuditResult) -> str:
+    """Summarise which of the four core entities were extracted.
+
+    Looks up "title", "doc_number", "recipient" and "date" in
+    ``result.entities``; a key counts as missing when its value is falsy.
+    """
+    core_keys = ("title", "doc_number", "recipient", "date")
+    absent = [name for name in core_keys if not result.entities.get(name)]
+    if len(absent) == len(core_keys):
+        # NOTE(review): with every entity missing this returns a bare label
+        # rather than a "缺少 ..." status — confirm that is intended.
+        return "标题 / 发文"
+    if absent:
+        # Only the first two missing keys are listed to keep the label short.
+        return "缺少 " + " / ".join(absent[:2])
+    return "核心实体齐全"
+
+
+def _format_location(paragraph_index: int | None) -> str:
+    """Format a paragraph index as a position label, shifted by one.
+
+    Returns "未定位" when no index is available.
+    """
+    return "未定位" if paragraph_index is None else f"第 {int(paragraph_index) + 1} 行"
@@ -7,7 +7,7 @@ import json
 import mimetypes
 import time
 from dataclasses import dataclass
-from datetime import datetime
+from datetime import date, datetime
 from pathlib import Path
 from typing import Any

@@ -60,6 +60,20 @@ class GovdocServiceImpl(IGovdocService):
        self.OssService = OssService or OssServiceImpl()
        self.Storage = StorageAdapter()

+    def _parse_date_filter(self, value: str | None, field_name: str) -> date | None:
+        """Convert an optional date filter string into a ``date``.
+
+        Args:
+            value: Raw filter text; ``None`` or blank text means "no filter".
+            field_name: Parameter name to mention in the error message.
+
+        Returns:
+            The parsed date, or ``None`` when no filter was supplied.
+
+        Raises:
+            LeauditException: HTTP 400 when ``date.fromisoformat`` rejects
+                the text.
+        """
+        candidate = value.strip() if value is not None else ""
+        if not candidate:
+            return None
+        try:
+            parsed = date.fromisoformat(candidate)
+        except ValueError as exc:
+            raise LeauditException(
+                StatusCodeEnum.HTTP_400_BAD_REQUEST,
+                f"{field_name} 格式非法，应为 YYYY-MM-DD",
+            ) from exc
+        return parsed
+
    # ── 文档 ──────────────────────────────────────────────

    async def UploadDocument(
@@ -250,12 +264,14 @@ class GovdocServiceImpl(IGovdocService):
        if resultStatus:
            filters.append("COALESCE(gr.result_status, '') = :result_status")
            params["result_status"] = resultStatus.strip()
-        if dateFrom:
-            filters.append("d.created_at >= CAST(:date_from AS date)")
-            params["date_from"] = dateFrom.strip()
-        if dateTo:
-            filters.append("d.created_at < (CAST(:date_to AS date) + INTERVAL '1 day')")
-            params["date_to"] = dateTo.strip()
+        parsedDateFrom = self._parse_date_filter(dateFrom, "dateFrom")
+        parsedDateTo = self._parse_date_filter(dateTo, "dateTo")
+        if parsedDateFrom:
+            filters.append("d.created_at::date >= :date_from")
+            params["date_from"] = parsedDateFrom
+        if parsedDateTo:
+            filters.append("d.created_at::date <= :date_to")
+            params["date_to"] = parsedDateTo

        whereClause = " AND ".join(filters)

@@ -901,9 +917,10 @@ class GovdocServiceImpl(IGovdocService):
        artifact = await self._get_report_artifact(runId, "html_report")
        if not artifact:
            return {"runId": runId, "htmlUrl": ""}
+        content = await self.OssService.DownloadBytes(str(artifact["oss_url"]))
        return {
            "runId": runId,
-            "htmlUrl": await self.OssService.PresignGetUrl(str(artifact["oss_url"])),
+            "html": content.decode("utf-8"),
        }

    async def GetReportDocx(self, runId: int) -> dict[str, Any]:
@@ -4,6 +4,7 @@ import json
 import uuid
 from typing import AsyncGenerator

+import httpx
 from sqlalchemy import text

 from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession
@@ -25,6 +26,7 @@ from fastapi_modules.fastapi_leaudit.domian.vo.ragChatVo import (
    RagMessagePageVO,
    RagOperationResultVO,
 )
+from fastapi_modules.fastapi_leaudit.rag_engine.config import RAG_CONFIG
 from fastapi_modules.fastapi_leaudit.rag_engine.generator import generate_stream
 from fastapi_modules.fastapi_leaudit.rag_engine.question_chains import generate_followups
 from fastapi_modules.fastapi_leaudit.services.ragChatService import IRagChatService
@@ -194,7 +196,7 @@ class RagChatServiceImpl(IRagChatService):
                await session.execute(
                    text(
                        """
-                        SELECT message_id, role, content, sources, feedback, created_at
+                        SELECT message_id, role, content, sources, metadata, feedback, created_at
                        FROM rag_message
                        WHERE conversation_id = :conversation_id
                        ORDER BY created_at ASC
@@ -216,6 +218,11 @@ class RagChatServiceImpl(IRagChatService):
            row = items[idx]
            if row["role"] == "user":
                answer = items[idx + 1] if idx + 1 < len(items) and items[idx + 1]["role"] == "assistant" else None
+                answer_sources = self._parse_json_field(answer.get("sources")) if answer else []
+                answer_metadata = self._parse_json_field(answer.get("metadata")) if answer else {}
+                suggested_questions = answer_metadata.get("suggested_questions") if isinstance(answer_metadata, dict) else []
+                if not isinstance(suggested_questions, list):
+                    suggested_questions = []
                data.append(
                    RagMessageItemVO(
                        id=(answer["message_id"] if answer else row["message_id"]),
@@ -223,7 +230,8 @@ class RagChatServiceImpl(IRagChatService):
                        query=row["content"],
                        answer=answer["content"] if answer else "",
                        feedback=({"rating": answer["feedback"]} if answer and answer.get("feedback") else None),
-                        retrieverResources=(answer.get("sources") if answer else None),
+                        retrieverResources=answer_sources or None,
+                        suggestedQuestions=[str(item) for item in suggested_questions],
                        createdAt=int(row["created_at"].timestamp()) if row.get("created_at") else 0,
                    )
                )
@@ -392,6 +400,18 @@ class RagChatServiceImpl(IRagChatService):
        area = row.get("area") or ""
        return area in ("", "省级", user_area or "") or bool(row.get("dataset_public"))

+    def _parse_json_field(self, value):
+        """Decode a possibly JSON-encoded value into a dict or list.
+
+        Already-decoded dict/list values pass through unchanged and strings
+        are parsed as JSON. Everything else — ``None``, other types,
+        malformed JSON, or JSON that decodes to a scalar such as ``null`` or
+        a number — yields an empty dict, so callers always get a container.
+        """
+        if isinstance(value, (dict, list)):
+            return value
+        if not isinstance(value, str):
+            return {}
+        try:
+            decoded = json.loads(value)
+        except (ValueError, RecursionError):
+            # Best effort: unreadable JSON is treated as "no data".
+            return {}
+        # Valid JSON may still be a scalar ("null", "3"); callers expect a
+        # dict or list, so anything else collapses to the empty default.
+        return decoded if isinstance(decoded, (dict, list)) else {}
+
    async def _ensure_conversation(self, user_id: int, conversation_id: str | None, app_id: int | None) -> str:
        if conversation_id and conversation_id != "-1":
            async with GetAsyncSession() as session:
@@ -450,7 +470,7 @@ class RagChatServiceImpl(IRagChatService):
                await session.execute(
                    text(
                        """
-                        SELECT id, name, collection_name, retrieval_model
+                        SELECT id, name, collection_name, retrieval_model, embedding_model
                        FROM rag_dataset
                        WHERE id = :dataset_id AND deleted_at IS NULL
                        LIMIT 1
@@ -475,7 +495,12 @@ class RagChatServiceImpl(IRagChatService):
            return [], dataset.get("name") or ""
        try:
            collection = get_chroma().get_or_create_collection(dataset["collection_name"])
-            result = collection.query(query_texts=[query], n_results=max(top_k, 1))
+            query_embedding = await self._embed_texts([query], dataset.get("embedding_model") or "")
+            result = collection.query(
+                query_embeddings=query_embedding,
+                n_results=max(top_k, 1),
+                include=["documents", "metadatas", "distances"],
+            )
            docs = (result.get("documents") or [[]])[0]
            metas = (result.get("metadatas") or [[]])[0]
            distances = (result.get("distances") or [[]])[0]
@@ -483,7 +508,8 @@ class RagChatServiceImpl(IRagChatService):
            for idx, doc in enumerate(docs):
                meta = metas[idx] if idx < len(metas) else {}
                dist = distances[idx] if idx < len(distances) else 0.0
-                score = 1 - float(dist or 0.0)
+                distance = max(0.0, float(dist or 0.0))
+                score = 1.0 / (1.0 + distance)
                if score_threshold is not None and score < score_threshold:
                    continue
                chunks.append(
@@ -501,6 +527,46 @@ class RagChatServiceImpl(IRagChatService):
        except Exception:
            return [], dataset.get("name") or ""

+    async def _embed_texts(self, texts: list[str], model_name: str) -> list[list[float]]:
+        """Embed texts by POSTing them in batches to the embeddings endpoint.
+
+        Endpoint, key, model and batch size are read from ``RAG_CONFIG``
+        (``EMBED_URL`` / ``EMBED_KEY`` / ``EMBED_MODEL`` /
+        ``EMBED_BATCH_SIZE``), falling back to ``LLM_BASE_URL`` +
+        "/embeddings", ``LLM_API_KEY``, "text-embedding-v4" and 10. A
+        non-empty ``model_name`` takes precedence over the configured model.
+
+        Args:
+            texts: Texts to embed; an empty list returns an empty result.
+            model_name: Embedding model to request, or "" for the default.
+
+        Returns:
+            One embedding per input text, collected batch by batch in the
+            order the service returned them.
+
+        Raises:
+            LeauditException: HTTP 500 when the service is not configured,
+                the request fails (error status, transport failure or an
+                unparsable body), or the number of embeddings returned does
+                not match the number of inputs.
+        """
+        embed_url = (RAG_CONFIG.get("EMBED_URL") or "").strip() or f"{RAG_CONFIG['LLM_BASE_URL'].rstrip('/')}/embeddings"
+        embed_key = (RAG_CONFIG.get("EMBED_KEY") or "").strip() or RAG_CONFIG["LLM_API_KEY"]
+        embed_model = model_name or (RAG_CONFIG.get("EMBED_MODEL") or "").strip() or "text-embedding-v4"
+        batch_size = max(1, int(RAG_CONFIG.get("EMBED_BATCH_SIZE") or 10))
+        if not embed_url or not embed_key:
+            raise LeauditException(StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR, "未配置可用的向量化服务")
+        if not texts:
+            # Nothing to embed: skip opening an HTTP client altogether.
+            return []
+
+        # Headers are identical for every batch, so build them once.
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {embed_key}",
+        }
+        embeddings: list[list[float]] = []
+        async with httpx.AsyncClient(timeout=120.0) as client:
+            for start in range(0, len(texts), batch_size):
+                batch_texts = texts[start:start + batch_size]
+                try:
+                    response = await client.post(
+                        embed_url,
+                        headers=headers,
+                        json={"model": embed_model, "input": batch_texts},
+                    )
+                    response.raise_for_status()
+                except httpx.HTTPStatusError as exc:
+                    error_message = exc.response.text.strip() or f"{exc.response.status_code} {exc.response.reason_phrase}"
+                    raise LeauditException(
+                        StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR,
+                        f"向量化服务调用失败: {error_message[:300]}",
+                    ) from exc
+                except httpx.RequestError as exc:
+                    # Timeouts and connection failures carry no response;
+                    # report them the same way as HTTP error statuses.
+                    error_message = str(exc).strip() or type(exc).__name__
+                    raise LeauditException(
+                        StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR,
+                        f"向量化服务调用失败: {error_message[:300]}",
+                    ) from exc
+
+                try:
+                    payload = response.json()
+                except ValueError as exc:
+                    raise LeauditException(
+                        StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR,
+                        "向量化服务调用失败: 响应不是合法 JSON",
+                    ) from exc
+
+                # A non-object payload has no "data"; it then fails the count
+                # check below instead of raising AttributeError here.
+                rows = payload.get("data") if isinstance(payload, dict) else None
+                batch_embeddings = [
+                    row.get("embedding")
+                    for row in (rows or [])
+                    if isinstance(row, dict) and row.get("embedding")
+                ]
+                if len(batch_embeddings) != len(batch_texts):
+                    raise LeauditException(StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR, "向量化结果数量异常")
+                embeddings.extend(batch_embeddings)
+
+        if len(embeddings) != len(texts):
+            raise LeauditException(StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR, "向量化结果数量异常")
+        return embeddings
+
    def _build_sources(self, context_chunks: list[dict], dataset_name: str) -> list[dict]:
        return [
            {
@@ -1186,7 +1186,7 @@ class RagDatasetServiceImpl(IRagDatasetService):
            content = documents[index] if index < len(documents) else ""
            metadata = metadatas[index] if index < len(metadatas) and isinstance(metadatas[index], dict) else {}
            distance = float(distances[index]) if index < len(distances) and distances[index] is not None else 1.0
-            score = max(0.0, min(1.0, 1.0 - distance))
+            score = max(0.0, min(1.0, 1.0 / (1.0 + max(0.0, distance))))
            if score_threshold_enabled and score_threshold is not None and score < score_threshold:
                continue