fix: harden rag streaming and queue status reporting
This commit is contained in:
+32
-3
@@ -1,6 +1,7 @@
|
|||||||
# RAG 聊天接口
|
# RAG 聊天接口
|
||||||
|
|
||||||
> 最后整理:2026-05-07
|
> 最后整理:2026-05-11
|
||||||
|
> 本文已按当前后端实现逐项核对
|
||||||
> 对应后端:`fastapi_modules/fastapi_leaudit/controllers/ragChatController.py`
|
> 对应后端:`fastapi_modules/fastapi_leaudit/controllers/ragChatController.py`
|
||||||
> 统一前缀:`/api/v3/rag`
|
> 统一前缀:`/api/v3/rag`
|
||||||
|
|
||||||
@@ -237,6 +238,27 @@ JWT payload 至少会被后端消费这些字段:
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
补充说明:
|
||||||
|
|
||||||
|
- 如果 `appId` 不存在、不可见,或者系统默认应用 / 排序第一条应用均不可见,当前实现不会报 404,而是返回一个空参数对象:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"code": 200,
|
||||||
|
"msg": "success",
|
||||||
|
"data": {
|
||||||
|
"openingStatement": "",
|
||||||
|
"suggestedQuestions": [],
|
||||||
|
"userInputForm": [],
|
||||||
|
"fileUpload": {
|
||||||
|
"image": {
|
||||||
|
"enabled": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
### 4.5 发起流式对话
|
### 4.5 发起流式对话
|
||||||
|
|
||||||
`POST /api/v3/rag/chat/messages`
|
`POST /api/v3/rag/chat/messages`
|
||||||
@@ -256,7 +278,7 @@ JWT payload 至少会被后端消费这些字段:
|
|||||||
| 字段 | 类型 | 必填 | 说明 |
|
| 字段 | 类型 | 必填 | 说明 |
|
||||||
|------|------|------|------|
|
|------|------|------|------|
|
||||||
| `query` | string | 是 | 用户问题,不能为空 |
|
| `query` | string | 是 | 用户问题,不能为空 |
|
||||||
| `conversationId` | string \| null | 否 | 会话 ID;新对话可传 `null` 或不传 |
|
| `conversationId` | string \| null | 否 | 会话 ID;新对话可传 `null` 或不传;当前实现也把字符串 `"-1"` 视为新对话 |
|
||||||
| `appId` | int \| null | 否 | 应用 ID;不传则自动回退默认应用 |
|
| `appId` | int \| null | 否 | 应用 ID;不传则自动回退默认应用 |
|
||||||
|
|
||||||
返回类型:
|
返回类型:
|
||||||
@@ -291,6 +313,7 @@ data: {"event":"error","task_id":"...","message_id":"...","code":"llm_error","me
|
|||||||
- 流结束后会落一条 `role = assistant` 消息
|
- 流结束后会落一条 `role = assistant` 消息
|
||||||
- 若命中知识库,会把引用结果写入 `sources / metadata`
|
- 若命中知识库,会把引用结果写入 `sources / metadata`
|
||||||
- 会根据对话内容追加 `suggested_questions`
|
- 会根据对话内容追加 `suggested_questions`
|
||||||
|
- 当前应用解析顺序是:指定 `appId` -> 任意默认应用 -> 排序第一条应用;每一步都只检查当前命中的那一条记录是否可见,不会遍历全部可见应用
|
||||||
|
|
||||||
### 4.6 获取会话列表
|
### 4.6 获取会话列表
|
||||||
|
|
||||||
@@ -382,6 +405,7 @@ data: {"event":"error","task_id":"...","message_id":"...","code":"llm_error","me
|
|||||||
说明:
|
说明:
|
||||||
|
|
||||||
- 返回结构是按“问答对”聚合后的结果,不是底层 `rag_message` 原始逐条结果。
|
- 返回结构是按“问答对”聚合后的结果,不是底层 `rag_message` 原始逐条结果。
|
||||||
|
- 分页查询实际先按底层 `rag_message` 行分页,再在服务层折叠成“用户问 + 助手答”结构,所以单页返回条数可能少于 `pageSize`。
|
||||||
|
|
||||||
### 4.8 重命名会话
|
### 4.8 重命名会话
|
||||||
|
|
||||||
@@ -440,12 +464,16 @@ data: {"event":"error","task_id":"...","message_id":"...","code":"llm_error","me
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
可选值:
|
约定值:
|
||||||
|
|
||||||
- `like`
|
- `like`
|
||||||
- `dislike`
|
- `dislike`
|
||||||
- `null`
|
- `null`
|
||||||
|
|
||||||
|
补充说明:
|
||||||
|
|
||||||
|
- 当前 DTO 仅声明 `rating: str | null`,后端不会对取值做枚举校验:除 `null` 外,任何字符串(包括 `like` / `dislike` 以外的值)都会被写入 `rag_message.feedback`。
|
||||||
|
|
||||||
成功响应示例:
|
成功响应示例:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
@@ -474,6 +502,7 @@ data: {"event":"error","task_id":"...","message_id":"...","code":"llm_error","me
|
|||||||
- `fileUpload.image.enabled` 固定为 `false`
|
- `fileUpload.image.enabled` 固定为 `false`
|
||||||
- 检索依赖 Chroma;Chroma 不可用时,接口仍可回答,但会退化成无知识库上下文
|
- 检索依赖 Chroma;Chroma 不可用时,接口仍可回答,但会退化成无知识库上下文
|
||||||
- 建议问题 `suggestedQuestions` 由二次模型调用生成,失败时会降级为空数组
|
- 建议问题 `suggestedQuestions` 由二次模型调用生成,失败时会降级为空数组
|
||||||
|
- `GET /chat/parameters` / `POST /chat/messages` 的应用回退逻辑目前不是“从全部可见应用里挑第一条”,而是“默认应用优先,其次全表第一条”;当默认应用或首条应用不可见时,可能返回空参数对象或出现 `未配置可用聊天应用`
|
||||||
|
|
||||||
## 7. 联调建议
|
## 7. 联调建议
|
||||||
|
|
||||||
|
|||||||
@@ -294,64 +294,62 @@ class DocumentController(BaseController):
|
|||||||
async def GetQueueStatus():
|
async def GetQueueStatus():
|
||||||
"""获取文档处理队列状态。"""
|
"""获取文档处理队列状态。"""
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from fastapi_admin.config import LEAUDIT_WORKER_CONCURRENCY
|
||||||
|
|
||||||
async with GetAsyncSession() as Session:
|
async with GetAsyncSession() as Session:
|
||||||
statusRows = (
|
runStats = (
|
||||||
await Session.execute(
|
await Session.execute(
|
||||||
text(
|
text(
|
||||||
"""
|
"""
|
||||||
SELECT processing_status, COUNT(*) AS cnt
|
SELECT
|
||||||
FROM leaudit_documents
|
COUNT(*) FILTER (WHERE r.status IN ('queued', 'pending', 'retrying'))::int AS pending_tasks,
|
||||||
WHERE deleted_at IS NULL AND is_latest_version = true
|
COUNT(*) FILTER (WHERE r.status = 'running')::int AS processing_tasks
|
||||||
GROUP BY processing_status
|
FROM leaudit_audit_runs r
|
||||||
|
JOIN leaudit_documents d
|
||||||
|
ON d.current_run_id = r.id
|
||||||
|
WHERE d.deleted_at IS NULL
|
||||||
|
AND d.is_latest_version = true
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
).mappings().all()
|
).mappings().first()
|
||||||
|
|
||||||
waiting = 0
|
pending = int((runStats or {}).get("pending_tasks") or 0)
|
||||||
processing = 0
|
processing = int((runStats or {}).get("processing_tasks") or 0)
|
||||||
for row in statusRows:
|
|
||||||
s = str(row["processing_status"] or "")
|
|
||||||
c = int(row["cnt"] or 0)
|
|
||||||
if s == "waiting":
|
|
||||||
waiting = c
|
|
||||||
elif s in ("processing", "running"):
|
|
||||||
processing += c
|
|
||||||
|
|
||||||
processingIdsRows: list[int] = []
|
processingIdsRows = (
|
||||||
if processing > 0:
|
await Session.execute(
|
||||||
async with GetAsyncSession() as Session:
|
text(
|
||||||
idRows = (
|
"""
|
||||||
await Session.execute(
|
SELECT d.id
|
||||||
text(
|
FROM leaudit_documents d
|
||||||
"""
|
JOIN leaudit_audit_runs r
|
||||||
SELECT id FROM leaudit_documents
|
ON r.id = d.current_run_id
|
||||||
WHERE deleted_at IS NULL
|
WHERE d.deleted_at IS NULL
|
||||||
AND is_latest_version = true
|
AND d.is_latest_version = true
|
||||||
AND processing_status IN ('processing', 'running')
|
AND r.status = 'running'
|
||||||
ORDER BY updated_at DESC
|
ORDER BY COALESCE(r.started_at, r.created_at) DESC, d.id DESC
|
||||||
LIMIT 50
|
LIMIT 50
|
||||||
"""
|
"""
|
||||||
)
|
|
||||||
)
|
)
|
||||||
).fetchall()
|
)
|
||||||
processingIdsRows = [int(r[0]) for r in idRows]
|
).fetchall()
|
||||||
|
processingIds = [int(r[0]) for r in processingIdsRows]
|
||||||
|
|
||||||
return Result.success(
|
return Result.success(
|
||||||
data=QueueStatusVO(
|
data=QueueStatusVO(
|
||||||
success=True,
|
success=True,
|
||||||
timestamp=datetime.now().isoformat(),
|
timestamp=datetime.now().isoformat(),
|
||||||
queue={
|
queue={
|
||||||
"pending_tasks": waiting,
|
"pending_tasks": pending,
|
||||||
"processing_tasks": processing,
|
"processing_tasks": processing,
|
||||||
"available_slots": max(0, 4 - processing),
|
"available_slots": max(0, LEAUDIT_WORKER_CONCURRENCY - processing),
|
||||||
"max_concurrent": 4,
|
"max_concurrent": LEAUDIT_WORKER_CONCURRENCY,
|
||||||
},
|
},
|
||||||
documents={
|
documents={
|
||||||
"waiting": waiting,
|
"waiting": pending,
|
||||||
"processing": processing,
|
"processing": processing,
|
||||||
"processing_ids": processingIdsRows,
|
"processing_ids": processingIds,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -77,7 +77,9 @@ async def generate_stream(
|
|||||||
if payload == "[DONE]":
|
if payload == "[DONE]":
|
||||||
break
|
break
|
||||||
chunk = json.loads(payload)
|
chunk = json.loads(payload)
|
||||||
delta = chunk.get("choices", [{}])[0].get("delta", {})
|
choices = chunk.get("choices") or []
|
||||||
|
first_choice = choices[0] if choices and isinstance(choices[0], dict) else {}
|
||||||
|
delta = first_choice.get("delta", {})
|
||||||
text = delta.get("content", "")
|
text = delta.get("content", "")
|
||||||
if text:
|
if text:
|
||||||
yield _sse_line(
|
yield _sse_line(
|
||||||
@@ -141,4 +143,4 @@ async def generate_stream(
|
|||||||
|
|
||||||
|
|
||||||
def _sse_line(data: dict) -> str:
|
def _sse_line(data: dict) -> str:
|
||||||
return f"data: {json.dumps(data, ensure_ascii=False)}\\n\\n"
|
return f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
|
||||||
|
|||||||
Reference in New Issue
Block a user