diff --git a/docs/RAG/RAG聊天接口.md b/docs/RAG/RAG聊天接口.md index b2a14a7..44a4313 100644 --- a/docs/RAG/RAG聊天接口.md +++ b/docs/RAG/RAG聊天接口.md @@ -1,6 +1,7 @@ # RAG 聊天接口 -> 最后整理:2026-05-07 +> 最后整理:2026-05-11 +> 本文已按当前后端实现逐项核对 > 对应后端:`fastapi_modules/fastapi_leaudit/controllers/ragChatController.py` > 统一前缀:`/api/v3/rag` @@ -237,6 +238,27 @@ JWT payload 至少会被后端消费这些字段: } ``` +补充说明: + +- 如果 `appId` 不存在、不可见,或者系统默认应用 / 排序第一条应用均不可见,当前实现不会报 404,而是返回一个空参数对象: + +```json +{ + "code": 200, + "msg": "success", + "data": { + "openingStatement": "", + "suggestedQuestions": [], + "userInputForm": [], + "fileUpload": { + "image": { + "enabled": false + } + } + } +} +``` + ### 4.5 发起流式对话 `POST /api/v3/rag/chat/messages` @@ -256,7 +278,7 @@ JWT payload 至少会被后端消费这些字段: | 字段 | 类型 | 必填 | 说明 | |------|------|------|------| | `query` | string | 是 | 用户问题,不能为空 | -| `conversationId` | string \| null | 否 | 会话 ID;新对话可传 `null` 或不传 | +| `conversationId` | string \| null | 否 | 会话 ID;新对话可传 `null`、不传,当前实现也把 `"-1"` 视为新对话 | | `appId` | int \| null | 否 | 应用 ID;不传则自动回退默认应用 | 返回类型: @@ -291,6 +313,7 @@ data: {"event":"error","task_id":"...","message_id":"...","code":"llm_error","me - 流结束后会落一条 `role = assistant` 消息 - 若命中知识库,会把引用结果写入 `sources / metadata` - 会根据对话内容追加 `suggested_questions` +- 当前应用解析顺序是:指定 `appId` -> 任意默认应用 -> 排序第一条应用;每一步都只检查当前命中的那一条记录是否可见,不会遍历全部可见应用 ### 4.6 获取会话列表 @@ -382,6 +405,7 @@ data: {"event":"error","task_id":"...","message_id":"...","code":"llm_error","me 说明: - 返回结构是按“问答对”聚合后的结果,不是底层 `rag_message` 原始逐条结果。 +- 分页查询实际先按底层 `rag_message` 行分页,再在服务层折叠成“用户问 + 助手答”结构,所以单页返回条数可能少于 `pageSize`。 ### 4.8 重命名会话 @@ -440,12 +464,16 @@ data: {"event":"error","task_id":"...","message_id":"...","code":"llm_error","me } ``` -可选值: +约定值: - `like` - `dislike` - `null` +补充说明: + +- 当前 DTO 仅声明 `rating: str | null`,后端不会强校验枚举,除 `null` 外其余字符串也会被写入 `rag_message.feedback`。 + 成功响应示例: ```json @@ -474,6 +502,7 @@ data: {"event":"error","task_id":"...","message_id":"...","code":"llm_error","me - `fileUpload.image.enabled` 固定为 
`false` - 检索依赖 Chroma;Chroma 不可用时,接口仍可回答,但会退化成无知识库上下文 - 建议问题 `suggestedQuestions` 由二次模型调用生成,失败时会降级为空数组 +- `GET /chat/parameters` / `POST /chat/messages` 的应用回退逻辑目前不是“从全部可见应用里挑第一条”,而是“默认应用优先,其次全表第一条”;当默认应用或首条应用不可见时,可能出现空参数或 `未配置可用聊天应用` ## 7. 联调建议 diff --git a/fastapi_modules/fastapi_leaudit/controllers/documentController.py b/fastapi_modules/fastapi_leaudit/controllers/documentController.py index d5ddc03..e9c6e4b 100644 --- a/fastapi_modules/fastapi_leaudit/controllers/documentController.py +++ b/fastapi_modules/fastapi_leaudit/controllers/documentController.py @@ -294,64 +294,62 @@ class DocumentController(BaseController): async def GetQueueStatus(): """获取文档处理队列状态。""" from datetime import datetime + from fastapi_admin.config import LEAUDIT_WORKER_CONCURRENCY async with GetAsyncSession() as Session: - statusRows = ( + runStats = ( await Session.execute( text( """ - SELECT processing_status, COUNT(*) AS cnt - FROM leaudit_documents - WHERE deleted_at IS NULL AND is_latest_version = true - GROUP BY processing_status + SELECT + COUNT(*) FILTER (WHERE r.status IN ('queued', 'pending', 'retrying'))::int AS pending_tasks, + COUNT(*) FILTER (WHERE r.status = 'running')::int AS processing_tasks + FROM leaudit_audit_runs r + JOIN leaudit_documents d + ON d.current_run_id = r.id + WHERE d.deleted_at IS NULL + AND d.is_latest_version = true """ ) ) - ).mappings().all() + ).mappings().first() - waiting = 0 - processing = 0 - for row in statusRows: - s = str(row["processing_status"] or "") - c = int(row["cnt"] or 0) - if s == "waiting": - waiting = c - elif s in ("processing", "running"): - processing += c + pending = int((runStats or {}).get("pending_tasks") or 0) + processing = int((runStats or {}).get("processing_tasks") or 0) - processingIdsRows: list[int] = [] - if processing > 0: - async with GetAsyncSession() as Session: - idRows = ( - await Session.execute( - text( - """ - SELECT id FROM leaudit_documents - WHERE deleted_at IS NULL - AND is_latest_version = true - AND 
processing_status IN ('processing', 'running') - ORDER BY updated_at DESC - LIMIT 50 - """ - ) + processingIdsRows = ( + await Session.execute( + text( + """ + SELECT d.id + FROM leaudit_documents d + JOIN leaudit_audit_runs r + ON r.id = d.current_run_id + WHERE d.deleted_at IS NULL + AND d.is_latest_version = true + AND r.status = 'running' + ORDER BY COALESCE(r.started_at, r.created_at) DESC, d.id DESC + LIMIT 50 + """ ) - ).fetchall() - processingIdsRows = [int(r[0]) for r in idRows] + ) + ).fetchall() + processingIds = [int(r[0]) for r in processingIdsRows] return Result.success( data=QueueStatusVO( success=True, timestamp=datetime.now().isoformat(), queue={ - "pending_tasks": waiting, + "pending_tasks": pending, "processing_tasks": processing, - "available_slots": max(0, 4 - processing), - "max_concurrent": 4, + "available_slots": max(0, LEAUDIT_WORKER_CONCURRENCY - processing), + "max_concurrent": LEAUDIT_WORKER_CONCURRENCY, }, documents={ - "waiting": waiting, + "waiting": pending, "processing": processing, - "processing_ids": processingIdsRows, + "processing_ids": processingIds, }, ) ) diff --git a/fastapi_modules/fastapi_leaudit/rag_engine/generator.py b/fastapi_modules/fastapi_leaudit/rag_engine/generator.py index bf9c92f..08076c9 100644 --- a/fastapi_modules/fastapi_leaudit/rag_engine/generator.py +++ b/fastapi_modules/fastapi_leaudit/rag_engine/generator.py @@ -77,7 +77,9 @@ async def generate_stream( if payload == "[DONE]": break chunk = json.loads(payload) - delta = chunk.get("choices", [{}])[0].get("delta", {}) + choices = chunk.get("choices") or [] + first_choice = choices[0] if choices and isinstance(choices[0], dict) else {} + delta = first_choice.get("delta") or {} text = delta.get("content", "") if text: yield _sse_line( @@ -141,4 +143,4 @@ async def generate_stream( def _sse_line(data: dict) -> str: - return f"data: {json.dumps(data, ensure_ascii=False)}\\n\\n" + return f"data: {json.dumps(data, ensure_ascii=False)}\n\n"