From dcdc21b90e32d5f9372d2f42020223bf3948cd34 Mon Sep 17 00:00:00 2001 From: PingChuan <1259732256@qq.com> Date: Thu, 4 Dec 2025 17:47:44 +0800 Subject: [PATCH] =?UTF-8?q?feat=EF=BC=9A=E6=96=B0=E5=A2=9Edify=E6=8E=A5?= =?UTF-8?q?=E5=85=A5=E7=9F=A5=E8=AF=86=E5=BA=93=E6=97=B6=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=E6=96=87=E6=A1=A3=E5=B5=8C=E5=85=A5=E5=8F=82=E6=95=B0=E7=9A=84?= =?UTF-8?q?=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/api/dify-dataset/api/documentApi.ts | 39 +++++ .../dify-dataset-manager/document-detail.tsx | 164 +++++++++++------- .../dify-dataset-manager/document-list.tsx | 38 ++-- .../dify-dataset-manager/document-upload.tsx | 2 +- .../dify-dataset-manager/document-detail.ts | 122 ++++++++++++- app/routes/api.dataset.dify-files.$.tsx | 80 +++++++++ .../components/dify-dataset-manager/index.css | 11 ++ .../dify-dataset-manager/document-detail.ts | 32 +++- 8 files changed, 392 insertions(+), 96 deletions(-) create mode 100644 app/routes/api.dataset.dify-files.$.tsx diff --git a/app/api/dify-dataset/api/documentApi.ts b/app/api/dify-dataset/api/documentApi.ts index 9f69f0f..fd9fc8c 100644 --- a/app/api/dify-dataset/api/documentApi.ts +++ b/app/api/dify-dataset/api/documentApi.ts @@ -225,6 +225,44 @@ export async function fetchUploadFileInfo( return response.data; } +/** + * 下载文档原始文件 + * 通过代理路由下载 Dify 知识库中的原始文件 + * + * @param uploadFileInfo - 上传文件信息(从 fetchUploadFileInfo 获取) + * @returns File 对象 + */ +export async function downloadOriginalFile( + uploadFileInfo: UploadFileInfo +): Promise { + if (!uploadFileInfo.download_url) { + throw new Error('无法获取原始文件下载地址'); + } + + // download_url 格式: /files/xxx/file-preview?... + // 转换为代理路由: /api/dataset/dify-files/xxx/file-preview?... + const downloadPath = uploadFileInfo.download_url.replace(/^\/files\//, ''); + const proxyUrl = `${API_URL}/dify-files/${downloadPath}`; + + console.log('[Dataset Client] 下载原始文件:', { + originalUrl: uploadFileInfo.download_url, + proxyUrl, + }); + + const response = await axios.get(proxyUrl, { + responseType: 'blob', + withCredentials: true, + }); + + const file = new File( + [response.data], + uploadFileInfo.name || 'document', + { type: uploadFileInfo.mime_type || 'application/octet-stream' } + ); + + return file; +} + /** * 预处理规则 ID */ @@ -244,6 +282,7 @@ export interface PreProcessingRule { export interface SegmentationConfig { separator: string; max_tokens: number; + chunk_overlap?: number; } /** diff --git a/app/components/dify-dataset-manager/document-detail.tsx b/app/components/dify-dataset-manager/document-detail.tsx index a59fc8b..a087490 100644 --- a/app/components/dify-dataset-manager/document-detail.tsx +++ b/app/components/dify-dataset-manager/document-detail.tsx @@ -3,12 +3,12 @@ import { Button, InputNumber, Checkbox, - Select, Card, Empty, Spin, Divider, Tooltip, + Progress, } from 'antd'; import { QuestionCircleOutlined, @@ -17,6 +17,7 @@ import { } from '@ant-design/icons'; import { useDocumentDetail } from '~/hooks/dify-dataset-manager/document-detail'; import type { DocumentDetailProps } from '~/types/dify-dataset-manager/document-detail'; +import { INDEXING_STATUS_CONFIG } from '~/types/dify-dataset-manager/document-detail'; /** * 文档详情组件 @@ -32,6 +33,8 @@ export default function DocumentDetail({ previewLoading, showPreview, saving, + isProcessing, + indexingStatus, updateSettings, handleReset, handlePreview, @@ -56,7 +59,7 @@ export default function DocumentDetail({

分段设置

{/* 分块模式 */} -
+ {/*
@@ -66,13 +69,13 @@ export default function DocumentDetail({ 通用文本分块模式,检索和召回的块是相同的
-
+
*/} {/* 分段标识符 */}
@@ -80,6 +83,7 @@ export default function DocumentDetail({ value={settings.separator} onChange={(e) => updateSettings('separator', e.target.value)} placeholder="\n\n" + disabled={isProcessing} className="setting-input" />
@@ -98,6 +102,28 @@ export default function DocumentDetail({ onChange={(value) => updateSettings('maxTokens', value || 500)} min={100} max={4000} + disabled={isProcessing} + className="setting-input-number" + /> + characters + + + + {/* 分段重叠长度 */} +
+ +
+ updateSettings('chunkOverlap', value || 50)} + min={0} + max={500} + disabled={isProcessing} className="setting-input-number" /> characters @@ -115,6 +141,7 @@ export default function DocumentDetail({ updateSettings('removeExtraSpaces', e.target.checked)} + disabled={isProcessing} > 替换掉连续的空格、换行符和制表符 @@ -122,6 +149,7 @@ export default function DocumentDetail({ updateSettings('removeUrlsEmails', e.target.checked)} + disabled={isProcessing} > 删除所有 URL 和电子邮件地址 @@ -130,27 +158,25 @@ export default function DocumentDetail({ - {/* Q&A 分段 */} + {/* 索引方式 */}
-
- updateSettings('useQASegment', e.target.checked)} +

索引方式

+
+
!isProcessing && updateSettings('indexingTechnique', 'high_quality')} > - 使用 Q&A 分段,语言 - - +
{document.name}
{showPreview ? `${previewSegments.length} 段块` : '0 段块'} @@ -208,36 +229,59 @@ export default function DocumentDetail({ } className="preview-card" > - {previewLoading ? ( -
- -
加载中...
-
- ) : !showPreview ? ( -
-
- + {/* 处理进度显示 */} + {isProcessing && indexingStatus && ( +
+
+ + 正在处理文档... +
+ +
+ {INDEXING_STATUS_CONFIG[indexingStatus]?.text || '处理中...'}
-

点击左侧的"预览块"按钮来预览

- ) : previewSegments.length === 0 ? ( - - ) : ( -
- {previewSegments.map((segment, index) => ( -
-
- #{index + 1} - - {segment.word_count} 字符 - -
-
- {segment.content} -
+ )} + + {/* 预览内容 */} + {!isProcessing && ( + <> + {previewLoading ? ( +
+ +
加载中...
- ))} -
+ ) : !showPreview ? ( +
+
+ +
+

点击左侧的"预览块"按钮来预览

+
+ ) : previewSegments.length === 0 ? ( + + ) : ( +
+ {previewSegments.map((segment, index) => ( +
+
+ #{index + 1} + + {segment.word_count} 字符 + +
+
+ {segment.content} +
+
+ ))} +
+ )} + )}
diff --git a/app/components/dify-dataset-manager/document-list.tsx b/app/components/dify-dataset-manager/document-list.tsx index 4da0c78..8ba10e4 100644 --- a/app/components/dify-dataset-manager/document-list.tsx +++ b/app/components/dify-dataset-manager/document-list.tsx @@ -1,29 +1,29 @@ import { - Button, - Input, - Table, - Tag, - Space, - Tooltip, - Popconfirm, - Switch, - Empty, - Spin, -} from 'antd'; -import { - SearchOutlined, - ReloadOutlined, + CloudUploadOutlined, DeleteOutlined, FileTextOutlined, - CloudUploadOutlined, - EyeOutlined, + ReloadOutlined, + SearchOutlined, + UnorderedListOutlined } from '@ant-design/icons'; +import { + Button, + Empty, + Input, + Popconfirm, + Space, + Spin, + Switch, + Table, + Tag, + Tooltip, +} from 'antd'; import type { ColumnsType } from 'antd/es/table'; import type { Document, IndexingStatus } from '~/api/dify-dataset/type/documentTypes'; import { useDocumentList } from '~/hooks/dify-dataset-manager/document-list'; import type { DocumentListProps } from '~/types/dify-dataset-manager/document-list'; -import DocumentUpload from './document-upload'; import '../../styles/components/dify-dataset-manager/index.css'; +import DocumentUpload from './document-upload'; /** * 文档列表组件 @@ -128,11 +128,11 @@ export default function DocumentList({ width: 120, render: (_, record) => ( - +