diff --git a/app/api/dify-dataset/api/documentApi.ts b/app/api/dify-dataset/api/documentApi.ts index 9f69f0f..fd9fc8c 100644 --- a/app/api/dify-dataset/api/documentApi.ts +++ b/app/api/dify-dataset/api/documentApi.ts @@ -225,6 +225,44 @@ export async function fetchUploadFileInfo( return response.data; } +/** + * 下载文档原始文件 + * 通过代理路由下载 Dify 知识库中的原始文件 + * + * @param uploadFileInfo - 上传文件信息(从 fetchUploadFileInfo 获取) + * @returns File 对象 + */ +export async function downloadOriginalFile( + uploadFileInfo: UploadFileInfo +): Promise { + if (!uploadFileInfo.download_url) { + throw new Error('无法获取原始文件下载地址'); + } + + // download_url 格式: /files/xxx/file-preview?... + // 转换为代理路由: /api/dataset/dify-files/xxx/file-preview?... + const downloadPath = uploadFileInfo.download_url.replace(/^\/files\//, ''); + const proxyUrl = `${API_URL}/dify-files/${downloadPath}`; + + console.log('[Dataset Client] 下载原始文件:', { + originalUrl: uploadFileInfo.download_url, + proxyUrl, + }); + + const response = await axios.get(proxyUrl, { + responseType: 'blob', + withCredentials: true, + }); + + const file = new File( + [response.data], + uploadFileInfo.name || 'document', + { type: uploadFileInfo.mime_type || 'application/octet-stream' } + ); + + return file; +} + /** * 预处理规则 ID */ @@ -244,6 +282,7 @@ export interface PreProcessingRule { export interface SegmentationConfig { separator: string; max_tokens: number; + chunk_overlap?: number; } /** diff --git a/app/components/dify-dataset-manager/document-detail.tsx b/app/components/dify-dataset-manager/document-detail.tsx index a59fc8b..a087490 100644 --- a/app/components/dify-dataset-manager/document-detail.tsx +++ b/app/components/dify-dataset-manager/document-detail.tsx @@ -3,12 +3,12 @@ import { Button, InputNumber, Checkbox, - Select, Card, Empty, Spin, Divider, Tooltip, + Progress, } from 'antd'; import { QuestionCircleOutlined, @@ -17,6 +17,7 @@ import { } from '@ant-design/icons'; import { useDocumentDetail } from '~/hooks/dify-dataset-manager/document-detail'; import type { DocumentDetailProps } from '~/types/dify-dataset-manager/document-detail'; +import { INDEXING_STATUS_CONFIG } from '~/types/dify-dataset-manager/document-detail'; /** * 文档详情组件 @@ -32,6 +33,8 @@ export default function DocumentDetail({ previewLoading, showPreview, saving, + isProcessing, + indexingStatus, updateSettings, handleReset, handlePreview, @@ -56,7 +59,7 @@ export default function DocumentDetail({

分段设置

{/* 分块模式 */} -
+ {/*
@@ -66,13 +69,13 @@ export default function DocumentDetail({ 通用文本分块模式,检索和召回的块是相同的
-
+
*/} {/* 分段标识符 */}
@@ -80,6 +83,7 @@ export default function DocumentDetail({ value={settings.separator} onChange={(e) => updateSettings('separator', e.target.value)} placeholder="\n\n" + disabled={isProcessing} className="setting-input" />
@@ -98,6 +102,28 @@ export default function DocumentDetail({ onChange={(value) => updateSettings('maxTokens', value || 500)} min={100} max={4000} + disabled={isProcessing} + className="setting-input-number" + /> + characters + + + + {/* 分段重叠长度 */} +
+ +
+ updateSettings('chunkOverlap', value || 50)} + min={0} + max={500} + disabled={isProcessing} className="setting-input-number" /> characters @@ -115,6 +141,7 @@ export default function DocumentDetail({ updateSettings('removeExtraSpaces', e.target.checked)} + disabled={isProcessing} > 替换掉连续的空格、换行符和制表符 @@ -122,6 +149,7 @@ export default function DocumentDetail({ updateSettings('removeUrlsEmails', e.target.checked)} + disabled={isProcessing} > 删除所有 URL 和电子邮件地址 @@ -130,27 +158,25 @@ export default function DocumentDetail({ - {/* Q&A 分段 */} + {/* 索引方式 */}
-
- updateSettings('useQASegment', e.target.checked)} +

索引方式

+
+
!isProcessing && updateSettings('indexingTechnique', 'high_quality')} > - 使用 Q&A 分段,语言 - - +
{document.name}
{showPreview ? `${previewSegments.length} 段块` : '0 段块'} @@ -208,36 +229,59 @@ export default function DocumentDetail({ } className="preview-card" > - {previewLoading ? ( -
- -
加载中...
-
- ) : !showPreview ? ( -
-
- + {/* 处理进度显示 */} + {isProcessing && indexingStatus && ( +
+
+ + 正在处理文档... +
+ +
+ {INDEXING_STATUS_CONFIG[indexingStatus]?.text || '处理中...'}
-

点击左侧的"预览块"按钮来预览

- ) : previewSegments.length === 0 ? ( - - ) : ( -
- {previewSegments.map((segment, index) => ( -
-
- #{index + 1} - - {segment.word_count} 字符 - -
-
- {segment.content} -
+ )} + + {/* 预览内容 */} + {!isProcessing && ( + <> + {previewLoading ? ( +
+ +
加载中...
- ))} -
+ ) : !showPreview ? ( +
+
+ +
+

点击左侧的"预览块"按钮来预览

+
+ ) : previewSegments.length === 0 ? ( + + ) : ( +
+ {previewSegments.map((segment, index) => ( +
+
+ #{index + 1} + + {segment.word_count} 字符 + +
+
+ {segment.content} +
+
+ ))} +
+ )} + )}
diff --git a/app/components/dify-dataset-manager/document-list.tsx b/app/components/dify-dataset-manager/document-list.tsx index 4da0c78..8ba10e4 100644 --- a/app/components/dify-dataset-manager/document-list.tsx +++ b/app/components/dify-dataset-manager/document-list.tsx @@ -1,29 +1,29 @@ import { - Button, - Input, - Table, - Tag, - Space, - Tooltip, - Popconfirm, - Switch, - Empty, - Spin, -} from 'antd'; -import { - SearchOutlined, - ReloadOutlined, + CloudUploadOutlined, DeleteOutlined, FileTextOutlined, - CloudUploadOutlined, - EyeOutlined, + ReloadOutlined, + SearchOutlined, + UnorderedListOutlined } from '@ant-design/icons'; +import { + Button, + Empty, + Input, + Popconfirm, + Space, + Spin, + Switch, + Table, + Tag, + Tooltip, +} from 'antd'; import type { ColumnsType } from 'antd/es/table'; import type { Document, IndexingStatus } from '~/api/dify-dataset/type/documentTypes'; import { useDocumentList } from '~/hooks/dify-dataset-manager/document-list'; import type { DocumentListProps } from '~/types/dify-dataset-manager/document-list'; -import DocumentUpload from './document-upload'; import '../../styles/components/dify-dataset-manager/index.css'; +import DocumentUpload from './document-upload'; /** * 文档列表组件 @@ -128,11 +128,11 @@ export default function DocumentList({ width: 120, render: (_, record) => ( - +