diff --git a/app/api/dify-dataset/api/documentApi.ts b/app/api/dify-dataset/api/documentApi.ts index d147edf..9f69f0f 100644 --- a/app/api/dify-dataset/api/documentApi.ts +++ b/app/api/dify-dataset/api/documentApi.ts @@ -121,7 +121,7 @@ export async function toggleDocumentStatus( } /** - * 上传文件到知识库 + * 上传文件到知识库(使用默认配置) * * @param datasetId - 知识库 ID * @param file - 文件对象 @@ -132,19 +132,44 @@ export async function uploadDocument( datasetId: string, file: File, onProgress?: (percent: number) => void -): Promise { +): Promise { + return uploadDocumentWithConfig( + datasetId, + file, + { + indexing_technique: 'high_quality', + process_rule: { mode: 'automatic' }, + }, + onProgress + ); +} + +/** + * 上传文件到知识库(使用自定义配置) + * + * @param datasetId - 知识库 ID + * @param file - 文件对象 + * @param config - 上传配置(索引模式、分段规则等) + * @param onProgress - 上传进度回调 + * @returns 创建的文档信息,包含 batch 用于查询索引进度 + */ +export async function uploadDocumentWithConfig( + datasetId: string, + file: File, + config: UploadDocumentConfig, + onProgress?: (percent: number) => void +): Promise { const formData = new FormData(); formData.append('file', file); - formData.append('data', JSON.stringify({ - indexing_technique: 'high_quality', - process_rule: { - mode: 'automatic', - }, - })); + formData.append('data', JSON.stringify(config)); - console.log('[Dataset Client] 上传文档:', { datasetId, fileName: file.name }); + console.log('[Dataset Client] 上传文档:', { + datasetId, + fileName: file.name, + config, + }); - const response = await axios.post( + const response = await axios.post( `${API_URL}/datasets/${datasetId}/documents`, formData, { @@ -200,21 +225,41 @@ export async function fetchUploadFileInfo( return response.data; } +/** + * 预处理规则 ID + */ +export type PreProcessingRuleId = 'remove_extra_spaces' | 'remove_urls_emails'; + +/** + * 预处理规则配置 + */ +export interface PreProcessingRule { + id: PreProcessingRuleId; + enabled: boolean; +} + +/** + * 分段配置 + */ +export interface SegmentationConfig { + separator: string; + max_tokens: number; +} + +/** + * 自定义处理规则 + */ +export interface CustomRules { + pre_processing_rules?: PreProcessingRule[]; + segmentation?: SegmentationConfig; +} + /** * 文档处理规则配置 */ export interface ProcessRule { mode: 'automatic' | 'custom'; - rules?: { - pre_processing_rules?: Array<{ - id: 'remove_extra_spaces' | 'remove_urls_emails'; - enabled: boolean; - }>; - segmentation?: { - separator: string; - max_tokens: number; - }; - }; + rules?: CustomRules; } /** @@ -225,6 +270,45 @@ export interface UpdateDocumentSettings { process_rule?: ProcessRule; } +/** + * 上传文档配置参数 + */ +export interface UploadDocumentConfig { + indexing_technique: 'high_quality' | 'economy'; + process_rule: ProcessRule; +} + +/** + * 上传文档响应 + */ +export interface UploadDocumentResponse { + document: { + id: string; + position: number; + data_source_type: string; + data_source_info: { + upload_file_id: string; + }; + dataset_process_rule_id: string; + name: string; + created_from: string; + created_by: string; + created_at: number; + tokens: number; + indexing_status: string; + error: string | null; + enabled: boolean; + disabled_at: number | null; + disabled_by: string | null; + archived: boolean; + display_status: string; + word_count: number; + hit_count: number; + doc_form: string; + }; + batch: string; +} + /** * 更新文档设置并重新处理 * 注意:Dify API 不直接支持修改已有文档的分段设置 @@ -252,3 +336,50 @@ export async function updateDocumentWithSettings( ); return response.data; } + +/** + * 通过文件更新文档 + * Dify API: POST /datasets/{dataset_id}/documents/{document_id}/update-by-file + * + * 用于在用户修改分段参数后,使用同一文件重新处理文档 + * + * @param datasetId - 知识库 ID + * @param documentId - 文档 ID + * @param file - 文件对象(使用原上传的文件) + * @param config - 新的分段配置 + * @param onProgress - 上传进度回调 + * @returns 更新后的文档信息,包含新的 batch 用于查询索引进度 + */ +export async function updateDocumentByFile( + datasetId: string, + documentId: string, + file: File, + config: UploadDocumentConfig, + onProgress?: (percent: number) => void +): Promise { + const formData = new FormData(); + formData.append('file', file); + formData.append('data', JSON.stringify(config)); + + console.log('[Dataset Client] 通过文件更新文档:', { + datasetId, + documentId, + fileName: file.name, + config, + }); + + const response = await axios.post( + `${API_URL}/datasets/${datasetId}/documents/${documentId}/update-by-file`, + formData, + { + withCredentials: true, + onUploadProgress: (progressEvent) => { + if (progressEvent.total && onProgress) { + const percent = Math.round((progressEvent.loaded * 100) / progressEvent.total); + onProgress(percent); + } + }, + } + ); + return response.data; +} diff --git a/app/api/dify-dataset/api/index.ts b/app/api/dify-dataset/api/index.ts index 3e69d9e..1ed6ee0 100644 --- a/app/api/dify-dataset/api/index.ts +++ b/app/api/dify-dataset/api/index.ts @@ -18,10 +18,19 @@ export { deleteDocument, toggleDocumentStatus, uploadDocument, + uploadDocumentWithConfig, + updateDocumentByFile, fetchIndexingStatus, fetchUploadFileInfo, } from './documentApi'; +// 文档 API 类型 +export type { + ProcessRule, + UploadDocumentConfig, + UploadDocumentResponse, +} from './documentApi'; + // 分段、子分段、检索 API export { fetchSegments, diff --git a/app/components/dify-dataset-manager/document-list.tsx b/app/components/dify-dataset-manager/document-list.tsx index 90945fc..4c204c8 100644 --- a/app/components/dify-dataset-manager/document-list.tsx +++ b/app/components/dify-dataset-manager/document-list.tsx @@ -11,7 +11,6 @@ import { message, Empty, Spin, - Upload, } from 'antd'; import { SearchOutlined, @@ -28,7 +27,8 @@ import { } from '@ant-design/icons'; import type { ColumnsType } from 'antd/es/table'; import type { Document, IndexingStatus } from '~/api/dify-dataset/type/documentTypes'; -import { deleteDocument, toggleDocumentStatus, uploadDocument } from '~/api/dify-dataset/api/documentApi'; +import { deleteDocument, toggleDocumentStatus } from '~/api/dify-dataset/api/documentApi'; +import DocumentUpload from './document-upload'; import '../../styles/components/dify-dataset-manager/index.css'; interface DocumentListProps { @@ -63,9 +63,11 @@ export default function DocumentList({ onViewDocument, }: DocumentListProps) { const [searchValue, setSearchValue] = useState(''); - const [uploading, setUploading] = useState(false); const [deletingId, setDeletingId] = useState(null); + // 显示上传页面的状态 + const [showUploadPage, setShowUploadPage] = useState(false); + /** * 获取状态标签配置 */ @@ -141,29 +143,29 @@ export default function DocumentList({ }; /** - * 处理文件上传 + * 点击上传按钮,显示上传页面 */ - const handleUpload = async (file: File) => { + const handleUploadClick = () => { if (!datasetId) { message.error('请先选择知识库'); - return false; + return; } + setShowUploadPage(true); + }; - setUploading(true); - try { - await uploadDocument(datasetId, file, (percent) => { - console.log('上传进度:', percent); - }); - message.success('上传成功,正在处理...'); - onRefresh(); - } catch (err: any) { - console.error('上传文件失败:', err); - message.error(err.message || '上传失败'); - } finally { - setUploading(false); - } + /** + * 关闭上传页面 + */ + const handleUploadClose = () => { + setShowUploadPage(false); + }; - return false; + /** + * 上传成功回调 + */ + const handleUploadSuccess = () => { + setShowUploadPage(false); + onRefresh(); }; // 过滤文档 @@ -271,115 +273,114 @@ export default function DocumentList({ ]; return ( -
- {/* 页面头部 */} -
-
-

文档

- {/*

- 知识库的所有文件都在这里显示,整个知识库都可以被接到 Dify 引用或通过 Chat 插件进行索引。 -

*/} -
-
- - - -
-
- - {/* 搜索栏 */} -
- } - value={searchValue} - onChange={(e) => setSearchValue(e.target.value)} - allowClear - style={{ width: 280 }} + <> + {/* 上传页面 */} + {showUploadPage ? ( + -
- - {/* 文档表格 */} -
- {loading && documents.length === 0 ? ( -
- -
加载中...
+ ) : ( +
+ {/* 页面头部 */} +
+
+

文档

+
+
+ + +
- ) : filteredDocuments.length === 0 ? ( -
- - {!searchValue && ( - + } + value={searchValue} + onChange={(e) => setSearchValue(e.target.value)} + allowClear + style={{ width: 280 }} + /> +
+ + {/* 文档表格 */} +
+ {loading && documents.length === 0 ? ( +
+ +
加载中...
+
+ ) : filteredDocuments.length === 0 ? ( +
+ + {!searchValue && ( + + )} + +
+ ) : ( + + )} + + + {/* 底部分页器 */} + {filteredDocuments.length > 0 && ( +
+ 共 {total} 条 +
+ - - )} - -
- ) : ( -
- )} - - - {/* 底部分页器 */} - {filteredDocuments.length > 0 && ( -
- 共 {total} 条 -
- - - 第 {page} 页 / 共 {Math.ceil(total / pageSize)} 页 - - -
+ 上一页 + + + 第 {page} 页 / 共 {Math.ceil(total / pageSize)} 页 + + +
+ + )} )} - + ); } diff --git a/app/components/dify-dataset-manager/document-upload.tsx b/app/components/dify-dataset-manager/document-upload.tsx new file mode 100644 index 0000000..8085b8e --- /dev/null +++ b/app/components/dify-dataset-manager/document-upload.tsx @@ -0,0 +1,914 @@ +import { useState, useEffect, useRef, useCallback } from 'react'; +import { + Input, + Button, + InputNumber, + Checkbox, + Select, + Card, + Empty, + Spin, + message, + Divider, + Tooltip, + Progress, + Upload, +} from 'antd'; +import type { UploadFile, UploadProps } from 'antd'; +import { + QuestionCircleOutlined, + FileTextOutlined, + CheckCircleOutlined, + LoadingOutlined, + ExclamationCircleOutlined, + InboxOutlined, + DeleteOutlined, + ArrowLeftOutlined, +} from '@ant-design/icons'; +import type { IndexingStatus } from '~/api/dify-dataset/type/documentTypes'; +import type { Segment } from '~/api/dify-dataset/type'; +import { + uploadDocumentWithConfig, + updateDocumentByFile, + fetchIndexingStatus, +} from '~/api/dify-dataset/api/documentApi'; +import { fetchSegments } from '~/api/dify-dataset/api/segmentApi'; + +const { Dragger } = Upload; + +interface DocumentUploadProps { + datasetId: string; + onClose: () => void; + onSuccess: () => void; +} + +/** + * 分段设置配置 + */ +interface SegmentationSettings { + separator: string; + maxTokens: number; + chunkOverlap: number; + removeExtraSpaces: boolean; + removeUrlsEmails: boolean; + indexingTechnique: 'high_quality' | 'economy'; +} + +/** + * 默认分段设置 + */ +const DEFAULT_SETTINGS: SegmentationSettings = { + separator: '\\n\\n', + maxTokens: 1024, + chunkOverlap: 50, + removeExtraSpaces: true, + removeUrlsEmails: false, + indexingTechnique: 'high_quality', +}; + +/** + * 单个文档的上传状态 + */ +type DocumentStage = 'pending' | 'uploading' | 'indexing' | 'completed' | 'error'; + +/** + * 上传的文档信息(支持多文件) + */ +interface UploadedDocument { + file: File; + documentId: string; + batch: string; + stage: DocumentStage; + indexingStatus: IndexingStatus; + uploadProgress: number; + error?: string; + settings: SegmentationSettings; + segments: Segment[]; +} + +/** + * 索引状态配置 + */ +const INDEXING_STATUS_CONFIG: Record = { + waiting: { text: '等待处理...', percent: 10 }, + parsing: { text: '解析文档...', percent: 30 }, + cleaning: { text: '清洗文本...', percent: 50 }, + splitting: { text: '分段处理...', percent: 70 }, + indexing: { text: '建立索引...', percent: 85 }, + completed: { text: '处理完成', percent: 100 }, + paused: { text: '已暂停', percent: 0 }, + error: { text: '处理失败', percent: 0 }, +}; + +/** + * 支持的文件格式 + */ +const SUPPORTED_FORMATS = 'TXT, MARKDOWN, MDX, PDF, HTML, XLSX, XLS, DOCX, CSV, VTT, PROPERTIES, MD, HTM'; + +/** + * 文档上传组件 + * 支持多文件上传,两步流程:选择文件 → 上传并配置分段 + */ +export default function DocumentUpload({ + datasetId, + onClose, + onSuccess, +}: DocumentUploadProps) { + // 步骤控制 + const [step, setStep] = useState<1 | 2>(1); + + // 文件相关 + const [selectedFiles, setSelectedFiles] = useState([]); + const [fileList, setFileList] = useState([]); + + // 多文档状态管理 + const [uploadedDocuments, setUploadedDocuments] = useState([]); + // 当前选中查看的文档索引 + const [currentDocIndex, setCurrentDocIndex] = useState(0); + + // 当前显示的分段设置(来自当前选中的文档) + const [currentSettings, setCurrentSettings] = useState(DEFAULT_SETTINGS); + + // 预览相关 + const [previewLoading, setPreviewLoading] = useState(false); + + // 轮询定时器(支持多个文档) + const pollingTimersRef = useRef>(new Map()); + + // 清理所有轮询定时器 + useEffect(() => { + return () => { + pollingTimersRef.current.forEach(timer => clearInterval(timer)); + pollingTimersRef.current.clear(); + }; + }, []); + + /** + * 停止指定文档的轮询 + */ + const stopPolling = useCallback((documentId: string) => { + const timer = pollingTimersRef.current.get(documentId); + if (timer) { + clearInterval(timer); + pollingTimersRef.current.delete(documentId); + } + }, []); + + /** + * 停止所有轮询 + */ + const stopAllPolling = useCallback(() => { + pollingTimersRef.current.forEach(timer => clearInterval(timer)); + pollingTimersRef.current.clear(); + }, []); + + /** + * 加载分段预览 + */ + const loadSegmentsPreview = useCallback(async (documentId: string, docIndex: number) => { + setPreviewLoading(true); + try { + const response = await fetchSegments(datasetId, documentId, 1, 50); + const segments = response.data || []; + // 更新对应文档的分段 + setUploadedDocuments(prev => prev.map((doc, idx) => + idx === docIndex ? { ...doc, segments } : doc + )); + } catch (err: any) { + console.error('加载分段预览失败:', err); + message.error('加载分段预览失败'); + } finally { + setPreviewLoading(false); + } + }, [datasetId]); + + /** + * 轮询索引状态 + */ + const pollIndexingStatus = useCallback(async (batch: string, documentId: string, docIndex: number) => { + try { + const response = await fetchIndexingStatus(datasetId, batch); + const documentStatus = response.data?.[0]; + + if (documentStatus) { + const status = documentStatus.indexing_status as IndexingStatus; + + // 更新文档状态 + setUploadedDocuments(prev => prev.map((doc, idx) => { + if (idx !== docIndex) return doc; + return { ...doc, indexingStatus: status }; + })); + + if (status === 'completed') { + stopPolling(documentId); + setUploadedDocuments(prev => prev.map((doc, idx) => + idx === docIndex ? { ...doc, stage: 'completed' } : doc + )); + // message.success(`文档 "${uploadedDocuments[docIndex]?.file.name}" 处理完成!`); + // 自动加载分段预览 + loadSegmentsPreview(documentId, docIndex); + } else if (status === 'error') { + stopPolling(documentId); + setUploadedDocuments(prev => prev.map((doc, idx) => + idx === docIndex ? { ...doc, stage: 'error', error: documentStatus.error || '处理失败' } : doc + )); + } + } + } catch (err) { + console.error('获取索引状态失败:', err); + } + }, [datasetId, stopPolling, loadSegmentsPreview, uploadedDocuments]); + + /** + * 开始轮询 + */ + const startPolling = useCallback((batch: string, documentId: string, docIndex: number) => { + // 先停止之前的轮询 + stopPolling(documentId); + + // 开始新的轮询 + const timer = setInterval(() => { + pollIndexingStatus(batch, documentId, docIndex); + }, 2000); + pollingTimersRef.current.set(documentId, timer); + + // 立即执行一次 + pollIndexingStatus(batch, documentId, docIndex); + }, [stopPolling, pollIndexingStatus]); + + /** + * 构建上传配置 + */ + const buildConfig = (s: SegmentationSettings) => ({ + indexing_technique: s.indexingTechnique, + process_rule: { + mode: 'custom' as const, + rules: { + pre_processing_rules: [ + { id: 'remove_extra_spaces' as const, enabled: s.removeExtraSpaces }, + { id: 'remove_urls_emails' as const, enabled: s.removeUrlsEmails }, + ], + segmentation: { + separator: s.separator.replace(/\\n/g, '\n'), + max_tokens: s.maxTokens, + }, + }, + }, + }); + + /** + * 更新当前文档的设置 + */ + const updateCurrentSettings = (key: keyof SegmentationSettings, value: any) => { + const newSettings = { ...currentSettings, [key]: value }; + setCurrentSettings(newSettings); + // 同步更新到文档列表 + setUploadedDocuments(prev => prev.map((doc, idx) => + idx === currentDocIndex ? { ...doc, settings: newSettings } : doc + )); + }; + + /** + * 处理文件选择变化 + */ + const handleFileChange: UploadProps['onChange'] = ({ fileList: newFileList }) => { + setFileList(newFileList); + // 提取实际文件对象 + const files = newFileList + .filter(f => f.originFileObj) + .map(f => f.originFileObj as File); + setSelectedFiles(files); + }; + + /** + * 移除文件 + */ + const handleRemoveFile = (file: UploadFile) => { + const newFileList = fileList.filter(f => f.uid !== file.uid); + setFileList(newFileList); + const files = newFileList + .filter(f => f.originFileObj) + .map(f => f.originFileObj as File); + setSelectedFiles(files); + }; + + /** + * 上传单个文件 + */ + const uploadSingleFile = async (file: File, index: number): Promise => { + try { + // 更新状态为上传中 + setUploadedDocuments(prev => prev.map((doc, idx) => + idx === index ? { ...doc, stage: 'uploading' as DocumentStage } : doc + )); + + const config = buildConfig(DEFAULT_SETTINGS); + const result = await uploadDocumentWithConfig( + datasetId, + file, + config, + (percent) => { + setUploadedDocuments(prev => prev.map((doc, idx) => + idx === index ? { ...doc, uploadProgress: percent } : doc + )); + } + ); + + // 更新文档信息 + setUploadedDocuments(prev => prev.map((doc, idx) => + idx === index ? { + ...doc, + documentId: result.document.id, + batch: result.batch, + stage: 'indexing' as DocumentStage, + indexingStatus: 'waiting' as IndexingStatus, + } : doc + )); + + // 开始轮询索引状态 + startPolling(result.batch, result.document.id, index); + } catch (err: any) { + console.error(`上传文档 ${file.name} 失败:`, err); + setUploadedDocuments(prev => prev.map((doc, idx) => + idx === index ? { + ...doc, + stage: 'error' as DocumentStage, + error: err.message || '上传失败', + } : doc + )); + } + }; + + /** + * 点击"下一步":立即上传所有文件 + */ + const handleNextStep = async () => { + if (selectedFiles.length === 0) { + message.warning('请先选择文件'); + return; + } + + // 初始化所有文档状态 + const docs: UploadedDocument[] = selectedFiles.map(file => ({ + file, + documentId: '', + batch: '', + stage: 'pending' as DocumentStage, + indexingStatus: 'waiting' as IndexingStatus, + uploadProgress: 0, + settings: { ...DEFAULT_SETTINGS }, + segments: [], + })); + setUploadedDocuments(docs); + setCurrentDocIndex(0); + setCurrentSettings({ ...DEFAULT_SETTINGS }); + setStep(2); + + // 依次上传所有文件 + for (let i = 0; i < selectedFiles.length; i++) { + await uploadSingleFile(selectedFiles[i], i); + } + }; + + /** + * 切换查看的文档 + */ + const handleDocumentChange = (docId: string) => { + const index = uploadedDocuments.findIndex(doc => doc.documentId === docId || doc.file.name === docId); + if (index !== -1) { + setCurrentDocIndex(index); + const doc = uploadedDocuments[index]; + setCurrentSettings(doc.settings); + } + }; + + /** + * 修改参数后重新处理当前文档 + */ + const handleReprocess = async () => { + const currentDoc = uploadedDocuments[currentDocIndex]; + if (!currentDoc || !currentDoc.documentId) return; + + // 更新状态 + setUploadedDocuments(prev => prev.map((doc, idx) => + idx === currentDocIndex ? { + ...doc, + stage: 'uploading' as DocumentStage, + uploadProgress: 0, + segments: [], + } : doc + )); + + try { + const config = buildConfig(currentSettings); + const result = await updateDocumentByFile( + datasetId, + currentDoc.documentId, + currentDoc.file, + config, + (percent) => { + setUploadedDocuments(prev => prev.map((doc, idx) => + idx === currentDocIndex ? { ...doc, uploadProgress: percent } : doc + )); + } + ); + + // 更新 batch + setUploadedDocuments(prev => prev.map((doc, idx) => + idx === currentDocIndex ? { + ...doc, + batch: result.batch, + stage: 'indexing' as DocumentStage, + indexingStatus: 'waiting' as IndexingStatus, + } : doc + )); + + startPolling(result.batch, currentDoc.documentId, currentDocIndex); + } catch (err: any) { + console.error('重新处理失败:', err); + setUploadedDocuments(prev => prev.map((doc, idx) => + idx === currentDocIndex ? { + ...doc, + stage: 'error' as DocumentStage, + error: err.message || '重新处理失败', + } : doc + )); + message.error(err.message || '重新处理失败'); + } + }; + + /** + * 返回上一步 + */ + const handlePrevStep = () => { + // 检查是否有文档正在处理 + const hasProcessing = uploadedDocuments.some(doc => + doc.stage === 'uploading' || doc.stage === 'indexing' + ); + if (hasProcessing) { + message.warning('还有文档正在处理中,请等待完成'); + return; + } + stopAllPolling(); + setStep(1); + setUploadedDocuments([]); + setCurrentDocIndex(0); + setCurrentSettings(DEFAULT_SETTINGS); + }; + + /** + * 返回文档列表 + */ + const handleGoToDocuments = () => { + stopAllPolling(); + const hasCompleted = uploadedDocuments.some(doc => doc.stage === 'completed'); + if (hasCompleted) { + onSuccess(); + } + onClose(); + }; + + /** + * 获取当前文档 + */ + const getCurrentDocument = (): UploadedDocument | null => { + return uploadedDocuments[currentDocIndex] || null; + }; + + /** + * 获取当前文档的进度 + */ + const getCurrentProgress = () => { + const doc = getCurrentDocument(); + if (!doc) return 0; + if (doc.stage === 'uploading') { + return doc.uploadProgress; + } + if (doc.stage === 'indexing' || doc.stage === 'completed') { + return INDEXING_STATUS_CONFIG[doc.indexingStatus]?.percent || 0; + } + return 0; + }; + + /** + * 获取当前文档的状态文本 + */ + const getStatusText = () => { + const doc = getCurrentDocument(); + if (!doc) return ''; + if (doc.stage === 'uploading') { + return `正在上传... ${doc.uploadProgress}%`; + } + if (doc.stage === 'indexing') { + return INDEXING_STATUS_CONFIG[doc.indexingStatus]?.text || '处理中...'; + } + if (doc.stage === 'completed') { + return '处理完成'; + } + if (doc.stage === 'error') { + return doc.error || '处理失败'; + } + return ''; + }; + + /** + * 判断当前文档是否正在处理 + */ + const isCurrentDocProcessing = () => { + const doc = getCurrentDocument(); + return doc?.stage === 'uploading' || doc?.stage === 'indexing'; + }; + + /** + * 获取所有文档的完成状态统计 + */ + const getCompletionStats = () => { + const completed = uploadedDocuments.filter(doc => doc.stage === 'completed').length; + const total = uploadedDocuments.length; + return { completed, total }; + }; + + /** + * 渲染步骤指示器(两步流程) + */ + const renderSteps = () => ( +
+
1 ? 'completed' : ''}`}> + 1 + 选择数据源 +
+
1 ? 'completed' : ''}`}>
+
+ 2 + 文本分段与清洗 +
+
+ ); + + /** + * 渲染第一步:选择文件(支持多文件) + */ + const renderStep1 = () => ( +
+

上传文本文件

+

+ 文档需上传至知识智能理解法治知识库,广东烟草智能理解将按照于知识库,你可以在聊后指数文档所据案中检索它 +

+ +
+ false} + multiple={true} + accept=".txt,.md,.mdx,.pdf,.html,.htm,.xlsx,.xls,.docx,.csv,.vtt,.properties" + showUploadList={false} + > +

+ +

+

拖拽文件或文件夹至此,或者 选择文件

+

+ 已支持 {SUPPORTED_FORMATS},每个文件不超过 15MB。支持批量上传多个文件。 +

+
+
+ + {/* 已选文件列表 */} + {selectedFiles.length > 0 && ( +
+

嵌入已就绪 ({selectedFiles.length} 个文件)

+
+ {fileList.map((file) => ( +
+ +
+ {file.name} + + {file.originFileObj + ? `${file.originFileObj.type?.split('/')[1]?.toUpperCase() || 'FILE'},${(file.originFileObj.size / 1024 / 1024).toFixed(2)}MB` + : ''} + +
+
+ ))} +
+
+ )} + +
+ +
+
+ ); + + /** + * 渲染第二步:分段配置与预览 + * 左侧始终显示配置面板,右侧预览框内显示进度或分段内容 + */ + const renderStep2 = () => { + const currentDoc = getCurrentDocument(); + const isProcessing = isCurrentDocProcessing(); + const stats = getCompletionStats(); + + return ( +
+ {/* 分段配置与预览 */} +
+ {/* 左侧设置区域 */} +
+
+

分段设置

+ + {/* 分段标识符 */} +
+ + updateCurrentSettings('separator', e.target.value)} + placeholder="\n\n" + className="setting-input" + disabled={isProcessing} + /> +
+ + {/* 分段最大长度 */} +
+ +
+ updateCurrentSettings('maxTokens', value || 1024)} + min={100} + max={4000} + className="setting-input-number" + disabled={isProcessing} + /> + characters +
+
+ + {/* 分段重叠长度 */} +
+ +
+ updateCurrentSettings('chunkOverlap', value || 50)} + min={0} + max={500} + className="setting-input-number" + disabled={isProcessing} + /> + characters +
+
+
+ + + + {/* 文本预处理规则 */} +
+

文本预处理规则

+
+ updateCurrentSettings('removeExtraSpaces', e.target.checked)} + disabled={isProcessing} + > + 替换掉连续的空格、换行符和制表符 + + updateCurrentSettings('removeUrlsEmails', e.target.checked)} + disabled={isProcessing} + > + 删除所有 URL 和电子邮件地址 + +
+
+ + + + {/* 索引方式 */} +
+

索引方式

+
+
!isProcessing && updateCurrentSettings('indexingTechnique', 'high_quality')} + > + + 高质量 + 推荐 +
+
!isProcessing && updateCurrentSettings('indexingTechnique', 'economy')} + > + + 经济 +
+
+
+ + {/* 操作按钮 */} +
+ + +
+
+ + {/* 右侧预览区域 */} +
+ + 预览 + {uploadedDocuments.length > 0 && ( + <> +