import { useState, useEffect, useRef, useCallback } from 'react';
import {
  Input,
  Button,
  InputNumber,
  Checkbox,
  Select,
  Card,
  Empty,
  Spin,
  message,
  Divider,
  Tooltip,
  Progress,
  Upload,
} from 'antd';
import type { UploadFile, UploadProps } from 'antd';
import {
  QuestionCircleOutlined,
  FileTextOutlined,
  CheckCircleOutlined,
  LoadingOutlined,
  ExclamationCircleOutlined,
  InboxOutlined,
  DeleteOutlined,
  ArrowLeftOutlined,
} from '@ant-design/icons';
import type { IndexingStatus } from '~/api/dify-dataset/type/documentTypes';
import type { Segment } from '~/api/dify-dataset/type';
import {
  uploadDocumentWithConfig,
  updateDocumentByFile,
  fetchIndexingStatus,
} from '~/api/dify-dataset/api/documentApi';
import { fetchSegments } from '~/api/dify-dataset/api/segmentApi';

const { Dragger } = Upload;

interface DocumentUploadProps {
  datasetId: string;
  onClose: () => void;
  onSuccess: () => void;
}

/**
 * Segmentation settings edited in step 2.
 */
interface SegmentationSettings {
  separator: string;
  maxTokens: number;
  chunkOverlap: number;
  removeExtraSpaces: boolean;
  removeUrlsEmails: boolean;
  indexingTechnique: 'high_quality' | 'economy';
}

/**
 * Default segmentation settings.
 *
 * `separator` holds the two-character escape text `\n` (backslash + n) so it can
 * be shown and edited in a text input; `buildConfig` converts it to real newlines.
 */
const DEFAULT_SETTINGS: SegmentationSettings = {
  separator: '\\n\\n',
  maxTokens: 1024,
  chunkOverlap: 50,
  removeExtraSpaces: true,
  removeUrlsEmails: false,
  indexingTechnique: 'high_quality',
};

/**
 * Upload/processing stage of a single document.
 */
type DocumentStage = 'pending' | 'uploading' | 'indexing' | 'completed' | 'error';

/**
 * Per-file upload state (multiple files are supported).
 */
interface UploadedDocument {
  file: File;
  documentId: string;
  batch: string;
  stage: DocumentStage;
  indexingStatus: IndexingStatus;
  uploadProgress: number;
  error?: string;
  settings: SegmentationSettings;
  segments: Segment[];
}

/**
 * Display text and progress percentage for one indexing status.
 */
interface IndexingStatusDisplay {
  text: string;
  percent: number;
}

/**
 * Indexing status -> display configuration.
 *
 * NOTE(review): keyed by `string` because the members of `IndexingStatus` are
 * declared elsewhere; tighten to `Record<IndexingStatus, ...>` if they match
 * these keys exactly.
 */
const INDEXING_STATUS_CONFIG: Record<string, IndexingStatusDisplay> = {
  waiting: { text: '等待处理...', percent: 10 },
  parsing: { text: '解析文档...', percent: 30 },
  cleaning: { text: '清洗文本...', percent: 50 },
  splitting: { text: '分段处理...', percent: 70 },
  indexing: { text: '建立索引...', percent: 85 },
  completed: { text: '处理完成', percent: 100 },
  paused: { text: '已暂停', percent: 0 },
  error: { text: '处理失败', percent: 0 },
};

/**
 * Supported file formats (display text).
 */
const SUPPORTED_FORMATS =
  'TXT, MARKDOWN, MDX, PDF, HTML, XLSX, XLS, DOCX, CSV, VTT, PROPERTIES, MD, HTM';

/** Interval between two indexing-status polls, in milliseconds. */
const POLL_INTERVAL_MS = 2000;

/**
 * Extracts a non-empty `message` string from an arbitrary thrown value,
 * falling back to `fallback` (also for `null`/`undefined`/string rejections).
 */
function toErrorMessage(err: unknown, fallback: string): string {
  if (typeof err === 'object' && err !== null && 'message' in err) {
    const { message: text } = err as { message?: unknown };
    if (typeof text === 'string' && text) return text;
  }
  return fallback;
}

/**
 * Returns the native `File` objects of the upload-list entries that carry one.
 */
function toNativeFiles(list: UploadFile[]): File[] {
  return list.filter(item => item.originFileObj).map(item => item.originFileObj as File);
}

/**
 * Document upload component.
 * Supports multiple files in a two-step flow: select files -> upload and
 * configure segmentation.
 */
export default function DocumentUpload({
  datasetId,
  onClose,
  onSuccess,
}: DocumentUploadProps) {
  // Step control
  const [step, setStep] = useState<1 | 2>(1);

  // File selection
  const [selectedFiles, setSelectedFiles] = useState<File[]>([]);
  const [fileList, setFileList] = useState<UploadFile[]>([]);

  // State of every uploaded document
  const [uploadedDocuments, setUploadedDocuments] = useState<UploadedDocument[]>([]);

  // Index of the document currently being viewed
  const [currentDocIndex, setCurrentDocIndex] = useState(0);

  // Segmentation settings currently shown (taken from the selected document)
  const [currentSettings, setCurrentSettings] = useState<SegmentationSettings>(DEFAULT_SETTINGS);

  // Segment preview loading flag
  const [previewLoading, setPreviewLoading] = useState(false);

  // Polling timers, keyed by document id (one per document)
  const pollingTimersRef = useRef<Map<string, ReturnType<typeof setInterval>>>(new Map());

  // Set once the component has unmounted so late uploads cannot start new timers
  const isUnmountedRef = useRef(false);

  /**
   * Merges `patch` into the document at `index`, leaving the others untouched.
   */
  const patchDocument = useCallback((index: number, patch: Partial<UploadedDocument>) => {
    setUploadedDocuments(prev =>
      prev.map((doc, idx) => (idx === index ? { ...doc, ...patch } : doc)),
    );
  }, []);

  /**
   * Stops polling for one document.
   */
  const stopPolling = useCallback((documentId: string) => {
    const timer = pollingTimersRef.current.get(documentId);
    if (timer) {
      clearInterval(timer);
      pollingTimersRef.current.delete(documentId);
    }
  }, []);

  /**
   * Stops polling for all documents.
   */
  const stopAllPolling = useCallback(() => {
    pollingTimersRef.current.forEach(timer => clearInterval(timer));
    pollingTimersRef.current.clear();
  }, []);

  // Clear every polling timer on unmount. The flag is reset in the effect body
  // so a re-run of the effect (e.g. StrictMode remount) re-enables polling.
  useEffect(() => {
    isUnmountedRef.current = false;
    return () => {
      isUnmountedRef.current = true;
      stopAllPolling();
    };
  }, [stopAllPolling]);

  /**
   * Loads the segment preview (first page, 50 items) for a document and stores
   * it on the document at `docIndex`.
   */
  const loadSegmentsPreview = useCallback(
    async (documentId: string, docIndex: number) => {
      setPreviewLoading(true);
      try {
        const response = await fetchSegments(datasetId, documentId, 1, 50);
        patchDocument(docIndex, { segments: response.data ?? [] });
      } catch (err: unknown) {
        console.error('加载分段预览失败:', err);
        message.error('加载分段预览失败');
      } finally {
        setPreviewLoading(false);
      }
    },
    [datasetId, patchDocument],
  );

  /**
   * Fetches the indexing status of `batch` once and applies it to the document
   * at `docIndex`. Stops polling when the status is `completed` or `error`.
   *
   * All state updates are functional, so this callback deliberately does not
   * depend on `uploadedDocuments`.
   */
  const pollIndexingStatus = useCallback(
    async (batch: string, documentId: string, docIndex: number) => {
      try {
        const response = await fetchIndexingStatus(datasetId, batch);

        // A slower request from an earlier tick can resolve after polling was
        // stopped (completed, failed, or reset); ignore it so it cannot
        // overwrite the final state or load the preview twice.
        if (!pollingTimersRef.current.has(documentId)) return;

        const documentStatus = response.data?.[0];
        if (!documentStatus) return;

        const status = documentStatus.indexing_status as IndexingStatus;

        if (status === 'completed') {
          stopPolling(documentId);
          patchDocument(docIndex, { indexingStatus: status, stage: 'completed' });
          // Load the segment preview automatically
          void loadSegmentsPreview(documentId, docIndex);
        } else if (status === 'error') {
          stopPolling(documentId);
          patchDocument(docIndex, {
            indexingStatus: status,
            stage: 'error',
            error: documentStatus.error || '处理失败',
          });
        } else {
          patchDocument(docIndex, { indexingStatus: status });
        }
      } catch (err: unknown) {
        // Best effort: a failed poll is logged and retried on the next tick.
        console.error('获取索引状态失败:', err);
      }
    },
    [datasetId, stopPolling, patchDocument, loadSegmentsPreview],
  );

  /**
   * Starts polling the indexing status of a document, replacing any previous
   * poller for the same document id. Does nothing after unmount.
   */
  const startPolling = useCallback(
    (batch: string, documentId: string, docIndex: number) => {
      // Stop the previous poller first
      stopPolling(documentId);

      // An upload may finish after the component unmounted; a timer created
      // now would never be cleared.
      if (isUnmountedRef.current) return;

      const timer = setInterval(() => {
        void pollIndexingStatus(batch, documentId, docIndex);
      }, POLL_INTERVAL_MS);
      pollingTimersRef.current.set(documentId, timer);

      // Run once immediately (after the timer is registered, see the stale
      // response guard in pollIndexingStatus)
      void pollIndexingStatus(batch, documentId, docIndex);
    },
    [stopPolling, pollIndexingStatus],
  );

  /**
   * Builds the upload configuration from segmentation settings.
   */
  const buildConfig = (s: SegmentationSettings) => ({
    indexing_technique: s.indexingTechnique,
    process_rule: {
      mode: 'custom' as const,
      rules: {
        pre_processing_rules: [
          { id: 'remove_extra_spaces' as const, enabled: s.removeExtraSpaces },
          { id: 'remove_urls_emails' as const, enabled: s.removeUrlsEmails },
        ],
        segmentation: {
          // The input shows the escape text `\n`; send real newlines.
          separator: s.separator.replace(/\\n/g, '\n'),
          max_tokens: s.maxTokens,
          // Previously the overlap setting was editable but never sent.
          // NOTE(review): field name follows Dify's segmentation rule schema —
          // confirm against the deployed API version.
          chunk_overlap: s.chunkOverlap,
        },
      },
    },
  });

  /**
   * Updates one setting of the currently selected document.
   */
  const updateCurrentSettings = (
    key: keyof SegmentationSettings,
    value: SegmentationSettings[keyof SegmentationSettings],
  ) => {
    const newSettings: SegmentationSettings = { ...currentSettings, [key]: value };
    setCurrentSettings(newSettings);
    // Keep the document list in sync
    patchDocument(currentDocIndex, { settings: newSettings });
  };

  /**
   * Handles changes of the file selection.
   */
  const handleFileChange: UploadProps['onChange'] = ({ fileList: newFileList }) => {
    setFileList(newFileList);
    setSelectedFiles(toNativeFiles(newFileList));
  };

  /**
   * Removes a file from the selection.
   */
  const handleRemoveFile = (file: UploadFile) => {
    const newFileList = fileList.filter(f => f.uid !== file.uid);
    setFileList(newFileList);
    setSelectedFiles(toNativeFiles(newFileList));
  };

  /**
   * Uploads a single file with the default settings and starts polling its
   * indexing status. Failures are recorded on the document, not thrown.
   */
  const uploadSingleFile = async (file: File, index: number): Promise<void> => {
    try {
      patchDocument(index, { stage: 'uploading' });

      const config = buildConfig(DEFAULT_SETTINGS);
      const result = await uploadDocumentWithConfig(datasetId, file, config, (percent) => {
        patchDocument(index, { uploadProgress: percent });
      });

      patchDocument(index, {
        documentId: result.document.id,
        batch: result.batch,
        stage: 'indexing',
        indexingStatus: 'waiting' as IndexingStatus,
      });

      startPolling(result.batch, result.document.id, index);
    } catch (err: unknown) {
      console.error(`上传文档 ${file.name} 失败:`, err);
      patchDocument(index, {
        stage: 'error',
        error: toErrorMessage(err, '上传失败'),
      });
    }
  };

  /**
   * "Next" click: switches to step 2 and uploads all selected files.
   */
  const handleNextStep = async () => {
    if (selectedFiles.length === 0) {
      message.warning('请先选择文件');
      return;
    }

    // Initialise the state of every document
    const docs: UploadedDocument[] = selectedFiles.map(file => ({
      file,
      documentId: '',
      batch: '',
      stage: 'pending' as DocumentStage,
      indexingStatus: 'waiting' as IndexingStatus,
      uploadProgress: 0,
      settings: { ...DEFAULT_SETTINGS },
      segments: [],
    }));

    setUploadedDocuments(docs);
    setCurrentDocIndex(0);
    setCurrentSettings({ ...DEFAULT_SETTINGS });
    setStep(2);

    // Upload the files one after another (sequential on purpose)
    for (let i = 0; i < selectedFiles.length; i++) {
      await uploadSingleFile(selectedFiles[i], i);
    }
  };

  /**
   * Switches the document being viewed. `docId` is matched against the
   * document id first, then the file name.
   */
  const handleDocumentChange = (docId: string) => {
    const index = uploadedDocuments.findIndex(
      doc => doc.documentId === docId || doc.file.name === docId,
    );
    if (index === -1) return;

    setCurrentDocIndex(index);
    setCurrentSettings(uploadedDocuments[index].settings);
  };

  /**
   * Re-processes the current document with the edited settings.
   */
  const handleReprocess = async () => {
    const docIndex = currentDocIndex;
    const currentDoc = uploadedDocuments[docIndex];
    if (!currentDoc || !currentDoc.documentId) return;

    patchDocument(docIndex, { stage: 'uploading', uploadProgress: 0, segments: [] });

    try {
      const config = buildConfig(currentSettings);
      const result = await updateDocumentByFile(
        datasetId,
        currentDoc.documentId,
        currentDoc.file,
        config,
        (percent) => {
          patchDocument(docIndex, { uploadProgress: percent });
        },
      );

      // Record the new batch
      patchDocument(docIndex, {
        batch: result.batch,
        stage: 'indexing',
        indexingStatus: 'waiting' as IndexingStatus,
      });

      startPolling(result.batch, currentDoc.documentId, docIndex);
    } catch (err: unknown) {
      console.error('重新处理失败:', err);
      const reason = toErrorMessage(err, '重新处理失败');
      patchDocument(docIndex, { stage: 'error', error: reason });
      message.error(reason);
    }
  };

  /**
   * Goes back to step 1; refused while any document is still processing.
   */
  const handlePrevStep = () => {
    const hasProcessing = uploadedDocuments.some(
      doc => doc.stage === 'uploading' || doc.stage === 'indexing',
    );

    if (hasProcessing) {
      message.warning('还有文档正在处理中,请等待完成');
      return;
    }

    stopAllPolling();
    setStep(1);
    setUploadedDocuments([]);
    setCurrentDocIndex(0);
    setCurrentSettings(DEFAULT_SETTINGS);
  };

  /**
   * Returns to the document list; notifies `onSuccess` when at least one
   * document completed.
   */
  const handleGoToDocuments = () => {
    stopAllPolling();
    const hasCompleted = uploadedDocuments.some(doc => doc.stage === 'completed');
    if (hasCompleted) {
      onSuccess();
    }
    onClose();
  };

  /**
   * Returns the currently selected document, or `null` when there is none.
   */
  const getCurrentDocument = (): UploadedDocument | null => {
    return uploadedDocuments[currentDocIndex] || null;
  };

  /**
   * Returns the progress percentage of the current document.
   */
  const getCurrentProgress = (): number => {
    const doc = getCurrentDocument();
    if (!doc) return 0;

    switch (doc.stage) {
      case 'uploading':
        return doc.uploadProgress;
      case 'indexing':
      case 'completed':
        return INDEXING_STATUS_CONFIG[doc.indexingStatus]?.percent ?? 0;
      default:
        return 0;
    }
  };

  /**
   * Returns the status text of the current document.
   */
  const getStatusText = (): string => {
    const doc = getCurrentDocument();
    if (!doc) return '';

    switch (doc.stage) {
      case 'uploading':
        return `正在上传... ${doc.uploadProgress}%`;
      case 'indexing':
        return INDEXING_STATUS_CONFIG[doc.indexingStatus]?.text || '处理中...';
      case 'completed':
        return '处理完成';
      case 'error':
        return doc.error || '处理失败';
      default:
        return '';
    }
  };

  /**
   * Whether the current document is uploading or indexing.
   */
  const isCurrentDocProcessing = () => {
    const doc = getCurrentDocument();
    return doc?.stage === 'uploading' || doc?.stage === 'indexing';
  };

  /**
   * Returns how many documents completed out of the total.
   */
  const getCompletionStats = () => {
    const completed = uploadedDocuments.filter(doc => doc.stage === 'completed').length;
    const total = uploadedDocuments.length;
    return { completed, total };
  };

  /**
   * Renders the step indicator (two-step flow).
   */
  const renderSteps = () => (
1 ? 'completed' : ''}`}> 1 选择数据源
1 ? 'completed' : ''}`}>
2 文本分段与清洗
); /** * 渲染第一步:选择文件(支持多文件) */ const renderStep1 = () => (

上传文本文件

文档需上传至知识智能理解法治知识库,广东烟草智能理解将按照于知识库,你可以在聊后指数文档所据案中检索它

false} multiple={true} accept=".txt,.md,.mdx,.pdf,.html,.htm,.xlsx,.xls,.docx,.csv,.vtt,.properties" showUploadList={false} >

拖拽文件或文件夹至此,或者 选择文件

已支持 {SUPPORTED_FORMATS},每个文件不超过 15MB。支持批量上传多个文件。

{/* 已选文件列表 */} {selectedFiles.length > 0 && (

嵌入已就绪 ({selectedFiles.length} 个文件)

{fileList.map((file) => (
{file.name} {file.originFileObj ? `${file.originFileObj.type?.split('/')[1]?.toUpperCase() || 'FILE'},${(file.originFileObj.size / 1024 / 1024).toFixed(2)}MB` : ''}
))}
)}
); /** * 渲染第二步:分段配置与预览 * 左侧始终显示配置面板,右侧预览框内显示进度或分段内容 */ const renderStep2 = () => { const currentDoc = getCurrentDocument(); const isProcessing = isCurrentDocProcessing(); const stats = getCompletionStats(); return (
{/* 分段配置与预览 */}
{/* 左侧设置区域 */}

分段设置

{/* 分段标识符 */}
updateCurrentSettings('separator', e.target.value)} placeholder="\n\n" className="setting-input" disabled={isProcessing} />
{/* 分段最大长度 */}
updateCurrentSettings('maxTokens', value || 1024)} min={100} max={4000} className="setting-input-number" disabled={isProcessing} /> characters
{/* 分段重叠长度 */}
updateCurrentSettings('chunkOverlap', value || 50)} min={0} max={500} className="setting-input-number" disabled={isProcessing} /> characters
{/* 文本预处理规则 */}

文本预处理规则

updateCurrentSettings('removeExtraSpaces', e.target.checked)} disabled={isProcessing} > 替换掉连续的空格、换行符和制表符 updateCurrentSettings('removeUrlsEmails', e.target.checked)} disabled={isProcessing} > 删除所有 URL 和电子邮件地址
{/* 索引方式 */}

索引方式

!isProcessing && updateCurrentSettings('indexingTechnique', 'high_quality')} > 高质量 推荐
!isProcessing && updateCurrentSettings('indexingTechnique', 'economy')} > 经济
{/* 操作按钮 */}
{/* 右侧预览区域 */}
预览 {uploadedDocuments.length > 0 && ( <>