534 lines
19 KiB
TypeScript
534 lines
19 KiB
TypeScript
import type { UploadFile, UploadProps } from 'antd';
|
|
import { message } from 'antd';
|
|
import { useCallback, useEffect, useRef, useState } from 'react';
|
|
import {
|
|
fetchIndexingStatus,
|
|
updateDocumentByFile,
|
|
uploadDocumentWithConfig,
|
|
} from '~/api/dify-dataset/api/documentApi';
|
|
import { fetchSegments } from '~/api/dify-dataset/api/segmentApi';
|
|
import type { IndexingStatus } from '~/api/dify-dataset/type/documentTypes';
|
|
import type {
|
|
DocumentStage,
|
|
SegmentationSettings,
|
|
UploadedDocument,
|
|
} from '~/types/dify-dataset-manager/document-upload';
|
|
import {
|
|
DEFAULT_SEGMENTATION_SETTINGS,
|
|
INDEXING_STATUS_CONFIG,
|
|
} from '~/types/dify-dataset-manager/document-upload';
|
|
|
|
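/**
 * Hook that manages the state of the document upload flow: file selection,
 * uploading, indexing-status polling, segment preview and per-document
 * segmentation settings.
 *
 * @param datasetId - Dataset id passed to every upload, status and segment request.
 * @param onClose - Called by `handleGoToDocuments` after polling has been stopped.
 * @param onSuccess - Called by `handleGoToDocuments` (before `onClose`) when at
 *   least one document reached the `completed` stage.
 */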
|
|
export function useDocumentUpload(datasetId: string, onClose: () => void, onSuccess: () => void) {
|
|
// Current step of the flow: `handleNextStep` switches to 2, `handlePrevStep` back to 1.
const [step, setStep] = useState<1 | 2>(1);

// Raw File objects extracted from the upload list, and the upload list itself.
const [selectedFiles, setSelectedFiles] = useState<File[]>([]);
const [fileList, setFileList] = useState<UploadFile[]>([]);

// One entry per file handed to the upload step.
const [uploadedDocuments, setUploadedDocuments] = useState<UploadedDocument[]>([]);
// Index (into `uploadedDocuments`) of the document currently being viewed.
const [currentDocIndex, setCurrentDocIndex] = useState(0);

// Segmentation settings shown for the currently viewed document.
const [currentSettings, setCurrentSettings] = useState<SegmentationSettings>(DEFAULT_SEGMENTATION_SETTINGS);

// True while a segment preview request is in flight.
const [previewLoading, setPreviewLoading] = useState(false);

// Both timer registries are keyed by document id.
type TimerRegistry = Map<string, NodeJS.Timeout>;
// `setInterval` handles of the indexing-status polling (one per document).
const pollingTimersRef = useRef<TimerRegistry>(new Map());
// `setTimeout` handles of the displayed-status catch-up (one per document).
const statusCatchUpTimersRef = useRef<TimerRegistry>(new Map());

// Order in which indexing statuses are stepped through by the catch-up effect.
const STATUS_ORDER: IndexingStatus[] = ['waiting', 'parsing', 'cleaning', 'splitting', 'indexing', 'completed'];
|
|
|
|
/**
 * Stops the status polling registered for a single document.
 *
 * Does nothing when no interval is registered under that id.
 *
 * @param documentId - Key under which the interval handle is stored in `pollingTimersRef`.
 */
const stopPolling = useCallback((documentId: string) => {
  const registry = pollingTimersRef.current;
  const handle = registry.get(documentId);
  if (!handle) {
    return;
  }
  clearInterval(handle);
  registry.delete(documentId);
}, []);
|
|
|
|
/**
 * Stops every registered status polling interval and empties the registry.
 * Catch-up timeouts (`statusCatchUpTimersRef`) are not touched here.
 */
const stopAllPolling = useCallback(() => {
  const registry = pollingTimersRef.current;
  registry.forEach((handle) => {
    clearInterval(handle);
  });
  registry.clear();
}, []);
|
|
|
|
/**
 * Loads the segment preview of one document and stores it on the matching
 * entry of `uploadedDocuments`.
 *
 * Calls `fetchSegments(datasetId, documentId, 1, 50)` (presumably page 1 with
 * up to 50 items — confirm against the API wrapper). `previewLoading` is set
 * while the request is in flight and always reset afterwards. Failures are
 * logged and reported via `message.error`; they are not rethrown.
 *
 * @param documentId - Id of the document whose segments are requested.
 * @param docIndex - Position of that document in `uploadedDocuments`.
 */
const loadSegmentsPreview = useCallback(async (documentId: string, docIndex: number) => {
  setPreviewLoading(true);
  try {
    const response = await fetchSegments(datasetId, documentId, 1, 50);
    const segments = response.data || [];
    // Match on the document id as well as the index: the list may have been
    // reset or replaced while the request was in flight, and a late response
    // must not be written onto a different document that now sits at docIndex.
    setUploadedDocuments(prev => prev.map((doc, idx) =>
      idx === docIndex && doc.documentId === documentId ? { ...doc, segments } : doc
    ));
  } catch (err: unknown) {
    console.error('加载分段预览失败:', err);
    message.error('加载分段预览失败');
  } finally {
    setPreviewLoading(false);
  }
}, [datasetId]);
|
|
|
|
// On unmount: cancel every polling interval and every pending catch-up timeout.
useEffect(() => {
  const disposeTimers = () => {
    // The refs are read at cleanup time so timers registered after mount are included.
    const pollingTimers = pollingTimersRef.current;
    pollingTimers.forEach((handle) => {
      clearInterval(handle);
    });
    pollingTimers.clear();

    const catchUpTimers = statusCatchUpTimersRef.current;
    catchUpTimers.forEach((handle) => {
      clearTimeout(handle);
    });
    catchUpTimers.clear();
  };
  return disposeTimers;
}, []);
|
|
|
|
// Most recently committed document list, readable from timer callbacks so that
// they can decide on side effects without running them inside a state updater.
const latestDocumentsRef = useRef<UploadedDocument[]>(uploadedDocuments);

/**
 * Status catch-up.
 *
 * Polling only records the backend status in `realIndexingStatus`. This effect
 * moves the displayed `indexingStatus` towards it one `STATUS_ORDER` step at a
 * time, with one second per step. When the displayed status reaches
 * `completed` while the real status is `completed` too, the document is marked
 * as `completed`, its polling is stopped and its segment preview is loaded.
 *
 * At most one timer per document id is pending at any time; the effect runs
 * again after each step because the step itself updates `uploadedDocuments`.
 * No cleanup is returned on purpose: pending timers must survive re-runs and
 * are cancelled by the unmount cleanup instead.
 */
useEffect(() => {
  latestDocumentsRef.current = uploadedDocuments;

  uploadedDocuments.forEach((doc, index) => {
    // Skip documents without a real status, failed documents, and documents
    // that already have a catch-up step scheduled.
    if (!doc.realIndexingStatus || doc.stage === 'error' || statusCatchUpTimersRef.current.has(doc.documentId)) {
      return;
    }

    const currentIndex = STATUS_ORDER.indexOf(doc.indexingStatus);
    const targetIndex = STATUS_ORDER.indexOf(doc.realIndexingStatus);

    // Nothing to do unless the displayed status is behind the real one.
    if (currentIndex >= targetIndex) {
      return;
    }

    const nextStatus = STATUS_ORDER[currentIndex + 1];
    if (nextStatus === undefined) {
      return;
    }
    const { documentId } = doc;

    // Stay on the current status for at least one second before advancing.
    const timer = setTimeout(() => {
      statusCatchUpTimersRef.current.delete(documentId);

      const latest = latestDocumentsRef.current[index];
      const finished =
        nextStatus === 'completed' && latest !== undefined && latest.realIndexingStatus === 'completed';

      // The updater is kept pure: React may invoke it more than once.
      setUploadedDocuments(prev => prev.map((d, idx): UploadedDocument => {
        if (idx !== index) return d;
        return finished
          ? { ...d, indexingStatus: nextStatus, stage: 'completed' }
          : { ...d, indexingStatus: nextStatus };
      }));

      // Side effects run here, exactly once per timer, not inside the updater.
      if (finished && latest !== undefined) {
        stopPolling(latest.documentId);
        // Load the segment preview automatically once the document is done.
        void loadSegmentsPreview(latest.documentId, index);
      }
    }, 1000);

    statusCatchUpTimersRef.current.set(documentId, timer);
  });
}, [uploadedDocuments, stopPolling, loadSegmentsPreview]);
|
|
|
|
/**
 * Fetches the indexing status of one batch and records it on the document.
 *
 * Only the first entry of the response is used. For a regular status the
 * document's `realIndexingStatus` and segment counters are updated; the
 * displayed `indexingStatus` is advanced separately by the catch-up effect.
 * For status `error` the document is moved to the `error` stage and its
 * polling is stopped. Request failures are logged only, so polling carries on
 * with the next tick.
 *
 * @param batch - Batch id passed to `fetchIndexingStatus`.
 * @param documentId - Id of the polled document (also the polling timer key).
 * @param docIndex - Position of the document in `uploadedDocuments`.
 */
const pollIndexingStatus = useCallback(async (batch: string, documentId: string, docIndex: number) => {
  try {
    const response = await fetchIndexingStatus(datasetId, batch);
    const documentStatus = response.data?.[0];
    if (!documentStatus) {
      return;
    }

    const realStatus = documentStatus.indexing_status as IndexingStatus;
    const failed = realStatus === 'error';

    // Stop polling outside of the state updater: updaters must stay pure.
    if (failed) {
      stopPolling(documentId);
    }

    setUploadedDocuments(prev => prev.map((doc, idx): UploadedDocument => {
      // The id check keeps a late response from overwriting a different
      // document that took over this index after the list was reset.
      if (idx !== docIndex || doc.documentId !== documentId) return doc;

      if (failed) {
        return {
          ...doc,
          stage: 'error',
          error: documentStatus.error || '处理失败',
          realIndexingStatus: realStatus,
        };
      }

      return {
        ...doc,
        realIndexingStatus: realStatus,
        completedSegments: documentStatus.completed_segments,
        totalSegments: documentStatus.total_segments,
      };
    }));
  } catch (err) {
    console.error('获取索引状态失败:', err);
  }
}, [datasetId, stopPolling]);
|
|
|
|
/**
 * Starts polling the indexing status of a document every two seconds.
 *
 * Any polling already registered for the same document id is stopped first,
 * and one status request is issued immediately instead of waiting for the
 * first interval tick.
 *
 * @param batch - Batch id to poll.
 * @param documentId - Document id, used as the timer registry key.
 * @param docIndex - Position of the document in `uploadedDocuments`.
 */
const startPolling = useCallback((batch: string, documentId: string, docIndex: number) => {
  const tick = () => {
    pollIndexingStatus(batch, documentId, docIndex);
  };

  stopPolling(documentId);
  pollingTimersRef.current.set(documentId, setInterval(tick, 2000));
  tick();
}, [stopPolling, pollIndexingStatus]);
|
|
|
|
/**
 * Converts segmentation settings into the configuration object sent along
 * with an upload.
 *
 * The process rule mode is always `custom`. A literal backslash followed by
 * `n` in the separator is turned into a real newline character.
 *
 * @param settings - Segmentation settings to convert.
 * @returns The upload configuration (`indexing_technique` plus `process_rule`).
 */
const buildConfig = useCallback((settings: SegmentationSettings) => {
  const preProcessingRules = [
    { id: 'remove_extra_spaces' as const, enabled: settings.removeExtraSpaces },
    { id: 'remove_urls_emails' as const, enabled: settings.removeUrlsEmails },
  ];
  const segmentation = {
    separator: settings.separator.replace(/\\n/g, '\n'),
    max_tokens: settings.maxTokens,
  };

  return {
    indexing_technique: settings.indexingTechnique,
    process_rule: {
      mode: 'custom' as const,
      rules: {
        pre_processing_rules: preProcessingRules,
        segmentation,
      },
    },
  };
}, []);
|
|
|
|
/**
 * Changes one segmentation setting of the currently viewed document.
 *
 * The change is applied to `currentSettings` and to the `settings` of the
 * document at `currentDocIndex`. Both are written as separate functional
 * updates so that neither state updater triggers another state change.
 *
 * @param key - Name of the setting to change.
 * @param value - New value for that setting (not checked against the setting's type).
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- signature kept as-is for existing callers
const updateCurrentSettings = useCallback((key: keyof SegmentationSettings, value: any) => {
  setCurrentSettings(prev => ({ ...prev, [key]: value }));
  // Mirror the same change onto the document list entry.
  setUploadedDocuments(prevDocs => prevDocs.map((doc, idx) =>
    idx === currentDocIndex ? { ...doc, settings: { ...doc.settings, [key]: value } } : doc
  ));
}, [currentDocIndex]);
|
|
|
|
/**
 * `onChange` handler for the upload component.
 *
 * Stores the new upload list and the File objects of all entries that carry
 * an `originFileObj`.
 */
const handleFileChange: UploadProps['onChange'] = useCallback(({ fileList: newFileList }: { fileList: UploadFile[] }) => {
  const files: File[] = [];
  newFileList.forEach((entry: UploadFile) => {
    if (entry.originFileObj) {
      files.push(entry.originFileObj as File);
    }
  });

  setFileList(newFileList);
  setSelectedFiles(files);
}, []);
|
|
|
|
/**
 * Removes a file from the upload list and from the selected files.
 *
 * Upload list entries are matched by `uid`; the selected File is matched by
 * identity with the removed entry's `originFileObj`. The two states are
 * updated independently so that neither state updater triggers another state
 * change.
 *
 * @param file - Upload list entry to remove.
 */
const handleRemoveFile = useCallback((file: UploadFile) => {
  const removedFile = file.originFileObj;
  setFileList(prev => prev.filter(entry => entry.uid !== file.uid));
  // Entries without an originFileObj never contributed a selected file.
  setSelectedFiles(prev => (removedFile ? prev.filter(selected => selected !== removedFile) : prev));
}, []);
|
|
|
|
/**
 * Uploads one file and starts polling its indexing status.
 *
 * The upload always uses the configuration built from
 * `DEFAULT_SEGMENTATION_SETTINGS`. Progress callbacks are written to
 * `uploadProgress`. On success the document receives its id and batch and
 * moves to the `indexing` stage; on failure it moves to the `error` stage.
 * Errors are logged and recorded on the document, never rethrown.
 *
 * @param file - File to upload.
 * @param index - Position of the matching entry in `uploadedDocuments`.
 */
const uploadSingleFile = useCallback(async (file: File, index: number): Promise<void> => {
  // Applies a set of field changes to the document at `index` only.
  const patchDocument = (changes: Partial<UploadedDocument>) => {
    setUploadedDocuments(prev => prev.map((doc, idx) => (idx === index ? { ...doc, ...changes } : doc)));
  };

  try {
    patchDocument({ stage: 'uploading' });

    const config = buildConfig(DEFAULT_SEGMENTATION_SETTINGS);
    const result = await uploadDocumentWithConfig(datasetId, file, config, (percent) => {
      patchDocument({ uploadProgress: percent });
    });

    // Both statuses start at `waiting`; polling then reports the real one.
    patchDocument({
      documentId: result.document.id,
      batch: result.batch,
      stage: 'indexing',
      indexingStatus: 'waiting',
      realIndexingStatus: 'waiting',
    });

    startPolling(result.batch, result.document.id, index);
  } catch (err: any) {
    console.error(`上传文档 ${file.name} 失败:`, err);
    patchDocument({
      stage: 'error',
      error: err.message || '上传失败',
    });
  }
}, [datasetId, buildConfig, startPolling]);
|
|
|
|
/**
 * "Next step" handler: switches to step 2 and uploads all selected files.
 *
 * Shows a warning and does nothing when no file is selected. Otherwise one
 * pending document entry per file is created, the first document becomes the
 * current one with default settings, and the files are uploaded one after
 * another in selection order.
 */
const handleNextStep = useCallback(async () => {
  if (!selectedFiles.length) {
    message.warning('请先选择文件');
    return;
  }

  // Initial state of a document that has not been uploaded yet.
  const toPendingDocument = (file: File): UploadedDocument => ({
    file,
    documentId: '',
    batch: '',
    stage: 'pending',
    indexingStatus: 'waiting',
    realIndexingStatus: 'waiting',
    uploadProgress: 0,
    settings: { ...DEFAULT_SEGMENTATION_SETTINGS },
    segments: [],
  });

  setUploadedDocuments(selectedFiles.map(file => toPendingDocument(file)));
  setCurrentDocIndex(0);
  setCurrentSettings({ ...DEFAULT_SEGMENTATION_SETTINGS });
  setStep(2);

  // Sequential on purpose: each upload starts after the previous one settled.
  let position = 0;
  for (const file of selectedFiles) {
    await uploadSingleFile(file, position);
    position += 1;
  }
}, [selectedFiles, uploadSingleFile]);
|
|
|
|
/**
 * Switches the currently viewed document.
 *
 * The first document whose `documentId` or file name equals `docId` becomes
 * the current document and its settings become `currentSettings`. Nothing
 * happens when no document matches.
 *
 * @param docId - Document id or file name of the document to show.
 */
const handleDocumentChange = useCallback((docId: string) => {
  const matchIndex = uploadedDocuments.findIndex(
    ({ documentId, file }) => documentId === docId || file.name === docId
  );
  if (matchIndex === -1) {
    return;
  }

  setCurrentDocIndex(matchIndex);
  setCurrentSettings(uploadedDocuments[matchIndex].settings);
}, [uploadedDocuments]);
|
|
|
|
/**
 * Re-processes the current document with the current settings.
 *
 * Does nothing when there is no current document or it has no document id
 * yet. Otherwise the document is reset to the `uploading` stage, its file is
 * sent again through `updateDocumentByFile` with the configuration built from
 * `currentSettings`, and polling is restarted with the returned batch. On
 * failure the document moves to the `error` stage and the error is shown.
 */
const handleReprocess = useCallback(async () => {
  const targetIndex = currentDocIndex;
  const currentDoc = uploadedDocuments[targetIndex];
  if (!currentDoc?.documentId) return;

  // Applies a set of field changes to the re-processed document only.
  const patchTarget = (changes: Partial<UploadedDocument>) => {
    setUploadedDocuments(prev => prev.map((doc, idx) => (idx === targetIndex ? { ...doc, ...changes } : doc)));
  };

  patchTarget({ stage: 'uploading', uploadProgress: 0, segments: [] });

  try {
    const config = buildConfig(currentSettings);
    const result = await updateDocumentByFile(
      datasetId,
      currentDoc.documentId,
      currentDoc.file,
      config,
      (percent) => {
        patchTarget({ uploadProgress: percent });
      }
    );

    // Store the new batch; both statuses restart at `waiting`.
    patchTarget({
      batch: result.batch,
      stage: 'indexing',
      indexingStatus: 'waiting',
      realIndexingStatus: 'waiting',
    });

    startPolling(result.batch, currentDoc.documentId, targetIndex);
  } catch (err: any) {
    console.error('重新处理失败:', err);
    patchTarget({
      stage: 'error',
      error: err.message || '重新处理失败',
    });
    message.error(err.message || '重新处理失败');
  }
}, [uploadedDocuments, currentDocIndex, currentSettings, datasetId, buildConfig, startPolling]);
|
|
|
|
/**
 * Goes back to step 1.
 *
 * Refused with a warning while any document is in the `uploading` or
 * `indexing` stage. Otherwise all polling is stopped and the document list,
 * current index and current settings are reset.
 */
const handlePrevStep = useCallback(() => {
  const isBusy = ({ stage }: UploadedDocument) => stage === 'uploading' || stage === 'indexing';

  if (uploadedDocuments.some(isBusy)) {
    message.warning('还有文档正在处理中,请等待完成');
    return;
  }

  stopAllPolling();
  setStep(1);
  setUploadedDocuments([]);
  setCurrentDocIndex(0);
  setCurrentSettings(DEFAULT_SEGMENTATION_SETTINGS);
}, [uploadedDocuments, stopAllPolling]);
|
|
|
|
/**
 * Leaves the upload flow.
 *
 * Stops all polling, calls `onSuccess` when at least one document reached the
 * `completed` stage, and then always calls `onClose`.
 */
const handleGoToDocuments = useCallback(() => {
  stopAllPolling();

  const anyCompleted = uploadedDocuments.some(({ stage }) => stage === 'completed');
  if (anyCompleted) onSuccess();

  onClose();
}, [uploadedDocuments, stopAllPolling, onSuccess, onClose]);
|
|
|
|
/**
 * Returns the currently viewed document.
 *
 * @returns The entry of `uploadedDocuments` at `currentDocIndex`, or `null`
 *   when there is none.
 */
const getCurrentDocument = useCallback((): UploadedDocument | null => {
  const current = uploadedDocuments[currentDocIndex];
  return current ? current : null;
}, [uploadedDocuments, currentDocIndex]);
|
|
|
|
/**
 * Returns the progress value of the current document.
 *
 * While uploading this is the upload progress; while indexing or completed it
 * is the `percent` configured for the displayed indexing status in
 * `INDEXING_STATUS_CONFIG`. In every other case (including no current
 * document) it is 0.
 */
const getCurrentProgress = useCallback(() => {
  const doc = getCurrentDocument();
  if (!doc) return 0;

  switch (doc.stage) {
    case 'uploading':
      return doc.uploadProgress;
    case 'indexing':
    case 'completed':
      return INDEXING_STATUS_CONFIG[doc.indexingStatus]?.percent || 0;
    default:
      return 0;
  }
}, [getCurrentDocument]);
|
|
|
|
/**
 * Returns the status text of the current document.
 *
 * The text depends on the document stage: upload percentage while uploading,
 * the configured text of the displayed indexing status while indexing (with a
 * completed/total segment counter during `splitting` and `indexing` once a
 * total is known), a summary with the segment count when completed, and the
 * error message when failed. Returns an empty string otherwise.
 */
const getStatusText = useCallback(() => {
  const doc = getCurrentDocument();
  if (!doc) return '';

  switch (doc.stage) {
    case 'uploading':
      return `正在上传... ${doc.uploadProgress}%`;

    case 'indexing': {
      const baseText = INDEXING_STATUS_CONFIG[doc.indexingStatus]?.text || '处理中...';
      const inSegmentPhase = doc.indexingStatus === 'splitting' || doc.indexingStatus === 'indexing';
      // Show the counter only once a positive total is known.
      if (inSegmentPhase && doc.totalSegments && doc.totalSegments > 0) {
        return `${baseText} (${doc.completedSegments || 0}/${doc.totalSegments})`;
      }
      return baseText;
    }

    case 'completed':
      return `处理完成 (${doc.totalSegments || doc.segments?.length || 0} 段)`;

    case 'error':
      return doc.error || '处理失败';

    default:
      return '';
  }
}, [getCurrentDocument]);
|
|
|
|
/**
 * Tells whether the current document is being processed, i.e. whether it is
 * in the `uploading` or `indexing` stage. Returns `false` when there is no
 * current document.
 */
const isCurrentDocProcessing = useCallback(() => {
  const stage = getCurrentDocument()?.stage;
  return stage === 'uploading' || stage === 'indexing';
}, [getCurrentDocument]);
|
|
|
|
/**
 * Returns completion statistics over all documents.
 *
 * @returns `completed`: number of documents in the `completed` stage;
 *   `total`: number of documents.
 */
const getCompletionStats = useCallback(() => {
  const completed = uploadedDocuments.reduce(
    (count, doc) => (doc.stage === 'completed' ? count + 1 : count),
    0
  );
  return { completed, total: uploadedDocuments.length };
}, [uploadedDocuments]);
|
|
|
|
// Public surface of the hook: state values, event handlers and derived getters.
const hookApi = {
  // State
  step,
  selectedFiles,
  fileList,
  uploadedDocuments,
  currentDocIndex,
  currentSettings,
  previewLoading,

  // Handlers
  handleFileChange,
  handleRemoveFile,
  handleNextStep,
  handleDocumentChange,
  handleReprocess,
  handlePrevStep,
  handleGoToDocuments,
  updateCurrentSettings,

  // Getters derived from the current state
  getCurrentDocument,
  getCurrentProgress,
  getStatusText,
  isCurrentDocProcessing,
  getCompletionStats,
};

return hookApi;
|
|
}
|
|
|
|
/** Type of the object returned by `useDocumentUpload`, derived from the hook itself. */
export type UseDocumentUploadReturn = ReturnType<typeof useDocumentUpload>;
|