feat: 完善Dify知识库管理召回测试模块,优化知识库上传文件时的分段配置设置
This commit is contained in:
@@ -0,0 +1,533 @@
|
||||
import type { UploadFile, UploadProps } from 'antd';
|
||||
import { message } from 'antd';
|
||||
import { useCallback, useEffect, useRef, useState } from 'react';
|
||||
import {
|
||||
fetchIndexingStatus,
|
||||
updateDocumentByFile,
|
||||
uploadDocumentWithConfig,
|
||||
} from '~/api/dify-dataset/api/documentApi';
|
||||
import { fetchSegments } from '~/api/dify-dataset/api/segmentApi';
|
||||
import type { IndexingStatus } from '~/api/dify-dataset/type/documentTypes';
|
||||
import type {
|
||||
DocumentStage,
|
||||
SegmentationSettings,
|
||||
UploadedDocument,
|
||||
} from '~/types/dify-dataset-manager/document-upload';
|
||||
import {
|
||||
DEFAULT_SEGMENTATION_SETTINGS,
|
||||
INDEXING_STATUS_CONFIG,
|
||||
} from '~/types/dify-dataset-manager/document-upload';
|
||||
|
||||
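/**
 * Document upload state-management hook.
 *
 * @param datasetId - Id of the dataset; passed to every document/segment API call made by the hook.
 * @param onClose - Invoked when the user leaves through `handleGoToDocuments`.
 * @param onSuccess - Invoked (before `onClose`) when at least one document has reached the `completed` stage.
 */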
|
||||
export function useDocumentUpload(datasetId: string, onClose: () => void, onSuccess: () => void) {
|
||||
// 步骤控制
|
||||
const [step, setStep] = useState<1 | 2>(1);
|
||||
|
||||
// 文件相关
|
||||
const [selectedFiles, setSelectedFiles] = useState<File[]>([]);
|
||||
const [fileList, setFileList] = useState<UploadFile[]>([]);
|
||||
|
||||
// 多文档状态管理
|
||||
const [uploadedDocuments, setUploadedDocuments] = useState<UploadedDocument[]>([]);
|
||||
// 当前选中查看的文档索引
|
||||
const [currentDocIndex, setCurrentDocIndex] = useState(0);
|
||||
|
||||
// 当前显示的分段设置(来自当前选中的文档)
|
||||
const [currentSettings, setCurrentSettings] = useState<SegmentationSettings>(DEFAULT_SEGMENTATION_SETTINGS);
|
||||
|
||||
// 预览相关
|
||||
const [previewLoading, setPreviewLoading] = useState(false);
|
||||
|
||||
// Polling interval handles, keyed by document id (one interval per document).
// `ReturnType<typeof setInterval>` resolves to the handle type of whichever
// timer typings are active instead of hard-coding the Node-only `NodeJS.Timeout`.
const pollingTimersRef = useRef<Map<string, ReturnType<typeof setInterval>>>(new Map());
// Status catch-up timeout handles, keyed by document id.
const statusCatchUpTimersRef = useRef<Map<string, ReturnType<typeof setTimeout>>>(new Map());
|
||||
|
||||
// 状态顺序
|
||||
const STATUS_ORDER: IndexingStatus[] = ['waiting', 'parsing', 'cleaning', 'splitting', 'indexing', 'completed'];
|
||||
|
||||
/**
 * Stops the status polling interval registered for a single document.
 * Does nothing when no interval is registered under `documentId`.
 */
const stopPolling = useCallback((documentId: string) => {
  const timers = pollingTimersRef.current;
  const handle = timers.get(documentId);
  if (!handle) {
    return;
  }
  clearInterval(handle);
  timers.delete(documentId);
}, []);
|
||||
|
||||
/**
 * Stops every registered polling interval and empties the registry.
 * Status catch-up timeouts are not touched here.
 */
const stopAllPolling = useCallback(() => {
  const timers = pollingTimersRef.current;
  timers.forEach((handle) => {
    clearInterval(handle);
  });
  timers.clear();
}, []);
|
||||
|
||||
/**
 * Loads the segment preview (page 1, up to 50 segments) for a document and
 * stores it on the entry at `docIndex`.
 *
 * `previewLoading` is raised for the duration of the request. Failures are
 * logged and reported through an antd message; they are not rethrown.
 */
const loadSegmentsPreview = useCallback(async (documentId: string, docIndex: number) => {
  setPreviewLoading(true);
  try {
    const { data } = await fetchSegments(datasetId, documentId, 1, 50);
    const segments = data || [];
    // Attach the fetched segments to the matching document only.
    setUploadedDocuments(prev => prev.map((doc, idx) => {
      if (idx !== docIndex) {
        return doc;
      }
      return { ...doc, segments };
    }));
  } catch (err: any) {
    console.error('加载分段预览失败:', err);
    message.error('加载分段预览失败');
  } finally {
    setPreviewLoading(false);
  }
}, [datasetId]);
|
||||
|
||||
// On unmount: cancel every polling interval and every catch-up timeout.
useEffect(() => {
  const clearAllTimers = () => {
    const polling = pollingTimersRef.current;
    polling.forEach((handle) => {
      clearInterval(handle);
    });
    polling.clear();

    const catchUp = statusCatchUpTimersRef.current;
    catchUp.forEach((handle) => {
      clearTimeout(handle);
    });
    catchUp.clear();
  };
  return clearAllTimers;
}, []);
|
||||
|
||||
/**
 * Status catch-up.
 *
 * Polling only records the backend status in `realIndexingStatus`. This
 * effect advances the displayed `indexingStatus` one step of STATUS_ORDER per
 * second until it reaches the real status, so each intermediate stage stays
 * visible for at least one second.
 *
 * When the displayed status reaches `completed`, polling for the document is
 * stopped and its segment preview is loaded. Those side effects run in the
 * timer callback, not inside the state updater: React requires updater
 * functions to be pure and may invoke them twice (Strict Mode), which would
 * otherwise trigger the preview request twice.
 */
useEffect(() => {
  uploadedDocuments.forEach((doc, index) => {
    // Skip documents without a real status, failed documents, and documents
    // that already have a pending catch-up timer.
    if (!doc.realIndexingStatus || doc.stage === 'error' || statusCatchUpTimersRef.current.has(doc.documentId)) {
      return;
    }

    const currentIndex = STATUS_ORDER.indexOf(doc.indexingStatus);
    const targetIndex = STATUS_ORDER.indexOf(doc.realIndexingStatus);

    // Only act when the displayed status lags behind the real one.
    if (currentIndex >= targetIndex) {
      return;
    }

    const { documentId } = doc;
    const nextStatus = STATUS_ORDER[currentIndex + 1];
    // Decided from the snapshot that scheduled this step.
    const reachesCompletion = nextStatus === 'completed' && doc.realIndexingStatus === 'completed';

    // Advance to the next status after one second.
    const timer = setTimeout(() => {
      // Release the slot so the next effect run can schedule the following step.
      statusCatchUpTimersRef.current.delete(documentId);

      // Pure updater: only derives the next state.
      setUploadedDocuments(prev => prev.map((d, idx) => {
        if (idx !== index) return d;

        if (nextStatus === 'completed' && d.realIndexingStatus === 'completed') {
          return { ...d, indexingStatus: nextStatus, stage: 'completed' };
        }

        return { ...d, indexingStatus: nextStatus };
      }));

      if (reachesCompletion) {
        stopPolling(documentId);
        // Load the segment preview automatically; errors are handled inside.
        void loadSegmentsPreview(documentId, index);
      }
    }, 1000); // each status is shown for at least one second

    statusCatchUpTimersRef.current.set(documentId, timer);
  });
}, [uploadedDocuments, stopPolling, loadSegmentsPreview]);
|
||||
|
||||
/**
 * Fetches the indexing status of `batch` once and records it on the document
 * at `docIndex`.
 *
 * Only the first entry of the response is read. On a regular status the
 * document's `realIndexingStatus` and segment counters are updated; the
 * displayed status is advanced separately by the catch-up effect. On `error`
 * the document is moved to the `error` stage and its polling is stopped.
 *
 * Request failures are logged and otherwise ignored, so an active interval
 * simply tries again on its next tick.
 */
const pollIndexingStatus = useCallback(async (batch: string, documentId: string, docIndex: number) => {
  try {
    const response = await fetchIndexingStatus(datasetId, batch);
    const documentStatus = response.data?.[0];
    if (!documentStatus) {
      return;
    }

    const realStatus = documentStatus.indexing_status as IndexingStatus;
    const failed = realStatus === 'error';

    // Stop polling outside the state updater: updaters must stay pure.
    if (failed) {
      stopPolling(documentId);
    }

    setUploadedDocuments(prev => prev.map((doc, idx) => {
      if (idx !== docIndex) return doc;

      if (failed) {
        return {
          ...doc,
          stage: 'error',
          error: documentStatus.error || '处理失败',
          realIndexingStatus: realStatus
        };
      }

      return {
        ...doc,
        realIndexingStatus: realStatus,
        completedSegments: documentStatus.completed_segments,
        totalSegments: documentStatus.total_segments
      };
    }));
  } catch (err) {
    console.error('获取索引状态失败:', err);
  }
}, [datasetId, stopPolling]);
|
||||
|
||||
/**
 * Starts polling the indexing status of one document every 2 seconds.
 * Any interval already registered for `documentId` is stopped first, and the
 * first poll is issued immediately rather than after the first delay.
 */
const startPolling = useCallback((batch: string, documentId: string, docIndex: number) => {
  stopPolling(documentId);

  const tick = () => {
    pollIndexingStatus(batch, documentId, docIndex);
  };

  pollingTimersRef.current.set(documentId, setInterval(tick, 2000));
  tick();
}, [stopPolling, pollIndexingStatus]);
|
||||
|
||||
/**
 * Converts UI segmentation settings into the upload request configuration.
 * A literal backslash-n sequence typed into the separator is turned into a
 * real newline character.
 */
const buildConfig = useCallback((s: SegmentationSettings) => {
  const segmentation = {
    separator: s.separator.replace(/\\n/g, '\n'),
    max_tokens: s.maxTokens,
  };
  const preProcessingRules = [
    { id: 'remove_extra_spaces' as const, enabled: s.removeExtraSpaces },
    { id: 'remove_urls_emails' as const, enabled: s.removeUrlsEmails },
  ];

  return {
    indexing_technique: s.indexingTechnique,
    process_rule: {
      mode: 'custom' as const,
      rules: {
        pre_processing_rules: preProcessingRules,
        segmentation,
      },
    },
  };
}, []);
|
||||
|
||||
/**
 * Changes one segmentation setting of the currently selected document.
 *
 * The value is written both to `currentSettings` and to the `settings` of the
 * document at `currentDocIndex`. The two updates are issued as independent,
 * pure updaters instead of calling one setter from inside the other's updater
 * (React may invoke updaters more than once and expects them to have no side
 * effects).
 *
 * @param key - Name of the setting to change.
 * @param value - New value for that setting.
 */
const updateCurrentSettings = useCallback((key: keyof SegmentationSettings, value: any) => {
  setCurrentSettings(prev => ({ ...prev, [key]: value }));
  // Mirror the change onto the selected document's own settings.
  setUploadedDocuments(prevDocs => prevDocs.map((doc, idx) =>
    idx === currentDocIndex ? { ...doc, settings: { ...doc.settings, [key]: value } } : doc
  ));
}, [currentDocIndex]);
|
||||
|
||||
/**
 * antd Upload `onChange` handler: stores the new file list and keeps
 * `selectedFiles` in sync with the entries that carry an `originFileObj`.
 */
const handleFileChange: UploadProps['onChange'] = useCallback((info: { fileList: UploadFile[] }) => {
  const nextFileList = info.fileList;
  setFileList(nextFileList);

  // Collect the underlying File objects, skipping entries without one.
  const files: File[] = [];
  nextFileList.forEach((entry: UploadFile) => {
    if (entry.originFileObj) {
      files.push(entry.originFileObj as File);
    }
  });
  setSelectedFiles(files);
}, []);
|
||||
|
||||
/**
 * Removes a file (matched by `uid`) from the file list and re-derives
 * `selectedFiles` from the remaining entries.
 */
const handleRemoveFile = useCallback((file: UploadFile) => {
  setFileList(prev => {
    const remaining = prev.filter(entry => entry.uid !== file.uid);

    const files: File[] = [];
    remaining.forEach(entry => {
      if (entry.originFileObj) {
        files.push(entry.originFileObj as File);
      }
    });
    // NOTE(review): this setter runs inside the setFileList updater; React
    // expects updater functions to be free of side effects.
    setSelectedFiles(files);

    return remaining;
  });
}, []);
|
||||
|
||||
/**
 * Uploads one file with the default segmentation settings and tracks it in
 * the document list entry at `index`.
 *
 * Stage flow: `uploading` (with progress) -> `indexing` once the server has
 * returned the document id and batch, at which point status polling starts.
 * Any failure is logged and stored on the entry as stage `error`; the
 * returned promise does not reject.
 */
const uploadSingleFile = useCallback(async (file: File, index: number): Promise<void> => {
  // Applies `apply` to the entry at `index`, leaving the others untouched.
  const patchDocument = (apply: (doc: UploadedDocument) => UploadedDocument) => {
    setUploadedDocuments(prev => prev.map((doc, idx) => (idx === index ? apply(doc) : doc)));
  };

  try {
    patchDocument(doc => ({ ...doc, stage: 'uploading' }));

    const config = buildConfig(DEFAULT_SEGMENTATION_SETTINGS);
    const result = await uploadDocumentWithConfig(
      datasetId,
      file,
      config,
      (percent) => {
        patchDocument(doc => ({ ...doc, uploadProgress: percent }));
      }
    );

    // Record the server-side identifiers and reset both status fields.
    patchDocument(doc => ({
      ...doc,
      documentId: result.document.id,
      batch: result.batch,
      stage: 'indexing',
      indexingStatus: 'waiting',
      realIndexingStatus: 'waiting',
    }));

    startPolling(result.batch, result.document.id, index);
  } catch (err: any) {
    console.error(`上传文档 ${file.name} 失败:`, err);
    patchDocument(doc => ({
      ...doc,
      stage: 'error',
      error: err.message || '上传失败',
    }));
  }
}, [datasetId, buildConfig, startPolling]);
|
||||
|
||||
/**
 * "Next" button handler: moves to step 2 and uploads every selected file.
 *
 * Warns and returns when no file is selected. Otherwise one `pending`
 * document entry is created per file (with default settings), the first
 * document is selected, and the files are uploaded one after another in
 * selection order.
 */
const handleNextStep = useCallback(async () => {
  if (!selectedFiles.length) {
    message.warning('请先选择文件');
    return;
  }

  // Fresh tracking entry for every selected file.
  const docs = selectedFiles.map((file): UploadedDocument => ({
    file,
    documentId: '',
    batch: '',
    stage: 'pending',
    indexingStatus: 'waiting',
    realIndexingStatus: 'waiting',
    uploadProgress: 0,
    settings: { ...DEFAULT_SEGMENTATION_SETTINGS },
    segments: [],
  }));

  setUploadedDocuments(docs);
  setCurrentDocIndex(0);
  setCurrentSettings({ ...DEFAULT_SEGMENTATION_SETTINGS });
  setStep(2);

  // Sequential upload: each file waits for the previous one to finish.
  let position = 0;
  for (const file of selectedFiles) {
    await uploadSingleFile(file, position);
    position += 1;
  }
}, [selectedFiles, uploadSingleFile]);
|
||||
|
||||
/**
 * Selects the document to view. `docId` is matched against the document id
 * first and the file name second; an unknown value leaves the selection
 * unchanged. The selected document's settings become `currentSettings`.
 */
const handleDocumentChange = useCallback((docId: string) => {
  const matches = (doc: UploadedDocument) => doc.documentId === docId || doc.file.name === docId;
  const index = uploadedDocuments.findIndex(matches);
  if (index === -1) {
    return;
  }
  setCurrentDocIndex(index);
  setCurrentSettings(uploadedDocuments[index].settings);
}, [uploadedDocuments]);
|
||||
|
||||
/**
 * Re-processes the currently selected document with `currentSettings`.
 *
 * Does nothing when there is no current document or it has no document id
 * yet. Otherwise the file is re-sent through `updateDocumentByFile`, the
 * entry goes back to `uploading` (progress and segments reset) and then to
 * `indexing` with the new batch, and polling is restarted. Failures are
 * logged, stored on the entry as stage `error`, and shown via an antd message.
 */
const handleReprocess = useCallback(async () => {
  const currentDoc = uploadedDocuments[currentDocIndex];
  if (!currentDoc?.documentId) {
    return;
  }

  // Applies `apply` to the currently selected entry only.
  const patchCurrent = (apply: (doc: UploadedDocument) => UploadedDocument) => {
    setUploadedDocuments(prev => prev.map((doc, idx) => (idx === currentDocIndex ? apply(doc) : doc)));
  };

  patchCurrent(doc => ({
    ...doc,
    stage: 'uploading',
    uploadProgress: 0,
    segments: [],
  }));

  try {
    const config = buildConfig(currentSettings);
    const result = await updateDocumentByFile(
      datasetId,
      currentDoc.documentId,
      currentDoc.file,
      config,
      (percent) => {
        patchCurrent(doc => ({ ...doc, uploadProgress: percent }));
      }
    );

    // Switch to the new batch and reset both status fields.
    patchCurrent(doc => ({
      ...doc,
      batch: result.batch,
      stage: 'indexing',
      indexingStatus: 'waiting',
      realIndexingStatus: 'waiting',
    }));

    startPolling(result.batch, currentDoc.documentId, currentDocIndex);
  } catch (err: any) {
    console.error('重新处理失败:', err);
    patchCurrent(doc => ({
      ...doc,
      stage: 'error',
      error: err.message || '重新处理失败',
    }));
    message.error(err.message || '重新处理失败');
  }
}, [uploadedDocuments, currentDocIndex, currentSettings, datasetId, buildConfig, startPolling]);
|
||||
|
||||
/**
 * "Back" button handler. Refuses (with a warning) while any document is still
 * uploading or indexing; otherwise stops all polling and resets the hook to
 * step 1 with an empty document list and default settings.
 */
const handlePrevStep = useCallback(() => {
  const isBusy = (doc: UploadedDocument) => doc.stage === 'uploading' || doc.stage === 'indexing';

  if (uploadedDocuments.some(isBusy)) {
    message.warning('还有文档正在处理中,请等待完成');
    return;
  }

  stopAllPolling();
  setStep(1);
  setUploadedDocuments([]);
  setCurrentDocIndex(0);
  setCurrentSettings(DEFAULT_SEGMENTATION_SETTINGS);
}, [uploadedDocuments, stopAllPolling]);
|
||||
|
||||
/**
 * Leaves the upload flow: stops all polling, calls `onSuccess` when at least
 * one document has completed, and then always calls `onClose`.
 */
const handleGoToDocuments = useCallback(() => {
  stopAllPolling();

  const completedCount = uploadedDocuments.filter(doc => doc.stage === 'completed').length;
  if (completedCount > 0) {
    onSuccess();
  }

  onClose();
}, [uploadedDocuments, stopAllPolling, onSuccess, onClose]);
|
||||
|
||||
/**
 * Returns the document at `currentDocIndex`, or `null` when there is none.
 */
const getCurrentDocument = useCallback((): UploadedDocument | null => {
  const selected = uploadedDocuments[currentDocIndex];
  return selected ? selected : null;
}, [uploadedDocuments, currentDocIndex]);
|
||||
|
||||
/**
 * Returns the progress value of the current document: the upload progress
 * while uploading, the configured percent of the displayed indexing status
 * while indexing or completed, and 0 in every other case.
 */
const getCurrentProgress = useCallback(() => {
  const doc = getCurrentDocument();
  if (!doc) {
    return 0;
  }

  switch (doc.stage) {
    case 'uploading':
      return doc.uploadProgress;
    case 'indexing':
    case 'completed':
      return INDEXING_STATUS_CONFIG[doc.indexingStatus]?.percent || 0;
    default:
      return 0;
  }
}, [getCurrentDocument]);
|
||||
|
||||
/**
 * Builds the status line shown for the current document.
 * Returns an empty string when there is no current document or its stage has
 * no dedicated text.
 */
const getStatusText = useCallback(() => {
  const doc = getCurrentDocument();
  if (!doc) {
    return '';
  }

  switch (doc.stage) {
    case 'uploading':
      return `正在上传... ${doc.uploadProgress}%`;

    case 'indexing': {
      const baseText = INDEXING_STATUS_CONFIG[doc.indexingStatus]?.text || '处理中...';
      // Segment counters are only appended during splitting/indexing and
      // only once a positive total is known.
      const inSegmentPhase = doc.indexingStatus === 'splitting' || doc.indexingStatus === 'indexing';
      if (inSegmentPhase && doc.totalSegments && doc.totalSegments > 0) {
        return `${baseText} (${doc.completedSegments || 0}/${doc.totalSegments})`;
      }
      return baseText;
    }

    case 'completed':
      return `处理完成 (${doc.totalSegments || doc.segments?.length || 0} 段)`;

    case 'error':
      return doc.error || '处理失败';

    default:
      return '';
  }
}, [getCurrentDocument]);
|
||||
|
||||
/**
 * Tells whether the current document is still uploading or indexing.
 * Returns `false` when there is no current document.
 */
const isCurrentDocProcessing = useCallback(() => {
  const stage = getCurrentDocument()?.stage;
  return stage === 'uploading' || stage === 'indexing';
}, [getCurrentDocument]);
|
||||
|
||||
/**
 * Returns how many tracked documents have reached the `completed` stage,
 * together with the total number of tracked documents.
 */
const getCompletionStats = useCallback(() => {
  let completed = 0;
  uploadedDocuments.forEach(doc => {
    if (doc.stage === 'completed') {
      completed += 1;
    }
  });
  return { completed, total: uploadedDocuments.length };
}, [uploadedDocuments]);
|
||||
|
||||
return {
|
||||
// 状态
|
||||
step,
|
||||
selectedFiles,
|
||||
fileList,
|
||||
uploadedDocuments,
|
||||
currentDocIndex,
|
||||
currentSettings,
|
||||
previewLoading,
|
||||
|
||||
// 方法
|
||||
handleFileChange,
|
||||
handleRemoveFile,
|
||||
handleNextStep,
|
||||
handleDocumentChange,
|
||||
handleReprocess,
|
||||
handlePrevStep,
|
||||
handleGoToDocuments,
|
||||
updateCurrentSettings,
|
||||
|
||||
// 计算属性方法
|
||||
getCurrentDocument,
|
||||
getCurrentProgress,
|
||||
getStatusText,
|
||||
isCurrentDocProcessing,
|
||||
getCompletionStats,
|
||||
};
|
||||
}
|
||||
|
||||
export type UseDocumentUploadReturn = ReturnType<typeof useDocumentUpload>;
|
||||
Reference in New Issue
Block a user