/*
 * Source file: leaudit-platform-frontend/app/hooks/dify-dataset-manager/document-upload.ts
 * (534 lines, 19 KiB, TypeScript)
 */
import type { UploadFile, UploadProps } from 'antd';
import { message } from 'antd';
import { useCallback, useEffect, useRef, useState } from 'react';
import {
fetchIndexingStatus,
updateDocumentByFile,
uploadDocumentWithConfig,
} from '~/api/dify-dataset/api/documentApi';
import { fetchSegments } from '~/api/dify-dataset/api/segmentApi';
import type { IndexingStatus } from '~/api/dify-dataset/type/documentTypes';
import type {
DocumentStage,
SegmentationSettings,
UploadedDocument,
} from '~/types/dify-dataset-manager/document-upload';
import {
DEFAULT_SEGMENTATION_SETTINGS,
INDEXING_STATUS_CONFIG,
} from '~/types/dify-dataset-manager/document-upload';
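/**
 * Hook that manages the state of the document upload flow: file selection,
 * uploading each file, polling its indexing status and loading a preview of
 * the resulting segments.
 *
 * @param datasetId - ID of the dataset passed to every document/segment API call.
 * @param onClose - Called by `handleGoToDocuments` when the user leaves the upload flow.
 * @param onSuccess - Called by `handleGoToDocuments`, before `onClose`, when at least
 *   one document has reached the `completed` stage.
 * @returns The current state values plus the handlers and getters listed in the
 *   returned object.
 */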
export function useDocumentUpload(datasetId: string, onClose: () => void, onSuccess: () => void) {
// Step control: starts at 1; handleNextStep switches to 2 and handlePrevStep back to 1
const [step, setStep] = useState<1 | 2>(1);
// File selection: raw File objects taken from the `originFileObj` of the upload entries
const [selectedFiles, setSelectedFiles] = useState<File[]>([]);
// antd upload entries as reported by the upload component's change events
const [fileList, setFileList] = useState<UploadFile[]>([]);
// Per-document state for all documents of the current upload run
const [uploadedDocuments, setUploadedDocuments] = useState<UploadedDocument[]>([]);
// Index (into uploadedDocuments) of the document currently being viewed
const [currentDocIndex, setCurrentDocIndex] = useState(0);
// Segmentation settings currently displayed (taken from the currently selected document)
const [currentSettings, setCurrentSettings] = useState<SegmentationSettings>(DEFAULT_SEGMENTATION_SETTINGS);
// Preview: true while a segment preview request is in flight
const [previewLoading, setPreviewLoading] = useState(false);
// Polling interval timers, keyed by document ID (supports several documents at once)
const pollingTimersRef = useRef<Map<string, NodeJS.Timeout>>(new Map());
// Status catch-up timeout timers, keyed by document ID
const statusCatchUpTimersRef = useRef<Map<string, NodeJS.Timeout>>(new Map());
// Order in which the displayed indexing status advances.
// NOTE(review): this array is re-created on every render; it is only read, never mutated.
const STATUS_ORDER: IndexingStatus[] = ['waiting', 'parsing', 'cleaning', 'splitting', 'indexing', 'completed'];
/**
 * Stops the indexing-status polling of a single document.
 *
 * Does nothing when no polling timer is registered for `documentId`.
 *
 * @param documentId - ID of the document whose polling interval should be cleared.
 */
const stopPolling = useCallback((documentId: string) => {
  const timers = pollingTimersRef.current;
  const activeTimer = timers.get(documentId);
  if (!activeTimer) {
    return;
  }
  clearInterval(activeTimer);
  timers.delete(documentId);
}, []);
/**
 * Stops the indexing-status polling of every document and empties the
 * polling timer registry. Status catch-up timers are not touched here.
 */
const stopAllPolling = useCallback(() => {
  const timers = pollingTimersRef.current;
  for (const intervalId of timers.values()) {
    clearInterval(intervalId);
  }
  timers.clear();
}, []);
/**
 * Loads the segment preview of one document and stores it on that document.
 *
 * Requests page 1 with up to 50 segments. `previewLoading` is true for the
 * duration of the request. Failures are logged and reported through an antd
 * error message; they are not re-thrown.
 *
 * @param documentId - ID of the document whose segments are fetched.
 * @param docIndex - Position of the document in `uploadedDocuments`.
 */
const loadSegmentsPreview = useCallback(async (documentId: string, docIndex: number) => {
  setPreviewLoading(true);
  try {
    const response = await fetchSegments(datasetId, documentId, 1, 50);
    const segments = response.data ?? [];
    // The list may have been rebuilt while the request was in flight, so the
    // slot at docIndex is only updated if it still holds the same document.
    setUploadedDocuments(prev => prev.map((doc, idx) =>
      idx === docIndex && doc.documentId === documentId ? { ...doc, segments } : doc
    ));
  } catch (err: unknown) {
    console.error('加载分段预览失败:', err);
    message.error('加载分段预览失败');
  } finally {
    setPreviewLoading(false);
  }
}, [datasetId]);
// On unmount: cancel every polling interval and every status catch-up timeout,
// and empty both timer registries.
useEffect(() => () => {
  const pollingTimers = pollingTimersRef.current;
  const catchUpTimers = statusCatchUpTimersRef.current;
  for (const intervalId of pollingTimers.values()) {
    clearInterval(intervalId);
  }
  pollingTimers.clear();
  for (const timeoutId of catchUpTimers.values()) {
    clearTimeout(timeoutId);
  }
  catchUpTimers.clear();
}, []);
/**
 * Status catch-up logic.
 *
 * The displayed status (`indexingStatus`) follows the real status reported by
 * polling (`realIndexingStatus`) one STATUS_ORDER step at a time, so each
 * intermediate status stays visible for at least one second. When the
 * displayed status reaches `completed`, polling for that document is stopped
 * and its segment preview is loaded.
 */
useEffect(() => {
  uploadedDocuments.forEach((doc, index) => {
    const { documentId } = doc;
    // Skip documents without a real status, failed documents, and documents
    // that already have a catch-up step scheduled.
    if (!doc.realIndexingStatus || doc.stage === 'error' || statusCatchUpTimersRef.current.has(documentId)) {
      return;
    }
    const currentIndex = STATUS_ORDER.indexOf(doc.indexingStatus);
    const targetIndex = STATUS_ORDER.indexOf(doc.realIndexingStatus);
    // Nothing to do unless the displayed status lags behind the real one.
    if (currentIndex >= targetIndex) {
      return;
    }
    const nextStatus = STATUS_ORDER[currentIndex + 1];
    // `completed` is the last entry of STATUS_ORDER, so the next step can only
    // be `completed` when the real status seen here is already `completed`.
    const reachesCompleted = nextStatus === 'completed';
    const timer = setTimeout(() => {
      // The updater stays pure: it only derives the next state. React may run
      // updaters more than once (e.g. in Strict Mode), which previously
      // triggered the preview request twice.
      setUploadedDocuments(prev => prev.map((d, idx) => {
        if (idx !== index) return d;
        if (reachesCompleted && d.realIndexingStatus === 'completed') {
          return { ...d, indexingStatus: nextStatus, stage: 'completed' };
        }
        return { ...d, indexingStatus: nextStatus };
      }));
      // Release the slot so the next effect run can schedule the following step.
      statusCatchUpTimersRef.current.delete(documentId);
      if (reachesCompleted) {
        stopPolling(documentId);
        // Load the segment preview automatically; errors are handled inside.
        void loadSegmentsPreview(documentId, index);
      }
    }, 1000); // each status stays visible for at least 1 second
    statusCatchUpTimersRef.current.set(documentId, timer);
  });
}, [uploadedDocuments, stopPolling, loadSegmentsPreview]);
/**
 * Fetches the indexing status of a batch once and records it on the document.
 *
 * Only the real status and the segment counters are written here; the
 * displayed status is advanced separately by the status catch-up effect.
 * When the reported status is `error`, polling for the document is stopped
 * and the document is moved to the `error` stage. Request failures are
 * logged and otherwise ignored, so the next poll tick simply tries again.
 *
 * @param batch - Batch identifier passed to the indexing-status endpoint.
 * @param documentId - ID of the document being tracked.
 * @param docIndex - Position of the document in `uploadedDocuments`.
 */
const pollIndexingStatus = useCallback(async (batch: string, documentId: string, docIndex: number) => {
  try {
    const response = await fetchIndexingStatus(datasetId, batch);
    // Only the first entry of the response is used.
    const documentStatus = response.data?.[0];
    if (!documentStatus) {
      return;
    }
    const realStatus = documentStatus.indexing_status as IndexingStatus;
    const failed = realStatus === 'error';
    // Side effect kept outside the state updater so the updater stays pure.
    if (failed) {
      stopPolling(documentId);
    }
    setUploadedDocuments(prev => prev.map((doc, idx) => {
      // Ignore late responses for a document that no longer occupies this slot.
      if (idx !== docIndex || doc.documentId !== documentId) return doc;
      if (failed) {
        return {
          ...doc,
          stage: 'error',
          error: documentStatus.error || '处理失败',
          realIndexingStatus: realStatus
        };
      }
      return {
        ...doc,
        realIndexingStatus: realStatus,
        completedSegments: documentStatus.completed_segments,
        totalSegments: documentStatus.total_segments
      };
    }));
  } catch (err) {
    console.error('获取索引状态失败:', err);
  }
}, [datasetId, stopPolling]);
/**
 * Starts polling the indexing status of a document every 2 seconds.
 *
 * Any polling already running for the same document is stopped first, and
 * one status request is issued immediately instead of waiting for the first
 * interval tick.
 *
 * @param batch - Batch identifier to query.
 * @param documentId - ID of the document; also the key of the polling timer.
 * @param docIndex - Position of the document in `uploadedDocuments`.
 */
const startPolling = useCallback((batch: string, documentId: string, docIndex: number) => {
  stopPolling(documentId);
  const tick = () => {
    pollIndexingStatus(batch, documentId, docIndex);
  };
  pollingTimersRef.current.set(documentId, setInterval(tick, 2000));
  tick();
}, [stopPolling, pollIndexingStatus]);
/**
 * Builds the upload configuration object from segmentation settings.
 *
 * The literal two-character sequence `\n` typed into the separator field is
 * converted into a real newline character.
 *
 * @param s - Segmentation settings to translate.
 * @returns Configuration with `indexing_technique` and a `custom` process rule.
 */
const buildConfig = useCallback((s: SegmentationSettings) => {
  const preProcessingRules = [
    { id: 'remove_extra_spaces' as const, enabled: s.removeExtraSpaces },
    { id: 'remove_urls_emails' as const, enabled: s.removeUrlsEmails },
  ];
  const segmentation = {
    separator: s.separator.replace(/\\n/g, '\n'),
    max_tokens: s.maxTokens,
  };
  return {
    indexing_technique: s.indexingTechnique,
    process_rule: {
      mode: 'custom' as const,
      rules: {
        pre_processing_rules: preProcessingRules,
        segmentation,
      },
    },
  };
}, []);
/**
 * Updates one field of the displayed segmentation settings and mirrors the
 * change onto the currently selected document.
 *
 * The two state updates are issued side by side rather than nesting one
 * inside the other's updater, because state updater functions must be pure.
 * The document's own `settings` are patched with the same key/value; they
 * mirror `currentSettings` for the selected document, so both stay in sync.
 *
 * @param key - Name of the setting to change.
 * @param value - New value for that setting. Typed as `any` to keep the
 *   existing call signature unchanged for callers.
 */
const updateCurrentSettings = useCallback((key: keyof SegmentationSettings, value: any) => {
  setCurrentSettings(prev => ({ ...prev, [key]: value }));
  setUploadedDocuments(prevDocs => prevDocs.map((doc, idx) =>
    idx === currentDocIndex ? { ...doc, settings: { ...doc.settings, [key]: value } } : doc
  ));
}, [currentDocIndex]);
/**
 * Handles a change of the upload component's file list.
 *
 * Stores the new list and refreshes `selectedFiles` with the underlying file
 * object of every entry that carries one (`originFileObj`).
 */
const handleFileChange: UploadProps['onChange'] = useCallback(({ fileList: newFileList }: { fileList: UploadFile[] }) => {
  setFileList(newFileList);
  const files: File[] = [];
  for (const entry of newFileList) {
    if (entry.originFileObj) {
      files.push(entry.originFileObj as File);
    }
  }
  setSelectedFiles(files);
}, []);
/**
 * Removes one entry from the file list.
 *
 * The entry is matched by `uid`; `selectedFiles` is recomputed from the
 * remaining entries that carry an `originFileObj`.
 *
 * @param file - Upload entry to remove.
 */
const handleRemoveFile = useCallback((file: UploadFile) => {
  setFileList(prev => {
    const remainingEntries: UploadFile[] = [];
    const remainingFiles: File[] = [];
    for (const entry of prev) {
      if (entry.uid === file.uid) {
        continue;
      }
      remainingEntries.push(entry);
      if (entry.originFileObj) {
        remainingFiles.push(entry.originFileObj as File);
      }
    }
    setSelectedFiles(remainingFiles);
    return remainingEntries;
  });
}, []);
/**
 * Uploads a single file and starts tracking its indexing.
 *
 * The document at `index` goes through `uploading` (with progress updates)
 * and then `indexing`; after a successful upload its document ID and batch
 * are stored and status polling is started. The upload always uses the
 * default segmentation settings. On failure the document is moved to the
 * `error` stage with the error's message; the error is not re-thrown.
 *
 * @param file - File to upload.
 * @param index - Position of the matching document in `uploadedDocuments`.
 */
const uploadSingleFile = useCallback(async (file: File, index: number): Promise<void> => {
  // Merges a partial update into the document stored at `index`.
  const patchDocument = (patch: Partial<UploadedDocument>) => {
    setUploadedDocuments(prev => prev.map((doc, idx) => (idx === index ? { ...doc, ...patch } : doc)));
  };
  try {
    patchDocument({ stage: 'uploading' });
    const config = buildConfig(DEFAULT_SEGMENTATION_SETTINGS);
    const result = await uploadDocumentWithConfig(
      datasetId,
      file,
      config,
      (percent) => patchDocument({ uploadProgress: percent })
    );
    const documentId = result.document.id;
    const batch = result.batch;
    // Displayed and real status both start at `waiting`.
    patchDocument({
      documentId,
      batch,
      stage: 'indexing',
      indexingStatus: 'waiting',
      realIndexingStatus: 'waiting',
    });
    startPolling(batch, documentId, index);
  } catch (err: any) {
    console.error(`上传文档 ${file.name} 失败:`, err);
    patchDocument({
      stage: 'error',
      error: err.message || '上传失败',
    });
  }
}, [datasetId, buildConfig, startPolling]);
/**
 * "Next step" handler: switches to step 2 and uploads all selected files.
 *
 * Shows a warning and returns when no file is selected. Otherwise a pending
 * document entry is created per file, the first document is selected with the
 * default settings, and the files are uploaded one after another in order.
 */
const handleNextStep = useCallback(async () => {
  if (selectedFiles.length === 0) {
    message.warning('请先选择文件');
    return;
  }
  // Initial state of a document that has not been uploaded yet.
  const toPendingDocument = (file: File): UploadedDocument => ({
    file,
    documentId: '',
    batch: '',
    stage: 'pending',
    indexingStatus: 'waiting',
    realIndexingStatus: 'waiting',
    uploadProgress: 0,
    settings: { ...DEFAULT_SEGMENTATION_SETTINGS },
    segments: [],
  });
  setUploadedDocuments(selectedFiles.map(file => toPendingDocument(file)));
  setCurrentDocIndex(0);
  setCurrentSettings({ ...DEFAULT_SEGMENTATION_SETTINGS });
  setStep(2);
  // Sequential on purpose: each upload is awaited before the next one starts.
  for (const [position, file] of selectedFiles.entries()) {
    await uploadSingleFile(file, position);
  }
}, [selectedFiles, uploadSingleFile]);
/**
 * Switches the document being viewed.
 *
 * The first document whose document ID or file name equals `docId` is
 * selected and its settings become the displayed settings. Nothing happens
 * when no document matches.
 *
 * @param docId - Document ID or file name of the document to select.
 */
const handleDocumentChange = useCallback((docId: string) => {
  const matchesId = (doc: UploadedDocument) => doc.documentId === docId || doc.file.name === docId;
  const index = uploadedDocuments.findIndex(matchesId);
  if (index === -1) {
    return;
  }
  setCurrentDocIndex(index);
  const selected = uploadedDocuments[index];
  setCurrentSettings(selected.settings);
}, [uploadedDocuments]);
/**
 * Re-processes the current document with the currently displayed settings.
 *
 * Returns immediately when there is no current document or it has no document
 * ID yet. Otherwise the document is reset to `uploading` (progress 0, no
 * segments), its file is sent again with the new configuration, and status
 * polling restarts with the new batch. On failure the document is moved to
 * the `error` stage and an error message is shown.
 */
const handleReprocess = useCallback(async () => {
  const targetIndex = currentDocIndex;
  const target = uploadedDocuments[targetIndex];
  if (!target?.documentId) {
    return;
  }
  // Merges a partial update into the document stored at `targetIndex`.
  const patchTarget = (patch: Partial<UploadedDocument>) => {
    setUploadedDocuments(prev => prev.map((doc, idx) => (idx === targetIndex ? { ...doc, ...patch } : doc)));
  };
  patchTarget({ stage: 'uploading', uploadProgress: 0, segments: [] });
  try {
    const config = buildConfig(currentSettings);
    const result = await updateDocumentByFile(
      datasetId,
      target.documentId,
      target.file,
      config,
      (percent) => patchTarget({ uploadProgress: percent })
    );
    // New batch; displayed and real status both restart at `waiting`.
    patchTarget({
      batch: result.batch,
      stage: 'indexing',
      indexingStatus: 'waiting',
      realIndexingStatus: 'waiting',
    });
    startPolling(result.batch, target.documentId, targetIndex);
  } catch (err: any) {
    console.error('重新处理失败:', err);
    patchTarget({
      stage: 'error',
      error: err.message || '重新处理失败',
    });
    message.error(err.message || '重新处理失败');
  }
}, [uploadedDocuments, currentDocIndex, currentSettings, datasetId, buildConfig, startPolling]);
/**
 * Returns to step 1.
 *
 * Refused with a warning while any document is still uploading or indexing.
 * Otherwise all polling is stopped and the document list, the selected index
 * and the displayed settings are reset.
 */
const handlePrevStep = useCallback(() => {
  const stillProcessing = uploadedDocuments.some(({ stage }) => stage === 'uploading' || stage === 'indexing');
  if (stillProcessing) {
    message.warning('还有文档正在处理中,请等待完成');
    return;
  }
  stopAllPolling();
  setStep(1);
  setUploadedDocuments([]);
  setCurrentDocIndex(0);
  setCurrentSettings(DEFAULT_SEGMENTATION_SETTINGS);
}, [uploadedDocuments, stopAllPolling]);
/**
 * Leaves the upload flow and returns to the document list.
 *
 * Stops all polling, calls `onSuccess` when at least one document has
 * completed, and always calls `onClose` afterwards.
 */
const handleGoToDocuments = useCallback(() => {
  stopAllPolling();
  const anyCompleted = uploadedDocuments.some(({ stage }) => stage === 'completed');
  if (anyCompleted) {
    onSuccess();
  }
  onClose();
}, [uploadedDocuments, stopAllPolling, onSuccess, onClose]);
/**
 * Returns the currently selected document.
 *
 * @returns The document at `currentDocIndex`, or `null` when there is none.
 */
const getCurrentDocument = useCallback((): UploadedDocument | null => {
  const selected = uploadedDocuments[currentDocIndex];
  return selected ? selected : null;
}, [uploadedDocuments, currentDocIndex]);
/**
 * Returns the progress value of the current document.
 *
 * While uploading this is the upload progress; while indexing or once
 * completed it is the percent configured for the displayed indexing status.
 * In every other case (including no current document) it is 0.
 */
const getCurrentProgress = useCallback(() => {
  const doc = getCurrentDocument();
  if (!doc) return 0;
  switch (doc.stage) {
    case 'uploading':
      return doc.uploadProgress;
    case 'indexing':
    case 'completed':
      return INDEXING_STATUS_CONFIG[doc.indexingStatus]?.percent || 0;
    default:
      return 0;
  }
}, [getCurrentDocument]);
/**
 * Returns the status text shown for the current document.
 *
 * The text depends on the document's stage: upload progress while uploading,
 * the configured text of the displayed indexing status while indexing (with a
 * completed/total segment counter during the splitting and indexing statuses
 * when a total is known), a summary with the segment count once completed,
 * and the stored error text on failure. Any other case yields an empty string.
 */
const getStatusText = useCallback(() => {
  const doc = getCurrentDocument();
  if (!doc) return '';
  switch (doc.stage) {
    case 'uploading':
      return `正在上传... ${doc.uploadProgress}%`;
    case 'indexing': {
      const baseText = INDEXING_STATUS_CONFIG[doc.indexingStatus]?.text || '处理中...';
      const inSegmentPhase = doc.indexingStatus === 'splitting' || doc.indexingStatus === 'indexing';
      // Only show the counter when a positive segment total is known.
      if (inSegmentPhase && doc.totalSegments && doc.totalSegments > 0) {
        return `${baseText} (${doc.completedSegments || 0}/${doc.totalSegments})`;
      }
      return baseText;
    }
    case 'completed':
      return `处理完成 (${doc.totalSegments || doc.segments?.length || 0} 段)`;
    case 'error':
      return doc.error || '处理失败';
    default:
      return '';
  }
}, [getCurrentDocument]);
/**
 * Tells whether the current document is still being processed.
 *
 * @returns `true` when the current document is in the `uploading` or
 *   `indexing` stage; `false` otherwise, including when there is no document.
 */
const isCurrentDocProcessing = useCallback(() => {
  const stage = getCurrentDocument()?.stage;
  return stage === 'uploading' || stage === 'indexing';
}, [getCurrentDocument]);
/**
 * Returns completion statistics over all uploaded documents.
 *
 * @returns `completed`: number of documents in the `completed` stage;
 *   `total`: number of documents overall.
 */
const getCompletionStats = useCallback(() => {
  let completed = 0;
  for (const doc of uploadedDocuments) {
    if (doc.stage === 'completed') {
      completed += 1;
    }
  }
  return { completed, total: uploadedDocuments.length };
}, [uploadedDocuments]);
// Public surface of the hook: state values first, then handlers, then getters.
return {
// State
step,
selectedFiles,
fileList,
uploadedDocuments,
currentDocIndex,
currentSettings,
previewLoading,
// Handlers
handleFileChange,
handleRemoveFile,
handleNextStep,
handleDocumentChange,
handleReprocess,
handlePrevStep,
handleGoToDocuments,
updateCurrentSettings,
// Getter functions for values derived from the current state
getCurrentDocument,
getCurrentProgress,
getStatusText,
isCurrentDocProcessing,
getCompletionStats,
};
}
/** Shape of the object returned by `useDocumentUpload`, derived from the hook itself. */
export type UseDocumentUploadReturn = ReturnType<typeof useDocumentUpload>;