feat: 完善Dify知识库管理召回测试模块,优化知识库上传文件时的分段配置设置

This commit is contained in:
PingChuan
2025-12-03 12:03:09 +08:00
parent 0f49426a2e
commit a13f3b3635
29 changed files with 4016 additions and 1880 deletions
@@ -1,110 +1,36 @@
import { useState, useEffect, useRef, useCallback } from 'react';
import {
Input,
ArrowLeftOutlined,
CheckCircleOutlined,
DeleteOutlined,
ExclamationCircleOutlined,
FileTextOutlined,
InboxOutlined,
LoadingOutlined,
QuestionCircleOutlined,
} from '@ant-design/icons';
import type { UploadFile } from 'antd';
import {
Button,
InputNumber,
Checkbox,
Select,
Card,
Empty,
Spin,
message,
Checkbox,
Divider,
Tooltip,
Empty,
Input,
InputNumber,
Progress,
Select,
Spin,
Tooltip,
Upload,
} from 'antd';
import type { UploadFile, UploadProps } from 'antd';
import {
QuestionCircleOutlined,
FileTextOutlined,
CheckCircleOutlined,
LoadingOutlined,
ExclamationCircleOutlined,
InboxOutlined,
DeleteOutlined,
ArrowLeftOutlined,
} from '@ant-design/icons';
import type { IndexingStatus } from '~/api/dify-dataset/type/documentTypes';
import { useEffect, useState } from 'react';
import type { Segment } from '~/api/dify-dataset/type';
import {
uploadDocumentWithConfig,
updateDocumentByFile,
fetchIndexingStatus,
} from '~/api/dify-dataset/api/documentApi';
import { fetchSegments } from '~/api/dify-dataset/api/segmentApi';
import { useDocumentUpload } from '~/hooks/dify-dataset-manager/document-upload';
import type { DocumentUploadProps, UploadedDocument } from '~/types/dify-dataset-manager/document-upload';
import { SUPPORTED_FORMATS } from '~/types/dify-dataset-manager/document-upload';
const { Dragger } = Upload;
/**
 * Props accepted by the DocumentUpload component.
 */
interface DocumentUploadProps {
// Id of the dataset that uploaded documents are sent to.
datasetId: string;
// Called when the user leaves the upload view.
onClose: () => void;
// Called before onClose when at least one document finished processing.
onSuccess: () => void;
}
/**
 * Segmentation (chunking) settings applied to an uploaded document.
 */
interface SegmentationSettings {
// Segment separator; literal backslash-n sequences are turned into real newlines by buildConfig.
separator: string;
// Sent as `max_tokens` of the segmentation rule.
maxTokens: number;
// NOTE(review): buildConfig in this file does not forward this value — confirm whether that is intended.
chunkOverlap: number;
// Enables the `remove_extra_spaces` pre-processing rule.
removeExtraSpaces: boolean;
// Enables the `remove_urls_emails` pre-processing rule.
removeUrlsEmails: boolean;
// Sent as `indexing_technique`.
indexingTechnique: 'high_quality' | 'economy';
}
/**
 * Default segmentation settings. Used for the initial upload of every file
 * and restored when the user goes back to the file-selection step.
 */
const DEFAULT_SETTINGS: SegmentationSettings = {
// Escaped form: the string holds literal backslash-n pairs, not newline characters.
separator: '\\n\\n',
maxTokens: 1024,
chunkOverlap: 50,
removeExtraSpaces: true,
removeUrlsEmails: false,
indexingTechnique: 'high_quality',
};
/**
 * Upload-flow stage of a single document.
 * New entries start as 'pending'; 'uploading' and 'indexing' are treated as
 * "still processing" by the rest of the component.
 */
type DocumentStage = 'pending' | 'uploading' | 'indexing' | 'completed' | 'error';
/**
 * State tracked for each uploaded document (multiple files are supported).
 */
interface UploadedDocument {
// The file object selected by the user.
file: File;
// Empty string until the upload request returns the document id.
documentId: string;
// Batch id returned by the upload/update request; used to query indexing status.
batch: string;
// Current stage in the upload flow.
stage: DocumentStage;
// Latest indexing status reported by polling.
indexingStatus: IndexingStatus;
// Percent value reported by the upload progress callback.
uploadProgress: number;
// Failure message, set when stage is 'error'.
error?: string;
// Segmentation settings associated with this document.
settings: SegmentationSettings;
// Segments loaded for preview once indexing has completed.
segments: Segment[];
}
/**
 * Display text and progress-bar percent for each indexing status.
 * 'paused' and 'error' map to 0 percent.
 */
const INDEXING_STATUS_CONFIG: Record<IndexingStatus, { text: string; percent: number }> = {
waiting: { text: '等待处理...', percent: 10 },
parsing: { text: '解析文档...', percent: 30 },
cleaning: { text: '清洗文本...', percent: 50 },
splitting: { text: '分段处理...', percent: 70 },
indexing: { text: '建立索引...', percent: 85 },
completed: { text: '处理完成', percent: 100 },
paused: { text: '已暂停', percent: 0 },
error: { text: '处理失败', percent: 0 },
};
/**
 * Human-readable list of supported file formats, rendered in the upload hint.
 */
const SUPPORTED_FORMATS = 'TXT, MARKDOWN, MDX, PDF, HTML, XLSX, XLS, DOCX, CSV, VTT, PROPERTIES, MD, HTM';
/**
* 文档上传组件
* 支持多文件上传,两步流程:选择文件 → 上传并配置分段
@@ -114,419 +40,55 @@ export default function DocumentUpload({
onClose,
onSuccess,
}: DocumentUploadProps) {
// 步骤控制
const [step, setStep] = useState<1 | 2>(1);
const {
// 状态
step,
fileList,
uploadedDocuments,
currentSettings,
previewLoading,
// 文件相关
const [selectedFiles, setSelectedFiles] = useState<File[]>([]);
const [fileList, setFileList] = useState<UploadFile[]>([]);
// 方法
handleFileChange,
handleRemoveFile,
handleNextStep,
handleDocumentChange,
handleReprocess,
handlePrevStep,
handleGoToDocuments,
updateCurrentSettings,
// 多文档状态管理
const [uploadedDocuments, setUploadedDocuments] = useState<UploadedDocument[]>([]);
// 当前选中查看的文档索引
const [currentDocIndex, setCurrentDocIndex] = useState(0);
// 计算属性方法
getCurrentDocument,
getCurrentProgress,
getStatusText,
isCurrentDocProcessing,
getCompletionStats,
} = useDocumentUpload(datasetId, onClose, onSuccess);
// 当前显示的分段设置(来自当前选中的文档)
const [currentSettings, setCurrentSettings] = useState<SegmentationSettings>(DEFAULT_SETTINGS);
const selectedFiles = fileList.filter((f: UploadFile) => f.originFileObj).map((f: UploadFile) => f.originFileObj as File);
// 预览相关
const [previewLoading, setPreviewLoading] = useState(false);
// 平滑进度条逻辑
const [displayPercent, setDisplayPercent] = useState(0);
const targetPercent = getCurrentProgress();
// 轮询定时器(支持多个文档)
const pollingTimersRef = useRef<Map<string, NodeJS.Timeout>>(new Map());
// 清理所有轮询定时器
useEffect(() => {
return () => {
pollingTimersRef.current.forEach(timer => clearInterval(timer));
pollingTimersRef.current.clear();
};
}, []);
if (targetPercent > displayPercent) {
// 如果目标进度大于当前显示进度,启动动画
const diff = targetPercent - displayPercent;
// 动态步长:差距越大跑得越快,但最小步长为1
const step = Math.max(1, Math.ceil(diff / 10));
/**
* 停止指定文档的轮询
*/
const stopPolling = useCallback((documentId: string) => {
const timer = pollingTimersRef.current.get(documentId);
if (timer) {
clearInterval(timer);
pollingTimersRef.current.delete(documentId);
const timer = requestAnimationFrame(() => {
setDisplayPercent(prev => Math.min(targetPercent, prev + step));
});
return () => cancelAnimationFrame(timer);
} else if (targetPercent < displayPercent && targetPercent === 0) {
// 如果目标重置为0(例如重新开始),立即重置
setDisplayPercent(0);
}
}, []);
/**
 * Cancel every active polling interval and empty the timer registry.
 */
const stopAllPolling = useCallback(() => {
  const timers = pollingTimersRef.current;
  for (const handle of timers.values()) {
    clearInterval(handle);
  }
  timers.clear();
}, []);
/**
 * Fetch the segments of a document and store them on the matching entry of
 * the uploaded-documents list. The preview-loading flag is raised for the
 * duration of the request and always lowered afterwards.
 *
 * @param documentId - Id of the document whose segments are requested.
 * @param docIndex - Position of the document in `uploadedDocuments`.
 */
const loadSegmentsPreview = useCallback(
  async (documentId: string, docIndex: number) => {
    setPreviewLoading(true);
    try {
      const { data } = await fetchSegments(datasetId, documentId, 1, 50);
      const loadedSegments = data || [];
      // Attach the segments to the document at the given position only.
      setUploadedDocuments(current =>
        current.map((entry, position) =>
          position === docIndex ? { ...entry, segments: loadedSegments } : entry,
        ),
      );
    } catch (error: unknown) {
      console.error('加载分段预览失败:', error);
      message.error('加载分段预览失败');
    } finally {
      setPreviewLoading(false);
    }
  },
  [datasetId],
);
/**
 * Query the indexing status of one uploaded document and mirror it into state.
 *
 * Polling for the document stops once the status is `completed` or `error`.
 * On completion the segment preview is loaded automatically. A failed status
 * request is only logged, so a still-running interval retries on its next tick.
 *
 * @param batch - Batch id returned by the upload/update request.
 * @param documentId - Id of the document; also the key of its polling timer.
 * @param docIndex - Position of the document in `uploadedDocuments`.
 */
const pollIndexingStatus = useCallback(
  async (batch: string, documentId: string, docIndex: number) => {
    try {
      const response = await fetchIndexingStatus(datasetId, batch);
      const documentStatus = response.data?.[0];
      if (!documentStatus) return;

      const status = documentStatus.indexing_status as IndexingStatus;
      const isCompleted = status === 'completed';
      const isFailed = status === 'error';

      // Terminal statuses end the polling loop for this document.
      if (isCompleted || isFailed) {
        stopPolling(documentId);
      }

      // Apply status and (for terminal statuses) stage in a single state update.
      setUploadedDocuments(prev => prev.map((doc, idx) => {
        if (idx !== docIndex) return doc;
        if (isCompleted) {
          return { ...doc, indexingStatus: status, stage: 'completed' as DocumentStage };
        }
        if (isFailed) {
          return {
            ...doc,
            indexingStatus: status,
            stage: 'error' as DocumentStage,
            error: documentStatus.error || '处理失败',
          };
        }
        return { ...doc, indexingStatus: status };
      }));

      if (isCompleted) {
        // Load the segment preview as soon as indexing has finished.
        loadSegmentsPreview(documentId, docIndex);
      }
    } catch (err) {
      console.error('获取索引状态失败:', err);
    }
  },
  // `uploadedDocuments` is intentionally not a dependency: the callback only
  // uses functional state updates, so depending on the list would recreate
  // this callback on every progress tick.
  [datasetId, stopPolling, loadSegmentsPreview],
);
/**
 * (Re)start status polling for a document: any existing timer for the same
 * document is cancelled, a 2-second interval is registered, and one poll is
 * issued immediately.
 *
 * @param batch - Batch id passed through to the status query.
 * @param documentId - Id of the document; key of the timer registry.
 * @param docIndex - Position of the document in `uploadedDocuments`.
 */
const startPolling = useCallback(
  (batch: string, documentId: string, docIndex: number) => {
    const tick = () => {
      pollIndexingStatus(batch, documentId, docIndex);
    };

    // Drop a previous timer for this document, if there is one.
    stopPolling(documentId);

    pollingTimersRef.current.set(documentId, setInterval(tick, 2000));

    // Do not wait for the first interval to elapse.
    tick();
  },
  [stopPolling, pollIndexingStatus],
);
/**
 * Translate UI segmentation settings into the upload configuration payload.
 *
 * Literal backslash-n sequences in the separator are converted to real
 * newline characters; `chunkOverlap` is not part of the produced payload.
 *
 * @param s - Segmentation settings chosen in the UI.
 * @returns Payload with `indexing_technique` and a custom `process_rule`.
 */
const buildConfig = (s: SegmentationSettings) => {
  const preProcessingRules = [
    { id: 'remove_extra_spaces' as const, enabled: s.removeExtraSpaces },
    { id: 'remove_urls_emails' as const, enabled: s.removeUrlsEmails },
  ];

  const segmentation = {
    // Splitting on the two-character sequence backslash + n and joining with a newline unescapes it.
    separator: s.separator.split('\\n').join('\n'),
    max_tokens: s.maxTokens,
  };

  return {
    indexing_technique: s.indexingTechnique,
    process_rule: {
      mode: 'custom' as const,
      rules: {
        pre_processing_rules: preProcessingRules,
        segmentation,
      },
    },
  };
};
/**
 * Change one field of the currently displayed settings and copy the result
 * onto the currently selected document.
 *
 * @param key - Name of the setting to change.
 * @param value - New value for that setting.
 */
const updateCurrentSettings = (key: keyof SegmentationSettings, value: any) => {
  const nextSettings = { ...currentSettings, [key]: value };
  setCurrentSettings(nextSettings);

  // Keep the document list in sync with the settings panel.
  setUploadedDocuments(docs =>
    docs.map((entry, position) => {
      if (position !== currentDocIndex) {
        return entry;
      }
      return { ...entry, settings: nextSettings };
    }),
  );
};
/**
 * Upload `onChange` handler: stores the new file list and the raw File
 * objects extracted from it.
 */
const handleFileChange: UploadProps['onChange'] = (info) => {
  const nextList = info.fileList;
  setFileList(nextList);

  // Only entries that carry an underlying File object are kept.
  const rawFiles: File[] = [];
  for (const item of nextList) {
    if (item.originFileObj) {
      rawFiles.push(item.originFileObj as File);
    }
  }
  setSelectedFiles(rawFiles);
};
/**
 * Remove one file from the selection.
 *
 * @param file - The list entry to remove; matched by `uid`.
 */
const handleRemoveFile = (file: UploadFile) => {
  const remaining = fileList.filter(item => item.uid !== file.uid);
  setFileList(remaining);

  // Re-derive the raw File objects from the remaining entries.
  const rawFiles: File[] = [];
  for (const item of remaining) {
    if (item.originFileObj) {
      rawFiles.push(item.originFileObj as File);
    }
  }
  setSelectedFiles(rawFiles);
};
/**
 * Upload one file with the default segmentation settings and start polling
 * its indexing status. Failures are recorded on the document entry instead of
 * being rethrown.
 *
 * @param file - File to upload.
 * @param index - Position of the file's entry in `uploadedDocuments`.
 */
const uploadSingleFile = async (file: File, index: number): Promise<void> => {
  // Merge a partial change into the entry at `index`, leaving the others untouched.
  const patchEntry = (changes: Partial<UploadedDocument>) => {
    setUploadedDocuments(prev =>
      prev.map((doc, idx) => (idx === index ? { ...doc, ...changes } : doc)),
    );
  };

  try {
    // Mark the entry as uploading.
    patchEntry({ stage: 'uploading' as DocumentStage });

    const config = buildConfig(DEFAULT_SETTINGS);
    const result = await uploadDocumentWithConfig(datasetId, file, config, (percent) => {
      patchEntry({ uploadProgress: percent });
    });

    // Record the ids returned by the server and move on to the indexing stage.
    patchEntry({
      documentId: result.document.id,
      batch: result.batch,
      stage: 'indexing' as DocumentStage,
      indexingStatus: 'waiting' as IndexingStatus,
    });

    // Begin polling the indexing status.
    startPolling(result.batch, result.document.id, index);
  } catch (err: any) {
    console.error(`上传文档 ${file.name} 失败:`, err);
    patchEntry({
      stage: 'error' as DocumentStage,
      error: err.message || '上传失败',
    });
  }
};
/**
 * "Next" button handler: creates a pending entry for every selected file,
 * switches to step 2 and uploads the files one after another.
 */
const handleNextStep = async () => {
  if (!selectedFiles.length) {
    message.warning('请先选择文件');
    return;
  }

  // One pending entry per selected file, each with its own copy of the defaults.
  const initialDocs = selectedFiles.map((file): UploadedDocument => ({
    file,
    documentId: '',
    batch: '',
    stage: 'pending' as DocumentStage,
    indexingStatus: 'waiting' as IndexingStatus,
    uploadProgress: 0,
    settings: { ...DEFAULT_SETTINGS },
    segments: [],
  }));

  setUploadedDocuments(initialDocs);
  setCurrentDocIndex(0);
  setCurrentSettings({ ...DEFAULT_SETTINGS });
  setStep(2);

  // Uploads run sequentially, in selection order.
  for (const [position, file] of selectedFiles.entries()) {
    await uploadSingleFile(file, position);
  }
};
/**
 * Switch the document being viewed.
 *
 * @param docId - Either a document id or a file name; the first entry
 *   matching one of the two is selected. Unknown values are ignored.
 */
const handleDocumentChange = (docId: string) => {
  const index = uploadedDocuments.findIndex(
    ({ documentId, file }) => documentId === docId || file.name === docId,
  );
  if (index === -1) {
    return;
  }

  setCurrentDocIndex(index);
  // Show the settings stored on the newly selected document.
  setCurrentSettings(uploadedDocuments[index].settings);
};
/**
 * Re-submit the current document with the currently displayed settings and
 * restart status polling. Does nothing when there is no current document or
 * it has no document id yet.
 */
const handleReprocess = async () => {
  const currentDoc = uploadedDocuments[currentDocIndex];
  if (!currentDoc || !currentDoc.documentId) return;

  // Merge a partial change into the current document's entry.
  const patchCurrent = (changes: Partial<UploadedDocument>) => {
    setUploadedDocuments(prev =>
      prev.map((doc, idx) => (idx === currentDocIndex ? { ...doc, ...changes } : doc)),
    );
  };

  // Reset progress and preview before re-uploading.
  patchCurrent({
    stage: 'uploading' as DocumentStage,
    uploadProgress: 0,
    segments: [],
  });

  try {
    const config = buildConfig(currentSettings);
    const result = await updateDocumentByFile(
      datasetId,
      currentDoc.documentId,
      currentDoc.file,
      config,
      (percent) => {
        patchCurrent({ uploadProgress: percent });
      },
    );

    // Store the new batch id and move to the indexing stage.
    patchCurrent({
      batch: result.batch,
      stage: 'indexing' as DocumentStage,
      indexingStatus: 'waiting' as IndexingStatus,
    });

    startPolling(result.batch, currentDoc.documentId, currentDocIndex);
  } catch (err: any) {
    console.error('重新处理失败:', err);
    patchCurrent({
      stage: 'error' as DocumentStage,
      error: err.message || '重新处理失败',
    });
    message.error(err.message || '重新处理失败');
  }
};
/**
 * Go back to the file-selection step. Refused (with a warning) while any
 * document is still uploading or indexing; otherwise all polling is stopped
 * and the step-2 state is reset.
 */
const handlePrevStep = () => {
  const busyStages: DocumentStage[] = ['uploading', 'indexing'];
  const stillProcessing = uploadedDocuments.some(doc => busyStages.includes(doc.stage));

  if (stillProcessing) {
    message.warning('还有文档正在处理中,请等待完成');
    return;
  }

  stopAllPolling();
  setStep(1);
  setUploadedDocuments([]);
  setCurrentDocIndex(0);
  setCurrentSettings(DEFAULT_SETTINGS);
};
/**
 * Leave the upload view: stop all polling, notify `onSuccess` when at least
 * one document completed, then call `onClose`.
 */
const handleGoToDocuments = () => {
  stopAllPolling();

  const completedIndex = uploadedDocuments.findIndex(doc => doc.stage === 'completed');
  if (completedIndex !== -1) {
    onSuccess();
  }

  onClose();
};
/**
 * Return the currently selected document, or null when the index has no entry.
 */
const getCurrentDocument = (): UploadedDocument | null => {
  const selected = uploadedDocuments[currentDocIndex];
  return selected ? selected : null;
};
/**
 * Progress percent of the current document: the upload percent while
 * uploading, the table value for the indexing status while indexing or
 * completed, and 0 in every other case.
 */
const getCurrentProgress = () => {
  const doc = getCurrentDocument();
  if (!doc) return 0;

  switch (doc.stage) {
    case 'uploading':
      return doc.uploadProgress;
    case 'indexing':
    case 'completed':
      return INDEXING_STATUS_CONFIG[doc.indexingStatus]?.percent || 0;
    default:
      return 0;
  }
};
/**
 * Status line for the current document; an empty string when there is no
 * current document or it is still pending.
 */
const getStatusText = () => {
  const doc = getCurrentDocument();
  if (!doc) return '';

  switch (doc.stage) {
    case 'uploading':
      return `正在上传... ${doc.uploadProgress}%`;
    case 'indexing':
      return INDEXING_STATUS_CONFIG[doc.indexingStatus]?.text || '处理中...';
    case 'completed':
      return '处理完成';
    case 'error':
      return doc.error || '处理失败';
    default:
      return '';
  }
};
/**
 * Whether the current document is still uploading or indexing.
 */
const isCurrentDocProcessing = () => {
  const stage = getCurrentDocument()?.stage;
  return stage === 'uploading' || stage === 'indexing';
};
/**
 * Count completed documents against the total number of uploaded documents.
 *
 * @returns `{ completed, total }`.
 */
const getCompletionStats = () => {
  const total = uploadedDocuments.length;
  const completed = uploadedDocuments.reduce(
    (count, doc) => (doc.stage === 'completed' ? count + 1 : count),
    0,
  );
  return { completed, total };
};
}, [targetPercent, displayPercent]);
/**
* 渲染步骤指示器(两步流程)
@@ -567,7 +129,7 @@ export default function DocumentUpload({
<p className="ant-upload-drag-icon">
<InboxOutlined />
</p>
<p className="ant-upload-text"> <span className="upload-link"></span></p>
<p className="ant-upload-text"> <span className="upload-link"></span></p>
<p className="ant-upload-hint">
{SUPPORTED_FORMATS} 15MB
</p>
@@ -579,7 +141,7 @@ export default function DocumentUpload({
<div className="selected-files-section">
<h3 className="section-subtitle"> ({selectedFiles.length} )</h3>
<div className="selected-files-list">
{fileList.map((file) => (
{fileList.map((file: UploadFile) => (
<div key={file.uid} className="selected-file-item">
<FileTextOutlined className="file-icon" />
<div className="file-info">
@@ -768,7 +330,7 @@ export default function DocumentUpload({
value={currentDoc?.documentId || currentDoc?.file.name}
style={{ width: 500 }}
onChange={handleDocumentChange}
options={uploadedDocuments.map((doc, idx) => ({
options={uploadedDocuments.map((doc: UploadedDocument) => ({
value: doc.documentId || doc.file.name,
label: (
<span className="file-select-option">
@@ -800,7 +362,7 @@ export default function DocumentUpload({
<LoadingOutlined className="status-icon loading" />
</div>
<Progress
percent={getCurrentProgress()}
percent={displayPercent}
status="active"
strokeColor={{
'0%': '#00684a',
@@ -808,36 +370,6 @@ export default function DocumentUpload({
}}
/>
<div className="status-text">{getStatusText()}</div>
{/* 索引阶段详情 */}
{currentDoc?.stage === 'indexing' && (
<div className="indexing-stages">
<div className={`stage-item ${['waiting', 'parsing', 'cleaning', 'splitting', 'indexing', 'completed'].includes(currentDoc.indexingStatus) ? 'active' : ''}`}>
<span className="stage-dot"></span>
<span></span>
</div>
<div className={`stage-item ${['parsing', 'cleaning', 'splitting', 'indexing', 'completed'].includes(currentDoc.indexingStatus) ? 'active' : ''}`}>
<span className="stage-dot"></span>
<span></span>
</div>
<div className={`stage-item ${['cleaning', 'splitting', 'indexing', 'completed'].includes(currentDoc.indexingStatus) ? 'active' : ''}`}>
<span className="stage-dot"></span>
<span></span>
</div>
<div className={`stage-item ${['splitting', 'indexing', 'completed'].includes(currentDoc.indexingStatus) ? 'active' : ''}`}>
<span className="stage-dot"></span>
<span></span>
</div>
<div className={`stage-item ${['indexing', 'completed'].includes(currentDoc.indexingStatus) ? 'active' : ''}`}>
<span className="stage-dot"></span>
<span></span>
</div>
<div className={`stage-item ${currentDoc.indexingStatus === 'completed' ? 'active' : ''}`}>
<span className="stage-dot"></span>
<span></span>
</div>
</div>
)}
</div>
) : currentDoc?.stage === 'error' ? (
<div className="preview-error">
@@ -855,7 +387,7 @@ export default function DocumentUpload({
</div>
) : (
<div className="preview-segments">
{currentDoc?.segments.map((segment, index) => (
{currentDoc?.segments.map((segment: Segment, index: number) => (
<div key={segment.id} className="segment-item">
<div className="segment-header">
<span className="segment-index">#{index + 1}</span>
@@ -881,7 +413,7 @@ export default function DocumentUpload({
{stats.completed}/{stats.total}
</span>
<Button type="primary" onClick={handleGoToDocuments}>
</Button>
</div>
)}