feat: 完善Dify知识库管理召回测试模块,优化知识库上传文件时的分段配置设置

This commit is contained in:
PingChuan
2025-12-03 12:03:09 +08:00
parent 0f49426a2e
commit a13f3b3635
29 changed files with 4016 additions and 1880 deletions
@@ -1,4 +1,3 @@
import { useState, useEffect } from 'react';
import {
Input,
Button,
@@ -8,7 +7,6 @@ import {
Card,
Empty,
Spin,
message,
Divider,
Tooltip,
} from 'antd';
@@ -17,45 +15,8 @@ import {
ReloadOutlined,
EyeOutlined,
} from '@ant-design/icons';
import type { Document } from '~/api/dify-dataset/type/documentTypes';
import type { Segment } from '~/api/dify-dataset/type';
import { fetchSegments } from '~/api/dify-dataset/api/segmentApi';
import { updateDocumentWithSettings } from '~/api/dify-dataset/api/documentApi';
/**
 * Props accepted by the DocumentDetail component.
 */
interface DocumentDetailProps {
/** Dataset identifier; passed as the first argument to the segment and document API calls. */
datasetId: string;
/** Document whose details are shown, or null when there is none (the handlers then do nothing). */
document: Document | null;
}
/**
 * Segmentation settings.
 * Note: the Dify API supports only a limited set of parameters.
 * - separator: ✅ supported
 * - maxTokens: ✅ supported
 * - removeExtraSpaces: ✅ supported
 * - removeUrlsEmails: ✅ supported
 * - useQASegment: ⚠️ requires doc_form: "qa_model"
 */
interface SegmentationSettings {
// Segment separator in escaped form; literal "\n" sequences are turned into real newlines when saving.
separator: string;
// Sent as `segmentation.max_tokens` when saving.
maxTokens: number;
// Enables the 'remove_extra_spaces' pre-processing rule when saving.
removeExtraSpaces: boolean;
// Enables the 'remove_urls_emails' pre-processing rule when saving.
removeUrlsEmails: boolean;
// NOTE(review): not included in the save payload visible in this file section — confirm where it is consumed.
useQASegment: boolean;
// NOTE(review): presumably the language used for Q&A segmentation; not included in the visible save payload — confirm.
qaLanguage: string;
}
/**
 * Default segmentation settings.
 * Used as the initial state, when the user resets, and whenever the selected document changes.
 */
const DEFAULT_SETTINGS: SegmentationSettings = {
// Escaped form: the string holds a literal backslash followed by "n", twice (not real newlines).
separator: '\\n\\n',
maxTokens: 500,
removeExtraSpaces: true,
removeUrlsEmails: false,
useQASegment: false,
qaLanguage: 'Chinese',
};
import { useDocumentDetail } from '~/hooks/dify-dataset-manager/document-detail';
import type { DocumentDetailProps } from '~/types/dify-dataset-manager/document-detail';
/**
* 文档详情组件
@@ -65,98 +26,17 @@ export default function DocumentDetail({
datasetId,
document,
}: DocumentDetailProps) {
// Segmentation settings state (starts from the defaults)
const [settings, setSettings] = useState<SegmentationSettings>(DEFAULT_SETTINGS);
// Preview state: the segments to show, whether they are loading, and whether the preview is visible
const [previewSegments, setPreviewSegments] = useState<Segment[]>([]);
const [previewLoading, setPreviewLoading] = useState(false);
const [showPreview, setShowPreview] = useState(false);
// Saving state: true while the save request is in flight
const [saving, setSaving] = useState(false);
// Reset the settings and the preview whenever the document id changes.
useEffect(() => {
  if (!document) {
    return;
  }
  // Existing settings could be read from the document; for now the defaults are used.
  setSettings(DEFAULT_SETTINGS);
  setPreviewSegments([]);
  setShowPreview(false);
}, [document?.id]);
/**
 * Update a single segmentation setting.
 *
 * @param key - Name of the setting to change.
 * @param value - New value for that setting. Typed as `unknown` rather than `any`
 *   so the parameter no longer switches off type checking, while still accepting
 *   every argument existing callers pass.
 *   TODO(review): tighten to `SegmentationSettings[K]` with a generic key once the
 *   call sites are confirmed to pass exactly-typed values.
 */
const updateSettings = (key: keyof SegmentationSettings, value: unknown): void => {
  // Functional update so that consecutive changes build on the latest state.
  setSettings(prev => ({ ...prev, [key]: value }));
};
/**
 * Restore the default segmentation settings and discard the current preview.
 */
const handleReset = (): void => {
  // Back to the defaults.
  setSettings(DEFAULT_SETTINGS);
  // Drop any previewed segments and hide the preview.
  setPreviewSegments([]);
  setShowPreview(false);
};
/**
 * Preview segmentation by loading the segments that already exist for the
 * current document.
 *
 * Does nothing when no document is selected. Shows the preview and its loading
 * indicator immediately, then fills in the segments once the request resolves.
 * Failures are logged and reported to the user; the loading flag is always cleared.
 */
const handlePreview = async (): Promise<void> => {
  if (!document) return;

  setPreviewLoading(true);
  setShowPreview(true);
  try {
    // Fetch the document's current segments to use as the preview
    // (arguments 1 and 50 are presumably page number and page size — confirm against fetchSegments).
    const response = await fetchSegments(datasetId, document.id, 1, 50);
    // Normalise once so the rendered list and the "empty" notice stay in sync
    // even when the response carries no `data` field at all.
    const segments = response.data ?? [];
    setPreviewSegments(segments);
    if (segments.length === 0) {
      message.info('该文档暂无分段数据');
    }
  } catch (err: unknown) {
    console.error('预览分段失败:', err);
    // A thrown value is not guaranteed to be an Error (or even an object),
    // so read `message` defensively instead of dereferencing it blindly.
    const detail =
      typeof err === 'object' && err !== null
        ? (err as { message?: unknown }).message
        : undefined;
    message.error(typeof detail === 'string' && detail !== '' ? detail : '预览失败');
  } finally {
    setPreviewLoading(false);
  }
};
/**
 * Save the segmentation settings for the current document.
 *
 * Does nothing when no document is selected. Sends the settings as a custom
 * process rule with `indexing_technique: 'high_quality'`; on success the user is
 * told the document is being reprocessed. Failures are logged and reported to
 * the user; the saving flag is always cleared.
 *
 * NOTE(review): `settings.useQASegment` and `settings.qaLanguage` are not part of
 * the payload built here — confirm whether Q&A segmentation is meant to be sent.
 */
const handleSaveAndProcess = async (): Promise<void> => {
  if (!document) return;

  setSaving(true);
  try {
    await updateDocumentWithSettings(datasetId, document.id, {
      indexing_technique: 'high_quality',
      process_rule: {
        mode: 'custom',
        rules: {
          pre_processing_rules: [
            { id: 'remove_extra_spaces', enabled: settings.removeExtraSpaces },
            { id: 'remove_urls_emails', enabled: settings.removeUrlsEmails },
          ],
          segmentation: {
            // The separator is kept in escaped form in state; turn each literal
            // backslash-n into a real newline before sending it.
            separator: settings.separator.replace(/\\n/g, '\n'),
            max_tokens: settings.maxTokens,
          },
        },
      },
    });
    message.success('设置已保存,文档正在重新处理...');
  } catch (err: unknown) {
    console.error('保存设置失败:', err);
    // A thrown value is not guaranteed to be an Error (or even an object),
    // so read `message` defensively instead of dereferencing it blindly.
    const detail =
      typeof err === 'object' && err !== null
        ? (err as { message?: unknown }).message
        : undefined;
    message.error(typeof detail === 'string' && detail !== '' ? detail : '保存失败');
  } finally {
    setSaving(false);
  }
};
// State values and handlers for the detail view, obtained from the useDocumentDetail hook
// for the given dataset and document.
// NOTE(review): presumably these mirror the inline state and handlers this commit removes — confirm against the hook.
const {
settings,
previewSegments,
previewLoading,
showPreview,
saving,
updateSettings,
handleReset,
handlePreview,
handleSaveAndProcess,
} = useDocumentDetail(datasetId, document);
if (!document) {
return (