feat: 完善Dify知识库管理召回测试模块,优化知识库上传文件时的分段配置设置

This commit is contained in:
PingChuan
2025-12-03 12:03:09 +08:00
parent 0f49426a2e
commit a13f3b3635
29 changed files with 4016 additions and 1880 deletions
@@ -0,0 +1,25 @@
import type { Dataset } from '~/api/dify-dataset/type/datasetTypes';
/**
 * Props for the dataset (knowledge base) settings component.
 */
export interface DatasetSettingsProps {
/** Dataset shown by the settings view; `null` is allowed (presumably while it has not been loaded yet — confirm with the caller). */
dataset: Dataset | null;
/** Callback that receives a `Dataset`; presumably called with the updated dataset after a successful save — confirm against the component. */
onDatasetUpdated: (dataset: Dataset) => void;
}
/**
 * Values held by the dataset settings form.
 */
export interface DatasetSettingsFormValues {
/** Dataset name entered in the form. */
name: string;
/** Dataset description entered in the form. */
description: string;
}
/**
 * UI state of the dataset settings component.
 */
export interface DatasetSettingsState {
/** Flag for an in-progress save (presumably true while the save request is pending — confirm). */
saving: boolean;
/** Flag for unsaved edits (presumably true when the form differs from the stored dataset — confirm). */
hasChanges: boolean;
}
@@ -0,0 +1,51 @@
import type { Segment } from '~/api/dify-dataset/type';
import type { Document } from '~/api/dify-dataset/type/documentTypes';
/**
 * Props for the document detail component.
 */
export interface DocumentDetailProps {
/** Identifier of the dataset that owns the document. */
datasetId: string;
/** Document to display; `null` is allowed (presumably when nothing is selected — confirm with the caller). */
document: Document | null;
}
/**
 * Segmentation settings used only by the document detail view.
 *
 * Note: the Dify API supports a limited set of these parameters:
 * - separator: supported
 * - maxTokens: supported
 * - removeExtraSpaces: supported
 * - removeUrlsEmails: supported
 * - useQASegment: requires doc_form: "qa_model"
 *
 * `qaLanguage` is not covered by the support list above.
 */
export interface DocumentDetailSegmentationSettings {
/** Segment separator text. */
separator: string;
/** Maximum number of tokens per segment. */
maxTokens: number;
/** Whether extra whitespace should be removed during preprocessing. */
removeExtraSpaces: boolean;
/** Whether URLs and e-mail addresses should be removed during preprocessing. */
removeUrlsEmails: boolean;
/** Whether Q&A segmentation is requested (see the `doc_form` note above). */
useQASegment: boolean;
/** Language name used for Q&A segmentation (presumably only relevant when `useQASegment` is true — confirm). */
qaLanguage: string;
}
/**
 * Initial segmentation settings for the document detail view.
 *
 * The separator is the four-character text `\n\n` (two backslash-escaped
 * sequences), not two actual line-feed characters.
 */
const DETAIL_DEFAULT_SEPARATOR = '\\n\\n';
const DETAIL_DEFAULT_MAX_TOKENS = 500;
const DETAIL_DEFAULT_QA_LANGUAGE = 'Chinese';

export const DEFAULT_DOCUMENT_DETAIL_SETTINGS: DocumentDetailSegmentationSettings = {
  separator: DETAIL_DEFAULT_SEPARATOR,
  maxTokens: DETAIL_DEFAULT_MAX_TOKENS,
  // Preprocessing: whitespace cleanup is on, URL/e-mail stripping is off.
  removeExtraSpaces: true,
  removeUrlsEmails: false,
  // Q&A segmentation is off by default; the language is still pre-filled.
  useQASegment: false,
  qaLanguage: DETAIL_DEFAULT_QA_LANGUAGE,
};
/**
 * UI state of the document detail component.
 */
export interface DocumentDetailState {
/** Segmentation settings currently held by the view. */
settings: DocumentDetailSegmentationSettings;
/** Segments shown in the preview. */
previewSegments: Segment[];
/** Flag for a preview that is being loaded (presumably true while the request is pending — confirm). */
previewLoading: boolean;
/** Whether the preview is shown. */
showPreview: boolean;
/** Flag for an in-progress save (presumably true while the save request is pending — confirm). */
saving: boolean;
}
@@ -0,0 +1,33 @@
import type { Document, IndexingStatus } from '~/api/dify-dataset/type/documentTypes';
/**
 * Props for the document list component.
 */
export interface DocumentListProps {
/** Identifier of the dataset whose documents are listed. */
datasetId: string;
/** Name of that dataset. */
datasetName: string;
/** Documents to render. */
documents: Document[];
/** Loading flag for the list. */
loading: boolean;
/** Total document count (presumably across all pages, for pagination — confirm). */
total: number;
/** Current page number (whether it is 0- or 1-based is not visible here — confirm). */
page: number;
/** Number of documents per page. */
pageSize: number;
/** Called with the page number the list should switch to. */
onPageChange: (page: number) => void;
/** Called with the id of a document that was deleted. */
onDocumentDeleted: (documentId: string) => void;
/** Called with a document id and its new enabled flag. */
onDocumentStatusChanged: (documentId: string, enabled: boolean) => void;
/** Called to request a reload of the list. */
onRefresh: () => void;
/** Optional; called with the document the user wants to view. */
onViewDocument?: (document: Document) => void;
}
/**
 * Display configuration of a status tag.
 *
 * NOTE(review): `React.ReactNode` is referenced through the `React` namespace,
 * but no `React` import is visible in this module — this relies on the
 * namespace being globally available from the React type definitions; confirm.
 */
export interface StatusConfig {
/** Colour of the tag. */
color: string;
/** Icon node rendered for the status. */
icon: React.ReactNode;
/** Text shown for the status. */
text: string;
}
/**
 * Mapping type from every `IndexingStatus` value to its status tag configuration.
 * Because this is a `Record`, an object of this type must provide an entry for each status.
 */
export type StatusConfigMap = Record<IndexingStatus, StatusConfig>;
@@ -0,0 +1,77 @@
import type { Segment } from '~/api/dify-dataset/type';
import type { IndexingStatus } from '~/api/dify-dataset/type/documentTypes';
/**
 * Segmentation settings used by document upload.
 */
export interface SegmentationSettings {
/** Segment separator text. */
separator: string;
/** Maximum number of tokens per segment. */
maxTokens: number;
/** Overlap between adjacent chunks (presumably measured in tokens — confirm). */
chunkOverlap: number;
/** Whether extra whitespace should be removed during preprocessing. */
removeExtraSpaces: boolean;
/** Whether URLs and e-mail addresses should be removed during preprocessing. */
removeUrlsEmails: boolean;
/** Indexing mode; only these two literal values are accepted. */
indexingTechnique: 'high_quality' | 'economy';
}
/**
 * Produces the initial segmentation settings offered for an upload.
 *
 * The separator is the four-character text `\n\n` (two backslash-escaped
 * sequences), not two actual line-feed characters.
 */
function createDefaultSegmentationSettings(): SegmentationSettings {
  return {
    // Chunking parameters.
    separator: '\\n\\n',
    maxTokens: 1024,
    chunkOverlap: 50,
    // Preprocessing: whitespace cleanup is on, URL/e-mail stripping is off.
    removeExtraSpaces: true,
    removeUrlsEmails: false,
    // Indexing mode selected by default.
    indexingTechnique: 'high_quality',
  };
}

/**
 * Default segmentation settings (single shared object, created once at module load).
 */
export const DEFAULT_SEGMENTATION_SETTINGS: SegmentationSettings = createDefaultSegmentationSettings();
/**
 * Upload stage of a single document; one of the five literal values below.
 */
export type DocumentStage = 'pending' | 'uploading' | 'indexing' | 'completed' | 'error';
/**
 * Information about one uploaded document (multiple files are supported).
 */
export interface UploadedDocument {
/** The browser `File` selected for upload. */
file: File;
/** Document identifier (presumably assigned by the backend after upload — confirm). */
documentId: string;
/** Batch identifier (presumably the value used to query indexing progress — confirm). */
batch: string;
/** Current upload stage of this document. */
stage: DocumentStage;
indexingStatus: IndexingStatus; // status used for display
realIndexingStatus?: IndexingStatus; // actual status reported by the backend
/** Upload progress value (presumably a 0-100 percentage — confirm). */
uploadProgress: number;
/** Optional error message for this document. */
error?: string;
/** Segmentation settings attached to this document. */
settings: SegmentationSettings;
/** Segments associated with this document. */
segments: Segment[];
/** Optional count of segments already processed. */
completedSegments?: number;
/** Optional total segment count. */
totalSegments?: number;
}
/**
 * Builds one entry of the indexing status table.
 */
const indexingStatusEntry = (text: string, percent: number): { text: string; percent: number } => ({
  text,
  percent,
});

/**
 * Display configuration per indexing status: a label and a `percent` value
 * (presumably the progress shown to the user — confirm against the consumer).
 * The `paused` and `error` statuses both map to 0.
 */
export const INDEXING_STATUS_CONFIG: Record<IndexingStatus, { text: string; percent: number }> = {
  waiting: indexingStatusEntry('等待处理...', 10),
  parsing: indexingStatusEntry('解析文档...', 30),
  cleaning: indexingStatusEntry('清洗文本...', 50),
  splitting: indexingStatusEntry('分段处理...', 70),
  indexing: indexingStatusEntry('建立索引...', 85),
  completed: indexingStatusEntry('处理完成', 100),
  paused: indexingStatusEntry('已暂停', 0),
  error: indexingStatusEntry('处理失败', 0),
};
/**
 * Supported file formats, as a single comma-and-space separated string of
 * upper-case format names (a display string, not an array).
 */
export const SUPPORTED_FORMATS = 'TXT, MARKDOWN, MDX, PDF, HTML, XLSX, XLS, DOCX, CSV, VTT, PROPERTIES, MD, HTM';
/**
 * Props for the document upload component.
 */
export interface DocumentUploadProps {
/** Identifier of the dataset that receives the uploaded documents. */
datasetId: string;
/** Called with no arguments to close the upload view. */
onClose: () => void;
/** Called with no arguments on success (presumably after upload/indexing finishes — confirm). */
onSuccess: () => void;
}
+43
View File
@@ -0,0 +1,43 @@
import type { Dataset } from '~/api/dify-dataset/type/datasetTypes';
import type { Document } from '~/api/dify-dataset/type/documentTypes';
import type { MenuTab } from './layout';
/**
 * State of the dataset (knowledge base) manager.
 */
export interface DatasetManagerState {
// Dataset state
dataset: Dataset | null;
loadingDataset: boolean;
// Document list state (items, loading flag, and pagination values)
documents: Document[];
loadingDocuments: boolean;
documentTotal: number;
documentPage: number;
documentPageSize: number;
// Initialization state; `error` is `null` when there is no error message
inited: boolean;
error: string | null;
// Menu state: the currently active tab
activeTab: MenuTab;
// Selected document, or `null` when none is selected
selectedDocument: Document | null;
}
/**
 * Default page size for the document list (documents per page).
 */
export const DEFAULT_DOCUMENT_PAGE_SIZE = 20;
// 导出所有子模块类型
export * from './dataset-settings';
export * from './document-detail';
export * from './document-list';
export * from './document-upload';
export * from './layout';
export * from './retrieve-test';
+34
View File
@@ -0,0 +1,34 @@
import type { ReactNode } from 'react';
import type { Dataset } from '~/api/dify-dataset/type/datasetTypes';
/**
 * Menu tab key; one of the three literal values below.
 */
export type MenuTab = 'documents' | 'retrieve' | 'settings';
/**
 * Configuration of a single menu item.
 */
export interface MenuItem {
/** Tab this item selects. */
key: MenuTab;
/** Icon node rendered for the item. */
icon: ReactNode;
/** Text label of the item. */
label: string;
}
/**
 * Props for the dataset (knowledge base) layout component.
 */
export interface DatasetLayoutProps {
/** Dataset information; may be `null`. */
dataset: Dataset | null;
/** Currently active menu tab. */
activeTab: MenuTab;
/** Callback invoked with the tab to switch to. */
onTabChange: (tab: MenuTab) => void;
/** Whether to show the back button (shown on the document detail page). */
showBackButton?: boolean;
/** Callback for a click on the back button. */
onBack?: () => void;
/** Child content. */
children: ReactNode;
}
@@ -0,0 +1,35 @@
import type { RetrieveRecord } from '~/api/dify-dataset/type';
/**
 * Props for the retrieval (recall) test component.
 */
export interface RetrieveTestProps {
/** Identifier of the dataset to run retrieval tests against. */
datasetId: string;
}
/**
 * Retrieval method.
 * - semantic_search: vector retrieval (semantic search)
 * - full_text_search: full-text retrieval
 * - hybrid_search: hybrid retrieval
 */
export type SearchMethod = 'semantic_search' | 'full_text_search' | 'hybrid_search';
/**
 * Retrieval options.
 */
export interface RetrieveOptions {
/** Retrieval method to use. */
searchMethod: SearchMethod;
/** Top-K value (presumably the maximum number of results to return — confirm against the API call). */
topK: number;
}
/**
 * UI state of the retrieval (recall) test component.
 * `searchMethod` and `topK` have the same names and types as the fields of `RetrieveOptions`.
 */
export interface RetrieveTestState {
/** Query text entered by the user. */
searchQuery: string;
/** Records returned by the retrieval call. */
retrieveResults: RetrieveRecord[];
/** Flag for a retrieval in progress (presumably true while the request is pending — confirm). */
retrieving: boolean;
/** Selected retrieval method. */
searchMethod: SearchMethod;
/** Selected Top-K value. */
topK: number;
}