diff --git a/app/api/dify-dataset/api/segmentApi.ts b/app/api/dify-dataset/api/segmentApi.ts index 0761e06..b71e268 100644 --- a/app/api/dify-dataset/api/segmentApi.ts +++ b/app/api/dify-dataset/api/segmentApi.ts @@ -332,7 +332,7 @@ export async function deleteChildChunk( * * @param datasetId - 知识库 ID * @param query - 检索关键词 - * @param retrievalModel - 检索模型配置 + * @param retrievalModel - 检索模型配置(完整的 Dify API 格式) * @returns 检索结果 */ export async function retrieveDataset( @@ -340,7 +340,7 @@ export async function retrieveDataset( query: string, retrievalModel?: RetrieveRequest['retrieval_model'] ): Promise { - console.log('[Dataset Client] 检索知识库:', { datasetId, query }); + console.log('[Dataset Client] 检索知识库:', { datasetId, query, retrievalModel }); const requestBody: RetrieveRequest = { query, diff --git a/app/api/dify-dataset/type/index.ts b/app/api/dify-dataset/type/index.ts index 78d558e..1d9f649 100644 --- a/app/api/dify-dataset/type/index.ts +++ b/app/api/dify-dataset/type/index.ts @@ -42,6 +42,7 @@ export type { MetadataFilterCondition, MetadataFilteringConditions, RetrieveRequest, + RetrieveSegment, RetrieveRecord, RetrieveResponse, } from './segmentTypes'; diff --git a/app/api/dify-dataset/type/segmentTypes.ts b/app/api/dify-dataset/type/segmentTypes.ts index a976acd..3b83581 100644 --- a/app/api/dify-dataset/type/segmentTypes.ts +++ b/app/api/dify-dataset/type/segmentTypes.ts @@ -4,6 +4,11 @@ * @module api/dify-dataset/type/segmentTypes */ +import type { RetrievalModel } from './datasetTypes'; + +// 重新导出以便其他模块使用 +export type { RetrievalModel }; + // ============================================================================ // 分段类型 // ============================================================================ @@ -154,25 +159,27 @@ export interface MetadataFilteringConditions { */ export interface RetrieveRequest { query: string; - retrieval_model?: { - search_method: 'keyword_search' | 'semantic_search' | 'full_text_search' | 'hybrid_search'; - reranking_enable?: boolean; - reranking_model?: { - reranking_provider_name: string; - reranking_model_name: string; - }; - top_k?: number; - score_threshold_enabled?: boolean; - score_threshold?: number; - }; + retrieval_model?: RetrievalModel; metadata_filtering_conditions?: MetadataFilteringConditions; } +/** + * 检索结果中的分段信息(包含关联文档) + */ +export interface RetrieveSegment extends Segment { + document?: { + id: string; + data_source_type: string; + name: string; + doc_type: string | null; + }; +} + /** * 检索结果记录 */ export interface RetrieveRecord { - segment: Segment; + segment: RetrieveSegment; score: number; tsne_position?: { x: number; diff --git a/app/components/dify-dataset-manager/dataset-settings.tsx b/app/components/dify-dataset-manager/dataset-settings.tsx index 333dfde..05f1a90 100644 --- a/app/components/dify-dataset-manager/dataset-settings.tsx +++ b/app/components/dify-dataset-manager/dataset-settings.tsx @@ -1,16 +1,10 @@ -import { useState, useEffect } from 'react'; -import { Form, Input, Button, Card, message, Spin } from 'antd'; +import { Form, Input, Button, Card, Spin } from 'antd'; import { SaveOutlined } from '@ant-design/icons'; -import type { Dataset } from '~/api/dify-dataset/type/datasetTypes'; -import { updateDatasetName } from '~/api/dify-dataset/api/datasetApi'; +import { useDatasetSettings } from '~/hooks/dify-dataset-manager/dataset-settings'; +import type { DatasetSettingsProps } from '~/types/dify-dataset-manager/dataset-settings'; const { TextArea } = Input; -interface DatasetSettingsProps { - dataset: Dataset | null; - onDatasetUpdated: (dataset: Dataset) => void; -} - /** * 知识库设置组件 * 用于修改知识库名称和描述 @@ -20,70 +14,14 @@ export default function DatasetSettings({ onDatasetUpdated, }: DatasetSettingsProps) { const [form] = Form.useForm(); - const [saving, setSaving] = useState(false); - const [hasChanges, setHasChanges] = useState(false); - - // 初始化表单数据 - useEffect(() => { - if (dataset) { - form.setFieldsValue({ - name: dataset.name, - description: dataset.description || '', - }); - setHasChanges(false); - } - }, [dataset, form]); - - /** - * 处理表单值变化 - */ - const handleValuesChange = () => { - const values = form.getFieldsValue(); - const changed = - values.name !== dataset?.name || - values.description !== (dataset?.description || ''); - setHasChanges(changed); - }; - - /** - * 保存设置 - */ - const handleSave = async () => { - if (!dataset) { - message.error('知识库不存在'); - return; - } - - try { - const values = await form.validateFields(); - setSaving(true); - - // 目前只支持修改名称 - const updatedDataset = await updateDatasetName(dataset.id, values.name); - - message.success('保存成功'); - onDatasetUpdated(updatedDataset); - setHasChanges(false); - } catch (err: any) { - console.error('保存设置失败:', err); - message.error(err.message || '保存失败'); - } finally { - setSaving(false); - } - }; - - /** - * 重置表单 - */ - const handleReset = () => { - if (dataset) { - form.setFieldsValue({ - name: dataset.name, - description: dataset.description || '', - }); - setHasChanges(false); - } - }; + + const { + saving, + hasChanges, + handleValuesChange, + handleSave, + handleReset, + } = useDatasetSettings(dataset, form, onDatasetUpdated); if (!dataset) { return ( diff --git a/app/components/dify-dataset-manager/document-detail.tsx b/app/components/dify-dataset-manager/document-detail.tsx index 1a38377..a59fc8b 100644 --- a/app/components/dify-dataset-manager/document-detail.tsx +++ b/app/components/dify-dataset-manager/document-detail.tsx @@ -1,4 +1,3 @@ -import { useState, useEffect } from 'react'; import { Input, Button, @@ -8,7 +7,6 @@ import { Card, Empty, Spin, - message, Divider, Tooltip, } from 'antd'; @@ -17,45 +15,8 @@ import { ReloadOutlined, EyeOutlined, } from '@ant-design/icons'; -import type { Document } from '~/api/dify-dataset/type/documentTypes'; -import type { Segment } from '~/api/dify-dataset/type'; -import { fetchSegments } from '~/api/dify-dataset/api/segmentApi'; -import { updateDocumentWithSettings } from '~/api/dify-dataset/api/documentApi'; - -interface DocumentDetailProps { - datasetId: string; - document: Document | null; -} - -/** - * 分段设置配置 - * 注意:Dify API 支持的参数有限 - * - separator: ✅ 支持 - * - maxTokens: ✅ 支持 - * - removeExtraSpaces: ✅ 支持 - * - removeUrlsEmails: ✅ 支持 - * - useQASegment: ⚠️ 需要 doc_form: "qa_model" - */ -interface SegmentationSettings { - separator: string; - maxTokens: number; - removeExtraSpaces: boolean; - removeUrlsEmails: boolean; - useQASegment: boolean; - qaLanguage: string; -} - -/** - * 默认分段设置 - */ -const DEFAULT_SETTINGS: SegmentationSettings = { - separator: '\\n\\n', - maxTokens: 500, - removeExtraSpaces: true, - removeUrlsEmails: false, - useQASegment: false, - qaLanguage: 'Chinese', -}; +import { useDocumentDetail } from '~/hooks/dify-dataset-manager/document-detail'; +import type { DocumentDetailProps } from '~/types/dify-dataset-manager/document-detail'; /** * 文档详情组件 @@ -65,98 +26,17 @@ export default function DocumentDetail({ datasetId, document, }: DocumentDetailProps) { - // 分段设置状态 - const [settings, setSettings] = useState(DEFAULT_SETTINGS); - - // 预览状态 - const [previewSegments, setPreviewSegments] = useState([]); - const [previewLoading, setPreviewLoading] = useState(false); - const [showPreview, setShowPreview] = useState(false); - - // 保存状态 - const [saving, setSaving] = useState(false); - - // 当文档变化时重置设置 - useEffect(() => { - if (document) { - // 可以从文档中读取已有的设置,这里使用默认值 - setSettings(DEFAULT_SETTINGS); - setPreviewSegments([]); - setShowPreview(false); - } - }, [document?.id]); - - /** - * 更新设置 - */ - const updateSettings = (key: keyof SegmentationSettings, value: any) => { - setSettings(prev => ({ ...prev, [key]: value })); - }; - - /** - * 重置设置 - */ - const handleReset = () => { - setSettings(DEFAULT_SETTINGS); - setPreviewSegments([]); - setShowPreview(false); - }; - - /** - * 预览分段 - */ - const handlePreview = async () => { - if (!document) return; - - setPreviewLoading(true); - setShowPreview(true); - try { - // 获取当前文档的分段作为预览 - const response = await fetchSegments(datasetId, document.id, 1, 50); - setPreviewSegments(response.data || []); - if (response.data?.length === 0) { - message.info('该文档暂无分段数据'); - } - } catch (err: any) { - console.error('预览分段失败:', err); - message.error(err.message || '预览失败'); - } finally { - setPreviewLoading(false); - } - }; - - /** - * 保存并处理 - */ - const handleSaveAndProcess = async () => { - if (!document) return; - - setSaving(true); - try { - await updateDocumentWithSettings(datasetId, document.id, { - indexing_technique: 'high_quality', - process_rule: { - mode: 'custom', - rules: { - pre_processing_rules: [ - { id: 'remove_extra_spaces', enabled: settings.removeExtraSpaces }, - { id: 'remove_urls_emails', enabled: settings.removeUrlsEmails }, - ], - segmentation: { - separator: settings.separator.replace(/\\n/g, '\n'), - max_tokens: settings.maxTokens, - }, - }, - }, - }); - message.success('设置已保存,文档正在重新处理...'); - } catch (err: any) { - console.error('保存设置失败:', err); - message.error(err.message || '保存失败'); - } finally { - setSaving(false); - } - }; + const { + settings, + previewSegments, + previewLoading, + showPreview, + saving, + updateSettings, + handleReset, + handlePreview, + handleSaveAndProcess, + } = useDocumentDetail(datasetId, document); if (!document) { return ( diff --git a/app/components/dify-dataset-manager/document-list.tsx b/app/components/dify-dataset-manager/document-list.tsx index 4c204c8..4da0c78 100644 --- a/app/components/dify-dataset-manager/document-list.tsx +++ b/app/components/dify-dataset-manager/document-list.tsx @@ -1,4 +1,3 @@ -import { useState } from 'react'; import { Button, Input, @@ -8,7 +7,6 @@ import { Tooltip, Popconfirm, Switch, - message, Empty, Spin, } from 'antd'; @@ -19,33 +17,14 @@ import { FileTextOutlined, CloudUploadOutlined, EyeOutlined, - ClockCircleOutlined, - CheckCircleOutlined, - SyncOutlined, - ExclamationCircleOutlined, - PauseCircleOutlined, } from '@ant-design/icons'; import type { ColumnsType } from 'antd/es/table'; import type { Document, IndexingStatus } from '~/api/dify-dataset/type/documentTypes'; -import { deleteDocument, toggleDocumentStatus } from '~/api/dify-dataset/api/documentApi'; +import { useDocumentList } from '~/hooks/dify-dataset-manager/document-list'; +import type { DocumentListProps } from '~/types/dify-dataset-manager/document-list'; import DocumentUpload from './document-upload'; import '../../styles/components/dify-dataset-manager/index.css'; -interface DocumentListProps { - datasetId: string; - datasetName: string; - documents: Document[]; - loading: boolean; - total: number; - page: number; - pageSize: number; - onPageChange: (page: number) => void; - onDocumentDeleted: (documentId: string) => void; - onDocumentStatusChanged: (documentId: string, enabled: boolean) => void; - onRefresh: () => void; - onViewDocument?: (document: Document) => void; -} - /** * 文档列表组件 */ @@ -62,116 +41,24 @@ export default function DocumentList({ onRefresh, onViewDocument, }: DocumentListProps) { - const [searchValue, setSearchValue] = useState(''); - const [deletingId, setDeletingId] = useState(null); - - // 显示上传页面的状态 - const [showUploadPage, setShowUploadPage] = useState(false); - - /** - * 获取状态标签配置 - */ - const getStatusConfig = (status: IndexingStatus) => { - const configs: Record = { - completed: { color: 'success', icon: , text: '已完成' }, - indexing: { color: 'processing', icon: , text: '索引中' }, - waiting: { color: 'warning', icon: , text: '等待中' }, - parsing: { color: 'processing', icon: , text: '解析中' }, - cleaning: { color: 'processing', icon: , text: '清洗中' }, - splitting: { color: 'processing', icon: , text: '分段中' }, - paused: { color: 'default', icon: , text: '已暂停' }, - error: { color: 'error', icon: , text: '错误' }, - }; - return configs[status] || { color: 'default', icon: null, text: status }; - }; - - /** - * 格式化日期 - */ - const formatDate = (timestamp: number) => { - return new Date(timestamp * 1000).toLocaleString('zh-CN', { - year: 'numeric', - month: '2-digit', - day: '2-digit', - hour: '2-digit', - minute: '2-digit', - }); - }; - - /** - * 格式化数字 - */ - const formatNumber = (num: number) => { - if (num >= 10000) { - return (num / 10000).toFixed(1) + 'w'; - } - if (num >= 1000) { - return (num / 1000).toFixed(1) + 'k'; - } - return num.toString(); - }; - - /** - * 处理删除文档 - */ - const handleDelete = async (documentId: string) => { - setDeletingId(documentId); - try { - await deleteDocument(datasetId, documentId); - message.success('删除成功'); - onDocumentDeleted(documentId); - } catch (err: any) { - console.error('删除文档失败:', err); - message.error(err.message || '删除失败'); - } finally { - setDeletingId(null); - } - }; - - /** - * 处理启用/禁用文档 - */ - const handleToggleStatus = async (documentId: string, enabled: boolean) => { - try { - await toggleDocumentStatus(datasetId, documentId, enabled); - message.success(enabled ? '已启用' : '已禁用'); - onDocumentStatusChanged(documentId, enabled); - } catch (err: any) { - console.error('切换文档状态失败:', err); - message.error(err.message || '操作失败'); - } - }; - - /** - * 点击上传按钮,显示上传页面 - */ - const handleUploadClick = () => { - if (!datasetId) { - message.error('请先选择知识库'); - return; - } - setShowUploadPage(true); - }; - - /** - * 关闭上传页面 - */ - const handleUploadClose = () => { - setShowUploadPage(false); - }; - - /** - * 上传成功回调 - */ - const handleUploadSuccess = () => { - setShowUploadPage(false); - onRefresh(); - }; + const { + searchValue, + setSearchValue, + deletingId, + showUploadPage, + getStatusConfig, + formatDate, + formatNumber, + handleDelete, + handleToggleStatus, + handleUploadClick, + handleUploadClose, + handleUploadSuccess, + filterDocuments, + } = useDocumentList(datasetId, onDocumentDeleted, onDocumentStatusChanged, onRefresh); // 过滤文档 - const filteredDocuments = documents.filter((doc) => - doc.name.toLowerCase().includes(searchValue.toLowerCase()) - ); + const filteredDocuments = filterDocuments(documents); // 表格列定义 const columns: ColumnsType = [ diff --git a/app/components/dify-dataset-manager/document-upload.tsx b/app/components/dify-dataset-manager/document-upload.tsx index 8085b8e..a461be0 100644 --- a/app/components/dify-dataset-manager/document-upload.tsx +++ b/app/components/dify-dataset-manager/document-upload.tsx @@ -1,110 +1,36 @@ -import { useState, useEffect, useRef, useCallback } from 'react'; import { - Input, + ArrowLeftOutlined, + CheckCircleOutlined, + DeleteOutlined, + ExclamationCircleOutlined, + FileTextOutlined, + InboxOutlined, + LoadingOutlined, + QuestionCircleOutlined, +} from '@ant-design/icons'; +import type { UploadFile } from 'antd'; +import { Button, - InputNumber, - Checkbox, - Select, Card, - Empty, - Spin, - message, + Checkbox, Divider, - Tooltip, + Empty, + Input, + InputNumber, Progress, + Select, + Spin, + Tooltip, Upload, } from 'antd'; -import type { UploadFile, UploadProps } from 'antd'; -import { - QuestionCircleOutlined, - FileTextOutlined, - CheckCircleOutlined, - LoadingOutlined, - ExclamationCircleOutlined, - InboxOutlined, - DeleteOutlined, - ArrowLeftOutlined, -} from '@ant-design/icons'; -import type { IndexingStatus } from '~/api/dify-dataset/type/documentTypes'; +import { useEffect, useState } from 'react'; import type { Segment } from '~/api/dify-dataset/type'; -import { - uploadDocumentWithConfig, - updateDocumentByFile, - fetchIndexingStatus, -} from '~/api/dify-dataset/api/documentApi'; -import { fetchSegments } from '~/api/dify-dataset/api/segmentApi'; +import { useDocumentUpload } from '~/hooks/dify-dataset-manager/document-upload'; +import type { DocumentUploadProps, UploadedDocument } from '~/types/dify-dataset-manager/document-upload'; +import { SUPPORTED_FORMATS } from '~/types/dify-dataset-manager/document-upload'; const { Dragger } = Upload; -interface DocumentUploadProps { - datasetId: string; - onClose: () => void; - onSuccess: () => void; -} - -/** - * 分段设置配置 - */ -interface SegmentationSettings { - separator: string; - maxTokens: number; - chunkOverlap: number; - removeExtraSpaces: boolean; - removeUrlsEmails: boolean; - indexingTechnique: 'high_quality' | 'economy'; -} - -/** - * 默认分段设置 - */ -const DEFAULT_SETTINGS: SegmentationSettings = { - separator: '\\n\\n', - maxTokens: 1024, - chunkOverlap: 50, - removeExtraSpaces: true, - removeUrlsEmails: false, - indexingTechnique: 'high_quality', -}; - -/** - * 单个文档的上传状态 - */ -type DocumentStage = 'pending' | 'uploading' | 'indexing' | 'completed' | 'error'; - -/** - * 上传的文档信息(支持多文件) - */ -interface UploadedDocument { - file: File; - documentId: string; - batch: string; - stage: DocumentStage; - indexingStatus: IndexingStatus; - uploadProgress: number; - error?: string; - settings: SegmentationSettings; - segments: Segment[]; -} - -/** - * 索引状态配置 - */ -const INDEXING_STATUS_CONFIG: Record = { - waiting: { text: '等待处理...', percent: 10 }, - parsing: { text: '解析文档...', percent: 30 }, - cleaning: { text: '清洗文本...', percent: 50 }, - splitting: { text: '分段处理...', percent: 70 }, - indexing: { text: '建立索引...', percent: 85 }, - completed: { text: '处理完成', percent: 100 }, - paused: { text: '已暂停', percent: 0 }, - error: { text: '处理失败', percent: 0 }, -}; - -/** - * 支持的文件格式 - */ -const SUPPORTED_FORMATS = 'TXT, MARKDOWN, MDX, PDF, HTML, XLSX, XLS, DOCX, CSV, VTT, PROPERTIES, MD, HTM'; - /** * 文档上传组件 * 支持多文件上传,两步流程:选择文件 → 上传并配置分段 @@ -114,419 +40,55 @@ export default function DocumentUpload({ onClose, onSuccess, }: DocumentUploadProps) { - // 步骤控制 - const [step, setStep] = useState<1 | 2>(1); + const { + // 状态 + step, + fileList, + uploadedDocuments, + currentSettings, + previewLoading, - // 文件相关 - const [selectedFiles, setSelectedFiles] = useState([]); - const [fileList, setFileList] = useState([]); + // 方法 + handleFileChange, + handleRemoveFile, + handleNextStep, + handleDocumentChange, + handleReprocess, + handlePrevStep, + handleGoToDocuments, + updateCurrentSettings, - // 多文档状态管理 - const [uploadedDocuments, setUploadedDocuments] = useState([]); - // 当前选中查看的文档索引 - const [currentDocIndex, setCurrentDocIndex] = useState(0); + // 计算属性方法 + getCurrentDocument, + getCurrentProgress, + getStatusText, + isCurrentDocProcessing, + getCompletionStats, + } = useDocumentUpload(datasetId, onClose, onSuccess); - // 当前显示的分段设置(来自当前选中的文档) - const [currentSettings, setCurrentSettings] = useState(DEFAULT_SETTINGS); + const selectedFiles = fileList.filter((f: UploadFile) => f.originFileObj).map((f: UploadFile) => f.originFileObj as File); - // 预览相关 - const [previewLoading, setPreviewLoading] = useState(false); + // 平滑进度条逻辑 + const [displayPercent, setDisplayPercent] = useState(0); + const targetPercent = getCurrentProgress(); - // 轮询定时器(支持多个文档) - const pollingTimersRef = useRef>(new Map()); - - // 清理所有轮询定时器 useEffect(() => { - return () => { - pollingTimersRef.current.forEach(timer => clearInterval(timer)); - pollingTimersRef.current.clear(); - }; - }, []); + if (targetPercent > displayPercent) { + // 如果目标进度大于当前显示进度,启动动画 + const diff = targetPercent - displayPercent; + // 动态步长:差距越大跑得越快,但最小步长为1 + const step = Math.max(1, Math.ceil(diff / 10)); - /** - * 停止指定文档的轮询 - */ - const stopPolling = useCallback((documentId: string) => { - const timer = pollingTimersRef.current.get(documentId); - if (timer) { - clearInterval(timer); - pollingTimersRef.current.delete(documentId); + const timer = requestAnimationFrame(() => { + setDisplayPercent(prev => Math.min(targetPercent, prev + step)); + }); + + return () => cancelAnimationFrame(timer); + } else if (targetPercent < displayPercent && targetPercent === 0) { + // 如果目标重置为0(例如重新开始),立即重置 + setDisplayPercent(0); } - }, []); - - /** - * 停止所有轮询 - */ - const stopAllPolling = useCallback(() => { - pollingTimersRef.current.forEach(timer => clearInterval(timer)); - pollingTimersRef.current.clear(); - }, []); - - /** - * 加载分段预览 - */ - const loadSegmentsPreview = useCallback(async (documentId: string, docIndex: number) => { - setPreviewLoading(true); - try { - const response = await fetchSegments(datasetId, documentId, 1, 50); - const segments = response.data || []; - // 更新对应文档的分段 - setUploadedDocuments(prev => prev.map((doc, idx) => - idx === docIndex ? { ...doc, segments } : doc - )); - } catch (err: any) { - console.error('加载分段预览失败:', err); - message.error('加载分段预览失败'); - } finally { - setPreviewLoading(false); - } - }, [datasetId]); - - /** - * 轮询索引状态 - */ - const pollIndexingStatus = useCallback(async (batch: string, documentId: string, docIndex: number) => { - try { - const response = await fetchIndexingStatus(datasetId, batch); - const documentStatus = response.data?.[0]; - - if (documentStatus) { - const status = documentStatus.indexing_status as IndexingStatus; - - // 更新文档状态 - setUploadedDocuments(prev => prev.map((doc, idx) => { - if (idx !== docIndex) return doc; - return { ...doc, indexingStatus: status }; - })); - - if (status === 'completed') { - stopPolling(documentId); - setUploadedDocuments(prev => prev.map((doc, idx) => - idx === docIndex ? { ...doc, stage: 'completed' } : doc - )); - // message.success(`文档 "${uploadedDocuments[docIndex]?.file.name}" 处理完成!`); - // 自动加载分段预览 - loadSegmentsPreview(documentId, docIndex); - } else if (status === 'error') { - stopPolling(documentId); - setUploadedDocuments(prev => prev.map((doc, idx) => - idx === docIndex ? { ...doc, stage: 'error', error: documentStatus.error || '处理失败' } : doc - )); - } - } - } catch (err) { - console.error('获取索引状态失败:', err); - } - }, [datasetId, stopPolling, loadSegmentsPreview, uploadedDocuments]); - - /** - * 开始轮询 - */ - const startPolling = useCallback((batch: string, documentId: string, docIndex: number) => { - // 先停止之前的轮询 - stopPolling(documentId); - - // 开始新的轮询 - const timer = setInterval(() => { - pollIndexingStatus(batch, documentId, docIndex); - }, 2000); - pollingTimersRef.current.set(documentId, timer); - - // 立即执行一次 - pollIndexingStatus(batch, documentId, docIndex); - }, [stopPolling, pollIndexingStatus]); - - /** - * 构建上传配置 - */ - const buildConfig = (s: SegmentationSettings) => ({ - indexing_technique: s.indexingTechnique, - process_rule: { - mode: 'custom' as const, - rules: { - pre_processing_rules: [ - { id: 'remove_extra_spaces' as const, enabled: s.removeExtraSpaces }, - { id: 'remove_urls_emails' as const, enabled: s.removeUrlsEmails }, - ], - segmentation: { - separator: s.separator.replace(/\\n/g, '\n'), - max_tokens: s.maxTokens, - }, - }, - }, - }); - - /** - * 更新当前文档的设置 - */ - const updateCurrentSettings = (key: keyof SegmentationSettings, value: any) => { - const newSettings = { ...currentSettings, [key]: value }; - setCurrentSettings(newSettings); - // 同步更新到文档列表 - setUploadedDocuments(prev => prev.map((doc, idx) => - idx === currentDocIndex ? { ...doc, settings: newSettings } : doc - )); - }; - - /** - * 处理文件选择变化 - */ - const handleFileChange: UploadProps['onChange'] = ({ fileList: newFileList }) => { - setFileList(newFileList); - // 提取实际文件对象 - const files = newFileList - .filter(f => f.originFileObj) - .map(f => f.originFileObj as File); - setSelectedFiles(files); - }; - - /** - * 移除文件 - */ - const handleRemoveFile = (file: UploadFile) => { - const newFileList = fileList.filter(f => f.uid !== file.uid); - setFileList(newFileList); - const files = newFileList - .filter(f => f.originFileObj) - .map(f => f.originFileObj as File); - setSelectedFiles(files); - }; - - /** - * 上传单个文件 - */ - const uploadSingleFile = async (file: File, index: number): Promise => { - try { - // 更新状态为上传中 - setUploadedDocuments(prev => prev.map((doc, idx) => - idx === index ? { ...doc, stage: 'uploading' as DocumentStage } : doc - )); - - const config = buildConfig(DEFAULT_SETTINGS); - const result = await uploadDocumentWithConfig( - datasetId, - file, - config, - (percent) => { - setUploadedDocuments(prev => prev.map((doc, idx) => - idx === index ? { ...doc, uploadProgress: percent } : doc - )); - } - ); - - // 更新文档信息 - setUploadedDocuments(prev => prev.map((doc, idx) => - idx === index ? { - ...doc, - documentId: result.document.id, - batch: result.batch, - stage: 'indexing' as DocumentStage, - indexingStatus: 'waiting' as IndexingStatus, - } : doc - )); - - // 开始轮询索引状态 - startPolling(result.batch, result.document.id, index); - } catch (err: any) { - console.error(`上传文档 ${file.name} 失败:`, err); - setUploadedDocuments(prev => prev.map((doc, idx) => - idx === index ? { - ...doc, - stage: 'error' as DocumentStage, - error: err.message || '上传失败', - } : doc - )); - } - }; - - /** - * 点击"下一步":立即上传所有文件 - */ - const handleNextStep = async () => { - if (selectedFiles.length === 0) { - message.warning('请先选择文件'); - return; - } - - // 初始化所有文档状态 - const docs: UploadedDocument[] = selectedFiles.map(file => ({ - file, - documentId: '', - batch: '', - stage: 'pending' as DocumentStage, - indexingStatus: 'waiting' as IndexingStatus, - uploadProgress: 0, - settings: { ...DEFAULT_SETTINGS }, - segments: [], - })); - setUploadedDocuments(docs); - setCurrentDocIndex(0); - setCurrentSettings({ ...DEFAULT_SETTINGS }); - setStep(2); - - // 依次上传所有文件 - for (let i = 0; i < selectedFiles.length; i++) { - await uploadSingleFile(selectedFiles[i], i); - } - }; - - /** - * 切换查看的文档 - */ - const handleDocumentChange = (docId: string) => { - const index = uploadedDocuments.findIndex(doc => doc.documentId === docId || doc.file.name === docId); - if (index !== -1) { - setCurrentDocIndex(index); - const doc = uploadedDocuments[index]; - setCurrentSettings(doc.settings); - } - }; - - /** - * 修改参数后重新处理当前文档 - */ - const handleReprocess = async () => { - const currentDoc = uploadedDocuments[currentDocIndex]; - if (!currentDoc || !currentDoc.documentId) return; - - // 更新状态 - setUploadedDocuments(prev => prev.map((doc, idx) => - idx === currentDocIndex ? { - ...doc, - stage: 'uploading' as DocumentStage, - uploadProgress: 0, - segments: [], - } : doc - )); - - try { - const config = buildConfig(currentSettings); - const result = await updateDocumentByFile( - datasetId, - currentDoc.documentId, - currentDoc.file, - config, - (percent) => { - setUploadedDocuments(prev => prev.map((doc, idx) => - idx === currentDocIndex ? { ...doc, uploadProgress: percent } : doc - )); - } - ); - - // 更新 batch - setUploadedDocuments(prev => prev.map((doc, idx) => - idx === currentDocIndex ? { - ...doc, - batch: result.batch, - stage: 'indexing' as DocumentStage, - indexingStatus: 'waiting' as IndexingStatus, - } : doc - )); - - startPolling(result.batch, currentDoc.documentId, currentDocIndex); - } catch (err: any) { - console.error('重新处理失败:', err); - setUploadedDocuments(prev => prev.map((doc, idx) => - idx === currentDocIndex ? { - ...doc, - stage: 'error' as DocumentStage, - error: err.message || '重新处理失败', - } : doc - )); - message.error(err.message || '重新处理失败'); - } - }; - - /** - * 返回上一步 - */ - const handlePrevStep = () => { - // 检查是否有文档正在处理 - const hasProcessing = uploadedDocuments.some(doc => - doc.stage === 'uploading' || doc.stage === 'indexing' - ); - if (hasProcessing) { - message.warning('还有文档正在处理中,请等待完成'); - return; - } - stopAllPolling(); - setStep(1); - setUploadedDocuments([]); - setCurrentDocIndex(0); - setCurrentSettings(DEFAULT_SETTINGS); - }; - - /** - * 返回文档列表 - */ - const handleGoToDocuments = () => { - stopAllPolling(); - const hasCompleted = uploadedDocuments.some(doc => doc.stage === 'completed'); - if (hasCompleted) { - onSuccess(); - } - onClose(); - }; - - /** - * 获取当前文档 - */ - const getCurrentDocument = (): UploadedDocument | null => { - return uploadedDocuments[currentDocIndex] || null; - }; - - /** - * 获取当前文档的进度 - */ - const getCurrentProgress = () => { - const doc = getCurrentDocument(); - if (!doc) return 0; - if (doc.stage === 'uploading') { - return doc.uploadProgress; - } - if (doc.stage === 'indexing' || doc.stage === 'completed') { - return INDEXING_STATUS_CONFIG[doc.indexingStatus]?.percent || 0; - } - return 0; - }; - - /** - * 获取当前文档的状态文本 - */ - const getStatusText = () => { - const doc = getCurrentDocument(); - if (!doc) return ''; - if (doc.stage === 'uploading') { - return `正在上传... ${doc.uploadProgress}%`; - } - if (doc.stage === 'indexing') { - return INDEXING_STATUS_CONFIG[doc.indexingStatus]?.text || '处理中...'; - } - if (doc.stage === 'completed') { - return '处理完成'; - } - if (doc.stage === 'error') { - return doc.error || '处理失败'; - } - return ''; - }; - - /** - * 判断当前文档是否正在处理 - */ - const isCurrentDocProcessing = () => { - const doc = getCurrentDocument(); - return doc?.stage === 'uploading' || doc?.stage === 'indexing'; - }; - - /** - * 获取所有文档的完成状态统计 - */ - const getCompletionStats = () => { - const completed = uploadedDocuments.filter(doc => doc.stage === 'completed').length; - const total = uploadedDocuments.length; - return { completed, total }; - }; + }, [targetPercent, displayPercent]); /** * 渲染步骤指示器(两步流程) @@ -567,7 +129,7 @@ export default function DocumentUpload({

-

拖拽文件或文件夹至此,或者 选择文件

+

拖拽文件或至此,或者 选择文件

已支持 {SUPPORTED_FORMATS},每个文件不超过 15MB。支持批量上传多个文件。

@@ -579,7 +141,7 @@ export default function DocumentUpload({

嵌入已就绪 ({selectedFiles.length} 个文件)

- {fileList.map((file) => ( + {fileList.map((file: UploadFile) => (
@@ -768,7 +330,7 @@ export default function DocumentUpload({ value={currentDoc?.documentId || currentDoc?.file.name} style={{ width: 500 }} onChange={handleDocumentChange} - options={uploadedDocuments.map((doc, idx) => ({ + options={uploadedDocuments.map((doc: UploadedDocument) => ({ value: doc.documentId || doc.file.name, label: ( @@ -800,7 +362,7 @@ export default function DocumentUpload({
{getStatusText()}
- - {/* 索引阶段详情 */} - {currentDoc?.stage === 'indexing' && ( -
-
- - 等待处理 -
-
- - 解析文档 -
-
- - 清洗文本 -
-
- - 分段处理 -
-
- - 建立索引 -
-
- - 完成 -
-
- )}
) : currentDoc?.stage === 'error' ? (
@@ -855,7 +387,7 @@ export default function DocumentUpload({
) : (
- {currentDoc?.segments.map((segment, index) => ( + {currentDoc?.segments.map((segment: Segment, index: number) => (
#{index + 1} @@ -881,7 +413,7 @@ export default function DocumentUpload({ {stats.completed}/{stats.total} 个文档处理完成
)} diff --git a/app/components/dify-dataset-manager/index.tsx b/app/components/dify-dataset-manager/index.tsx index 2bdb8eb..42b62fe 100644 --- a/app/components/dify-dataset-manager/index.tsx +++ b/app/components/dify-dataset-manager/index.tsx @@ -1,14 +1,10 @@ -import { useEffect, useState } from 'react'; -import { message, Spin } from 'antd'; -import DatasetLayout, { type MenuTab } from './layout'; +import { Spin } from 'antd'; +import DatasetLayout from './layout'; import DocumentList from './document-list'; import DocumentDetail from './document-detail'; import RetrieveTest from './retrieve-test'; import DatasetSettings from './dataset-settings'; -import type { Dataset } from '~/api/dify-dataset/type/datasetTypes'; -import type { Document } from '~/api/dify-dataset/type/documentTypes'; -import { fetchDatasets } from '~/api/dify-dataset/api/datasetApi'; -import { fetchDocuments } from '~/api/dify-dataset/api/documentApi'; +import { useDatasetManager } from '~/hooks/dify-dataset-manager'; import '../../styles/components/dify-dataset-manager/index.css'; /** @@ -16,162 +12,30 @@ import '../../styles/components/dify-dataset-manager/index.css'; * 带左侧菜单栏的完整布局 */ export default function DatasetManager() { - // 知识库状态 - const [dataset, setDataset] = useState(null); - const [loadingDataset, setLoadingDataset] = useState(true); - - // 文档状态 - const [documents, setDocuments] = useState([]); - const [loadingDocuments, setLoadingDocuments] = useState(false); - const [documentTotal, setDocumentTotal] = useState(0); - const [documentPage, setDocumentPage] = useState(1); - const [documentPageSize] = useState(20); - - // 初始化状态 - const [inited, setInited] = useState(false); - const [error, setError] = useState(null); - - // 菜单状态 - const [activeTab, setActiveTab] = useState('documents'); - - // 选中的文档(用于查看文档详情) - const [selectedDocument, setSelectedDocument] = useState(null); - - /** - * 加载知识库(获取第一个知识库) - */ - const loadDataset = async () => { - setLoadingDataset(true); - try { - console.log('[DatasetManager] 加载知识库...'); - const response = await fetchDatasets(1, 1); - console.log('[DatasetManager] 知识库响应:', response); - - if (response && response.data && response.data.length > 0) { - const firstDataset = response.data[0]; - setDataset(firstDataset); - // 立即加载文档 - await loadDocuments(firstDataset.id, 1); - } else { - setError('未找到知识库,请先在Dify中创建知识库'); - } - } catch (err: any) { - console.error('[DatasetManager] 加载知识库失败:', err); - setError(err.message || '加载知识库失败'); - message.error('加载知识库失败'); - } finally { - setLoadingDataset(false); - setInited(true); - } - }; - - /** - * 加载文档列表 - */ - const loadDocuments = async (datasetId: string, page: number = 1) => { - if (!datasetId) return; - - setLoadingDocuments(true); - try { - console.log('[DatasetManager] 加载文档列表:', { datasetId, page }); - const response = await fetchDocuments(datasetId, page, documentPageSize); - console.log('[DatasetManager] 文档列表响应:', response); - - if (response && response.data) { - setDocuments(response.data); - setDocumentTotal(response.total); - setDocumentPage(page); - } - } catch (err: any) { - console.error('[DatasetManager] 加载文档列表失败:', err); - message.error('加载文档列表失败'); - } finally { - setLoadingDocuments(false); - } - }; - - /** - * 处理文档页码变化 - */ - const handlePageChange = (page: number) => { - if (dataset) { - loadDocuments(dataset.id, page); - } - }; - - /** - * 处理文档删除 - */ - const handleDocumentDeleted = (documentId: string) => { - setDocuments((prev) => prev.filter((doc) => doc.id !== documentId)); - setDocumentTotal((prev) => prev - 1); - - // 更新知识库的文档数量 - if (dataset) { - setDataset({ - ...dataset, - document_count: dataset.document_count - 1 - }); - } - }; - - /** - * 处理文档状态变化 - */ - const handleDocumentStatusChanged = (documentId: string, enabled: boolean) => { - setDocuments((prev) => - prev.map((doc) => - doc.id === documentId ? { ...doc, enabled } : doc - ) - ); - }; - - /** - * 刷新文档列表 - */ - const handleRefresh = () => { - if (dataset) { - loadDocuments(dataset.id, documentPage); - } - }; - - /** - * 查看文档详情(分段管理) - */ - const handleViewDocument = (doc: Document) => { - console.log('[DatasetManager] 查看文档详情:', doc); - setSelectedDocument(doc); - }; - - /** - * 返回文档列表 - */ - const handleBackToDocuments = () => { - setSelectedDocument(null); - }; - - /** - * 处理菜单切换 - */ - const handleTabChange = (tab: MenuTab) => { - setActiveTab(tab); - // 切换菜单时清除选中的文档 - if (tab !== 'documents') { - setSelectedDocument(null); - } - }; - - /** - * 处理知识库更新 - */ - const handleDatasetUpdated = (updatedDataset: Dataset) => { - setDataset(updatedDataset); - }; - - // 初始化 - useEffect(() => { - loadDataset(); - }, []); + const { + // 状态 + dataset, + loadingDataset, + documents, + loadingDocuments, + documentTotal, + documentPage, + documentPageSize, + inited, + error, + activeTab, + selectedDocument, + + // 方法 + handlePageChange, + handleDocumentDeleted, + handleDocumentStatusChanged, + handleRefresh, + handleViewDocument, + handleBackToDocuments, + handleTabChange, + handleDatasetUpdated, + } = useDatasetManager(); // 加载中状态 if (!inited || loadingDataset) { diff --git a/app/components/dify-dataset-manager/layout.tsx b/app/components/dify-dataset-manager/layout.tsx index 8ca457b..88e3fe4 100644 --- a/app/components/dify-dataset-manager/layout.tsx +++ b/app/components/dify-dataset-manager/layout.tsx @@ -1,4 +1,3 @@ -import { ReactNode } from 'react'; import { Button, Tooltip } from 'antd'; import { FileTextOutlined, @@ -7,27 +6,7 @@ import { ArrowLeftOutlined, DatabaseOutlined, } from '@ant-design/icons'; -import type { Dataset } from '~/api/dify-dataset/type/datasetTypes'; - -/** - * 菜单项类型 - */ -export type MenuTab = 'documents' | 'retrieve' | 'settings'; - -interface DatasetLayoutProps { - /** 知识库信息 */ - dataset: Dataset | null; - /** 当前激活的菜单 */ - activeTab: MenuTab; - /** 菜单切换回调 */ - onTabChange: (tab: MenuTab) => void; - /** 是否显示返回按钮(在文档详情页时显示) */ - showBackButton?: boolean; - /** 返回按钮点击回调 */ - onBack?: () => void; - /** 子组件 */ - children: ReactNode; -} +import type { DatasetLayoutProps, MenuTab, MenuItem } from '~/types/dify-dataset-manager/layout'; /** * 知识库布局组件 @@ -41,7 +20,7 @@ export default function DatasetLayout({ onBack, children, }: DatasetLayoutProps) { - const menuItems: { key: MenuTab; icon: ReactNode; label: string }[] = [ + const menuItems: MenuItem[] = [ { key: 'documents', icon: , label: '文档' }, { key: 'retrieve', icon: , label: '召回测试' }, { key: 'settings', icon: , label: '设置' }, @@ -106,3 +85,6 @@ export default function DatasetLayout({
); } + +// 重新导出类型,保持向后兼容 +export type { MenuTab } from '~/types/dify-dataset-manager/layout'; diff --git a/app/components/dify-dataset-manager/retrieve-test.tsx b/app/components/dify-dataset-manager/retrieve-test.tsx index 7ce129c..840dc6b 100644 --- a/app/components/dify-dataset-manager/retrieve-test.tsx +++ b/app/components/dify-dataset-manager/retrieve-test.tsx @@ -1,202 +1,306 @@ -import { useState } from 'react'; -import { - Input, - Button, - Card, - Select, - Slider, - Table, - Tag, - Empty, - Spin, - message, -} from 'antd'; -import { FileSearchOutlined } from '@ant-design/icons'; -import type { ColumnsType } from 'antd/es/table'; +import { SearchOutlined, FileSearchOutlined } from '@ant-design/icons'; +import { Button, Tag, Input, Slider, Spin, Select, Flex } from 'antd'; import type { RetrieveRecord } from '~/api/dify-dataset/type'; -import { retrieveDataset } from '~/api/dify-dataset/api/segmentApi'; +import { useRetrieveTest } from '~/hooks/dify-dataset-manager/retrieve-test'; +import type { RetrieveTestProps } from '~/types/dify-dataset-manager/retrieve-test'; -interface RetrieveTestProps { - datasetId: string; +// 颜色常量 +const colors = { + bgContainer: '#fff', + bgLayout: '#f5f5f5', + bgElevated: '#fafafa', + border: '#e8e8e8', + text: '#262626', + textSecondary: '#8c8c8c', + textTertiary: '#bfbfbf', + textQuaternary: '#d9d9d9', + fillTertiary: '#f0f0f0', +}; + +/** + * 检索结果项组件 + */ +function ResultItem({ record, index }: { record: RetrieveRecord; index: number }) { + const scorePercent = (record.score * 100).toFixed(1); + const scoreColor = record.score > 0.8 ? '#52c41a' : record.score > 0.5 ? '#faad14' : '#666'; + + return ( + + + + + {scorePercent}% + + + #{index + 1} · {record.segment.word_count} 字 · 命中 {record.segment.hit_count} 次 + + + {record.segment.document && ( + + 来源: {record.segment.document.name} + + )} + +
+ {record.segment.content.length > 500 + ? record.segment.content.substring(0, 500) + '...' + : record.segment.content} +
+ {record.segment.answer && ( + + + 答案: + + + {record.segment.answer.length > 200 + ? record.segment.answer.substring(0, 200) + '...' + : record.segment.answer} + + + )} +
+ ); } /** * 召回测试组件 - * 用于测试知识库的检索效果 */ export default function RetrieveTest({ datasetId }: RetrieveTestProps) { - const [searchQuery, setSearchQuery] = useState(''); - const [retrieveResults, setRetrieveResults] = useState([]); - const [retrieving, setRetrieving] = useState(false); - const [searchMethod, setSearchMethod] = useState('hybrid_search'); - const [topK, setTopK] = useState(5); + const { + searchQuery, + setSearchQuery, + retrieveResults, + retrieving, + searchMethod, + setSearchMethod, + topK, + setTopK, + handleRetrieve, + } = useRetrieveTest(datasetId); - /** - * 执行检索 - */ - const handleRetrieve = async () => { - if (!searchQuery.trim()) { - message.warning('请输入检索关键词'); - return; - } - - if (!datasetId) { - message.warning('知识库ID不存在'); - return; - } - - setRetrieving(true); - try { - const response = await retrieveDataset(datasetId, searchQuery, { - search_method: searchMethod as any, - top_k: topK, - }); - setRetrieveResults(response.records || []); - if (response.records?.length === 0) { - message.info('未找到匹配的结果'); - } - } catch (err: any) { - console.error('检索失败:', err); - message.error(err.message || '检索失败'); - } finally { - setRetrieving(false); - } - }; - - // 检索结果列定义 - const columns: ColumnsType = [ - { - title: '相关度', - dataIndex: 'score', - key: 'score', - width: 100, - render: (score: number) => ( - 0.8 ? 'green' : score > 0.5 ? 'orange' : 'default'}> - {(score * 100).toFixed(1)}% - - ), - }, - { - title: '内容', - key: 'content', - render: (_, record) => ( -
-
- {record.segment.content.length > 300 - ? record.segment.content.substring(0, 300) + '...' - : record.segment.content} -
- {record.segment.answer && ( -
- 答案: - {record.segment.answer.length > 150 - ? record.segment.answer.substring(0, 150) + '...' - : record.segment.answer} -
- )} -
- ), - }, - { - title: '字数', - key: 'word_count', - width: 80, - render: (_, record) => record.segment.word_count, - }, - { - title: '命中次数', - key: 'hit_count', - width: 100, - render: (_, record) => record.segment.hit_count, - }, + // 检索方式选项(只有3种) + const searchMethodOptions = [ + { label: '向量检索', value: 'semantic_search' }, + { label: '全文检索', value: 'full_text_search' }, + { label: '混合检索', value: 'hybrid_search' }, ]; return ( -
- {/* 页面标题 */} -
-

召回测试

-

- 输入查询内容,测试知识库的检索效果 -

-
+ + {/* 左侧面板 - 输入区域 */} + + {/* 标题 */} + +

+ 召回测试 +

+ + 根据给定的查询文本测试知识库召回效果 + +
- {/* 检索设置 */} - -
- } - value={searchQuery} - onChange={(e) => setSearchQuery(e.target.value)} - onPressEnter={handleRetrieve} - className="search-input" - /> - -
- -
-
- 检索方式: + {/* 查询输入区 */} + + + + 源文本 +