Merge branch 'PingChuan' into shiy-login

# Conflicts:
#	app/config/api-config.ts
This commit is contained in:
2025-12-03 12:10:21 +08:00
33 changed files with 6104 additions and 1477 deletions
+151 -20
View File
@@ -121,7 +121,7 @@ export async function toggleDocumentStatus(
}
/**
* 上传文件到知识库
* 上传文件到知识库(使用默认配置)
*
* @param datasetId - 知识库 ID
* @param file - 文件对象
@@ -132,19 +132,44 @@ export async function uploadDocument(
datasetId: string,
file: File,
onProgress?: (percent: number) => void
): Promise<any> {
): Promise<UploadDocumentResponse> {
return uploadDocumentWithConfig(
datasetId,
file,
{
indexing_technique: 'high_quality',
process_rule: { mode: 'automatic' },
},
onProgress
);
}
/**
* 上传文件到知识库(使用自定义配置)
*
* @param datasetId - 知识库 ID
* @param file - 文件对象
* @param config - 上传配置(索引模式、分段规则等)
* @param onProgress - 上传进度回调
* @returns 创建的文档信息,包含 batch 用于查询索引进度
*/
export async function uploadDocumentWithConfig(
datasetId: string,
file: File,
config: UploadDocumentConfig,
onProgress?: (percent: number) => void
): Promise<UploadDocumentResponse> {
const formData = new FormData();
formData.append('file', file);
formData.append('data', JSON.stringify({
indexing_technique: 'high_quality',
process_rule: {
mode: 'automatic',
},
}));
formData.append('data', JSON.stringify(config));
console.log('[Dataset Client] 上传文档:', { datasetId, fileName: file.name });
console.log('[Dataset Client] 上传文档:', {
datasetId,
fileName: file.name,
config,
});
const response = await axios.post(
const response = await axios.post<UploadDocumentResponse>(
`${API_URL}/datasets/${datasetId}/documents`,
formData,
{
@@ -200,21 +225,41 @@ export async function fetchUploadFileInfo(
return response.data;
}
/**
* Identifier of a text pre-processing rule.
*
* Only the two literal ids listed here are accepted.
*/
export type PreProcessingRuleId = 'remove_extra_spaces' | 'remove_urls_emails';
/**
* Configuration of a single pre-processing rule: which rule, and whether it is on.
*/
export interface PreProcessingRule {
// Which pre-processing rule this entry refers to.
id: PreProcessingRuleId;
// Whether the rule is switched on.
enabled: boolean;
}
/**
* Segmentation (chunking) configuration.
*/
export interface SegmentationConfig {
// Presumably the delimiter at which text is split into segments — confirm against the Dify API docs.
separator: string;
// Presumably the upper bound on segment length — unit (tokens vs. characters) to be confirmed.
max_tokens: number;
}
/**
* Custom processing rules.
*
* Both parts are optional: a list of pre-processing rules and a segmentation
* configuration.
*/
export interface CustomRules {
pre_processing_rules?: PreProcessingRule[];
segmentation?: SegmentationConfig;
}
/**
* 文档处理规则配置
*/
export interface ProcessRule {
mode: 'automatic' | 'custom';
rules?: {
pre_processing_rules?: Array<{
id: 'remove_extra_spaces' | 'remove_urls_emails';
enabled: boolean;
}>;
segmentation?: {
separator: string;
max_tokens: number;
};
};
rules?: CustomRules;
}
/**
@@ -225,6 +270,45 @@ export interface UpdateDocumentSettings {
process_rule?: ProcessRule;
}
/**
* Configuration parameters for uploading a document.
*
* The upload helpers in this file serialize this object with `JSON.stringify`
* into the `data` field of the multipart form.
*/
export interface UploadDocumentConfig {
// Indexing mode; one of the two literal values below.
indexing_technique: 'high_quality' | 'economy';
// Processing rule (automatic or custom) applied to the document.
process_rule: ProcessRule;
}
/**
* Response of a document upload.
*
* Contains the created document record plus a `batch` value which, per the
* upload helpers' documentation in this file, is used to query indexing progress.
*/
export interface UploadDocumentResponse {
// The created document record.
document: {
id: string;
position: number;
data_source_type: string;
data_source_info: {
upload_file_id: string;
};
dataset_process_rule_id: string;
name: string;
created_from: string;
created_by: string;
// NOTE(review): presumably a Unix timestamp in seconds — confirm against the API.
created_at: number;
tokens: number;
indexing_status: string;
// Null when there is no error message.
error: string | null;
enabled: boolean;
disabled_at: number | null;
disabled_by: string | null;
archived: boolean;
display_status: string;
word_count: number;
hit_count: number;
doc_form: string;
};
// Batch identifier used to query indexing progress.
batch: string;
}
/**
* 更新文档设置并重新处理
* 注意:Dify API 不直接支持修改已有文档的分段设置
@@ -252,3 +336,50 @@ export async function updateDocumentWithSettings(
);
return response.data;
}
/**
 * Update an existing document by submitting a file.
 * Dify API: POST /datasets/{dataset_id}/documents/{document_id}/update-by-file
 *
 * Intended for re-processing a document with the same file after the user has
 * changed the segmentation parameters.
 *
 * @param datasetId - Knowledge base (dataset) ID
 * @param documentId - Document ID
 * @param file - File object (the originally uploaded file)
 * @param config - New segmentation configuration; sent as JSON in the `data` form field
 * @param onProgress - Upload progress callback, invoked with a rounded percentage
 * @returns The updated document info, including a new `batch` for querying indexing progress
 */
export async function updateDocumentByFile(
  datasetId: string,
  documentId: string,
  file: File,
  config: UploadDocumentConfig,
  onProgress?: (percent: number) => void
): Promise<UploadDocumentResponse> {
  // Multipart payload: the raw file plus the JSON-encoded configuration.
  const payload = new FormData();
  payload.append('file', file);
  payload.append('data', JSON.stringify(config));

  console.log('[Dataset Client] 通过文件更新文档:', {
    datasetId,
    documentId,
    fileName: file.name,
    config,
  });

  const endpoint = `${API_URL}/datasets/${datasetId}/documents/${documentId}/update-by-file`;

  const { data } = await axios.post<UploadDocumentResponse>(endpoint, payload, {
    withCredentials: true,
    onUploadProgress: (progressEvent) => {
      // Without a known total (or a listener) there is nothing to report.
      if (!progressEvent.total || !onProgress) {
        return;
      }
      onProgress(Math.round((progressEvent.loaded * 100) / progressEvent.total));
    },
  });

  return data;
}
+9
View File
@@ -18,10 +18,19 @@ export {
deleteDocument,
toggleDocumentStatus,
uploadDocument,
uploadDocumentWithConfig,
updateDocumentByFile,
fetchIndexingStatus,
fetchUploadFileInfo,
} from './documentApi';
// 文档 API 类型
export type {
ProcessRule,
UploadDocumentConfig,
UploadDocumentResponse,
} from './documentApi';
// 分段、子分段、检索 API
export {
fetchSegments,
+2 -2
View File
@@ -332,7 +332,7 @@ export async function deleteChildChunk(
*
* @param datasetId - 知识库 ID
* @param query - 检索关键词
* @param retrievalModel - 检索模型配置
* @param retrievalModel - 检索模型配置(完整的 Dify API 格式)
* @returns 检索结果
*/
export async function retrieveDataset(
@@ -340,7 +340,7 @@ export async function retrieveDataset(
query: string,
retrievalModel?: RetrieveRequest['retrieval_model']
): Promise<RetrieveResponse> {
console.log('[Dataset Client] 检索知识库:', { datasetId, query });
console.log('[Dataset Client] 检索知识库:', { datasetId, query, retrievalModel });
const requestBody: RetrieveRequest = {
query,
+1
View File
@@ -42,6 +42,7 @@ export type {
MetadataFilterCondition,
MetadataFilteringConditions,
RetrieveRequest,
RetrieveSegment,
RetrieveRecord,
RetrieveResponse,
} from './segmentTypes';
+19 -12
View File
@@ -4,6 +4,11 @@
* @module api/dify-dataset/type/segmentTypes
*/
import type { RetrievalModel } from './datasetTypes';
// 重新导出以便其他模块使用
export type { RetrievalModel };
// ============================================================================
// 分段类型
// ============================================================================
@@ -154,25 +159,27 @@ export interface MetadataFilteringConditions {
*/
export interface RetrieveRequest {
query: string;
retrieval_model?: {
search_method: 'keyword_search' | 'semantic_search' | 'full_text_search' | 'hybrid_search';
reranking_enable?: boolean;
reranking_model?: {
reranking_provider_name: string;
reranking_model_name: string;
};
top_k?: number;
score_threshold_enabled?: boolean;
score_threshold?: number;
};
retrieval_model?: RetrievalModel;
metadata_filtering_conditions?: MetadataFilteringConditions;
}
/**
* Segment information inside a retrieval result.
*
* Extends `Segment` with an optional summary of the associated document.
*/
export interface RetrieveSegment extends Segment {
// Summary of the document the segment belongs to; may be absent.
document?: {
id: string;
data_source_type: string;
name: string;
doc_type: string | null;
};
}
/**
* 检索结果记录
*/
export interface RetrieveRecord {
segment: Segment;
segment: RetrieveSegment;
score: number;
tsne_position?: {
x: number;
@@ -1,16 +1,10 @@
import { useState, useEffect } from 'react';
import { Form, Input, Button, Card, message, Spin } from 'antd';
import { Form, Input, Button, Card, Spin } from 'antd';
import { SaveOutlined } from '@ant-design/icons';
import type { Dataset } from '~/api/dify-dataset/type/datasetTypes';
import { updateDatasetName } from '~/api/dify-dataset/api/datasetApi';
import { useDatasetSettings } from '~/hooks/dify-dataset-manager/dataset-settings';
import type { DatasetSettingsProps } from '~/types/dify-dataset-manager/dataset-settings';
const { TextArea } = Input;
interface DatasetSettingsProps {
dataset: Dataset | null;
onDatasetUpdated: (dataset: Dataset) => void;
}
/**
* 知识库设置组件
* 用于修改知识库名称和描述
@@ -20,70 +14,14 @@ export default function DatasetSettings({
onDatasetUpdated,
}: DatasetSettingsProps) {
const [form] = Form.useForm();
const [saving, setSaving] = useState(false);
const [hasChanges, setHasChanges] = useState(false);
// 初始化表单数据
useEffect(() => {
if (dataset) {
form.setFieldsValue({
name: dataset.name,
description: dataset.description || '',
});
setHasChanges(false);
}
}, [dataset, form]);
/**
* 处理表单值变化
*/
const handleValuesChange = () => {
const values = form.getFieldsValue();
const changed =
values.name !== dataset?.name ||
values.description !== (dataset?.description || '');
setHasChanges(changed);
};
/**
* 保存设置
*/
const handleSave = async () => {
if (!dataset) {
message.error('知识库不存在');
return;
}
try {
const values = await form.validateFields();
setSaving(true);
// 目前只支持修改名称
const updatedDataset = await updateDatasetName(dataset.id, values.name);
message.success('保存成功');
onDatasetUpdated(updatedDataset);
setHasChanges(false);
} catch (err: any) {
console.error('保存设置失败:', err);
message.error(err.message || '保存失败');
} finally {
setSaving(false);
}
};
/**
* 重置表单
*/
const handleReset = () => {
if (dataset) {
form.setFieldsValue({
name: dataset.name,
description: dataset.description || '',
});
setHasChanges(false);
}
};
const {
saving,
hasChanges,
handleValuesChange,
handleSave,
handleReset,
} = useDatasetSettings(dataset, form, onDatasetUpdated);
if (!dataset) {
return (
@@ -1,4 +1,3 @@
import { useState, useEffect } from 'react';
import {
Input,
Button,
@@ -8,7 +7,6 @@ import {
Card,
Empty,
Spin,
message,
Divider,
Tooltip,
} from 'antd';
@@ -17,45 +15,8 @@ import {
ReloadOutlined,
EyeOutlined,
} from '@ant-design/icons';
import type { Document } from '~/api/dify-dataset/type/documentTypes';
import type { Segment } from '~/api/dify-dataset/type';
import { fetchSegments } from '~/api/dify-dataset/api/segmentApi';
import { updateDocumentWithSettings } from '~/api/dify-dataset/api/documentApi';
interface DocumentDetailProps {
datasetId: string;
document: Document | null;
}
/**
* 分段设置配置
* 注意:Dify API 支持的参数有限
* - separator: ✅ 支持
* - maxTokens: ✅ 支持
* - removeExtraSpaces: ✅ 支持
* - removeUrlsEmails: ✅ 支持
* - useQASegment: ⚠️ 需要 doc_form: "qa_model"
*/
interface SegmentationSettings {
separator: string;
maxTokens: number;
removeExtraSpaces: boolean;
removeUrlsEmails: boolean;
useQASegment: boolean;
qaLanguage: string;
}
/**
* 默认分段设置
*/
const DEFAULT_SETTINGS: SegmentationSettings = {
separator: '\\n\\n',
maxTokens: 500,
removeExtraSpaces: true,
removeUrlsEmails: false,
useQASegment: false,
qaLanguage: 'Chinese',
};
import { useDocumentDetail } from '~/hooks/dify-dataset-manager/document-detail';
import type { DocumentDetailProps } from '~/types/dify-dataset-manager/document-detail';
/**
* 文档详情组件
@@ -65,98 +26,17 @@ export default function DocumentDetail({
datasetId,
document,
}: DocumentDetailProps) {
// 分段设置状态
const [settings, setSettings] = useState<SegmentationSettings>(DEFAULT_SETTINGS);
// 预览状态
const [previewSegments, setPreviewSegments] = useState<Segment[]>([]);
const [previewLoading, setPreviewLoading] = useState(false);
const [showPreview, setShowPreview] = useState(false);
// 保存状态
const [saving, setSaving] = useState(false);
// 当文档变化时重置设置
useEffect(() => {
if (document) {
// 可以从文档中读取已有的设置,这里使用默认值
setSettings(DEFAULT_SETTINGS);
setPreviewSegments([]);
setShowPreview(false);
}
}, [document?.id]);
/**
* 更新设置
*/
const updateSettings = (key: keyof SegmentationSettings, value: any) => {
setSettings(prev => ({ ...prev, [key]: value }));
};
/**
* 重置设置
*/
const handleReset = () => {
setSettings(DEFAULT_SETTINGS);
setPreviewSegments([]);
setShowPreview(false);
};
/**
* 预览分段
*/
const handlePreview = async () => {
if (!document) return;
setPreviewLoading(true);
setShowPreview(true);
try {
// 获取当前文档的分段作为预览
const response = await fetchSegments(datasetId, document.id, 1, 50);
setPreviewSegments(response.data || []);
if (response.data?.length === 0) {
message.info('该文档暂无分段数据');
}
} catch (err: any) {
console.error('预览分段失败:', err);
message.error(err.message || '预览失败');
} finally {
setPreviewLoading(false);
}
};
/**
* 保存并处理
*/
const handleSaveAndProcess = async () => {
if (!document) return;
setSaving(true);
try {
await updateDocumentWithSettings(datasetId, document.id, {
indexing_technique: 'high_quality',
process_rule: {
mode: 'custom',
rules: {
pre_processing_rules: [
{ id: 'remove_extra_spaces', enabled: settings.removeExtraSpaces },
{ id: 'remove_urls_emails', enabled: settings.removeUrlsEmails },
],
segmentation: {
separator: settings.separator.replace(/\\n/g, '\n'),
max_tokens: settings.maxTokens,
},
},
},
});
message.success('设置已保存,文档正在重新处理...');
} catch (err: any) {
console.error('保存设置失败:', err);
message.error(err.message || '保存失败');
} finally {
setSaving(false);
}
};
const {
settings,
previewSegments,
previewLoading,
showPreview,
saving,
updateSettings,
handleReset,
handlePreview,
handleSaveAndProcess,
} = useDocumentDetail(datasetId, document);
if (!document) {
return (
@@ -1,4 +1,3 @@
import { useState } from 'react';
import {
Button,
Input,
@@ -8,10 +7,8 @@ import {
Tooltip,
Popconfirm,
Switch,
message,
Empty,
Spin,
Upload,
} from 'antd';
import {
SearchOutlined,
@@ -20,32 +17,14 @@ import {
FileTextOutlined,
CloudUploadOutlined,
EyeOutlined,
ClockCircleOutlined,
CheckCircleOutlined,
SyncOutlined,
ExclamationCircleOutlined,
PauseCircleOutlined,
} from '@ant-design/icons';
import type { ColumnsType } from 'antd/es/table';
import type { Document, IndexingStatus } from '~/api/dify-dataset/type/documentTypes';
import { deleteDocument, toggleDocumentStatus, uploadDocument } from '~/api/dify-dataset/api/documentApi';
import { useDocumentList } from '~/hooks/dify-dataset-manager/document-list';
import type { DocumentListProps } from '~/types/dify-dataset-manager/document-list';
import DocumentUpload from './document-upload';
import '../../styles/components/dify-dataset-manager/index.css';
interface DocumentListProps {
datasetId: string;
datasetName: string;
documents: Document[];
loading: boolean;
total: number;
page: number;
pageSize: number;
onPageChange: (page: number) => void;
onDocumentDeleted: (documentId: string) => void;
onDocumentStatusChanged: (documentId: string, enabled: boolean) => void;
onRefresh: () => void;
onViewDocument?: (document: Document) => void;
}
/**
* 文档列表组件
*/
@@ -62,114 +41,24 @@ export default function DocumentList({
onRefresh,
onViewDocument,
}: DocumentListProps) {
const [searchValue, setSearchValue] = useState('');
const [uploading, setUploading] = useState(false);
const [deletingId, setDeletingId] = useState<string | null>(null);
/**
* 获取状态标签配置
*/
const getStatusConfig = (status: IndexingStatus) => {
const configs: Record<IndexingStatus, { color: string; icon: React.ReactNode; text: string }> = {
completed: { color: 'success', icon: <CheckCircleOutlined />, text: '已完成' },
indexing: { color: 'processing', icon: <SyncOutlined spin />, text: '索引中' },
waiting: { color: 'warning', icon: <ClockCircleOutlined />, text: '等待中' },
parsing: { color: 'processing', icon: <SyncOutlined spin />, text: '解析中' },
cleaning: { color: 'processing', icon: <SyncOutlined spin />, text: '清洗中' },
splitting: { color: 'processing', icon: <SyncOutlined spin />, text: '分段中' },
paused: { color: 'default', icon: <PauseCircleOutlined />, text: '已暂停' },
error: { color: 'error', icon: <ExclamationCircleOutlined />, text: '错误' },
};
return configs[status] || { color: 'default', icon: null, text: status };
};
/**
* 格式化日期
*/
const formatDate = (timestamp: number) => {
return new Date(timestamp * 1000).toLocaleString('zh-CN', {
year: 'numeric',
month: '2-digit',
day: '2-digit',
hour: '2-digit',
minute: '2-digit',
});
};
/**
* 格式化数字
*/
const formatNumber = (num: number) => {
if (num >= 10000) {
return (num / 10000).toFixed(1) + 'w';
}
if (num >= 1000) {
return (num / 1000).toFixed(1) + 'k';
}
return num.toString();
};
/**
* 处理删除文档
*/
const handleDelete = async (documentId: string) => {
setDeletingId(documentId);
try {
await deleteDocument(datasetId, documentId);
message.success('删除成功');
onDocumentDeleted(documentId);
} catch (err: any) {
console.error('删除文档失败:', err);
message.error(err.message || '删除失败');
} finally {
setDeletingId(null);
}
};
/**
* 处理启用/禁用文档
*/
const handleToggleStatus = async (documentId: string, enabled: boolean) => {
try {
await toggleDocumentStatus(datasetId, documentId, enabled);
message.success(enabled ? '已启用' : '已禁用');
onDocumentStatusChanged(documentId, enabled);
} catch (err: any) {
console.error('切换文档状态失败:', err);
message.error(err.message || '操作失败');
}
};
/**
* 处理文件上传
*/
const handleUpload = async (file: File) => {
if (!datasetId) {
message.error('请先选择知识库');
return false;
}
setUploading(true);
try {
await uploadDocument(datasetId, file, (percent) => {
console.log('上传进度:', percent);
});
message.success('上传成功,正在处理...');
onRefresh();
} catch (err: any) {
console.error('上传文件失败:', err);
message.error(err.message || '上传失败');
} finally {
setUploading(false);
}
return false;
};
const {
searchValue,
setSearchValue,
deletingId,
showUploadPage,
getStatusConfig,
formatDate,
formatNumber,
handleDelete,
handleToggleStatus,
handleUploadClick,
handleUploadClose,
handleUploadSuccess,
filterDocuments,
} = useDocumentList(datasetId, onDocumentDeleted, onDocumentStatusChanged, onRefresh);
// 过滤文档
const filteredDocuments = documents.filter((doc) =>
doc.name.toLowerCase().includes(searchValue.toLowerCase())
);
const filteredDocuments = filterDocuments(documents);
// 表格列定义
const columns: ColumnsType<Document> = [
@@ -271,14 +160,20 @@ export default function DocumentList({
];
return (
<>
{/* 上传页面 */}
{showUploadPage ? (
<DocumentUpload
datasetId={datasetId}
onClose={handleUploadClose}
onSuccess={handleUploadSuccess}
/>
) : (
<div className="document-list-page">
{/* 页面头部 */}
<div className="page-header">
<div className="header-left">
<h1></h1>
{/* <p className="page-description">
知识库的所有文件都在这里显示,整个知识库都可以被接到 Dify 引用或通过 Chat 插件进行索引。
</p> */}
</div>
<div className="header-actions">
<Tooltip title="刷新">
@@ -288,21 +183,14 @@ export default function DocumentList({
loading={loading}
/>
</Tooltip>
<Upload
beforeUpload={handleUpload}
showUploadList={false}
accept=".txt,.md,.pdf,.docx,.doc,.csv,.xlsx,.xls"
disabled={!datasetId}
>
<Button
type="primary"
icon={<CloudUploadOutlined />}
loading={uploading}
onClick={handleUploadClick}
disabled={!datasetId}
>
</Button>
</Upload>
</div>
</div>
@@ -329,15 +217,13 @@ export default function DocumentList({
<div className="empty-state">
<Empty description={searchValue ? '未找到匹配的文档' : '暂无文档'}>
{!searchValue && (
<Upload
beforeUpload={handleUpload}
showUploadList={false}
accept=".txt,.md,.pdf,.docx,.doc,.csv,.xlsx,.xls"
<Button
type="primary"
icon={<CloudUploadOutlined />}
onClick={handleUploadClick}
>
<Button type="primary" icon={<CloudUploadOutlined />}>
</Button>
</Upload>
)}
</Empty>
</div>
@@ -381,5 +267,7 @@ export default function DocumentList({
</div>
)}
</div>
)}
</>
);
}
@@ -0,0 +1,446 @@
import {
ArrowLeftOutlined,
CheckCircleOutlined,
DeleteOutlined,
ExclamationCircleOutlined,
FileTextOutlined,
InboxOutlined,
LoadingOutlined,
QuestionCircleOutlined,
} from '@ant-design/icons';
import type { UploadFile } from 'antd';
import {
Button,
Card,
Checkbox,
Divider,
Empty,
Input,
InputNumber,
Progress,
Select,
Spin,
Tooltip,
Upload,
} from 'antd';
import { useEffect, useState } from 'react';
import type { Segment } from '~/api/dify-dataset/type';
import { useDocumentUpload } from '~/hooks/dify-dataset-manager/document-upload';
import type { DocumentUploadProps, UploadedDocument } from '~/types/dify-dataset-manager/document-upload';
import { SUPPORTED_FORMATS } from '~/types/dify-dataset-manager/document-upload';
const { Dragger } = Upload;
/**
 * Document upload page.
 *
 * Supports multi-file upload as a two-step flow: choose files, then upload and
 * configure segmentation. All state and handlers come from `useDocumentUpload`;
 * this component renders them and additionally smooths the progress bar.
 *
 * @param datasetId - Knowledge base ID, forwarded to `useDocumentUpload`
 * @param onClose - Bound to the header back button and forwarded to the hook
 * @param onSuccess - Forwarded to the hook
 */
export default function DocumentUpload({
  datasetId,
  onClose,
  onSuccess,
}: DocumentUploadProps) {
  const {
    // State
    step,
    fileList,
    uploadedDocuments,
    currentSettings,
    previewLoading,
    // Handlers
    handleFileChange,
    handleRemoveFile,
    handleNextStep,
    handleDocumentChange,
    handleReprocess,
    handlePrevStep,
    handleGoToDocuments,
    updateCurrentSettings,
    // Derived-value getters
    getCurrentDocument,
    getCurrentProgress,
    getStatusText,
    isCurrentDocProcessing,
    getCompletionStats,
  } = useDocumentUpload(datasetId, onClose, onSuccess);

  // Only entries that carry a browser File object count as selected.
  const selectedFiles = fileList.flatMap((f: UploadFile) => (f.originFileObj ? [f.originFileObj] : []));

  // Smoothed progress bar: `displayPercent` chases `targetPercent` one animation frame at a time.
  const [displayPercent, setDisplayPercent] = useState(0);
  const targetPercent = getCurrentProgress();

  useEffect(() => {
    if (targetPercent > displayPercent) {
      // Target is ahead of what is shown: advance on the next frame.
      const diff = targetPercent - displayPercent;
      // Dynamic increment: the larger the gap the faster it moves, never less than 1.
      // (Named `increment` so it does not shadow the hook's `step`.)
      const increment = Math.max(1, Math.ceil(diff / 10));
      const frame = requestAnimationFrame(() => {
        setDisplayPercent(prev => Math.min(targetPercent, prev + increment));
      });
      return () => cancelAnimationFrame(frame);
    } else if (targetPercent < displayPercent && targetPercent === 0) {
      // Target was reset to 0 (e.g. a restart): reset immediately.
      // NOTE(review): a lower but non-zero target leaves the displayed value
      // unchanged — confirm this is intended when switching between documents.
      setDisplayPercent(0);
    }
  }, [targetPercent, displayPercent]);

  /**
   * Render the step indicator (two-step flow).
   */
  const renderSteps = () => (
    <div className="upload-steps">
      <div className={`step-item ${step === 1 ? 'active' : ''} ${step > 1 ? 'completed' : ''}`}>
        <span className="step-number">1</span>
        <span className="step-title"></span>
      </div>
      <div className={`step-divider ${step > 1 ? 'completed' : ''}`}></div>
      <div className={`step-item ${step === 2 ? 'active' : ''}`}>
        <span className="step-number">2</span>
        <span className="step-title"></span>
      </div>
    </div>
  );

  /**
   * Render step 1: file selection (multiple files supported).
   */
  const renderStep1 = () => (
    <div className="upload-step-content step1">
      <h2 className="step-heading"></h2>
      <p className="step-description">
        广
      </p>
      <div className="file-drop-zone">
        <Dragger
          fileList={fileList}
          onChange={handleFileChange}
          beforeUpload={() => false}
          multiple={true}
          accept=".txt,.md,.mdx,.pdf,.html,.htm,.xlsx,.xls,.docx,.csv,.vtt,.properties"
          showUploadList={false}
        >
          <p className="ant-upload-drag-icon">
            <InboxOutlined />
          </p>
          <p className="ant-upload-text"> <span className="upload-link"></span></p>
          <p className="ant-upload-hint">
            {SUPPORTED_FORMATS} 15MB
          </p>
        </Dragger>
      </div>
      {/* Selected files list */}
      {selectedFiles.length > 0 && (
        <div className="selected-files-section">
          <h3 className="section-subtitle"> ({selectedFiles.length} )</h3>
          <div className="selected-files-list">
            {fileList.map((file: UploadFile) => (
              <div key={file.uid} className="selected-file-item">
                <FileTextOutlined className="file-icon" />
                <div className="file-info">
                  <span className="file-name">{file.name}</span>
                  <span className="file-size">
                    {file.originFileObj
                      ? `${file.originFileObj.type?.split('/')[1]?.toUpperCase() || 'FILE'}${(file.originFileObj.size / 1024 / 1024).toFixed(2)}MB`
                      : ''}
                  </span>
                </div>
                <Button
                  type="text"
                  icon={<DeleteOutlined />}
                  onClick={() => handleRemoveFile(file)}
                  className="remove-file-btn"
                />
              </div>
            ))}
          </div>
        </div>
      )}
      <div className="step-actions">
        <Button
          type="primary"
          onClick={handleNextStep}
          disabled={selectedFiles.length === 0}
          className="next-btn"
        >
        </Button>
      </div>
    </div>
  );

  /**
   * Render step 2: segmentation settings and preview.
   * The settings panel is always shown on the left; the preview card on the
   * right shows either progress or the segment contents.
   */
  const renderStep2 = () => {
    const currentDoc = getCurrentDocument();
    const isProcessing = isCurrentDocProcessing();
    const stats = getCompletionStats();
    return (
      <div className="upload-step-content step2">
        {/* Segmentation settings and preview */}
        <div className="document-detail-content">
          {/* Left: settings panel */}
          <div className="settings-panel">
            <div className="settings-section">
              <h3 className="section-title"></h3>
              {/* Segment separator */}
              <div className="setting-item">
                <label className="setting-label">
                  <Tooltip title="系统会在遇到指定分隔符时自动分段,默认值为 \n\n(按段落分段)">
                    <QuestionCircleOutlined className="help-icon" />
                  </Tooltip>
                </label>
                <Input
                  value={currentSettings.separator}
                  onChange={(e) => updateCurrentSettings('separator', e.target.value)}
                  placeholder="\n\n"
                  className="setting-input"
                  disabled={isProcessing}
                />
              </div>
              {/* Maximum segment length */}
              <div className="setting-item">
                <label className="setting-label">
                  <Tooltip title="指定每个分段允许的最大字符数(100-4000),超过此限制系统会强制分段">
                    <QuestionCircleOutlined className="help-icon" />
                  </Tooltip>
                </label>
                <div className="setting-input-with-suffix">
                  <InputNumber
                    value={currentSettings.maxTokens}
                    // 0 is below `min`, so treating every falsy value as "cleared" is harmless here.
                    onChange={(value) => updateCurrentSettings('maxTokens', value || 1024)}
                    min={100}
                    max={4000}
                    className="setting-input-number"
                    disabled={isProcessing}
                  />
                  <span className="input-suffix">characters</span>
                </div>
              </div>
              {/* Segment overlap length */}
              <div className="setting-item">
                <label className="setting-label">
                  <Tooltip title="相邻分段之间重叠的字符数,有助于保持上下文连贯性">
                    <QuestionCircleOutlined className="help-icon" />
                  </Tooltip>
                </label>
                <div className="setting-input-with-suffix">
                  <InputNumber
                    value={currentSettings.chunkOverlap}
                    // `??` rather than `||`: 0 is a valid overlap (min={0}) and must not
                    // be replaced by the fallback; only a cleared field falls back to 50.
                    onChange={(value) => updateCurrentSettings('chunkOverlap', value ?? 50)}
                    min={0}
                    max={500}
                    className="setting-input-number"
                    disabled={isProcessing}
                  />
                  <span className="input-suffix">characters</span>
                </div>
              </div>
            </div>
            <Divider />
            {/* Text pre-processing rules */}
            <div className="settings-section">
              <h3 className="section-title"></h3>
              <div className="checkbox-group">
                <Checkbox
                  checked={currentSettings.removeExtraSpaces}
                  onChange={(e) => updateCurrentSettings('removeExtraSpaces', e.target.checked)}
                  disabled={isProcessing}
                >
                </Checkbox>
                <Checkbox
                  checked={currentSettings.removeUrlsEmails}
                  onChange={(e) => updateCurrentSettings('removeUrlsEmails', e.target.checked)}
                  disabled={isProcessing}
                >
                  URL
                </Checkbox>
              </div>
            </div>
            <Divider />
            {/* Indexing technique */}
            <div className="settings-section">
              <h3 className="section-title"></h3>
              <div className="index-options">
                <div
                  className={`index-option ${currentSettings.indexingTechnique === 'high_quality' ? 'active' : ''} ${isProcessing ? 'disabled' : ''}`}
                  onClick={() => !isProcessing && updateCurrentSettings('indexingTechnique', 'high_quality')}
                >
                  <span className="option-radio"></span>
                  <span className="option-label"></span>
                  <span className="option-badge recommended"></span>
                </div>
                <div
                  className={`index-option ${currentSettings.indexingTechnique === 'economy' ? 'active' : ''} ${isProcessing ? 'disabled' : ''}`}
                  onClick={() => !isProcessing && updateCurrentSettings('indexingTechnique', 'economy')}
                >
                  <span className="option-radio"></span>
                  <span className="option-label"></span>
                </div>
              </div>
            </div>
            {/* Action buttons */}
            <div className="settings-actions">
              <Button onClick={handlePrevStep} disabled={isProcessing}>
                <ArrowLeftOutlined />
              </Button>
              <Button
                type="primary"
                onClick={handleReprocess}
                loading={isProcessing}
                disabled={isProcessing || !currentDoc?.documentId}
              >
              </Button>
            </div>
          </div>
          {/* Right: preview panel */}
          <div className="preview-panel">
            <Card
              title={
                <div className="preview-header">
                  <span></span>
                  {uploadedDocuments.length > 0 && (
                    <>
                      <Select
                        value={currentDoc?.documentId || currentDoc?.file.name}
                        style={{ width: 500 }}
                        onChange={handleDocumentChange}
                        options={uploadedDocuments.map((doc: UploadedDocument) => ({
                          value: doc.documentId || doc.file.name,
                          label: (
                            <span className="file-select-option">
                              {doc.stage === 'completed' && <CheckCircleOutlined style={{ color: '#52c41a', marginRight: 4 }} />}
                              {(doc.stage === 'uploading' || doc.stage === 'indexing') && <LoadingOutlined style={{ color: '#00684a', marginRight: 4 }} />}
                              {doc.stage === 'error' && <ExclamationCircleOutlined style={{ color: '#ff4d4f', marginRight: 4 }} />}
                              {doc.file.name}
                            </span>
                          ),
                        }))}
                      />
                      {!isProcessing && currentDoc?.segments && (
                        <span className="segment-count">
                          {currentDoc.segments.length}
                        </span>
                      )}
                    </>
                  )}
                </div>
              }
              className="preview-card"
            >
              {/* Processing progress (shown inside the preview card) */}
              {isProcessing ? (
                <div className="preview-processing">
                  <div className="processing-file">
                    <FileTextOutlined className="file-icon" />
                    <span className="file-name">{currentDoc?.file.name}</span>
                    <LoadingOutlined className="status-icon loading" />
                  </div>
                  <Progress
                    percent={displayPercent}
                    status="active"
                    strokeColor={{
                      '0%': '#00684a',
                      '100%': '#52c41a',
                    }}
                  />
                  <div className="status-text">{getStatusText()}</div>
                </div>
              ) : currentDoc?.stage === 'error' ? (
                <div className="preview-error">
                  <ExclamationCircleOutlined className="error-icon" />
                  <div className="error-text">{currentDoc.error || '处理失败'}</div>
                </div>
              ) : previewLoading ? (
                <div className="preview-loading">
                  <Spin size="large" />
                  <div className="loading-text">...</div>
                </div>
              ) : (currentDoc?.segments?.length ?? 0) === 0 ? (
                <div className="preview-empty">
                  <Empty description="等待处理完成后显示分段预览" />
                </div>
              ) : (
                <div className="preview-segments">
                  {currentDoc?.segments.map((segment: Segment, index: number) => (
                    <div key={segment.id} className="segment-item">
                      <div className="segment-header">
                        <span className="segment-index">#{index + 1}</span>
                        <span className="segment-chars">
                          {segment.word_count}
                        </span>
                      </div>
                      <div className="segment-content">
                        {segment.content}
                      </div>
                    </div>
                  ))}
                </div>
              )}
            </Card>
          </div>
        </div>
        {/* Footer actions, shown once at least one document has completed */}
        {stats.completed > 0 && (
          <div className="completion-actions">
            <span className="completion-stats">
              {stats.completed}/{stats.total}
            </span>
            <Button type="primary" onClick={handleGoToDocuments}>
            </Button>
          </div>
        )}
      </div>
    );
  };

  return (
    <div className="document-upload-page">
      {/* Page header */}
      <div className="upload-header">
        <Button
          type="text"
          icon={<ArrowLeftOutlined />}
          onClick={onClose}
          className="back-btn"
        >
        </Button>
        {renderSteps()}
      </div>
      {/* Content area */}
      <div className="upload-content">
        {step === 1 && renderStep1()}
        {step === 2 && renderStep2()}
      </div>
    </div>
  );
}
+26 -162
View File
@@ -1,14 +1,10 @@
import { useEffect, useState } from 'react';
import { message, Spin } from 'antd';
import DatasetLayout, { type MenuTab } from './layout';
import { Spin } from 'antd';
import DatasetLayout from './layout';
import DocumentList from './document-list';
import DocumentDetail from './document-detail';
import RetrieveTest from './retrieve-test';
import DatasetSettings from './dataset-settings';
import type { Dataset } from '~/api/dify-dataset/type/datasetTypes';
import type { Document } from '~/api/dify-dataset/type/documentTypes';
import { fetchDatasets } from '~/api/dify-dataset/api/datasetApi';
import { fetchDocuments } from '~/api/dify-dataset/api/documentApi';
import { useDatasetManager } from '~/hooks/dify-dataset-manager';
import '../../styles/components/dify-dataset-manager/index.css';
/**
@@ -16,162 +12,30 @@ import '../../styles/components/dify-dataset-manager/index.css';
* 带左侧菜单栏的完整布局
*/
export default function DatasetManager() {
// 知识库状态
const [dataset, setDataset] = useState<Dataset | null>(null);
const [loadingDataset, setLoadingDataset] = useState(true);
const {
// 状态
dataset,
loadingDataset,
documents,
loadingDocuments,
documentTotal,
documentPage,
documentPageSize,
inited,
error,
activeTab,
selectedDocument,
// 文档状态
const [documents, setDocuments] = useState<Document[]>([]);
const [loadingDocuments, setLoadingDocuments] = useState(false);
const [documentTotal, setDocumentTotal] = useState(0);
const [documentPage, setDocumentPage] = useState(1);
const [documentPageSize] = useState(20);
// 初始化状态
const [inited, setInited] = useState(false);
const [error, setError] = useState<string | null>(null);
// 菜单状态
const [activeTab, setActiveTab] = useState<MenuTab>('documents');
// 选中的文档(用于查看文档详情)
const [selectedDocument, setSelectedDocument] = useState<Document | null>(null);
/**
* 加载知识库(获取第一个知识库)
*/
const loadDataset = async () => {
setLoadingDataset(true);
try {
console.log('[DatasetManager] 加载知识库...');
const response = await fetchDatasets(1, 1);
console.log('[DatasetManager] 知识库响应:', response);
if (response && response.data && response.data.length > 0) {
const firstDataset = response.data[0];
setDataset(firstDataset);
// 立即加载文档
await loadDocuments(firstDataset.id, 1);
} else {
setError('未找到知识库,请先在Dify中创建知识库');
}
} catch (err: any) {
console.error('[DatasetManager] 加载知识库失败:', err);
setError(err.message || '加载知识库失败');
message.error('加载知识库失败');
} finally {
setLoadingDataset(false);
setInited(true);
}
};
/**
* 加载文档列表
*/
const loadDocuments = async (datasetId: string, page: number = 1) => {
if (!datasetId) return;
setLoadingDocuments(true);
try {
console.log('[DatasetManager] 加载文档列表:', { datasetId, page });
const response = await fetchDocuments(datasetId, page, documentPageSize);
console.log('[DatasetManager] 文档列表响应:', response);
if (response && response.data) {
setDocuments(response.data);
setDocumentTotal(response.total);
setDocumentPage(page);
}
} catch (err: any) {
console.error('[DatasetManager] 加载文档列表失败:', err);
message.error('加载文档列表失败');
} finally {
setLoadingDocuments(false);
}
};
/**
* 处理文档页码变化
*/
const handlePageChange = (page: number) => {
if (dataset) {
loadDocuments(dataset.id, page);
}
};
/**
 * Update local state after a document has been deleted.
 *
 * Removes the document from the visible list and decrements both the list
 * total and the dataset's `document_count`.
 *
 * All three updates use functional setters so that several deletions handled
 * before the next render each see the latest value (the previous version read
 * `dataset` from the closure and could lose decrements). Counters are clamped
 * at 0 so they can never be displayed as negative.
 *
 * NOTE(review): assumes `setDataset` is a `useState` setter and therefore
 * accepts an updater function; its declaration is outside this chunk.
 *
 * @param documentId - id of the document that was deleted
 */
const handleDocumentDeleted = (documentId: string) => {
  setDocuments((prev) => prev.filter((doc) => doc.id !== documentId));
  setDocumentTotal((prev) => Math.max(0, prev - 1));
  // Keep the dataset's own document counter in sync.
  setDataset((prev) =>
    prev
      ? { ...prev, document_count: Math.max(0, prev.document_count - 1) }
      : prev
  );
};
/**
 * Reflect an enable/disable change of one document in the local list.
 *
 * @param documentId - id of the document whose flag changed
 * @param enabled - the new value of the document's `enabled` flag
 */
const handleDocumentStatusChanged = (documentId: string, enabled: boolean) => {
  setDocuments((current) =>
    current.map((item) => {
      if (item.id !== documentId) {
        return item;
      }
      return { ...item, enabled };
    })
  );
};
/**
 * Reload the page of documents that is currently shown.
 * Does nothing while no dataset is loaded.
 */
const handleRefresh = () => {
  if (!dataset) return;
  loadDocuments(dataset.id, documentPage);
};
/**
 * Open the detail view (segment management) of a document by storing it as
 * the selected document.
 */
const handleViewDocument = (doc: Document) => {
console.log('[DatasetManager] 查看文档详情:', doc);
setSelectedDocument(doc);
};
/**
 * Return to the document list by clearing the selected document.
 */
const handleBackToDocuments = () => setSelectedDocument(null);
/**
 * Switch the active menu tab.
 * Any tab other than 'documents' also clears the selected document.
 */
const handleTabChange = (tab: MenuTab) => {
  setActiveTab(tab);

  const leavingDocuments = tab !== 'documents';
  if (leavingDocuments) {
    setSelectedDocument(null);
  }
};
/**
 * Replace the stored dataset with an updated copy.
 */
const handleDatasetUpdated = (updatedDataset: Dataset) => setDataset(updatedDataset);
// Initialization: the empty dependency list means the dataset is loaded once,
// when the component mounts.
useEffect(() => {
loadDataset();
}, []);
// 方法
handlePageChange,
handleDocumentDeleted,
handleDocumentStatusChanged,
handleRefresh,
handleViewDocument,
handleBackToDocuments,
handleTabChange,
handleDatasetUpdated,
} = useDatasetManager();
// 加载中状态
if (!inited || loadingDataset) {
+18 -37
View File
@@ -1,4 +1,3 @@
import { ReactNode } from 'react';
import { Button, Tooltip } from 'antd';
import {
FileTextOutlined,
@@ -7,27 +6,7 @@ import {
ArrowLeftOutlined,
DatabaseOutlined,
} from '@ant-design/icons';
import type { Dataset } from '~/api/dify-dataset/type/datasetTypes';
/**
 * Identifier of a sidebar menu entry: the document list, the retrieval test
 * page, or the settings page.
 */
export type MenuTab = 'documents' | 'retrieve' | 'settings';
interface DatasetLayoutProps {
/** Knowledge base (dataset) information; null when none is available. */
dataset: Dataset | null;
/** The currently active menu tab. */
activeTab: MenuTab;
/** Callback invoked when the user switches menu tabs. */
onTabChange: (tab: MenuTab) => void;
/** Whether to show the back button (shown on the document detail page). */
showBackButton?: boolean;
/** Callback invoked when the back button is clicked. */
onBack?: () => void;
/** Child content. */
children: ReactNode;
}
import type { DatasetLayoutProps, MenuTab, MenuItem } from '~/types/dify-dataset-manager/layout';
/**
* 知识库布局组件
@@ -41,7 +20,7 @@ export default function DatasetLayout({
onBack,
children,
}: DatasetLayoutProps) {
const menuItems: { key: MenuTab; icon: ReactNode; label: string }[] = [
const menuItems: MenuItem[] = [
{ key: 'documents', icon: <FileTextOutlined />, label: '文档' },
{ key: 'retrieve', icon: <SearchOutlined />, label: '召回测试' },
{ key: 'settings', icon: <SettingOutlined />, label: '设置' },
@@ -51,20 +30,6 @@ export default function DatasetLayout({
<div className="dataset-layout">
{/* 左侧侧边栏 */}
<aside className="dataset-sidebar">
{/* 返回按钮 */}
{showBackButton && onBack && (
<div className="sidebar-back">
<Button
type="text"
icon={<ArrowLeftOutlined />}
onClick={onBack}
className="back-btn"
>
</Button>
</div>
)}
{/* 知识库信息 */}
<div className="sidebar-header">
<div className="dataset-icon">
@@ -102,8 +67,24 @@ export default function DatasetLayout({
{/* 右侧内容区 */}
<main className="dataset-main">
{/* 返回按钮 */}
{showBackButton && onBack && (
<div className="sidebar-back">
<Button
type="text"
icon={<ArrowLeftOutlined />}
onClick={onBack}
className="back-btn"
>
</Button>
</div>
)}
{children}
</main>
</div>
);
}
// 重新导出类型,保持向后兼容
export type { MenuTab } from '~/types/dify-dataset-manager/layout';
@@ -1,202 +1,306 @@
import { useState } from 'react';
import {
Input,
Button,
Card,
Select,
Slider,
Table,
Tag,
Empty,
Spin,
message,
} from 'antd';
import { FileSearchOutlined } from '@ant-design/icons';
import type { ColumnsType } from 'antd/es/table';
import { SearchOutlined, FileSearchOutlined } from '@ant-design/icons';
import { Button, Tag, Input, Slider, Spin, Select, Flex } from 'antd';
import type { RetrieveRecord } from '~/api/dify-dataset/type';
import { retrieveDataset } from '~/api/dify-dataset/api/segmentApi';
import { useRetrieveTest } from '~/hooks/dify-dataset-manager/retrieve-test';
import type { RetrieveTestProps } from '~/types/dify-dataset-manager/retrieve-test';
interface RetrieveTestProps {
datasetId: string;
// Color constants used by the inline styles in this file:
// backgrounds (bg*), border, text shades from strongest to faintest, and a fill color.
const colors = {
bgContainer: '#fff',
bgLayout: '#f5f5f5',
bgElevated: '#fafafa',
border: '#e8e8e8',
text: '#262626',
textSecondary: '#8c8c8c',
textTertiary: '#bfbfbf',
textQuaternary: '#d9d9d9',
fillTertiary: '#f0f0f0',
};
/**
 * A single retrieval result card.
 *
 * Shows the relevance score as a colored percentage tag, the result's rank,
 * the segment's word count and hit count, the source document name when the
 * segment carries one, the segment content (truncated to 500 characters) and,
 * when present, the segment answer (truncated to 200 characters).
 *
 * @param record - one retrieval record (score plus the matched segment)
 * @param index - zero-based position in the result list; displayed as index + 1
 */
function ResultItem({ record, index }: { record: RetrieveRecord; index: number }) {
// Score is rendered as a percentage with one decimal place.
const scorePercent = (record.score * 100).toFixed(1);
// Tag color by score band: above 0.8, above 0.5, otherwise a neutral grey.
const scoreColor = record.score > 0.8 ? '#52c41a' : record.score > 0.5 ? '#faad14' : '#666';
return (
<Flex
vertical
gap={12}
style={{
padding: 16,
background: colors.bgContainer,
borderRadius: 8,
border: `1px solid ${colors.border}`,
}}
>
<Flex justify="space-between" align="center">
<Flex gap={8} align="center">
<Tag style={{ background: scoreColor, color: '#fff', border: 'none' }}>
{scorePercent}%
</Tag>
<span style={{ color: colors.textSecondary, fontSize: 12 }}>
#{index + 1} · {record.segment.word_count} · {record.segment.hit_count}
</span>
</Flex>
{record.segment.document && (
<span style={{ color: colors.textTertiary, fontSize: 12 }}>
: {record.segment.document.name}
</span>
)}
</Flex>
<div style={{
color: colors.text,
fontSize: 14,
lineHeight: 1.6,
whiteSpace: 'pre-wrap',
}}>
{record.segment.content.length > 500
? record.segment.content.substring(0, 500) + '...'
: record.segment.content}
</div>
{record.segment.answer && (
<Flex
vertical
gap={4}
style={{
padding: 12,
background: colors.fillTertiary,
borderRadius: 6,
}}
>
<span style={{ color: colors.textSecondary, fontSize: 12 }}>
:
</span>
<span style={{ color: colors.text, fontSize: 14 }}>
{record.segment.answer.length > 200
? record.segment.answer.substring(0, 200) + '...'
: record.segment.answer}
</span>
</Flex>
)}
</Flex>
);
}
/**
* 召回测试组件
* 用于测试知识库的检索效果
*/
export default function RetrieveTest({ datasetId }: RetrieveTestProps) {
const [searchQuery, setSearchQuery] = useState('');
const [retrieveResults, setRetrieveResults] = useState<RetrieveRecord[]>([]);
const [retrieving, setRetrieving] = useState(false);
const [searchMethod, setSearchMethod] = useState<string>('hybrid_search');
const [topK, setTopK] = useState<number>(5);
const {
searchQuery,
setSearchQuery,
retrieveResults,
retrieving,
searchMethod,
setSearchMethod,
topK,
setTopK,
handleRetrieve,
} = useRetrieveTest(datasetId);
/**
* 执行检索
*/
const handleRetrieve = async () => {
if (!searchQuery.trim()) {
message.warning('请输入检索关键词');
return;
}
if (!datasetId) {
message.warning('知识库ID不存在');
return;
}
setRetrieving(true);
try {
const response = await retrieveDataset(datasetId, searchQuery, {
search_method: searchMethod as any,
top_k: topK,
});
setRetrieveResults(response.records || []);
if (response.records?.length === 0) {
message.info('未找到匹配的结果');
}
} catch (err: any) {
console.error('检索失败:', err);
message.error(err.message || '检索失败');
} finally {
setRetrieving(false);
}
};
// 检索结果列定义
const columns: ColumnsType<RetrieveRecord> = [
{
title: '相关度',
dataIndex: 'score',
key: 'score',
width: 100,
render: (score: number) => (
<Tag color={score > 0.8 ? 'green' : score > 0.5 ? 'orange' : 'default'}>
{(score * 100).toFixed(1)}%
</Tag>
),
},
{
title: '内容',
key: 'content',
render: (_, record) => (
<div className="retrieve-result-content">
<div className="content-text">
{record.segment.content.length > 300
? record.segment.content.substring(0, 300) + '...'
: record.segment.content}
</div>
{record.segment.answer && (
<div className="answer-text">
<strong></strong>
{record.segment.answer.length > 150
? record.segment.answer.substring(0, 150) + '...'
: record.segment.answer}
</div>
)}
</div>
),
},
{
title: '字数',
key: 'word_count',
width: 80,
render: (_, record) => record.segment.word_count,
},
{
title: '命中次数',
key: 'hit_count',
width: 100,
render: (_, record) => record.segment.hit_count,
},
// 检索方式选项(只有3种)
const searchMethodOptions = [
{ label: '向量检索', value: 'semantic_search' },
{ label: '全文检索', value: 'full_text_search' },
{ label: '混合检索', value: 'hybrid_search' },
];
return (
<div className="retrieve-test-page">
{/* 页面标题 */}
<div className="page-header">
<h1></h1>
<p className="page-description">
</p>
</div>
<Flex
style={{
height: '100%',
minHeight: 'calc(100vh - 120px)',
}}
>
{/* 左侧面板 - 输入区域 */}
<Flex
vertical
gap={16}
style={{
width: 400,
minWidth: 400,
padding: 20,
background: colors.bgLayout,
borderRight: `1px solid ${colors.border}`,
}}
>
{/* 标题 */}
<Flex vertical gap={4}>
<h2 style={{
margin: 0,
fontSize: 18,
fontWeight: 600,
color: colors.text,
}}>
</h2>
<span style={{
fontSize: 13,
color: colors.textSecondary,
}}>
</span>
</Flex>
{/* 检索设置 */}
<Card className="retrieve-settings" size="small">
<div className="search-row">
<Input
placeholder="输入检索关键词..."
prefix={<FileSearchOutlined />}
{/* 查询输入区 */}
<Flex vertical gap={8}>
<Flex justify="space-between" align="center">
<span style={{
fontSize: 13,
color: colors.text,
fontWeight: 500,
}}>
</span>
<Select
value={searchMethod}
onChange={(value) => setSearchMethod(value as any)}
options={searchMethodOptions}
style={{ width: 130 }}
size="small"
/>
</Flex>
<Input.TextArea
placeholder="请输入文本,建议使用简短的陈述句。"
value={searchQuery}
onChange={(e) => setSearchQuery(e.target.value)}
onPressEnter={handleRetrieve}
className="search-input"
onPressEnter={(e) => {
if (!e.shiftKey) {
e.preventDefault();
handleRetrieve();
}
}}
autoSize={{ minRows: 6, maxRows: 12 }}
style={{
background: colors.bgContainer,
resize: 'none',
}}
/>
<Flex justify="space-between" align="center">
<span style={{
fontSize: 12,
color: colors.textTertiary,
}}>
{searchQuery.length} / 200
</span>
<Button
type="primary"
icon={<SearchOutlined />}
onClick={handleRetrieve}
loading={retrieving}
>
</Button>
</div>
</Flex>
</Flex>
<div className="options-row">
<div className="option-item">
<span className="option-label"></span>
<Select
value={searchMethod}
onChange={setSearchMethod}
style={{ width: 140 }}
options={[
{ value: 'keyword_search', label: '关键词搜索' },
{ value: 'semantic_search', label: '语义搜索' },
{ value: 'full_text_search', label: '全文搜索' },
{ value: 'hybrid_search', label: '混合搜索' },
]}
/>
</div>
<div className="option-item">
<span className="option-label"> (Top K)</span>
{/* 检索设置 */}
<Flex vertical gap={12}>
<span style={{
fontSize: 13,
color: colors.text,
fontWeight: 500,
}}>
</span>
<Flex align="center" gap={12}>
<span style={{
fontSize: 13,
color: colors.textSecondary,
whiteSpace: 'nowrap',
}}>
(Top K):
</span>
<Slider
value={topK}
onChange={setTopK}
min={1}
max={20}
style={{ width: 120 }}
style={{ flex: 1 }}
/>
<span className="option-value">{topK}</span>
</div>
</div>
</Card>
<span style={{
fontSize: 13,
color: colors.text,
minWidth: 24,
textAlign: 'right',
}}>
{topK}
</span>
</Flex>
</Flex>
</Flex>
{/* 检索结果 */}
<div className="retrieve-results">
{/* 右侧面板 - 结果展示 */}
<Flex
vertical
flex={1}
gap={16}
style={{
padding: 20,
background: colors.bgElevated,
overflow: 'auto',
}}
>
{retrieving ? (
<div className="loading-state">
<Flex
flex={1}
align="center"
justify="center"
vertical
gap={12}
>
<Spin size="large" />
<div className="loading-text">...</div>
</div>
<span style={{ color: colors.textSecondary }}>
...
</span>
</Flex>
) : retrieveResults.length === 0 ? (
<Empty
description="请输入关键词进行检索"
className="empty-state"
/>
<Flex
flex={1}
align="center"
justify="center"
vertical
gap={12}
>
<FileSearchOutlined style={{
fontSize: 48,
color: colors.textQuaternary,
}} />
<span style={{ color: colors.textTertiary }}>
</span>
</Flex>
) : (
<>
<div className="results-header">
<span> {retrieveResults.length} </span>
</div>
<Table
columns={columns}
dataSource={retrieveResults}
rowKey={(record) => record.segment.id}
pagination={false}
size="small"
<Flex justify="space-between" align="center">
<span style={{
fontSize: 14,
color: colors.text,
fontWeight: 500,
}}>
</span>
<span style={{
fontSize: 13,
color: colors.textSecondary,
}}>
{retrieveResults.length}
</span>
</Flex>
<Flex vertical gap={12}>
{retrieveResults.map((record, index) => (
<ResultItem
key={record.segment.id}
record={record}
index={index}
/>
))}
</Flex>
</>
)}
</div>
</div>
</Flex>
</Flex>
);
}
+33 -14
View File
@@ -28,6 +28,13 @@ interface ApiConfig {
// 应用ID(用于登出)
appId?: string;
};
// Dify 知识库检索配置
dify: {
// Reranking 模型提供商
rerankingProviderName: string;
// Reranking 模型名称
rerankingModelName: string;
};
}
// 端口特定配置映射
@@ -37,17 +44,12 @@ const portConfigs: Record<string, Partial<ApiConfig>> = {
// 主要
// 梅州
'51703': {
// baseUrl: 'http://172.16.0.55:8073',
// documentUrl: 'http://172.16.0.55:8073/docauditai/',
// uploadUrl: 'http://172.16.0.55:8073/admin/documents',
// collaboraUrl: 'http://172.16.0.81:9980',
// appUrl: 'http://172.16.0.34:51703',
baseUrl: 'http://172.16.0.78:8073',
documentUrl: 'http://172.16.0.78:8073/docauditai/',
uploadUrl: 'http://172.16.0.78:8073/admin/documents',
baseUrl: 'http://10.79.97.17:8000',
documentUrl: 'http://10.79.97.17:8000/docauditai/',
uploadUrl: 'http://10.79.97.17:8000/admin/documents',
collaboraUrl: 'http://10.79.97.17:9980',
appUrl: 'http://10.79.97.17:51703',
collaboraUrl: 'http://172.16.0.81:9980',
appUrl: 'http://172.16.0.34:51703',
oauth: {
redirectUri: 'http://10.79.97.17:51703/callback'
@@ -127,8 +129,7 @@ const configs: Record<string, ApiConfig> = {
// uploadUrl: 'http://172.16.0.55:8073/admin/documents',
collaboraUrl: 'http://172.16.0.81:9980',
// appUrl: 'http://172.16.0.34:51709',
appUrl: 'http://172.16.0.34:5173',
appUrl: 'http://172.16.0.78:51703',
oauth: {
serverUrl: 'http://10.79.112.85', // IDaaS服务器地址
@@ -136,6 +137,10 @@ const configs: Record<string, ApiConfig> = {
clientSecret: 'none', // 需要替换为实际的Client Secret
redirectUri: 'http://10.79.97.17/', // 回调地址
appId: 'idaasoauth2' // 应用ID,用于登出
},
dify: {
rerankingProviderName: 'langgenius/tongyi/tongyi',
rerankingModelName: 'gte-rerank'
}
},
@@ -152,6 +157,10 @@ const configs: Record<string, ApiConfig> = {
clientSecret: 'placeholder', // 需要替换为实际的Client Secret
redirectUri: 'http://10.79.97.17/', // 回调地址
appId: 'idaasoauth2' // 应用ID,用于登出
},
dify: {
rerankingProviderName: 'langgenius/tongyi/tongyi',
rerankingModelName: 'gte-rerank'
}
},
@@ -173,6 +182,10 @@ const configs: Record<string, ApiConfig> = {
clientSecret: 'placeholder', // 占位符,实际值从环境变量获取
redirectUri: 'http://10.79.97.17/', // 回调地址
appId: 'idaasoauth2' // 应用ID,用于登出
},
dify: {
rerankingProviderName: 'langgenius/tongyi/tongyi',
rerankingModelName: 'gte-rerank'
}
},
@@ -189,6 +202,10 @@ const configs: Record<string, ApiConfig> = {
clientSecret: 'your_client_secret', // 需要替换为实际的Client Secret
redirectUri: 'http://172.16.0.119:3000/callback', // 回调地址
appId: 'idaasoauth2' // 应用ID,用于登出
},
dify: {
rerankingProviderName: 'langgenius/tongyi/tongyi',
rerankingModelName: 'gte-rerank'
}
}
};
@@ -238,7 +255,8 @@ const getConfigFromEnv = (defaultConfig: ApiConfig): ApiConfig => {
clientSecret: process.env.OAUTH_CLIENT_SECRET || defaultConfig.oauth.clientSecret,
redirectUri: process.env.NEXT_PUBLIC_OAUTH_REDIRECT_URI || defaultConfig.oauth.redirectUri,
appId: process.env.NEXT_PUBLIC_OAUTH_APP_ID || defaultConfig.oauth.appId
}
},
dify: defaultConfig.dify
};
};
@@ -366,7 +384,8 @@ export const {
uploadUrl: UPLOAD_URL,
collaboraUrl: COLLABORA_URL,
appUrl: APP_URL,
oauth: OAUTH_CONFIG
oauth: OAUTH_CONFIG,
dify: DIFY_CONFIG
} = apiConfig;
/**
@@ -0,0 +1,92 @@
import { useState, useEffect, useCallback } from 'react';
import { message } from 'antd';
import type { FormInstance } from 'antd';
import type { Dataset } from '~/api/dify-dataset/type/datasetTypes';
import { updateDatasetName } from '~/api/dify-dataset/api/datasetApi';
/**
 * State management hook for the knowledge-base settings form.
 *
 * Keeps the form in sync with the given dataset, tracks whether the form
 * differs from the dataset, and saves the dataset name.
 *
 * @param dataset - dataset being edited, or null when none is loaded
 * @param form - antd form instance holding the `name` and `description` fields
 * @param onDatasetUpdated - called with the dataset returned by the save request
 * @returns `saving` / `hasChanges` flags and the change, save and reset handlers
 */
export function useDatasetSettings(
  dataset: Dataset | null,
  form: FormInstance,
  onDatasetUpdated: (dataset: Dataset) => void
) {
  const [saving, setSaving] = useState(false);
  const [hasChanges, setHasChanges] = useState(false);

  // Initialize the form from the dataset whenever the dataset (or form) changes.
  useEffect(() => {
    if (dataset) {
      form.setFieldsValue({
        name: dataset.name,
        description: dataset.description || '',
      });
      setHasChanges(false);
    }
  }, [dataset, form]);

  /**
   * Recompute `hasChanges` by comparing the form values with the dataset.
   */
  const handleValuesChange = useCallback(() => {
    const values = form.getFieldsValue();
    const changed =
      values.name !== dataset?.name ||
      values.description !== (dataset?.description || '');
    setHasChanges(changed);
  }, [form, dataset]);

  /**
   * Validate the form and save the settings.
   *
   * Validation and saving are handled separately: when `validateFields`
   * rejects, antd already shows the per-field errors in the form, so the
   * handler returns without raising a misleading "save failed" toast and
   * without logging a save error (the rejection is not an `Error`).
   */
  const handleSave = useCallback(async () => {
    if (!dataset) {
      message.error('知识库不存在');
      return;
    }

    let name: string;
    try {
      ({ name } = await form.validateFields());
    } catch {
      // Validation failed; nothing was sent to the server.
      return;
    }

    setSaving(true);
    try {
      // Only the name is persisted for now.
      // NOTE(review): a changed description counts towards `hasChanges` but is
      // not sent here — confirm this is intended.
      const updatedDataset = await updateDatasetName(dataset.id, name);
      message.success('保存成功');
      onDatasetUpdated(updatedDataset);
      setHasChanges(false);
    } catch (err: any) {
      console.error('保存设置失败:', err);
      message.error(err.message || '保存失败');
    } finally {
      setSaving(false);
    }
  }, [dataset, form, onDatasetUpdated]);

  /**
   * Reset the form back to the dataset's current values.
   */
  const handleReset = useCallback(() => {
    if (dataset) {
      form.setFieldsValue({
        name: dataset.name,
        description: dataset.description || '',
      });
      setHasChanges(false);
    }
  }, [dataset, form]);

  return {
    // state
    saving,
    hasChanges,
    // handlers
    handleValuesChange,
    handleSave,
    handleReset,
  };
}
export type UseDatasetSettingsReturn = ReturnType<typeof useDatasetSettings>;
@@ -0,0 +1,123 @@
import { message } from 'antd';
import { useCallback, useEffect, useState } from 'react';
import { updateDocumentWithSettings } from '~/api/dify-dataset/api/documentApi';
import { fetchSegments } from '~/api/dify-dataset/api/segmentApi';
import type { Segment } from '~/api/dify-dataset/type';
import type { Document } from '~/api/dify-dataset/type/documentTypes';
import type { DocumentDetailSegmentationSettings } from '~/types/dify-dataset-manager/document-detail';
import { DEFAULT_DOCUMENT_DETAIL_SETTINGS } from '~/types/dify-dataset-manager/document-detail';
/**
 * State management hook for the document detail view.
 *
 * Holds the segmentation settings being edited, the segment preview and the
 * saving flag, and exposes handlers to edit, reset, preview and save.
 *
 * @param datasetId - id of the dataset the document belongs to
 * @param document - the document being viewed, or null when none is selected
 */
export function useDocumentDetail(datasetId: string, document: Document | null) {
  // Segmentation settings being edited.
  const [settings, setSettings] = useState<DocumentDetailSegmentationSettings>(DEFAULT_DOCUMENT_DETAIL_SETTINGS);
  // Preview state.
  const [previewSegments, setPreviewSegments] = useState<Segment[]>([]);
  const [previewLoading, setPreviewLoading] = useState(false);
  const [showPreview, setShowPreview] = useState(false);
  // Save state.
  const [saving, setSaving] = useState(false);

  // When a different document is shown, start again from the default settings
  // and drop any preview. Existing per-document settings are not read here.
  useEffect(() => {
    if (!document) {
      return;
    }
    setSettings(DEFAULT_DOCUMENT_DETAIL_SETTINGS);
    setPreviewSegments([]);
    setShowPreview(false);
  }, [document?.id]);

  /**
   * Set a single field of the segmentation settings.
   */
  const updateSettings = useCallback((key: keyof DocumentDetailSegmentationSettings, value: any) => {
    setSettings((current) => {
      return { ...current, [key]: value };
    });
  }, []);

  /**
   * Restore the default settings and hide the preview.
   */
  const handleReset = useCallback(() => {
    setSettings(DEFAULT_DOCUMENT_DETAIL_SETTINGS);
    setPreviewSegments([]);
    setShowPreview(false);
  }, []);

  /**
   * Show the preview panel and fill it with the document's current segments
   * (first page, up to 50 entries).
   */
  const handlePreview = useCallback(async () => {
    if (!document) {
      return;
    }

    setPreviewLoading(true);
    setShowPreview(true);
    try {
      const { data } = await fetchSegments(datasetId, document.id, 1, 50);
      setPreviewSegments(data || []);
      if (data?.length === 0) {
        message.info('该文档暂无分段数据');
      }
    } catch (err: any) {
      console.error('预览分段失败:', err);
      message.error(err.message || '预览失败');
    } finally {
      setPreviewLoading(false);
    }
  }, [datasetId, document]);

  /**
   * Send the current settings to the server as a custom process rule.
   */
  const handleSaveAndProcess = useCallback(async () => {
    if (!document) {
      return;
    }

    setSaving(true);
    try {
      const preProcessingRules = [
        { id: 'remove_extra_spaces', enabled: settings.removeExtraSpaces },
        { id: 'remove_urls_emails', enabled: settings.removeUrlsEmails },
      ];
      const segmentation = {
        // The separator is entered as the two characters "\n"; turn each into a real newline.
        separator: settings.separator.replace(/\\n/g, '\n'),
        max_tokens: settings.maxTokens,
      };

      await updateDocumentWithSettings(datasetId, document.id, {
        indexing_technique: 'high_quality',
        process_rule: {
          mode: 'custom',
          rules: {
            pre_processing_rules: preProcessingRules,
            segmentation,
          },
        },
      });
      message.success('设置已保存,文档正在重新处理...');
    } catch (err: any) {
      console.error('保存设置失败:', err);
      message.error(err.message || '保存失败');
    } finally {
      setSaving(false);
    }
  }, [datasetId, document, settings]);

  return {
    // state
    settings,
    previewSegments,
    previewLoading,
    showPreview,
    saving,
    // handlers
    updateSettings,
    handleReset,
    handlePreview,
    handleSaveAndProcess,
  };
}
export type UseDocumentDetailReturn = ReturnType<typeof useDocumentDetail>;
@@ -0,0 +1,156 @@
import { useState, useCallback } from 'react';
import { message } from 'antd';
import type { Document, IndexingStatus } from '~/api/dify-dataset/type/documentTypes';
import { deleteDocument, toggleDocumentStatus } from '~/api/dify-dataset/api/documentApi';
import {
CheckCircleOutlined,
SyncOutlined,
ClockCircleOutlined,
PauseCircleOutlined,
ExclamationCircleOutlined,
} from '@ant-design/icons';
import type { StatusConfig } from '~/types/dify-dataset-manager/document-list';
/**
 * State management hook for the document list.
 *
 * Owns the search text, the id of the document being deleted and the
 * visibility of the upload page, and provides formatting helpers plus the
 * delete / toggle / upload handlers.
 *
 * @param datasetId - id of the dataset whose documents are listed
 * @param onDocumentDeleted - called with the document id after a successful delete
 * @param onDocumentStatusChanged - called after a successful enable/disable
 * @param onRefresh - called after a successful upload
 */
export function useDocumentList(
  datasetId: string,
  onDocumentDeleted: (documentId: string) => void,
  onDocumentStatusChanged: (documentId: string, enabled: boolean) => void,
  onRefresh: () => void
) {
  const [searchValue, setSearchValue] = useState('');
  const [deletingId, setDeletingId] = useState<string | null>(null);
  const [showUploadPage, setShowUploadPage] = useState(false);

  /**
   * Map an indexing status to its tag color, icon and label.
   * A status missing from the table gets a neutral tag labelled with the raw status.
   */
  const getStatusConfig = useCallback((status: IndexingStatus): StatusConfig => {
    // The in-progress states share the same color and spinning icon.
    const inProgress = (text: string): StatusConfig => ({
      color: 'processing',
      icon: <SyncOutlined spin />,
      text,
    });

    const table: Record<IndexingStatus, StatusConfig> = {
      completed: { color: 'success', icon: <CheckCircleOutlined />, text: '已完成' },
      indexing: inProgress('索引中'),
      waiting: { color: 'warning', icon: <ClockCircleOutlined />, text: '等待中' },
      parsing: inProgress('解析中'),
      cleaning: inProgress('清洗中'),
      splitting: inProgress('分段中'),
      paused: { color: 'default', icon: <PauseCircleOutlined />, text: '已暂停' },
      error: { color: 'error', icon: <ExclamationCircleOutlined />, text: '错误' },
    };

    const known = table[status];
    if (known) {
      return known;
    }
    return { color: 'default', icon: null, text: status };
  }, []);

  /**
   * Format a timestamp given in seconds as a zh-CN date-time down to the minute.
   */
  const formatDate = useCallback((timestamp: number) => {
    const millis = timestamp * 1000;
    return new Date(millis).toLocaleString('zh-CN', {
      year: 'numeric',
      month: '2-digit',
      day: '2-digit',
      hour: '2-digit',
      minute: '2-digit',
    });
  }, []);

  /**
   * Abbreviate a number: 10000 and above as "x.xw", 1000 and above as "x.xk".
   */
  const formatNumber = useCallback((num: number) => {
    const abbreviate = (divisor: number, suffix: string) =>
      (num / divisor).toFixed(1) + suffix;

    if (num >= 10000) return abbreviate(10000, 'w');
    if (num >= 1000) return abbreviate(1000, 'k');
    return num.toString();
  }, []);

  /**
   * Delete a document, tracking its id in `deletingId` while the request runs.
   */
  const handleDelete = useCallback(async (documentId: string) => {
    setDeletingId(documentId);
    try {
      await deleteDocument(datasetId, documentId);
      message.success('删除成功');
      onDocumentDeleted(documentId);
    } catch (err: any) {
      console.error('删除文档失败:', err);
      message.error(err.message || '删除失败');
    } finally {
      setDeletingId(null);
    }
  }, [datasetId, onDocumentDeleted]);

  /**
   * Enable or disable a document.
   */
  const handleToggleStatus = useCallback(async (documentId: string, enabled: boolean) => {
    try {
      await toggleDocumentStatus(datasetId, documentId, enabled);
      const notice = enabled ? '已启用' : '已禁用';
      message.success(notice);
      onDocumentStatusChanged(documentId, enabled);
    } catch (err: any) {
      console.error('切换文档状态失败:', err);
      message.error(err.message || '操作失败');
    }
  }, [datasetId, onDocumentStatusChanged]);

  /**
   * Upload button: open the upload page, unless no dataset is selected.
   */
  const handleUploadClick = useCallback(() => {
    if (datasetId) {
      setShowUploadPage(true);
      return;
    }
    message.error('请先选择知识库');
  }, [datasetId]);

  /**
   * Close the upload page.
   */
  const handleUploadClose = useCallback(() => {
    setShowUploadPage(false);
  }, []);

  /**
   * After a successful upload: close the upload page and refresh the list.
   */
  const handleUploadSuccess = useCallback(() => {
    setShowUploadPage(false);
    onRefresh();
  }, [onRefresh]);

  /**
   * Keep the documents whose name contains the search text (case-insensitive).
   */
  const filterDocuments = useCallback((documents: Document[]) => {
    const needle = searchValue.toLowerCase();
    return documents.filter((doc) => doc.name.toLowerCase().includes(needle));
  }, [searchValue]);

  return {
    // state
    searchValue,
    setSearchValue,
    deletingId,
    showUploadPage,
    // helpers and handlers
    getStatusConfig,
    formatDate,
    formatNumber,
    handleDelete,
    handleToggleStatus,
    handleUploadClick,
    handleUploadClose,
    handleUploadSuccess,
    filterDocuments,
  };
}
export type UseDocumentListReturn = ReturnType<typeof useDocumentList>;
@@ -0,0 +1,533 @@
import type { UploadFile, UploadProps } from 'antd';
import { message } from 'antd';
import { useCallback, useEffect, useRef, useState } from 'react';
import {
fetchIndexingStatus,
updateDocumentByFile,
uploadDocumentWithConfig,
} from '~/api/dify-dataset/api/documentApi';
import { fetchSegments } from '~/api/dify-dataset/api/segmentApi';
import type { IndexingStatus } from '~/api/dify-dataset/type/documentTypes';
import type {
DocumentStage,
SegmentationSettings,
UploadedDocument,
} from '~/types/dify-dataset-manager/document-upload';
import {
DEFAULT_SEGMENTATION_SETTINGS,
INDEXING_STATUS_CONFIG,
} from '~/types/dify-dataset-manager/document-upload';
/**
* 文档上传状态管理 Hook
*/
export function useDocumentUpload(datasetId: string, onClose: () => void, onSuccess: () => void) {
// Step control: the flow has two steps and starts on step 1.
const [step, setStep] = useState<1 | 2>(1);
// File selection: the raw File objects and the antd upload list they come from.
const [selectedFiles, setSelectedFiles] = useState<File[]>([]);
const [fileList, setFileList] = useState<UploadFile[]>([]);
// Per-document state for every file in the current upload run.
const [uploadedDocuments, setUploadedDocuments] = useState<UploadedDocument[]>([]);
// Index (into uploadedDocuments) of the document currently being viewed.
const [currentDocIndex, setCurrentDocIndex] = useState(0);
// Segmentation settings currently displayed (taken from the selected document).
const [currentSettings, setCurrentSettings] = useState<SegmentationSettings>(DEFAULT_SEGMENTATION_SETTINGS);
// Preview loading flag.
const [previewLoading, setPreviewLoading] = useState(false);
// Polling timers, one per document, keyed by document id.
const pollingTimersRef = useRef<Map<string, NodeJS.Timeout>>(new Map());
// Status catch-up timers, keyed by document id.
const statusCatchUpTimersRef = useRef<Map<string, NodeJS.Timeout>>(new Map());
// Order in which the displayed indexing status advances.
const STATUS_ORDER: IndexingStatus[] = ['waiting', 'parsing', 'cleaning', 'splitting', 'indexing', 'completed'];
/**
 * Stop the status polling of one document, if a poller is registered for it.
 */
const stopPolling = useCallback((documentId: string) => {
  const timers = pollingTimersRef.current;
  const handle = timers.get(documentId);
  if (!handle) {
    return;
  }
  clearInterval(handle);
  timers.delete(documentId);
}, []);
/**
 * Stop the status polling of every document.
 */
const stopAllPolling = useCallback(() => {
  const timers = pollingTimersRef.current;
  for (const handle of timers.values()) {
    clearInterval(handle);
  }
  timers.clear();
}, []);
/**
 * Fetch the segments of a document (first page, up to 50) and store them on
 * the entry at `docIndex` in the uploaded-documents list.
 */
const loadSegmentsPreview = useCallback(async (documentId: string, docIndex: number) => {
  setPreviewLoading(true);
  try {
    const { data } = await fetchSegments(datasetId, documentId, 1, 50);
    const segments = data || [];

    // Attach the segments to the matching document only.
    setUploadedDocuments((docs) =>
      docs.map((entry, position) => {
        if (position !== docIndex) {
          return entry;
        }
        return { ...entry, segments };
      })
    );
  } catch (err: any) {
    console.error('加载分段预览失败:', err);
    message.error('加载分段预览失败');
  } finally {
    setPreviewLoading(false);
  }
}, [datasetId]);
// On unmount, cancel every polling interval and every catch-up timeout.
useEffect(() => {
  return () => {
    const polling = pollingTimersRef.current;
    for (const handle of polling.values()) {
      clearInterval(handle);
    }
    polling.clear();

    const catchUp = statusCatchUpTimersRef.current;
    for (const handle of catchUp.values()) {
      clearTimeout(handle);
    }
    catchUp.clear();
  };
}, []);
/**
 * Status catch-up.
 *
 * The displayed `indexingStatus` advances towards `realIndexingStatus` one
 * step of STATUS_ORDER at a time, with one second per step, so every stage is
 * visible for at least a second. At most one catch-up timer exists per
 * document.
 *
 * The state updater is kept pure: stopping the poller and loading the segment
 * preview happen in the timer callback itself, not inside the updater, so they
 * run exactly once even when React invokes updaters twice (StrictMode).
 */
useEffect(() => {
  uploadedDocuments.forEach((doc, index) => {
    // Skip documents with no real status, failed documents, and documents
    // that already have a catch-up step scheduled.
    if (!doc.realIndexingStatus || doc.stage === 'error' || statusCatchUpTimersRef.current.has(doc.documentId)) {
      return;
    }

    const currentIndex = STATUS_ORDER.indexOf(doc.indexingStatus);
    const targetIndex = STATUS_ORDER.indexOf(doc.realIndexingStatus);
    // Nothing to do unless the displayed status lags behind the real one.
    if (currentIndex >= targetIndex) {
      return;
    }

    const { documentId } = doc;
    const nextStatus = STATUS_ORDER[currentIndex + 1];
    // 'completed' is the last entry of STATUS_ORDER, so stepping onto it means
    // the real status was already 'completed' when this step was scheduled.
    const reachesCompletion = nextStatus === 'completed';

    const timer = setTimeout(() => {
      setUploadedDocuments(prev => prev.map((d, idx) => {
        if (idx !== index) return d;
        // Mark the document as done only if the real status is still 'completed'.
        if (reachesCompletion && d.realIndexingStatus === 'completed') {
          return { ...d, indexingStatus: nextStatus, stage: 'completed' };
        }
        return { ...d, indexingStatus: nextStatus };
      }));

      if (reachesCompletion) {
        stopPolling(documentId);
        // Load the segment preview automatically once indexing is complete.
        void loadSegmentsPreview(documentId, index);
      }

      // Allow the next step to be scheduled.
      statusCatchUpTimersRef.current.delete(documentId);
    }, 1000); // stay on each status for at least one second

    statusCatchUpTimersRef.current.set(documentId, timer);
  });
}, [uploadedDocuments, stopPolling, loadSegmentsPreview]);
/**
 * Fetch the indexing status of a batch once and record it on the document at
 * `docIndex`.
 *
 * Only the real status and the segment counters are written here; the
 * displayed status is advanced separately. When the server reports an error,
 * polling for the document is stopped and the document is marked as failed.
 * `stopPolling` is called before the state update rather than inside the
 * updater, so the updater stays pure.
 *
 * A failed request is logged and otherwise ignored.
 *
 * @param batch - batch id returned by the upload
 * @param documentId - id of the document being tracked
 * @param docIndex - position of the document in `uploadedDocuments`
 */
const pollIndexingStatus = useCallback(async (batch: string, documentId: string, docIndex: number) => {
  try {
    const response = await fetchIndexingStatus(datasetId, batch);
    // The first entry of the response is used as this document's status.
    const documentStatus = response.data?.[0];
    if (!documentStatus) {
      return;
    }

    const realStatus = documentStatus.indexing_status as IndexingStatus;
    const failed = realStatus === 'error';
    if (failed) {
      stopPolling(documentId);
    }

    setUploadedDocuments(prev => prev.map((doc, idx) => {
      if (idx !== docIndex) return doc;

      if (failed) {
        return {
          ...doc,
          stage: 'error',
          error: documentStatus.error || '处理失败',
          realIndexingStatus: realStatus
        };
      }

      return {
        ...doc,
        realIndexingStatus: realStatus,
        completedSegments: documentStatus.completed_segments,
        totalSegments: documentStatus.total_segments
      };
    }));
  } catch (err) {
    console.error('获取索引状态失败:', err);
  }
}, [datasetId, stopPolling]);
/**
 * Start polling the indexing status of a document every two seconds,
 * replacing any poller already registered for it, and poll once right away.
 */
const startPolling = useCallback((batch: string, documentId: string, docIndex: number) => {
  const poll = () => {
    pollIndexingStatus(batch, documentId, docIndex);
  };

  // Drop the previous poller for this document first.
  stopPolling(documentId);

  const handle = setInterval(poll, 2000);
  pollingTimersRef.current.set(documentId, handle);

  // First poll immediately instead of waiting for the first interval.
  poll();
}, [stopPolling, pollIndexingStatus]);
/**
 * Build the upload configuration (custom process rule) from segmentation settings.
 */
const buildConfig = useCallback((s: SegmentationSettings) => {
  const pre_processing_rules = [
    { id: 'remove_extra_spaces' as const, enabled: s.removeExtraSpaces },
    { id: 'remove_urls_emails' as const, enabled: s.removeUrlsEmails },
  ];
  const segmentation = {
    // The separator is entered as the two characters "\n"; turn each into a real newline.
    separator: s.separator.replace(/\\n/g, '\n'),
    max_tokens: s.maxTokens,
  };

  return {
    indexing_technique: s.indexingTechnique,
    process_rule: {
      mode: 'custom' as const,
      rules: {
        pre_processing_rules,
        segmentation,
      },
    },
  };
}, []);
/**
 * Change one field of the segmentation settings of the document being viewed.
 *
 * The displayed settings and the copy stored on the document are updated with
 * two independent functional updates. The previous version called
 * `setUploadedDocuments` from inside the `setCurrentSettings` updater; updater
 * functions are expected to be pure, so the second update now stands on its own.
 *
 * NOTE(review): this relies on the selected document's `settings` mirroring
 * `currentSettings`, which holds as long as both are only changed together
 * (as the document-switch handler and this function do).
 *
 * @param key - settings field to change
 * @param value - new value for that field
 */
const updateCurrentSettings = useCallback((key: keyof SegmentationSettings, value: any) => {
  setCurrentSettings(prev => ({ ...prev, [key]: value }));

  // Keep the copy stored in the document list in sync.
  setUploadedDocuments(prevDocs => prevDocs.map((doc, idx) =>
    idx === currentDocIndex
      ? { ...doc, settings: { ...doc.settings, [key]: value } }
      : doc
  ));
}, [currentDocIndex]);
/**
 * Upload list change handler: store the new list and the File objects it carries.
 */
const handleFileChange: UploadProps['onChange'] = useCallback(({ fileList: newFileList }: { fileList: UploadFile[] }) => {
  setFileList(newFileList);

  // Collect the underlying File of every entry that has one.
  const files: File[] = [];
  for (const entry of newFileList) {
    if (entry.originFileObj) {
      files.push(entry.originFileObj as File);
    }
  }
  setSelectedFiles(files);
}, []);
/**
 * Remove a file from the selection.
 *
 * The upload list and the list of File objects are updated with two
 * independent functional updates. The previous version called
 * `setSelectedFiles` from inside the `setFileList` updater; updater functions
 * are expected to be pure.
 *
 * NOTE(review): the File to drop is matched by identity with
 * `file.originFileObj`, which assumes `file` is the entry taken from the
 * upload list (whose `originFileObj` is the object stored in `selectedFiles`).
 *
 * @param file - the upload-list entry to remove
 */
const handleRemoveFile = useCallback((file: UploadFile) => {
  setFileList(prev => prev.filter(f => f.uid !== file.uid));

  // An entry without an underlying File never contributed to selectedFiles.
  const removed = file.originFileObj;
  if (removed) {
    setSelectedFiles(prev => prev.filter(f => f !== removed));
  }
}, []);
/**
 * Upload one file with the default segmentation settings and start tracking
 * its indexing status.
 *
 * @param file - the file to upload
 * @param index - position of the file's entry in `uploadedDocuments`
 */
const uploadSingleFile = useCallback(async (file: File, index: number): Promise<void> => {
  // Merge a partial update into the entry at `index`.
  const patchDocument = (patch: Partial<UploadedDocument>) => {
    setUploadedDocuments(prev => prev.map((doc, idx) =>
      idx === index ? { ...doc, ...patch } : doc
    ));
  };

  try {
    // Mark the entry as uploading.
    patchDocument({ stage: 'uploading' as DocumentStage });

    const config = buildConfig(DEFAULT_SEGMENTATION_SETTINGS);
    const result = await uploadDocumentWithConfig(
      datasetId,
      file,
      config,
      (percent) => {
        patchDocument({ uploadProgress: percent });
      }
    );

    // Record the server-side ids and move on to the indexing stage.
    patchDocument({
      documentId: result.document.id,
      batch: result.batch,
      stage: 'indexing' as DocumentStage,
      indexingStatus: 'waiting' as IndexingStatus,
      realIndexingStatus: 'waiting' as IndexingStatus,
    });

    // Start polling the indexing status.
    startPolling(result.batch, result.document.id, index);
  } catch (err: any) {
    console.error(`上传文档 ${file.name} 失败:`, err);
    patchDocument({
      stage: 'error' as DocumentStage,
      error: err.message || '上传失败',
    });
  }
}, [datasetId, buildConfig, startPolling]);
/**
 * "Next step" handler: switch to step 2 and upload every selected file,
 * one after another.
 */
const handleNextStep = useCallback(async () => {
  if (selectedFiles.length === 0) {
    message.warning('请先选择文件');
    return;
  }

  // One pending entry per selected file, each with its own copy of the defaults.
  const toPendingDocument = (file: File): UploadedDocument => ({
    file,
    documentId: '',
    batch: '',
    stage: 'pending' as DocumentStage,
    indexingStatus: 'waiting' as IndexingStatus,
    realIndexingStatus: 'waiting' as IndexingStatus,
    uploadProgress: 0,
    settings: { ...DEFAULT_SEGMENTATION_SETTINGS },
    segments: [],
  });

  setUploadedDocuments(selectedFiles.map(toPendingDocument));
  setCurrentDocIndex(0);
  setCurrentSettings({ ...DEFAULT_SEGMENTATION_SETTINGS });
  setStep(2);

  // Upload sequentially, in selection order.
  for (const [position, file] of selectedFiles.entries()) {
    await uploadSingleFile(file, position);
  }
}, [selectedFiles, uploadSingleFile]);
/**
 * Switch the document being viewed.
 *
 * @param docId - Either a document ID or a file name; the first entry that
 *   matches on one of the two becomes the current document.
 */
const handleDocumentChange = useCallback((docId: string) => {
  const position = uploadedDocuments.findIndex(
    (candidate) => candidate.documentId === docId || candidate.file.name === docId
  );
  if (position === -1) {
    return;
  }
  setCurrentDocIndex(position);
  // Show the settings stored on the newly selected document.
  setCurrentSettings(uploadedDocuments[position].settings);
}, [uploadedDocuments]);
/**
 * Re-process the current document with the currently edited settings.
 *
 * Does nothing when there is no current document or it has no document ID
 * yet. Otherwise the file is sent again via `updateDocumentByFile`, the
 * entry is moved back through `uploading` → `indexing`, and polling is
 * restarted for the new batch. Failures mark the entry as `error` and show
 * an error message.
 */
const handleReprocess = useCallback(async () => {
  const target = uploadedDocuments[currentDocIndex];
  if (!target || !target.documentId) return;

  // Apply `transform` to the entry at the current index only.
  const updateCurrent = (transform: (doc: UploadedDocument) => UploadedDocument) => {
    setUploadedDocuments(prev => prev.map((doc, idx) => (idx === currentDocIndex ? transform(doc) : doc)));
  };

  // Reset the entry to a fresh uploading state.
  updateCurrent(doc => ({
    ...doc,
    stage: 'uploading' as DocumentStage,
    uploadProgress: 0,
    segments: [],
  }));

  try {
    const config = buildConfig(currentSettings);
    const result = await updateDocumentByFile(
      datasetId,
      target.documentId,
      target.file,
      config,
      (percent) => {
        updateCurrent(doc => ({ ...doc, uploadProgress: percent }));
      }
    );

    // Store the new batch and move on to the indexing stage.
    updateCurrent(doc => ({
      ...doc,
      batch: result.batch,
      stage: 'indexing' as DocumentStage,
      indexingStatus: 'waiting' as IndexingStatus,
      realIndexingStatus: 'waiting' as IndexingStatus, // initial backend status
    }));

    startPolling(result.batch, target.documentId, currentDocIndex);
  } catch (err: any) {
    console.error('重新处理失败:', err);
    updateCurrent(doc => ({
      ...doc,
      stage: 'error' as DocumentStage,
      error: err.message || '重新处理失败',
    }));
    message.error(err.message || '重新处理失败');
  }
}, [uploadedDocuments, currentDocIndex, currentSettings, datasetId, buildConfig, startPolling]);
/**
 * Go back to step 1 (file selection).
 *
 * Refused with a warning while any document is still uploading or indexing.
 * Otherwise all polling is stopped and the upload state is reset.
 */
const handlePrevStep = useCallback(() => {
  // Block navigation while work is still in flight.
  const hasProcessing = uploadedDocuments.some(doc =>
    doc.stage === 'uploading' || doc.stage === 'indexing'
  );
  if (hasProcessing) {
    message.warning('还有文档正在处理中,请等待完成');
    return;
  }
  stopAllPolling();
  setStep(1);
  setUploadedDocuments([]);
  setCurrentDocIndex(0);
  // Store a copy, as handleNextStep does, so state never aliases the shared default object.
  setCurrentSettings({ ...DEFAULT_SEGMENTATION_SETTINGS });
}, [uploadedDocuments, stopAllPolling]);
/**
 * Leave the upload flow: stop polling, call `onSuccess` when at least one
 * document reached the `completed` stage, then call `onClose`.
 */
const handleGoToDocuments = useCallback(() => {
  stopAllPolling();
  const anyCompleted = uploadedDocuments.some(({ stage }) => stage === 'completed');
  if (anyCompleted) {
    onSuccess();
  }
  onClose();
}, [uploadedDocuments, stopAllPolling, onSuccess, onClose]);
/**
 * Return the document at `currentDocIndex`, or `null` when there is none.
 */
const getCurrentDocument = useCallback((): UploadedDocument | null => {
  const selected = uploadedDocuments[currentDocIndex];
  return selected ? selected : null;
}, [uploadedDocuments, currentDocIndex]);
/**
 * Progress percentage of the current document.
 *
 * While uploading this is the upload progress; while indexing or completed
 * it is the percentage configured for the indexing status; otherwise 0.
 */
const getCurrentProgress = useCallback(() => {
  const current = getCurrentDocument();
  if (!current) return 0;
  switch (current.stage) {
    case 'uploading':
      return current.uploadProgress;
    case 'indexing':
    case 'completed':
      return INDEXING_STATUS_CONFIG[current.indexingStatus]?.percent || 0;
    default:
      return 0;
  }
}, [getCurrentDocument]);
/**
 * Human-readable status line for the current document, or an empty string
 * when there is no current document or its stage has no text.
 */
const getStatusText = useCallback(() => {
  const current = getCurrentDocument();
  if (!current) return '';
  switch (current.stage) {
    case 'uploading':
      return `正在上传... ${current.uploadProgress}%`;
    case 'indexing': {
      const label = INDEXING_STATUS_CONFIG[current.indexingStatus]?.text || '处理中...';
      // Segment counters are appended only during splitting/indexing and only when a total is known.
      const inSegmentPhase =
        current.indexingStatus === 'splitting' || current.indexingStatus === 'indexing';
      if (inSegmentPhase && current.totalSegments && current.totalSegments > 0) {
        return `${label} (${current.completedSegments || 0}/${current.totalSegments})`;
      }
      return label;
    }
    case 'completed':
      return `处理完成 (${current.totalSegments || current.segments?.length || 0} 段)`;
    case 'error':
      return current.error || '处理失败';
    default:
      return '';
  }
}, [getCurrentDocument]);
/**
 * Whether the current document is still uploading or indexing.
 */
const isCurrentDocProcessing = useCallback(() => {
  const stage = getCurrentDocument()?.stage;
  return stage === 'uploading' || stage === 'indexing';
}, [getCurrentDocument]);
/**
 * Count how many of the uploaded documents have reached the `completed` stage.
 *
 * @returns `{ completed, total }` over `uploadedDocuments`.
 */
const getCompletionStats = useCallback(() => {
  let completed = 0;
  for (const doc of uploadedDocuments) {
    if (doc.stage === 'completed') {
      completed += 1;
    }
  }
  return { completed, total: uploadedDocuments.length };
}, [uploadedDocuments]);
return {
// State
step,
selectedFiles,
fileList,
uploadedDocuments,
currentDocIndex,
currentSettings,
previewLoading,
// Actions
handleFileChange,
handleRemoveFile,
handleNextStep,
handleDocumentChange,
handleReprocess,
handlePrevStep,
handleGoToDocuments,
updateCurrentSettings,
// Derived-value getters (computed from the current state on each call)
getCurrentDocument,
getCurrentProgress,
getStatusText,
isCurrentDocProcessing,
getCompletionStats,
};
}
/** Shape of the object returned by `useDocumentUpload`, derived from the hook itself. */
export type UseDocumentUploadReturn = ReturnType<typeof useDocumentUpload>;
+202
View File
@@ -0,0 +1,202 @@
import { useState, useEffect, useCallback } from 'react';
import { message } from 'antd';
import type { Dataset } from '~/api/dify-dataset/type/datasetTypes';
import type { Document } from '~/api/dify-dataset/type/documentTypes';
import { fetchDatasets } from '~/api/dify-dataset/api/datasetApi';
import { fetchDocuments } from '~/api/dify-dataset/api/documentApi';
import type { MenuTab } from '~/types/dify-dataset-manager/layout';
import { DEFAULT_DOCUMENT_PAGE_SIZE } from '~/types/dify-dataset-manager/index';
/**
 * State-management hook for the dataset (knowledge base) manager.
 *
 * On mount it loads the first dataset returned by `fetchDatasets` and the
 * first page of its documents. It exposes the resulting state together with
 * handlers for paging, refreshing, local list updates after delete/status
 * changes, document selection and tab switching.
 *
 * @returns The state values and handler functions listed in the returned object.
 */
export function useDatasetManager() {
  // Dataset state
  const [dataset, setDataset] = useState<Dataset | null>(null);
  const [loadingDataset, setLoadingDataset] = useState(true);
  // Document list state
  const [documents, setDocuments] = useState<Document[]>([]);
  const [loadingDocuments, setLoadingDocuments] = useState(false);
  const [documentTotal, setDocumentTotal] = useState(0);
  const [documentPage, setDocumentPage] = useState(1);
  const [documentPageSize] = useState(DEFAULT_DOCUMENT_PAGE_SIZE);
  // Initialisation state
  const [inited, setInited] = useState(false);
  const [error, setError] = useState<string | null>(null);
  // Active menu tab
  const [activeTab, setActiveTab] = useState<MenuTab>('documents');
  // Document selected for the detail view
  const [selectedDocument, setSelectedDocument] = useState<Document | null>(null);

  /**
   * Load one page of documents for a dataset.
   * A failure is logged and reported via `message.error`; the previous list is kept.
   */
  const loadDocuments = useCallback(async (datasetId: string, page: number = 1) => {
    if (!datasetId) return;
    setLoadingDocuments(true);
    try {
      console.log('[DatasetManager] 加载文档列表:', { datasetId, page });
      const response = await fetchDocuments(datasetId, page, documentPageSize);
      console.log('[DatasetManager] 文档列表响应:', response);
      if (response && response.data) {
        setDocuments(response.data);
        setDocumentTotal(response.total);
        setDocumentPage(page);
      }
    } catch (err: any) {
      console.error('[DatasetManager] 加载文档列表失败:', err);
      message.error('加载文档列表失败');
    } finally {
      setLoadingDocuments(false);
    }
  }, [documentPageSize]);

  /**
   * Load the dataset (the first one returned by the API) and its first page
   * of documents. Sets `error` when no dataset exists or the request fails.
   */
  const loadDataset = useCallback(async () => {
    setLoadingDataset(true);
    // Clear any error left over from an earlier attempt so a successful
    // reload does not keep reporting the old failure.
    setError(null);
    try {
      console.log('[DatasetManager] 加载知识库...');
      const response = await fetchDatasets(1, 1);
      console.log('[DatasetManager] 知识库响应:', response);
      if (response && response.data && response.data.length > 0) {
        const firstDataset = response.data[0];
        setDataset(firstDataset);
        // Load the documents right away.
        await loadDocuments(firstDataset.id, 1);
      } else {
        setError('未找到知识库,请先在Dify中创建知识库');
      }
    } catch (err: any) {
      console.error('[DatasetManager] 加载知识库失败:', err);
      setError(err.message || '加载知识库失败');
      message.error('加载知识库失败');
    } finally {
      setLoadingDataset(false);
      setInited(true);
    }
  }, [loadDocuments]);

  /** Load the requested page of documents for the current dataset. */
  const handlePageChange = useCallback((page: number) => {
    if (dataset) {
      loadDocuments(dataset.id, page);
    }
  }, [dataset, loadDocuments]);

  /**
   * Reflect a deleted document locally: drop it from the list and decrement
   * both counters, never letting them fall below zero.
   */
  const handleDocumentDeleted = useCallback((documentId: string) => {
    setDocuments((prev) => prev.filter((doc) => doc.id !== documentId));
    setDocumentTotal((prev) => Math.max(0, prev - 1));
    // Keep the dataset's document count in step with the list.
    setDataset((prev) => {
      if (prev) {
        return {
          ...prev,
          document_count: Math.max(0, prev.document_count - 1)
        };
      }
      return prev;
    });
  }, []);

  /** Reflect an enabled/disabled change on one document in the local list. */
  const handleDocumentStatusChanged = useCallback((documentId: string, enabled: boolean) => {
    setDocuments((prev) =>
      prev.map((doc) =>
        doc.id === documentId ? { ...doc, enabled } : doc
      )
    );
  }, []);

  /** Reload the current page of documents. */
  const handleRefresh = useCallback(() => {
    if (dataset) {
      loadDocuments(dataset.id, documentPage);
    }
  }, [dataset, documentPage, loadDocuments]);

  /** Open a document's detail view (segment management). */
  const handleViewDocument = useCallback((doc: Document) => {
    console.log('[DatasetManager] 查看文档详情:', doc);
    setSelectedDocument(doc);
  }, []);

  /** Return from the detail view to the document list. */
  const handleBackToDocuments = useCallback(() => {
    setSelectedDocument(null);
  }, []);

  /** Switch the active menu tab. */
  const handleTabChange = useCallback((tab: MenuTab) => {
    setActiveTab(tab);
    // Leaving the documents tab clears the selected document.
    if (tab !== 'documents') {
      setSelectedDocument(null);
    }
  }, []);

  /** Replace the dataset with an updated copy. */
  const handleDatasetUpdated = useCallback((updatedDataset: Dataset) => {
    setDataset(updatedDataset);
  }, []);

  // Initial load.
  useEffect(() => {
    loadDataset();
  }, [loadDataset]);

  return {
    // State
    dataset,
    loadingDataset,
    documents,
    loadingDocuments,
    documentTotal,
    documentPage,
    documentPageSize,
    inited,
    error,
    activeTab,
    selectedDocument,
    // Actions
    loadDataset,
    loadDocuments,
    handlePageChange,
    handleDocumentDeleted,
    handleDocumentStatusChanged,
    handleRefresh,
    handleViewDocument,
    handleBackToDocuments,
    handleTabChange,
    handleDatasetUpdated,
  };
}
/** Shape of the object returned by `useDatasetManager`, derived from the hook itself. */
export type UseDatasetManagerReturn = ReturnType<typeof useDatasetManager>;
@@ -0,0 +1,90 @@
import { useState, useCallback } from 'react';
import { message } from 'antd';
import type { RetrieveRecord, RetrievalModel } from '~/api/dify-dataset/type';
import { retrieveDataset } from '~/api/dify-dataset/api/segmentApi';
import { DIFY_CONFIG } from '~/config/api-config';
import type { SearchMethod } from '~/types/dify-dataset-manager/retrieve-test';
/**
 * Build the full `retrieval_model` payload for a Dify retrieve request.
 *
 * Reranking is enabled for semantic and hybrid search and disabled for the
 * other search methods. The reranking model always comes from `DIFY_CONFIG`.
 *
 * @param searchMethod - Search method to use.
 * @param topK - Number of results to request.
 * @returns The retrieval model with score threshold disabled and no weights.
 */
function buildRetrievalModel(searchMethod: SearchMethod, topK: number): RetrievalModel {
  // Semantic and hybrid search need reranking enabled.
  const needReranking = searchMethod === 'semantic_search' || searchMethod === 'hybrid_search';
  return {
    search_method: searchMethod,
    reranking_enable: needReranking,
    // Always null: the previous ternary had identical branches.
    reranking_mode: null,
    reranking_model: {
      reranking_provider_name: DIFY_CONFIG.rerankingProviderName,
      reranking_model_name: DIFY_CONFIG.rerankingModelName,
    },
    weights: null,
    top_k: topK,
    score_threshold_enabled: false,
    score_threshold: null,
  };
}
/**
 * State-management hook for the retrieval (recall) test panel.
 *
 * Holds the query, search method, `top_k` and results, and exposes
 * `handleRetrieve` to run a retrieval against the given dataset.
 *
 * @param datasetId - ID of the dataset to query.
 * @returns The state values, their setters and `handleRetrieve`.
 */
export function useRetrieveTest(datasetId: string) {
  const [searchQuery, setSearchQuery] = useState('');
  const [retrieveResults, setRetrieveResults] = useState<RetrieveRecord[]>([]);
  const [retrieving, setRetrieving] = useState(false);
  // Semantic search is the default method.
  const [searchMethod, setSearchMethod] = useState<SearchMethod>('semantic_search');
  const [topK, setTopK] = useState<number>(5);

  /**
   * Run the retrieval. Warns and returns early on a blank query or missing
   * dataset ID; failures are logged and shown via `message.error`.
   */
  const handleRetrieve = useCallback(async () => {
    if (!searchQuery.trim()) {
      message.warning('请输入检索关键词');
      return;
    }
    if (!datasetId) {
      message.warning('知识库ID不存在');
      return;
    }
    setRetrieving(true);
    try {
      const retrievalModel = buildRetrievalModel(searchMethod, topK);
      console.log('[Hook] 检索参数:', { datasetId, query: searchQuery, retrievalModel });
      const response = await retrieveDataset(datasetId, searchQuery, retrievalModel);
      // Normalise once so a missing `records` field is treated like an empty
      // result: the list is cleared and the "no match" hint is still shown.
      const records = response.records || [];
      setRetrieveResults(records);
      if (records.length === 0) {
        message.info('未找到匹配的结果');
      }
    } catch (err: any) {
      console.error('检索失败:', err);
      message.error(err.message || '检索失败');
    } finally {
      setRetrieving(false);
    }
  }, [datasetId, searchQuery, searchMethod, topK]);

  return {
    // State and setters
    searchQuery,
    setSearchQuery,
    retrieveResults,
    retrieving,
    searchMethod,
    setSearchMethod,
    topK,
    setTopK,
    // Actions
    handleRetrieve,
  };
}
/** Shape of the object returned by `useRetrieveTest`, derived from the hook itself. */
export type UseRetrieveTestReturn = ReturnType<typeof useRetrieveTest>;
@@ -0,0 +1,59 @@
import { type ActionFunctionArgs } from '@remix-run/node';
import { API_BASE_URL } from '~/config/api-config';
/**
 * POST /api/dataset/datasets/:datasetId/documents/:documentId/update-by-file
 *
 * Updates (re-processes) a document from an uploaded file by forwarding the
 * multipart form data to the FastAPI backend with the user's JWT.
 *
 * Responses:
 * - 401 when the session has no JWT.
 * - 400 when `datasetId` or `documentId` is missing.
 * - The upstream status and JSON body when the backend answers with JSON.
 * - The upstream error status (or 502 if upstream reported success) when the
 *   backend body is not valid JSON.
 * - 500 on any other failure.
 */
export async function action({ request, params }: ActionFunctionArgs) {
  const jsonHeaders = { 'Content-Type': 'application/json' };
  try {
    // Read the user session and its JWT.
    const { getUserSession } = await import("~/api/login/auth.server");
    const { frontendJWT } = await getUserSession(request);
    if (!frontendJWT) {
      return new Response(
        JSON.stringify({ error: 'JWT认证失败,请重新登录' }),
        { status: 401, headers: jsonHeaders }
      );
    }

    const { datasetId, documentId } = params;
    if (!datasetId || !documentId) {
      return new Response(
        JSON.stringify({ error: '缺少 datasetId 或 documentId 参数' }),
        { status: 400, headers: jsonHeaders }
      );
    }

    // Read the multipart form data sent by the browser.
    const formData = await request.formData();
    console.log('[API] Update Document By File:', { datasetId, documentId });

    // Forward to FastAPI.
    // Dify API: POST /datasets/{dataset_id}/documents/{document_id}/update-by-file
    // Path params are encoded so they cannot alter the upstream path.
    const apiUrl = `${API_BASE_URL}/dify_dataset/datasets/${encodeURIComponent(datasetId)}/documents/${encodeURIComponent(documentId)}/update-by-file`;
    const response = await fetch(apiUrl, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${frontendJWT}`,
      },
      body: formData,
    });

    // Read as text first: a gateway/proxy error page is not JSON, and the
    // upstream status should still reach the client in that case.
    const rawBody = await response.text();
    let payload: unknown;
    try {
      payload = JSON.parse(rawBody);
    } catch {
      console.error('[API] Update Document By File - Non-JSON upstream response:', response.status);
      return new Response(
        JSON.stringify({ error: 'Upstream service returned a non-JSON response' }),
        { status: response.ok ? 502 : response.status, headers: jsonHeaders }
      );
    }

    return new Response(JSON.stringify(payload), {
      status: response.status,
      headers: jsonHeaders,
    });
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    console.error('[API] Update Document By File - Error:', reason);
    return new Response(
      JSON.stringify({ error: reason || 'Failed to update document by file' }),
      { status: 500, headers: jsonHeaders }
    );
  }
}
+1 -1
View File
@@ -1,4 +1,4 @@
import { Outlet } from "react-router-dom";
import { Outlet } from "@remix-run/react";
import {type MetaFunction} from "@remix-run/node";
export const meta: MetaFunction = () => {
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,25 @@
import type { Dataset } from '~/api/dify-dataset/type/datasetTypes';
/**
 * Props for the dataset settings component.
 * - dataset: the dataset to show, or null when none is available
 * - onDatasetUpdated: callback that receives the updated dataset
 */
export interface DatasetSettingsProps {
dataset: Dataset | null;
onDatasetUpdated: (dataset: Dataset) => void;
}
/**
 * Values of the dataset settings form: the dataset name and description.
 */
export interface DatasetSettingsFormValues {
name: string;
description: string;
}
/**
 * UI state of the dataset settings form.
 * - saving: presumably true while a save request is in flight — confirm against the component
 * - hasChanges: presumably true when the form differs from the saved values — confirm against the component
 */
export interface DatasetSettingsState {
saving: boolean;
hasChanges: boolean;
}
@@ -0,0 +1,51 @@
import type { Segment } from '~/api/dify-dataset/type';
import type { Document } from '~/api/dify-dataset/type/documentTypes';
/**
 * Props for the document detail component.
 * - datasetId: ID of the dataset the document belongs to
 * - document: the document to show, or null when none is selected
 */
export interface DocumentDetailProps {
datasetId: string;
document: Document | null;
}
/**
 * Segmentation settings used by the document detail view.
 * The fields correspond to Dify API options:
 * - separator: segment separator string
 * - maxTokens: maximum segment length in tokens
 * - removeExtraSpaces: pre-processing rule that replaces consecutive spaces
 * - removeUrlsEmails: pre-processing rule that removes URLs and e-mail addresses
 * - useQASegment: Q&A segmentation, i.e. doc_form: "qa_model"
 * - qaLanguage: language for Q&A segmentation — TODO confirm the accepted values
 */
export interface DocumentDetailSegmentationSettings {
separator: string;
maxTokens: number;
removeExtraSpaces: boolean;
removeUrlsEmails: boolean;
useQASegment: boolean;
qaLanguage: string;
}
/**
 * Default segmentation settings for the document detail view.
 * NOTE(review): the separator is the escaped text `\n\n` (backslash + n), not
 * real newline characters — presumably unescaped by the backend; confirm.
 */
export const DEFAULT_DOCUMENT_DETAIL_SETTINGS: DocumentDetailSegmentationSettings = {
separator: '\\n\\n',
maxTokens: 500,
removeExtraSpaces: true,
removeUrlsEmails: false,
useQASegment: false,
qaLanguage: 'Chinese',
};
/**
 * State of the document detail view: the current segmentation settings, the
 * preview segments with their loading/visibility flags, and a saving flag.
 */
export interface DocumentDetailState {
settings: DocumentDetailSegmentationSettings;
previewSegments: Segment[];
previewLoading: boolean;
showPreview: boolean;
saving: boolean;
}
@@ -0,0 +1,33 @@
import type { Document, IndexingStatus } from '~/api/dify-dataset/type/documentTypes';
/**
 * Props for the document list component.
 *
 * Carries the dataset identity, the current page of documents with paging
 * information, and callbacks for page changes, deletion, status changes,
 * refresh and (optionally) opening a document.
 */
export interface DocumentListProps {
datasetId: string;
datasetName: string;
documents: Document[];
loading: boolean;
total: number;
page: number;
pageSize: number;
onPageChange: (page: number) => void;
onDocumentDeleted: (documentId: string) => void;
onDocumentStatusChanged: (documentId: string, enabled: boolean) => void;
onRefresh: () => void;
onViewDocument?: (document: Document) => void;
}
/**
 * Display configuration for one status: colour, icon and label text.
 */
export interface StatusConfig {
color: string;
icon: React.ReactNode;
text: string;
}
/**
 * Maps every `IndexingStatus` value to its display configuration.
 */
export type StatusConfigMap = Record<IndexingStatus, StatusConfig>;
@@ -0,0 +1,77 @@
import type { Segment } from '~/api/dify-dataset/type';
import type { IndexingStatus } from '~/api/dify-dataset/type/documentTypes';
/**
 * Segmentation settings used by the document upload flow.
 * - separator: segment separator string
 * - maxTokens: maximum segment length in tokens
 * - chunkOverlap: overlap between segments — TODO confirm the unit and whether the backend honours it
 * - removeExtraSpaces / removeUrlsEmails: pre-processing rule toggles
 * - indexingTechnique: 'high_quality' or 'economy'
 */
export interface SegmentationSettings {
separator: string;
maxTokens: number;
chunkOverlap: number;
removeExtraSpaces: boolean;
removeUrlsEmails: boolean;
indexingTechnique: 'high_quality' | 'economy';
}
/**
 * Default segmentation settings for newly uploaded documents.
 * NOTE(review): the separator is the escaped text `\n\n` (backslash + n), not
 * real newline characters — presumably unescaped by the backend; confirm.
 */
export const DEFAULT_SEGMENTATION_SETTINGS: SegmentationSettings = {
separator: '\\n\\n',
maxTokens: 1024,
chunkOverlap: 50,
removeExtraSpaces: true,
removeUrlsEmails: false,
indexingTechnique: 'high_quality',
};
/**
 * Client-side processing stage of an uploaded document.
 */
export type DocumentStage = 'pending' | 'uploading' | 'indexing' | 'completed' | 'error';
/**
 * One document tracked by the upload flow: the source file, the identifiers
 * returned by the backend, progress/status information, the segmentation
 * settings and the resulting segments.
 */
export interface UploadedDocument {
file: File;
documentId: string;
batch: string;
stage: DocumentStage;
indexingStatus: IndexingStatus; // status used for display
realIndexingStatus?: IndexingStatus; // actual status reported by the backend
uploadProgress: number;
error?: string;
settings: SegmentationSettings;
segments: Segment[];
completedSegments?: number;
totalSegments?: number;
}
/**
 * Display text and progress percentage for each indexing status.
 */
export const INDEXING_STATUS_CONFIG: Record<IndexingStatus, { text: string; percent: number }> = {
waiting: { text: '等待处理...', percent: 10 },
parsing: { text: '解析文档...', percent: 30 },
cleaning: { text: '清洗文本...', percent: 50 },
splitting: { text: '分段处理...', percent: 70 },
indexing: { text: '建立索引...', percent: 85 },
completed: { text: '处理完成', percent: 100 },
paused: { text: '已暂停', percent: 0 },
error: { text: '处理失败', percent: 0 },
};
/**
 * Comma-separated list of supported file formats, as a single string
 * (presumably for display in the upload UI — confirm against the component).
 */
export const SUPPORTED_FORMATS = 'TXT, MARKDOWN, MDX, PDF, HTML, XLSX, XLS, DOCX, CSV, VTT, PROPERTIES, MD, HTM';
/**
 * Props for the document upload component.
 * - datasetId: ID of the dataset to upload into
 * - onClose: called when the upload flow is closed
 * - onSuccess: called on close when at least one document completed
 */
export interface DocumentUploadProps {
datasetId: string;
onClose: () => void;
onSuccess: () => void;
}
+43
View File
@@ -0,0 +1,43 @@
import type { Dataset } from '~/api/dify-dataset/type/datasetTypes';
import type { Document } from '~/api/dify-dataset/type/documentTypes';
import type { MenuTab } from './layout';
/**
 * State held by the dataset manager.
 */
export interface DatasetManagerState {
// Dataset state
dataset: Dataset | null;
loadingDataset: boolean;
// Document list state
documents: Document[];
loadingDocuments: boolean;
documentTotal: number;
documentPage: number;
documentPageSize: number;
// Initialisation state
inited: boolean;
error: string | null;
// Active menu tab
activeTab: MenuTab;
// Selected document
selectedDocument: Document | null;
}
/**
 * Default number of documents requested per page.
 */
export const DEFAULT_DOCUMENT_PAGE_SIZE = 20;
// 导出所有子模块类型
export * from './dataset-settings';
export * from './document-detail';
export * from './document-list';
export * from './document-upload';
export * from './layout';
export * from './retrieve-test';
+34
View File
@@ -0,0 +1,34 @@
import type { ReactNode } from 'react';
import type { Dataset } from '~/api/dify-dataset/type/datasetTypes';
/**
 * Keys of the menu tabs: documents, retrieval test and settings.
 */
export type MenuTab = 'documents' | 'retrieve' | 'settings';
/**
 * One menu entry: the tab key it activates, its icon and its label.
 */
export interface MenuItem {
key: MenuTab;
icon: ReactNode;
label: string;
}
/**
 * Props for the dataset layout component.
 */
export interface DatasetLayoutProps {
/** Dataset information */
dataset: Dataset | null;
/** Currently active menu tab */
activeTab: MenuTab;
/** Called when the menu tab changes */
onTabChange: (tab: MenuTab) => void;
/** Whether to show the back button (shown on the document detail page) */
showBackButton?: boolean;
/** Called when the back button is clicked */
onBack?: () => void;
/** Child content */
children: ReactNode;
}
@@ -0,0 +1,35 @@
import type { RetrieveRecord } from '~/api/dify-dataset/type';
/**
 * Props for the retrieval test component: the ID of the dataset to query.
 */
export interface RetrieveTestProps {
datasetId: string;
}
/**
 * Search method used for retrieval.
 * - semantic_search: vector search
 * - full_text_search: full-text search
 * - hybrid_search: hybrid search
 */
export type SearchMethod = 'semantic_search' | 'full_text_search' | 'hybrid_search';
/**
 * Retrieval options: the search method and the number of results (`top_k`).
 */
export interface RetrieveOptions {
searchMethod: SearchMethod;
topK: number;
}
/**
 * State of the retrieval test panel: the query, the results, the in-flight
 * flag and the selected retrieval options.
 */
export interface RetrieveTestState {
searchQuery: string;
retrieveResults: RetrieveRecord[];
retrieving: boolean;
searchMethod: SearchMethod;
topK: number;
}
+462
View File
@@ -0,0 +1,462 @@
# Dify 知识库 API 功能实现清单
## 概述
本文档对比 Dify 官方知识库 API 与当前项目已实现的功能。
- ✅ 已实现
- ❌ 未实现
- 🚫 不开放
---
## 系统架构
### 完整数据流向
```
┌─────────────────────────────────────────────────────────────────────┐
│ 第一层:React 组件 (浏览器) │
│ 位置:app/components/dify-dataset-manager/ │
│ 调用:客户端 API 函数 │
└────────────────────────────┬────────────────────────────────────────┘
│ 使用 axios 发送 HTTP 请求
│ URL: /api/dataset/...
│ 自动携带 cookies (JWT)
┌─────────────────────────────────────────────────────────────────────┐
│ 第二层:客户端 API 层 (浏览器侧) │
│ 位置:app/api/dify-dataset/api/*.ts │
│ 作用:封装 axios 请求,提供类型安全的函数接口 │
│ 请求:axios.get('/api/dataset/datasets', { withCredentials: true })│
└────────────────────────────┬────────────────────────────────────────┘
│ HTTP 请求 (浏览器 → Remix 服务器)
┌─────────────────────────────────────────────────────────────────────┐
│ 第三层:Remix 路由层 (Node.js 服务端) │
│ 位置:app/routes/api.dataset.*.tsx │
│ 作用:接收浏览器请求,验证 JWT,转发到 FastAPI │
│ 请求:fetch(`${API_BASE_URL}/dify_dataset/...`, { headers: JWT }) │
└────────────────────────────┬────────────────────────────────────────┘
│ HTTP 请求 (Remix → FastAPI)
┌─────────────────────────────────────────────────────────────────────┐
│ 第四层:FastAPI 后端代理 (Python) │
│ 位置:docauditai/routers/dify_dataset.py │
│ 作用:验证用户 JWT,添加 Dify DATASET_API_KEY,转发请求 │
└────────────────────────────┬────────────────────────────────────────┘
│ HTTP 请求 (FastAPI → Dify)
┌─────────────────────────────────────────────────────────────────────┐
│ 第五层:Dify 官方知识库 API │
│ URL:https://api.dify.ai/v1/datasets/...                            │
│ 鉴权:Authorization: Bearer {DATASET_API_KEY} │
└─────────────────────────────────────────────────────────────────────┘
```
### 为什么有两层 API
| 层级 | 位置 | 执行环境 | HTTP 库 | 作用 |
|------|------|----------|---------|------|
| **客户端 API** | `app/api/dify-dataset/api/*.ts` | 浏览器 | axios | 供 React 组件调用,类型安全 |
| **Remix 路由** | `app/routes/api.dataset.*.tsx` | Node.js | fetch | 接收浏览器请求,转发到 FastAPI |
**调用链路**
```
React 组件 → 客户端 API (axios) → Remix 路由 (fetch) → FastAPI → Dify API
```
---
## 一、知识库管理
| 功能 | API 端点 | 方法 | 状态 | 路由文件 | 客户端函数 |
|------|----------|------|------|----------|-----------|
| 获取知识库列表 | /datasets | GET | ✅ | `api.dataset.datasets.tsx` | `fetchDatasets()` |
| 查看知识库详情 | /datasets/{dataset_id} | GET | ✅ | `api.dataset.datasets.$datasetId.tsx` | `fetchDataset()` |
| 创建空知识库 | /datasets | POST | ❌ | - | - |
| 修改知识库名称 | /datasets/{dataset_id} | PATCH | ✅ | `api.dataset.datasets.$datasetId.tsx` | `updateDatasetName()` |
| 删除知识库 | /datasets/{dataset_id} | DELETE | 🚫 | - | - |
**说明**
- 修改知识库:仅允许修改 `name` 字段,其他字段不开放
- 删除知识库:出于安全考虑不对用户开放
---
## 二、文档管理
| 功能 | API 端点 | 方法 | 状态 | 路由文件 | 客户端函数 |
|------|----------|------|------|----------|-----------|
| 获取文档列表 | /datasets/{id}/documents | GET | ✅ | `api.dataset.datasets.$datasetId.documents.tsx` | `fetchDocuments()` |
| 获取文档详情 | /datasets/{id}/documents/{docId} | GET | ✅ | `api.dataset.datasets.$datasetId.documents.$documentId.tsx` | `fetchDocument()` |
| 通过文件创建文档 | /datasets/{id}/document/create-by-file | POST | ✅ | `api.dataset.datasets.$datasetId.documents.tsx` | `uploadDocument()` |
| 通过文本创建文档 | /datasets/{id}/document/create-by-text | POST | ❌ | - | - |
| 通过文件更新文档 | /datasets/{id}/documents/{docId}/update-by-file | POST | ❌ | - | - |
| 通过文本更新文档 | /datasets/{id}/documents/{docId}/update-by-text | POST | ❌ | - | - |
| 删除文档 | /datasets/{id}/documents/{docId} | DELETE | ✅ | `api.dataset.datasets.$datasetId.documents.$documentId.tsx` | `deleteDocument()` |
| 更新文档状态 | /datasets/{id}/documents/status/{action} | PATCH | ✅ | `api.dataset.datasets.$datasetId.documents.status.$action.tsx` | `toggleDocumentStatus()` |
| 获取文档嵌入状态 | /datasets/{id}/documents/{batch}/indexing-status | GET | ✅ | `api.dataset.datasets.$datasetId.documents.$batch.indexing-status.tsx` | `fetchIndexingStatus()` |
| 获取上传文件信息 | /datasets/{id}/documents/{docId}/upload-file | GET | ✅ | `api.dataset.datasets.$datasetId.documents.$documentId.upload-file.tsx` | `fetchUploadFileInfo()` |
| 索引预估(预览分段) | /datasets/{id}/indexing-estimate | POST | ✅ | `api.dataset.datasets.$datasetId.indexing-estimate.tsx` | `fetchIndexingEstimate()` |
| 重新处理文档 | /datasets/{id}/documents/reprocess | POST | ✅ | `api.dataset.datasets.$datasetId.documents.reprocess.tsx` | `reprocessDocument()` |
**说明**
- 上传文档:支持 multipart/form-data 格式
- 文档状态:action 可选值为 `enable` / `disable` / `archive` / `un_archive`
- 索引预估:用于预览分段效果,不会实际修改文档
- 重新处理文档:通过 `original_document_id` 参数使用新的分段设置重新处理已有文档
---
## 三、分段管理
| 功能 | API 端点 | 方法 | 状态 | 路由文件 | 客户端函数 |
|------|----------|------|------|----------|-----------|
| 获取分段列表 | /datasets/{id}/documents/{docId}/segments | GET | ✅ | `api.dataset.datasets.$datasetId.documents.$documentId.segments.tsx` | `fetchSegments()` |
| 获取分段详情 | /datasets/{id}/documents/{docId}/segments/{segId} | GET | ✅ | `api.dataset.datasets.$datasetId.documents.$documentId.segments.$segmentId.tsx` | `fetchSegment()` |
| 新增分段 | /datasets/{id}/documents/{docId}/segments | POST | ✅ | `api.dataset.datasets.$datasetId.documents.$documentId.segments.tsx` | `createSegments()` |
| 更新分段 | /datasets/{id}/documents/{docId}/segments/{segId} | POST | ✅ | `api.dataset.datasets.$datasetId.documents.$documentId.segments.$segmentId.tsx` | `updateSegment()` |
| 删除分段 | /datasets/{id}/documents/{docId}/segments/{segId} | DELETE | ✅ | `api.dataset.datasets.$datasetId.documents.$documentId.segments.$segmentId.tsx` | `deleteSegment()` |
**说明**
- 新增分段:支持批量新增 `{ segments: [...] }`
- 更新分段:可更新 `content`, `answer`, `keywords`, `enabled`
---
## 四、子分段管理(父子模式)
| 功能 | API 端点 | 方法 | 状态 | 路由文件 | 客户端函数 |
|------|----------|------|------|----------|-----------|
| 查询子分段 | .../segments/{segId}/child_chunks | GET | ✅ | `...segments.$segmentId.child_chunks.tsx` | `fetchChildChunks()` |
| 新增子分段 | .../segments/{segId}/child_chunks | POST | ✅ | `...segments.$segmentId.child_chunks.tsx` | `createChildChunk()` |
| 更新子分段 | .../segments/{segId}/child_chunks/{chunkId} | PATCH | ✅ | `...segments.$segmentId.child_chunks.$childChunkId.tsx` | `updateChildChunk()` |
| 删除子分段 | .../segments/{segId}/child_chunks/{chunkId} | DELETE | ✅ | `...segments.$segmentId.child_chunks.$childChunkId.tsx` | `deleteChildChunk()` |
**说明**:子分段用于 Dify 的父子模式分段策略
---
## 五、检索功能
| 功能 | API 端点 | 方法 | 状态 | 路由文件 | 客户端函数 |
|------|----------|------|------|----------|-----------|
| 检索知识库 | /datasets/{id}/retrieve | POST | ✅ | `api.dataset.datasets.$datasetId.retrieve.tsx` | `retrieveDataset()` |
**检索参数详解**
```typescript
{
query: string; // 检索关键词
retrieval_model: {
search_method: 'keyword_search' | 'semantic_search' | 'full_text_search' | 'hybrid_search';
reranking_enable: boolean; // 是否开启 rerank
reranking_model?: object; // Rerank 模型配置
top_k: number; // 返回结果数量
score_threshold_enabled: boolean;
score_threshold: number; // 分数阈值 (0-1)
}
}
```
---
## 六、元数据管理
**当前状态:❌ 全部未实现**
| 功能 | API 端点 | 方法 | 说明 |
|------|----------|------|------|
| 新增元数据 | /datasets/{id}/metadata | POST | type, name |
| 更新元数据 | /datasets/{id}/metadata/{metaId} | PATCH | name |
| 删除元数据 | /datasets/{id}/metadata/{metaId} | DELETE | |
| 查询元数据列表 | /datasets/{id}/metadata | GET | |
| 启用/禁用内置元数据 | /datasets/{id}/metadata/built-in/{action} | POST | |
| 更新文档元数据 | /datasets/{id}/documents/metadata | POST | 批量更新 |
---
## 七、模型查询
| 功能 | API 端点 | 方法 | 状态 | 说明 |
|------|----------|------|------|------|
| 获取嵌入模型列表 | /workspaces/current/models/model-types/text-embedding | GET | ❌ | 创建知识库时需要 |
---
## 八、标签管理
**当前状态:❌ 全部未实现**
| 功能 | API 端点 | 方法 | 说明 |
|------|----------|------|------|
| 新增标签 | /datasets/tags | POST | name (最大50字符) |
| 获取标签列表 | /datasets/tags | GET | |
| 修改标签名称 | /datasets/tags | PATCH | name, tag_id |
| 删除标签 | /datasets/tags | DELETE | tag_id |
| 绑定知识库到标签 | /datasets/tags/binding | POST | tag_ids, target_id |
| 解绑知识库和标签 | /datasets/tags/unbinding | POST | tag_id, target_id |
| 查询知识库已绑定的标签 | /datasets/{id}/tags | POST | |
---
## 功能统计
| 类别 | 已实现 | 未实现 | 不开放 | 完成度 |
|------|--------|--------|--------|--------|
| 知识库管理 | 3 | 1 | 1 | 75% |
| 文档管理 | 9 | 3 | 0 | 75% |
| 分段管理 | 5 | 0 | 0 | 100% |
| 子分段管理 | 4 | 0 | 0 | 100% |
| 检索功能 | 1 | 0 | 0 | 100% |
| 元数据管理 | 0 | 6 | 0 | 0% |
| 模型查询 | 0 | 1 | 0 | 0% |
| 标签管理 | 0 | 7 | 0 | 0% |
| **总计** | **22** | **18** | **1** | **55%** |
---
## 代码文件清单
### Remix 路由层 (服务端)
所有路由文件位于 `app/routes/` 目录:
| 文件名 | HTTP 方法 | 功能 |
|--------|----------|------|
| `api.dataset.datasets.tsx` | GET | 获取知识库列表 |
| `api.dataset.datasets.$datasetId.tsx` | GET / PATCH | 知识库详情 / 修改名称 |
| `api.dataset.datasets.$datasetId.documents.tsx` | GET / POST | 文档列表 / 上传文档 |
| `api.dataset.datasets.$datasetId.documents.$documentId.tsx` | GET / DELETE | 文档详情 / 删除文档 |
| `api.dataset.datasets.$datasetId.documents.$documentId.upload-file.tsx` | GET | 获取上传文件信息 |
| `api.dataset.datasets.$datasetId.documents.$batch.indexing-status.tsx` | GET | 获取嵌入状态 |
| `api.dataset.datasets.$datasetId.documents.status.$action.tsx` | PATCH | 更新文档状态 |
| `api.dataset.datasets.$datasetId.indexing-estimate.tsx` | POST | 索引预估(预览分段) |
| `api.dataset.datasets.$datasetId.documents.reprocess.tsx` | POST | 重新处理文档 |
| `api.dataset.datasets.$datasetId.documents.$documentId.segments.tsx` | GET / POST | 分段列表 / 新增分段 |
| `api.dataset.datasets.$datasetId.documents.$documentId.segments.$segmentId.tsx` | GET / POST / DELETE | 分段详情 / 更新 / 删除 |
| `api.dataset.datasets.$datasetId.documents.$documentId.segments.$segmentId.child_chunks.tsx` | GET / POST | 子分段列表 / 新增 |
| `api.dataset.datasets.$datasetId.documents.$documentId.segments.$segmentId.child_chunks.$childChunkId.tsx` | PATCH / DELETE | 子分段更新 / 删除 |
| `api.dataset.datasets.$datasetId.retrieve.tsx` | POST | 检索知识库 |
### 客户端 API 层 (浏览器侧)
```
app/api/dify-dataset/
├── index.ts # 统一导出
├── client.server.ts # 服务端基础请求函数(备用)
├── type/ # 类型定义
│ ├── index.ts # 类型统一导出
│ ├── commonTypes.ts # 通用类型
│ ├── datasetTypes.ts # 知识库类型
│ ├── documentTypes.ts # 文档类型
│ └── segmentTypes.ts # 分段/子分段/检索类型
└── api/ # API 调用函数
├── index.ts # 函数统一导出
├── datasetApi.ts # 知识库 API
├── documentApi.ts # 文档 API
└── segmentApi.ts # 分段/子分段/检索 API
```
### 客户端函数清单
**datasetApi.ts - 知识库管理**
```typescript
fetchDatasets(page, limit) // 获取知识库列表
fetchDataset(datasetId) // 获取知识库详情
updateDatasetName(datasetId, name) // 修改知识库名称
```
**documentApi.ts - 文档管理**
```typescript
fetchDocuments(datasetId, page, limit, keyword) // 获取文档列表
fetchDocument(datasetId, documentId) // 获取文档详情
deleteDocument(datasetId, documentId) // 删除文档
toggleDocumentStatus(datasetId, documentId, enabled) // 启用/禁用文档
uploadDocument(datasetId, file, onProgress) // 上传文档
fetchIndexingStatus(datasetId, batch) // 获取嵌入状态
fetchUploadFileInfo(datasetId, documentId) // 获取上传文件信息
fetchIndexingEstimate(datasetId, fileId, processRule, docForm, docLanguage) // 索引预估(预览分段效果)
reprocessDocument(datasetId, originalDocumentId, processRule, docForm, docLanguage) // 重新处理文档
```
**segmentApi.ts - 分段/子分段/检索**
```typescript
// 分段
fetchSegments(datasetId, documentId, page, limit, keyword)
fetchSegment(datasetId, documentId, segmentId)
createSegments(datasetId, documentId, segments)
updateSegment(datasetId, documentId, segmentId, segment)
deleteSegment(datasetId, documentId, segmentId)
toggleSegmentStatus(datasetId, documentId, segmentId, enabled)
// 子分段
fetchChildChunks(datasetId, documentId, segmentId, page, limit, keyword)
createChildChunk(datasetId, documentId, segmentId, content)
updateChildChunk(datasetId, documentId, segmentId, childChunkId, content)
deleteChildChunk(datasetId, documentId, segmentId, childChunkId)
// 检索
retrieveDataset(datasetId, query, retrievalModel)
```
### UI 组件
```
app/components/dify-dataset-manager/
├── index.tsx # 主容器组件 - 状态管理、标签页切换
├── layout.tsx # 布局组件 - 左侧菜单栏 + 右侧内容区
├── document-list.tsx # 文档列表 - 表格、搜索、上传、删除
├── document-detail.tsx # 文档详情 - 分段设置、预览块
├── retrieve-test.tsx # 召回测试 - 知识库检索测试
└── dataset-settings.tsx # 知识库设置 - 名称、描述修改
```
### 布局结构(仿 Dify 风格)
```
┌─────────────────────────────────────────────────────────────┐
│ dataset-layout │
├──────────────────┬──────────────────────────────────────────┤
│ dataset-sidebar │ dataset-main │
│ │ │
│ ┌─────────────┐ │ 根据 activeTab 渲染: │
│ │ 知识库信息 │ │ - documents → DocumentList │
│ │ (名称/数量) │ │ - documents + selectedDoc → DocumentDetail│
│ └─────────────┘ │ - retrieve → RetrieveTest │
│ │ - settings → DatasetSettings │
│ ┌─────────────┐ │ │
│ │ 文档 │ │ │
│ │ 召回测试 │ │ │
│ │ 设置 │ │ │
│ └─────────────┘ │ │
└──────────────────┴──────────────────────────────────────────┘
```
---
## 鉴权机制
### 三层认证流程
```
┌────────────────────────────────────────────────────────────────┐
│ 浏览器 → Remix 服务器 │
│ 认证方式:Cookie (会话中的 JWT) │
│ axios 配置:{ withCredentials: true } │
└────────────────────────────────────────────────────────────────┘
┌────────────────────────────────────────────────────────────────┐
│ Remix 服务器 → FastAPI │
│ 认证方式:Authorization: Bearer {frontendJWT} │
│ JWT 来源:getUserSession(request) │
└────────────────────────────────────────────────────────────────┘
┌────────────────────────────────────────────────────────────────┐
│ FastAPI → Dify API │
│ 认证方式:Authorization: Bearer {DATASET_API_KEY} │
│ API Key:服务端环境变量配置 │
└────────────────────────────────────────────────────────────────┘
```
### 关键配置
```typescript
// app/config/api-config.ts
export const API_BASE_URL = apiConfig.baseUrl; // 如:http://10.79.97.17:8000
// 根据端口自动选择配置
const portConfigs = {
'51703': { baseUrl: 'http://172.16.0.55:8073' }, // 梅州
'51704': { baseUrl: 'http://10.79.97.17:8001' }, // 云浮
'51707': { baseUrl: 'http://10.79.97.17:8004' }, // 省级
// ...
};
```
---
## 常见错误码
| code | status | message |
|------|--------|---------|
| no_file_uploaded | 400 | Please upload your file. |
| too_many_files | 400 | Only one file is allowed. |
| file_too_large | 413 | File size exceeded. |
| unsupported_file_type | 415 | File type not allowed. |
| high_quality_dataset_only | 400 | Current operation only supports 'high-quality' datasets. |
| dataset_not_initialized | 400 | The dataset is still being initialized or indexing. |
| archived_document_immutable | 403 | The archived document is not editable. |
| dataset_name_duplicate | 409 | The dataset name already exists. |
| invalid_action | 400 | Invalid action. |
| document_already_finished | 400 | The document has been processed. |
| document_indexing | 400 | The document is being processed and cannot be edited. |
| invalid_metadata | 400 | The metadata content is incorrect. |
---
## 文档分段设置(上传时配置)
### API 支持的分段参数
在上传文档时,可以通过 `process_rule` 参数配置分段设置:
```typescript
{
indexing_technique: 'high_quality' | 'economy',
process_rule: {
mode: 'automatic' | 'custom',
rules: {
pre_processing_rules: [
{ id: 'remove_extra_spaces', enabled: boolean }, // 替换连续空格
{ id: 'remove_urls_emails', enabled: boolean } // 删除URL和邮件
],
segmentation: {
separator: string, // 分段标识符,如 "\\n\\n"
max_tokens: number // 分段最大长度,100-4000
}
}
}
}
```
### 功能支持情况
| 功能 | API 支持 | 参数 | 说明 |
|------|----------|------|------|
| 分段标识符 | ✅ | `separator` | 如 `\\n\\n`、`###` |
| 分段最大长度 | ✅ | `max_tokens` | 100-4000 |
| 替换连续空格 | ✅ | `remove_extra_spaces` | 预处理规则 |
| 删除URL和邮件 | ✅ | `remove_urls_emails` | 预处理规则 |
| 分段重叠长度 | ❌ | - | API 不支持 |
| Q&A 分段 | ⚠️ | `doc_form: "qa_model"` | 需特殊配置 |
### 重要限制
⚠️ **已有文档无法直接修改分段设置**
Dify API 不支持直接修改已上传文档的分段规则。如需应用新设置,需采用以下两种方式之一:
1. 使用 `original_document_id` 参数重新上传文档
2. 删除文档后重新上传
---
## 优先级建议
### 高优先级(核心功能)
1. ~~**检索知识库**~~ ✅ 已实现
2. ~~获取文档嵌入状态~~ ✅ 已实现
3. **创建空知识库** - 让用户能创建新的知识库
4. **获取嵌入模型列表** - 创建知识库时需要选择模型
### 中优先级(完善功能)
5. **通过文本创建文档** - 支持直接输入文本
6. ~~**新增分段**~~ ✅ 已实现
7. ~~修改知识库详情~~ ✅ 已实现
8. ~~获取上传文件信息~~ ✅ 已实现
### 低优先级(扩展功能)
9. ~~子分段管理系列~~ ✅ 已实现(API层)
10. 元数据管理系列
11. 标签管理系列
12. 文档更新功能
+3 -1
View File
@@ -18,6 +18,8 @@
},
"dependencies": {
"@ant-design/icons": "^5.6.1",
"@ant-design/x": "^2.0.0",
"@ant-design/x-markdown": "^2.0.0",
"@codemirror/lang-javascript": "^6.2.3",
"@codemirror/theme-one-dark": "^6.1.2",
"@monaco-editor/react": "^4.7.0",
@@ -27,7 +29,7 @@
"@types/jsonwebtoken": "^9.0.10",
"@uiw/react-codemirror": "^4.23.10",
"ahooks": "^3.8.5",
"antd": "^5.25.4",
"antd": "^6.0.0",
"axios": "^1.9.0",
"dayjs": "^1.11.13",
"diff": "^7.0.0",
+1894 -611
View File
File diff suppressed because it is too large Load Diff
+24 -2
View File
@@ -53,7 +53,7 @@ export default defineConfig({
server: {
host: '0.0.0.0',
// port: 5173,
port: Number(process.env.PORT) || 5173,
port: Number(process.env.PORT) || 51703,
open: true,
// open: false,
allowedHosts: ['nas.7bm.co', 'localhost', '127.0.0.1'], // 允许的主机名列表1
@@ -69,7 +69,29 @@ export default defineConfig({
// 防止依赖预构建时触发页面刷新导致路由中断
force: false,
// 预构建这些依赖,避免首次加载时出现重新构建
include: ['react-pdf', 'pdfjs-dist', 'dayjs', '@remix-run/node', 'react-dom', 'axios', 'dayjs/plugin/utc', '@remix-run/react', 'react-router-dom', 'jszip', 'ahooks', 'antd', 'immer', '@ant-design/icons', 'react-markdown', 'remark-math', 'remark-breaks', 'rehype-katex', 'remark-gfm'],
include: [
'react-pdf',
'pdfjs-dist',
'dayjs',
'@remix-run/node',
'react-dom',
'axios',
'dayjs/plugin/utc',
'@remix-run/react',
'react-router-dom',
'jszip',
'ahooks',
'antd',
'immer',
'@ant-design/icons',
'react-markdown',
'remark-math',
'remark-breaks',
'rehype-katex',
'remark-gfm',
// Ant Design X 相关依赖
'@ant-design/x',
],
},
// SSR 配置 - 排除只能在客户端运行的包
// ssr: {