feat:完成上传文档时调整嵌入参数模块初版

This commit is contained in:
PingChuan
2025-12-02 22:29:32 +08:00
parent 1baf99fe28
commit 0f49426a2e
7 changed files with 2637 additions and 145 deletions
+151 -20
View File
@@ -121,7 +121,7 @@ export async function toggleDocumentStatus(
}
/**
* 上传文件到知识库
* 上传文件到知识库(使用默认配置)
*
* @param datasetId - 知识库 ID
* @param file - 文件对象
@@ -132,19 +132,44 @@ export async function uploadDocument(
datasetId: string,
file: File,
onProgress?: (percent: number) => void
): Promise<any> {
): Promise<UploadDocumentResponse> {
return uploadDocumentWithConfig(
datasetId,
file,
{
indexing_technique: 'high_quality',
process_rule: { mode: 'automatic' },
},
onProgress
);
}
/**
* 上传文件到知识库(使用自定义配置)
*
* @param datasetId - 知识库 ID
* @param file - 文件对象
* @param config - 上传配置(索引模式、分段规则等)
* @param onProgress - 上传进度回调
* @returns 创建的文档信息,包含 batch 用于查询索引进度
*/
export async function uploadDocumentWithConfig(
datasetId: string,
file: File,
config: UploadDocumentConfig,
onProgress?: (percent: number) => void
): Promise<UploadDocumentResponse> {
const formData = new FormData();
formData.append('file', file);
formData.append('data', JSON.stringify({
indexing_technique: 'high_quality',
process_rule: {
mode: 'automatic',
},
}));
formData.append('data', JSON.stringify(config));
console.log('[Dataset Client] 上传文档:', { datasetId, fileName: file.name });
console.log('[Dataset Client] 上传文档:', {
datasetId,
fileName: file.name,
config,
});
const response = await axios.post(
const response = await axios.post<UploadDocumentResponse>(
`${API_URL}/datasets/${datasetId}/documents`,
formData,
{
@@ -200,21 +225,41 @@ export async function fetchUploadFileInfo(
return response.data;
}
/**
 * Identifier of a text pre-processing rule.
 * Only the two IDs listed in the union are accepted.
 */
export type PreProcessingRuleId = 'remove_extra_spaces' | 'remove_urls_emails';
/**
 * A single pre-processing rule: which rule it is and whether it is switched on.
 */
export interface PreProcessingRule {
id: PreProcessingRuleId;
enabled: boolean;
}
/**
 * Segmentation (chunking) configuration sent as part of a custom process rule.
 *
 * - `separator`: the segment delimiter string.
 * - `max_tokens`: the maximum segment length.
 */
export interface SegmentationConfig {
separator: string;
max_tokens: number;
}
/**
 * Custom processing rules. Both parts are optional.
 */
export interface CustomRules {
pre_processing_rules?: PreProcessingRule[];
segmentation?: SegmentationConfig;
}
/**
* 文档处理规则配置
*/
export interface ProcessRule {
mode: 'automatic' | 'custom';
rules?: {
pre_processing_rules?: Array<{
id: 'remove_extra_spaces' | 'remove_urls_emails';
enabled: boolean;
}>;
segmentation?: {
separator: string;
max_tokens: number;
};
};
rules?: CustomRules;
}
/**
@@ -225,6 +270,45 @@ export interface UpdateDocumentSettings {
process_rule?: ProcessRule;
}
/**
 * Configuration sent alongside an uploaded file.
 * It is JSON-serialized into the `data` field of the multipart form.
 */
export interface UploadDocumentConfig {
indexing_technique: 'high_quality' | 'economy';
process_rule: ProcessRule;
}
/**
 * Response returned after a document is uploaded or updated by file.
 *
 * - `document`: the created/updated document record.
 * - `batch`: batch identifier used to query the indexing progress.
 */
export interface UploadDocumentResponse {
document: {
id: string;
position: number;
data_source_type: string;
data_source_info: {
upload_file_id: string;
};
dataset_process_rule_id: string;
name: string;
created_from: string;
created_by: string;
// NOTE(review): numeric timestamp; unit (seconds vs. milliseconds) is not visible here - confirm
created_at: number;
tokens: number;
indexing_status: string;
error: string | null;
enabled: boolean;
disabled_at: number | null;
disabled_by: string | null;
archived: boolean;
display_status: string;
word_count: number;
hit_count: number;
doc_form: string;
};
batch: string;
}
/**
* 更新文档设置并重新处理
* 注意:Dify API 不直接支持修改已有文档的分段设置
@@ -252,3 +336,50 @@ export async function updateDocumentWithSettings(
);
return response.data;
}
/**
 * Updates an existing document by submitting a file again.
 * Dify API: POST /datasets/{dataset_id}/documents/{document_id}/update-by-file
 *
 * Used to re-process a document with the same file after the user has
 * changed the segmentation parameters.
 *
 * @param datasetId - Knowledge base (dataset) ID
 * @param documentId - ID of the document to update
 * @param file - File object (the originally uploaded file)
 * @param config - New indexing / segmentation configuration
 * @param onProgress - Optional callback receiving the upload percentage
 * @returns The updated document info, including the new batch used to query indexing progress
 */
export async function updateDocumentByFile(
  datasetId: string,
  documentId: string,
  file: File,
  config: UploadDocumentConfig,
  onProgress?: (percent: number) => void
): Promise<UploadDocumentResponse> {
  // Multipart payload: the raw file plus the JSON-encoded configuration.
  const payload = new FormData();
  payload.append('file', file);
  payload.append('data', JSON.stringify(config));

  console.log('[Dataset Client] 通过文件更新文档:', {
    datasetId,
    documentId,
    fileName: file.name,
    config,
  });

  const endpoint = `${API_URL}/datasets/${datasetId}/documents/${documentId}/update-by-file`;
  const { data } = await axios.post<UploadDocumentResponse>(endpoint, payload, {
    withCredentials: true,
    onUploadProgress: ({ loaded, total }) => {
      // Progress can only be reported when the total size is known and a listener exists.
      if (!total || !onProgress) {
        return;
      }
      onProgress(Math.round((loaded * 100) / total));
    },
  });

  return data;
}
+9
View File
@@ -18,10 +18,19 @@ export {
deleteDocument,
toggleDocumentStatus,
uploadDocument,
uploadDocumentWithConfig,
updateDocumentByFile,
fetchIndexingStatus,
fetchUploadFileInfo,
} from './documentApi';
// 文档 API 类型
export type {
ProcessRule,
UploadDocumentConfig,
UploadDocumentResponse,
} from './documentApi';
// 分段、子分段、检索 API
export {
fetchSegments,
@@ -11,7 +11,6 @@ import {
message,
Empty,
Spin,
Upload,
} from 'antd';
import {
SearchOutlined,
@@ -28,7 +27,8 @@ import {
} from '@ant-design/icons';
import type { ColumnsType } from 'antd/es/table';
import type { Document, IndexingStatus } from '~/api/dify-dataset/type/documentTypes';
import { deleteDocument, toggleDocumentStatus, uploadDocument } from '~/api/dify-dataset/api/documentApi';
import { deleteDocument, toggleDocumentStatus } from '~/api/dify-dataset/api/documentApi';
import DocumentUpload from './document-upload';
import '../../styles/components/dify-dataset-manager/index.css';
interface DocumentListProps {
@@ -63,9 +63,11 @@ export default function DocumentList({
onViewDocument,
}: DocumentListProps) {
const [searchValue, setSearchValue] = useState('');
const [uploading, setUploading] = useState(false);
const [deletingId, setDeletingId] = useState<string | null>(null);
// 显示上传页面的状态
const [showUploadPage, setShowUploadPage] = useState(false);
/**
* 获取状态标签配置
*/
@@ -141,29 +143,29 @@ export default function DocumentList({
};
/**
* 处理文件上传
* 点击上传按钮,显示上传页面
*/
const handleUpload = async (file: File) => {
const handleUploadClick = () => {
if (!datasetId) {
message.error('请先选择知识库');
return false;
return;
}
setShowUploadPage(true);
};
setUploading(true);
try {
await uploadDocument(datasetId, file, (percent) => {
console.log('上传进度:', percent);
});
message.success('上传成功,正在处理...');
onRefresh();
} catch (err: any) {
console.error('上传文件失败:', err);
message.error(err.message || '上传失败');
} finally {
setUploading(false);
}
/**
* 关闭上传页面
*/
const handleUploadClose = () => {
setShowUploadPage(false);
};
return false;
/**
* 上传成功回调
*/
const handleUploadSuccess = () => {
setShowUploadPage(false);
onRefresh();
};
// 过滤文档
@@ -271,115 +273,114 @@ export default function DocumentList({
];
return (
<div className="document-list-page">
{/* 页面头部 */}
<div className="page-header">
<div className="header-left">
<h1></h1>
{/* <p className="page-description">
知识库的所有文件都在这里显示,整个知识库都可以被接到 Dify 引用或通过 Chat 插件进行索引。
</p> */}
</div>
<div className="header-actions">
<Tooltip title="刷新">
<Button
icon={<ReloadOutlined />}
onClick={onRefresh}
loading={loading}
/>
</Tooltip>
<Upload
beforeUpload={handleUpload}
showUploadList={false}
accept=".txt,.md,.pdf,.docx,.doc,.csv,.xlsx,.xls"
disabled={!datasetId}
>
<Button
type="primary"
icon={<CloudUploadOutlined />}
loading={uploading}
disabled={!datasetId}
>
</Button>
</Upload>
</div>
</div>
{/* 搜索栏 */}
<div className="document-search-bar">
<Input
placeholder="搜索文档..."
prefix={<SearchOutlined />}
value={searchValue}
onChange={(e) => setSearchValue(e.target.value)}
allowClear
style={{ width: 280 }}
<>
{/* 上传页面 */}
{showUploadPage ? (
<DocumentUpload
datasetId={datasetId}
onClose={handleUploadClose}
onSuccess={handleUploadSuccess}
/>
</div>
{/* 文档表格 */}
<div className="document-table-wrapper">
{loading && documents.length === 0 ? (
<div className="loading-state">
<Spin size="large" />
<div className="loading-text">...</div>
) : (
<div className="document-list-page">
{/* 页面头部 */}
<div className="page-header">
<div className="header-left">
<h1></h1>
</div>
<div className="header-actions">
<Tooltip title="刷新">
<Button
icon={<ReloadOutlined />}
onClick={onRefresh}
loading={loading}
/>
</Tooltip>
<Button
type="primary"
icon={<CloudUploadOutlined />}
onClick={handleUploadClick}
disabled={!datasetId}
>
</Button>
</div>
</div>
) : filteredDocuments.length === 0 ? (
<div className="empty-state">
<Empty description={searchValue ? '未找到匹配的文档' : '暂无文档'}>
{!searchValue && (
<Upload
beforeUpload={handleUpload}
showUploadList={false}
accept=".txt,.md,.pdf,.docx,.doc,.csv,.xlsx,.xls"
{/* 搜索栏 */}
<div className="document-search-bar">
<Input
placeholder="搜索文档..."
prefix={<SearchOutlined />}
value={searchValue}
onChange={(e) => setSearchValue(e.target.value)}
allowClear
style={{ width: 280 }}
/>
</div>
{/* 文档表格 */}
<div className="document-table-wrapper">
{loading && documents.length === 0 ? (
<div className="loading-state">
<Spin size="large" />
<div className="loading-text">...</div>
</div>
) : filteredDocuments.length === 0 ? (
<div className="empty-state">
<Empty description={searchValue ? '未找到匹配的文档' : '暂无文档'}>
{!searchValue && (
<Button
type="primary"
icon={<CloudUploadOutlined />}
onClick={handleUploadClick}
>
</Button>
)}
</Empty>
</div>
) : (
<Table
className="document-table"
columns={columns}
dataSource={filteredDocuments}
rowKey="id"
loading={loading}
pagination={false}
size="small"
scroll={{ x: 'max-content' }}
/>
)}
</div>
{/* 底部分页器 */}
{filteredDocuments.length > 0 && (
<div className="document-pagination">
<span className="pagination-total"> {total} </span>
<div className="pagination-controls">
<Button
size="small"
disabled={page <= 1}
onClick={() => onPageChange(page - 1)}
>
<Button type="primary" icon={<CloudUploadOutlined />}>
</Button>
</Upload>
)}
</Empty>
</div>
) : (
<Table
className="document-table"
columns={columns}
dataSource={filteredDocuments}
rowKey="id"
loading={loading}
pagination={false}
size="small"
scroll={{ x: 'max-content' }}
/>
)}
</div>
{/* 底部分页器 */}
{filteredDocuments.length > 0 && (
<div className="document-pagination">
<span className="pagination-total"> {total} </span>
<div className="pagination-controls">
<Button
size="small"
disabled={page <= 1}
onClick={() => onPageChange(page - 1)}
>
</Button>
<span className="pagination-info">
{page} / {Math.ceil(total / pageSize)}
</span>
<Button
size="small"
disabled={page >= Math.ceil(total / pageSize)}
onClick={() => onPageChange(page + 1)}
>
</Button>
</div>
</Button>
<span className="pagination-info">
{page} / {Math.ceil(total / pageSize)}
</span>
<Button
size="small"
disabled={page >= Math.ceil(total / pageSize)}
onClick={() => onPageChange(page + 1)}
>
</Button>
</div>
</div>
)}
</div>
)}
</div>
</>
);
}
@@ -0,0 +1,914 @@
import { useState, useEffect, useRef, useCallback } from 'react';
import {
Input,
Button,
InputNumber,
Checkbox,
Select,
Card,
Empty,
Spin,
message,
Divider,
Tooltip,
Progress,
Upload,
} from 'antd';
import type { UploadFile, UploadProps } from 'antd';
import {
QuestionCircleOutlined,
FileTextOutlined,
CheckCircleOutlined,
LoadingOutlined,
ExclamationCircleOutlined,
InboxOutlined,
DeleteOutlined,
ArrowLeftOutlined,
} from '@ant-design/icons';
import type { IndexingStatus } from '~/api/dify-dataset/type/documentTypes';
import type { Segment } from '~/api/dify-dataset/type';
import {
uploadDocumentWithConfig,
updateDocumentByFile,
fetchIndexingStatus,
} from '~/api/dify-dataset/api/documentApi';
import { fetchSegments } from '~/api/dify-dataset/api/segmentApi';
const { Dragger } = Upload;
/**
 * Props of the DocumentUpload component.
 *
 * - `datasetId`: knowledge base the files are uploaded to.
 * - `onClose`: called when the upload page should be closed.
 * - `onSuccess`: called before closing when at least one document finished processing.
 */
interface DocumentUploadProps {
datasetId: string;
onClose: () => void;
onSuccess: () => void;
}
/**
 * Segmentation settings as edited in the UI (camelCase, form-friendly).
 *
 * NOTE(review): `chunkOverlap` is editable in the form, but the request
 * configuration built by `buildConfig` does not include it - confirm whether
 * it should be sent to the backend.
 */
interface SegmentationSettings {
separator: string;
maxTokens: number;
chunkOverlap: number;
removeExtraSpaces: boolean;
removeUrlsEmails: boolean;
indexingTechnique: 'high_quality' | 'economy';
}
/**
 * Default segmentation settings.
 *
 * `separator` holds the escaped text `\n\n` (backslash + "n", twice) so it can
 * be shown and edited in a text input; `buildConfig` converts it to real
 * newline characters before sending.
 */
const DEFAULT_SETTINGS: SegmentationSettings = {
separator: '\\n\\n',
maxTokens: 1024,
chunkOverlap: 50,
removeExtraSpaces: true,
removeUrlsEmails: false,
indexingTechnique: 'high_quality',
};
/**
 * Client-side lifecycle stage of a single document in the upload flow.
 */
type DocumentStage = 'pending' | 'uploading' | 'indexing' | 'completed' | 'error';
/**
 * Per-file state tracked during the upload flow (multiple files supported).
 *
 * `documentId` and `batch` are empty strings until the upload request returns.
 */
interface UploadedDocument {
file: File;
documentId: string;
batch: string;
stage: DocumentStage;
indexingStatus: IndexingStatus;
uploadProgress: number;
error?: string;
settings: SegmentationSettings;
segments: Segment[];
}
/**
 * Display configuration per indexing status: the status text shown to the
 * user and the percentage used for the progress bar.
 */
const INDEXING_STATUS_CONFIG: Record<IndexingStatus, { text: string; percent: number }> = {
waiting: { text: '等待处理...', percent: 10 },
parsing: { text: '解析文档...', percent: 30 },
cleaning: { text: '清洗文本...', percent: 50 },
splitting: { text: '分段处理...', percent: 70 },
indexing: { text: '建立索引...', percent: 85 },
completed: { text: '处理完成', percent: 100 },
paused: { text: '已暂停', percent: 0 },
error: { text: '处理失败', percent: 0 },
};
/**
 * Human-readable list of supported file formats, shown as a hint in the drop zone.
 */
const SUPPORTED_FORMATS = 'TXT, MARKDOWN, MDX, PDF, HTML, XLSX, XLS, DOCX, CSV, VTT, PROPERTIES, MD, HTM';
/**
* 文档上传组件
* 支持多文件上传,两步流程:选择文件 → 上传并配置分段
*/
export default function DocumentUpload({
datasetId,
onClose,
onSuccess,
}: DocumentUploadProps) {
// Step control: 1 = choose files, 2 = upload and configure segmentation
const [step, setStep] = useState<1 | 2>(1);
// File selection
const [selectedFiles, setSelectedFiles] = useState<File[]>([]);
const [fileList, setFileList] = useState<UploadFile[]>([]);
// State of every document in the current upload run (multi-file)
const [uploadedDocuments, setUploadedDocuments] = useState<UploadedDocument[]>([]);
// Index of the document currently selected for viewing
const [currentDocIndex, setCurrentDocIndex] = useState(0);
// Segmentation settings currently displayed (taken from the selected document)
const [currentSettings, setCurrentSettings] = useState<SegmentationSettings>(DEFAULT_SETTINGS);
// Preview loading flag
const [previewLoading, setPreviewLoading] = useState(false);
// Polling timers, keyed by document ID (one timer per document)
const pollingTimersRef = useRef<Map<string, NodeJS.Timeout>>(new Map());
// Clear every polling timer when the component unmounts
useEffect(() => {
return () => {
pollingTimersRef.current.forEach(timer => clearInterval(timer));
pollingTimersRef.current.clear();
};
}, []);
/**
 * Stops the polling timer of one document, if one is registered.
 */
const stopPolling = useCallback((documentId: string) => {
  const timers = pollingTimersRef.current;
  const activeTimer = timers.get(documentId);
  if (!activeTimer) {
    return;
  }
  clearInterval(activeTimer);
  timers.delete(documentId);
}, []);
/**
 * Stops every registered polling timer and empties the registry.
 */
const stopAllPolling = useCallback(() => {
  const timers = pollingTimersRef.current;
  timers.forEach((activeTimer) => {
    clearInterval(activeTimer);
  });
  timers.clear();
}, []);
/**
 * Fetches the first page (up to 50) of segments of a document and stores them
 * on the document at position `docIndex` for the preview panel.
 */
const loadSegmentsPreview = useCallback(async (documentId: string, docIndex: number) => {
  setPreviewLoading(true);
  try {
    const response = await fetchSegments(datasetId, documentId, 1, 50);
    const loadedSegments = response.data || [];
    // Attach the segments to the matching document only.
    setUploadedDocuments(prev =>
      prev.map((doc, idx) => {
        if (idx !== docIndex) {
          return doc;
        }
        return { ...doc, segments: loadedSegments };
      })
    );
  } catch (err: any) {
    console.error('加载分段预览失败:', err);
    message.error('加载分段预览失败');
  } finally {
    setPreviewLoading(false);
  }
}, [datasetId]);
/**
 * Queries the indexing status of a batch once and mirrors it into the
 * document at position `docIndex`.
 *
 * On a terminal status (`completed` / `error`) the document's polling timer is
 * stopped; on `completed` the segment preview is loaded as well. Request
 * failures are only logged so that the next tick can retry.
 *
 * @param batch - Batch identifier returned by the upload/update request
 * @param documentId - Document whose polling timer is controlled
 * @param docIndex - Position of the document in `uploadedDocuments`
 */
const pollIndexingStatus = useCallback(async (batch: string, documentId: string, docIndex: number) => {
  try {
    const response = await fetchIndexingStatus(datasetId, batch);
    const documentStatus = response.data?.[0];
    if (!documentStatus) {
      return;
    }

    const status = documentStatus.indexing_status as IndexingStatus;
    if (status === 'completed' || status === 'error') {
      stopPolling(documentId);
    }

    // Single state update: status plus, for terminal states, the resulting stage.
    setUploadedDocuments(prev => prev.map((doc, idx): UploadedDocument => {
      if (idx !== docIndex) {
        return doc;
      }
      if (status === 'completed') {
        return { ...doc, indexingStatus: status, stage: 'completed' };
      }
      if (status === 'error') {
        return {
          ...doc,
          indexingStatus: status,
          stage: 'error',
          error: documentStatus.error || '处理失败',
        };
      }
      return { ...doc, indexingStatus: status };
    }));

    if (status === 'completed') {
      // Load the segment preview automatically once processing is done.
      loadSegmentsPreview(documentId, docIndex);
    }
  } catch (err) {
    console.error('获取索引状态失败:', err);
  }
  // `uploadedDocuments` is intentionally not a dependency: it is never read here,
  // and listing it would re-create this callback on every progress update.
}, [datasetId, stopPolling, loadSegmentsPreview]);
/**
 * Starts polling the indexing status of a document every 2 seconds,
 * replacing any timer that is already registered for it.
 */
const startPolling = useCallback((batch: string, documentId: string, docIndex: number) => {
  // Make sure only one timer exists per document.
  stopPolling(documentId);

  const poll = () => {
    pollIndexingStatus(batch, documentId, docIndex);
  };
  pollingTimersRef.current.set(documentId, setInterval(poll, 2000));

  // Run once right away instead of waiting for the first tick.
  poll();
}, [stopPolling, pollIndexingStatus]);
/**
 * Converts the UI segmentation settings into the upload configuration payload
 * (always a `custom` process rule).
 *
 * The separator is edited in escaped form, so every literal `\n`
 * (backslash + "n") is turned into a real newline character here.
 *
 * NOTE(review): `chunkOverlap` is not part of the produced configuration.
 */
const buildConfig = (s: SegmentationSettings) => {
  const { separator, maxTokens, removeExtraSpaces, removeUrlsEmails, indexingTechnique } = s;
  const unescapedSeparator = separator.split('\\n').join('\n');

  return {
    indexing_technique: indexingTechnique,
    process_rule: {
      mode: 'custom' as const,
      rules: {
        pre_processing_rules: [
          { id: 'remove_extra_spaces' as const, enabled: removeExtraSpaces },
          { id: 'remove_urls_emails' as const, enabled: removeUrlsEmails },
        ],
        segmentation: {
          separator: unescapedSeparator,
          max_tokens: maxTokens,
        },
      },
    },
  };
};
/**
 * Changes one field of the displayed settings and mirrors the new settings
 * onto the currently selected document.
 */
const updateCurrentSettings = (key: keyof SegmentationSettings, value: any) => {
  const nextSettings = { ...currentSettings, [key]: value };
  setCurrentSettings(nextSettings);

  // Keep the document list in sync with the form.
  setUploadedDocuments(prev =>
    prev.map((doc, idx) => {
      if (idx !== currentDocIndex) {
        return doc;
      }
      return { ...doc, settings: nextSettings };
    })
  );
};
/**
 * Handles a change of the file selection in the drop zone.
 */
const handleFileChange: UploadProps['onChange'] = (info) => {
  const nextFileList = info.fileList;
  setFileList(nextFileList);

  // Collect the underlying File objects; entries without one are skipped.
  const pickedFiles: File[] = [];
  nextFileList.forEach((entry) => {
    if (entry.originFileObj) {
      pickedFiles.push(entry.originFileObj as File);
    }
  });
  setSelectedFiles(pickedFiles);
};
/**
 * Removes one file from the current selection.
 */
const handleRemoveFile = (file: UploadFile) => {
  const remaining = fileList.filter(entry => entry.uid !== file.uid);
  setFileList(remaining);

  // Re-derive the plain File list from what is left.
  const remainingFiles: File[] = [];
  remaining.forEach((entry) => {
    if (entry.originFileObj) {
      remainingFiles.push(entry.originFileObj as File);
    }
  });
  setSelectedFiles(remainingFiles);
};
/**
 * Uploads one file with the default settings, tracks its progress on the
 * document at position `index`, and starts polling once the upload returns.
 * Failures are recorded on the document instead of being thrown.
 */
const uploadSingleFile = async (file: File, index: number): Promise<void> => {
  // Applies a partial change to the document at `index` only.
  const patchDocument = (changes: Partial<UploadedDocument>) => {
    setUploadedDocuments(prev =>
      prev.map((doc, idx) => (idx === index ? { ...doc, ...changes } : doc))
    );
  };

  try {
    patchDocument({ stage: 'uploading' });

    const config = buildConfig(DEFAULT_SETTINGS);
    const result = await uploadDocumentWithConfig(
      datasetId,
      file,
      config,
      (percent) => {
        patchDocument({ uploadProgress: percent });
      }
    );

    const createdId = result.document.id;
    const createdBatch = result.batch;
    patchDocument({
      documentId: createdId,
      batch: createdBatch,
      stage: 'indexing',
      indexingStatus: 'waiting',
    });

    // Follow the indexing progress of the new document.
    startPolling(createdBatch, createdId, index);
  } catch (err: any) {
    console.error(`上传文档 ${file.name} 失败:`, err);
    patchDocument({
      stage: 'error',
      error: err.message || '上传失败',
    });
  }
};
/**
 * "Next" button: moves to step 2 and uploads all selected files one after another.
 */
const handleNextStep = async () => {
  if (selectedFiles.length === 0) {
    message.warning('请先选择文件');
    return;
  }

  // One pending entry per selected file, each with its own copy of the defaults.
  const initialDocs: UploadedDocument[] = selectedFiles.map((file): UploadedDocument => {
    return {
      file,
      documentId: '',
      batch: '',
      stage: 'pending',
      indexingStatus: 'waiting',
      uploadProgress: 0,
      settings: { ...DEFAULT_SETTINGS },
      segments: [],
    };
  });

  setUploadedDocuments(initialDocs);
  setCurrentDocIndex(0);
  setCurrentSettings({ ...DEFAULT_SETTINGS });
  setStep(2);

  // Sequential on purpose: each upload is awaited before the next one starts.
  let position = 0;
  for (const file of selectedFiles) {
    await uploadSingleFile(file, position);
    position += 1;
  }
};
/**
 * Switches the document shown in the preview. `docId` is matched against
 * either the document ID or the file name.
 */
const handleDocumentChange = (docId: string) => {
  const matchIndex = uploadedDocuments.findIndex(
    ({ documentId, file }) => documentId === docId || file.name === docId
  );
  if (matchIndex === -1) {
    return;
  }
  setCurrentDocIndex(matchIndex);
  setCurrentSettings(uploadedDocuments[matchIndex].settings);
};
/**
 * Re-processes the currently selected document with the settings shown in the
 * form, by re-submitting its file, then resumes polling with the new batch.
 */
const handleReprocess = async () => {
  const targetIndex = currentDocIndex;
  const currentDoc = uploadedDocuments[targetIndex];
  if (!currentDoc || !currentDoc.documentId) return;

  // Applies a partial change to the document being re-processed only.
  const patchDocument = (changes: Partial<UploadedDocument>) => {
    setUploadedDocuments(prev =>
      prev.map((doc, idx) => (idx === targetIndex ? { ...doc, ...changes } : doc))
    );
  };

  // Reset progress and drop the stale preview.
  patchDocument({
    stage: 'uploading',
    uploadProgress: 0,
    segments: [],
  });

  try {
    const config = buildConfig(currentSettings);
    const result = await updateDocumentByFile(
      datasetId,
      currentDoc.documentId,
      currentDoc.file,
      config,
      (percent) => {
        patchDocument({ uploadProgress: percent });
      }
    );

    // The update yields a new batch to poll.
    patchDocument({
      batch: result.batch,
      stage: 'indexing',
      indexingStatus: 'waiting',
    });
    startPolling(result.batch, currentDoc.documentId, targetIndex);
  } catch (err: any) {
    console.error('重新处理失败:', err);
    patchDocument({
      stage: 'error',
      error: err.message || '重新处理失败',
    });
    message.error(err.message || '重新处理失败');
  }
};
/**
 * Goes back to step 1 and resets the upload state. Refused while any
 * document is still uploading or indexing.
 */
const handlePrevStep = () => {
  const stillProcessing = uploadedDocuments.some(({ stage }) => {
    return stage === 'uploading' || stage === 'indexing';
  });
  if (stillProcessing) {
    message.warning('还有文档正在处理中,请等待完成');
    return;
  }

  stopAllPolling();
  setStep(1);
  setUploadedDocuments([]);
  setCurrentDocIndex(0);
  setCurrentSettings(DEFAULT_SETTINGS);
};
/**
 * Leaves the upload page: stops polling, notifies success when at least one
 * document completed, then closes.
 */
const handleGoToDocuments = () => {
  stopAllPolling();

  const anyCompleted = uploadedDocuments.findIndex(doc => doc.stage === 'completed') !== -1;
  if (anyCompleted) {
    onSuccess();
  }

  onClose();
};
/**
 * Returns the currently selected document, or null when there is none.
 */
const getCurrentDocument = (): UploadedDocument | null => {
  const selected = uploadedDocuments[currentDocIndex];
  return selected ? selected : null;
};
/**
 * Returns the progress percentage of the selected document: the upload
 * percentage while uploading, the status-based percentage while indexing or
 * completed, and 0 otherwise.
 */
const getCurrentProgress = () => {
  const doc = getCurrentDocument();
  if (!doc) return 0;

  switch (doc.stage) {
    case 'uploading':
      return doc.uploadProgress;
    case 'indexing':
    case 'completed':
      return INDEXING_STATUS_CONFIG[doc.indexingStatus]?.percent || 0;
    default:
      return 0;
  }
};
/**
 * Returns the status text of the selected document for the progress area.
 */
const getStatusText = () => {
  const doc = getCurrentDocument();
  if (!doc) return '';

  switch (doc.stage) {
    case 'uploading':
      return `正在上传... ${doc.uploadProgress}%`;
    case 'indexing':
      return INDEXING_STATUS_CONFIG[doc.indexingStatus]?.text || '处理中...';
    case 'completed':
      return '处理完成';
    case 'error':
      return doc.error || '处理失败';
    default:
      return '';
  }
};
/**
 * Tells whether the selected document is currently uploading or indexing.
 */
const isCurrentDocProcessing = () => {
  const stage = getCurrentDocument()?.stage;
  return stage === 'uploading' || stage === 'indexing';
};
/**
 * Counts how many documents have completed out of the total.
 */
const getCompletionStats = () => {
  let completed = 0;
  uploadedDocuments.forEach((doc) => {
    if (doc.stage === 'completed') {
      completed += 1;
    }
  });
  return { completed, total: uploadedDocuments.length };
};
/**
 * Renders the step indicator of the two-step flow.
 * Step 1 is marked `active` while current and `completed` once step 2 is reached.
 */
const renderSteps = () => (
<div className="upload-steps">
<div className={`step-item ${step === 1 ? 'active' : ''} ${step > 1 ? 'completed' : ''}`}>
<span className="step-number">1</span>
<span className="step-title"></span>
</div>
<div className={`step-divider ${step > 1 ? 'completed' : ''}`}></div>
<div className={`step-item ${step === 2 ? 'active' : ''}`}>
<span className="step-number">2</span>
<span className="step-title"></span>
</div>
</div>
);
/**
 * Renders step 1: file selection (multiple files supported).
 * `beforeUpload` returns false so the Dragger never uploads by itself; files
 * are only collected here and uploaded when the "next" button is pressed.
 */
const renderStep1 = () => (
<div className="upload-step-content step1">
<h2 className="step-heading"></h2>
<p className="step-description">
广
</p>
<div className="file-drop-zone">
<Dragger
fileList={fileList}
onChange={handleFileChange}
beforeUpload={() => false}
multiple={true}
accept=".txt,.md,.mdx,.pdf,.html,.htm,.xlsx,.xls,.docx,.csv,.vtt,.properties"
showUploadList={false}
>
<p className="ant-upload-drag-icon">
<InboxOutlined />
</p>
<p className="ant-upload-text"> <span className="upload-link"></span></p>
<p className="ant-upload-hint">
{SUPPORTED_FORMATS} 15MB
</p>
</Dragger>
</div>
{/* Selected file list */}
{selectedFiles.length > 0 && (
<div className="selected-files-section">
<h3 className="section-subtitle"> ({selectedFiles.length} )</h3>
<div className="selected-files-list">
{fileList.map((file) => (
<div key={file.uid} className="selected-file-item">
<FileTextOutlined className="file-icon" />
<div className="file-info">
<span className="file-name">{file.name}</span>
<span className="file-size">
{file.originFileObj
? `${file.originFileObj.type?.split('/')[1]?.toUpperCase() || 'FILE'}${(file.originFileObj.size / 1024 / 1024).toFixed(2)}MB`
: ''}
</span>
</div>
<Button
type="text"
icon={<DeleteOutlined />}
onClick={() => handleRemoveFile(file)}
className="remove-file-btn"
/>
</div>
))}
</div>
</div>
)}
<div className="step-actions">
<Button
type="primary"
onClick={handleNextStep}
disabled={selectedFiles.length === 0}
className="next-btn"
>
</Button>
</div>
</div>
);
/**
 * Renders step 2: segmentation settings and preview.
 *
 * The settings panel is always shown on the left. The preview card on the
 * right shows, in this order of precedence: processing progress, the error of
 * the selected document, a loading spinner, an empty state, or the segments.
 * All inputs are disabled while the selected document is being processed.
 */
const renderStep2 = () => {
  const currentDoc = getCurrentDocument();
  const isProcessing = isCurrentDocProcessing();
  const stats = getCompletionStats();
  return (
    <div className="upload-step-content step2">
      {/* Segmentation settings and preview */}
      <div className="document-detail-content">
        {/* Left: settings */}
        <div className="settings-panel">
          <div className="settings-section">
            <h3 className="section-title"></h3>
            {/* Segment separator */}
            <div className="setting-item">
              <label className="setting-label">
                <Tooltip title="系统会在遇到指定分隔符时自动分段,默认值为 \n\n(按段落分段)">
                  <QuestionCircleOutlined className="help-icon" />
                </Tooltip>
              </label>
              <Input
                value={currentSettings.separator}
                onChange={(e) => updateCurrentSettings('separator', e.target.value)}
                placeholder="\n\n"
                className="setting-input"
                disabled={isProcessing}
              />
            </div>
            {/* Maximum segment length */}
            <div className="setting-item">
              <label className="setting-label">
                <Tooltip title="指定每个分段允许的最大字符数(100-4000),超过此限制系统会强制分段">
                  <QuestionCircleOutlined className="help-icon" />
                </Tooltip>
              </label>
              <div className="setting-input-with-suffix">
                <InputNumber
                  value={currentSettings.maxTokens}
                  // Fall back to the default only when the field is cleared (null).
                  onChange={(value) => updateCurrentSettings('maxTokens', value ?? 1024)}
                  min={100}
                  max={4000}
                  className="setting-input-number"
                  disabled={isProcessing}
                />
                <span className="input-suffix">characters</span>
              </div>
            </div>
            {/* Segment overlap length */}
            <div className="setting-item">
              <label className="setting-label">
                <Tooltip title="相邻分段之间重叠的字符数,有助于保持上下文连贯性">
                  <QuestionCircleOutlined className="help-icon" />
                </Tooltip>
              </label>
              <div className="setting-input-with-suffix">
                <InputNumber
                  value={currentSettings.chunkOverlap}
                  // `??` instead of `||`: 0 is a valid overlap (min is 0) and must not
                  // be replaced by the default; only a cleared field (null) is.
                  onChange={(value) => updateCurrentSettings('chunkOverlap', value ?? 50)}
                  min={0}
                  max={500}
                  className="setting-input-number"
                  disabled={isProcessing}
                />
                <span className="input-suffix">characters</span>
              </div>
            </div>
          </div>
          <Divider />
          {/* Text pre-processing rules */}
          <div className="settings-section">
            <h3 className="section-title"></h3>
            <div className="checkbox-group">
              <Checkbox
                checked={currentSettings.removeExtraSpaces}
                onChange={(e) => updateCurrentSettings('removeExtraSpaces', e.target.checked)}
                disabled={isProcessing}
              >
              </Checkbox>
              <Checkbox
                checked={currentSettings.removeUrlsEmails}
                onChange={(e) => updateCurrentSettings('removeUrlsEmails', e.target.checked)}
                disabled={isProcessing}
              >
                URL
              </Checkbox>
            </div>
          </div>
          <Divider />
          {/* Indexing technique */}
          <div className="settings-section">
            <h3 className="section-title"></h3>
            <div className="index-options">
              <div
                className={`index-option ${currentSettings.indexingTechnique === 'high_quality' ? 'active' : ''} ${isProcessing ? 'disabled' : ''}`}
                onClick={() => !isProcessing && updateCurrentSettings('indexingTechnique', 'high_quality')}
              >
                <span className="option-radio"></span>
                <span className="option-label"></span>
                <span className="option-badge recommended"></span>
              </div>
              <div
                className={`index-option ${currentSettings.indexingTechnique === 'economy' ? 'active' : ''} ${isProcessing ? 'disabled' : ''}`}
                onClick={() => !isProcessing && updateCurrentSettings('indexingTechnique', 'economy')}
              >
                <span className="option-radio"></span>
                <span className="option-label"></span>
              </div>
            </div>
          </div>
          {/* Action buttons */}
          <div className="settings-actions">
            <Button onClick={handlePrevStep} disabled={isProcessing}>
              <ArrowLeftOutlined />
            </Button>
            <Button
              type="primary"
              onClick={handleReprocess}
              loading={isProcessing}
              disabled={isProcessing || !currentDoc?.documentId}
            >
            </Button>
          </div>
        </div>
        {/* Right: preview */}
        <div className="preview-panel">
          <Card
            title={
              <div className="preview-header">
                <span></span>
                {uploadedDocuments.length > 0 && (
                  <>
                    <Select
                      // Before the upload returns there is no document ID yet, so the
                      // file name serves as the option value.
                      value={currentDoc?.documentId || currentDoc?.file.name}
                      style={{ width: 500 }}
                      onChange={handleDocumentChange}
                      options={uploadedDocuments.map((doc) => ({
                        value: doc.documentId || doc.file.name,
                        label: (
                          <span className="file-select-option">
                            {doc.stage === 'completed' && <CheckCircleOutlined style={{ color: '#52c41a', marginRight: 4 }} />}
                            {(doc.stage === 'uploading' || doc.stage === 'indexing') && <LoadingOutlined style={{ color: '#00684a', marginRight: 4 }} />}
                            {doc.stage === 'error' && <ExclamationCircleOutlined style={{ color: '#ff4d4f', marginRight: 4 }} />}
                            {doc.file.name}
                          </span>
                        ),
                      }))}
                    />
                    {!isProcessing && currentDoc?.segments && (
                      <span className="segment-count">
                        {currentDoc.segments.length}
                      </span>
                    )}
                  </>
                )}
              </div>
            }
            className="preview-card"
          >
            {/* Processing progress (shown inside the preview card) */}
            {isProcessing ? (
              <div className="preview-processing">
                <div className="processing-file">
                  <FileTextOutlined className="file-icon" />
                  <span className="file-name">{currentDoc?.file.name}</span>
                  <LoadingOutlined className="status-icon loading" />
                </div>
                <Progress
                  percent={getCurrentProgress()}
                  status="active"
                  strokeColor={{
                    '0%': '#00684a',
                    '100%': '#52c41a',
                  }}
                />
                <div className="status-text">{getStatusText()}</div>
                {/* Indexing stage details: a stage is active once its status has been reached */}
                {currentDoc?.stage === 'indexing' && (
                  <div className="indexing-stages">
                    <div className={`stage-item ${['waiting', 'parsing', 'cleaning', 'splitting', 'indexing', 'completed'].includes(currentDoc.indexingStatus) ? 'active' : ''}`}>
                      <span className="stage-dot"></span>
                      <span></span>
                    </div>
                    <div className={`stage-item ${['parsing', 'cleaning', 'splitting', 'indexing', 'completed'].includes(currentDoc.indexingStatus) ? 'active' : ''}`}>
                      <span className="stage-dot"></span>
                      <span></span>
                    </div>
                    <div className={`stage-item ${['cleaning', 'splitting', 'indexing', 'completed'].includes(currentDoc.indexingStatus) ? 'active' : ''}`}>
                      <span className="stage-dot"></span>
                      <span></span>
                    </div>
                    <div className={`stage-item ${['splitting', 'indexing', 'completed'].includes(currentDoc.indexingStatus) ? 'active' : ''}`}>
                      <span className="stage-dot"></span>
                      <span></span>
                    </div>
                    <div className={`stage-item ${['indexing', 'completed'].includes(currentDoc.indexingStatus) ? 'active' : ''}`}>
                      <span className="stage-dot"></span>
                      <span></span>
                    </div>
                    <div className={`stage-item ${currentDoc.indexingStatus === 'completed' ? 'active' : ''}`}>
                      <span className="stage-dot"></span>
                      <span></span>
                    </div>
                  </div>
                )}
              </div>
            ) : currentDoc?.stage === 'error' ? (
              <div className="preview-error">
                <ExclamationCircleOutlined className="error-icon" />
                <div className="error-text">{currentDoc.error || '处理失败'}</div>
              </div>
            ) : previewLoading ? (
              <div className="preview-loading">
                <Spin size="large" />
                <div className="loading-text">...</div>
              </div>
            ) : (currentDoc?.segments?.length ?? 0) === 0 ? (
              <div className="preview-empty">
                <Empty description="等待处理完成后显示分段预览" />
              </div>
            ) : (
              <div className="preview-segments">
                {currentDoc?.segments.map((segment, index) => (
                  <div key={segment.id} className="segment-item">
                    <div className="segment-header">
                      <span className="segment-index">#{index + 1}</span>
                      <span className="segment-chars">
                        {segment.word_count}
                      </span>
                    </div>
                    <div className="segment-content">
                      {segment.content}
                    </div>
                  </div>
                ))}
              </div>
            )}
          </Card>
        </div>
      </div>
      {/* Bottom actions, shown once at least one document has completed */}
      {stats.completed > 0 && (
        <div className="completion-actions">
          <span className="completion-stats">
            {stats.completed}/{stats.total}
          </span>
          <Button type="primary" onClick={handleGoToDocuments}>
          </Button>
        </div>
      )}
    </div>
  );
};
// Page shell: back button + step indicator on top, the active step below.
return (
<div className="document-upload-page">
{/* Page header */}
<div className="upload-header">
<Button
type="text"
icon={<ArrowLeftOutlined />}
onClick={onClose}
className="back-btn"
>
</Button>
{renderSteps()}
</div>
{/* Content area */}
<div className="upload-content">
{step === 1 && renderStep1()}
{step === 2 && renderStep2()}
</div>
</div>
);
}
@@ -0,0 +1,59 @@
import { type ActionFunctionArgs } from '@remix-run/node';
import { API_BASE_URL } from '~/config/api-config';
/**
 * POST /api/dataset/datasets/:datasetId/documents/:documentId/update-by-file
 *
 * Updates a document by file (re-processing): authenticates the caller via the
 * session JWT and forwards the multipart form to the FastAPI backend.
 *
 * Responses are always JSON:
 * - 401 when no JWT is available in the session
 * - 400 when `datasetId` or `documentId` is missing
 * - the upstream status and JSON body when the backend answers with JSON
 * - the upstream error status (or 502 for a non-JSON success) with an
 *   `{ error }` wrapper when the backend body is not valid JSON
 * - 500 for any unexpected failure
 */
export async function action({ request, params }: ActionFunctionArgs) {
  // Builds a JSON response with the given status.
  const jsonResponse = (body: unknown, status: number) =>
    new Response(JSON.stringify(body), {
      status,
      headers: { 'Content-Type': 'application/json' },
    });

  try {
    // Read the user session and its JWT.
    const { getUserSession } = await import("~/api/login/auth.server");
    const { frontendJWT } = await getUserSession(request);
    if (!frontendJWT) {
      return jsonResponse({ error: 'JWT认证失败,请重新登录' }, 401);
    }

    const { datasetId, documentId } = params;
    if (!datasetId || !documentId) {
      return jsonResponse({ error: '缺少 datasetId 或 documentId 参数' }, 400);
    }

    // Multipart form (file + data) to forward as-is.
    const formData = await request.formData();
    console.log('[API] Update Document By File:', { datasetId, documentId });

    // Forward to FastAPI.
    // Dify API: POST /datasets/{dataset_id}/documents/{document_id}/update-by-file
    // Route params come from the request URL, so they are encoded before being
    // placed into the upstream path.
    const apiUrl = `${API_BASE_URL}/dify_dataset/datasets/${encodeURIComponent(datasetId)}/documents/${encodeURIComponent(documentId)}/update-by-file`;
    const response = await fetch(apiUrl, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${frontendJWT}`,
      },
      body: formData,
    });

    // Read as text first: an empty or non-JSON upstream body (e.g. a gateway
    // error page) must not hide the real upstream status behind a parse error.
    const rawBody = await response.text();
    let data: unknown;
    try {
      data = JSON.parse(rawBody);
    } catch {
      return jsonResponse(
        { error: rawBody || response.statusText || 'Upstream returned a non-JSON response' },
        response.ok ? 502 : response.status
      );
    }
    return jsonResponse(data, response.status);
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error('[API] Update Document By File - Error:', errorMessage);
    return jsonResponse(
      { error: errorMessage || 'Failed to update document by file' },
      500
    );
  }
}
@@ -1425,3 +1425,919 @@
.segment-manager-page .anticon-spin {
color: rgb(0 104 74) !important;
}
/* ============================================================================
* 上传配置对话框样式
* ============================================================================ */
.upload-config-modal .ant-modal-header {
border-bottom: 1px solid #f0f0f0;
padding: 16px 24px;
}
.upload-config-modal .ant-modal-title {
font-size: 16px;
font-weight: 600;
color: #1a1a1a;
}
.upload-config-modal .ant-modal-body {
padding: 24px;
}
.upload-config-modal .ant-modal-footer {
border-top: 1px solid #f0f0f0;
padding: 12px 24px;
}
.upload-config-modal .ant-btn-primary {
background-color: #00684a;
border-color: #00684a;
}
.upload-config-modal .ant-btn-primary:hover {
background-color: #005a3f;
border-color: #005a3f;
}
/* 文件信息区域 */
.upload-config-form .file-info-section,
.upload-progress-container .file-info-section {
margin-bottom: 16px;
}
.upload-config-form .file-info,
.upload-progress-container .file-info {
display: flex;
align-items: center;
gap: 12px;
padding: 12px 16px;
background: #f5f7f9;
border-radius: 8px;
border: 1px solid #e5e5e5;
}
.upload-config-form .file-icon,
.upload-progress-container .file-icon {
font-size: 24px;
color: #00684a;
}
.upload-config-form .file-details,
.upload-progress-container .file-details {
display: flex;
flex-direction: column;
gap: 2px;
flex: 1;
min-width: 0;
}
.upload-config-form .file-name,
.upload-progress-container .file-name {
font-size: 14px;
font-weight: 500;
color: #1a1a1a;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.upload-config-form .file-size,
.upload-progress-container .file-size {
font-size: 12px;
color: #666;
}
/* 设置项样式 */
.upload-config-form .setting-item {
margin-bottom: 16px;
}
.upload-config-form .setting-label {
display: flex;
align-items: center;
gap: 6px;
font-size: 13px;
font-weight: 500;
color: #333;
margin-bottom: 8px;
}
.upload-config-form .help-icon {
color: #999;
font-size: 12px;
cursor: help;
}
.upload-config-form .checkbox-group {
display: flex;
flex-direction: column;
gap: 10px;
}
.upload-config-form .checkbox-group .ant-checkbox-wrapper {
color: #333;
font-size: 13px;
}
.upload-config-form .checkbox-group .ant-checkbox-checked .ant-checkbox-inner {
background-color: #00684a;
border-color: #00684a;
}
/* 进度容器 */
.upload-progress-container {
min-height: 200px;
}
.upload-progress-container .progress-section {
padding: 24px 0;
}
.upload-progress-container .status-text {
display: flex;
align-items: center;
justify-content: center;
gap: 8px;
margin-top: 16px;
font-size: 14px;
color: #666;
}
.upload-progress-container .status-icon {
color: #00684a;
}
/* 成功状态 */
.upload-progress-container .success-state {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
padding: 24px;
}
.upload-progress-container .success-icon {
font-size: 48px;
color: #52c41a;
margin-bottom: 16px;
}
.upload-progress-container .success-text {
font-size: 16px;
font-weight: 500;
color: #1a1a1a;
}
/* 错误状态 */
.upload-progress-container .error-state {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
padding: 24px;
}
.upload-progress-container .error-icon {
font-size: 48px;
color: #ff4d4f;
margin-bottom: 16px;
}
.upload-progress-container .error-text {
font-size: 14px;
color: #ff4d4f;
text-align: center;
}
/* 索引阶段指示器 */
.upload-progress-container .indexing-stages {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-top: 24px;
padding: 16px;
background: #f5f7f9;
border-radius: 8px;
}
.upload-progress-container .stage-item {
display: flex;
align-items: center;
gap: 6px;
font-size: 12px;
color: #999;
padding: 4px 8px;
background: #fff;
border-radius: 4px;
border: 1px solid #e5e5e5;
transition: all 0.2s;
}
.upload-progress-container .stage-item.active {
color: #00684a;
border-color: #00684a;
background: rgba(0, 104, 74, 0.05);
}
.upload-progress-container .stage-dot {
width: 6px;
height: 6px;
border-radius: 50%;
background: #d9d9d9;
transition: background 0.2s;
}
.upload-progress-container .stage-item.active .stage-dot {
background: #00684a;
}
/* 进度条主题色 */
.upload-config-modal .ant-progress-bg {
background-color: #00684a;
}
.upload-config-modal .ant-progress-status-active .ant-progress-bg::before {
background: #fff;
}
/* Divider 样式 */
.upload-config-form .ant-divider {
margin: 16px 0;
border-color: #e5e5e5;
}
.upload-config-form .ant-divider-inner-text {
font-size: 13px;
font-weight: 500;
color: #666;
}
/* Select 和 Input 主题色 */
.upload-config-modal .ant-select:hover .ant-select-selector {
border-color: #00684a !important;
}
.upload-config-modal .ant-select-focused .ant-select-selector {
border-color: #00684a !important;
box-shadow: 0 0 0 2px rgba(0, 104, 74, 0.1) !important;
}
.upload-config-modal .ant-input:hover,
.upload-config-modal .ant-input-number:hover {
border-color: #00684a;
}
.upload-config-modal .ant-input:focus,
.upload-config-modal .ant-input-focused,
.upload-config-modal .ant-input-number:focus,
.upload-config-modal .ant-input-number-focused {
border-color: #00684a;
box-shadow: 0 0 0 2px rgba(0, 104, 74, 0.1);
}
/* Spin 主题色 */
.upload-config-modal .ant-spin .ant-spin-dot-item {
background-color: #00684a !important;
}
/* ============================================================================
* 文档上传页面样式
* ============================================================================ */
.document-upload-page {
display: flex;
flex-direction: column;
height: 100%;
background: #fff;
overflow: hidden;
}
/* 上传页面头部 */
.upload-header {
display: flex;
align-items: center;
justify-content: center;
padding: 16px 24px;
border-bottom: 1px solid #f0f0f0;
flex-shrink: 0;
position: relative;
}
.upload-header .back-btn {
position: absolute;
left: 24px;
color: #666;
font-size: 14px;
padding: 4px 12px;
height: auto;
}
.upload-header .back-btn:hover {
color: #00684a;
background: rgba(0, 104, 74, 0.08);
}
/* 步骤指示器 */
.upload-steps {
display: inline-flex;
align-items: center;
gap: 0;
}
.step-item {
display: inline-flex;
align-items: center;
gap: 8px;
color: #999;
font-size: 13px;
white-space: nowrap;
}
.step-item.active {
color: #00684a;
}
.step-item.completed {
color: #52c41a;
}
.step-number {
width: 24px;
height: 24px;
border-radius: 50%;
background: #f0f0f0;
display: inline-flex;
align-items: center;
justify-content: center;
font-size: 12px;
font-weight: 600;
transition: all 0.2s;
flex-shrink: 0;
}
.step-item.active .step-number {
background: #00684a;
color: #fff;
}
.step-item.completed .step-number {
background: #52c41a;
color: #fff;
}
.step-title {
font-weight: 500;
}
.step-divider {
width: 40px;
height: 1px;
background: #e5e5e5;
margin: 0 12px;
flex-shrink: 0;
}
.step-divider.completed {
background: #52c41a;
}
/* 上传内容区域 */
.upload-content {
flex: 1;
overflow-y: auto;
padding: 24px;
}
/* 第一步:选择文件 */
.upload-step-content.step1 {
max-width: 800px;
margin: 0 auto;
}
.step-heading {
font-size: 24px;
font-weight: 600;
color: #1a1a1a;
margin: 0 0 8px 0;
}
.step-description {
font-size: 14px;
color: #666;
margin: 0 0 24px 0;
line-height: 1.6;
}
/* 文件拖拽区域 */
.file-drop-zone {
margin-bottom: 24px;
}
.file-drop-zone .ant-upload-drag {
border: 2px dashed #d9d9d9;
border-radius: 12px;
background: #fafafa;
padding: 40px 20px;
transition: all 0.2s;
}
.file-drop-zone .ant-upload-drag:hover {
border-color: #00684a;
background: rgba(0, 104, 74, 0.02);
}
.file-drop-zone .ant-upload-drag-icon {
color: #999;
font-size: 48px !important;
}
.file-drop-zone .ant-upload-text {
font-size: 14px;
color: #333;
}
.file-drop-zone .upload-link {
color: #00684a;
font-weight: 500;
cursor: pointer;
}
.file-drop-zone .ant-upload-hint {
font-size: 12px;
color: #999;
margin-top: 8px;
}
/* 已选文件区域 */
.selected-files-section {
margin-bottom: 24px;
}
.section-subtitle {
font-size: 14px;
font-weight: 600;
color: #1a1a1a;
margin: 0 0 12px 0;
}
.selected-files-list {
display: flex;
flex-direction: column;
gap: 8px;
}
.selected-file-item {
display: flex;
align-items: center;
gap: 12px;
padding: 12px 16px;
background: #f5f7f9;
border-radius: 8px;
border: 1px solid #e5e5e5;
}
.selected-file-item .file-icon {
font-size: 24px;
color: #00684a;
}
.selected-file-item .file-info {
flex: 1;
min-width: 0;
}
.selected-file-item .file-name {
font-size: 14px;
font-weight: 500;
color: #1a1a1a;
display: block;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.selected-file-item .file-size {
font-size: 12px;
color: #666;
}
.selected-file-item .remove-file-btn {
color: #999;
}
.selected-file-item .remove-file-btn:hover {
color: #ff4d4f;
}
/* 步骤操作按钮 */
.step-actions {
display: flex;
justify-content: flex-end;
margin-top: 24px;
}
.step-actions .next-btn {
min-width: 120px;
height: 40px;
font-size: 14px;
background: #00684a;
border-color: #00684a;
}
.step-actions .next-btn:hover {
background: #005a3f;
border-color: #005a3f;
}
.step-actions .next-btn:disabled {
background: #d9d9d9;
border-color: #d9d9d9;
}
/* 第二步:分段配置与预览 */
.upload-step-content.step2 {
height: 100%;
display: flex;
flex-direction: column;
}
/* 处理进度区 */
.processing-status {
background: #f5f7f9;
border-radius: 12px;
padding: 24px;
margin-bottom: 24px;
}
.processing-file {
display: flex;
align-items: center;
gap: 12px;
margin-bottom: 16px;
}
.processing-file .file-icon {
font-size: 24px;
color: #00684a;
}
.processing-file .file-name {
flex: 1;
font-size: 14px;
font-weight: 500;
color: #1a1a1a;
}
.processing-file .status-icon {
font-size: 18px;
}
.processing-file .status-icon.success {
color: #52c41a;
}
.processing-file .status-icon.error {
color: #ff4d4f;
}
.processing-file .status-icon.loading {
color: #00684a;
}
.processing-status .status-text {
text-align: center;
font-size: 13px;
color: #666;
margin-top: 12px;
}
/* 索引阶段指示器(Step2) */
.processing-status .indexing-stages {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-top: 16px;
justify-content: center;
}
.processing-status .stage-item {
display: flex;
align-items: center;
gap: 6px;
font-size: 12px;
color: #999;
padding: 4px 10px;
background: #fff;
border-radius: 4px;
border: 1px solid #e5e5e5;
transition: all 0.2s;
}
.processing-status .stage-item.active {
color: #00684a;
border-color: #00684a;
background: rgba(0, 104, 74, 0.05);
}
.processing-status .stage-dot {
width: 6px;
height: 6px;
border-radius: 50%;
background: #d9d9d9;
transition: background 0.2s;
}
.processing-status .stage-item.active .stage-dot {
background: #00684a;
}
/* Error state (unscoped: applies to any `.error-state` element).
 * NOTE(review): `.error-state .error-text` below has the same specificity as
 * `.upload-progress-container .error-text` declared earlier in this file and
 * appears later, so for an element matching both selectors this rule's
 * `color: #666` overrides the earlier `#ff4d4f` - confirm this is intended. */
.error-state {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
padding: 48px;
text-align: center;
}
.error-state .error-icon {
font-size: 48px;
color: #ff4d4f;
margin-bottom: 16px;
}
.error-state .error-text {
font-size: 14px;
color: #666;
margin-bottom: 24px;
}
/* 索引方式选项 */
.index-options {
display: flex;
flex-direction: column;
gap: 8px;
}
.index-option {
display: flex;
align-items: center;
gap: 10px;
padding: 12px 16px;
background: #fff;
border: 1px solid #e5e5e5;
border-radius: 8px;
cursor: pointer;
transition: all 0.2s;
}
.index-option:hover {
border-color: #00684a;
}
.index-option.active {
border-color: #00684a;
background: rgba(0, 104, 74, 0.05);
}
.index-option .option-radio {
width: 16px;
height: 16px;
border-radius: 50%;
border: 2px solid #d9d9d9;
position: relative;
transition: all 0.2s;
}
.index-option.active .option-radio {
border-color: #00684a;
}
.index-option.active .option-radio::after {
content: '';
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
width: 8px;
height: 8px;
border-radius: 50%;
background: #00684a;
}
.index-option .option-label {
font-size: 14px;
color: #333;
}
.index-option .option-badge {
font-size: 11px;
padding: 2px 6px;
border-radius: 4px;
margin-left: auto;
}
.index-option .option-badge.recommended {
background: rgba(0, 104, 74, 0.1);
color: #00684a;
}
/* 完成状态底部操作 */
.completion-actions {
display: flex;
justify-content: center;
padding: 24px;
border-top: 1px solid #f0f0f0;
margin-top: auto;
}
.completion-actions .ant-btn-primary {
min-width: 140px;
height: 40px;
font-size: 14px;
background: #00684a;
border-color: #00684a;
}
.completion-actions .ant-btn-primary:hover {
background: #005a3f;
border-color: #005a3f;
}
/* 上传页面的分段内容布局(复用 document-detail-content) */
.upload-step-content .document-detail-content {
flex: 1;
min-height: 0;
}
/* 上传页面的设置操作按钮 */
.upload-step-content .settings-actions {
margin-top: 24px;
display: flex;
gap: 12px;
}
.upload-step-content .settings-actions .ant-btn-primary {
background: #00684a;
border-color: #00684a;
}
.upload-step-content .settings-actions .ant-btn-primary:hover {
background: #005a3f;
border-color: #005a3f;
}
/* 进度条主题色 */
.document-upload-page .ant-progress-bg {
background-color: #00684a;
}
.document-upload-page .ant-progress-status-active .ant-progress-bg::before {
background: #fff;
}
/* Spin 主题色 */
.document-upload-page .ant-spin .ant-spin-dot-item {
background-color: #00684a !important;
}
/* 预览框内的进度显示 */
.preview-processing {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
padding: 40px 24px;
min-height: 300px;
}
.preview-processing .processing-file {
display: flex;
align-items: center;
gap: 12px;
margin-bottom: 24px;
width: 100%;
max-width: 400px;
}
.preview-processing .file-icon {
font-size: 24px;
color: #00684a;
}
.preview-processing .file-name {
flex: 1;
font-size: 14px;
font-weight: 500;
color: #1a1a1a;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.preview-processing .status-icon {
font-size: 18px;
}
.preview-processing .status-icon.loading {
color: #00684a;
}
.preview-processing .ant-progress {
width: 100%;
max-width: 400px;
}
.preview-processing .status-text {
text-align: center;
font-size: 13px;
color: #666;
margin-top: 12px;
}
.preview-processing .indexing-stages {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-top: 24px;
justify-content: center;
max-width: 500px;
}
.preview-processing .stage-item {
display: flex;
align-items: center;
gap: 6px;
font-size: 12px;
color: #999;
padding: 4px 10px;
background: #f5f7f9;
border-radius: 4px;
border: 1px solid #e5e5e5;
transition: all 0.2s;
}
.preview-processing .stage-item.active {
color: #00684a;
border-color: #00684a;
background: rgba(0, 104, 74, 0.05);
}
.preview-processing .stage-dot {
width: 6px;
height: 6px;
border-radius: 50%;
background: #d9d9d9;
transition: background 0.2s;
}
.preview-processing .stage-item.active .stage-dot {
background: #00684a;
}
/* 索引选项禁用状态 */
.index-option.disabled {
opacity: 0.6;
cursor: not-allowed;
}
.index-option.disabled:hover {
border-color: #e5e5e5;
}
/* 预览框内错误状态 */
.preview-error {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
padding: 60px 24px;
min-height: 300px;
}
.preview-error .error-icon {
font-size: 48px;
color: #ff4d4f;
margin-bottom: 16px;
}
.preview-error .error-text {
color: #666;
font-size: 14px;
text-align: center;
}
/* 完成统计样式 */
.completion-stats {
color: #666;
font-size: 14px;
margin-right: 16px;
}
/* 文件选择下拉选项 */
.file-select-option {
display: flex;
align-items: center;
}
+462
View File
@@ -0,0 +1,462 @@
# Dify 知识库 API 功能实现清单
## 概述
本文档对比 Dify 官方知识库 API 与当前项目已实现的功能。
- ✅ 已实现
- ❌ 未实现
- 🚫 不开放
---
## 系统架构
### 完整数据流向
```
┌─────────────────────────────────────────────────────────────────────┐
│ 第一层:React 组件 (浏览器) │
│ 位置:app/components/dify-dataset-manager/ │
│ 调用:客户端 API 函数 │
└────────────────────────────┬────────────────────────────────────────┘
│ 使用 axios 发送 HTTP 请求
│ URL: /api/dataset/...
│ 自动携带 cookies (JWT)
┌─────────────────────────────────────────────────────────────────────┐
│ 第二层:客户端 API 层 (浏览器侧) │
│ 位置:app/api/dify-dataset/api/*.ts │
│ 作用:封装 axios 请求,提供类型安全的函数接口 │
│ 请求:axios.get('/api/dataset/datasets', { withCredentials: true })│
└────────────────────────────┬────────────────────────────────────────┘
│ HTTP 请求 (浏览器 → Remix 服务器)
┌─────────────────────────────────────────────────────────────────────┐
│ 第三层:Remix 路由层 (Node.js 服务端) │
│ 位置:app/routes/api.dataset.*.tsx │
│ 作用:接收浏览器请求,验证 JWT,转发到 FastAPI │
│ 请求:fetch(`${API_BASE_URL}/dify_dataset/...`, { headers: JWT }) │
└────────────────────────────┬────────────────────────────────────────┘
│ HTTP 请求 (Remix → FastAPI)
┌─────────────────────────────────────────────────────────────────────┐
│ 第四层:FastAPI 后端代理 (Python) │
│ 位置:docauditai/routers/dify_dataset.py │
│ 作用:验证用户 JWT,添加 Dify DATASET_API_KEY,转发请求 │
└────────────────────────────┬────────────────────────────────────────┘
│ HTTP 请求 (FastAPI → Dify)
┌─────────────────────────────────────────────────────────────────────┐
│ 第五层:Dify 官方知识库 API │
│ URL:https://api.dify.ai/v1/datasets/... │
│ 鉴权:Authorization: Bearer {DATASET_API_KEY} │
└─────────────────────────────────────────────────────────────────────┘
```
### 为什么有两层 API
| 层级 | 位置 | 执行环境 | HTTP 库 | 作用 |
|------|------|----------|---------|------|
| **客户端 API** | `app/api/dify-dataset/api/*.ts` | 浏览器 | axios | 供 React 组件调用,类型安全 |
| **Remix 路由** | `app/routes/api.dataset.*.tsx` | Node.js | fetch | 接收浏览器请求,转发到 FastAPI |
**调用链路**
```
React 组件 → 客户端 API (axios) → Remix 路由 (fetch) → FastAPI → Dify API
```
---
## 一、知识库管理
| 功能 | API 端点 | 方法 | 状态 | 路由文件 | 客户端函数 |
|------|----------|------|------|----------|-----------|
| 获取知识库列表 | /datasets | GET | ✅ | `api.dataset.datasets.tsx` | `fetchDatasets()` |
| 查看知识库详情 | /datasets/{dataset_id} | GET | ✅ | `api.dataset.datasets.$datasetId.tsx` | `fetchDataset()` |
| 创建空知识库 | /datasets | POST | ❌ | - | - |
| 修改知识库名称 | /datasets/{dataset_id} | PATCH | ✅ | `api.dataset.datasets.$datasetId.tsx` | `updateDatasetName()` |
| 删除知识库 | /datasets/{dataset_id} | DELETE | 🚫 | - | - |
**说明**
- 修改知识库:仅允许修改 `name` 字段,其他字段不开放
- 删除知识库:出于安全考虑不对用户开放
---
## 二、文档管理
| 功能 | API 端点 | 方法 | 状态 | 路由文件 | 客户端函数 |
|------|----------|------|------|----------|-----------|
| 获取文档列表 | /datasets/{id}/documents | GET | ✅ | `api.dataset.datasets.$datasetId.documents.tsx` | `fetchDocuments()` |
| 获取文档详情 | /datasets/{id}/documents/{docId} | GET | ✅ | `api.dataset.datasets.$datasetId.documents.$documentId.tsx` | `fetchDocument()` |
| 通过文件创建文档 | /datasets/{id}/document/create-by-file | POST | ✅ | `api.dataset.datasets.$datasetId.documents.tsx` | `uploadDocument()` |
| 通过文本创建文档 | /datasets/{id}/document/create-by-text | POST | ❌ | - | - |
| 通过文件更新文档 | /datasets/{id}/documents/{docId}/update-by-file | POST | ❌ | - | - |
| 通过文本更新文档 | /datasets/{id}/documents/{docId}/update-by-text | POST | ❌ | - | - |
| 删除文档 | /datasets/{id}/documents/{docId} | DELETE | ✅ | `api.dataset.datasets.$datasetId.documents.$documentId.tsx` | `deleteDocument()` |
| 更新文档状态 | /datasets/{id}/documents/status/{action} | PATCH | ✅ | `api.dataset.datasets.$datasetId.documents.status.$action.tsx` | `toggleDocumentStatus()` |
| 获取文档嵌入状态 | /datasets/{id}/documents/{batch}/indexing-status | GET | ✅ | `api.dataset.datasets.$datasetId.documents.$batch.indexing-status.tsx` | `fetchIndexingStatus()` |
| 获取上传文件信息 | /datasets/{id}/documents/{docId}/upload-file | GET | ✅ | `api.dataset.datasets.$datasetId.documents.$documentId.upload-file.tsx` | `fetchUploadFileInfo()` |
| 索引预估(预览分段) | /datasets/{id}/indexing-estimate | POST | ✅ | `api.dataset.datasets.$datasetId.indexing-estimate.tsx` | `fetchIndexingEstimate()` |
| 重新处理文档 | /datasets/{id}/documents/reprocess | POST | ✅ | `api.dataset.datasets.$datasetId.documents.reprocess.tsx` | `reprocessDocument()` |
**说明**
- 上传文档:支持 multipart/form-data 格式
- 文档状态:action 可选值为 `enable` / `disable` / `archive` / `un_archive`
- 索引预估:用于预览分段效果,不会实际修改文档
- 重新处理文档:通过 `original_document_id` 参数使用新的分段设置重新处理已有文档
---
## 三、分段管理
| 功能 | API 端点 | 方法 | 状态 | 路由文件 | 客户端函数 |
|------|----------|------|------|----------|-----------|
| 获取分段列表 | /datasets/{id}/documents/{docId}/segments | GET | ✅ | `api.dataset.datasets.$datasetId.documents.$documentId.segments.tsx` | `fetchSegments()` |
| 获取分段详情 | /datasets/{id}/documents/{docId}/segments/{segId} | GET | ✅ | `api.dataset.datasets.$datasetId.documents.$documentId.segments.$segmentId.tsx` | `fetchSegment()` |
| 新增分段 | /datasets/{id}/documents/{docId}/segments | POST | ✅ | `api.dataset.datasets.$datasetId.documents.$documentId.segments.tsx` | `createSegments()` |
| 更新分段 | /datasets/{id}/documents/{docId}/segments/{segId} | POST | ✅ | `api.dataset.datasets.$datasetId.documents.$documentId.segments.$segmentId.tsx` | `updateSegment()` |
| 删除分段 | /datasets/{id}/documents/{docId}/segments/{segId} | DELETE | ✅ | `api.dataset.datasets.$datasetId.documents.$documentId.segments.$segmentId.tsx` | `deleteSegment()` |
**说明**
- 新增分段:支持批量新增 `{ segments: [...] }`
- 更新分段:可更新 `content`, `answer`, `keywords`, `enabled`
---
## 四、子分段管理(父子模式)
| 功能 | API 端点 | 方法 | 状态 | 路由文件 | 客户端函数 |
|------|----------|------|------|----------|-----------|
| 查询子分段 | .../segments/{segId}/child_chunks | GET | ✅ | `...segments.$segmentId.child_chunks.tsx` | `fetchChildChunks()` |
| 新增子分段 | .../segments/{segId}/child_chunks | POST | ✅ | `...segments.$segmentId.child_chunks.tsx` | `createChildChunk()` |
| 更新子分段 | .../segments/{segId}/child_chunks/{chunkId} | PATCH | ✅ | `...segments.$segmentId.child_chunks.$childChunkId.tsx` | `updateChildChunk()` |
| 删除子分段 | .../segments/{segId}/child_chunks/{chunkId} | DELETE | ✅ | `...segments.$segmentId.child_chunks.$childChunkId.tsx` | `deleteChildChunk()` |
**说明**:子分段用于 Dify 的父子模式分段策略
---
## 五、检索功能
| 功能 | API 端点 | 方法 | 状态 | 路由文件 | 客户端函数 |
|------|----------|------|------|----------|-----------|
| 检索知识库 | /datasets/{id}/retrieve | POST | ✅ | `api.dataset.datasets.$datasetId.retrieve.tsx` | `retrieveDataset()` |
**检索参数详解**
```typescript
{
query: string; // 检索关键词
retrieval_model: {
search_method: 'keyword_search' | 'semantic_search' | 'full_text_search' | 'hybrid_search';
reranking_enable: boolean; // 是否开启 rerank
reranking_model?: object; // Rerank 模型配置
top_k: number; // 返回结果数量
score_threshold_enabled: boolean;
score_threshold: number; // 分数阈值 (0-1)
}
}
```
---
## 六、元数据管理
**当前状态:❌ 全部未实现**
| 功能 | API 端点 | 方法 | 说明 |
|------|----------|------|------|
| 新增元数据 | /datasets/{id}/metadata | POST | type, name |
| 更新元数据 | /datasets/{id}/metadata/{metaId} | PATCH | name |
| 删除元数据 | /datasets/{id}/metadata/{metaId} | DELETE | |
| 查询元数据列表 | /datasets/{id}/metadata | GET | |
| 启用/禁用内置元数据 | /datasets/{id}/metadata/built-in/{action} | POST | |
| 更新文档元数据 | /datasets/{id}/documents/metadata | POST | 批量更新 |
---
## 七、模型查询
| 功能 | API 端点 | 方法 | 状态 | 说明 |
|------|----------|------|------|------|
| 获取嵌入模型列表 | /workspaces/current/models/model-types/text-embedding | GET | ❌ | 创建知识库时需要 |
---
## 八、标签管理
**当前状态:❌ 全部未实现**
| 功能 | API 端点 | 方法 | 说明 |
|------|----------|------|------|
| 新增标签 | /datasets/tags | POST | name (最大50字符) |
| 获取标签列表 | /datasets/tags | GET | |
| 修改标签名称 | /datasets/tags | PATCH | name, tag_id |
| 删除标签 | /datasets/tags | DELETE | tag_id |
| 绑定知识库到标签 | /datasets/tags/binding | POST | tag_ids, target_id |
| 解绑知识库和标签 | /datasets/tags/unbinding | POST | tag_id, target_id |
| 查询知识库已绑定的标签 | /datasets/{id}/tags | POST | |
---
## 功能统计
| 类别 | 已实现 | 未实现 | 不开放 | 完成度 |
|------|--------|--------|--------|--------|
| 知识库管理 | 3 | 1 | 1 | 75% |
| 文档管理 | 9 | 3 | 0 | 75% |
| 分段管理 | 5 | 0 | 0 | 100% |
| 子分段管理 | 4 | 0 | 0 | 100% |
| 检索功能 | 1 | 0 | 0 | 100% |
| 元数据管理 | 0 | 6 | 0 | 0% |
| 模型查询 | 0 | 1 | 0 | 0% |
| 标签管理 | 0 | 7 | 0 | 0% |
| **总计** | **22** | **18** | **1** | **55%** |
---
## 代码文件清单
### Remix 路由层 (服务端)
所有路由文件位于 `app/routes/` 目录:
| 文件名 | HTTP 方法 | 功能 |
|--------|----------|------|
| `api.dataset.datasets.tsx` | GET | 获取知识库列表 |
| `api.dataset.datasets.$datasetId.tsx` | GET / PATCH | 知识库详情 / 修改名称 |
| `api.dataset.datasets.$datasetId.documents.tsx` | GET / POST | 文档列表 / 上传文档 |
| `api.dataset.datasets.$datasetId.documents.$documentId.tsx` | GET / DELETE | 文档详情 / 删除文档 |
| `api.dataset.datasets.$datasetId.documents.$documentId.upload-file.tsx` | GET | 获取上传文件信息 |
| `api.dataset.datasets.$datasetId.documents.$batch.indexing-status.tsx` | GET | 获取嵌入状态 |
| `api.dataset.datasets.$datasetId.documents.status.$action.tsx` | PATCH | 更新文档状态 |
| `api.dataset.datasets.$datasetId.indexing-estimate.tsx` | POST | 索引预估(预览分段) |
| `api.dataset.datasets.$datasetId.documents.reprocess.tsx` | POST | 重新处理文档 |
| `api.dataset.datasets.$datasetId.documents.$documentId.segments.tsx` | GET / POST | 分段列表 / 新增分段 |
| `api.dataset.datasets.$datasetId.documents.$documentId.segments.$segmentId.tsx` | GET / POST / DELETE | 分段详情 / 更新 / 删除 |
| `api.dataset.datasets.$datasetId.documents.$documentId.segments.$segmentId.child_chunks.tsx` | GET / POST | 子分段列表 / 新增 |
| `api.dataset.datasets.$datasetId.documents.$documentId.segments.$segmentId.child_chunks.$childChunkId.tsx` | PATCH / DELETE | 子分段更新 / 删除 |
| `api.dataset.datasets.$datasetId.retrieve.tsx` | POST | 检索知识库 |
### 客户端 API 层 (浏览器侧)
```
app/api/dify-dataset/
├── index.ts # 统一导出
├── client.server.ts # 服务端基础请求函数(备用)
├── type/ # 类型定义
│ ├── index.ts # 类型统一导出
│ ├── commonTypes.ts # 通用类型
│ ├── datasetTypes.ts # 知识库类型
│ ├── documentTypes.ts # 文档类型
│ └── segmentTypes.ts # 分段/子分段/检索类型
└── api/ # API 调用函数
├── index.ts # 函数统一导出
├── datasetApi.ts # 知识库 API
├── documentApi.ts # 文档 API
└── segmentApi.ts # 分段/子分段/检索 API
```
### 客户端函数清单
**datasetApi.ts - 知识库管理**
```typescript
fetchDatasets(page, limit) // 获取知识库列表
fetchDataset(datasetId) // 获取知识库详情
updateDatasetName(datasetId, name) // 修改知识库名称
```
**documentApi.ts - 文档管理**
```typescript
fetchDocuments(datasetId, page, limit, keyword) // 获取文档列表
fetchDocument(datasetId, documentId) // 获取文档详情
deleteDocument(datasetId, documentId) // 删除文档
toggleDocumentStatus(datasetId, documentId, enabled) // 启用/禁用文档
uploadDocument(datasetId, file, onProgress) // 上传文档
fetchIndexingStatus(datasetId, batch) // 获取嵌入状态
fetchUploadFileInfo(datasetId, documentId) // 获取上传文件信息
fetchIndexingEstimate(datasetId, fileId, processRule, docForm, docLanguage) // 索引预估(预览分段效果)
reprocessDocument(datasetId, originalDocumentId, processRule, docForm, docLanguage) // 重新处理文档
```
**segmentApi.ts - 分段/子分段/检索**
```typescript
// 分段
fetchSegments(datasetId, documentId, page, limit, keyword)
fetchSegment(datasetId, documentId, segmentId)
createSegments(datasetId, documentId, segments)
updateSegment(datasetId, documentId, segmentId, segment)
deleteSegment(datasetId, documentId, segmentId)
toggleSegmentStatus(datasetId, documentId, segmentId, enabled)
// 子分段
fetchChildChunks(datasetId, documentId, segmentId, page, limit, keyword)
createChildChunk(datasetId, documentId, segmentId, content)
updateChildChunk(datasetId, documentId, segmentId, childChunkId, content)
deleteChildChunk(datasetId, documentId, segmentId, childChunkId)
// 检索
retrieveDataset(datasetId, query, retrievalModel)
```
### UI 组件
```
app/components/dify-dataset-manager/
├── index.tsx # 主容器组件 - 状态管理、标签页切换
├── layout.tsx # 布局组件 - 左侧菜单栏 + 右侧内容区
├── document-list.tsx # 文档列表 - 表格、搜索、上传、删除
├── document-detail.tsx # 文档详情 - 分段设置、预览块
├── retrieve-test.tsx # 召回测试 - 知识库检索测试
└── dataset-settings.tsx # 知识库设置 - 名称、描述修改
```
### 布局结构(仿 Dify 风格)
```
┌─────────────────────────────────────────────────────────────┐
│ dataset-layout │
├──────────────────┬──────────────────────────────────────────┤
│ dataset-sidebar │ dataset-main │
│ │ │
│ ┌─────────────┐ │ 根据 activeTab 渲染: │
│ │ 知识库信息 │ │ - documents → DocumentList │
│ │ (名称/数量) │ │ - documents + selectedDoc → DocumentDetail│
│ └─────────────┘ │ - retrieve → RetrieveTest │
│ │ - settings → DatasetSettings │
│ ┌─────────────┐ │ │
│ │ 文档 │ │ │
│ │ 召回测试 │ │ │
│ │ 设置 │ │ │
│ └─────────────┘ │ │
└──────────────────┴──────────────────────────────────────────┘
```
---
## 鉴权机制
### 三层认证流程
```
┌────────────────────────────────────────────────────────────────┐
│ 浏览器 → Remix 服务器 │
│ 认证方式:Cookie (会话中的 JWT) │
│ axios 配置:{ withCredentials: true } │
└────────────────────────────────────────────────────────────────┘
┌────────────────────────────────────────────────────────────────┐
│ Remix 服务器 → FastAPI │
│ 认证方式:Authorization: Bearer {frontendJWT} │
│ JWT 来源:getUserSession(request) │
└────────────────────────────────────────────────────────────────┘
┌────────────────────────────────────────────────────────────────┐
│ FastAPI → Dify API │
│ 认证方式:Authorization: Bearer {DATASET_API_KEY} │
│ API Key:服务端环境变量配置 │
└────────────────────────────────────────────────────────────────┘
```
### 关键配置
```typescript
// app/config/api-config.ts
export const API_BASE_URL = apiConfig.baseUrl; // 如:http://10.79.97.17:8000
// 根据端口自动选择配置
const portConfigs = {
'51703': { baseUrl: 'http://172.16.0.55:8073' }, // 梅州
'51704': { baseUrl: 'http://10.79.97.17:8001' }, // 云浮
'51707': { baseUrl: 'http://10.79.97.17:8004' }, // 省级
// ...
};
```
---
## 常见错误码
| code | status | message |
|------|--------|---------|
| no_file_uploaded | 400 | Please upload your file. |
| too_many_files | 400 | Only one file is allowed. |
| file_too_large | 413 | File size exceeded. |
| unsupported_file_type | 415 | File type not allowed. |
| high_quality_dataset_only | 400 | Current operation only supports 'high-quality' datasets. |
| dataset_not_initialized | 400 | The dataset is still being initialized or indexing. |
| archived_document_immutable | 403 | The archived document is not editable. |
| dataset_name_duplicate | 409 | The dataset name already exists. |
| invalid_action | 400 | Invalid action. |
| document_already_finished | 400 | The document has been processed. |
| document_indexing | 400 | The document is being processed and cannot be edited. |
| invalid_metadata | 400 | The metadata content is incorrect. |
---
## 九、文档分段设置(上传时配置)
### API 支持的分段参数
在上传文档时,可以通过 `process_rule` 参数配置分段设置:
```typescript
{
indexing_technique: 'high_quality' | 'economy',
process_rule: {
mode: 'automatic' | 'custom',
rules: {
pre_processing_rules: [
{ id: 'remove_extra_spaces', enabled: boolean }, // 替换连续空格
{ id: 'remove_urls_emails', enabled: boolean } // 删除URL和邮件
],
segmentation: {
separator: string, // 分段标识符,如 "\\n\\n"
max_tokens: number // 分段最大长度,100-4000
}
}
}
}
```
### 功能支持情况
| 功能 | API 支持 | 参数 | 说明 |
|------|----------|------|------|
| 分段标识符 | ✅ | `separator` | 如 `\\n\\n`、`###` |
| 分段最大长度 | ✅ | `max_tokens` | 100-4000 |
| 替换连续空格 | ✅ | `remove_extra_spaces` | 预处理规则 |
| 删除URL和邮件 | ✅ | `remove_urls_emails` | 预处理规则 |
| 分段重叠长度 | ❌ | - | API 不支持 |
| Q&A 分段 | ⚠️ | `doc_form: "qa_model"` | 需特殊配置 |
### 重要限制
⚠️ **已有文档无法直接修改分段设置**
Dify API 不支持修改已上传文档的分段规则。如需应用新设置,必须:
1. 使用 `original_document_id` 参数重新上传文档
2. 或删除文档后重新上传
---
## 优先级建议
### 高优先级(核心功能)
1. ~~**检索知识库**~~ ✅ 已实现
2. ~~获取文档嵌入状态~~ ✅ 已实现
3. **创建空知识库** - 让用户能创建新的知识库
4. **获取嵌入模型列表** - 创建知识库时需要选择模型
### 中优先级(完善功能)
5. **通过文本创建文档** - 支持直接输入文本
6. ~~**新增分段**~~ ✅ 已实现
7. ~~修改知识库详情~~ ✅ 已实现
8. ~~获取上传文件信息~~ ✅ 已实现
### 低优先级(扩展功能)
9. ~~子分段管理系列~~ ✅ 已实现(API层)
10. 元数据管理系列
11. 标签管理系列
12. 文档更新功能