425 lines
11 KiB
TypeScript
425 lines
11 KiB
TypeScript
/**
 * Dify Dataset document API module
 *
 * Provides functions for calling the Dify document-management API from the
 * browser.
 *
 * @module api/dify-dataset/api/documentApi
 */
|
|
|
|
import axios from 'axios';
|
|
import type {
|
|
Document,
|
|
DocumentsResponse,
|
|
IndexingStatusResponse,
|
|
UploadFileInfo,
|
|
OperationResult,
|
|
} from '../type';
|
|
|
|
/**
 * Base URL that every request in this module is built on.
 */
const API_URL = '/api/dataset';
|
|
|
|
/**
 * Fetches one page of the documents that belong to a dataset (knowledge base).
 *
 * Issues `GET {API_URL}/datasets/{datasetId}/documents` with `page` and
 * `limit` in the query string; `keyword` is added only when it is truthy.
 * The request is sent with `withCredentials: true`.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param page - Page number, defaults to 1
 * @param limit - Items per page, defaults to 20
 * @param keyword - Optional search keyword
 * @returns The document-list response body
 */
export async function fetchDocuments(
  datasetId: string,
  page: number = 1,
  limit: number = 20,
  keyword?: string
): Promise<DocumentsResponse> {
  // Insertion order (page, limit, keyword) determines the query-string order.
  const query = new URLSearchParams();
  query.append('page', page.toString());
  query.append('limit', limit.toString());
  if (keyword) {
    query.append('keyword', keyword);
  }

  console.log('[Dataset Client] 获取文档列表:', { datasetId, page, limit, keyword });

  const url = `${API_URL}/datasets/${datasetId}/documents?${query}`;
  const { data } = await axios.get<DocumentsResponse>(url, { withCredentials: true });
  return data;
}
|
|
|
|
/**
 * Fetches the details of a single document.
 *
 * Issues `GET {API_URL}/datasets/{datasetId}/documents/{documentId}` with
 * `withCredentials: true`.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param documentId - Document ID
 * @returns The document detail response body
 */
export async function fetchDocument(
  datasetId: string,
  documentId: string
): Promise<Document> {
  const url = `${API_URL}/datasets/${datasetId}/documents/${documentId}`;
  const { data } = await axios.get<Document>(url, { withCredentials: true });
  return data;
}
|
|
|
|
/**
 * Deletes a document from a dataset.
 *
 * Issues `DELETE {API_URL}/datasets/{datasetId}/documents/{documentId}` with
 * `withCredentials: true`.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param documentId - Document ID
 * @returns The operation result returned by the server
 */
export async function deleteDocument(
  datasetId: string,
  documentId: string
): Promise<OperationResult> {
  console.log('[Dataset Client] 删除文档:', { datasetId, documentId });

  const url = `${API_URL}/datasets/${datasetId}/documents/${documentId}`;
  const { data } = await axios.delete<OperationResult>(url, { withCredentials: true });
  return data;
}
|
|
|
|
/**
 * Enables or disables a document.
 *
 * Dify API: PATCH /datasets/{dataset_id}/documents/status/{action}
 * where action is one of: enable / disable / archive / un_archive.
 * This function only ever sends `enable` or `disable`, and puts the single
 * document ID into the `document_ids` array of the JSON body.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param documentId - Document ID
 * @param enabled - `true` to enable the document, `false` to disable it
 * @returns The operation result returned by the server
 */
export async function toggleDocumentStatus(
  datasetId: string,
  documentId: string,
  enabled: boolean
): Promise<OperationResult> {
  let action: 'enable' | 'disable';
  if (enabled) {
    action = 'enable';
  } else {
    action = 'disable';
  }
  console.log('[Dataset Client] 切换文档状态:', { datasetId, documentId, action });

  const url = `${API_URL}/datasets/${datasetId}/documents/status/${action}`;
  const body = { document_ids: [documentId] };
  const { data } = await axios.patch<OperationResult>(url, body, {
    headers: { 'Content-Type': 'application/json' },
    withCredentials: true,
  });
  return data;
}
|
|
|
|
/**
 * Uploads a file to a dataset using the default configuration.
 *
 * Delegates to `uploadDocumentWithConfig` with
 * `indexing_technique: 'high_quality'` and an `automatic` process rule.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param file - File to upload
 * @param onProgress - Optional upload-progress callback (receives a percentage)
 * @returns Information about the created document
 */
export async function uploadDocument(
  datasetId: string,
  file: File,
  onProgress?: (percent: number) => void
): Promise<UploadDocumentResponse> {
  // Built per call so no config object is shared between uploads.
  const defaultConfig: UploadDocumentConfig = {
    indexing_technique: 'high_quality',
    process_rule: { mode: 'automatic' },
  };

  return uploadDocumentWithConfig(datasetId, file, defaultConfig, onProgress);
}
|
|
|
|
/**
 * Uploads a file to a dataset using a caller-supplied configuration.
 *
 * Sends `POST {API_URL}/datasets/{datasetId}/documents` as multipart form
 * data with two fields: `file` (the file itself) and `data` (the config
 * serialized as JSON).
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param file - File to upload
 * @param config - Upload configuration (indexing mode, segmentation rules, ...)
 * @param onProgress - Optional upload-progress callback (receives a percentage)
 * @returns Information about the created document, including the `batch`
 *          used to query indexing progress
 */
export async function uploadDocumentWithConfig(
  datasetId: string,
  file: File,
  config: UploadDocumentConfig,
  onProgress?: (percent: number) => void
): Promise<UploadDocumentResponse> {
  const payload = new FormData();
  payload.append('file', file);
  payload.append('data', JSON.stringify(config));

  console.log('[Dataset Client] 上传文档:', {
    datasetId,
    fileName: file.name,
    config,
  });

  const url = `${API_URL}/datasets/${datasetId}/documents`;
  const { data } = await axios.post<UploadDocumentResponse>(url, payload, {
    withCredentials: true,
    onUploadProgress: (progressEvent) => {
      // Without a known total no percentage can be computed, so nothing is reported.
      if (!progressEvent.total || !onProgress) {
        return;
      }
      onProgress(Math.round((progressEvent.loaded * 100) / progressEvent.total));
    },
  });
  return data;
}
|
|
|
|
/**
 * Fetches the embedding (indexing progress) status for an upload batch.
 *
 * Issues `GET {API_URL}/datasets/{datasetId}/documents/{batch}/indexing-status`
 * with `withCredentials: true`.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param batch - Batch identifier of the uploaded document(s)
 * @returns The indexing-status response body
 */
export async function fetchIndexingStatus(
  datasetId: string,
  batch: string
): Promise<IndexingStatusResponse> {
  console.log('[Dataset Client] 获取索引状态:', { datasetId, batch });

  const url = `${API_URL}/datasets/${datasetId}/documents/${batch}/indexing-status`;
  const { data } = await axios.get<IndexingStatusResponse>(url, { withCredentials: true });
  return data;
}
|
|
|
|
/**
 * Fetches the upload-file information of a document.
 *
 * Issues `GET {API_URL}/datasets/{datasetId}/documents/{documentId}/upload-file`
 * with `withCredentials: true`.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param documentId - Document ID
 * @returns The upload-file information response body
 */
export async function fetchUploadFileInfo(
  datasetId: string,
  documentId: string
): Promise<UploadFileInfo> {
  console.log('[Dataset Client] 获取上传文件信息:', { datasetId, documentId });

  const url = `${API_URL}/datasets/${datasetId}/documents/${documentId}/upload-file`;
  const { data } = await axios.get<UploadFileInfo>(url, { withCredentials: true });
  return data;
}
|
|
|
|
/**
 * Downloads the original file of a document.
 *
 * The file is fetched through the local proxy route rather than from Dify
 * directly: the part of `download_url` that follows `/files/` is appended to
 * `{API_URL}/dify-files/`, the response is read as a blob, and the blob is
 * wrapped in a `File`.
 *
 * @param uploadFileInfo - Upload-file information (obtained from
 *                         fetchUploadFileInfo)
 * @returns A `File` named `uploadFileInfo.name` (or `'document'` when empty)
 *          whose type is `uploadFileInfo.mime_type` (or
 *          `'application/octet-stream'` when empty)
 * @throws Error when `uploadFileInfo.download_url` is missing or empty
 */
export async function downloadOriginalFile(
  uploadFileInfo: UploadFileInfo
): Promise<File> {
  const originalUrl = uploadFileInfo.download_url;
  if (!originalUrl) {
    throw new Error('无法获取原始文件下载地址');
  }

  // download_url format:  /files/xxx/file-preview?...
  // proxy route format:   /api/dataset/dify-files/xxx/file-preview?...
  //
  // An optional "scheme://host" in front of "/files/" is stripped as well.
  // Previously an absolute URL did not match the prefix pattern and was
  // appended verbatim, producing an unusable proxy path such as
  // "/api/dataset/dify-files/https://host/files/...".
  const downloadPath = originalUrl.replace(
    /^(?:[A-Za-z][A-Za-z\d+.-]*:)?(?:\/\/[^/?#]*)?\/files\//,
    ''
  );
  const proxyUrl = `${API_URL}/dify-files/${downloadPath}`;

  console.log('[Dataset Client] 下载原始文件:', {
    originalUrl,
    proxyUrl,
  });

  const response = await axios.get(proxyUrl, {
    responseType: 'blob',
    withCredentials: true,
  });

  // `||` (not `??`) is intentional: an empty name or MIME type also falls back.
  return new File(
    [response.data],
    uploadFileInfo.name || 'document',
    { type: uploadFileInfo.mime_type || 'application/octet-stream' }
  );
}
|
|
|
|
/**
 * Identifier of a pre-processing rule.
 */
export type PreProcessingRuleId =
  | 'remove_extra_spaces'
  | 'remove_urls_emails';
|
|
|
|
/**
 * Configuration of a single pre-processing rule.
 */
export interface PreProcessingRule {
  /** Which pre-processing rule this entry refers to. */
  id: PreProcessingRuleId;
  /** Whether the rule is switched on. */
  enabled: boolean;
}
|
|
|
|
/**
 * Segmentation configuration.
 *
 * NOTE(review): field meanings below are inferred from the names and are
 * presumably those of Dify's segmentation rule — confirm against the API docs.
 */
export interface SegmentationConfig {
  /** Separator string. */
  separator: string;
  /** Token limit (presumably per segment). */
  max_tokens: number;
  /** Optional overlap between chunks. */
  chunk_overlap?: number;
}
|
|
|
|
/**
 * Custom processing rules.
 */
export interface CustomRules {
  /** Optional list of pre-processing rule settings. */
  pre_processing_rules?: PreProcessingRule[];
  /** Optional segmentation settings. */
  segmentation?: SegmentationConfig;
}
|
|
|
|
/**
 * Document processing rule configuration.
 */
export interface ProcessRule {
  /** Processing mode. */
  mode: 'automatic' | 'custom';
  /** Optional custom rules (presumably only relevant for `custom` mode — confirm). */
  rules?: CustomRules;
}
|
|
|
|
/**
 * Parameters for updating a document's settings. Both fields are optional.
 */
export interface UpdateDocumentSettings {
  /** Indexing technique to apply. */
  indexing_technique?: 'high_quality' | 'economy';
  /** Processing rule to apply. */
  process_rule?: ProcessRule;
}
|
|
|
|
/**
 * Configuration sent along with a document upload. Both fields are required.
 */
export interface UploadDocumentConfig {
  /** Indexing technique to use for the uploaded document. */
  indexing_technique: 'high_quality' | 'economy';
  /** Processing rule to use for the uploaded document. */
  process_rule: ProcessRule;
}
|
|
|
|
/**
 * Response of a document upload.
 */
export interface UploadDocumentResponse {
  /** The document record returned by the server. */
  document: {
    id: string;
    position: number;
    data_source_type: string;
    data_source_info: {
      upload_file_id: string;
    };
    dataset_process_rule_id: string;
    name: string;
    created_from: string;
    created_by: string;
    created_at: number;
    tokens: number;
    indexing_status: string;
    error: string | null;
    enabled: boolean;
    disabled_at: number | null;
    disabled_by: string | null;
    archived: boolean;
    display_status: string;
    word_count: number;
    hit_count: number;
    doc_form: string;
  };
  /** Batch identifier; used to query indexing progress. */
  batch: string;
}
|
|
|
|
/**
 * Updates a document's settings and triggers re-processing.
 *
 * Note: the Dify API does not directly support changing the segmentation
 * settings of an existing document. This function attempts to apply the new
 * settings through the update endpoint
 * `POST {API_URL}/datasets/{datasetId}/documents/{documentId}/update-settings`,
 * sending `settings` as the JSON body.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param documentId - Document ID
 * @param settings - Settings to apply
 * @returns The operation result returned by the server
 */
export async function updateDocumentWithSettings(
  datasetId: string,
  documentId: string,
  settings: UpdateDocumentSettings
): Promise<OperationResult> {
  console.log('[Dataset Client] 更新文档设置:', { datasetId, documentId, settings });

  const url = `${API_URL}/datasets/${datasetId}/documents/${documentId}/update-settings`;
  const { data } = await axios.post<OperationResult>(url, settings, {
    headers: { 'Content-Type': 'application/json' },
    withCredentials: true,
  });
  return data;
}
|
|
|
|
/**
 * Updates a document by re-uploading its file.
 *
 * Dify API: POST /datasets/{dataset_id}/documents/{document_id}/update-by-file
 *
 * Used to re-process a document with the same file after the user has changed
 * the segmentation parameters. The request is multipart form data with two
 * fields: `file` (the file itself) and `data` (the config serialized as JSON).
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param documentId - Document ID
 * @param file - File object (the originally uploaded file)
 * @param config - New segmentation configuration
 * @param onProgress - Optional upload-progress callback (receives a percentage)
 * @returns Updated document information, including the new `batch` used to
 *          query indexing progress
 */
export async function updateDocumentByFile(
  datasetId: string,
  documentId: string,
  file: File,
  config: UploadDocumentConfig,
  onProgress?: (percent: number) => void
): Promise<UploadDocumentResponse> {
  const payload = new FormData();
  payload.append('file', file);
  payload.append('data', JSON.stringify(config));

  console.log('[Dataset Client] 通过文件更新文档:', {
    datasetId,
    documentId,
    fileName: file.name,
    config,
  });

  const url = `${API_URL}/datasets/${datasetId}/documents/${documentId}/update-by-file`;
  const { data } = await axios.post<UploadDocumentResponse>(url, payload, {
    withCredentials: true,
    onUploadProgress: (progressEvent) => {
      // Without a known total no percentage can be computed, so nothing is reported.
      if (!progressEvent.total || !onProgress) {
        return;
      }
      onProgress(Math.round((progressEvent.loaded * 100) / progressEvent.total));
    },
  });
  return data;
}
|