// File: leaudit-platform-frontend/app/api/dify-dataset/api/documentApi.ts
// (425 lines, 11 KiB, TypeScript)

/**
* Dify Dataset 文档 API 模块
*
* 提供浏览器端调用 Dify 文档管理 API 的函数
*
* @module api/dify-dataset/api/documentApi
*/
import axios from 'axios';
import type {
Document,
DocumentsResponse,
IndexingStatusResponse,
UploadFileInfo,
OperationResult,
} from '../type';
/**
 * Base URL prefix shared by every request this module sends.
 */
const API_URL = '/api/dataset';
/**
 * Fetches one page of documents from a dataset (knowledge base).
 *
 * The dataset ID is percent-encoded before it is placed in the URL path, so an
 * ID containing reserved characters (`/`, `?`, `#`, ...) cannot change the
 * request path or leak into the query string.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param page - Page number; defaults to 1
 * @param limit - Number of items per page; defaults to 20
 * @param keyword - Optional search keyword; left out of the query when empty
 * @returns The body of the document-list response
 * @throws Request errors are not caught here and propagate to the caller
 */
export async function fetchDocuments(
  datasetId: string,
  page: number = 1,
  limit: number = 20,
  keyword?: string
): Promise<DocumentsResponse> {
  const query = new URLSearchParams({
    page: page.toString(),
    limit: limit.toString(),
  });
  if (keyword) {
    query.append('keyword', keyword);
  }

  console.log('[Dataset Client] 获取文档列表:', { datasetId, page, limit, keyword });

  const response = await axios.get<DocumentsResponse>(
    `${API_URL}/datasets/${encodeURIComponent(datasetId)}/documents?${query}`,
    { withCredentials: true }
  );
  return response.data;
}
/**
 * Fetches the details of a single document.
 *
 * Both IDs are percent-encoded before being placed in the URL path so that
 * reserved characters in an ID cannot redirect the request to another route.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param documentId - Document ID
 * @returns The body of the document-detail response
 * @throws Request errors are not caught here and propagate to the caller
 */
export async function fetchDocument(
  datasetId: string,
  documentId: string
): Promise<Document> {
  const url =
    `${API_URL}/datasets/${encodeURIComponent(datasetId)}` +
    `/documents/${encodeURIComponent(documentId)}`;
  const response = await axios.get<Document>(url, { withCredentials: true });
  return response.data;
}
/**
 * Deletes a document from a dataset.
 *
 * Both IDs are percent-encoded before being placed in the URL path; this
 * matters most for a destructive call, where a crafted ID must not be able to
 * point the DELETE at a different resource.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param documentId - Document ID
 * @returns The operation result returned by the server
 * @throws Request errors are not caught here and propagate to the caller
 */
export async function deleteDocument(
  datasetId: string,
  documentId: string
): Promise<OperationResult> {
  console.log('[Dataset Client] 删除文档:', { datasetId, documentId });

  const url =
    `${API_URL}/datasets/${encodeURIComponent(datasetId)}` +
    `/documents/${encodeURIComponent(documentId)}`;
  const response = await axios.delete<OperationResult>(url, { withCredentials: true });
  return response.data;
}
/**
 * Enables or disables a document.
 *
 * Dify API: PATCH /datasets/{dataset_id}/documents/status/{action}
 * where action is one of enable / disable / archive / un_archive. This
 * function only ever sends `enable` or `disable`.
 *
 * The dataset ID is percent-encoded before being placed in the URL path; the
 * document ID travels in the JSON body and needs no encoding.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param documentId - Document ID
 * @param enabled - `true` to enable the document, `false` to disable it
 * @returns The operation result returned by the server
 * @throws Request errors are not caught here and propagate to the caller
 */
export async function toggleDocumentStatus(
  datasetId: string,
  documentId: string,
  enabled: boolean
): Promise<OperationResult> {
  const action = enabled ? 'enable' : 'disable';
  console.log('[Dataset Client] 切换文档状态:', { datasetId, documentId, action });

  const response = await axios.patch<OperationResult>(
    `${API_URL}/datasets/${encodeURIComponent(datasetId)}/documents/status/${action}`,
    { document_ids: [documentId] },
    {
      headers: { 'Content-Type': 'application/json' },
      withCredentials: true,
    }
  );
  return response.data;
}
/**
 * Uploads a file to a dataset using the default configuration.
 *
 * Delegates to `uploadDocumentWithConfig` with `high_quality` indexing and the
 * `automatic` processing rule.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param file - File to upload
 * @param onProgress - Optional upload-progress callback, forwarded unchanged
 * @returns Information about the created document
 */
export async function uploadDocument(
  datasetId: string,
  file: File,
  onProgress?: (percent: number) => void
): Promise<UploadDocumentResponse> {
  // `as const` keeps the literal types so the object still matches the config unions.
  const defaultConfig = {
    indexing_technique: 'high_quality',
    process_rule: { mode: 'automatic' },
  } as const;

  return uploadDocumentWithConfig(datasetId, file, defaultConfig, onProgress);
}
/**
 * Uploads a file to a dataset using a caller-supplied configuration.
 *
 * The request body is multipart form data with two fields: `file` (the file
 * itself) and `data` (the configuration serialized as JSON).
 *
 * The dataset ID is percent-encoded before being placed in the URL path so
 * that reserved characters in the ID cannot change the request route.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param file - File to upload
 * @param config - Upload configuration (indexing technique, processing rule)
 * @param onProgress - Optional callback receiving the upload progress as a
 *   rounded percentage; only invoked for progress events that report a total
 * @returns Information about the created document, including the `batch`
 *   value used to query indexing progress
 * @throws Request errors are not caught here and propagate to the caller
 */
export async function uploadDocumentWithConfig(
  datasetId: string,
  file: File,
  config: UploadDocumentConfig,
  onProgress?: (percent: number) => void
): Promise<UploadDocumentResponse> {
  const formData = new FormData();
  formData.append('file', file);
  formData.append('data', JSON.stringify(config));

  console.log('[Dataset Client] 上传文档:', {
    datasetId,
    fileName: file.name,
    config,
  });

  const response = await axios.post<UploadDocumentResponse>(
    `${API_URL}/datasets/${encodeURIComponent(datasetId)}/documents`,
    formData,
    {
      withCredentials: true,
      onUploadProgress: (progressEvent) => {
        // Without a known total no percentage can be computed, so the event is skipped.
        if (progressEvent.total && onProgress) {
          const percent = Math.round((progressEvent.loaded * 100) / progressEvent.total);
          onProgress(percent);
        }
      },
    }
  );
  return response.data;
}
/**
 * Fetches the embedding (indexing) status for an upload batch.
 *
 * The dataset ID and the batch value are percent-encoded before being placed
 * in the URL path so that reserved characters cannot change the request route.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param batch - Batch value returned when the document was uploaded
 * @returns The body of the indexing-status response
 * @throws Request errors are not caught here and propagate to the caller
 */
export async function fetchIndexingStatus(
  datasetId: string,
  batch: string
): Promise<IndexingStatusResponse> {
  console.log('[Dataset Client] 获取索引状态:', { datasetId, batch });

  const url =
    `${API_URL}/datasets/${encodeURIComponent(datasetId)}` +
    `/documents/${encodeURIComponent(batch)}/indexing-status`;
  const response = await axios.get<IndexingStatusResponse>(url, { withCredentials: true });
  return response.data;
}
/**
 * Fetches the upload-file information of a document.
 *
 * Both IDs are percent-encoded before being placed in the URL path so that
 * reserved characters in an ID cannot change the request route.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param documentId - Document ID
 * @returns The upload-file information returned by the server
 * @throws Request errors are not caught here and propagate to the caller
 */
export async function fetchUploadFileInfo(
  datasetId: string,
  documentId: string
): Promise<UploadFileInfo> {
  console.log('[Dataset Client] 获取上传文件信息:', { datasetId, documentId });

  const url =
    `${API_URL}/datasets/${encodeURIComponent(datasetId)}` +
    `/documents/${encodeURIComponent(documentId)}/upload-file`;
  const response = await axios.get<UploadFileInfo>(url, { withCredentials: true });
  return response.data;
}
/**
 * Downloads the original file of a document through the proxy route.
 *
 * `download_url` is expected to look like `/files/xxx/file-preview?...` and is
 * rewritten to `/api/dataset/dify-files/xxx/file-preview?...`. An absolute
 * URL (`http(s)://host/files/xxx/...`) is accepted as well: its origin is
 * stripped together with the `/files/` prefix, so the proxy path comes out the
 * same as for the relative form.
 *
 * @param uploadFileInfo - Upload-file information (obtained from `fetchUploadFileInfo`)
 * @returns A `File` built from the downloaded blob; the name falls back to
 *   `document` and the MIME type to `application/octet-stream` when missing
 * @throws Error when `uploadFileInfo.download_url` is empty or missing
 * @throws Request errors are not caught here and propagate to the caller
 */
export async function downloadOriginalFile(
  uploadFileInfo: UploadFileInfo
): Promise<File> {
  if (!uploadFileInfo.download_url) {
    throw new Error('无法获取原始文件下载地址');
  }

  // Drop an optional scheme+host and the leading `/files/` so only the part
  // understood by the proxy route remains.
  const downloadPath = uploadFileInfo.download_url.replace(
    /^(?:https?:\/\/[^/]+)?\/files\//i,
    ''
  );
  const proxyUrl = `${API_URL}/dify-files/${downloadPath}`;

  console.log('[Dataset Client] 下载原始文件:', {
    originalUrl: uploadFileInfo.download_url,
    proxyUrl,
  });

  const response = await axios.get(proxyUrl, {
    responseType: 'blob',
    withCredentials: true,
  });

  return new File(
    [response.data],
    uploadFileInfo.name || 'document',
    { type: uploadFileInfo.mime_type || 'application/octet-stream' }
  );
}
/**
 * Identifier of a pre-processing rule; only the two literal values below are accepted.
 */
export type PreProcessingRuleId = 'remove_extra_spaces' | 'remove_urls_emails';
/**
 * Configuration of a single pre-processing rule: which rule, and whether it is switched on.
 */
export interface PreProcessingRule {
/** Which pre-processing rule this entry configures. */
id: PreProcessingRuleId;
/** Whether the rule is enabled. */
enabled: boolean;
}
/**
 * Segmentation (chunking) configuration.
 *
 * NOTE(review): the field meanings below are inferred from their names
 * (chunk separator, token limit per chunk, overlap between chunks) — confirm
 * against the Dify API documentation.
 */
export interface SegmentationConfig {
separator: string;
max_tokens: number;
/** Optional; may be omitted. */
chunk_overlap?: number;
}
/**
 * Custom processing rules: an optional list of pre-processing rules and an
 * optional segmentation configuration.
 */
export interface CustomRules {
pre_processing_rules?: PreProcessingRule[];
segmentation?: SegmentationConfig;
}
/**
 * Document processing rule configuration.
 */
export interface ProcessRule {
/** Processing mode; `uploadDocument` in this module uses `automatic`. */
mode: 'automatic' | 'custom';
/** Custom rules — presumably only meaningful when `mode` is `custom`; confirm against the Dify API. */
rules?: CustomRules;
}
/**
 * Settings payload accepted by `updateDocumentWithSettings`; both fields are optional.
 */
export interface UpdateDocumentSettings {
indexing_technique?: 'high_quality' | 'economy';
process_rule?: ProcessRule;
}
/**
 * Upload configuration. `uploadDocumentWithConfig` and `updateDocumentByFile`
 * serialize it as JSON into the `data` field of the multipart form.
 */
export interface UploadDocumentConfig {
indexing_technique: 'high_quality' | 'economy';
process_rule: ProcessRule;
}
/**
 * Response body of the document upload requests; the return type of
 * `uploadDocument`, `uploadDocumentWithConfig` and `updateDocumentByFile`.
 */
export interface UploadDocumentResponse {
/** The created (or updated) document as reported by the server. */
document: {
id: string;
position: number;
data_source_type: string;
data_source_info: {
upload_file_id: string;
};
dataset_process_rule_id: string;
name: string;
created_from: string;
created_by: string;
// NOTE(review): presumably a Unix timestamp — unit (seconds vs. milliseconds) not visible here; confirm.
created_at: number;
tokens: number;
indexing_status: string;
error: string | null;
enabled: boolean;
disabled_at: number | null;
disabled_by: string | null;
archived: boolean;
display_status: string;
word_count: number;
hit_count: number;
doc_form: string;
};
/** Batch value; pass it to `fetchIndexingStatus` to query indexing progress. */
batch: string;
}
/**
 * Updates a document's settings and triggers reprocessing.
 *
 * Note: the Dify API does not directly support changing the segmentation
 * settings of an existing document; this function attempts to apply the new
 * settings through the `update-settings` endpoint.
 *
 * Both IDs are percent-encoded before being placed in the URL path so that
 * reserved characters in an ID cannot change the request route.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param documentId - Document ID
 * @param settings - Settings to apply; sent as the JSON request body
 * @returns The operation result returned by the server
 * @throws Request errors are not caught here and propagate to the caller
 */
export async function updateDocumentWithSettings(
  datasetId: string,
  documentId: string,
  settings: UpdateDocumentSettings
): Promise<OperationResult> {
  console.log('[Dataset Client] 更新文档设置:', { datasetId, documentId, settings });

  const url =
    `${API_URL}/datasets/${encodeURIComponent(datasetId)}` +
    `/documents/${encodeURIComponent(documentId)}/update-settings`;
  const response = await axios.post<OperationResult>(url, settings, {
    headers: { 'Content-Type': 'application/json' },
    withCredentials: true,
  });
  return response.data;
}
/**
 * Updates an existing document by re-uploading its file.
 *
 * Dify API: POST /datasets/{dataset_id}/documents/{document_id}/update-by-file
 *
 * Used after the user changes the segmentation parameters, to reprocess the
 * document from the same file. The request body is multipart form data with
 * two fields: `file` and `data` (the configuration serialized as JSON).
 *
 * Both IDs are percent-encoded before being placed in the URL path so that
 * reserved characters in an ID cannot change the request route.
 *
 * @param datasetId - Dataset (knowledge base) ID
 * @param documentId - Document ID
 * @param file - File to upload (the originally uploaded file)
 * @param config - New processing configuration
 * @param onProgress - Optional callback receiving the upload progress as a
 *   rounded percentage; only invoked for progress events that report a total
 * @returns The updated document information, including the new `batch` value
 *   used to query indexing progress
 * @throws Request errors are not caught here and propagate to the caller
 */
export async function updateDocumentByFile(
  datasetId: string,
  documentId: string,
  file: File,
  config: UploadDocumentConfig,
  onProgress?: (percent: number) => void
): Promise<UploadDocumentResponse> {
  const formData = new FormData();
  formData.append('file', file);
  formData.append('data', JSON.stringify(config));

  console.log('[Dataset Client] 通过文件更新文档:', {
    datasetId,
    documentId,
    fileName: file.name,
    config,
  });

  const url =
    `${API_URL}/datasets/${encodeURIComponent(datasetId)}` +
    `/documents/${encodeURIComponent(documentId)}/update-by-file`;
  const response = await axios.post<UploadDocumentResponse>(url, formData, {
    withCredentials: true,
    onUploadProgress: (progressEvent) => {
      // Without a known total no percentage can be computed, so the event is skipped.
      if (progressEvent.total && onProgress) {
        const percent = Math.round((progressEvent.loaded * 100) / progressEvent.total);
        onProgress(percent);
      }
    },
  });
  return response.data;
}