Merge branch 'PingChuan' into shiy-login

This commit is contained in:
2025-12-05 00:05:40 +08:00
8 changed files with 392 additions and 96 deletions
+39
View File
@@ -225,6 +225,44 @@ export async function fetchUploadFileInfo(
return response.data;
}
/**
 * Download the original (source) file of a document.
 *
 * The file is not fetched from Dify directly; the request goes through this
 * app's proxy route (`${API_URL}/dify-files/...`) with credentials included.
 *
 * @param uploadFileInfo - Upload-file metadata (obtained from fetchUploadFileInfo).
 * @returns A File built from the downloaded blob. The name falls back to
 *   'document' and the MIME type to 'application/octet-stream' when the
 *   metadata does not provide them.
 * @throws Error when `uploadFileInfo.download_url` is missing or empty.
 */
export async function downloadOriginalFile(
  uploadFileInfo: UploadFileInfo
): Promise<File> {
  const downloadUrl = uploadFileInfo.download_url;
  if (!downloadUrl) {
    throw new Error('无法获取原始文件下载地址');
  }

  // download_url format: /files/xxx/file-preview?...
  // Mapped to the proxy route: /api/dataset/dify-files/xxx/file-preview?...
  //
  // The optional `scheme://host` group also accepts an absolute URL
  // (https://host/files/xxx/...). NOTE(review): presumably Dify returns that
  // form when a files base URL is configured — confirm against the deployment.
  // Without it the prefix was left in place and the proxy URL was malformed.
  const downloadPath = downloadUrl.replace(
    /^(?:https?:\/\/[^/?#]+)?\/files\//i,
    ''
  );
  const proxyUrl = `${API_URL}/dify-files/${downloadPath}`;

  console.log('[Dataset Client] 下载原始文件:', {
    originalUrl: downloadUrl,
    proxyUrl,
  });

  const response = await axios.get(proxyUrl, {
    responseType: 'blob',
    withCredentials: true,
  });

  // `||` (not `??`) is deliberate: an empty name / MIME type should also
  // fall back to the defaults.
  return new File(
    [response.data],
    uploadFileInfo.name || 'document',
    { type: uploadFileInfo.mime_type || 'application/octet-stream' }
  );
}
/**
* 预处理规则 ID
*/
@@ -244,6 +282,7 @@ export interface PreProcessingRule {
export interface SegmentationConfig {
// Delimiter at which the text is split into segments.
separator: string;
// Upper bound on the length of a single segment.
max_tokens: number;
// Overlap length between adjacent segments; optional.
chunk_overlap?: number;
}
/**
@@ -3,12 +3,12 @@ import {
Button,
InputNumber,
Checkbox,
Select,
Card,
Empty,
Spin,
Divider,
Tooltip,
Progress,
} from 'antd';
import {
QuestionCircleOutlined,
@@ -17,6 +17,7 @@ import {
} from '@ant-design/icons';
import { useDocumentDetail } from '~/hooks/dify-dataset-manager/document-detail';
import type { DocumentDetailProps } from '~/types/dify-dataset-manager/document-detail';
import { INDEXING_STATUS_CONFIG } from '~/types/dify-dataset-manager/document-detail';
/**
* 文档详情组件
@@ -32,6 +33,8 @@ export default function DocumentDetail({
previewLoading,
showPreview,
saving,
isProcessing,
indexingStatus,
updateSettings,
handleReset,
handlePreview,
@@ -56,7 +59,7 @@ export default function DocumentDetail({
<h3 className="section-title"></h3>
{/* 分块模式 */}
<div className="setting-item mode-selector">
{/* <div className="setting-item mode-selector">
<div className="mode-option active">
<div className="mode-icon">
<i className="ri-text-spacing"></i>
@@ -66,13 +69,13 @@ export default function DocumentDetail({
<span className="mode-desc">通用文本分块模式,检索和召回的块是相同的</span>
</div>
</div>
</div>
</div> */}
{/* 分段标识符 */}
<div className="setting-item">
<label className="setting-label">
<Tooltip title="系统会在遇到指定分隔符时自动分段,默认值为 \n\n(按段落分段)">
<Tooltip title="分隔符是用于分隔文本的字符。\n\n和 \n 是常用于分隔段落和行的分隔符。用逗号连接分隔符(\n\n,\n)当段落超过最大块长度时,会按行进行分割。你也可以使用自定义的特殊分隔符(例如 ***)">
<QuestionCircleOutlined className="help-icon" />
</Tooltip>
</label>
@@ -80,6 +83,7 @@ export default function DocumentDetail({
value={settings.separator}
onChange={(e) => updateSettings('separator', e.target.value)}
placeholder="\n\n"
disabled={isProcessing}
className="setting-input"
/>
</div>
@@ -98,6 +102,28 @@ export default function DocumentDetail({
onChange={(value) => updateSettings('maxTokens', value || 500)}
min={100}
max={4000}
disabled={isProcessing}
className="setting-input-number"
/>
<span className="input-suffix">characters</span>
</div>
</div>
{/* 分段重叠长度 */}
<div className="setting-item">
<label className="setting-label">
<Tooltip title="设置分段之间的重叠长度可以保留分段之间的语义关系,提升召回效果建议设置为最大分段长度的10%-25%">
<QuestionCircleOutlined className="help-icon" />
</Tooltip>
</label>
<div className="setting-input-with-suffix">
<InputNumber
value={settings.chunkOverlap}
onChange={(value) => updateSettings('chunkOverlap', value || 50)}
min={0}
max={500}
disabled={isProcessing}
className="setting-input-number"
/>
<span className="input-suffix">characters</span>
@@ -115,6 +141,7 @@ export default function DocumentDetail({
<Checkbox
checked={settings.removeExtraSpaces}
onChange={(e) => updateSettings('removeExtraSpaces', e.target.checked)}
disabled={isProcessing}
>
</Checkbox>
@@ -122,6 +149,7 @@ export default function DocumentDetail({
<Checkbox
checked={settings.removeUrlsEmails}
onChange={(e) => updateSettings('removeUrlsEmails', e.target.checked)}
disabled={isProcessing}
>
URL
</Checkbox>
@@ -130,27 +158,25 @@ export default function DocumentDetail({
<Divider />
{/* Q&A 分段 */}
{/* 索引方式 */}
<div className="settings-section">
<div className="qa-segment-row">
<Checkbox
checked={settings.useQASegment}
onChange={(e) => updateSettings('useQASegment', e.target.checked)}
<h3 className="section-title"></h3>
<div className="index-options">
<div
className={`index-option ${settings.indexingTechnique === 'high_quality' ? 'active' : ''} ${isProcessing ? 'disabled' : ''}`}
onClick={() => !isProcessing && updateSettings('indexingTechnique', 'high_quality')}
>
使 Q&A
</Checkbox>
<Select
value={settings.qaLanguage}
onChange={(value) => updateSettings('qaLanguage', value)}
disabled={!settings.useQASegment}
style={{ width: 120 }}
options={[
{ value: 'Chinese', label: 'Chinese' },
{ value: 'English', label: 'English' },
{ value: 'Japanese', label: 'Japanese' },
{ value: 'Korean', label: 'Korean' },
]}
/>
<span className="option-radio"></span>
<span className="option-label"></span>
<span className="option-badge recommended"></span>
</div>
<div
className={`index-option ${settings.indexingTechnique === 'economy' ? 'active' : ''} ${isProcessing ? 'disabled' : ''}`}
onClick={() => !isProcessing && updateSettings('indexingTechnique', 'economy')}
>
<span className="option-radio"></span>
<span className="option-label"></span>
</div>
</div>
</div>
@@ -160,12 +186,14 @@ export default function DocumentDetail({
icon={<EyeOutlined />}
onClick={handlePreview}
loading={previewLoading}
disabled={isProcessing}
>
</Button>
<Button
icon={<ReloadOutlined />}
onClick={handleReset}
disabled={isProcessing}
>
</Button>
@@ -179,12 +207,10 @@ export default function DocumentDetail({
type="primary"
onClick={handleSaveAndProcess}
loading={saving}
disabled={isProcessing}
block
>
</Button>
<Button block style={{ marginTop: 8 }}>
{isProcessing ? '处理中...' : '保存并处理'}
</Button>
</div>
</div>
@@ -195,12 +221,7 @@ export default function DocumentDetail({
title={
<div className="preview-header">
<span></span>
<Select
value={document.name}
style={{ width: 200 }}
disabled
options={[{ value: document.name, label: document.name }]}
/>
<div>{document.name}</div>
<span className="segment-count">
{showPreview ? `${previewSegments.length} 段块` : '0 段块'}
</span>
@@ -208,36 +229,59 @@ export default function DocumentDetail({
}
className="preview-card"
>
{previewLoading ? (
<div className="preview-loading">
<Spin size="large" />
<div className="loading-text">...</div>
</div>
) : !showPreview ? (
<div className="preview-empty">
<div className="empty-icon">
<EyeOutlined />
{/* 处理进度显示 */}
{isProcessing && indexingStatus && (
<div className="processing-status">
<div className="processing-title">
<Spin size="small" />
<span>...</span>
</div>
<Progress
percent={INDEXING_STATUS_CONFIG[indexingStatus]?.percent || 0}
status="active"
strokeColor={{ '0%': '#00684a', '100%': '#52c41a' }}
/>
<div className="status-text">
{INDEXING_STATUS_CONFIG[indexingStatus]?.text || '处理中...'}
</div>
<p>"预览块"</p>
</div>
) : previewSegments.length === 0 ? (
<Empty description="暂无分段数据" />
) : (
<div className="preview-segments">
{previewSegments.map((segment, index) => (
<div key={segment.id} className="segment-item">
<div className="segment-header">
<span className="segment-index">#{index + 1}</span>
<span className="segment-chars">
{segment.word_count}
</span>
</div>
<div className="segment-content">
{segment.content}
</div>
)}
{/* 预览内容 */}
{!isProcessing && (
<>
{previewLoading ? (
<div className="preview-loading">
<Spin size="large" />
<div className="loading-text">...</div>
</div>
))}
</div>
) : !showPreview ? (
<div className="preview-empty">
<div className="empty-icon">
<EyeOutlined />
</div>
<p>"预览块"</p>
</div>
) : previewSegments.length === 0 ? (
<Empty description="暂无分段数据" />
) : (
<div className="preview-segments">
{previewSegments.map((segment, index) => (
<div key={segment.id} className="segment-item">
<div className="segment-header">
<span className="segment-index">#{index + 1}</span>
<span className="segment-chars">
{segment.word_count}
</span>
</div>
<div className="segment-content">
{segment.content}
</div>
</div>
))}
</div>
)}
</>
)}
</Card>
</div>
@@ -1,29 +1,29 @@
import {
Button,
Input,
Table,
Tag,
Space,
Tooltip,
Popconfirm,
Switch,
Empty,
Spin,
} from 'antd';
import {
SearchOutlined,
ReloadOutlined,
CloudUploadOutlined,
DeleteOutlined,
FileTextOutlined,
CloudUploadOutlined,
EyeOutlined,
ReloadOutlined,
SearchOutlined,
UnorderedListOutlined
} from '@ant-design/icons';
import {
Button,
Empty,
Input,
Popconfirm,
Space,
Spin,
Switch,
Table,
Tag,
Tooltip,
} from 'antd';
import type { ColumnsType } from 'antd/es/table';
import type { Document, IndexingStatus } from '~/api/dify-dataset/type/documentTypes';
import { useDocumentList } from '~/hooks/dify-dataset-manager/document-list';
import type { DocumentListProps } from '~/types/dify-dataset-manager/document-list';
import DocumentUpload from './document-upload';
import '../../styles/components/dify-dataset-manager/index.css';
import DocumentUpload from './document-upload';
/**
* 文档列表组件
@@ -128,11 +128,11 @@ export default function DocumentList({
width: 120,
render: (_, record) => (
<Space size="small">
<Tooltip title="查看分段">
<Tooltip title="分段设置">
<Button
type="text"
size="small"
icon={<EyeOutlined />}
icon={<UnorderedListOutlined />}
onClick={() => onViewDocument?.(record)}
/>
</Tooltip>
@@ -237,7 +237,7 @@ export default function DocumentUpload({
<div className="setting-item">
<label className="setting-label">
<Tooltip title="相邻分段之间重叠的字符数,有助于保持上下文连贯性">
<Tooltip title="设置分段之间重叠长度可以保留分段之间的语义关系,提升召回效果建议设置为最大分段长度的10%-25%">
<QuestionCircleOutlined className="help-icon" />
</Tooltip>
</label>
@@ -1,8 +1,13 @@
import { message } from 'antd';
import { useCallback, useEffect, useState } from 'react';
import { updateDocumentWithSettings } from '~/api/dify-dataset/api/documentApi';
import { useCallback, useEffect, useRef, useState } from 'react';
import {
fetchUploadFileInfo,
downloadOriginalFile,
updateDocumentByFile,
fetchIndexingStatus,
} from '~/api/dify-dataset/api/documentApi';
import { fetchSegments } from '~/api/dify-dataset/api/segmentApi';
import type { Segment } from '~/api/dify-dataset/type';
import type { Segment, IndexingStatus } from '~/api/dify-dataset/type';
import type { Document } from '~/api/dify-dataset/type/documentTypes';
import type { DocumentDetailSegmentationSettings } from '~/types/dify-dataset-manager/document-detail';
import { DEFAULT_DOCUMENT_DETAIL_SETTINGS } from '~/types/dify-dataset-manager/document-detail';
@@ -22,6 +27,73 @@ export function useDocumentDetail(datasetId: string, document: Document | null)
// 保存状态
const [saving, setSaving] = useState(false);
// 处理状态(嵌入进度)
const [isProcessing, setIsProcessing] = useState(false);
const [indexingStatus, setIndexingStatus] = useState<IndexingStatus | null>(null);
const pollingTimerRef = useRef<ReturnType<typeof setInterval> | null>(null);
/**
 * Stop polling: clear the active interval timer (if any) and reset the ref.
 * Calling it when no timer is active is a no-op.
 */
const stopPolling = useCallback(() => {
  const activeTimer = pollingTimerRef.current;
  if (!activeTimer) {
    return;
  }
  clearInterval(activeTimer);
  pollingTimerRef.current = null;
}, []);
/**
 * Fetch the indexing status for `batch` once and react to it.
 *
 * - Records the status of the first entry returned for the batch.
 * - 'completed': stops polling, clears the processing flag, notifies the user
 *   and reloads the first 50 segments into the preview.
 * - 'error': stops polling, clears the processing flag and shows the error.
 * - Any other status: nothing more happens; polling continues.
 *
 * A failed status request is only logged; polling is not stopped.
 */
const pollIndexingStatus = useCallback(async (batch: string) => {
  try {
    const statusResponse = await fetchIndexingStatus(datasetId, batch);
    const current = statusResponse.data?.[0];
    if (!current) {
      return;
    }

    setIndexingStatus(current.indexing_status);

    switch (current.indexing_status) {
      case 'completed': {
        // Terminal state: stop polling.
        stopPolling();
        setIsProcessing(false);
        message.success('文档处理完成');

        // Refresh the segment preview.
        setPreviewLoading(true);
        try {
          const refreshed = await fetchSegments(datasetId, document?.id || '', 1, 50);
          setPreviewSegments(refreshed.data || []);
          setShowPreview(true);
        } catch (err) {
          console.error('刷新分段失败:', err);
        } finally {
          setPreviewLoading(false);
        }
        break;
      }
      case 'error': {
        // Terminal state: stop polling.
        stopPolling();
        setIsProcessing(false);
        message.error(current.error || '处理失败');
        break;
      }
      default:
        break;
    }
  } catch (err) {
    console.error('获取索引状态失败:', err);
  }
}, [datasetId, document?.id, stopPolling]);
/**
 * Start polling the indexing status of `batch`.
 * Any previous timer is cleared first; the status is then checked every
 * 2 seconds, plus once immediately.
 */
const startPolling = useCallback((batch: string) => {
  const tick = () => {
    pollIndexingStatus(batch);
  };

  stopPolling();
  pollingTimerRef.current = setInterval(tick, 2000);

  // Run once right away instead of waiting for the first interval.
  tick();
}, [stopPolling, pollIndexingStatus]);
// 当文档变化时重置设置
useEffect(() => {
if (document) {
@@ -29,8 +101,18 @@ export function useDocumentDetail(datasetId: string, document: Document | null)
setSettings(DEFAULT_DOCUMENT_DETAIL_SETTINGS);
setPreviewSegments([]);
setShowPreview(false);
setIsProcessing(false);
setIndexingStatus(null);
stopPolling();
}
}, [document?.id]);
}, [document?.id, stopPolling]);
// 组件卸载时清理定时器
useEffect(() => {
return () => {
stopPolling();
};
}, [stopPolling]);
/**
* 更新设置
@@ -73,14 +155,27 @@ export function useDocumentDetail(datasetId: string, document: Document | null)
/**
* 保存并处理
* 流程:获取原始文件 → 下载 → 用新参数重新上传 → 轮询嵌入状态
*/
const handleSaveAndProcess = useCallback(async () => {
if (!document) return;
setIsProcessing(true);
setSaving(true);
setIndexingStatus('waiting');
try {
await updateDocumentWithSettings(datasetId, document.id, {
indexing_technique: 'high_quality',
// 1. 获取原始文件信息
message.loading({ content: '正在获取原始文件信息...', key: 'save-process' });
const uploadFileInfo = await fetchUploadFileInfo(datasetId, document.id);
// 2. 下载原始文件(通过代理路由)
message.loading({ content: '正在下载原始文件...', key: 'save-process' });
const file = await downloadOriginalFile(uploadFileInfo);
// 3. 用新参数重新上传
message.loading({ content: '正在应用新配置并重新处理...', key: 'save-process' });
const result = await updateDocumentByFile(datasetId, document.id, file, {
indexing_technique: settings.indexingTechnique,
process_rule: {
mode: 'custom',
rules: {
@@ -91,18 +186,25 @@ export function useDocumentDetail(datasetId: string, document: Document | null)
segmentation: {
separator: settings.separator.replace(/\\n/g, '\n'),
max_tokens: settings.maxTokens,
chunk_overlap: settings.chunkOverlap,
},
},
},
});
message.success('设置已保存,文档正在重新处理...');
message.success({ content: '文档正在处理中...', key: 'save-process' });
// 4. 开始轮询嵌入状态
startPolling(result.batch);
} catch (err: any) {
console.error('保存设置失败:', err);
message.error(err.message || '保存失败');
message.error({ content: err.message || '保存失败', key: 'save-process' });
setIsProcessing(false);
setIndexingStatus(null);
} finally {
setSaving(false);
}
}, [datasetId, document, settings]);
}, [datasetId, document, settings, startPolling]);
return {
// 状态
@@ -111,6 +213,8 @@ export function useDocumentDetail(datasetId: string, document: Document | null)
previewLoading,
showPreview,
saving,
isProcessing,
indexingStatus,
// 方法
updateSettings,
+80
View File
@@ -0,0 +1,80 @@
import { type LoaderFunctionArgs } from '@remix-run/node';
import { API_BASE_URL } from '~/config/api-config';
/**
 * GET /api/dataset/dify-files/*
 * Proxy for Dify file download requests.
 *
 * Used to download original files from a Dify knowledge base:
 * /api/dataset/dify-files/xxx (query string included) is forwarded to
 * `${API_BASE_URL}/dify_file/xxx`, authenticated with the session's
 * frontend JWT as a Bearer token.
 *
 * Responses:
 * - 401 JSON when the session has no frontend JWT.
 * - 400 JSON when the splat path is empty or contains a `..` segment.
 * - Upstream status + JSON error when the upstream request is not OK.
 * - 200 with the upstream body streamed through, keeping the upstream
 *   Content-Type (default application/octet-stream) and Content-Disposition.
 * - 500 JSON on any unexpected failure.
 */
export async function loader({ request, params }: LoaderFunctionArgs): Promise<Response> {
  // Build a JSON error response with the given HTTP status.
  const jsonError = (error: string, status: number): Response =>
    new Response(JSON.stringify({ error }), {
      status,
      headers: { 'Content-Type': 'application/json' },
    });

  try {
    const { getUserSession } = await import("~/api/login/auth.server");
    const { frontendJWT } = await getUserSession(request);
    if (!frontendJWT) {
      return jsonError('JWT认证失败,请重新登录', 401);
    }

    // Full target = splat path + original query string.
    const url = new URL(request.url);
    const filePath = params['*'] || '';
    const queryString = url.search;

    // The path comes from the client and is spliced into the upstream URL:
    // reject an empty path and any attempt to climb out of /dify_file/.
    if (!filePath) {
      return jsonError('缺少文件路径', 400);
    }
    if (filePath.split('/').some((segment) => segment === '..')) {
      return jsonError('非法的文件路径', 400);
    }

    // Build the Dify file download URL.
    // The dedicated /dify_file/ proxy route is used because the Dify file API
    // (/files/) does not take the /v1 prefix, unlike the other APIs.
    const difyFileBase = `${API_BASE_URL}/dify_file/${filePath}`;
    const difyFileUrl = `${difyFileBase}${queryString}`;

    // The query string is deliberately kept out of the logs.
    // NOTE(review): it presumably carries the signed-download parameters — confirm.
    console.log('[API] Dify File Proxy:', { filePath });
    console.log('[API] Proxying to:', difyFileBase);

    // Forward the request upstream.
    const response = await fetch(difyFileUrl, {
      method: 'GET',
      headers: {
        'Authorization': `Bearer ${frontendJWT}`,
      },
    });

    if (!response.ok) {
      console.error('[API] Dify File Proxy - Error:', response.status, response.statusText);
      return jsonError(`文件下载失败: ${response.statusText}`, response.status);
    }

    // Keep the original Content-Type / Content-Disposition.
    const headers: Record<string, string> = {
      'Content-Type': response.headers.get('Content-Type') || 'application/octet-stream',
    };
    const contentDisposition = response.headers.get('Content-Disposition');
    if (contentDisposition) {
      headers['Content-Disposition'] = contentDisposition;
    }

    // Stream the upstream body through instead of buffering the whole file
    // in memory first.
    return new Response(response.body, {
      status: 200,
      headers,
    });
  } catch (error: unknown) {
    const detail = error instanceof Error ? error.message : '';
    console.error('[API] Dify File Proxy - Error:', detail || error);
    return jsonError(detail || 'Failed to download file', 500);
  }
}
@@ -1977,6 +1977,17 @@
margin-bottom: 24px;
}
/* Title row of the processing-status panel: a horizontally and vertically
   centered flex row with a 10px gap between its children. */
.processing-status .processing-title {
display: flex;
align-items: center;
justify-content: center;
gap: 10px;
font-size: 14px;
font-weight: 500;
color: #1a1a1a;
margin-bottom: 16px;
}
.processing-file {
display: flex;
align-items: center;
@@ -1,4 +1,4 @@
import type { Segment } from '~/api/dify-dataset/type';
import type { Segment, IndexingStatus } from '~/api/dify-dataset/type';
import type { Document } from '~/api/dify-dataset/type/documentTypes';
/**
@@ -11,20 +11,22 @@ export interface DocumentDetailProps {
/**
* 分段设置配置(文档详情专用)
* 注意:Dify API 支持的参数有限
* 注意:update-by-file API 支持的参数
* - separator: ✅ 支持
* - maxTokens: ✅ 支持
* - chunkOverlap: ✅ 支持(分段重叠长度)
* - removeExtraSpaces: ✅ 支持
* - removeUrlsEmails: ✅ 支持
* - useQASegment: ⚠️ 需要 doc_form: "qa_model"
* - indexingTechnique: ✅ 支持(high_quality/economy
* - doc_form/doc_language: ❌ 不支持(仅 create-by-file 支持)
*/
export interface DocumentDetailSegmentationSettings {
separator: string;
maxTokens: number;
chunkOverlap: number;
removeExtraSpaces: boolean;
removeUrlsEmails: boolean;
useQASegment: boolean;
qaLanguage: string;
indexingTechnique: 'high_quality' | 'economy';
}
/**
@@ -33,10 +35,10 @@ export interface DocumentDetailSegmentationSettings {
export const DEFAULT_DOCUMENT_DETAIL_SETTINGS: DocumentDetailSegmentationSettings = {
separator: '\\n\\n',
maxTokens: 500,
chunkOverlap: 50,
removeExtraSpaces: true,
removeUrlsEmails: false,
useQASegment: false,
qaLanguage: 'Chinese',
indexingTechnique: 'high_quality',
};
/**
@@ -48,4 +50,20 @@ export interface DocumentDetailState {
previewLoading: boolean;
showPreview: boolean;
saving: boolean;
isProcessing: boolean;
indexingStatus: IndexingStatus | null;
}
/**
 * Display configuration per indexing status: the status text shown to the
 * user and the percentage fed to the progress bar.
 *
 * The percentages are fixed per-stage values defined here, not progress
 * reported by the backend.
 */
export const INDEXING_STATUS_CONFIG: Record<IndexingStatus, { text: string; percent: number }> = {
waiting: { text: '等待处理...', percent: 10 },
parsing: { text: '解析文档...', percent: 30 },
cleaning: { text: '清洗文本...', percent: 50 },
splitting: { text: '分段处理...', percent: 70 },
indexing: { text: '建立索引...', percent: 85 },
completed: { text: '处理完成', percent: 100 },
// Non-progressing states reset the bar to 0.
paused: { text: '已暂停', percent: 0 },
error: { text: '处理失败', percent: 0 },
};