From 90317d2b4bae0fd9f920d077b4e87e1e6fa7e005 Mon Sep 17 00:00:00 2001 From: awen Date: Wed, 9 Apr 2025 02:25:49 +0800 Subject: [PATCH] =?UTF-8?q?=E6=8A=BD=E5=8F=96=E8=AE=BE=E7=BD=AE=E4=BC=98?= =?UTF-8?q?=E5=8C=96-=E6=9C=AA=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../rules/new/ExtractionSettings.tsx | 1262 +++++++++++------ 1 file changed, 799 insertions(+), 463 deletions(-) diff --git a/app/components/rules/new/ExtractionSettings.tsx b/app/components/rules/new/ExtractionSettings.tsx index e8e8b3a..bc064c7 100644 --- a/app/components/rules/new/ExtractionSettings.tsx +++ b/app/components/rules/new/ExtractionSettings.tsx @@ -1,7 +1,14 @@ -import { useState, KeyboardEvent, FormEvent, useContext, useEffect, useCallback, useRef } from 'react'; -import { RuleContext } from '~/contexts/RuleContext'; -import { processFieldName } from '~/utils'; -import type { PromptType, VLMFieldType } from '~/models/evaluation_points'; +import React, { + useState, + KeyboardEvent, + FormEvent, + useContext, + useEffect, + useCallback, + useRef, +} from "react"; +import { RuleContext } from "~/contexts/RuleContext"; +import type { VLMFieldType } from "~/models/evaluation_points"; // 定义通知函数的类型 type NotifyFn = (data: Record) => void; @@ -9,30 +16,56 @@ type NotifyFn = (data: Record) => void; // 添加防抖工具函数,使用简单的函数类型 const debounce = (fn: NotifyFn, ms = 300): NotifyFn => { let timeoutId: ReturnType; - return function(data: Record): void { + return function (data: Record): void { clearTimeout(timeoutId); timeoutId = setTimeout(() => fn(data), ms); }; }; +// 添加类型守卫函数 +const isVlmField = (field: string | VlmField): field is VlmField => { + return typeof field !== "string" && "name" in field && "type" in field; +}; + +// 安全处理字段名提取 - 使用不同的函数名避免冲突 +const safeProcessFieldName = (field: string | VlmField): string => { + if (isVlmField(field)) { + return field.name; // 如果是VlmField类型,直接返回name属性 + } else { + // 如果是字符串,假设格式为 name_type,尝试分割 + const parts = field.split("_"); + return parts[0]; // 返回字段名部分 + } +}; + /** * ExtractionSettings 组件 - * + * * 功能: * - 提供三种抽取设置方式:大模型抽取、多模态抽取和正则抽取 * - 允许在三个标签页中添加不同类型的字段 * - 统一的更新机制,确保点击"更新全部字段"按钮时,所有三种类型的字段都会被收集和更新 - * + * * 优化后的交互逻辑: * 1. 用户可以在三个标签页之间切换,在每个标签页中添加对应类型的字段 * 2. 添加字段后,会自动标记为"有未保存更改"状态 * 3. 无论当前在哪个标签页,点击底部的"更新全部字段"按钮都会收集所有三种类型的字段 * 4. 更新成功后会显示详细的字段数量统计信息,包括每种类型的字段数 * 5. 系统会自动检查字段名重复,确保所有字段名唯一 - * + * * 注意: * - 仅当点击"更新全部字段"按钮后,字段才会真正提交给父组件和规则上下文 * - 用户必须手动点击更新按钮,才能在评查设置中使用这些字段 + * + * 类型定义: + * - LogicType: 'and' | 'or' | 'custom' - 用于评查配置中多个规则的组合逻辑 + * - 'and': 所有规则都必须满足 + * - 'or': 任一规则满足即可 + * - 'custom': 自定义逻辑表达式,如 "(规则1 AND 规则2) OR 规则3" + * + * - LogicOperator: 'and' | 'or' - 用于单个规则内的条件组合 + * - 'and': 规则内所有条件都必须满足 + * - 'or': 规则内任一条件满足即可 */ interface RegexField { @@ -73,91 +106,155 @@ interface ExtractionSettingsProps { fields?: RegexField[]; }; }; - promptTypeOptions?: Array<{value: string, label: string}>; - vlmFieldTypeOptions?: Array<{value: string, label: string}>; + promptTypeOptions?: Array<{ value: string; label: string }>; + vlmFieldTypeOptions?: Array<{ value: string; label: string }>; } -export function ExtractionSettings({ - onChange, +// 更新类型定义 +type FormDataType = { + fields: { + llm: string[]; + vlm: (string | VlmField)[]; + }; + regexFields: RegexField[]; + promptType: { + llm: string; + vlm: string; + }; + promptContent: { + llm: string; + vlm: string; + }; + selectedTemplate: { + llm: string; + vlm: string; + }; +}; + +export function ExtractionSettings({ + onChange, initialData, - promptTypeOptions = [], - vlmFieldTypeOptions = [] + promptTypeOptions = [], + vlmFieldTypeOptions = [], }: ExtractionSettingsProps) { const ruleContext = useContext(RuleContext); - const lastUpdateTimeRef = useRef(0); // 添加一个ref来记录上次更新时间 + + // 核心数据状态 + const [formData, setFormData] = useState({ + // 字段配置 + fields: { + llm: initialData?.llm?.fields ?? [], + vlm: initialData?.vlm?.fields + ? // 处理两种可能的vlm字段格式 + Array.isArray(initialData.vlm.fields) && + initialData.vlm.fields.length > 0 + ? typeof initialData.vlm.fields[0] === "string" + ? initialData.vlm.fields as string[] + : (initialData.vlm.fields as VlmField[]).map((field) => + field.type && field.type !== "default" + ? `${field.name}_${field.type}` + : field.name + ) + : [] + : [], + }, + // 正则字段配置 + regexFields: (initialData?.regex?.fields && initialData.regex.fields.length > 0) + ? [...initialData.regex.fields] + : [{ field: "", pattern: "" }], + // 提示词配置 + promptType: { + llm: initialData?.llm?.prompt_setting?.type ?? "system", + vlm: initialData?.vlm?.prompt_setting?.type ?? "system", + }, + promptContent: { + llm: initialData?.llm?.prompt_setting?.template ?? "", + vlm: initialData?.vlm?.prompt_setting?.template ?? "", + }, + selectedTemplate: { + llm: "", + vlm: "", + }, + }); + + // 为了简化访问,解构出常用字段 + const { fields, regexFields, promptType, promptContent, selectedTemplate } = + formData; + + // UI状态和临时状态 + const [currentTab, setCurrentTab] = useState("llm"); + const [inputValue, setInputValue] = useState({ llm: "", vlm: "" }); + const [selectedFieldType, setSelectedFieldType] = useState("default"); + const [fieldEditStatus, setFieldEditStatus] = useState<{ + [id: string]: boolean; + }>({}); + const [statusMessage, setStatusMessage] = useState<{ + id: string; + message: string; + } | null>(null); + const [updateStatus, setUpdateStatus] = useState<{ + success: boolean; + message: string; + } | null>(null); + const [hasPendingChanges, setHasPendingChanges] = useState(false); + + // 引用值 + const lastUpdateTimeRef = useRef(0); const lastEventFieldsRef = useRef([]); const ignoreEmptyFieldsRef = useRef(false); - // 添加对防抖通知函数的引用,使用具体的函数类型 const debouncedNotifyParentRef = useRef(null); - - const [currentTab, setCurrentTab] = useState('llm'); - const [fields, setFields] = useState<{ [key: string]: Array }>({ - llm: [], - vlm: [], - }); - const [inputValue, setInputValue] = useState({ - llm: '', - vlm: '', - }); - const [selectedFieldType, setSelectedFieldType] = useState('default'); - const [regexFields, setRegexFields] = useState([{ field: '', pattern: '' }]); - const [promptType, setPromptType] = useState({ llm: 'system', vlm: 'system' }); - const [promptContent, setPromptContent] = useState({ llm: '', vlm: '' }); - const [selectedTemplate, setSelectedTemplate] = useState({ llm: '', vlm: '' }); - - // 添加状态记录每个字段的编辑状态 - const [fieldEditStatus, setFieldEditStatus] = useState<{[id: string]: boolean}>({}); - // 添加标记表示正在填写的字段ID const activeFieldRef = useRef(null); - // 添加定时器引用 const fieldEditResetTimeoutRef = useRef(null); - // 添加状态提示显示 - const [statusMessage, setStatusMessage] = useState<{id: string, message: string} | null>(null); - // 添加字段更新状态 - const [updateStatus, setUpdateStatus] = useState<{success: boolean, message: string} | null>(null); - // 添加待更新状态 - const [hasPendingChanges, setHasPendingChanges] = useState(false); // 加载初始数据 useEffect(() => { if (initialData) { - const newFields: { [key: string]: Array } = { - llm: initialData.llm?.fields || [], + const newFields: { llm: string[], vlm: (string | VlmField)[] } = { + llm: initialData.llm?.fields ?? [], vlm: [], }; - + // 处理vlm字段 if (initialData.vlm?.fields) { // 处理两种可能的格式:字符串数组或对象数组 if (Array.isArray(initialData.vlm.fields)) { if (initialData.vlm.fields.length > 0) { - if (typeof initialData.vlm.fields[0] === 'string') { + if (typeof initialData.vlm.fields[0] === "string") { // 如果是字符串数组,直接使用 newFields.vlm = initialData.vlm.fields as string[]; } else { // 如果是对象数组,转换为字符串数组 (name_type 格式) - newFields.vlm = (initialData.vlm.fields as VlmField[]).map(field => - field.type && field.type !== 'default' - ? `${field.name}_${field.type}` - : field.name + newFields.vlm = (initialData.vlm.fields as VlmField[]).map( + (field) => + field.type && field.type !== "default" + ? `${field.name}_${field.type}` + : field.name ); } } } } - - setFields(newFields); - setPromptType({ - llm: initialData.llm?.prompt_setting?.type || 'system', - vlm: initialData.vlm?.prompt_setting?.type || 'system', - }); - setPromptContent({ - llm: initialData.llm?.prompt_setting?.template || '', - vlm: initialData.vlm?.prompt_setting?.template || '', - }); - if (initialData.regex?.fields?.length) { - setRegexFields(initialData.regex.fields); - } + + // 安全地设置表单数据 + setFormData((prev) => ({ + ...prev, + fields: { + ...prev.fields, + llm: newFields.llm, + vlm: newFields.vlm, + }, + promptType: { + llm: initialData.llm?.prompt_setting?.type ?? prev.promptType.llm, + vlm: initialData.vlm?.prompt_setting?.type ?? prev.promptType.vlm, + }, + promptContent: { + llm: initialData.llm?.prompt_setting?.template ?? prev.promptContent.llm, + vlm: initialData.vlm?.prompt_setting?.template ?? prev.promptContent.vlm, + }, + regexFields: initialData.regex?.fields && initialData.regex.fields.length > 0 + ? [...initialData.regex.fields] + : prev.regexFields + })); } }, [initialData]); // 只依赖 initialData,避免 ruleContext 导致频繁触发 @@ -166,16 +263,19 @@ export function ExtractionSettings({ useEffect(() => { // 仅标记有未保存的更改,不立即触发onChange setHasPendingChanges(true); - + // 这些字段变化不会立即反映到父组件,只在点击更新按钮时才会提交 - }, [fields, regexFields]); + }, [formData.fields, formData.regexFields]); // 独立处理父组件传过来的初始数据 useEffect(() => { if (!initialData && ruleContext?.extractionFields?.length > 0) { - setFields((prevFields) => ({ - ...prevFields, - [currentTab]: [...ruleContext.extractionFields], + setFormData((prev) => ({ + ...prev, + fields: { + ...prev.fields, + [currentTab]: [...ruleContext.extractionFields], + }, })); } }, [ruleContext?.extractionFields, currentTab, initialData]); // 依赖具体属性而非整个 ruleContext @@ -184,18 +284,15 @@ export function ExtractionSettings({ const getAllFields = useCallback(() => { // 1. 收集大模型抽取字段 const llm_fields = fields.llm || []; - + // 2. 收集多模态抽取字段(去掉类型后缀) - const vlm_fields = (fields.vlm || []).map((field) => { - // 从字段名_类型格式中提取字段名部分 - return field.split('_')[0]; - }); - + const vlm_fields = (fields.vlm || []).map(safeProcessFieldName); + // 3. 收集正则抽取字段(仅保留有效字段) const regex_fields = regexFields - .filter((field) => field.field && field.field.trim() !== '') + .filter((field) => field.field && field.field.trim() !== "") .map((field) => field.field.trim()); - + // 4. 合并所有字段并确保唯一性(使用Set去重) // 这样即使用户在不同标签页添加了同名字段,最终也只会保留一个 return [...new Set([...llm_fields, ...vlm_fields, ...regex_fields])]; @@ -205,26 +302,28 @@ export function ExtractionSettings({ const isFieldNameExists = useCallback( (fieldName: string, excludeId?: string): boolean => { if (!fieldName || !fieldName.trim()) return false; - + const fieldNameTrimmed = fieldName.trim(); const fieldNameLower = fieldNameTrimmed.toLowerCase(); - + // 获取所有字段(不包括regexFields,这部分单独处理) const llm_fields = fields.llm || []; - const vlm_fields = (fields.vlm || []).map(processFieldName); - + const vlm_fields = (fields.vlm || []).map(safeProcessFieldName); + // 检查是否在其他类型字段中存在 - if (llm_fields.some(f => f.toLowerCase() === fieldNameLower) || - vlm_fields.some(f => f.toLowerCase() === fieldNameLower)) { + if ( + llm_fields.some((f) => f.toLowerCase() === fieldNameLower) || + vlm_fields.some((f) => f.toLowerCase() === fieldNameLower) + ) { return true; } - + // 检查是否在其他正则字段中存在(排除当前正在编辑的字段) const otherRegexFields = regexFields .filter((f) => !excludeId || f.field !== excludeId) - .map((f) => f.field ? f.field.trim() : ''); - - return otherRegexFields.some(f => f.toLowerCase() === fieldNameLower); + .map((f) => (f.field ? f.field.trim() : "")); + + return otherRegexFields.some((f) => f.toLowerCase() === fieldNameLower); }, [fields, regexFields] ); @@ -234,21 +333,23 @@ export function ExtractionSettings({ try { // 收集所有三种类型的字段,无论当前在哪个标签页 // 验证正则字段,只需要字段名有值即可,不要求正则表达式必须有值 - const validRegexFields = regexFields.filter(field => field.field && field.field.trim() !== ''); - + const validRegexFields = regexFields.filter( + (field) => field.field && field.field.trim() !== "" + ); + // 检查字段名称是否重复 const fieldNames = new Map(); let hasDuplicates = false; const duplicateFields: string[] = []; - + // 收集所有字段名 - 不受当前标签页影响,始终收集所有类型的字段 const allFieldNamesList = [ - ...fields.llm, - ...fields.vlm.map(f => processFieldName(f)), - ...validRegexFields.map(f => f.field.trim()) - ].filter(name => name); // 过滤空值 - - allFieldNamesList.forEach(name => { + ...fields.llm, + ...fields.vlm.map((f) => safeProcessFieldName(f)), + ...validRegexFields.map((f) => f.field.trim()), + ].filter((name) => name); // 过滤空值 + + allFieldNamesList.forEach((name) => { const lowercaseName = name.toLowerCase(); fieldNames.set(lowercaseName, (fieldNames.get(lowercaseName) || 0) + 1); if (fieldNames.get(lowercaseName)! > 1) { @@ -256,15 +357,17 @@ export function ExtractionSettings({ duplicateFields.push(name); } }); - + if (hasDuplicates) { setUpdateStatus({ success: false, - message: `发现重复字段: ${[...new Set(duplicateFields)].join(', ')},请修正后再更新` + message: `发现重复字段: ${[...new Set(duplicateFields)].join( + ", " + )},请修正后再更新`, }); return false; } - + // 更新有效的字段列表 - 确保获取所有三种类型的字段 const allFields = getAllFields(); @@ -273,51 +376,53 @@ export function ExtractionSettings({ const vlmCount = fields.vlm.length; const regexCount = validRegexFields.length; const totalCount = allFields.length; - + // 更新ruleContext if (ruleContext?.updateFields) { ruleContext.updateFields(allFields); } - + // 触发父组件的onChange回调 - 始终传递所有三种类型的字段数据 if (onChange) { onChange({ fields: { llm: fields.llm, - vlm: fields.vlm + vlm: fields.vlm, }, regexFields: validRegexFields, allFields, - pendingUpdate: false // 标记已完成更新 + pendingUpdate: false, // 标记已完成更新 }); } - + // 不再使用自定义事件,统一通过Context共享数据 - + // 更新上次发送的字段列表和时间 lastEventFieldsRef.current = [...allFields]; lastUpdateTimeRef.current = Date.now(); - + // 清除待更新状态 setHasPendingChanges(false); - + // 生成更详细的成功消息,列出每种类型的字段数量 setUpdateStatus({ success: true, - message: `已成功更新${totalCount}个字段(大模型字段: ${llmCount},多模态字段: ${vlmCount},正则字段: ${regexCount})` + message: `已成功更新${totalCount}个字段(大模型字段: ${llmCount},多模态字段: ${vlmCount},正则字段: ${regexCount})`, }); - + // 3秒后清除更新状态 setTimeout(() => { setUpdateStatus(null); }, 3000); - + return true; } catch (error) { - console.error('更新字段时出错:', error); + console.error("更新字段时出错:", error); setUpdateStatus({ success: false, - message: `更新失败: ${error instanceof Error ? error.message : '未知错误'}` + message: `更新失败: ${ + error instanceof Error ? error.message : "未知错误" + }`, }); return false; } @@ -326,11 +431,14 @@ export function ExtractionSettings({ // 初始化防抖函数 useEffect(() => { if (onChange) { - debouncedNotifyParentRef.current = debounce((data: Record) => { - onChange(data); - }, 500); // 500ms的防抖延迟 + debouncedNotifyParentRef.current = debounce( + (data: Record) => { + onChange(data); + }, + 500 + ); // 500ms的防抖延迟 } - + return () => { // 组件卸载时清理 debouncedNotifyParentRef.current = null; @@ -338,33 +446,36 @@ export function ExtractionSettings({ }, [onChange]); // 通知父组件的包装函数,使用防抖 - const notifyParent = useCallback((data: Record, immediate = false) => { - if (!onChange) return; - - if (immediate) { - // 对于需要立即响应的操作,直接调用onChange - onChange(data); - } else if (debouncedNotifyParentRef.current) { - // 对于可以延迟处理的操作,使用防抖函数 - debouncedNotifyParentRef.current(data); - } - }, [onChange]); + const notifyParent = useCallback( + (data: Record, immediate = false) => { + if (!onChange) return; + + if (immediate) { + // 对于需要立即响应的操作,直接调用onChange + onChange(data); + } else if (debouncedNotifyParentRef.current) { + // 对于可以延迟处理的操作,使用防抖函数 + debouncedNotifyParentRef.current(data); + } + }, + [onChange] + ); // 修改addField函数,使用防抖通知 - const addField = (type: 'llm' | 'vlm') => { + const addField = (type: "llm" | "vlm") => { const value = inputValue[type].trim(); if (!value) return; const newFields = { ...fields }; - if (type === 'llm') { + if (type === "llm") { // 大模型抽取支持一次性添加多个字段 const fieldsToAdd = value .split(/[\s、,]+/) .map((f) => f.trim()) .filter((f) => f && !isFieldNameExists(f)); if (fieldsToAdd.length === 0) { - alert('所有字段名已存在,请确保字段名称唯一'); + alert("所有字段名已存在,请确保字段名称唯一"); return; } newFields[type] = [...fields[type], ...fieldsToAdd]; @@ -376,290 +487,344 @@ export function ExtractionSettings({ } newFields[type] = [...fields[type], `${value}_${selectedFieldType}`]; } - - setFields(newFields); - setInputValue((prev) => ({ ...prev, [type]: '' })); - + + setFormData((prev) => ({ + ...prev, + fields: newFields, + })); + setInputValue((prev) => ({ ...prev, [type]: "" })); + // 标记有未保存的更改 setHasPendingChanges(true); - + // 添加字段后通知父组件,使用防抖 notifyParent({ fields: newFields, pendingUpdate: true, - allFields: getFieldsWithNewAddition(newFields, type === 'llm' ? value : `${value}_${selectedFieldType}`) + allFields: getFieldsWithNewAddition( + newFields, + type === "llm" ? value : `${value}_${selectedFieldType}` + ), }); }; // 新增辅助函数,计算包含新添加字段的完整字段列表 - const getFieldsWithNewAddition = (fieldsObj: {[key: string]: string[]}, newField: string) => { + const getFieldsWithNewAddition = ( + fieldsObj: { llm: string[]; vlm: (string | VlmField)[] }, + newField: string + ) => { // 收集大模型抽取字段 const llm_fields = fieldsObj.llm || []; - + // 收集多模态抽取字段(去掉类型后缀) - const vlm_fields = (fieldsObj.vlm || []).map((field) => { - // 从字段名_类型格式中提取字段名部分 - return field.split('_')[0]; - }); - + const vlm_fields = (fieldsObj.vlm || []).map(safeProcessFieldName); + // 收集正则抽取字段(仅保留有效字段) const regex_fields = regexFields - .filter((field) => field.field && field.field.trim() !== '') + .filter((field) => field.field && field.field.trim() !== "") .map((field) => field.field.trim()); - + // 添加新字段(处理新字段格式) - const newFieldName = newField.split('_')[0]; - + const newFieldName = newField.split("_")[0]; + // 合并所有字段并确保唯一性(使用Set去重) - return [...new Set([...llm_fields, ...vlm_fields, ...regex_fields, newFieldName])]; + return [ + ...new Set([...llm_fields, ...vlm_fields, ...regex_fields, newFieldName]), + ]; }; // 修改removeField函数,使用防抖通知 - const removeField = (type: 'llm' | 'vlm', index: number) => { + const removeField = (type: "llm" | "vlm", index: number) => { const newFields = { ...fields }; - const tempFields = [...fields[type]]; - // 保存被删除的字段,以便从allFields中移除 - const removedField = tempFields[index]; - tempFields.splice(index, 1); - newFields[type] = tempFields; - - setFields(newFields); - + + if (type === "llm") { + const tempFields = [...fields.llm]; + const removedField = tempFields[index]; + tempFields.splice(index, 1); + newFields.llm = tempFields; + + setFormData((prev) => ({ + ...prev, + fields: { + ...prev.fields, + llm: tempFields, + }, + })); + + // 删除字段后通知父组件,使用防抖 + notifyParent({ + fields: newFields, + pendingUpdate: true, + allFields: getFieldsWithRemoval(newFields, removedField), + }); + } else { + // vlm类型 + const tempFields = [...fields.vlm]; + const removedField = tempFields[index]; + tempFields.splice(index, 1); + newFields.vlm = tempFields; + + setFormData((prev) => ({ + ...prev, + fields: { + ...prev.fields, + vlm: tempFields, + }, + })); + + // 删除字段后通知父组件,使用防抖 + notifyParent({ + fields: newFields, + pendingUpdate: true, + allFields: getFieldsWithRemoval(newFields, removedField), + }); + } + // 标记有未保存的更改 setHasPendingChanges(true); - - // 删除字段后通知父组件,使用防抖 - notifyParent({ - fields: newFields, - pendingUpdate: true, - allFields: getFieldsWithRemoval(newFields, type === 'llm' ? removedField : removedField.split('_')[0]) - }); }; // 新增辅助函数,计算移除字段后的完整字段列表 - const getFieldsWithRemoval = (fieldsObj: {[key: string]: string[]}, removedField: string) => { + const getFieldsWithRemoval = ( + fieldsObj: { llm: string[]; vlm: (string | VlmField)[] }, + removedField: string | VlmField + ) => { // 收集大模型抽取字段 const llm_fields = fieldsObj.llm || []; - + // 收集多模态抽取字段(去掉类型后缀) - const vlm_fields = (fieldsObj.vlm || []).map((field) => { - // 从字段名_类型格式中提取字段名部分 - return field.split('_')[0]; - }); - + const vlm_fields = (fieldsObj.vlm || []).map(safeProcessFieldName); + // 收集正则抽取字段(仅保留有效字段) const regex_fields = regexFields - .filter((field) => field.field && field.field.trim() !== '') + .filter((field) => field.field && field.field.trim() !== "") .map((field) => field.field.trim()); - - // 移除字段处理:如果是多模态字段,提取字段名部分 - const fieldToRemove = removedField.split('_')[0]; - + + // 移除字段处理 + const fieldToRemove = safeProcessFieldName(removedField); + // 合并所有字段并确保唯一性(使用Set去重) - const allFields = [...new Set([...llm_fields, ...vlm_fields, ...regex_fields])]; - return allFields.filter(field => field !== fieldToRemove); + const allFields = [ + ...new Set([...llm_fields, ...vlm_fields, ...regex_fields]), + ]; + return allFields.filter((field) => field !== fieldToRemove); }; // 修改addRegexFieldRow函数,使用防抖通知 const addRegexFieldRow = () => { // 使用时间戳和随机数生成唯一ID const newId = `regex_${Date.now()}_${Math.floor(Math.random() * 100000)}`; - + // 设置标记表示正在添加新字段,临时忽略空字段检查 ignoreEmptyFieldsRef.current = true; // 记录正在添加的字段ID activeFieldRef.current = newId; // 标记新字段为编辑状态 - setFieldEditStatus(prev => ({ ...prev, [newId]: true })); - + setFieldEditStatus((prev) => ({ ...prev, [newId]: true })); + // 添加空字段但不会立即触发验证和更新 - setRegexFields(prev => { - const newFields = [...prev, { field: '', pattern: '' }]; - - // 延迟聚焦到新添加的字段 - setTimeout(() => { - const input = document.getElementById(`regex-field-name-${newId}`); - if (input) { - input.focus(); - } - - // 设置一个非常长的超时时间,确保字段不会因为空值而被自动删除 - // 用户需要手动点击更新按钮才会处理这些字段 - setTimeout(() => { - ignoreEmptyFieldsRef.current = false; - }, 3600000); // 设置为1小时,基本上确保用户有足够的时间完成编辑 - }, 50); - - return newFields; - }); - + setFormData((prev) => ({ + ...prev, + regexFields: [...prev.regexFields, { field: "", pattern: "" }], + })); + // 手动触发一次onChange,确保父组件知道我们添加了新字段 // 但不触发完整的字段验证和更新,此处立即通知,不使用防抖 - notifyParent({ - regexFields: [...regexFields, { field: '', pattern: '' }], - pendingUpdate: true // 标记有待更新的内容 - }, true); + notifyParent( + { + regexFields: [...regexFields, { field: "", pattern: "" }], + pendingUpdate: true, // 标记有待更新的内容 + }, + true + ); }; const removeRegexFieldRow = (id: string) => { if (regexFields.length <= 1) return; - + // 先保存更新前的状态,以便通知父组件 - const updatedRegexFields = regexFields.filter((field) => field.field !== id); - - setRegexFields(updatedRegexFields); - + const updatedRegexFields = regexFields.filter( + (field) => field.field !== id + ); + + setFormData((prev) => ({ + ...prev, + regexFields: updatedRegexFields, + })); + // 标记有未保存的更改 setHasPendingChanges(true); - + // 删除字段后通知父组件,使用立即通知模式确保立即删除 - notifyParent({ - regexFields: updatedRegexFields, - pendingUpdate: true, - allFields: getAllFields().filter(field => { - // 找到被删除的字段名 - const deletedField = regexFields.find(f => f.field === id); - return deletedField ? field !== deletedField.field.trim() : true; - }) - }, true); // 使用立即通知,确保字段立即删除 + notifyParent( + { + regexFields: updatedRegexFields, + pendingUpdate: true, + allFields: getAllFields().filter((field) => { + // 找到被删除的字段名 + const deletedField = regexFields.find((f) => f.field === id); + return deletedField ? field !== deletedField.field.trim() : true; + }), + }, + true + ); // 使用立即通知,确保字段立即删除 }; // 修改updateRegexField函数,使用防抖通知 - const updateRegexField = (id: string, key: 'field' | 'pattern', value: string) => { + const updateRegexField = ( + id: string, + key: "field" | "pattern", + value: string + ) => { // 标记此字段为正在编辑状态 - setFieldEditStatus(prev => ({ ...prev, [id]: true })); + setFieldEditStatus((prev) => ({ ...prev, [id]: true })); // 记录当前活动字段ID activeFieldRef.current = id; - - setRegexFields((prev) => - prev.map((field) => (field.field === id ? { ...field, [key]: value } : field)) - ); - + + setFormData((prev) => ({ + ...prev, + regexFields: regexFields.map((field) => + field.field === id ? { ...field, [key]: value } : field + ), + })); + // 标记有未保存的更改 setHasPendingChanges(true); - + // 如果状态消息是关于这个字段的,且该字段有内容了,则清除状态消息 - if (statusMessage?.id === id && value.trim() !== '') { + if (statusMessage?.id === id && value.trim() !== "") { setTimeout(() => { setStatusMessage(null); }, 1500); } - + // 如果用户正在输入,重置编辑状态计时器 if (fieldEditResetTimeoutRef.current) { clearTimeout(fieldEditResetTimeoutRef.current); } - + // 设置一个更长的超时时间,给用户充分的编辑时间 fieldEditResetTimeoutRef.current = setTimeout(() => { // 检查字段是否已填写完成 - const currentField = regexFields.find(f => f.field === id); + const currentField = regexFields.find((f) => f.field === id); if (currentField) { // 只有当字段名有值时,才考虑将字段标记为完成状态 - if (currentField.field && currentField.field.trim() !== '') { + if (currentField.field && currentField.field.trim() !== "") { // 即使正则为空,也不要自动删除字段,只是更新编辑状态 - setFieldEditStatus(prev => ({ ...prev, [id]: false })); + setFieldEditStatus((prev) => ({ ...prev, [id]: false })); if (activeFieldRef.current === id) { activeFieldRef.current = null; } - + // 如果正则为空,提示用户填写,但不删除字段 - if (!currentField.pattern || currentField.pattern.trim() === '') { + if (!currentField.pattern || currentField.pattern.trim() === "") { setStatusMessage({ id, - message: "正则表达式为空,此字段会保留但不会执行抽取。" + message: "正则表达式为空,此字段会保留但不会执行抽取。", }); - + // 5秒后自动隐藏提示 setTimeout(() => { - setStatusMessage(current => current?.id === id ? null : current); + setStatusMessage((current) => + current?.id === id ? null : current + ); }, 5000); } } } }, 3600000); // 设置为1小时,确保用户有足够时间完成编辑 - + // 每次字段更新都触发onChange,确保父组件知道字段状态变化,使用防抖 notifyParent({ - regexFields: regexFields.map(field => + regexFields: regexFields.map((field) => field.field === id ? { ...field, [key]: value } : field ), - pendingUpdate: true + pendingUpdate: true, }); }; // 修改handleRegexFieldBlur函数,使用防抖通知 - const handleRegexFieldBlur = (id: string, key: 'field' | 'pattern') => { + const handleRegexFieldBlur = (id: string, key: "field" | "pattern") => { // 如果用户从正则表达式字段离开并且字段名和正则都已填写,则标记字段编辑完成 const field = regexFields.find((f) => f.field === id); if (!field) return; - - if (key === 'field') { + + if (key === "field") { // 如果字段名为空,不进行任何操作,保留字段 - if (!field.field || field.field.trim() === '') { + if (!field.field || field.field.trim() === "") { return; } - + // 检查重复字段 if (isFieldNameExists(field.field, id)) { alert(`字段名 "${field.field.trim()}" 已存在,请确保字段名称唯一`); - setRegexFields((prev) => - prev.map((f) => (f.field === id ? { ...f, field: '' } : f)) - ); - - // 通知父组件字段已更新,此处立即通知,不使用防抖 - notifyParent({ - regexFields: regexFields.map(f => - f.field === id ? { ...f, field: '' } : f + setFormData((prev) => ({ + ...prev, + regexFields: prev.regexFields.map((f) => + f.field === id ? { ...f, field: "" } : f ), - pendingUpdate: true - }, true); - } else if (field.field.trim() !== '') { + })); + + // 通知父组件字段已更新,此处立即通知,不使用防抖 + notifyParent( + { + regexFields: regexFields.map((f) => + f.field === id ? { ...f, field: "" } : f + ), + pendingUpdate: true, + }, + true + ); + } else if (field.field.trim() !== "") { // 如果字段名不为空且不重复,通知父组件字段已更新,使用防抖 notifyParent({ - regexFields, - pendingUpdate: true + regexFields: regexFields, + pendingUpdate: true, }); } - } else if (key === 'pattern') { + } else if (key === "pattern") { // 如果字段名和正则都已填写,标记为完成状态 - if (field.field && field.field.trim() !== '') { + if (field.field && field.field.trim() !== "") { // 即使正则为空,也不要自动删除字段 setTimeout(() => { // 只有当正则不为空时,才显示完成提示 - if (field.pattern && field.pattern.trim() !== '') { - setFieldEditStatus(prev => ({ ...prev, [id]: false })); + if (field.pattern && field.pattern.trim() !== "") { + setFieldEditStatus((prev) => ({ ...prev, [id]: false })); if (activeFieldRef.current === id) { activeFieldRef.current = null; } - + // 显示完成提示 setStatusMessage({ id, - message: "字段配置完成" + message: "字段配置完成", }); - + // 2秒后自动隐藏提示 setTimeout(() => { - setStatusMessage(current => current?.id === id ? null : current); + setStatusMessage((current) => + current?.id === id ? null : current + ); }, 2000); } else { // 正则为空时,显示提示但不删除字段 setStatusMessage({ id, - message: "未设置正则表达式,此字段会保留但不会执行抽取。" + message: "未设置正则表达式,此字段会保留但不会执行抽取。", }); - + // 5秒后自动隐藏提示 setTimeout(() => { - setStatusMessage(current => current?.id === id ? null : current); + setStatusMessage((current) => + current?.id === id ? null : current + ); }, 5000); } - + // 不管正则是否为空,都通知父组件字段已更新,使用防抖 notifyParent({ - regexFields, - pendingUpdate: true + regexFields: regexFields, + pendingUpdate: true, }); }, 200); } @@ -669,95 +834,119 @@ export function ExtractionSettings({ const applyRegexTemplate = (regex: string) => { const lastField = regexFields[regexFields.length - 1]; if (lastField) { - updateRegexField(lastField.field, 'pattern', regex); + updateRegexField(lastField.field, "pattern", regex); } }; const getFieldInfo = (field: string) => { - const [fieldName, fieldType = 'default'] = field.split('_'); + const [fieldName, fieldType = "default"] = field.split("_"); const typeName = { - default: '默认', - seal: '印章', - 'cross-seal': '骑缝章', - handwriting: '手写体', - print: '印刷体', - english: '英文', - number: '数字', - currency: '货币', - }[fieldType] || '默认'; + default: "默认", + seal: "印章", + "cross-seal": "骑缝章", + handwriting: "手写体", + print: "印刷体", + english: "英文", + number: "数字", + currency: "货币", + }[fieldType] || "默认"; const badgeClass = { - default: 'bg-blue-100 text-blue-800', - seal: 'bg-red-100 text-red-800', - 'cross-seal': 'bg-red-100 text-red-800', - handwriting: 'bg-yellow-100 text-yellow-800', - print: 'bg-purple-100 text-purple-800', - english: 'bg-indigo-100 text-indigo-800', - number: 'bg-gray-100 text-gray-800', - currency: 'bg-green-100 text-green-800', - }[fieldType] || 'bg-blue-100 text-blue-800'; + default: "bg-blue-100 text-blue-800", + seal: "bg-red-100 text-red-800", + "cross-seal": "bg-red-100 text-red-800", + handwriting: "bg-yellow-100 text-yellow-800", + print: "bg-purple-100 text-purple-800", + english: "bg-indigo-100 text-indigo-800", + number: "bg-gray-100 text-gray-800", + currency: "bg-green-100 text-green-800", + }[fieldType] || "bg-blue-100 text-blue-800"; return { fieldName, fieldType, typeName, badgeClass }; }; - const handlePromptTypeChange = (e: FormEvent, type: 'llm' | 'vlm') => { + const handleTemplateChange = ( + e: FormEvent, + type: "llm" | "vlm" + ) => { const value = e.currentTarget.value; - setPromptType((prev) => ({ ...prev, [type]: value })); - // 标记有未保存的更改,但不触发onChange - setHasPendingChanges(true); - }; - - const handleTemplateChange = (e: FormEvent, type: 'llm' | 'vlm') => { - const value = e.currentTarget.value; - setSelectedTemplate((prev) => ({ ...prev, [type]: value })); + setFormData((prev) => ({ + ...prev, + selectedTemplate: { ...prev.selectedTemplate, [type]: value }, + })); if (value) { const templateData = getPromptTemplateById(Number(value)); if (templateData) { let content = templateData.template_content; - if (content.includes('{fieldsList}') && fields[type].length > 0) { + if (content.includes("{fieldsList}") && fields[type].length > 0) { const fieldListStr = - type === 'llm' - ? fields[type].map((field, idx) => `${idx + 1}. ${field}`).join('\n') + type === "llm" + ? fields[type] + .map((field, idx) => `${idx + 1}. ${field}`) + .join("\n") : fields[type] .map((field, idx) => { - const { fieldName, typeName } = getFieldInfo(field); + const { fieldName, typeName } = getFieldInfo( + typeof field === "string" + ? field + : `${field.name}_${field.type}` + ); return `${idx + 1}. ${fieldName} (${typeName})`; }) - .join('\n'); - content = content.replace('{fieldsList}', fieldListStr); + .join("\n"); + content = content.replace("{fieldsList}", fieldListStr); } - setPromptContent((prev) => ({ ...prev, [type]: content })); + setFormData((prev) => ({ + ...prev, + promptContent: { ...prev.promptContent, [type]: content }, + })); // 标记有未保存的更改,但不触发onChange setHasPendingChanges(true); } } else { - setPromptContent((prev) => ({ ...prev, [type]: '' })); + setFormData((prev) => ({ + ...prev, + promptContent: { ...prev.promptContent, [type]: "" }, + })); // 标记有未保存的更改,但不触发onChange setHasPendingChanges(true); } }; - const handlePromptContentChange = (e: FormEvent, type: 'llm' | 'vlm') => { + const handlePromptContentChange = ( + e: FormEvent, + type: "llm" | "vlm" + ) => { const value = e.currentTarget.value; - setPromptContent((prev) => ({ ...prev, [type]: value })); + setFormData((prev) => ({ + ...prev, + promptContent: { ...prev.promptContent, [type]: value }, + })); // 标记有未保存的更改,但不触发onChange setHasPendingChanges(true); }; - const applyVariableToPrompt = (variable: string, type: 'llm' | 'vlm') => { + const applyVariableToPrompt = (variable: string, type: "llm" | "vlm") => { const textarea = document.getElementById( - type === 'llm' ? 'llm-prompt-content' : 'multimodal-prompt-content' + type === "llm" ? "llm-prompt-content" : "multimodal-prompt-content" ) as HTMLTextAreaElement; if (textarea) { const start = textarea.selectionStart; const end = textarea.selectionEnd; const text = textarea.value; - const newText = text.substring(0, start) + `{${variable}}` + text.substring(end); - setPromptContent((prev) => ({ ...prev, [type]: newText })); + const newText = + text.substring(0, start) + `{${variable}}` + text.substring(end); + setFormData((prev) => ({ + ...prev, + promptContent: { ...prev.promptContent, [type]: newText }, + })); setTimeout(() => { textarea.focus(); - textarea.setSelectionRange(start + variable.length + 2, start + variable.length + 2); + textarea.setSelectionRange( + start + variable.length + 2, + start + variable.length + 2 + ); }, 0); // 标记有未保存的更改,但不触发onChange setHasPendingChanges(true); @@ -768,44 +957,44 @@ export function ExtractionSettings({ const templates: Record = { 1: { id: 1, - template_name: '行政处罚-抽取通用模板', - template_type: 'Extraction', + template_name: "行政处罚-抽取通用模板", + template_type: "Extraction", template_content: `你是一个专业的文档信息抽取助手。请从以下{docType}文档中抽取关键信息:\n{fieldsList}\n请将结果以JSON格式输出,包含以上字段。如果某个字段在文档中未找到,则该字段的值设为null。`, }, 4: { id: 4, - template_name: '采购合同-乙方资质抽取', - template_type: 'Extraction', + template_name: "采购合同-乙方资质抽取", + template_type: "Extraction", template_content: `你是一个专业的合同信息抽取助手。请从以下{docType}中抽取乙方的资质信息:\n需要抽取的信息包括:\n{fieldsList}\n{companyName}要求所有供应商必须提供完整的资质信息。请将结果以JSON格式输出,包含以上字段。`, }, 5: { id: 5, - template_name: '合同-关键条款抽取', - template_type: 'Extraction', + template_name: "合同-关键条款抽取", + template_type: "Extraction", template_content: `请作为{industry}行业的专业合同审核员,从提供的{docType}中提取以下关键条款信息:\n{fieldsList}\n文档ID: {documentId}\n审核日期: {date}\n请以JSON格式输出结果,对于未明确指定的条款需标记为"未明确约定"。`, }, 6: { id: 6, - template_name: '烟草许可证-信息抽取', - template_type: 'Extraction', + template_name: "烟草许可证-信息抽取", + template_type: "Extraction", template_content: `请从下列烟草专卖许可证文件中抽取以下关键信息:\n{fieldsList}\n这些信息将用于{companyName}内部数据库更新。请确保许可证编号和有效期格式准确无误。`, }, 7: { id: 7, - template_name: '多模态-印章识别模板', - template_type: 'Multimodal', + template_name: "多模态-印章识别模板", + template_type: "Multimodal", template_content: `请识别并提取文档中的所有印章信息,包括:\n{fieldsList}\n文档类型: {docType}\n页面范围: {pageRange}\n请注意区分公章、法人章和合同专用章,并分析印章的清晰度和完整性。`, }, 8: { id: 8, - template_name: '多模态-表格抽取模板', - template_type: 'Multimodal', + template_name: "多模态-表格抽取模板", + template_type: "Multimodal", template_content: `请从文档中的表格提取以下信息:\n{fieldsList}\n文档类型: {docType}\n表格可能跨页,请确保完整提取所有内容。表格中的数值需保留原始精度。`, }, 9: { id: 9, - template_name: '多模态-手写内容识别模板', - template_type: 'Multimodal', + template_name: "多模态-手写内容识别模板", + template_type: "Multimodal", template_content: `请识别文档中的手写内容,特别关注:\n{fieldsList}\n文档类型: {docType}\n内容类型: {contentType}\n对于难以辨认的手写内容,请标注为"[难以辨认]"并尽可能给出可能的解读。`, }, }; @@ -813,8 +1002,11 @@ export function ExtractionSettings({ }; // 修复缺失的handleKeyDown函数 - const handleKeyDown = (e: KeyboardEvent, type: 'llm' | 'vlm') => { - if (e.key === 'Enter') { + const handleKeyDown = ( + e: KeyboardEvent, + type: "llm" | "vlm" + ) => { + if (e.key === "Enter") { e.preventDefault(); addField(type); } @@ -826,49 +1018,56 @@ export function ExtractionSettings({ if (validateAndUpdateFields()) { // 当更新成功时,才传递字段数据到父组件 // 保留所有有字段名的正则字段,包括那些正则表达式为空的字段 - const validRegexFields = regexFields.filter(field => field.field && field.field.trim() !== ''); - + const validRegexFields = regexFields.filter( + (field) => field.field && field.field.trim() !== "" + ); + if (onChange) { // 更新按钮点击时使用立即通知,不使用防抖 - notifyParent({ - fields: { - llm: fields.llm, - vlm: fields.vlm - }, - regexFields: validRegexFields, - allFields: getAllFields(), - pendingUpdate: false, // 标记已完成更新 - - // 同时提交提示词设置 - promptType, - promptContent, - promptSettings: { - llm: { - type: promptType.llm, - content: promptContent.llm, - template: selectedTemplate.llm + notifyParent( + { + fields: { + llm: fields.llm, + vlm: fields.vlm, }, - vlm: { - type: promptType.vlm, - content: promptContent.vlm, - template: selectedTemplate.vlm - } - } - }, true); - + regexFields: validRegexFields, + allFields: getAllFields(), + pendingUpdate: false, // 标记已完成更新 + + // 同时提交提示词设置 + promptType, + promptContent, + promptSettings: { + llm: { + type: promptType.llm, + content: promptContent.llm, + template: selectedTemplate.llm, + }, + vlm: { + type: promptType.vlm, + content: promptContent.vlm, + template: selectedTemplate.vlm, + }, + }, + }, + true + ); + // 更新完成后,取消所有编辑状态 setFieldEditStatus({}); // 清除活动字段引用 activeFieldRef.current = null; // 重置待更新状态 setHasPendingChanges(false); - + // 显示成功提示,包含字段数量统计 setUpdateStatus({ success: true, - message: `更新成功!共更新字段 ${getAllFields().length} 个 (大模型: ${fields.llm.length}, 多模态: ${fields.vlm.length}, 正则: ${validRegexFields.length})` + message: `更新成功!共更新字段 ${getAllFields().length} 个 (大模型: ${ + fields.llm.length + }, 多模态: ${fields.vlm.length}, 正则: ${validRegexFields.length})`, }); - + // 5秒后自动隐藏成功提示 setTimeout(() => { setUpdateStatus(null); @@ -879,12 +1078,15 @@ export function ExtractionSettings({ const handleTabChange = (tab: string) => { setCurrentTab(tab); - + // 不触发父组件的onChange回调,只记录当前标签页,使界面切换 // onChange?.({ extractionMethod: tab }); }; - const handleFieldInputChange = (e: FormEvent, type: 'llm' | 'vlm') => { + const handleFieldInputChange = ( + e: FormEvent, + type: "llm" | "vlm" + ) => { setInputValue({ ...inputValue, [type]: e.currentTarget.value }); }; @@ -893,7 +1095,7 @@ export function ExtractionSettings({ }; // 在渲染选择模态字段类型的下拉列表时使用vlmFieldTypeOptions - const renderVlmFieldTypeSelect = (field: string, index: number) => { + const renderVlmFieldTypeSelect = () => { return ( ); }; - + // 在渲染提示词类型的选择器时使用promptTypeOptions - const renderPromptTypeSelect = (type: string, promptType: 'llm' | 'vlm') => { + const renderPromptTypeSelect = ( + type: string, + promptTypeKey: "llm" | "vlm" + ) => { return ( handleTemplateChange(e, 'llm')} + onChange={(e) => handleTemplateChange(e, "llm")} > @@ -1068,7 +1298,10 @@ export function ExtractionSettings({
-