diff --git a/app/components/rules/new/ExtractionSettings.tsx b/app/components/rules/new/ExtractionSettings.tsx index ca90f65..535a5cb 100644 --- a/app/components/rules/new/ExtractionSettings.tsx +++ b/app/components/rules/new/ExtractionSettings.tsx @@ -1,4 +1,4 @@ -import { useState, KeyboardEvent, FormEvent, useContext, useEffect } from 'react'; +import { useState, KeyboardEvent, FormEvent, useContext, useEffect, useCallback } from 'react'; import { RuleContext } from './ReviewSettings'; interface RegexField { @@ -22,239 +22,146 @@ interface ExtractionSettingsProps { prompt_setting?: { type?: string; template?: string; - } + }; }; llm_vl?: { fields?: string[]; prompt_setting?: { type?: string; template?: string; - } + }; }; ocr_regex?: { fields?: RegexField[]; - } + }; }; } export function ExtractionSettings({ onChange, initialData }: ExtractionSettingsProps) { - // 使用RuleContext获取全局状态 const ruleContext = useContext(RuleContext); - + const [currentTab, setCurrentTab] = useState('llm_ocr'); - const [fields, setFields] = useState<{[key: string]: string[]}>({ + const [fields, setFields] = useState<{ [key: string]: string[] }>({ llm_ocr: [], - llm: [] + llm: [], }); const [inputValue, setInputValue] = useState({ llm_ocr: '', - llm: '' + llm: '', }); const [selectedFieldType, setSelectedFieldType] = useState('default'); - const [regexFields, setRegexFields] = useState([ - { id: '1', fieldName: '', regex: '' } - ]); - - // 提示词相关状态 - const [promptType, setPromptType] = useState({ - llm_ocr: 'system', - llm: 'system' - }); - const [promptContent, setPromptContent] = useState({ - llm_ocr: '', - llm: '' - }); - const [selectedTemplate, setSelectedTemplate] = useState({ - llm_ocr: '', - llm: '' - }); + const [regexFields, setRegexFields] = useState([{ id: '1', fieldName: '', regex: '' }]); + const [promptType, setPromptType] = useState({ llm_ocr: 'system', llm: 'system' }); + const [promptContent, setPromptContent] = useState({ llm_ocr: '', llm: '' }); + const [selectedTemplate, setSelectedTemplate] = useState({ llm_ocr: '', llm: '' }); // 加载初始数据 useEffect(() => { if (initialData) { - // 设置字段数据 const newFields = { llm_ocr: initialData.llm_ocr?.fields || [], - llm: initialData.llm_vl?.fields || [] + llm: initialData.llm_vl?.fields || [], }; setFields(newFields); - - // 设置提示词类型 setPromptType({ llm_ocr: initialData.llm_ocr?.prompt_setting?.type || 'system', - llm: initialData.llm_vl?.prompt_setting?.type || 'system' + llm: initialData.llm_vl?.prompt_setting?.type || 'system', }); - - // 设置提示词内容 setPromptContent({ llm_ocr: initialData.llm_ocr?.prompt_setting?.template || '', - llm: initialData.llm_vl?.prompt_setting?.template || '' + llm: initialData.llm_vl?.prompt_setting?.template || '', }); - - // 设置正则字段 - if (initialData.ocr_regex && initialData.ocr_regex.fields && initialData.ocr_regex.fields.length > 0) { - const regexFieldsWithIds = initialData.ocr_regex.fields.map((field: RegexField, index: number) => ({ - id: (index + 1).toString(), - fieldName: field.fieldName || '', - regex: field.regex || '' - })); - setRegexFields(regexFieldsWithIds); - } - - // 更新全局字段列表 - const allFields = [ - ...newFields.llm_ocr, - ...newFields.llm, - ...(initialData.ocr_regex?.fields || []).map((f: RegexField) => f.fieldName) - ].filter(Boolean); - - if (ruleContext && ruleContext.updateFields) { - ruleContext.updateFields(allFields); + if (initialData.ocr_regex?.fields?.length) { + setRegexFields( + initialData.ocr_regex.fields.map((field: RegexField, index: number) => ({ + id: (index + 1).toString(), + fieldName: field.fieldName || '', + regex: field.regex || '', + })) + ); } } - }, [initialData, ruleContext?.updateFields]); + }, [initialData]); // 只依赖 initialData,避免 ruleContext 导致频繁触发 - // 在组件首次加载时更新字段 + // 从 Context 初始化字段(仅在无 initialData 时) useEffect(() => { - if (!initialData) { - updateAllFields(); - } - }, []); - - // 当组件首次加载时,如果Context中已有字段数据,则使用Context数据初始化 - useEffect(() => { - if (!initialData && ruleContext && ruleContext.extractionFields.length > 0) { - // 将Context中的字段数据添加到当前激活的抽取方式中 - setFields(prevFields => ({ + if (!initialData && ruleContext?.extractionFields?.length > 0) { + setFields((prevFields) => ({ ...prevFields, - [currentTab]: [...ruleContext.extractionFields] + [currentTab]: [...ruleContext.extractionFields], })); } - }, []); + }, [ruleContext?.extractionFields, currentTab, initialData]); // 依赖具体属性而非整个 ruleContext - // 获取所有可用字段(合并大模型、多模态和正则抽取的字段) - const getAllFields = (): string[] => { - // 从大模型OCR抽取中获取字段 + // 获取所有字段(使用 useCallback 稳定函数引用) + const getAllFields = useCallback(() => { const llm_ocr_fields = fields.llm_ocr || []; - - // 从多模态字段中提取基本字段名(去除类型后缀) - const llm_fields = (fields.llm || []).map(field => { - const [fieldName] = field.split('_'); - return fieldName; - }); - - // 获取正则字段名 - const regex_fields = regexFields - .map(field => field.fieldName) - .filter(name => name.trim() !== ''); - - // 合并并去重 - const allFields = [...new Set([...llm_ocr_fields, ...llm_fields, ...regex_fields])]; - console.log("所有可用字段:", allFields); - return allFields; - }; + const llm_fields = (fields.llm || []).map((field) => field.split('_')[0]); + const regex_fields = regexFields.map((field) => field.fieldName).filter((name) => name.trim() !== ''); + return [...new Set([...llm_ocr_fields, ...llm_fields, ...regex_fields])]; + }, [fields, regexFields]); - // 检查字段名是否存在(精确匹配) - const isFieldNameExists = (fieldName: string, excludeId?: string): boolean => { - // 获取所有字段名称(不转换为小写) - const existingFields = getAllFields(); - - // 检查精确匹配(区分大小写) - for (const existingField of existingFields) { - // 严格相等比较,确保完全匹配而不是部分匹配 - if (existingField === fieldName) { - console.log(`字段名 '${fieldName}' 在现有字段中存在(严格匹配)`); + // 检查字段名是否存在 + const isFieldNameExists = useCallback( + (fieldName: string, excludeId?: string): boolean => { + const allFields = getAllFields(); + if (allFields.includes(fieldName)) return true; + + const otherRegexFields = regexFields + .filter((f) => !excludeId || f.id !== excludeId) + .map((f) => f.fieldName); + if (otherRegexFields.includes(fieldName)) return true; + + const fieldNameLower = fieldName.toLowerCase(); + if ( + allFields.some((f) => f.toLowerCase() === fieldNameLower) || + otherRegexFields.some((f) => f.toLowerCase() === fieldNameLower) + ) { return true; } - } + return false; + }, + [getAllFields, regexFields] + ); - // 检查正则字段组中的其他字段(精确匹配) - // 排除当前正在编辑的字段ID - const otherRegexFields = regexFields - .filter(f => !excludeId || f.id !== excludeId) - .map(f => f.fieldName); - - for (const regexField of otherRegexFields) { - // 严格相等比较 - if (regexField === fieldName) { - console.log(`字段名 '${fieldName}' 在正则字段中存在(严格匹配)`); - return true; - } - } + // 更新全局字段和触发 onChange(使用防抖) + useEffect(() => { + const timeout = setTimeout(() => { + const allFields = getAllFields(); + ruleContext?.updateFields?.(allFields); - // 不区分大小写的检查(保留这部分功能,但仍然是精确匹配) - const fieldNameLower = fieldName.toLowerCase(); - const existingFieldsLower = existingFields.map(f => f.toLowerCase()); - const otherRegexFieldsLower = otherRegexFields.map(f => f.toLowerCase()); - - if (existingFieldsLower.includes(fieldNameLower)) { - console.log(`字段名 '${fieldName}' 在现有字段中存在(不区分大小写)`); - return true; - } - - if (otherRegexFieldsLower.includes(fieldNameLower)) { - console.log(`字段名 '${fieldName}' 在正则字段中存在(不区分大小写)`); - return true; - } + document.dispatchEvent( + new CustomEvent('extraction-fields-updated', { + detail: { + fields: allFields, + tab: currentTab, + fieldsData: { + llm_ocr: fields.llm_ocr || [], + llm: fields.llm || [], + regex: regexFields.map((f) => f.fieldName).filter((name) => name.trim() !== ''), + }, + }, + }) + ); - return false; - }; - - // 更新所有抽取字段到Context - const updateAllFields = () => { - const allFields = getAllFields(); - - // 更新全局Context中的字段 - if (ruleContext) { - ruleContext.updateFields(allFields); - } - - // 触发自定义事件,通知字段已更新(兼容非Context的实现) - const event = new CustomEvent('extraction-fields-updated', { - detail: { - fields: allFields, - tab: currentTab, - fieldsData: { - llm_ocr: fields.llm_ocr || [], - llm: fields.llm || [], - regex: regexFields.map(f => f.fieldName).filter(name => name.trim() !== '') - } - } - }); - document.dispatchEvent(event); - - if (onChange) { - onChange({ + onChange?.({ extractionMethod: currentTab, fields, regexFields, - allFields // 添加合并后的所有字段 + allFields, }); - } - }; - - // 使用useEffect监听字段变化并更新Context - useEffect(() => { - const debouncedUpdate = setTimeout(() => { - updateAllFields(); }, 300); - - return () => clearTimeout(debouncedUpdate); - }, [fields.llm_ocr, fields.llm, regexFields]); + + return () => clearTimeout(timeout); + }, [fields, regexFields, currentTab, getAllFields, ruleContext?.updateFields, onChange]); const handleTabChange = (tab: string) => { setCurrentTab(tab); - - if (onChange) { - onChange({ extractionMethod: tab }); - } + onChange?.({ extractionMethod: tab }); }; const handleFieldInputChange = (e: FormEvent, type: 'llm_ocr' | 'llm') => { - setInputValue({ - ...inputValue, - [type]: e.currentTarget.value - }); + setInputValue({ ...inputValue, [type]: e.currentTarget.value }); }; const handleFieldTypeChange = (e: FormEvent) => { @@ -262,55 +169,28 @@ export function ExtractionSettings({ onChange, initialData }: ExtractionSettings }; const addField = (type: 'llm_ocr' | 'llm') => { - if (inputValue[type].trim()) { - let newFields: string[] = []; - - // OCR+LLM模式下,支持多个字段同时添加(用逗号、顿号或空格分隔) - if (type === 'llm_ocr') { - const fieldsToAdd = inputValue[type].split(/[\s、,]+/) - .map(f => f.trim()) - .filter(f => f !== ''); - - console.log(`添加OCR字段:`, fieldsToAdd); - - // 仅添加不存在的字段 - const uniqueFields = fieldsToAdd.filter(field => !isFieldNameExists(field)); - - if (uniqueFields.length === 0) { - // 如果没有唯一字段可添加,显示提示并返回 - alert("所有字段名已存在,请确保字段名称唯一"); - return; - } - - newFields = [...fields[type], ...uniqueFields]; - } else { - // 多模态抽取模式下,处理字段名称唯一性 - const fieldName = inputValue[type].trim(); - console.log(`添加多模态字段:${fieldName}`); - - // 检查字段名是否已存在 - if (isFieldNameExists(fieldName)) { - alert(`字段名 "${fieldName}" 已存在,请确保字段名称唯一`); - return; - } - - newFields = [...fields[type], `${fieldName}_${selectedFieldType}`]; - } + const value = inputValue[type].trim(); + if (!value) return; - setFields(prevFields => ({ - ...prevFields, - [type]: newFields - })); - - setInputValue({ - ...inputValue, - [type]: '' - }); - - if (type === 'llm') { - setSelectedFieldType('default'); + if (type === 'llm_ocr') { + const fieldsToAdd = value + .split(/[\s、,]+/) + .map((f) => f.trim()) + .filter((f) => f && !isFieldNameExists(f)); + if (fieldsToAdd.length === 0) { + alert('所有字段名已存在,请确保字段名称唯一'); + return; } + setFields((prev) => ({ ...prev, [type]: [...prev[type], ...fieldsToAdd] })); + } else { + if (isFieldNameExists(value)) { + alert(`字段名 "${value}" 已存在,请确保字段名称唯一`); + return; + } + setFields((prev) => ({ ...prev, [type]: [...prev[type], `${value}_${selectedFieldType}`] })); + setSelectedFieldType('default'); } + setInputValue((prev) => ({ ...prev, [type]: '' })); }; const handleKeyDown = (e: KeyboardEvent, type: 'llm_ocr' | 'llm') => { @@ -321,410 +201,193 @@ export function ExtractionSettings({ onChange, initialData }: ExtractionSettings }; const removeField = (type: 'llm_ocr' | 'llm', index: number) => { - const newFields = [...fields[type]]; - newFields.splice(index, 1); - - // 使用新的方式更新,确保状态立即更新并触发后续操作 - setFields(prevFields => { - const updatedFields = { - ...prevFields, - [type]: newFields - }; - - // 状态更新后立即触发字段更新事件 - Promise.resolve().then(() => updateAllFields()); - - return updatedFields; + setFields((prev) => { + const newFields = [...prev[type]]; + newFields.splice(index, 1); + return { ...prev, [type]: newFields }; }); }; - // 添加正则表达式字段行 const addRegexFieldRow = () => { - const newId = `${regexFields.length + 1}`; - setRegexFields([...regexFields, { id: newId, fieldName: '', regex: '' }]); - - // 如果是新增了regex字段,也要更新字段列表通知评查设置组件 - setTimeout(() => updateAllFields(), 0); - - if (onChange) { - onChange({ - extractionMethod: currentTab, - regexFields: [...regexFields, { id: newId, fieldName: '', regex: '' }] - }); - } + setRegexFields((prev) => [...prev, { id: `${prev.length + 1}`, fieldName: '', regex: '' }]); }; - // 删除正则表达式字段行 const removeRegexFieldRow = (id: string) => { - // 至少保留一行 - if (regexFields.length <= 1) { - return; - } - - const newRegexFields = regexFields.filter(field => field.id !== id); - setRegexFields(newRegexFields); - - if (onChange) { - onChange({ - extractionMethod: currentTab, - regexFields: newRegexFields - }); - } + if (regexFields.length <= 1) return; + setRegexFields((prev) => prev.filter((field) => field.id !== id)); }; - // 更新正则表达式字段 const updateRegexField = (id: string, key: 'fieldName' | 'regex', value: string) => { - // 更新字段值 - const newRegexFields = regexFields.map(field => { - if (field.id === id) { - return { ...field, [key]: value }; - } - return field; - }); - - // 仅更新状态,不触发其他事件 - setRegexFields(newRegexFields); - - // 更新onChange回调 - if (onChange) { - onChange({ - extractionMethod: currentTab, - regexFields: newRegexFields - }); - } + setRegexFields((prev) => + prev.map((field) => (field.id === id ? { ...field, [key]: value } : field)) + ); }; - // 处理正则字段失去焦点事件,检查唯一性并更新字段列表 const handleRegexFieldBlur = (id: string, key: 'fieldName' | 'regex') => { - // 只有在修改字段名时需要检查唯一性并更新字段列表 - if (key === 'fieldName') { - const currentField = regexFields.find(field => field.id === id); - if (currentField && currentField.fieldName.trim() !== '') { - const fieldName = currentField.fieldName.trim(); - console.log(`检查正则字段 '${fieldName}' 的唯一性,ID: ${id}`); - - // 检查当前正则字段组中是否有重名(排除自身) - const duplicateInRegex = regexFields - .filter(f => f.id !== id) - .find(f => f.fieldName === fieldName); - - if (duplicateInRegex) { - console.log(`字段名 '${fieldName}' 在正则字段中存在重复,ID: ${duplicateInRegex.id}`); - alert(`字段名 "${fieldName}" 已存在,请确保字段名称唯一`); - - // 重置为空字段名 - const resetFields = regexFields.map(field => { - if (field.id === id) { - return { ...field, fieldName: '' }; - } - return field; - }); - - setRegexFields(resetFields); - return; - } - - // 检查其他抽取方法中的字段(不区分大小写) - const otherExtractFields = [ - ...fields.llm_ocr.map(f => f.toLowerCase()), - ...fields.llm.map(f => { - const [name] = f.split('_'); - return name.toLowerCase(); - }) - ]; - - const fieldNameLower = fieldName.toLowerCase(); - const duplicateInOtherMethods = otherExtractFields.includes(fieldNameLower); - - if (duplicateInOtherMethods) { - console.log(`字段名 '${fieldName}' 在其他抽取方法中存在(不区分大小写)`); - alert(`字段名 "${fieldName}" 已存在,请确保字段名称唯一`); - - // 重置为空字段名 - const resetFields = regexFields.map(field => { - if (field.id === id) { - return { ...field, fieldName: '' }; - } - return field; - }); - - setRegexFields(resetFields); - return; - } - - // 字段名有效,更新字段列表 - console.log(`字段名 '${fieldName}' 检查通过,更新字段列表`); - updateAllFields(); - } - } else { - // 对于regex字段,只需更新字段列表 - updateAllFields(); + if (key !== 'fieldName') return; + const field = regexFields.find((f) => f.id === id); + if (!field?.fieldName.trim()) return; + + const fieldName = field.fieldName.trim(); + if (isFieldNameExists(fieldName, id)) { + alert(`字段名 "${fieldName}" 已存在,请确保字段名称唯一`); + setRegexFields((prev) => + prev.map((f) => (f.id === id ? { ...f, fieldName: '' } : f)) + ); } }; - // 应用正则模板 const applyRegexTemplate = (regex: string) => { - // 找到当前正在编辑的行,或者最后一行 const lastField = regexFields[regexFields.length - 1]; updateRegexField(lastField.id, 'regex', regex); }; - - // 从字段字符串中提取字段名和类型(用于多模态抽取) + const getFieldInfo = (field: string) => { const [fieldName, fieldType = 'default'] = field.split('_'); - const typeName = { - 'default': '默认', - 'seal': '印章', - 'cross-seal': '骑缝章', - 'handwriting': '手写体', - 'print': '印刷体', - 'english': '英文', - 'number': '数字', - 'currency': '货币' - }[fieldType] || '默认'; - - const badgeClass = { - 'default': 'bg-blue-100 text-blue-800', - 'seal': 'bg-red-100 text-red-800', - 'cross-seal': 'bg-red-100 text-red-800', - 'handwriting': 'bg-yellow-100 text-yellow-800', - 'print': 'bg-purple-100 text-purple-800', - 'english': 'bg-indigo-100 text-indigo-800', - 'number': 'bg-gray-100 text-gray-800', - 'currency': 'bg-green-100 text-green-800' - }[fieldType] || 'bg-blue-100 text-blue-800'; - + const typeName = + { + default: '默认', + seal: '印章', + 'cross-seal': '骑缝章', + handwriting: '手写体', + print: '印刷体', + english: '英文', + number: '数字', + currency: '货币', + }[fieldType] || '默认'; + const badgeClass = + { + default: 'bg-blue-100 text-blue-800', + seal: 'bg-red-100 text-red-800', + 'cross-seal': 'bg-red-100 text-red-800', + handwriting: 'bg-yellow-100 text-yellow-800', + print: 'bg-purple-100 text-purple-800', + english: 'bg-indigo-100 text-indigo-800', + number: 'bg-gray-100 text-gray-800', + currency: 'bg-green-100 text-green-800', + }[fieldType] || 'bg-blue-100 text-blue-800'; return { fieldName, fieldType, typeName, badgeClass }; }; - // 处理提示词类型切换 const handlePromptTypeChange = (e: FormEvent, type: 'llm_ocr' | 'llm') => { const value = e.currentTarget.value; - setPromptType({ - ...promptType, - [type]: value + setPromptType((prev) => ({ ...prev, [type]: value })); + onChange?.({ + extractionMethod: currentTab, + promptSettings: { type: value, template: selectedTemplate[type], content: promptContent[type] }, }); - - if (onChange) { - onChange({ - extractionMethod: currentTab, - promptSettings: { - type: value, - template: selectedTemplate[type], - content: promptContent[type] - } - }); - } }; - // 处理提示词模板选择 const handleTemplateChange = (e: FormEvent, type: 'llm_ocr' | 'llm') => { const value = e.currentTarget.value; - setSelectedTemplate({ - ...selectedTemplate, - [type]: value - }); - + setSelectedTemplate((prev) => ({ ...prev, [type]: value })); + if (value) { const templateData = getPromptTemplateById(Number(value)); if (templateData) { - // 基础模板内容 let content = templateData.template_content; - - // 替换字段列表变量 if (content.includes('{fieldsList}') && fields[type].length > 0) { - let fieldListStr = ''; - - if (type === 'llm_ocr') { - // 普通字段列表 - fieldListStr = fields[type].map((field, idx) => `${idx+1}. ${field}`).join('\n'); - } else if (type === 'llm') { - // 带类型的字段列表 - fieldListStr = fields[type].map((field, idx) => { - const { fieldName, typeName } = getFieldInfo(field); - return `${idx+1}. ${fieldName} (${typeName})`; - }).join('\n'); - } - + const fieldListStr = + type === 'llm_ocr' + ? fields[type].map((field, idx) => `${idx + 1}. ${field}`).join('\n') + : fields[type] + .map((field, idx) => { + const { fieldName, typeName } = getFieldInfo(field); + return `${idx + 1}. ${fieldName} (${typeName})`; + }) + .join('\n'); content = content.replace('{fieldsList}', fieldListStr); } - - setPromptContent({ - ...promptContent, - [type]: content + setPromptContent((prev) => ({ ...prev, [type]: content })); + onChange?.({ + extractionMethod: currentTab, + promptSettings: { type: promptType[type], template: value, content }, }); - - if (onChange) { - onChange({ - extractionMethod: currentTab, - promptSettings: { - type: promptType[type], - template: value, - content: content - } - }); - } } } else { - // 清空内容 - setPromptContent({ - ...promptContent, - [type]: '' + setPromptContent((prev) => ({ ...prev, [type]: '' })); + onChange?.({ + extractionMethod: currentTab, + promptSettings: { type: promptType[type], template: '', content: '' }, }); - - if (onChange) { - onChange({ - extractionMethod: currentTab, - promptSettings: { - type: promptType[type], - template: '', - content: '' - } - }); - } } }; - // 处理提示词内容变更 const handlePromptContentChange = (e: FormEvent, type: 'llm_ocr' | 'llm') => { const value = e.currentTarget.value; - setPromptContent({ - ...promptContent, - [type]: value + setPromptContent((prev) => ({ ...prev, [type]: value })); + onChange?.({ + extractionMethod: currentTab, + promptSettings: { type: promptType[type], template: selectedTemplate[type], content: value }, }); - - if (onChange) { - onChange({ - extractionMethod: currentTab, - promptSettings: { - type: promptType[type], - template: selectedTemplate[type], - content: value - } - }); - } }; - // 应用变量标签到提示词 const applyVariableToPrompt = (variable: string, type: 'llm_ocr' | 'llm') => { - const textarea = document.getElementById(type === 'llm_ocr' ? 'llm-prompt-content' : 'multimodal-prompt-content') as HTMLTextAreaElement; + const textarea = document.getElementById( + type === 'llm_ocr' ? 'llm-prompt-content' : 'multimodal-prompt-content' + ) as HTMLTextAreaElement; if (textarea) { const start = textarea.selectionStart; const end = textarea.selectionEnd; const text = textarea.value; const newText = text.substring(0, start) + `{${variable}}` + text.substring(end); - - setPromptContent({ - ...promptContent, - [type]: newText - }); - - // 使焦点回到文本框并设置光标位置 + setPromptContent((prev) => ({ ...prev, [type]: newText })); setTimeout(() => { textarea.focus(); textarea.setSelectionRange(start + variable.length + 2, start + variable.length + 2); }, 0); - - if (onChange) { - onChange({ - extractionMethod: currentTab, - promptSettings: { - type: promptType[type], - template: selectedTemplate[type], - content: newText - } - }); - } + onChange?.({ + extractionMethod: currentTab, + promptSettings: { type: promptType[type], template: selectedTemplate[type], content: newText }, + }); } }; - // 模拟获取提示词模板 const getPromptTemplateById = (id: number): PromptTemplate | null => { - // 模拟的模板数据,实际应用中应从服务器获取 const templates: Record = { 1: { id: 1, template_name: '行政处罚-抽取通用模板', template_type: 'Extraction', - template_content: `你是一个专业的文档信息抽取助手。请从以下{docType}文档中抽取关键信息: - -{fieldsList} - -请将结果以JSON格式输出,包含以上字段。如果某个字段在文档中未找到,则该字段的值设为null。` + template_content: `你是一个专业的文档信息抽取助手。请从以下{docType}文档中抽取关键信息:\n{fieldsList}\n请将结果以JSON格式输出,包含以上字段。如果某个字段在文档中未找到,则该字段的值设为null。`, }, 4: { id: 4, template_name: '采购合同-乙方资质抽取', template_type: 'Extraction', - template_content: `你是一个专业的合同信息抽取助手。请从以下{docType}中抽取乙方的资质信息: - -需要抽取的信息包括: -{fieldsList} - -{companyName}要求所有供应商必须提供完整的资质信息。请将结果以JSON格式输出,包含以上字段。` + template_content: `你是一个专业的合同信息抽取助手。请从以下{docType}中抽取乙方的资质信息:\n需要抽取的信息包括:\n{fieldsList}\n{companyName}要求所有供应商必须提供完整的资质信息。请将结果以JSON格式输出,包含以上字段。`, }, 5: { id: 5, template_name: '合同-关键条款抽取', template_type: 'Extraction', - template_content: `请作为{industry}行业的专业合同审核员,从提供的{docType}中提取以下关键条款信息: - -{fieldsList} - -文档ID: {documentId} -审核日期: {date} - -请以JSON格式输出结果,对于未明确指定的条款需标记为"未明确约定"。` + template_content: `请作为{industry}行业的专业合同审核员,从提供的{docType}中提取以下关键条款信息:\n{fieldsList}\n文档ID: {documentId}\n审核日期: {date}\n请以JSON格式输出结果,对于未明确指定的条款需标记为"未明确约定"。`, }, 6: { id: 6, template_name: '烟草许可证-信息抽取', template_type: 'Extraction', - template_content: `请从下列烟草专卖许可证文件中抽取以下关键信息: - -{fieldsList} - -这些信息将用于{companyName}内部数据库更新。请确保许可证编号和有效期格式准确无误。` + template_content: `请从下列烟草专卖许可证文件中抽取以下关键信息:\n{fieldsList}\n这些信息将用于{companyName}内部数据库更新。请确保许可证编号和有效期格式准确无误。`, }, 7: { id: 7, template_name: '多模态-印章识别模板', template_type: 'Multimodal', - template_content: `请识别并提取文档中的所有印章信息,包括: - -{fieldsList} - -文档类型: {docType} -页面范围: {pageRange} - -请注意区分公章、法人章和合同专用章,并分析印章的清晰度和完整性。` + template_content: `请识别并提取文档中的所有印章信息,包括:\n{fieldsList}\n文档类型: {docType}\n页面范围: {pageRange}\n请注意区分公章、法人章和合同专用章,并分析印章的清晰度和完整性。`, }, 8: { id: 8, template_name: '多模态-表格抽取模板', template_type: 'Multimodal', - template_content: `请从文档中的表格提取以下信息: - -{fieldsList} - -文档类型: {docType} -表格可能跨页,请确保完整提取所有内容。表格中的数值需保留原始精度。` + template_content: `请从文档中的表格提取以下信息:\n{fieldsList}\n文档类型: {docType}\n表格可能跨页,请确保完整提取所有内容。表格中的数值需保留原始精度。`, }, 9: { id: 9, template_name: '多模态-手写内容识别模板', template_type: 'Multimodal', - template_content: `请识别文档中的手写内容,特别关注: - -{fieldsList} - -文档类型: {docType} -内容类型: {contentType} - -对于难以辨认的手写内容,请标注为"[难以辨认]"并尽可能给出可能的解读。` - } + template_content: `请识别文档中的手写内容,特别关注:\n{fieldsList}\n文档类型: {docType}\n内容类型: {contentType}\n对于难以辨认的手写内容,请标注为"[难以辨认]"并尽可能给出可能的解读。`, + }, }; - return templates[id] || null; }; @@ -735,24 +398,23 @@ export function ExtractionSettings({ onChange, initialData }: ExtractionSettings
- {/* 切换按钮 */}
- - -
- - {/* 大模型抽取配置 */} +
- +
- handleFieldInputChange(e, 'llm_ocr')} - onKeyDown={(e) => handleKeyDown(e, 'llm_ocr')} + onKeyDown={(e) => handleKeyDown(e, 'llm_ocr')} /> - + > + 添加 +
{fields.llm_ocr.map((field, index) => (
- {field} - removeField('llm_ocr', index)} onKeyDown={(e) => { - if (e.key === 'Enter' || e.key === ' ') { - removeField('llm_ocr', index); - } + if (e.key === 'Enter' || e.key === ' ') removeField('llm_ocr', index); }} role="button" tabIndex={0} aria-label={`删除字段 ${field}`} - >× + > + × +
))}
@@ -808,24 +473,26 @@ export function ExtractionSettings({ onChange, initialData }: ExtractionSettings
- +
- -
系统将根据评查点类型和抽取目标自动生成适合的提示词,您无需额外配置。
- -
- - handleTemplateChange(e, 'llm_ocr')} @@ -863,54 +532,41 @@ export function ExtractionSettings({ onChange, initialData }: ExtractionSettings
- -
-

支持的变量(点击变量将其添加到提示词中):

+

+ 支持的变量(点击变量将其添加到提示词中): +

- - - - - - - + {[ + 'docType', + 'fieldsList', + 'companyName', + 'documentId', + 'date', + 'industry', + 'ocrText', + ].map((variable) => ( + + ))}
@@ -918,24 +574,25 @@ export function ExtractionSettings({ onChange, initialData }: ExtractionSettings
- - {/* 多模态抽取配置 */} +
- +
- handleFieldInputChange(e, 'llm')} onKeyDown={(e) => handleKeyDown(e, 'llm')} /> - - + > + 添加 +
{fields.llm.map((field, index) => { const { fieldName, fieldType, typeName, badgeClass } = getFieldInfo(field); return (
- {fieldName} - {typeName} - + {typeName} + + removeField('llm', index)} onKeyDown={(e) => { - if (e.key === 'Enter' || e.key === ' ') { - removeField('llm', index); - } + if (e.key === 'Enter' || e.key === ' ') removeField('llm', index); }} role="button" tabIndex={0} aria-label={`删除字段 ${fieldName}`} - >× + > + × +
); })} @@ -982,16 +643,18 @@ export function ExtractionSettings({ onChange, initialData }: ExtractionSettings
请为每个字段选择适当的抽取类型,有助于提高识别准确率
- +
- +
-
系统将根据评查点类型和抽取目标自动生成适合的提示词,支持图表、印章等图像内容抽取。
- -
- - handleTemplateChange(e, 'llm')} @@ -1038,69 +703,44 @@ export function ExtractionSettings({ onChange, initialData }: ExtractionSettings
- -
-

支持的变量(点击变量将其添加到提示词中):

+

+ 支持的变量(点击变量将其添加到提示词中): +

- - - - - - - - - - + {[ + 'docType', + 'fieldsList', + 'companyName', + 'documentId', + 'date', + 'industry', + 'contentType', + 'pageRange', + 'colorMode', + 'ocrText', + ].map((variable) => ( + + ))}
@@ -1108,56 +748,69 @@ export function ExtractionSettings({ onChange, initialData }: ExtractionSettings
- - {/* 正则抽取配置 */} +
- -
- +
- {/* 字段-正则表达式配置行 */} {regexFields.map((field) => ( -
+
- - + 字段名称 + + updateRegexField(field.id, 'fieldName', e.target.value)} onBlur={() => handleRegexFieldBlur(field.id, 'fieldName')} />
- - + 正则表达式 + + updateRegexField(field.id, 'regex', e.target.value)} onBlur={() => handleRegexFieldBlur(field.id, 'regex')} />
-
- +
-
applyRegexTemplate("\\d{4}[-/年](0?[1-9]|1[0-2])[-/月](0?[1-9]|[12][0-9]|3[01])[日]?")} - role="button" - tabIndex={0} - onKeyDown={(e) => { - if (e.key === 'Enter' || e.key === ' ') { - applyRegexTemplate("\\d{4}[-/年](0?[1-9]|1[0-2])[-/月](0?[1-9]|[12][0-9]|3[01])[日]?"); - } - }} - >日期格式:yyyy-mm-dd
-
applyRegexTemplate("[A-Z]{2,5}-\\d{4,10}")} - role="button" - tabIndex={0} - onKeyDown={(e) => { - if (e.key === 'Enter' || e.key === ' ') { - applyRegexTemplate("[A-Z]{2,5}-\\d{4,10}"); - } - }} - >合同编号格式
-
applyRegexTemplate("(人民币|RMB)?\\s?(\\d{1,3}(,\\d{3})*(\\.\\d{2})?)\\s?[万元]?")} - role="button" - tabIndex={0} - onKeyDown={(e) => { - if (e.key === 'Enter' || e.key === ' ') { - applyRegexTemplate("(人民币|RMB)?\\s?(\\d{1,3}(,\\d{3})*(\\.\\d{2})?)\\s?[万元]?"); - } - }} - >金额格式
-
applyRegexTemplate("\\d{3}-\\d{8}|\\d{4}-\\d{7,8}")} - role="button" - tabIndex={0} - onKeyDown={(e) => { - if (e.key === 'Enter' || e.key === ' ') { - applyRegexTemplate("\\d{3}-\\d{8}|\\d{4}-\\d{7,8}"); - } - }} - >座机号码格式
-
applyRegexTemplate("1[3-9]\\d{9}")} - role="button" - tabIndex={0} - onKeyDown={(e) => { - if (e.key === 'Enter' || e.key === ' ') { - applyRegexTemplate("1[3-9]\\d{9}"); - } - }} - >手机号码格式
+ {[ + { + label: '日期格式:yyyy-mm-dd', + regex: '\\d{4}[-/年](0?[1-9]|1[0-2])[-/月](0?[1-9]|[12][0-9]|3[01])[日]?', + }, + { label: '合同编号格式', regex: '[A-Z]{2,5}-\\d{4,10}' }, + { + label: '金额格式', + regex: '(人民币|RMB)?\\s?(\\d{1,3}(,\\d{3})*(\\.\\d{2})?)\\s?[万元]?', + }, + { label: '座机号码格式', regex: '\\d{3}-\\d{8}|\\d{4}-\\d{7,8}' }, + { label: '手机号码格式', regex: '1[3-9]\\d{9}' }, + ].map(({ label, regex }) => ( +
applyRegexTemplate(regex)} + role="button" + tabIndex={0} + onKeyDown={(e) => { + if (e.key === 'Enter' || e.key === ' ') applyRegexTemplate(regex); + }} + > + {label} +
+ ))}
@@ -1239,4 +860,4 @@ export function ExtractionSettings({ onChange, initialData }: ExtractionSettings
); -} \ No newline at end of file +} \ No newline at end of file