import {
  useState,
  KeyboardEvent,
  FormEvent,
  useContext,
  useEffect,
  useCallback,
  useRef,
} from 'react';
import { RuleContext } from './ReviewSettings';

/** One field-name / regular-expression pair of the regex extraction method. */
interface RegexField {
  id?: string;
  fieldName: string;
  regex: string;
}

/** A predefined prompt that can be loaded into the prompt editor. */
interface PromptTemplate {
  id: number;
  template_name: string;
  template_type: string;
  template_content: string;
}

interface ExtractionSettingsProps {
  /**
   * Called with a partial settings payload whenever something changes.
   * The payload shape differs per event (tab switch, prompt edit, field list
   * update), so the value type is intentionally loose.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- payload shape varies per event
  onChange?: (data: Record<string, any>) => void;
  /** Previously saved settings used to pre-populate the form. */
  initialData?: {
    llm_ocr?: {
      fields?: string[];
      prompt_setting?: {
        type?: string;
        template?: string;
      };
    };
    llm_vl?: {
      fields?: string[];
      prompt_setting?: {
        type?: string;
        template?: string;
      };
    };
    ocr_regex?: {
      fields?: RegexField[];
    };
  };
}

/** The two extraction methods whose fields are entered as free-text chips. */
type LlmTab = 'llm_ocr' | 'llm';

/** A regex row as held in component state: the id is always assigned. */
type RegexRow = Required<RegexField>;

/** Any form control event whose `currentTarget` exposes a string `value`. */
type ValueEvent = FormEvent<HTMLInputElement | HTMLSelectElement | HTMLTextAreaElement>;

const DEFAULT_TYPE_LABEL = '默认';
const DEFAULT_BADGE_CLASS = 'bg-blue-100 text-blue-800';

/** Display label per extraction type suffix of a multimodal field. */
const FIELD_TYPE_LABELS = new Map<string, string>([
  ['default', '默认'],
  ['seal', '印章'],
  ['cross-seal', '骑缝章'],
  ['handwriting', '手写体'],
  ['print', '印刷体'],
  ['english', '英文'],
  ['number', '数字'],
  ['currency', '货币'],
]);

/** Badge CSS classes per extraction type suffix of a multimodal field. */
const FIELD_TYPE_BADGES = new Map<string, string>([
  ['default', 'bg-blue-100 text-blue-800'],
  ['seal', 'bg-red-100 text-red-800'],
  ['cross-seal', 'bg-red-100 text-red-800'],
  ['handwriting', 'bg-yellow-100 text-yellow-800'],
  ['print', 'bg-purple-100 text-purple-800'],
  ['english', 'bg-indigo-100 text-indigo-800'],
  ['number', 'bg-gray-100 text-gray-800'],
  ['currency', 'bg-green-100 text-green-800'],
]);

/** Built-in prompt templates, keyed by template id. */
const PROMPT_TEMPLATES: Record<number, PromptTemplate> = {
  1: {
    id: 1,
    template_name: '行政处罚-抽取通用模板',
    template_type: 'Extraction',
    template_content: `你是一个专业的文档信息抽取助手。请从以下{docType}文档中抽取关键信息:\n{fieldsList}\n请将结果以JSON格式输出,包含以上字段。如果某个字段在文档中未找到,则该字段的值设为null。`,
  },
  4: {
    id: 4,
    template_name: '采购合同-乙方资质抽取',
    template_type: 'Extraction',
    template_content: `你是一个专业的合同信息抽取助手。请从以下{docType}中抽取乙方的资质信息:\n需要抽取的信息包括:\n{fieldsList}\n{companyName}要求所有供应商必须提供完整的资质信息。请将结果以JSON格式输出,包含以上字段。`,
  },
  5: {
    id: 5,
    template_name: '合同-关键条款抽取',
    template_type: 'Extraction',
    template_content: `请作为{industry}行业的专业合同审核员,从提供的{docType}中提取以下关键条款信息:\n{fieldsList}\n文档ID: {documentId}\n审核日期: {date}\n请以JSON格式输出结果,对于未明确指定的条款需标记为"未明确约定"。`,
  },
  6: {
    id: 6,
    template_name: '烟草许可证-信息抽取',
    template_type: 'Extraction',
    template_content: `请从下列烟草专卖许可证文件中抽取以下关键信息:\n{fieldsList}\n这些信息将用于{companyName}内部数据库更新。请确保许可证编号和有效期格式准确无误。`,
  },
  7: {
    id: 7,
    template_name: '多模态-印章识别模板',
    template_type: 'Multimodal',
    template_content: `请识别并提取文档中的所有印章信息,包括:\n{fieldsList}\n文档类型: {docType}\n页面范围: {pageRange}\n请注意区分公章、法人章和合同专用章,并分析印章的清晰度和完整性。`,
  },
  8: {
    id: 8,
    template_name: '多模态-表格抽取模板',
    template_type: 'Multimodal',
    template_content: `请从文档中的表格提取以下信息:\n{fieldsList}\n文档类型: {docType}\n表格可能跨页,请确保完整提取所有内容。表格中的数值需保留原始精度。`,
  },
  9: {
    id: 9,
    template_name: '多模态-手写内容识别模板',
    template_type: 'Multimodal',
    template_content: `请识别文档中的手写内容,特别关注:\n{fieldsList}\n文档类型: {docType}\n内容类型: {contentType}\n对于难以辨认的手写内容,请标注为"[难以辨认]"并尽可能给出可能的解读。`,
  },
};

/**
 * Splits a stored multimodal field (`name_type`) into its name and type.
 *
 * The split happens at the LAST underscore because `addField` appends the type
 * as the final `_suffix`; splitting at the first underscore would truncate any
 * field name that itself contains an underscore.
 *
 * @param field - Stored field string, with or without a `_type` suffix.
 * @returns The field name and type; the type is `'default'` when there is no underscore.
 */
function splitTypedField(field: string): { fieldName: string; fieldType: string } {
  const cut = field.lastIndexOf('_');
  if (cut === -1) {
    return { fieldName: field, fieldType: 'default' };
  }
  return { fieldName: field.slice(0, cut), fieldType: field.slice(cut + 1) };
}

/**
 * Returns an id that is not used by any existing regex row.
 *
 * Ids are numeric strings; using "highest id + 1" (rather than the row count)
 * keeps ids unique after rows have been removed.
 */
function nextRegexRowId(rows: readonly RegexRow[]): string {
  const highest = rows.reduce((max, row) => {
    const parsed = Number.parseInt(row.id, 10);
    return Number.isNaN(parsed) ? max : Math.max(max, parsed);
  }, 0);
  return String(highest + 1);
}

/**
 * Settings panel for configuring how fields are extracted from a document.
 *
 * Holds three field collections in state (`llm_ocr` names, `llm` names with a
 * `_type` suffix, and regex rows) plus per-method prompt settings. Changes are
 * reported through `onChange`, through `ruleContext.updateFields`, and through
 * an `extraction-fields-updated` DOM event dispatched on `document`.
 *
 * @param props.onChange - Optional change callback; see {@link ExtractionSettingsProps}.
 * @param props.initialData - Optional saved settings used to pre-populate state.
 */
export function ExtractionSettings({ onChange, initialData }: ExtractionSettingsProps) {
  const ruleContext = useContext(RuleContext);
  const [currentTab, setCurrentTab] = useState('llm_ocr');
  const [fields, setFields] = useState<{ [key: string]: string[] }>({
    llm_ocr: [],
    llm: [],
  });
  const [inputValue, setInputValue] = useState({
    llm_ocr: '',
    llm: '',
  });
  const [selectedFieldType, setSelectedFieldType] = useState('default');
  const [regexFields, setRegexFields] = useState<RegexRow[]>([
    { id: '1', fieldName: '', regex: '' },
  ]);
  const [promptType, setPromptType] = useState({ llm_ocr: 'system', llm: 'system' });
  const [promptContent, setPromptContent] = useState({ llm_ocr: '', llm: '' });
  const [selectedTemplate, setSelectedTemplate] = useState({ llm_ocr: '', llm: '' });

  // Latest callbacks are read through refs so that a parent passing a fresh
  // `onChange` (or a context recreating `updateFields`) on every render does
  // not re-arm the debounced publish effect below.
  const onChangeRef = useRef(onChange);
  const ruleContextRef = useRef(ruleContext);
  useEffect(() => {
    onChangeRef.current = onChange;
    ruleContextRef.current = ruleContext;
  });

  // Load saved settings. Depends only on `initialData` so that context updates
  // do not reset the form.
  useEffect(() => {
    if (!initialData) return;

    setFields({
      llm_ocr: initialData.llm_ocr?.fields || [],
      llm: initialData.llm_vl?.fields || [],
    });
    setPromptType({
      llm_ocr: initialData.llm_ocr?.prompt_setting?.type || 'system',
      llm: initialData.llm_vl?.prompt_setting?.type || 'system',
    });
    setPromptContent({
      llm_ocr: initialData.llm_ocr?.prompt_setting?.template || '',
      llm: initialData.llm_vl?.prompt_setting?.template || '',
    });

    const savedRegexFields = initialData.ocr_regex?.fields;
    if (savedRegexFields?.length) {
      // Saved ids are ignored; rows are renumbered 1..n.
      setRegexFields(
        savedRegexFields.map((field: RegexField, index: number) => ({
          id: (index + 1).toString(),
          fieldName: field.fieldName || '',
          regex: field.regex || '',
        }))
      );
    }
  }, [initialData]);

  // Seed the current tab from the context's fields, only when no initialData
  // was supplied. Depends on the specific property, not the whole context.
  // NOTE(review): if `updateFields` feeds back into `extractionFields`, this
  // effect and the publish effect below may re-trigger each other — confirm
  // against the RuleContext provider.
  const contextFields = ruleContext?.extractionFields;
  useEffect(() => {
    if (initialData || !contextFields?.length) return;
    setFields((prevFields) => ({
      ...prevFields,
      [currentTab]: [...contextFields],
    }));
  }, [contextFields, currentTab, initialData]);

  /** Returns the de-duplicated field names across all three extraction methods. */
  const getAllFields = useCallback(() => {
    const ocrNames = fields.llm_ocr || [];
    const llmNames = (fields.llm || []).map((field) => splitTypedField(field).fieldName);
    const regexNames = regexFields
      .map((field) => field.fieldName)
      .filter((name) => name.trim() !== '');
    return [...new Set([...ocrNames, ...llmNames, ...regexNames])];
  }, [fields, regexFields]);

  /**
   * Reports whether a field name is already used (case-insensitive, ignoring
   * surrounding whitespace).
   *
   * @param fieldName - Candidate name.
   * @param excludeId - Id of a regex row to leave out of the comparison, so a
   *   row being edited is not reported as a duplicate of itself.
   */
  const isFieldNameExists = useCallback(
    (fieldName: string, excludeId?: string): boolean => {
      const wanted = fieldName.trim().toLowerCase();
      if (!wanted) return false;

      const takenNames = [
        ...(fields.llm_ocr || []),
        ...(fields.llm || []).map((field) => splitTypedField(field).fieldName),
        // The excluded row must be dropped here; comparing against the
        // aggregate of ALL regex rows would always match the row's own name.
        ...regexFields.filter((row) => row.id !== excludeId).map((row) => row.fieldName),
      ];
      return takenNames.some((name) => name.trim().toLowerCase() === wanted);
    },
    [fields, regexFields]
  );

  // Publish the field lists (debounced by 300 ms) to the context, to DOM
  // listeners, and to `onChange`.
  useEffect(() => {
    const timeout = setTimeout(() => {
      const allFields = getAllFields();
      ruleContextRef.current?.updateFields?.(allFields);
      document.dispatchEvent(
        new CustomEvent('extraction-fields-updated', {
          detail: {
            fields: allFields,
            tab: currentTab,
            fieldsData: {
              llm_ocr: fields.llm_ocr || [],
              llm: fields.llm || [],
              regex: regexFields.map((f) => f.fieldName).filter((name) => name.trim() !== ''),
            },
          },
        })
      );
      onChangeRef.current?.({
        extractionMethod: currentTab,
        fields,
        regexFields,
        allFields,
      });
    }, 300);
    return () => clearTimeout(timeout);
  }, [fields, regexFields, currentTab, getAllFields]);

  /** Switches the active extraction method and reports it immediately. */
  const handleTabChange = (tab: string) => {
    setCurrentTab(tab);
    onChange?.({ extractionMethod: tab });
  };

  /** Mirrors the chip input's text into state for the given method. */
  const handleFieldInputChange = (e: ValueEvent, type: LlmTab) => {
    const value = e.currentTarget.value;
    setInputValue((prev) => ({ ...prev, [type]: value }));
  };

  /** Stores the extraction type chosen for the next multimodal field. */
  const handleFieldTypeChange = (e: ValueEvent) => {
    setSelectedFieldType(e.currentTarget.value);
  };

  /**
   * Adds the pending input as field(s) of the given method.
   *
   * For `llm_ocr` the input may contain several names separated by whitespace,
   * `、`, `,` or the fullwidth comma; names that already exist — or repeat
   * within the same input — are skipped. For `llm` a single name is stored
   * with the selected type appended as `_type`.
   */
  const addField = (type: LlmTab) => {
    const value = inputValue[type].trim();
    if (!value) return;

    if (type === 'llm_ocr') {
      const seenInBatch = new Set<string>();
      const fieldsToAdd = value
        .split(/[\s、,\uFF0C]+/)
        .map((name) => name.trim())
        .filter((name) => {
          const key = name.toLowerCase();
          if (!name || seenInBatch.has(key) || isFieldNameExists(name)) return false;
          seenInBatch.add(key);
          return true;
        });
      if (fieldsToAdd.length === 0) {
        alert('所有字段名已存在,请确保字段名称唯一');
        return;
      }
      setFields((prev) => ({ ...prev, [type]: [...(prev[type] ?? []), ...fieldsToAdd] }));
    } else {
      if (isFieldNameExists(value)) {
        alert(`字段名 "${value}" 已存在,请确保字段名称唯一`);
        return;
      }
      setFields((prev) => ({
        ...prev,
        [type]: [...(prev[type] ?? []), `${value}_${selectedFieldType}`],
      }));
      setSelectedFieldType('default');
    }
    setInputValue((prev) => ({ ...prev, [type]: '' }));
  };

  /** Adds the pending field when Enter is pressed in the chip input. */
  const handleKeyDown = (e: KeyboardEvent, type: LlmTab) => {
    if (e.key === 'Enter') {
      e.preventDefault();
      addField(type);
    }
  };

  /** Removes the field at `index` from the given method's list. */
  const removeField = (type: LlmTab, index: number) => {
    setFields((prev) => ({
      ...prev,
      [type]: (prev[type] ?? []).filter((_, position) => position !== index),
    }));
  };

  /** Appends an empty regex row with an id no other row uses. */
  const addRegexFieldRow = () => {
    setRegexFields((prev) => [...prev, { id: nextRegexRowId(prev), fieldName: '', regex: '' }]);
  };

  /** Removes the regex row with the given id; the last remaining row is kept. */
  const removeRegexFieldRow = (id: string) => {
    // Guard inside the updater so it sees the latest rows, not a stale closure.
    setRegexFields((prev) => (prev.length <= 1 ? prev : prev.filter((row) => row.id !== id)));
  };

  /** Sets one property of the regex row with the given id. */
  const updateRegexField = (id: string, key: 'fieldName' | 'regex', value: string) => {
    setRegexFields((prev) =>
      prev.map((row) => (row.id === id ? { ...row, [key]: value } : row))
    );
  };

  /**
   * Validates a regex row's field name when its input loses focus; a name that
   * is already used elsewhere is rejected with an alert and cleared.
   */
  const handleRegexFieldBlur = (id: string, key: 'fieldName' | 'regex') => {
    if (key !== 'fieldName') return;

    const fieldName = regexFields.find((row) => row.id === id)?.fieldName.trim();
    if (!fieldName) return;

    if (isFieldNameExists(fieldName, id)) {
      alert(`字段名 "${fieldName}" 已存在,请确保字段名称唯一`);
      setRegexFields((prev) =>
        prev.map((row) => (row.id === id ? { ...row, fieldName: '' } : row))
      );
    }
  };

  /** Writes a predefined pattern into the last regex row. */
  const applyRegexTemplate = (regex: string) => {
    const lastRow = regexFields[regexFields.length - 1];
    if (!lastRow) return;
    updateRegexField(lastRow.id, 'regex', regex);
  };

  /**
   * Resolves a stored multimodal field into display data.
   *
   * @returns The field name, its type suffix, and the label and badge classes
   *   for that type (falling back to the default label/badge for unknown types).
   */
  const getFieldInfo = (field: string) => {
    const { fieldName, fieldType } = splitTypedField(field);
    const typeName = FIELD_TYPE_LABELS.get(fieldType) ?? DEFAULT_TYPE_LABEL;
    const badgeClass = FIELD_TYPE_BADGES.get(fieldType) ?? DEFAULT_BADGE_CLASS;
    return { fieldName, fieldType, typeName, badgeClass };
  };

  /** Looks up a built-in prompt template; returns `null` for unknown ids. */
  const getPromptTemplateById = (id: number): PromptTemplate | null => {
    return PROMPT_TEMPLATES[id] || null;
  };

  /** Stores the chosen prompt type and reports the resulting prompt settings. */
  const handlePromptTypeChange = (e: ValueEvent, type: LlmTab) => {
    const value = e.currentTarget.value;
    setPromptType((prev) => ({ ...prev, [type]: value }));
    onChange?.({
      extractionMethod: currentTab,
      promptSettings: { type: value, template: selectedTemplate[type], content: promptContent[type] },
    });
  };

  /**
   * Loads the selected template into the prompt editor.
   *
   * When the template contains `{fieldsList}` and fields exist, the placeholder
   * is replaced by a numbered list of the current fields. An empty selection
   * clears the prompt. An id with no matching template changes only the
   * selection.
   */
  const handleTemplateChange = (e: ValueEvent, type: LlmTab) => {
    const value = e.currentTarget.value;
    setSelectedTemplate((prev) => ({ ...prev, [type]: value }));

    if (!value) {
      setPromptContent((prev) => ({ ...prev, [type]: '' }));
      onChange?.({
        extractionMethod: currentTab,
        promptSettings: { type: promptType[type], template: '', content: '' },
      });
      return;
    }

    const templateData = getPromptTemplateById(Number(value));
    if (!templateData) return;

    let content = templateData.template_content;
    const currentFields = fields[type] ?? [];
    if (content.includes('{fieldsList}') && currentFields.length > 0) {
      const fieldListStr = currentFields
        .map((field, idx) => {
          if (type === 'llm_ocr') return `${idx + 1}. ${field}`;
          const { fieldName, typeName } = getFieldInfo(field);
          return `${idx + 1}. ${fieldName} (${typeName})`;
        })
        .join('\n');
      // A replacer function keeps `$&`, `$1`, … in field names literal.
      content = content.replace('{fieldsList}', () => fieldListStr);
    }

    setPromptContent((prev) => ({ ...prev, [type]: content }));
    onChange?.({
      extractionMethod: currentTab,
      promptSettings: { type: promptType[type], template: value, content },
    });
  };

  /** Stores manual edits to the prompt text and reports them. */
  const handlePromptContentChange = (e: ValueEvent, type: LlmTab) => {
    const value = e.currentTarget.value;
    setPromptContent((prev) => ({ ...prev, [type]: value }));
    onChange?.({
      extractionMethod: currentTab,
      promptSettings: { type: promptType[type], template: selectedTemplate[type], content: value },
    });
  };

  /**
   * Inserts `{variable}` at the caret of the method's prompt textarea
   * (replacing any selection), then restores focus with the caret placed
   * directly after the inserted text. Does nothing when the textarea is not
   * in the document.
   */
  const applyVariableToPrompt = (variable: string, type: LlmTab) => {
    const elementId = type === 'llm_ocr' ? 'llm-prompt-content' : 'multimodal-prompt-content';
    const textarea = document.getElementById(elementId) as HTMLTextAreaElement | null;
    if (!textarea) return;

    const start = textarea.selectionStart;
    const end = textarea.selectionEnd;
    const text = textarea.value;
    const newText = text.substring(0, start) + `{${variable}}` + text.substring(end);
    // +2 accounts for the surrounding braces.
    const caret = start + variable.length + 2;

    setPromptContent((prev) => ({ ...prev, [type]: newText }));
    // Deferred so the caret is set after React has re-rendered the value.
    setTimeout(() => {
      textarea.focus();
      textarea.setSelectionRange(caret, caret);
    }, 0);
    onChange?.({
      extractionMethod: currentTab,
      promptSettings: { type: promptType[type], template: selectedTemplate[type], content: newText },
    });
  };

  // NOTE(review): the markup below is reproduced unchanged. Its element tags
  // are not visible in the reviewed text, so it was deliberately left as-is
  // rather than reconstructed by guesswork.
  return (

抽取设置

handleFieldInputChange(e, 'llm_ocr')} onKeyDown={(e) => handleKeyDown(e, 'llm_ocr')} />
{fields.llm_ocr.map((field, index) => (
{field} removeField('llm_ocr', index)} onKeyDown={(e) => { if (e.key === 'Enter' || e.key === ' ') removeField('llm_ocr', index); }} role="button" tabIndex={0} aria-label={`删除字段 ${field}`} > ×
))}
支持一次输入多个字段
系统将根据评查点类型和抽取目标自动生成适合的提示词,您无需额外配置。

支持的变量(点击变量将其添加到提示词中):

{[ 'docType', 'fieldsList', 'companyName', 'documentId', 'date', 'industry', 'ocrText', ].map((variable) => ( ))}
handleFieldInputChange(e, 'llm')} onKeyDown={(e) => handleKeyDown(e, 'llm')} />
{fields.llm.map((field, index) => { const { fieldName, fieldType, typeName, badgeClass } = getFieldInfo(field); return (
{fieldName} {typeName} removeField('llm', index)} onKeyDown={(e) => { if (e.key === 'Enter' || e.key === ' ') removeField('llm', index); }} role="button" tabIndex={0} aria-label={`删除字段 ${fieldName}`} > ×
); })}
请为每个字段选择适当的抽取类型,有助于提高识别准确率
系统将根据评查点类型和抽取目标自动生成适合的提示词,支持图表、印章等图像内容抽取。

支持的变量(点击变量将其添加到提示词中):

{[ 'docType', 'fieldsList', 'companyName', 'documentId', 'date', 'industry', 'contentType', 'pageRange', 'colorMode', 'ocrText', ].map((variable) => ( ))}
{regexFields.map((field) => (
updateRegexField(field.id, 'fieldName', e.target.value)} onBlur={() => handleRegexFieldBlur(field.id, 'fieldName')} />
updateRegexField(field.id, 'regex', e.target.value)} onBlur={() => handleRegexFieldBlur(field.id, 'regex')} />
))}
{[ { label: '日期格式:yyyy-mm-dd', regex: '\\d{4}[-/年](0?[1-9]|1[0-2])[-/月](0?[1-9]|[12][0-9]|3[01])[日]?', }, { label: '合同编号格式', regex: '[A-Z]{2,5}-\\d{4,10}' }, { label: '金额格式', regex: '(人民币|RMB)?\\s?(\\d{1,3}(,\\d{3})*(\\.\\d{2})?)\\s?[万元]?', }, { label: '座机号码格式', regex: '\\d{3}-\\d{8}|\\d{4}-\\d{7,8}' }, { label: '手机号码格式', regex: '1[3-9]\\d{9}' }, ].map(({ label, regex }) => (
applyRegexTemplate(regex)} role="button" tabIndex={0} onKeyDown={(e) => { if (e.key === 'Enter' || e.key === ' ') applyRegexTemplate(regex); }} > {label}
))}
); }