1242 lines
46 KiB
TypeScript
1242 lines
46 KiB
TypeScript
import { useState, KeyboardEvent, FormEvent, useContext, useEffect } from 'react';
|
||
import { RuleContext } from './ReviewSettings';
|
||
|
||
/**
 * One row of the regex-extraction table: a target field name paired with the
 * regular expression used to extract it.
 */
interface RegexField {
  /** Row identifier; the component uses it to locate the row being edited or removed. */
  id?: string;
  /** Name of the field this row extracts. */
  fieldName: string;
  /** Regular-expression source text for this field. */
  regex: string;
}
|
||
|
||
/** A selectable prompt template, as returned by `getPromptTemplateById`. */
interface PromptTemplate {
  /** Numeric template identifier (matches the `<option>` values in the form). */
  id: number;
  /** Human-readable template name. */
  template_name: string;
  /** Template category; the built-in templates use 'Extraction' and 'Multimodal'. */
  template_type: string;
  /** Prompt text; may contain `{variable}` placeholders such as `{fieldsList}`. */
  template_content: string;
}
|
||
|
||
/**
 * Saved configuration of one LLM-based extraction method.
 * Shared by the `llm_ocr` and `llm_vl` entries of `initialData`.
 */
interface LlmExtractionConfig {
  /** Field entries to extract. */
  fields?: string[];
  /** Prompt configuration for this method. */
  prompt_setting?: {
    /** Prompt mode; the component falls back to 'system' when absent. */
    type?: string;
    /** Prompt text loaded into the prompt editor. */
    template?: string;
  };
}

/** Props accepted by `ExtractionSettings`. */
interface ExtractionSettingsProps {
  /** Called with a partial settings payload whenever the user changes something. */
  onChange?: (data: Record<string, unknown>) => void;
  /** Previously saved settings used to hydrate the form. */
  initialData?: {
    /** Settings for the "LLM over OCR text" method. */
    llm_ocr?: LlmExtractionConfig;
    /** Settings for the multimodal method (stored under the `llm` key in component state). */
    llm_vl?: LlmExtractionConfig;
    /** Settings for the regex method. */
    ocr_regex?: {
      fields?: RegexField[];
    };
  };
}
|
||
|
||
export function ExtractionSettings({ onChange, initialData }: ExtractionSettingsProps) {
|
||
// Global rule state shared through RuleContext.
const ruleContext = useContext(RuleContext);

// Active extraction-method tab: 'llm_ocr', 'llm' or 'ocr_regex'.
const [currentTab, setCurrentTab] = useState('llm_ocr');

// Field lists of the two LLM-based methods. Only these two keys exist, so the
// state is typed with exact keys instead of an open string index signature.
// Entries under `llm` are stored as "<name>_<type>".
const [fields, setFields] = useState<Record<'llm_ocr' | 'llm', string[]>>({
  llm_ocr: [],
  llm: [],
});

// Text currently typed into each "add field" input.
const [inputValue, setInputValue] = useState({
  llm_ocr: '',
  llm: '',
});

// Extraction type selected for the next multimodal field.
const [selectedFieldType, setSelectedFieldType] = useState('default');

// Rows of the regex-extraction table; starts with a single empty row.
const [regexFields, setRegexFields] = useState<RegexField[]>([
  { id: '1', fieldName: '', regex: '' },
]);

// Prompt-related state, one entry per LLM-based method.
const [promptType, setPromptType] = useState({
  llm_ocr: 'system',
  llm: 'system',
});
const [promptContent, setPromptContent] = useState({
  llm_ocr: '',
  llm: '',
});
const [selectedTemplate, setSelectedTemplate] = useState({
  llm_ocr: '',
  llm: '',
});
|
||
|
||
// Hydrate local state from `initialData` and publish the resulting field names
// to the shared rule context.
useEffect(() => {
  if (!initialData) {
    return;
  }

  const { llm_ocr: ocrConfig, llm_vl: multimodalConfig, ocr_regex: regexConfig } = initialData;

  // Field lists of the two LLM-based methods.
  const newFields = {
    llm_ocr: ocrConfig?.fields || [],
    llm: multimodalConfig?.fields || [],
  };
  setFields(newFields);

  // Prompt mode; defaults to the system prompt when nothing was saved.
  setPromptType({
    llm_ocr: ocrConfig?.prompt_setting?.type || 'system',
    llm: multimodalConfig?.prompt_setting?.type || 'system',
  });

  // Prompt text.
  setPromptContent({
    llm_ocr: ocrConfig?.prompt_setting?.template || '',
    llm: multimodalConfig?.prompt_setting?.template || '',
  });

  // Regex rows get fresh sequential ids; an empty saved list keeps the current rows.
  const savedRegexFields = regexConfig?.fields ?? [];
  if (savedRegexFields.length > 0) {
    setRegexFields(
      savedRegexFields.map((field: RegexField, index: number) => ({
        id: (index + 1).toString(),
        fieldName: field.fieldName || '',
        regex: field.regex || '',
      })),
    );
  }

  // Publish display names only: multimodal entries are stored as
  // "<name>_<type>", so the type suffix is removed and duplicates are
  // collapsed, matching what getAllFields() publishes later.
  // getFieldInfo is declared further down in the component; it is initialised
  // by the time this effect callback runs.
  const allFields = [
    ...new Set([
      ...newFields.llm_ocr,
      ...newFields.llm.map(field => getFieldInfo(field).fieldName),
      ...savedRegexFields.map((f: RegexField) => f.fieldName),
    ]),
  ].filter(Boolean);

  if (ruleContext && ruleContext.updateFields) {
    ruleContext.updateFields(allFields);
  }
}, [initialData, ruleContext?.updateFields]);
|
||
|
||
// Mount-only: when no `initialData` was supplied, broadcast the current
// (default) field state once.
// NOTE(review): this runs before the effect below, so it publishes the
// still-empty field list to the context first — confirm that is intended.
useEffect(() => {
  if (initialData) {
    return;
  }
  updateAllFields();
}, []);

// Mount-only: when no `initialData` was supplied but the shared context already
// holds extraction fields, copy them into the field list of the active tab.
useEffect(() => {
  if (initialData || !ruleContext || ruleContext.extractionFields.length === 0) {
    return;
  }
  const fieldsFromContext = [...ruleContext.extractionFields];
  setFields(previous => ({
    ...previous,
    [currentTab]: fieldsFromContext,
  }));
}, []);
|
||
|
||
/**
 * Collects every usable field name across the three extraction methods
 * (LLM-over-OCR, multimodal, regex), de-duplicated and in that order.
 *
 * Multimodal entries are stored as "<name>_<type>"; only the name part is
 * returned. Regex rows whose name is blank are skipped.
 */
const getAllFields = (): string[] => {
  const ocrNames = fields.llm_ocr || [];

  // Reuse getFieldInfo so name/type parsing lives in one place. It is declared
  // further down in the component, which is fine because this function is only
  // invoked from effects and event handlers, after the render body has run.
  const multimodalNames = (fields.llm || []).map(field => getFieldInfo(field).fieldName);

  const regexNames = regexFields
    .map(field => field.fieldName)
    .filter(name => name.trim() !== '');

  return [...new Set([...ocrNames, ...multimodalNames, ...regexNames])];
};
|
||
|
||
/**
 * Reports whether `fieldName` is already used by any extraction method.
 * The comparison is case-insensitive on the whole name.
 *
 * @param fieldName Name to look up.
 * @param excludeId Optional id of a regex row to ignore, so a row being edited
 *   is not reported as a duplicate of itself.
 * @returns `true` when another field already uses the name.
 */
const isFieldNameExists = (fieldName: string, excludeId?: string): boolean => {
  const wanted = fieldName.toLowerCase();

  const llmNames = [
    ...(fields.llm_ocr || []),
    // Multimodal entries carry a "_<type>" suffix; compare on the name part only.
    ...(fields.llm || []).map(field => getFieldInfo(field).fieldName),
  ];

  // Build the regex names here rather than through getAllFields(): that helper
  // always includes every regex row, which would defeat `excludeId`.
  const regexNames = regexFields
    .filter(f => !excludeId || f.id !== excludeId)
    .map(f => f.fieldName);

  return [...llmNames, ...regexNames].some(name => name.toLowerCase() === wanted);
};
|
||
|
||
/**
 * Publishes the current extraction fields to every consumer: the shared rule
 * context, DOM listeners of 'extraction-fields-updated', and the parent's
 * `onChange` callback.
 */
const updateAllFields = () => {
  const mergedFieldNames = getAllFields();

  // 1) Shared context.
  if (ruleContext) {
    ruleContext.updateFields(mergedFieldNames);
  }

  // 2) DOM event, for consumers that do not use the context.
  const namedRegexFields = regexFields
    .map(f => f.fieldName)
    .filter(name => name.trim() !== '');
  document.dispatchEvent(
    new CustomEvent('extraction-fields-updated', {
      detail: {
        fields: mergedFieldNames,
        tab: currentTab,
        fieldsData: {
          llm_ocr: fields.llm_ocr || [],
          llm: fields.llm || [],
          regex: namedRegexFields,
        },
      },
    }),
  );

  // 3) Parent callback; `allFields` carries the merged name list.
  onChange?.({
    extractionMethod: currentTab,
    fields,
    regexFields,
    allFields: mergedFieldNames,
  });
};
|
||
|
||
// Whenever a field list or the regex rows change, publish the new state after a
// 300 ms pause; a change within that window cancels the pending publish.
useEffect(() => {
  const timerId = setTimeout(() => {
    updateAllFields();
  }, 300);

  return () => {
    clearTimeout(timerId);
  };
}, [fields.llm_ocr, fields.llm, regexFields]);
|
||
|
||
/** Switches the active extraction-method tab and reports it to the parent. */
const handleTabChange = (tab: string) => {
  setCurrentTab(tab);
  onChange?.({ extractionMethod: tab });
};
|
||
|
||
/** Stores the text typed into the "add field" input of the given method. */
const handleFieldInputChange = (e: FormEvent<HTMLInputElement>, type: 'llm_ocr' | 'llm') => {
  const typedText = e.currentTarget.value;
  const nextInputValue = { ...inputValue, [type]: typedText };
  setInputValue(nextInputValue);
};
|
||
|
||
/** Stores the extraction type chosen for the next multimodal field. */
const handleFieldTypeChange = (e: FormEvent<HTMLSelectElement>) => {
  const chosenType = e.currentTarget.value;
  setSelectedFieldType(chosenType);
};
|
||
|
||
/**
 * Adds the text of the "add field" input to the field list of `type`.
 *
 * - 'llm_ocr': the input may hold several names separated by whitespace, the
 *   ideographic comma (、) or a comma (ASCII or fullwidth). Names that already
 *   exist, or that repeat inside the same input, are skipped. If nothing new
 *   remains the user is alerted and nothing changes.
 * - 'llm': the whole trimmed input is one name, stored as
 *   "<name>_<selectedFieldType>". A name that already exists is rejected with
 *   an alert.
 *
 * On success the input is cleared and, for 'llm', the type selector is reset.
 */
const addField = (type: 'llm_ocr' | 'llm') => {
  const rawInput = inputValue[type].trim();
  if (!rawInput) {
    return;
  }

  let newFields: string[];

  if (type === 'llm_ocr') {
    const candidates = rawInput
      .split(/[\s、,,]+/)
      .map(f => f.trim())
      .filter(f => f !== '');

    // Track names accepted from this batch (case-insensitively, like
    // isFieldNameExists) so an input such as "a a" cannot add "a" twice.
    const acceptedInBatch = new Set<string>();
    const uniqueFields = candidates.filter(name => {
      const normalized = name.toLowerCase();
      if (acceptedInBatch.has(normalized) || isFieldNameExists(name)) {
        return false;
      }
      acceptedInBatch.add(normalized);
      return true;
    });

    if (uniqueFields.length === 0) {
      alert("所有字段名已存在,请确保字段名称唯一");
      return;
    }

    newFields = [...fields[type], ...uniqueFields];
  } else {
    const fieldName = rawInput;

    if (isFieldNameExists(fieldName)) {
      alert(`字段名 "${fieldName}" 已存在,请确保字段名称唯一`);
      return;
    }

    newFields = [...fields[type], `${fieldName}_${selectedFieldType}`];
  }

  setFields(prevFields => ({
    ...prevFields,
    [type]: newFields,
  }));

  setInputValue({
    ...inputValue,
    [type]: '',
  });

  if (type === 'llm') {
    setSelectedFieldType('default');
  }
};
|
||
|
||
/** Lets the Enter key in an "add field" input add the field. */
const handleKeyDown = (e: KeyboardEvent<HTMLInputElement>, type: 'llm_ocr' | 'llm') => {
  if (e.key !== 'Enter') {
    return;
  }
  e.preventDefault();
  addField(type);
};
|
||
|
||
/**
 * Removes the field at `index` from the list of the given method.
 *
 * The state updater is kept pure and derives the new list from the latest
 * state. Consumers are notified by the debounced effect that watches
 * `fields`; publishing from here would use this render's closure and so
 * re-broadcast the list that still contains the removed field.
 */
const removeField = (type: 'llm_ocr' | 'llm', index: number) => {
  setFields(prevFields => ({
    ...prevFields,
    [type]: (prevFields[type] ?? []).filter((_, position) => position !== index),
  }));
};
|
||
|
||
/**
 * Appends an empty row to the regex table and reports the new row list to the
 * parent.
 *
 * The new id is one above the highest numeric id in use (never below
 * `length + 1`), so it stays unique after rows have been deleted. Using
 * `length + 1` alone would reuse the id of an existing row in that case.
 *
 * Context and DOM-event consumers are notified by the debounced effect that
 * watches `regexFields`; a deferred updateAllFields() here would run with this
 * render's closure and report the row list without the new row.
 */
const addRegexFieldRow = () => {
  const highestId = regexFields.reduce((highest, row) => {
    const numericId = Number(row.id);
    return Number.isFinite(numericId) && numericId > highest ? numericId : highest;
  }, 0);

  const newRow: RegexField = {
    id: String(Math.max(highestId, regexFields.length) + 1),
    fieldName: '',
    regex: '',
  };
  const nextRows = [...regexFields, newRow];

  setRegexFields(nextRows);

  onChange?.({
    extractionMethod: currentTab,
    regexFields: nextRows,
  });
};
|
||
|
||
/**
 * Deletes the regex row with the given id and reports the remaining rows to
 * the parent. The last remaining row is never deleted.
 */
const removeRegexFieldRow = (id: string) => {
  const isOnlyRow = regexFields.length <= 1;
  if (isOnlyRow) {
    return;
  }

  const remainingRows = regexFields.filter(row => row.id !== id);
  setRegexFields(remainingRows);

  onChange?.({
    extractionMethod: currentTab,
    regexFields: remainingRows,
  });
};
|
||
|
||
/**
 * Writes `value` into the `key` column of the regex row with the given id and
 * reports the updated rows to the parent. Uniqueness is not checked here; that
 * happens when the input loses focus.
 */
const updateRegexField = (id: string, key: 'fieldName' | 'regex', value: string) => {
  const updatedRows = regexFields.map(row =>
    row.id === id ? { ...row, [key]: value } : row,
  );

  setRegexFields(updatedRows);

  onChange?.({
    extractionMethod: currentTab,
    regexFields: updatedRows,
  });
};
|
||
|
||
/**
 * Runs when an input of a regex row loses focus.
 *
 * - 'regex' column: just publishes the current fields.
 * - 'fieldName' column: a blank name is ignored. Otherwise the trimmed name is
 *   compared, case-insensitively, against the other regex rows and the fields
 *   of both LLM methods. A duplicate triggers an alert and clears the row's
 *   name; a unique name publishes the current fields.
 */
const handleRegexFieldBlur = (id: string, key: 'fieldName' | 'regex') => {
  if (key !== 'fieldName') {
    updateAllFields();
    return;
  }

  const currentField = regexFields.find(field => field.id === id);
  const fieldName = currentField?.fieldName.trim() ?? '';
  if (fieldName === '') {
    return;
  }

  // Every name this row must not collide with. One case-insensitive rule is
  // applied to all sources so it agrees with isFieldNameExists.
  const takenNames = [
    ...regexFields.filter(f => f.id !== id).map(f => f.fieldName.trim()),
    ...fields.llm_ocr,
    // Multimodal entries carry a "_<type>" suffix; compare on the name part only.
    ...fields.llm.map(f => getFieldInfo(f).fieldName),
  ];

  const wanted = fieldName.toLowerCase();
  const isDuplicate = takenNames.some(name => name.toLowerCase() === wanted);

  if (isDuplicate) {
    alert(`字段名 "${fieldName}" 已存在,请确保字段名称唯一`);

    // Clear the offending name so the user has to pick another one.
    setRegexFields(
      regexFields.map(field => (field.id === id ? { ...field, fieldName: '' } : field)),
    );
    return;
  }

  updateAllFields();
};
|
||
|
||
/**
 * Writes a template regex into the last row of the regex table.
 * Does nothing when there is no row, or the last row has no id to address it by.
 */
const applyRegexTemplate = (regex: string) => {
  const lastField = regexFields[regexFields.length - 1];
  if (lastField?.id === undefined) {
    return;
  }
  updateRegexField(lastField.id, 'regex', regex);
};
|
||
|
||
/**
 * Splits a stored multimodal field entry ("<name>_<type>") into its parts and
 * resolves the display label and badge classes for its type.
 *
 * The entry is split at the LAST underscore, so names that contain underscores
 * themselves ("contract_no_seal") keep their full name. An entry without any
 * underscore is treated as a name with type 'default'. Unknown types fall back
 * to the default label and badge.
 *
 * @param field Stored field entry.
 * @returns `fieldName`, `fieldType`, the localized `typeName` and the
 *   `badgeClass` CSS classes.
 */
const getFieldInfo = (field: string) => {
  // A Map is used for the lookup so that only these keys match; a plain object
  // would also resolve inherited members such as "toString".
  const typeStyles = new Map<string, { label: string; badge: string }>([
    ['default', { label: '默认', badge: 'bg-blue-100 text-blue-800' }],
    ['seal', { label: '印章', badge: 'bg-red-100 text-red-800' }],
    ['cross-seal', { label: '骑缝章', badge: 'bg-red-100 text-red-800' }],
    ['handwriting', { label: '手写体', badge: 'bg-yellow-100 text-yellow-800' }],
    ['print', { label: '印刷体', badge: 'bg-purple-100 text-purple-800' }],
    ['english', { label: '英文', badge: 'bg-indigo-100 text-indigo-800' }],
    ['number', { label: '数字', badge: 'bg-gray-100 text-gray-800' }],
    ['currency', { label: '货币', badge: 'bg-green-100 text-green-800' }],
  ]);
  const fallbackStyle = { label: '默认', badge: 'bg-blue-100 text-blue-800' };

  const separatorIndex = field.lastIndexOf('_');
  const hasSuffix = separatorIndex !== -1;
  const fieldName = hasSuffix ? field.slice(0, separatorIndex) : field;
  const fieldType = hasSuffix ? field.slice(separatorIndex + 1) : 'default';

  const { label: typeName, badge: badgeClass } = typeStyles.get(fieldType) ?? fallbackStyle;

  return { fieldName, fieldType, typeName, badgeClass };
};
|
||
|
||
/**
 * Stores the prompt mode chosen via the radio buttons of the given method and
 * reports the method's prompt settings to the parent.
 */
const handlePromptTypeChange = (e: FormEvent<HTMLInputElement>, type: 'llm_ocr' | 'llm') => {
  const chosenMode = e.currentTarget.value;

  setPromptType({ ...promptType, [type]: chosenMode });

  if (!onChange) {
    return;
  }
  onChange({
    extractionMethod: currentTab,
    promptSettings: {
      type: chosenMode,
      template: selectedTemplate[type],
      content: promptContent[type],
    },
  });
};
|
||
|
||
/**
 * Handles a change of the prompt-template selector of the given method.
 *
 * - Empty selection: clears the prompt text.
 * - Known template: loads its text; if the method has fields, every
 *   `{fieldsList}` placeholder is replaced by a numbered list of them
 *   (multimodal fields are listed as "name (type label)").
 * - Unknown template id: only the selection itself is stored.
 *
 * The resulting prompt settings are reported to the parent.
 */
const handleTemplateChange = (e: FormEvent<HTMLSelectElement>, type: 'llm_ocr' | 'llm') => {
  const value = e.currentTarget.value;
  setSelectedTemplate({
    ...selectedTemplate,
    [type]: value,
  });

  let content = '';

  if (value) {
    const templateData = getPromptTemplateById(Number(value));
    if (!templateData) {
      return;
    }

    content = templateData.template_content;

    if (content.includes('{fieldsList}') && fields[type].length > 0) {
      const fieldListStr = fields[type]
        .map((field, idx) => {
          if (type === 'llm_ocr') {
            return `${idx + 1}. ${field}`;
          }
          const { fieldName, typeName } = getFieldInfo(field);
          return `${idx + 1}. ${fieldName} (${typeName})`;
        })
        .join('\n');

      // split/join inserts the list literally. String.replace with a string
      // replacement would interpret "$&", "$1", ... inside field names and
      // would only touch the first placeholder.
      content = content.split('{fieldsList}').join(fieldListStr);
    }
  }

  setPromptContent({
    ...promptContent,
    [type]: content,
  });

  onChange?.({
    extractionMethod: currentTab,
    promptSettings: {
      type: promptType[type],
      template: value,
      content,
    },
  });
};
|
||
|
||
/**
 * Stores the edited prompt text of the given method and reports the method's
 * prompt settings to the parent.
 */
const handlePromptContentChange = (e: FormEvent<HTMLTextAreaElement>, type: 'llm_ocr' | 'llm') => {
  const editedText = e.currentTarget.value;

  setPromptContent({ ...promptContent, [type]: editedText });

  if (!onChange) {
    return;
  }
  onChange({
    extractionMethod: currentTab,
    promptSettings: {
      type: promptType[type],
      template: selectedTemplate[type],
      content: editedText,
    },
  });
};
|
||
|
||
/**
 * Inserts the placeholder `{variable}` into the prompt textarea of the given
 * method, replacing the current selection, then restores focus with the caret
 * placed right after the inserted placeholder. The new prompt settings are
 * reported to the parent. Does nothing if the textarea is not in the document.
 */
const applyVariableToPrompt = (variable: string, type: 'llm_ocr' | 'llm') => {
  const textareaId = type === 'llm_ocr' ? 'llm-prompt-content' : 'multimodal-prompt-content';
  const textarea = document.getElementById(textareaId) as HTMLTextAreaElement;
  if (!textarea) {
    return;
  }

  const start = textarea.selectionStart;
  const end = textarea.selectionEnd;
  const currentText = textarea.value;
  const placeholder = `{${variable}}`;
  const newText = currentText.substring(0, start) + placeholder + currentText.substring(end);

  setPromptContent({
    ...promptContent,
    [type]: newText,
  });

  // Deferred so it runs after the current handler has finished.
  const caret = start + variable.length + 2;
  setTimeout(() => {
    textarea.focus();
    textarea.setSelectionRange(caret, caret);
  }, 0);

  onChange?.({
    extractionMethod: currentTab,
    promptSettings: {
      type: promptType[type],
      template: selectedTemplate[type],
      content: newText,
    },
  });
};
|
||
|
||
/**
 * Looks up a built-in prompt template by id.
 *
 * The templates are mock data defined inline; per the original note, a real
 * application would load them from the server.
 *
 * @param id Template id (1, 4, 5, 6 for text extraction; 7, 8, 9 for multimodal).
 * @returns The template, or `null` when the id is unknown.
 */
const getPromptTemplateById = (id: number): PromptTemplate | null => {
  // Continuation lines of the template literals start at column 0 on purpose:
  // any indentation would become part of the prompt text.
  const templates: Record<number, PromptTemplate> = {
    1: {
      id: 1,
      template_name: '行政处罚-抽取通用模板',
      template_type: 'Extraction',
      template_content: `你是一个专业的文档信息抽取助手。请从以下{docType}文档中抽取关键信息:

{fieldsList}

请将结果以JSON格式输出,包含以上字段。如果某个字段在文档中未找到,则该字段的值设为null。`
    },
    4: {
      id: 4,
      template_name: '采购合同-乙方资质抽取',
      template_type: 'Extraction',
      template_content: `你是一个专业的合同信息抽取助手。请从以下{docType}中抽取乙方的资质信息:

需要抽取的信息包括:
{fieldsList}

{companyName}要求所有供应商必须提供完整的资质信息。请将结果以JSON格式输出,包含以上字段。`
    },
    5: {
      id: 5,
      template_name: '合同-关键条款抽取',
      template_type: 'Extraction',
      template_content: `请作为{industry}行业的专业合同审核员,从提供的{docType}中提取以下关键条款信息:

{fieldsList}

文档ID: {documentId}
审核日期: {date}

请以JSON格式输出结果,对于未明确指定的条款需标记为"未明确约定"。`
    },
    6: {
      id: 6,
      template_name: '烟草许可证-信息抽取',
      template_type: 'Extraction',
      template_content: `请从下列烟草专卖许可证文件中抽取以下关键信息:

{fieldsList}

这些信息将用于{companyName}内部数据库更新。请确保许可证编号和有效期格式准确无误。`
    },
    7: {
      id: 7,
      template_name: '多模态-印章识别模板',
      template_type: 'Multimodal',
      template_content: `请识别并提取文档中的所有印章信息,包括:

{fieldsList}

文档类型: {docType}
页面范围: {pageRange}

请注意区分公章、法人章和合同专用章,并分析印章的清晰度和完整性。`
    },
    8: {
      id: 8,
      template_name: '多模态-表格抽取模板',
      template_type: 'Multimodal',
      template_content: `请从文档中的表格提取以下信息:

{fieldsList}

文档类型: {docType}
表格可能跨页,请确保完整提取所有内容。表格中的数值需保留原始精度。`
    },
    9: {
      id: 9,
      template_name: '多模态-手写内容识别模板',
      template_type: 'Multimodal',
      template_content: `请识别文档中的手写内容,特别关注:

{fieldsList}

文档类型: {docType}
内容类型: {contentType}

对于难以辨认的手写内容,请标注为"[难以辨认]"并尽可能给出可能的解读。`
    }
  };

  return templates[id] ?? null;
};
|
||
|
||
// ---- Static option tables that drive the form below ----

// Extraction-method tabs, in display order.
const tabItems = [
  { key: 'llm_ocr', icon: 'ri-brain-line', label: '大模型抽取' },
  { key: 'llm', icon: 'ri-scan-line', label: '多模态抽取' },
  { key: 'ocr_regex', icon: 'ri-code-box-line', label: '正则抽取' },
];

// Extraction types offered for a multimodal field.
const fieldTypeOptions = [
  { value: 'default', label: '默认' },
  { value: 'seal', label: '印章' },
  { value: 'cross-seal', label: '骑缝章' },
  { value: 'handwriting', label: '手写体' },
  { value: 'print', label: '印刷体' },
  { value: 'english', label: '英文' },
  { value: 'number', label: '数字' },
  { value: 'currency', label: '货币' },
];

// The two prompt modes (radio buttons).
const promptModes = [
  { value: 'system', label: '使用系统默认提示词', className: 'inline-flex items-center mr-6' },
  { value: 'custom', label: '使用自定义提示词', className: 'inline-flex items-center' },
];

// Per-method prompt panel configuration. `idPrefix` builds the element ids;
// applyVariableToPrompt looks the textareas up by those ids.
const promptPanels = {
  llm_ocr: {
    idPrefix: 'llm',
    systemHint: '系统将根据评查点类型和抽取目标自动生成适合的提示词,您无需额外配置。',
    templates: [
      { value: '1', label: '行政处罚-抽取通用模板' },
      { value: '4', label: '采购合同-乙方资质抽取' },
      { value: '5', label: '合同-关键条款抽取' },
      { value: '6', label: '烟草许可证-信息抽取' },
    ],
    variables: ['docType', 'fieldsList', 'companyName', 'documentId', 'date', 'industry', 'ocrText'],
  },
  llm: {
    idPrefix: 'multimodal',
    systemHint: '系统将根据评查点类型和抽取目标自动生成适合的提示词,支持图表、印章等图像内容抽取。',
    templates: [
      { value: '7', label: '多模态-印章识别模板' },
      { value: '8', label: '多模态-表格抽取模板' },
      { value: '9', label: '多模态-手写内容识别模板' },
    ],
    variables: [
      'docType', 'fieldsList', 'companyName', 'documentId', 'date', 'industry',
      'contentType', 'pageRange', 'colorMode', 'ocrText',
    ],
  },
};

// Ready-made regular expressions. These are JS string literals, so "\\d" is
// the regex token \d. Writing them as quoted JSX attribute values would keep
// both backslashes, because JSX does not process escapes there.
const regexTemplates = [
  { label: '日期格式:yyyy-mm-dd', regex: '\\d{4}[-/年](0?[1-9]|1[0-2])[-/月](0?[1-9]|[12][0-9]|3[01])[日]?' },
  { label: '合同编号格式', regex: '[A-Z]{2,5}-\\d{4,10}' },
  { label: '金额格式', regex: '(人民币|RMB)?\\s?(\\d{1,3}(,\\d{3})*(\\.\\d{2})?)\\s?[万元]?' },
  { label: '座机号码格式', regex: '\\d{3}-\\d{8}|\\d{4}-\\d{7,8}' },
  { label: '手机号码格式', regex: '1[3-9]\\d{9}' },
];

// Keyboard activation for elements with role="button": Enter or Space runs the
// action. preventDefault stops Space from also scrolling the page.
const activateOnKey = (action: () => void) => (e: KeyboardEvent<HTMLElement>) => {
  if (e.key === 'Enter' || e.key === ' ') {
    e.preventDefault();
    action();
  }
};

// The "×" control that removes one chip from a field list.
const renderRemoveChipButton = (type: 'llm_ocr' | 'llm', index: number, displayName: string) => (
  <span
    className="close-btn"
    onClick={() => removeField(type, index)}
    onKeyDown={activateOnKey(() => removeField(type, index))}
    role="button"
    tabIndex={0}
    aria-label={`删除字段 ${displayName}`}
  >×</span>
);

// Prompt settings section shared by both LLM-based methods.
const renderPromptSettings = (type: 'llm_ocr' | 'llm') => {
  const { idPrefix, systemHint, templates, variables } = promptPanels[type];

  return (
    <div className="grid grid-cols-1 gap-3 mt-3">
      <div className="col-span-1">
        <label className="form-label mb-1" htmlFor={`${idPrefix}-prompt-settings`}>提示词设置</label>
        <div className="flex items-center mb-2" id={`${idPrefix}-prompt-settings`}>
          {promptModes.map(mode => (
            <label className={mode.className} key={mode.value}>
              <input
                type="radio"
                name={`${idPrefix}-prompt-type`}
                value={mode.value}
                checked={promptType[type] === mode.value}
                onChange={(e) => handlePromptTypeChange(e, type)}
                className="form-radio"
              />
              <span className="ml-2">{mode.label}</span>
            </label>
          ))}
        </div>

        <div
          className="bg-gray-50 p-2 rounded text-xs text-gray-600 mb-2"
          id={`${idPrefix}-system-prompt-info`}
          style={{ display: promptType[type] === 'system' ? 'block' : 'none' }}
        >
          {systemHint}
        </div>

        <div
          id={`${idPrefix}-custom-prompt-container`}
          style={{ display: promptType[type] === 'custom' ? 'block' : 'none' }}
          className="border border-dashed border-gray-300 p-3 rounded-md"
        >
          <div className="mb-2">
            <label className="form-label mb-1 text-sm" htmlFor={`${idPrefix}-prompt-template`}>选择提示词模板</label>
            <select
              className="form-select"
              id={`${idPrefix}-prompt-template`}
              value={selectedTemplate[type]}
              onChange={(e) => handleTemplateChange(e, type)}
            >
              <option value="">请选择模板</option>
              {templates.map(template => (
                <option key={template.value} value={template.value}>{template.label}</option>
              ))}
            </select>
          </div>
          <div className="mb-2">
            <label className="form-label mb-1 text-sm" htmlFor={`${idPrefix}-prompt-content`}>提示词内容</label>
            <textarea
              className="form-textarea"
              id={`${idPrefix}-prompt-content`}
              rows={4}
              placeholder="选择模板后自动填充,您也可以进行修改..."
              value={promptContent[type]}
              onChange={(e) => handlePromptContentChange(e, type)}
              readOnly={!selectedTemplate[type]}
            ></textarea>
            <div className="form-tip mt-1 bg-gray-50 p-2 rounded text-xs">
              <p className="mb-1"><strong>支持的变量</strong>(点击变量将其添加到提示词中):</p>
              <div className="flex flex-wrap gap-1">
                {variables.map(variable => (
                  <button
                    key={variable}
                    type="button"
                    className="var-tag"
                    onClick={() => applyVariableToPrompt(variable, type)}
                  >{variable}</button>
                ))}
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>
  );
};

return (
  <div className="ant-card">
    <div className="ant-card-header">
      <h3>抽取设置</h3>
    </div>
    <div className="ant-card-body">
      <div className="mb-6">
        {/* Extraction-method tab switcher */}
        <div className="tab-nav mb-4" id="extraction-method-tabs">
          {tabItems.map(tab => (
            <button
              key={tab.key}
              className={`tab-nav-item ${currentTab === tab.key ? 'active' : ''}`}
              onClick={() => handleTabChange(tab.key)}
              type="button"
            >
              <i className={`${tab.icon} mr-1`}></i> {tab.label}
            </button>
          ))}
        </div>
      </div>

      {/* LLM extraction over OCR text */}
      <div className={`extraction-config ${currentTab !== 'llm_ocr' ? 'hidden' : ''}`} id="llm-ocr-config">
        <div className="grid grid-cols-1 gap-3">
          <div className="col-span-1">
            <label className="form-label mb-1" htmlFor="field-input-ocr">抽取字段</label>
            <div className="flex mb-2">
              <input
                type="text"
                className="form-input mr-2"
                id="field-input-ocr"
                placeholder="请输入字段名,多个字段可用、或,或空格分隔"
                value={inputValue.llm_ocr}
                onChange={(e) => handleFieldInputChange(e, 'llm_ocr')}
                onKeyDown={(e) => handleKeyDown(e, 'llm_ocr')}
              />
              <button
                className="ant-btn ant-btn-default"
                id="add-field-btn-ocr"
                type="button"
                onClick={() => addField('llm_ocr')}
              >添加</button>
            </div>
            <div className="chips-container" id="fields-container-ocr">
              {fields.llm_ocr.map((field, index) => (
                <div className="chip" key={`ocr-field-${index}`}>
                  {field}
                  {renderRemoveChipButton('llm_ocr', index, field)}
                </div>
              ))}
            </div>
            <div className="form-tip mt-1 text-xs">支持一次输入多个字段</div>
          </div>
        </div>

        {renderPromptSettings('llm_ocr')}
      </div>

      {/* Multimodal extraction */}
      <div className={`extraction-config ${currentTab !== 'llm' ? 'hidden' : ''}`} id="llm-config">
        <div className="grid grid-cols-1 gap-3">
          <div className="col-span-1">
            <label className="form-label mb-1" htmlFor="field-input">抽取字段与类型</label>
            <div className="flex mb-2">
              <input
                type="text"
                className="form-input mr-2"
                id="field-input"
                placeholder="请输入字段名"
                value={inputValue.llm}
                onChange={(e) => handleFieldInputChange(e, 'llm')}
                onKeyDown={(e) => handleKeyDown(e, 'llm')}
              />
              <select
                className="form-select mr-2"
                id="field-type"
                value={selectedFieldType}
                onChange={handleFieldTypeChange}
              >
                {fieldTypeOptions.map(option => (
                  <option key={option.value} value={option.value}>{option.label}</option>
                ))}
              </select>
              <button
                className="ant-btn ant-btn-default"
                id="add-field-btn"
                type="button"
                onClick={() => addField('llm')}
              >添加</button>
            </div>
            <div className="chips-container" id="fields-container">
              {fields.llm.map((field, index) => {
                const { fieldName, fieldType, typeName, badgeClass } = getFieldInfo(field);
                return (
                  <div className="chip" key={`llm-field-${index}`}>
                    {fieldName}
                    <span className={`badge ${badgeClass} text-xs ml-1`} data-type={fieldType}>{typeName}</span>
                    {renderRemoveChipButton('llm', index, fieldName)}
                  </div>
                );
              })}
            </div>
            <div className="form-tip mt-1 text-xs">请为每个字段选择适当的抽取类型,有助于提高识别准确率</div>
          </div>
        </div>

        {renderPromptSettings('llm')}
      </div>

      {/* Regex extraction */}
      <div className={`extraction-config ${currentTab !== 'ocr_regex' ? 'hidden' : ''}`} id="ocr-regex-config">
        <div className="grid grid-cols-1 gap-3">
          <div className="col-span-1">
            <div className="mb-2">
              <div className="flex justify-between items-center mb-1">
                <label className="form-label m-0" htmlFor="regex-fields-container">字段正则表达式配置</label>
                <button
                  className="ant-btn ant-btn-default"
                  id="add-regex-field-row"
                  type="button"
                  onClick={addRegexFieldRow}
                >
                  <i className="ri-add-line"></i> 添加字段
                </button>
              </div>

              <div className="mt-2" id="regex-fields-container">
                {/* One row per field / regular-expression pair */}
                {regexFields.map((field, index) => {
                  // `id` is optional on RegexField; fall back to a positional
                  // id so keys, element ids and handler arguments are strings.
                  const rowId = field.id ?? `row-${index}`;
                  return (
                    <div className="regex-field-row flex items-start mb-2 border border-gray-200 rounded-md p-2 bg-gray-50" key={rowId}>
                      <div className="w-3/10 mr-2">
                        <label className="text-xs text-gray-600 mb-0 block" htmlFor={`regex-field-name-${rowId}`}>字段名称</label>
                        <input
                          type="text"
                          className="form-input regex-field-name"
                          id={`regex-field-name-${rowId}`}
                          placeholder="如:合同编号"
                          value={field.fieldName}
                          onChange={(e) => updateRegexField(rowId, 'fieldName', e.target.value)}
                          onBlur={() => handleRegexFieldBlur(rowId, 'fieldName')}
                        />
                      </div>
                      <div className="w-7/10 mr-2">
                        <label className="text-xs text-gray-600 mb-0 block" htmlFor={`regex-expression-${rowId}`}>正则表达式</label>
                        <input
                          type="text"
                          className="form-input regex-expression"
                          id={`regex-expression-${rowId}`}
                          placeholder={'如:\\d{4}[-/年](0?[1-9]|1[0-2])[-/月](0?[1-9]|[12][0-9]|3[01])[日]?'}
                          value={field.regex}
                          onChange={(e) => updateRegexField(rowId, 'regex', e.target.value)}
                          onBlur={() => handleRegexFieldBlur(rowId, 'regex')}
                        />
                      </div>
                      <div className="flex flex-col justify-end pt-3">
                        <button
                          className="text-red-500 hover:text-red-700 remove-regex-field-row"
                          type="button"
                          aria-label="删除"
                          onClick={() => removeRegexFieldRow(rowId)}
                        >
                          <i className="ri-delete-bin-line"></i>
                        </button>
                      </div>
                    </div>
                  );
                })}
              </div>
            </div>
            <div className="mt-2">
              <label className="form-label mb-1" htmlFor="regex-template-container">常用正则模板</label>
              <div className="flex flex-wrap gap-1 mt-1" id="regex-template-container">
                {regexTemplates.map(template => (
                  <div
                    key={template.label}
                    className="chip cursor-pointer regex-template"
                    data-regex={template.regex}
                    onClick={() => applyRegexTemplate(template.regex)}
                    role="button"
                    tabIndex={0}
                    onKeyDown={activateOnKey(() => applyRegexTemplate(template.regex))}
                  >{template.label}</div>
                ))}
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>
  </div>
);
|
||
}
|