Files
leaudit-platform-frontend/app/components/rules/new/ExtractionSettings.tsx
T

1740 lines
61 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import React, {
useState,
KeyboardEvent,
FormEvent,
useContext,
useEffect,
useCallback,
useRef,
} from "react";
import { RuleContext } from "~/contexts/RuleContext";
import type { VLMFieldType } from "~/models/evaluation_points";
/** Callback shape used to hand a partial data payload to a listener. */
type NotifyFn = (data: Record<string, unknown>) => void;

/**
 * Builds a trailing-edge debounced wrapper around `fn`.
 *
 * Every call restarts the timer; only the payload of the most recent call is
 * delivered, `ms` milliseconds after that call.
 *
 * @param fn - Listener to invoke once calls have settled.
 * @param ms - Quiet period in milliseconds (defaults to 300).
 * @returns A function with the same signature as `fn`.
 */
const debounce = (fn: NotifyFn, ms = 300): NotifyFn => {
  let pendingTimer: ReturnType<typeof setTimeout> | undefined;

  const debounced: NotifyFn = (data) => {
    // Drop whatever call was still waiting; the newest payload wins.
    clearTimeout(pendingTimer);
    pendingTimer = setTimeout(() => {
      fn(data);
    }, ms);
  };

  return debounced;
};
/**
 * Type guard distinguishing the object form of a VLM field from its string form.
 *
 * @param field - Either a string entry or a VlmField object.
 * @returns `true` when `field` is a non-string value that has both a `name`
 *   and a `type` property.
 */
const isVlmField = (field: string | VlmField): field is VlmField => {
  if (typeof field === "string") {
    return false;
  }
  return "name" in field && "type" in field;
};
/**
 * Returns the bare field name of a VLM entry.
 *
 * - VlmField objects: the `name` property is returned as-is.
 * - Strings: treated as `name_type`; everything before the LAST underscore is
 *   returned, so a name that itself contains underscores (for example
 *   `contract_no_seal`) keeps its full name (`contract_no`). Previously the
 *   string was cut at the first underscore, which truncated such names.
 * - Strings without any underscore are returned unchanged.
 *
 * Note: a string that was stored without a type suffix but contains an
 * underscore still loses its last segment, because the two forms cannot be
 * told apart from the string alone.
 *
 * @param field - String entry or VlmField object.
 * @returns The field name without its type suffix.
 */
const safeProcessFieldName = (field: string | VlmField): string => {
  if (isVlmField(field)) {
    return field.name;
  }
  const separatorIndex = field.lastIndexOf("_");
  return separatorIndex === -1 ? field : field.slice(0, separatorIndex);
};
/**
* ExtractionSettings 组件
*
* 功能:
* - 提供三种抽取设置方式:大模型抽取、多模态抽取和正则抽取
* - 允许在三个标签页中添加不同类型的字段
* - 统一的更新机制,确保点击"更新全部字段"按钮时,所有三种类型的字段都会被收集和更新
*
* 优化后的交互逻辑:
* 1. 用户可以在三个标签页之间切换,在每个标签页中添加对应类型的字段
* 2. 添加字段后,会自动标记为"有未保存更改"状态
* 3. 无论当前在哪个标签页,点击底部的"更新全部字段"按钮都会收集所有三种类型的字段
* 4. 更新成功后会显示详细的字段数量统计信息,包括每种类型的字段数
* 5. 系统会自动检查字段名重复,确保所有字段名唯一
*
* 注意:
* - 仅当点击"更新全部字段"按钮后,字段才会真正提交给父组件和规则上下文
* - 用户必须手动点击更新按钮,才能在评查设置中使用这些字段
*
* 类型定义:
* - LogicType: 'and' | 'or' | 'custom' - 用于评查配置中多个规则的组合逻辑
* - 'and': 所有规则都必须满足
* - 'or': 任一规则满足即可
* - 'custom': 自定义逻辑表达式,如 "(规则1 AND 规则2) OR 规则3"
*
* - LogicOperator: 'and' | 'or' - 用于单个规则内的条件组合
* - 'and': 规则内所有条件都必须满足
* - 'or': 规则内任一条件满足即可
*/
/** One row of the regex-extraction table: a field name plus the pattern used for it. */
interface RegexField {
  // Name of the field; rows whose name is blank are filtered out before fields are submitted.
  field: string;
  // Regular-expression source text; may be left empty.
  pattern: string;
}
/** Object form of a multimodal (VLM) extraction field. */
interface VlmField {
  // Bare field name.
  name: string;
  // Field type tag; when converted to string form, any value other than
  // "default" is appended to the name as a `_type` suffix.
  type: VLMFieldType | string;
}
/** A prompt template that can be selected to pre-fill the custom prompt text. */
interface PromptTemplate {
  // Numeric identifier; matches the `value` of the template <option>.
  id: number;
  // Human-readable template name.
  template_name: string;
  // Template category; the built-in table uses "Extraction" and "Multimodal".
  template_type: string;
  // Prompt text; may contain `{placeholder}` variables such as `{fieldsList}`.
  template_content: string;
}
/** Props accepted by the ExtractionSettings component. */
interface ExtractionSettingsProps {
  // Called with a partial payload whenever the component reports data to its parent.
  onChange?: (data: Record<string, unknown>) => void;
  // Optional data used to seed (and later re-seed) the form state.
  initialData?: {
    // LLM extraction settings.
    llm?: {
      fields?: string[];
      prompt_setting?: {
        type?: string;
        template?: string;
      };
    };
    // Multimodal (VLM) extraction settings; fields may arrive either as
    // objects or as strings.
    vlm?: {
      fields?: VlmField[] | string[];
      prompt_setting?: {
        type?: string;
        template?: string;
      };
    };
    // Regex extraction settings.
    regex?: {
      fields?: RegexField[];
    };
  };
  // Options for the prompt-type selector; built-in options are used when empty.
  promptTypeOptions?: Array<{ value: string; label: string }>;
  // Options for the VLM field-type selector; built-in options are used when empty.
  vlmFieldTypeOptions?: Array<{ value: string; label: string }>;
}
/** Shape of the component's consolidated form state. */
type FormDataType = {
  // Field lists for the two chip-based tabs.
  fields: {
    llm: string[];
    vlm: (string | VlmField)[];
  };
  // Rows of the regex tab.
  regexFields: RegexField[];
  // Selected prompt type per tab (the state is initialised with "system").
  promptType: {
    llm: string;
    vlm: string;
  };
  // Prompt text per tab.
  promptContent: {
    llm: string;
    vlm: string;
  };
  // Selected template id per tab, kept as the <select> value string ("" = none).
  selectedTemplate: {
    llm: string;
    vlm: string;
  };
};
export function ExtractionSettings({
onChange,
initialData,
promptTypeOptions = [],
vlmFieldTypeOptions = [],
}: ExtractionSettingsProps) {
// Shared rule context; read for `extractionFields` and `updateFields`.
const ruleContext = useContext(RuleContext);
// Core form state, seeded from `initialData` when it is present on first render.
const [formData, setFormData] = useState<FormDataType>({
  // Field configuration
  fields: {
    llm: initialData?.llm?.fields ?? [],
    vlm: initialData?.vlm?.fields
      ? // Accept both possible vlm field formats: strings are used as-is,
        // objects are converted to "name_type" ("default" type adds no suffix).
        Array.isArray(initialData.vlm.fields) &&
        initialData.vlm.fields.length > 0
        ? typeof initialData.vlm.fields[0] === "string"
          ? initialData.vlm.fields as string[]
          : (initialData.vlm.fields as VlmField[]).map((field) =>
              field.type && field.type !== "default"
                ? `${field.name}_${field.type}`
                : field.name
            )
        : []
      : [],
  },
  // Regex rows; a single blank row is used when no initial rows exist.
  regexFields: (initialData?.regex?.fields && initialData.regex.fields.length > 0)
    ? [...initialData.regex.fields]
    : [{ field: "", pattern: "" }],
  // Prompt configuration
  promptType: {
    llm: initialData?.llm?.prompt_setting?.type ?? "system",
    vlm: initialData?.vlm?.prompt_setting?.type ?? "system",
  },
  promptContent: {
    llm: initialData?.llm?.prompt_setting?.template ?? "",
    vlm: initialData?.vlm?.prompt_setting?.template ?? "",
  },
  selectedTemplate: {
    llm: "",
    vlm: "",
  },
});
// Destructure the commonly used slices for shorter access below.
const { fields, regexFields, promptType, promptContent, selectedTemplate } =
  formData;
// UI state and transient state
const [currentTab, setCurrentTab] = useState("llm");
// Text currently typed into the "add field" inputs, per tab.
const [inputValue, setInputValue] = useState({ llm: "", vlm: "" });
// Type chosen in the VLM field-type selector.
const [selectedFieldType, setSelectedFieldType] = useState("default");
// Per-row "is being edited" flags, keyed by row id.
const [fieldEditStatus, setFieldEditStatus] = useState<{
  [id: string]: boolean;
}>({});
// Hint message attached to a single regex row (null = none).
const [statusMessage, setStatusMessage] = useState<{
  id: string;
  message: string;
} | null>(null);
// Result banner of the last "update fields" action (null = hidden).
const [updateStatus, setUpdateStatus] = useState<{
  success: boolean;
  message: string;
} | null>(null);
const [hasPendingChanges, setHasPendingChanges] = useState<boolean>(false);
// Refs (mutable values that do not trigger re-renders)
const lastUpdateTimeRef = useRef(0);
const lastEventFieldsRef = useRef<string[]>([]);
const ignoreEmptyFieldsRef = useRef(false);
// Holds the debounced onChange wrapper created in an effect below.
const debouncedNotifyParentRef = useRef<NotifyFn | null>(null);
// Id of the regex row currently being edited, if any.
const activeFieldRef = useRef<string | null>(null);
// Handle of the long-running "reset edit status" timer.
const fieldEditResetTimeoutRef = useRef<NodeJS.Timeout | null>(null);
// Re-seed the form whenever `initialData` changes.
// VLM fields may arrive as strings (used as-is) or as objects (converted to
// "name_type", with no suffix for the "default" type). Prompt settings and
// regex rows fall back to the current state when the incoming value is absent.
useEffect(() => {
  if (!initialData) return;

  const incomingVlm = initialData.vlm?.fields;
  let vlmEntries: (string | VlmField)[] = [];
  if (incomingVlm && Array.isArray(incomingVlm) && incomingVlm.length > 0) {
    vlmEntries =
      typeof incomingVlm[0] === "string"
        ? (incomingVlm as string[])
        : (incomingVlm as VlmField[]).map((entry) =>
            entry.type && entry.type !== "default"
              ? `${entry.name}_${entry.type}`
              : entry.name
          );
  }

  const llmEntries: string[] = initialData.llm?.fields ?? [];
  const incomingRegex = initialData.regex?.fields;
  const llmPrompt = initialData.llm?.prompt_setting;
  const vlmPrompt = initialData.vlm?.prompt_setting;

  setFormData((prev) => ({
    ...prev,
    fields: {
      ...prev.fields,
      llm: llmEntries,
      vlm: vlmEntries,
    },
    promptType: {
      llm: llmPrompt?.type ?? prev.promptType.llm,
      vlm: vlmPrompt?.type ?? prev.promptType.vlm,
    },
    promptContent: {
      llm: llmPrompt?.template ?? prev.promptContent.llm,
      vlm: vlmPrompt?.template ?? prev.promptContent.vlm,
    },
    regexFields:
      incomingRegex && incomingRegex.length > 0
        ? [...incomingRegex]
        : prev.regexFields,
  }));
}, [initialData]); // depends only on initialData, not on ruleContext
// Flag the form as having unsaved changes whenever the field lists or the
// regex rows get a new identity. Nothing is sent to the parent here; data is
// only submitted when the update button is pressed.
// Note: like every effect, this also runs once after the first render.
useEffect(() => {
  setHasPendingChanges(true);
}, [fields, regexFields]);
// When no `initialData` was supplied, seed the active tab's field list from
// the fields already stored in the rule context.
// Only the "llm" and "vlm" tabs own a list inside `formData.fields`; the
// "regex" tab keeps its rows in `formData.regexFields`, so it is skipped
// instead of creating a stray `fields.regex` key.
useEffect(() => {
  if (initialData) return;

  const contextFields = ruleContext?.extractionFields;
  if (!contextFields || !(contextFields.length > 0)) return;
  if (currentTab !== "llm" && currentTab !== "vlm") return;

  setFormData((prev) => ({
    ...prev,
    fields: {
      ...prev.fields,
      [currentTab]: [...contextFields],
    },
  }));
}, [ruleContext?.extractionFields, currentTab, initialData]); // depend on the specific property, not the whole context
/**
 * Collects the names of every configured field across the three tabs.
 *
 * Order: LLM fields, then VLM field names (type suffix stripped), then the
 * trimmed names of regex rows that have a non-blank name. Exact duplicates
 * are removed, keeping the first occurrence.
 *
 * @returns De-duplicated list of field names.
 */
const getAllFields = useCallback(() => {
  const llmNames = fields.llm || [];
  const vlmNames = (fields.vlm || []).map(safeProcessFieldName);
  const regexNames = regexFields
    .filter((row) => row.field && row.field.trim() !== "")
    .map((row) => row.field.trim());

  const unique = new Set<string>();
  for (const name of llmNames) unique.add(name);
  for (const name of vlmNames) unique.add(name);
  for (const name of regexNames) unique.add(name);
  return Array.from(unique);
}, [fields, regexFields]);
/**
 * Case-insensitive check for whether a field name is already in use.
 *
 * LLM fields and VLM field names are always considered. Regex rows are
 * considered too, except rows whose `field` equals `excludeId` (used to skip
 * the row currently being edited).
 *
 * @param fieldName - Candidate name; blank or whitespace-only names yield `false`.
 * @param excludeId - Optional regex row identifier to leave out of the check.
 * @returns `true` when a matching name exists.
 */
const isFieldNameExists = useCallback(
  (fieldName: string, excludeId?: string): boolean => {
    if (!fieldName) return false;
    const target = fieldName.trim().toLowerCase();
    if (target === "") return false;

    const sameName = (candidate: string) => candidate.toLowerCase() === target;

    const llmNames = fields.llm || [];
    const vlmNames = (fields.vlm || []).map(safeProcessFieldName);
    if (llmNames.some((n) => sameName(n)) || vlmNames.some((n) => sameName(n))) {
      return true;
    }

    return regexFields.some((row) => {
      if (excludeId && row.field === excludeId) return false;
      return sameName(row.field ? row.field.trim() : "");
    });
  },
  [fields, regexFields]
);
/**
 * Validates the fields of all three tabs and, when valid, publishes them.
 *
 * Steps:
 * 1. Keep regex rows that have a non-blank field name (the pattern may be empty).
 * 2. Reject the update when any field name occurs more than once
 *    (case-insensitive) across LLM, VLM and regex fields.
 * 3. Push the merged field list to `ruleContext.updateFields` and `onChange`.
 * 4. Clear the pending flag and show a success banner for 3 seconds.
 *
 * @returns `true` when the fields were published, `false` on duplicates or
 *   when an error was thrown (the error is logged and shown in the banner).
 */
const validateAndUpdateFields = useCallback(() => {
  try {
    // Collected regardless of which tab is currently active.
    const validRegexFields = regexFields.filter(
      (field) => field.field && field.field.trim() !== ""
    );

    const allFieldNamesList = [
      ...fields.llm,
      ...fields.vlm.map((f) => safeProcessFieldName(f)),
      ...validRegexFields.map((f) => f.field.trim()),
    ].filter((name) => name); // drop empty names

    // Detect names seen more than once, ignoring case.
    const seenNames = new Set<string>();
    const duplicateFields = new Set<string>();
    allFieldNamesList.forEach((name) => {
      const lowercaseName = name.toLowerCase();
      if (seenNames.has(lowercaseName)) {
        duplicateFields.add(name);
      } else {
        seenNames.add(lowercaseName);
      }
    });

    if (duplicateFields.size > 0) {
      setUpdateStatus({
        success: false,
        message: `发现重复字段: ${[...duplicateFields].join(
          ", "
        )},请修正后再更新`,
      });
      return false;
    }

    const allFields = getAllFields();

    // Per-type counts for the summary message.
    const llmCount = fields.llm.length;
    const vlmCount = fields.vlm.length;
    const regexCount = validRegexFields.length;
    const totalCount = allFields.length;

    if (ruleContext?.updateFields) {
      ruleContext.updateFields(allFields);
    }

    // Always hand the parent the data of all three field types.
    if (onChange) {
      onChange({
        fields: {
          llm: fields.llm,
          vlm: fields.vlm,
        },
        regexFields: validRegexFields,
        allFields,
        pendingUpdate: false, // update completed
      });
    }

    // Remember what was sent and when.
    lastEventFieldsRef.current = [...allFields];
    lastUpdateTimeRef.current = Date.now();

    setHasPendingChanges(false);

    // The closing ")" was missing from this message before.
    setUpdateStatus({
      success: true,
      message: `已成功更新${totalCount}个字段(大模型字段: ${llmCount},多模态字段: ${vlmCount},正则字段: ${regexCount})`,
    });

    // Hide the banner after 3 seconds.
    setTimeout(() => {
      setUpdateStatus(null);
    }, 3000);
    return true;
  } catch (error) {
    console.error("更新字段时出错:", error);
    setUpdateStatus({
      success: false,
      message: `更新失败: ${
        error instanceof Error ? error.message : "未知错误"
      }`,
    });
    return false;
  }
}, [fields, regexFields, getAllFields, ruleContext, onChange]);
// (Re)create the debounced onChange wrapper whenever `onChange` changes.
// The wrapper delays delivery by 500 ms. The cleanup only drops the
// reference; it does not cancel a call that is already scheduled.
useEffect(() => {
  if (onChange) {
    const forwardToParent: NotifyFn = (data) => {
      onChange(data);
    };
    debouncedNotifyParentRef.current = debounce(forwardToParent, 500);
  }

  return () => {
    debouncedNotifyParentRef.current = null;
  };
}, [onChange]);
/**
 * Reports data to the parent through `onChange`.
 *
 * @param data - Payload to deliver.
 * @param immediate - When `true`, `onChange` is called synchronously;
 *   otherwise the debounced wrapper is used (nothing happens if it has not
 *   been created yet).
 */
const notifyParent = useCallback(
  (data: Record<string, unknown>, immediate = false) => {
    if (!onChange) return;

    if (immediate) {
      onChange(data);
      return;
    }

    debouncedNotifyParentRef.current?.(data);
  },
  [onChange]
);
/**
 * Adds the text typed into the given tab's input as new field(s).
 *
 * - "llm": the input may list several names separated by whitespace, "、" or
 *   ","; names that already exist, and repeats inside the same input
 *   (case-insensitive), are skipped.
 * - "vlm": a single name is stored as `name_selectedFieldType`.
 *
 * On success the input is cleared, the form is flagged as having pending
 * changes and the parent is notified (debounced).
 *
 * Fixes over the previous version: the raw multi-name input string is no
 * longer passed on as if it were one new field (which leaked a bogus
 * combined name into `allFields`), and repeats inside one input are no
 * longer added twice.
 *
 * @param type - Which tab's input to read.
 */
const addField = (type: "llm" | "vlm") => {
  const value = inputValue[type].trim();
  if (!value) return;

  const newFields = { ...fields };
  // Entry actually stored, forwarded to getFieldsWithNewAddition below.
  let addedEntry: string;

  if (type === "llm") {
    const seenInInput = new Set<string>();
    const fieldsToAdd = value
      .split(/[\s、,]+/)
      .map((f) => f.trim())
      .filter((f) => {
        if (!f || isFieldNameExists(f)) return false;
        const key = f.toLowerCase();
        if (seenInInput.has(key)) return false;
        seenInInput.add(key);
        return true;
      });
    if (fieldsToAdd.length === 0) {
      alert("所有字段名已存在,请确保字段名称唯一");
      return;
    }
    newFields.llm = [...fields.llm, ...fieldsToAdd];
    addedEntry = fieldsToAdd[0];
  } else {
    if (isFieldNameExists(value)) {
      alert(`字段名 "${value}" 已存在,请确保字段名称唯一`);
      return;
    }
    // VLM entries carry their type as a suffix.
    addedEntry = `${value}_${selectedFieldType}`;
    newFields.vlm = [...fields.vlm, addedEntry];
  }

  setFormData((prev) => ({
    ...prev,
    fields: newFields,
  }));
  setInputValue((prev) => ({ ...prev, [type]: "" }));
  setHasPendingChanges(true);

  // Debounced notification; `pendingUpdate` tells the parent nothing is committed yet.
  notifyParent({
    fields: newFields,
    pendingUpdate: true,
    allFields: getFieldsWithNewAddition(newFields, addedEntry),
  });
};
/**
 * Builds the merged, de-duplicated field-name list for a field set plus one
 * newly added entry.
 *
 * The new entry contributes the part of `newField` before its first
 * underscore. Regex names come from the current `regexFields` state (rows
 * with a blank name are ignored).
 *
 * @param fieldsObj - LLM and VLM field lists to merge.
 * @param newField - The entry that was just added.
 * @returns Unique field names in the order LLM, VLM, regex, new entry.
 */
const getFieldsWithNewAddition = (
  fieldsObj: { llm: string[]; vlm: (string | VlmField)[] },
  newField: string
) => {
  const llmNames = fieldsObj.llm || [];
  const vlmNames = (fieldsObj.vlm || []).map(safeProcessFieldName);
  const regexNames = regexFields
    .filter((row) => row.field && row.field.trim() !== "")
    .map((row) => row.field.trim());
  const [newFieldName] = newField.split("_");

  const merged = new Set([
    ...llmNames,
    ...vlmNames,
    ...regexNames,
    newFieldName,
  ]);
  return Array.from(merged);
};
/**
 * Removes the chip at `index` from the LLM or VLM field list.
 *
 * Updates the form state, notifies the parent (debounced) with the remaining
 * fields, and flags the form as having pending changes.
 *
 * @param type - Which list to remove from.
 * @param index - Position of the entry within that list.
 */
const removeField = (type: "llm" | "vlm", index: number) => {
  const newFields = { ...fields };
  let removedField: string | VlmField;

  if (type === "llm") {
    const remaining = [...fields.llm];
    removedField = remaining[index];
    remaining.splice(index, 1);
    newFields.llm = remaining;
    setFormData((prev) => ({
      ...prev,
      fields: { ...prev.fields, llm: remaining },
    }));
  } else {
    const remaining = [...fields.vlm];
    removedField = remaining[index];
    remaining.splice(index, 1);
    newFields.vlm = remaining;
    setFormData((prev) => ({
      ...prev,
      fields: { ...prev.fields, vlm: remaining },
    }));
  }

  // Tell the parent (debounced) what is left after the removal.
  notifyParent({
    fields: newFields,
    pendingUpdate: true,
    allFields: getFieldsWithRemoval(newFields, removedField),
  });

  setHasPendingChanges(true);
};
/**
 * Builds the merged, de-duplicated field-name list for a field set and then
 * drops the name of the removed entry from it.
 *
 * Regex names come from the current `regexFields` state (rows with a blank
 * name are ignored). The removed name is dropped wherever it came from, so a
 * same-named field in another tab disappears from the result as well.
 *
 * @param fieldsObj - LLM and VLM field lists after the removal.
 * @param removedField - The entry that was removed.
 * @returns Unique field names without the removed one.
 */
const getFieldsWithRemoval = (
  fieldsObj: { llm: string[]; vlm: (string | VlmField)[] },
  removedField: string | VlmField
) => {
  const llmNames = fieldsObj.llm || [];
  const vlmNames = (fieldsObj.vlm || []).map(safeProcessFieldName);
  const regexNames = regexFields
    .filter((row) => row.field && row.field.trim() !== "")
    .map((row) => row.field.trim());
  const fieldToRemove = safeProcessFieldName(removedField);

  const unique = new Set([...llmNames, ...vlmNames, ...regexNames]);
  unique.delete(fieldToRemove);
  return Array.from(unique);
};
/**
 * Appends a blank row to the regex table.
 *
 * A fresh id (timestamp plus random number) is generated and recorded as the
 * active/editing row in the refs and edit-status map. The id is not stored
 * on the row itself. The parent is notified immediately (not debounced) with
 * `pendingUpdate: true`; no validation is run.
 */
const addRegexFieldRow = () => {
  const randomPart = Math.floor(Math.random() * 100000);
  const newId = `regex_${Date.now()}_${randomPart}`;
  const makeBlankRow = (): RegexField => ({ field: "", pattern: "" });

  // Temporarily tolerate the empty row that is about to be added.
  ignoreEmptyFieldsRef.current = true;
  activeFieldRef.current = newId;
  setFieldEditStatus((prev) => ({ ...prev, [newId]: true }));

  setFormData((prev) => ({
    ...prev,
    regexFields: [...prev.regexFields, makeBlankRow()],
  }));

  const payload = {
    regexFields: [...regexFields, makeBlankRow()],
    pendingUpdate: true, // there is content waiting to be committed
  };
  notifyParent(payload, true);
};
/**
 * Removes regex rows whose `field` equals `id`, unless only one row is left.
 *
 * Every row with a matching `field` value is removed. The parent is notified
 * immediately (not debounced) with the remaining rows and a field list that
 * no longer contains the removed row's trimmed name.
 *
 * @param id - Value compared against each row's `field`.
 */
const removeRegexFieldRow = (id: string) => {
  if (regexFields.length <= 1) return;

  const updatedRegexFields = regexFields.filter((row) => row.field !== id);
  setFormData((prev) => ({
    ...prev,
    regexFields: updatedRegexFields,
  }));
  setHasPendingChanges(true);

  // First row that matches the id; its trimmed name is dropped from allFields.
  const deletedRow = regexFields.find((row) => row.field === id);
  const remainingNames = getAllFields().filter((name) =>
    deletedRow ? name !== deletedRow.field.trim() : true
  );

  notifyParent(
    {
      regexFields: updatedRegexFields,
      pendingUpdate: true,
      allFields: remainingNames,
    },
    true
  );
};
/**
 * Applies an edit to one property of the regex row(s) whose `field` equals `id`.
 *
 * Besides updating the form state this marks the row as being edited,
 * flags pending changes, schedules the clearing of a related status
 * message, restarts the long-running "editing finished" timer and notifies
 * the parent (debounced).
 *
 * The state update now maps over `prev.regexFields` inside the functional
 * updater instead of the array captured by this render, so edits queued in
 * the same batch are not overwritten. This matches the blur handler.
 *
 * @param id - Value compared against each row's `field`.
 * @param key - Which property of the row to change.
 * @param value - New value for that property.
 */
const updateRegexField = (
  id: string,
  key: "field" | "pattern",
  value: string
) => {
  // Mark this row as being edited and remember it as the active one.
  setFieldEditStatus((prev) => ({ ...prev, [id]: true }));
  activeFieldRef.current = id;

  setFormData((prev) => ({
    ...prev,
    regexFields: prev.regexFields.map((field) =>
      field.field === id ? { ...field, [key]: value } : field
    ),
  }));
  setHasPendingChanges(true);

  // A status message about this row is cleared shortly after it gets content.
  if (statusMessage?.id === id && value.trim() !== "") {
    setTimeout(() => {
      setStatusMessage(null);
    }, 1500);
  }

  // Typing restarts the edit-status timer.
  if (fieldEditResetTimeoutRef.current) {
    clearTimeout(fieldEditResetTimeoutRef.current);
  }
  // Long timeout so the user has ample time to finish editing. The callback
  // inspects the `regexFields` array captured when this call was made.
  fieldEditResetTimeoutRef.current = setTimeout(() => {
    const currentField = regexFields.find((f) => f.field === id);
    if (currentField) {
      // Only rows with a field name are considered finished.
      if (currentField.field && currentField.field.trim() !== "") {
        // The row is never removed automatically, even with an empty pattern.
        setFieldEditStatus((prev) => ({ ...prev, [id]: false }));
        if (activeFieldRef.current === id) {
          activeFieldRef.current = null;
        }
        // Empty pattern: show a hint but keep the row.
        if (!currentField.pattern || currentField.pattern.trim() === "") {
          setStatusMessage({
            id,
            message: "正则表达式为空,此字段会保留但不会执行抽取。",
          });
          // Hide the hint after 5 seconds if it is still the one shown.
          setTimeout(() => {
            setStatusMessage((current) =>
              current?.id === id ? null : current
            );
          }, 5000);
        }
      }
    }
  }, 3600000); // one hour

  // Every edit is reported to the parent (debounced).
  notifyParent({
    regexFields: regexFields.map((field) =>
      field.field === id ? { ...field, [key]: value } : field
    ),
    pendingUpdate: true,
  });
};
/**
 * Blur handler for the two inputs of a regex row.
 *
 * - key "field": a blank name is left alone; a name that already exists is
 *   rejected (alert, name reset to "", parent notified immediately);
 *   otherwise the parent is notified (debounced).
 * - key "pattern": when the row has a name, a status message is shown after
 *   200 ms ("done" when the pattern is set, a hint when it is empty) and the
 *   parent is notified (debounced).
 *
 * Note: the row is looked up by `f.field === id`, so `id` equals the row's
 * field name; the duplicate check therefore excludes every regex row with
 * that same name.
 */
const handleRegexFieldBlur = (id: string, key: "field" | "pattern") => {
  // Find the row this blur event belongs to; nothing to do if it is gone.
  const field = regexFields.find((f) => f.field === id);
  if (!field) return;
  if (key === "field") {
    // Blank field name: do nothing and keep the row.
    if (!field.field || field.field.trim() === "") {
      return;
    }
    // Check for a duplicate field name.
    if (isFieldNameExists(field.field, id)) {
      alert(`字段名 "${field.field.trim()}" 已存在,请确保字段名称唯一`);
      setFormData((prev) => ({
        ...prev,
        regexFields: prev.regexFields.map((f) =>
          f.field === id ? { ...f, field: "" } : f
        ),
      }));
      // Tell the parent right away (no debounce) that the name was cleared.
      notifyParent(
        {
          regexFields: regexFields.map((f) =>
            f.field === id ? { ...f, field: "" } : f
          ),
          pendingUpdate: true,
        },
        true
      );
    } else if (field.field.trim() !== "") {
      // Non-blank, non-duplicate name: notify the parent (debounced).
      notifyParent({
        regexFields: regexFields,
        pendingUpdate: true,
      });
    }
  } else if (key === "pattern") {
    // Only rows that have a field name get a status message.
    if (field.field && field.field.trim() !== "") {
      // The row is never removed automatically, even with an empty pattern.
      setTimeout(() => {
        // The "done" message is shown only when the pattern is non-empty.
        if (field.pattern && field.pattern.trim() !== "") {
          setFieldEditStatus((prev) => ({ ...prev, [id]: false }));
          if (activeFieldRef.current === id) {
            activeFieldRef.current = null;
          }
          // Show the completion message.
          setStatusMessage({
            id,
            message: "字段配置完成",
          });
          // Hide it after 2 seconds if it is still the one shown.
          setTimeout(() => {
            setStatusMessage((current) =>
              current?.id === id ? null : current
            );
          }, 2000);
        } else {
          // Empty pattern: show a hint but keep the row.
          setStatusMessage({
            id,
            message: "未设置正则表达式,此字段会保留但不会执行抽取。",
          });
          // Hide the hint after 5 seconds if it is still the one shown.
          setTimeout(() => {
            setStatusMessage((current) =>
              current?.id === id ? null : current
            );
          }, 5000);
        }
        // Either way, notify the parent (debounced).
        notifyParent({
          regexFields: regexFields,
          pendingUpdate: true,
        });
      }, 200);
    }
  }
};
/**
 * Writes a ready-made regular expression into the last row of the regex table.
 *
 * @param regex - Pattern text to apply; ignored when the table has no rows.
 */
const applyRegexTemplate = (regex: string) => {
  const lastRow = regexFields[regexFields.length - 1];
  if (!lastRow) return;
  updateRegexField(lastRow.field, "pattern", regex);
};
/**
 * Splits a `name_type` VLM field string and resolves its display metadata.
 *
 * The type is taken from the text after the LAST underscore, so names that
 * contain underscores themselves (for example `contract_no_seal`) keep their
 * full name. Previously the string was split on every underscore and only
 * the first two parts were used. A string without an underscore has the
 * type "default".
 *
 * Unknown types fall back to the default label and badge. Lookups only
 * consider the tables' own keys, so a type such as "constructor" no longer
 * resolves to an inherited object member.
 *
 * @param field - Field string in `name` or `name_type` form.
 * @returns The field name, raw type, display label and badge CSS classes.
 */
const getFieldInfo = (field: string) => {
  const typeNames: Record<string, string> = {
    default: "默认",
    seal: "印章",
    "cross-seal": "骑缝章",
    handwriting: "手写体",
    print: "印刷体",
    english: "英文",
    number: "数字",
    currency: "货币",
  };
  const badgeClasses: Record<string, string> = {
    default: "bg-blue-100 text-blue-800",
    seal: "bg-red-100 text-red-800",
    "cross-seal": "bg-red-100 text-red-800",
    handwriting: "bg-yellow-100 text-yellow-800",
    print: "bg-purple-100 text-purple-800",
    english: "bg-indigo-100 text-indigo-800",
    number: "bg-gray-100 text-gray-800",
    currency: "bg-green-100 text-green-800",
  };

  const separatorIndex = field.lastIndexOf("_");
  const fieldName =
    separatorIndex === -1 ? field : field.slice(0, separatorIndex);
  const fieldType =
    separatorIndex === -1 ? "default" : field.slice(separatorIndex + 1);

  const isKnownType = Object.prototype.hasOwnProperty.call(
    typeNames,
    fieldType
  );
  const typeName: string =
    (isKnownType ? typeNames[fieldType] : undefined) || "默认";
  const badgeClass: string =
    (isKnownType ? badgeClasses[fieldType] : undefined) ||
    "bg-blue-100 text-blue-800";

  return { fieldName, fieldType, typeName, badgeClass };
};
/**
 * Handles a change of the prompt-template selector for one tab.
 *
 * The selected id is stored. Choosing the empty option clears the prompt
 * text. Choosing a known template copies its content into the prompt text,
 * substituting `{fieldsList}` with a numbered list of the tab's current
 * fields when at least one field exists. The form is flagged as having
 * pending changes; the parent is not notified.
 *
 * The substitution uses split/join so that every `{fieldsList}` occurrence
 * is replaced and characters such as `$&` inside a field name are inserted
 * literally instead of being treated as replacement patterns.
 *
 * @param e - Change event of the template <select>.
 * @param type - Tab the selector belongs to.
 */
const handleTemplateChange = (
  e: FormEvent<HTMLSelectElement>,
  type: "llm" | "vlm"
) => {
  const value = e.currentTarget.value;
  setFormData((prev) => ({
    ...prev,
    selectedTemplate: { ...prev.selectedTemplate, [type]: value },
  }));

  if (!value) {
    // No template selected: clear the prompt text.
    setFormData((prev) => ({
      ...prev,
      promptContent: { ...prev.promptContent, [type]: "" },
    }));
    setHasPendingChanges(true);
    return;
  }

  const templateData = getPromptTemplateById(Number(value));
  if (!templateData) return;

  let content = templateData.template_content;
  if (content.includes("{fieldsList}") && fields[type].length > 0) {
    const fieldListStr =
      type === "llm"
        ? fields.llm.map((field, idx) => `${idx + 1}. ${field}`).join("\n")
        : fields.vlm
            .map((field, idx) => {
              const { fieldName, typeName } = getFieldInfo(
                typeof field === "string"
                  ? field
                  : `${field.name}_${field.type}`
              );
              return `${idx + 1}. ${fieldName} (${typeName})`;
            })
            .join("\n");
    content = content.split("{fieldsList}").join(fieldListStr);
  }

  setFormData((prev) => ({
    ...prev,
    promptContent: { ...prev.promptContent, [type]: content },
  }));
  // Pending only; onChange is not triggered here.
  setHasPendingChanges(true);
};
/**
 * Stores the edited prompt text for one tab and flags pending changes.
 * The parent is not notified here.
 *
 * @param e - Change event of the prompt <textarea>.
 * @param type - Tab the textarea belongs to.
 */
const handlePromptContentChange = (
  e: FormEvent<HTMLTextAreaElement>,
  type: "llm" | "vlm"
) => {
  const { value } = e.currentTarget;
  setFormData((prev) => {
    const nextContent = { ...prev.promptContent, [type]: value };
    return { ...prev, promptContent: nextContent };
  });
  setHasPendingChanges(true);
};
/**
 * Inserts `{variable}` into a tab's prompt text at the textarea's current
 * selection, replacing any selected text.
 *
 * The textarea is looked up by its DOM id. After the state update the
 * textarea is refocused and the caret is placed right after the inserted
 * placeholder. Pending changes are flagged; the parent is not notified.
 *
 * @param variable - Variable name, without braces.
 * @param type - Tab whose prompt textarea is targeted.
 */
const applyVariableToPrompt = (variable: string, type: "llm" | "vlm") => {
  const elementId =
    type === "llm" ? "llm-prompt-content" : "multimodal-prompt-content";
  const textarea = document.getElementById(elementId) as HTMLTextAreaElement;
  if (!textarea) return;

  const { selectionStart: start, selectionEnd: end, value: text } = textarea;
  const newText = `${text.substring(0, start)}{${variable}}${text.substring(
    end
  )}`;

  setFormData((prev) => ({
    ...prev,
    promptContent: { ...prev.promptContent, [type]: newText },
  }));

  // Two extra characters account for the surrounding braces.
  const caret = start + variable.length + 2;
  setTimeout(() => {
    textarea.focus();
    textarea.setSelectionRange(caret, caret);
  }, 0);

  setHasPendingChanges(true);
};
/**
 * Looks up one of the built-in prompt templates.
 *
 * @param id - Template id (1, 4, 5, 6 for extraction; 7, 8, 9 for multimodal).
 * @returns The matching template, or `null` when the id is unknown.
 */
const getPromptTemplateById = (id: number): PromptTemplate | null => {
  const builtInTemplates: PromptTemplate[] = [
    {
      id: 1,
      template_name: "行政处罚-抽取通用模板",
      template_type: "Extraction",
      template_content: `你是一个专业的文档信息抽取助手。请从以下{docType}文档中抽取关键信息:\n{fieldsList}\n请将结果以JSON格式输出,包含以上字段。如果某个字段在文档中未找到,则该字段的值设为null。`,
    },
    {
      id: 4,
      template_name: "采购合同-乙方资质抽取",
      template_type: "Extraction",
      template_content: `你是一个专业的合同信息抽取助手。请从以下{docType}中抽取乙方的资质信息:\n需要抽取的信息包括:\n{fieldsList}\n{companyName}要求所有供应商必须提供完整的资质信息。请将结果以JSON格式输出,包含以上字段。`,
    },
    {
      id: 5,
      template_name: "合同-关键条款抽取",
      template_type: "Extraction",
      template_content: `请作为{industry}行业的专业合同审核员,从提供的{docType}中提取以下关键条款信息:\n{fieldsList}\n文档ID: {documentId}\n审核日期: {date}\n请以JSON格式输出结果,对于未明确指定的条款需标记为"未明确约定"。`,
    },
    {
      id: 6,
      template_name: "烟草许可证-信息抽取",
      template_type: "Extraction",
      template_content: `请从下列烟草专卖许可证文件中抽取以下关键信息:\n{fieldsList}\n这些信息将用于{companyName}内部数据库更新。请确保许可证编号和有效期格式准确无误。`,
    },
    {
      id: 7,
      template_name: "多模态-印章识别模板",
      template_type: "Multimodal",
      template_content: `请识别并提取文档中的所有印章信息,包括:\n{fieldsList}\n文档类型: {docType}\n页面范围: {pageRange}\n请注意区分公章、法人章和合同专用章,并分析印章的清晰度和完整性。`,
    },
    {
      id: 8,
      template_name: "多模态-表格抽取模板",
      template_type: "Multimodal",
      template_content: `请从文档中的表格提取以下信息:\n{fieldsList}\n文档类型: {docType}\n表格可能跨页,请确保完整提取所有内容。表格中的数值需保留原始精度。`,
    },
    {
      id: 9,
      template_name: "多模态-手写内容识别模板",
      template_type: "Multimodal",
      template_content: `请识别文档中的手写内容,特别关注:\n{fieldsList}\n文档类型: {docType}\n内容类型: {contentType}\n对于难以辨认的手写内容,请标注为"[难以辨认]"并尽可能给出可能的解读。`,
    },
  ];

  const match = builtInTemplates.find((template) => template.id === id);
  return match ?? null;
};
/**
 * Lets the Enter key add the typed field instead of triggering the default
 * key action.
 *
 * @param e - Key event of the "add field" input.
 * @param type - Tab the input belongs to.
 */
const handleKeyDown = (
  e: KeyboardEvent<HTMLInputElement>,
  type: "llm" | "vlm"
) => {
  if (e.key !== "Enter") return;
  e.preventDefault();
  addField(type);
};
/**
 * Click handler of the "update all fields" button.
 *
 * Runs validateAndUpdateFields first. When that succeeds and an `onChange`
 * listener exists, the parent is notified immediately with the fields of all
 * three tabs plus the prompt settings, every edit flag is reset and a
 * success banner is shown for 5 seconds.
 */
const handleUpdateFields = () => {
  // Nothing is submitted unless validation passes.
  if (!validateAndUpdateFields()) return;
  if (!onChange) return;

  // Keep every regex row that has a name, even when its pattern is empty.
  const validRegexFields = regexFields.filter(
    (row) => row.field && row.field.trim() !== ""
  );

  const payload = {
    fields: {
      llm: fields.llm,
      vlm: fields.vlm,
    },
    regexFields: validRegexFields,
    allFields: getAllFields(),
    pendingUpdate: false, // update completed
    // Prompt settings are submitted together with the fields.
    promptType,
    promptContent,
    promptSettings: {
      llm: {
        type: promptType.llm,
        content: promptContent.llm,
        template: selectedTemplate.llm,
      },
      vlm: {
        type: promptType.vlm,
        content: promptContent.vlm,
        template: selectedTemplate.vlm,
      },
    },
  };
  // Immediate delivery, bypassing the debounce.
  notifyParent(payload, true);

  // Reset all editing markers and the pending flag.
  setFieldEditStatus({});
  activeFieldRef.current = null;
  setHasPendingChanges(false);

  const totalCount = getAllFields().length;
  setUpdateStatus({
    success: true,
    message: `更新成功!共更新字段 ${totalCount} 个 (大模型: ${fields.llm.length}, 多模态: ${fields.vlm.length}, 正则: ${validRegexFields.length})`,
  });

  // Hide the banner after 5 seconds.
  setTimeout(() => {
    setUpdateStatus(null);
  }, 5000);
};
/**
 * Switches the visible tab. Only local UI state changes; the parent's
 * onChange is deliberately not called.
 *
 * @param tab - Identifier of the tab to show.
 */
const handleTabChange = (tab: string) => {
  setCurrentTab(tab);
};
/**
 * Mirrors the text typed into an "add field" input into component state.
 *
 * @param e - Input event of the text box.
 * @param type - Tab the text box belongs to.
 */
const handleFieldInputChange = (
  e: FormEvent<HTMLInputElement>,
  type: "llm" | "vlm"
) => {
  const typedText = e.currentTarget.value;
  setInputValue({ ...inputValue, [type]: typedText });
};
/** Stores the type picked in the VLM field-type selector. */
const handleFieldTypeChange = (e: FormEvent<HTMLSelectElement>) => {
  const { value } = e.currentTarget;
  setSelectedFieldType(value);
};
/**
 * Renders the selector for the type of a new VLM field.
 *
 * Options come from the `vlmFieldTypeOptions` prop; when that is empty a
 * built-in list is used. The built-in options previously rendered without
 * any visible text; they now carry the same display names that
 * getFieldInfo uses for these type keys.
 */
const renderVlmFieldTypeSelect = () => {
  const fallbackOptions: Array<{ value: string; label: string }> = [
    { value: "default", label: "默认" },
    { value: "currency", label: "货币" },
    { value: "print", label: "印刷体" },
    { value: "seal", label: "印章" },
    { value: "cross-seal", label: "骑缝章" },
    { value: "english", label: "英文" },
    { value: "number", label: "数字" },
    { value: "handwriting", label: "手写体" },
  ];
  const options =
    vlmFieldTypeOptions.length > 0 ? vlmFieldTypeOptions : fallbackOptions;

  return (
    <select
      className="form-select"
      value={selectedFieldType}
      onChange={handleFieldTypeChange}
    >
      {options.map((option) => (
        <option key={option.value} value={option.value}>
          {option.label}
        </option>
      ))}
    </select>
  );
};
/**
 * Renders the prompt-type selector for one tab.
 *
 * Options come from the `promptTypeOptions` prop; when that is empty two
 * built-in options ("system" and "custom") are used. Both built-in options
 * previously showed the same truncated label, which made them
 * indistinguishable; they now have distinct labels.
 * NOTE(review): the built-in label wording is a best guess — confirm against
 * the intended UI copy.
 *
 * Changing the selection updates the form state and flags pending changes;
 * the parent is not notified.
 *
 * @param type - Currently selected prompt type for the tab.
 * @param promptTypeKey - Tab whose prompt type is being edited.
 */
const renderPromptTypeSelect = (
  type: string,
  promptTypeKey: "llm" | "vlm"
) => {
  const fallbackOptions: Array<{ value: string; label: string }> = [
    { value: "system", label: "使用系统提示词" },
    { value: "custom", label: "使用自定义提示词" },
  ];
  const options =
    promptTypeOptions.length > 0 ? promptTypeOptions : fallbackOptions;

  return (
    <select
      className="form-select"
      value={type}
      onChange={(e: React.ChangeEvent<HTMLSelectElement>) => {
        const value = e.target.value;
        setFormData((prev) => ({
          ...prev,
          promptType: { ...prev.promptType, [promptTypeKey]: value },
        }));
        // Pending only; onChange is not triggered here.
        setHasPendingChanges(true);
      }}
    >
      {options.map((option) => (
        <option key={option.value} value={option.value}>
          {option.label}
        </option>
      ))}
    </select>
  );
};
return (
<div className="ant-card">
<div className="ant-card-header">
<h3></h3>
</div>
<div className="ant-card-body">
<div className="mb-6">
<div className="tab-nav mb-4" id="extraction-method-tabs">
<button
className={`tab-nav-item ${currentTab === "llm" ? "active" : ""}`}
onClick={() => handleTabChange("llm")}
type="button"
>
<i className="ri-brain-line mr-1"></i>
</button>
<button
className={`tab-nav-item ${currentTab === "vlm" ? "active" : ""}`}
onClick={() => handleTabChange("vlm")}
type="button"
>
<i className="ri-scan-line mr-1"></i>
</button>
<button
className={`tab-nav-item ${
currentTab === "regex" ? "active" : ""
}`}
onClick={() => handleTabChange("regex")}
type="button"
>
<i className="ri-code-box-line mr-1"></i>
</button>
</div>
</div>
<div
className={`extraction-config ${
currentTab !== "llm" ? "hidden" : ""
}`}
id="llm-config"
>
<div className="grid grid-cols-1 gap-3">
<div className="col-span-1">
<label className="form-label mb-1" htmlFor="field-input">
</label>
<div className="flex mb-2">
<input
type="text"
className="form-input mr-2"
id="field-input"
placeholder="请输入字段名,多个字段可用、或,或空格分隔"
value={inputValue.llm}
onChange={(e) => handleFieldInputChange(e, "llm")}
onKeyDown={(e) => handleKeyDown(e, "llm")}
/>
<button
className="ant-btn ant-btn-default"
id="add-field-btn"
type="button"
onClick={() => addField("llm")}
>
</button>
</div>
<div className="chips-container" id="fields-container">
{fields.llm.map((field, index) => (
<div className="chip" key={`llm-field-${index}`}>
{field}
<span
className="close-btn"
onClick={() => removeField("llm", index)}
onKeyDown={(e) => {
if (e.key === "Enter" || e.key === " ")
removeField("llm", index);
}}
role="button"
tabIndex={0}
aria-label={`删除字段 ${field}`}
>
×
</span>
</div>
))}
</div>
<div className="form-tip mt-1 text-xs">
</div>
</div>
</div>
<div className="grid grid-cols-1 gap-3 mt-3">
<div className="col-span-1">
<label className="form-label mb-1" htmlFor="llm-prompt-settings">
</label>
<div className="flex items-center mb-2" id="llm-prompt-settings">
{renderPromptTypeSelect(promptType.llm, "llm")}
</div>
<div
className="bg-gray-50 p-2 rounded text-xs text-gray-600 mb-2"
id="llm-system-prompt-info"
style={{
display: promptType.llm === "system" ? "block" : "none",
}}
>
</div>
<div
id="llm-custom-prompt-container"
style={{
display: promptType.llm === "custom" ? "block" : "none",
}}
className="border border-dashed border-gray-300 p-3 rounded-md"
>
<div className="mb-2">
<label
className="form-label mb-1 text-sm"
htmlFor="llm-prompt-template"
>
</label>
<select
className="form-select"
id="llm-prompt-template"
value={selectedTemplate.llm}
onChange={(e) => handleTemplateChange(e, "llm")}
>
<option value=""></option>
<option value="1">-</option>
<option value="4">-</option>
<option value="5">-</option>
<option value="6">-</option>
</select>
</div>
<div className="mb-2">
<label
className="form-label mb-1 text-sm"
htmlFor="llm-prompt-content"
>
</label>
<textarea
className="form-textarea"
id="llm-prompt-content"
rows={4}
placeholder="选择模板后自动填充,您也可以进行修改..."
value={promptContent.llm}
onChange={(e) => handlePromptContentChange(e, "llm")}
readOnly={!selectedTemplate.llm}
></textarea>
<div className="form-tip mt-1 bg-gray-50 p-2 rounded text-xs">
<p className="mb-1">
<strong></strong>
</p>
<div className="flex flex-wrap gap-1">
{[
"docType",
"fieldsList",
"companyName",
"documentId",
"date",
"industry",
"ocrText",
].map((variable) => (
<button
key={variable}
type="button"
className="var-tag"
onClick={() => applyVariableToPrompt(variable, "llm")}
>
{variable}
</button>
))}
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<div
className={`extraction-config ${
currentTab !== "vlm" ? "hidden" : ""
}`}
id="vlm-config"
>
<div className="grid grid-cols-1 gap-3">
<div className="col-span-1">
<label className="form-label mb-1" htmlFor="field-input-vlm">
</label>
<div className="flex mb-2">
<input
type="text"
className="form-input mr-2"
id="field-input-vlm"
placeholder="请输入字段名"
value={inputValue.vlm}
onChange={(e) => handleFieldInputChange(e, "vlm")}
onKeyDown={(e) => handleKeyDown(e, "vlm")}
/>
{renderVlmFieldTypeSelect()}
<button
className="ant-btn ant-btn-default"
id="add-field-btn-vlm"
type="button"
onClick={() => addField("vlm")}
>
</button>
</div>
<div className="chips-container" id="fields-container-vlm">
{fields.vlm.map((field, index) => {
const { fieldName, fieldType, typeName, badgeClass } =
getFieldInfo(
typeof field === "string"
? field
: `${field.name}_${field.type}`
);
return (
<div className="chip" key={`vlm-field-${index}`}>
{fieldName}
<span
className={`badge ${badgeClass} text-xs ml-1`}
data-type={fieldType}
>
{typeName}
</span>
<span
className="close-btn"
onClick={() => removeField("vlm", index)}
onKeyDown={(e) => {
if (e.key === "Enter" || e.key === " ")
removeField("vlm", index);
}}
role="button"
tabIndex={0}
aria-label={`删除字段 ${fieldName}`}
>
×
</span>
</div>
);
})}
</div>
<div className="form-tip mt-1 text-xs">
</div>
</div>
</div>
<div className="grid grid-cols-1 gap-3 mt-3">
<div className="col-span-1">
<label
className="form-label mb-1"
htmlFor="multimodal-prompt-settings"
>
</label>
<div
className="flex items-center mb-2"
id="multimodal-prompt-settings"
>
{renderPromptTypeSelect(promptType.vlm, "vlm")}
</div>
<div
className="bg-gray-50 p-2 rounded text-xs text-gray-600 mb-2"
id="multimodal-system-prompt-info"
style={{
display: promptType.vlm === "system" ? "block" : "none",
}}
>
</div>
<div
id="multimodal-custom-prompt-container"
style={{
display: promptType.vlm === "custom" ? "block" : "none",
}}
className="border border-dashed border-gray-300 p-3 rounded-md"
>
<div className="mb-2">
<label
className="form-label mb-1 text-sm"
htmlFor="multimodal-prompt-template"
>
</label>
<select
className="form-select"
id="multimodal-prompt-template"
value={selectedTemplate.vlm}
onChange={(e) => handleTemplateChange(e, "vlm")}
>
<option value=""></option>
<option value="7">-</option>
<option value="8">-</option>
<option value="9">-</option>
</select>
</div>
<div className="mb-2">
<label
className="form-label mb-1 text-sm"
htmlFor="multimodal-prompt-content"
>
</label>
<textarea
className="form-textarea"
id="multimodal-prompt-content"
rows={4}
placeholder="选择模板后自动填充,您也可以进行修改..."
value={promptContent.vlm}
onChange={(e) => handlePromptContentChange(e, "vlm")}
readOnly={!selectedTemplate.vlm}
></textarea>
<div className="form-tip mt-1 bg-gray-50 p-2 rounded text-xs">
<p className="mb-1">
<strong></strong>
</p>
<div className="flex flex-wrap gap-1">
{[
"docType",
"fieldsList",
"companyName",
"documentId",
"date",
"industry",
"contentType",
"pageRange",
"colorMode",
"ocrText",
].map((variable) => (
<button
key={variable}
type="button"
className="var-tag"
onClick={() => applyVariableToPrompt(variable, "vlm")}
>
{variable}
</button>
))}
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<div
className={`extraction-config ${
currentTab !== "regex" ? "hidden" : ""
}`}
id="regex-config"
>
<div className="grid grid-cols-1 gap-3">
<div className="col-span-1">
<div className="mb-2">
<div className="flex justify-between items-center mb-1">
<label
className="form-label m-0"
htmlFor="regex-fields-container"
>
</label>
<button
className="ant-btn ant-btn-default"
id="add-regex-field-row"
type="button"
onClick={addRegexFieldRow}
>
<i className="ri-add-line"></i>
</button>
</div>
<div className="mt-2" id="regex-fields-container">
{regexFields.map((field) => (
<div
className={`regex-field-row flex items-start mb-2 border rounded-md p-2 ${
fieldEditStatus[field.field]
? "border-blue-300 bg-blue-50"
: "border-gray-200 bg-gray-50"
}`}
key={field.field}
>
<div className="w-3/10 mr-2">
<label
className="text-xs text-gray-600 mb-0 block"
htmlFor={`regex-field-name-${field.field}`}
>
</label>
<input
type="text"
className={`form-input regex-field-name ${
!field.field && !fieldEditStatus[field.field]
? "border-yellow-300"
: ""
}`}
id={`regex-field-name-${field.field}`}
placeholder="如:合同编号"
value={field.field || ""}
onChange={(e) =>
field.field &&
updateRegexField(
field.field,
"field",
e.target.value
)
}
onBlur={() =>
field.field &&
handleRegexFieldBlur(field.field, "field")
}
/>
</div>
<div className="w-7/10 mr-2">
<label
className="text-xs text-gray-600 mb-0 block"
htmlFor={`regex-expression-${field.field}`}
>
</label>
<input
type="text"
className={`form-input regex-expression ${
field.field &&
!field.pattern &&
!fieldEditStatus[field.field]
? "border-yellow-300"
: ""
}`}
id={`regex-expression-${field.field}`}
placeholder="如:\\d{4}[-/年](0?[1-9]|1[0-2])[-/月](0?[1-9]|[12][0-9]|3[01])[日]?"
value={field.pattern || ""}
onChange={(e) =>
field.field &&
updateRegexField(
field.field,
"pattern",
e.target.value
)
}
onBlur={() =>
field.field &&
handleRegexFieldBlur(field.field, "pattern")
}
/>
{statusMessage && statusMessage.id === field.field && (
<div className="text-xs mt-1 text-blue-600 transition-opacity duration-300">
{statusMessage.message}
</div>
)}
{!statusMessage &&
field.field &&
!field.pattern &&
!fieldEditStatus[field.field] && (
<div className="text-xs mt-1 text-yellow-600">
</div>
)}
</div>
<div className="flex flex-col justify-end pt-3">
<button
className="text-red-500 hover:text-red-700 remove-regex-field-row"
type="button"
aria-label="删除"
onClick={() =>
field.field && removeRegexFieldRow(field.field)
}
>
<i className="ri-delete-bin-line"></i>
</button>
</div>
</div>
))}
</div>
</div>
<div className="mt-2">
<label
className="form-label mb-1"
htmlFor="regex-template-container"
>
</label>
<div
className="flex flex-wrap gap-1 mt-1"
id="regex-template-container"
>
{[
{
label: "日期格式:yyyy-mm-dd",
regex:
"\\d{4}[-/年](0?[1-9]|1[0-2])[-/月](0?[1-9]|[12][0-9]|3[01])[日]?",
},
{ label: "合同编号格式", regex: "[A-Z]{2,5}-\\d{4,10}" },
{
label: "金额格式",
regex:
"(人民币|RMB)?\\s?(\\d{1,3}(,\\d{3})*(\\.\\d{2})?)\\s?[万元]?",
},
{
label: "座机号码格式",
regex: "\\d{3}-\\d{8}|\\d{4}-\\d{7,8}",
},
{ label: "手机号码格式", regex: "1[3-9]\\d{9}" },
].map(({ label, regex }) => (
<div
key={label}
className="chip cursor-pointer regex-template"
onClick={() => applyRegexTemplate(regex)}
role="button"
tabIndex={0}
onKeyDown={(e) => {
if (e.key === "Enter" || e.key === " ")
applyRegexTemplate(regex);
}}
>
{label}
</div>
))}
</div>
</div>
</div>
</div>
</div>
{/* 在所有标签页外部添加统一的更新按钮和状态显示,这样在任何标签页都可见 */}
<div className="border-t border-gray-200 pt-5 mt-5">
<div className="flex flex-col items-center mb-2">
{hasPendingChanges && !updateStatus && (
<div className="text-center text-sm mb-3 p-2 rounded bg-yellow-100 text-yellow-700">
&ldquo;&rdquo;
</div>
)}
{!hasPendingChanges && !updateStatus && (
<div className="text-center text-xs mb-2 text-gray-600">
</div>
)}
<button
className={`ant-btn ${
hasPendingChanges ? "ant-btn-primary" : "ant-btn-default"
} text-base py-2 px-4 font-medium shadow-sm hover:shadow`}
type="button"
onClick={handleUpdateFields}
>
<i className="ri-refresh-line mr-1"></i>
{hasPendingChanges
? "更新全部字段 (有未保存更改)"
: "更新全部字段"}
</button>
</div>
{updateStatus && (
<div
className={`text-center text-sm mt-2 p-2 rounded ${
updateStatus.success
? "bg-green-100 text-green-700"
: "bg-red-100 text-red-700"
}`}
>
{updateStatus.message}
</div>
)}
</div>
</div>
</div>
);
}