Files

242 lines
9.0 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
 * Flattened, display-oriented view of one rule as produced by
 * `parseRuleSummariesFromYaml`. Every field is derived from text scanning,
 * so values are plain strings rather than typed YAML scalars.
 */
export type RuleSummary = {
  /** `ruleId` when it is non-empty, otherwise `${group}-${name}`. */
  id: string;
  /** Value of the rule's `rule_id:` key; may be empty. */
  ruleId: string;
  /** Value of the rule's `name:` key, or the parser's placeholder when absent. */
  name: string;
  /** Value of the enclosing `- group:` entry, or the parser's placeholder when absent. */
  group: string;
  /** Value of `risk:`; the parser falls back to `"medium"`. */
  risk: string;
  /** Value of `score:` kept as text; the parser falls back to `"-"`. */
  score: string;
  /** Value of the rule-level `type:`; the parser falls back to `"deterministic"`. */
  type: string;
  /** Distinct values of `check:` / `type:` keys indented six or more columns. */
  checkTypes: string[];
  /** Value of `logic:`, or an empty string. */
  logic: string;
  /** One entry per `- id:` item found at four columns of indentation. */
  subRules: {
    /** Value of the item's `id:` key. */
    id: string;
    /** The item's `check:` value, else its `type:` value, else `"-"`. */
    check: string;
    /** Short human-readable description of what the item inspects. */
    content: string;
  }[];
  /** List items found under the rule-level `rules:` key. */
  subRuleIds: string[];
  /** Up to eight distinct colon-free list items from the rule; purely numeric items are dropped. */
  scope: string[];
  /** Up to eight distinct entries: explicit `dependencies:` items first, then values referenced by sub-rule keys. */
  dependencies: string[];
  /** Number of entries in `subRules`. */
  stageCount: number;
  /** List items found under `applies_in:`. */
  appliesIn: string[];
  /** Every `prompt:` value in the rule, joined with a blank line. */
  prompt: string;
  /** Value of `desc:`, or an empty string. */
  description: string;
};
/**
 * Returns the raw text nested under a top-level `key:` line.
 *
 * The section starts on the line after the header and runs until the next
 * line that begins at column 0 with an identifier followed by `:` (or to the
 * end of the document). Line endings are normalised to `\n` in the result so
 * that downstream line-based matching works for CRLF documents as well.
 *
 * @param source Full document text (LF or CRLF line endings).
 * @param key Top-level key name, without the trailing colon.
 * @returns The section body joined with `\n`, or `""` when the header is absent.
 */
function getTopLevelSection(source: string, key: string): string {
  // Splitting on /\r?\n/ keeps a stray "\r" from defeating the header comparison.
  const lines = source.split(/\r?\n/);
  const header = `${key}:`;
  // Trailing blanks after the colon do not change the meaning of the header line.
  const start = lines.findIndex((line) => line.trimEnd() === header);
  if (start === -1) return "";

  const offset = lines.slice(start + 1).findIndex((line) => /^[a-zA-Z_][\w-]*:/.test(line));
  const end = offset === -1 ? lines.length : start + 1 + offset;
  return lines.slice(start + 1, end).join("\n");
}
/**
 * Normalises a raw scalar captured from a YAML line.
 *
 * Steps, in order: trim surrounding whitespace, drop one leading quote
 * character, drop one trailing quote character (the two are checked
 * independently, so unbalanced quotes are removed too), then delete every
 * NUL character.
 *
 * @param value Raw text; defaults to an empty string.
 * @returns The cleaned text.
 */
function stripYamlValue(value = ""): string {
  let text = value.trim();
  if (/^['"]/.test(text)) text = text.slice(1);
  if (/['"]$/.test(text)) text = text.slice(0, -1);
  return text.split("\u0000").join("");
}
/**
 * Cuts `section` into consecutive blocks, each beginning on a line that
 * matches `marker` and extending up to (not including) the next such line.
 * Lines before the first marker are discarded.
 *
 * @param section Text to split; lines are separated by `\n`.
 * @param marker Pattern tested against each individual line.
 * @returns The blocks in document order; empty when no line matches.
 */
function splitBlocks(section: string, marker: RegExp): string[] {
  const lines = section.split("\n");
  const starts: number[] = [];
  lines.forEach((line, index) => {
    // `test` resumes from `lastIndex` on global/sticky regexes; reset it so
    // every line is matched from its first character regardless of flags.
    marker.lastIndex = 0;
    if (marker.test(line)) starts.push(index);
  });
  marker.lastIndex = 0;
  // `starts[position + 1]` is undefined for the last block, so it runs to the end.
  return starts.map((start, position) => lines.slice(start, starts[position + 1]).join("\n"));
}
/**
 * Builds one display summary per rule found in the top-level `rules:` section
 * of a YAML document.
 *
 * This is an indentation-driven text scan, not a YAML parser. It recognises
 * the layout encoded in the patterns below: `- group:` entries at column 0,
 * `- rule_id:` entries indented two columns, rule-level keys indented four
 * columns, `- id:` sub-rule entries indented four columns and their keys
 * indented six columns.
 *
 * Group and rule-level scalars must have their value on the same line as the
 * key; a key with nothing after the colon yields the documented fallback.
 *
 * @param source Full YAML text.
 * @returns Summaries in document order; empty when there is no `rules:`
 *   section or it contains no `- group:` entry.
 */
export function parseRuleSummariesFromYaml(source: string): RuleSummary[] {
  const section = getTopLevelSection(source, "rules");
  const groups = splitBlocks(section, /^-\s+group:\s*/);

  // Whitespace that stays on the current line. Plain `\s` also matches line
  // breaks, which lets a multi-line pattern run from an empty `key:` onto the
  // following line and capture that line as the value.
  const INLINE_SPACE = "[^\\S\\r\\n]";
  const GROUP_PREFIX = `-${INLINE_SPACE}+`;
  const RULE_ID_PREFIX = `${INLINE_SPACE}{2}-${INLINE_SPACE}+`;
  const RULE_FIELD_PREFIX = `${INLINE_SPACE}{4}`;

  /** Width of a line's leading whitespace. */
  const indentOf = (line: string): number => line.match(/^\s*/)?.[0].length || 0;

  /**
   * Reads the first `<prefix><key>: value` scalar in `block`. The value must
   * start on the key's own line; otherwise `fallback` is used.
   */
  const readScalar = (block: string, prefix: string, key: string, fallback: string): string => {
    const pattern = new RegExp(`^${prefix}${key}:${INLINE_SPACE}*(\\S.*)$`, "m");
    return stripYamlValue(block.match(pattern)?.[1] || fallback);
  };

  /**
   * Reads the `- item` entries that follow a bare `key:` line at exactly
   * `indent` columns, stopping at the next key at that same indentation.
   */
  const readList = (block: string, key: string, indent = 4): string[] => {
    // Built once per call rather than once per scanned line.
    const header = new RegExp(`^\\s{${indent}}${key}:\\s*$`);
    const nextKey = new RegExp(`^\\s{${indent}}[a-zA-Z_][^:]*:\\s*`);
    const item = new RegExp(`^\\s{${indent}}-\\s+(.+)$`);

    const lines = block.split("\n");
    const start = lines.findIndex((line) => header.test(line));
    if (start === -1) return [];

    const values: string[] = [];
    for (const line of lines.slice(start + 1)) {
      if (nextKey.test(line)) break;
      const match = line.match(item);
      if (match) values.push(stripYamlValue(match[1]));
    }
    return values;
  };

  /**
   * Like `readList`, but the key may sit at any indentation and items may be
   * indented freely; the list ends at the first non-blank, non-item line that
   * is indented no deeper than the key.
   */
  const readFlexibleList = (block: string, key: string): string[] => {
    const header = new RegExp(`^(\\s*)${key}:\\s*$`);
    const lines = block.split("\n");
    const start = lines.findIndex((line) => header.test(line));
    if (start === -1) return [];

    const indent = indentOf(lines[start]);
    const values: string[] = [];
    for (const line of lines.slice(start + 1)) {
      const match = line.match(/^\s*-\s+(.+)$/);
      if (match) {
        values.push(stripYamlValue(match[1]));
        continue;
      }
      if (line.trim() && indentOf(line) <= indent) break;
    }
    return values;
  };

  /** Maps a few known element identifiers to their display labels. */
  const dependencyLabels = new Map<string, string>([
    ["cross_page_seal", "骑缝章"],
    ["seal", "印章"],
    ["signature", "签名"],
  ]);
  const normalizeDependency = (value: string): string => {
    const normalized = stripYamlValue(value);
    return dependencyLabels.get(normalized) ?? normalized;
  };

  /**
   * Collects every `prompt:` value in `block`, including continuation lines.
   * A prompt ends at the first non-blank line indented no deeper than its
   * key, or at a `key:`-looking line indented exactly two columns deeper.
   */
  const readPrompts = (block: string): string[] => {
    const lines = block.split("\n");
    const prompts: string[] = [];
    lines.forEach((headerLine, index) => {
      const match = headerLine.match(/^(\s*)prompt:\s*(.*)$/);
      if (!match) return;
      const indent = match[1].length;
      const parts = [match[2]];
      for (const line of lines.slice(index + 1)) {
        const trimmed = line.trim();
        const nextIndent = indentOf(line);
        if (trimmed && nextIndent <= indent) break;
        if (trimmed && nextIndent === indent + 2 && /^[a-zA-Z_][\w-]*:\s*/.test(trimmed)) break;
        parts.push(line);
      }
      prompts.push(
        parts
          .join("\n")
          .replace(/^['"]/, "")
          .replace(/['"]\s*$/, "")
          .split("\n")
          // Continuation lines are dedented by a fixed eight columns.
          .map((line) => line.replace(/^\s{8}/, ""))
          .join("\n")
          .trim(),
      );
    });
    return prompts.filter(Boolean);
  };

  /**
   * Reads a sub-rule scalar indented six columns.
   * NOTE(review): `\s*` after the colon can run onto the following line, so a
   * key that holds a nested mapping yields the next line's text. Left as-is
   * because stage summaries may rely on it — confirm before tightening.
   */
  const readStageScalar = (block: string, key: string): string =>
    stripYamlValue(block.match(new RegExp(`^\\s{6}${key}:\\s*(.+)$`, "m"))?.[1] || "");

  /** Produces a short description of a sub-rule from whichever keys it defines. */
  const summarizeStage = (stageBlock: string): string => {
    const fields = readList(stageBlock, "fields", 6);
    const field = readStageScalar(stageBlock, "field");
    const left = readStageScalar(stageBlock, "left") || readStageScalar(stageBlock, "left_field");
    const op = readStageScalar(stageBlock, "op");
    const right = readStageScalar(stageBlock, "right") || readStageScalar(stageBlock, "right_field");
    const value = readStageScalar(stageBlock, "value");
    const prompt = readStageScalar(stageBlock, "prompt");
    const element =
      readStageScalar(stageBlock, "element") ||
      readStageScalar(stageBlock, "seal_id") ||
      readStageScalar(stageBlock, "signature_id");

    if (fields.length > 0) return fields.join("、");
    if (left || right) return [left, op, right].filter(Boolean).join(" ");
    if (field && value) return `${field} = ${value}`;
    if (field) return field;
    if (element) return element;
    if (prompt) return prompt.slice(0, 80);

    // Nothing recognisable: show up to three raw lines after the `- id:` line.
    const rawLines = stageBlock
      .split("\n")
      .map((line) => line.trim())
      .filter(Boolean)
      .slice(1, 4);
    return rawLines.join("") || "未配置内容";
  };

  /** Extracts the `- id:` sub-rules of a rule; entries without an id are dropped. */
  const readSubRules = (block: string) =>
    splitBlocks(block, /^\s{4}-\s+id:\s*/)
      .map((stageBlock) => ({
        id: stripYamlValue(stageBlock.match(/^\s{4}-\s+id:\s*(.+)$/m)?.[1] || ""),
        check: readStageScalar(stageBlock, "check") || readStageScalar(stageBlock, "type") || "-",
        content: summarizeStage(stageBlock),
      }))
      .filter((stage) => stage.id);

  return groups.flatMap((groupBlock) => {
    const group = readScalar(groupBlock, GROUP_PREFIX, "group", "未分组");

    return splitBlocks(groupBlock, /^\s{2}-\s+rule_id:\s*/).map((ruleBlock) => {
      const ruleId = readScalar(ruleBlock, RULE_ID_PREFIX, "rule_id", "");
      const name = readScalar(ruleBlock, RULE_FIELD_PREFIX, "name", "未命名规则");

      const checkTypes = Array.from(
        new Set(
          Array.from(ruleBlock.matchAll(/^\s{6,}(?:check|type):\s*(.+)$/gm)).map((match) =>
            stripYamlValue(match[1]),
          ),
        ),
      );

      const stageDependencies = Array.from(
        ruleBlock.matchAll(
          /^\s{6,}(?:field|number|chinese|left|right|left_field|right_field|target|element|seal_id|signature_id):\s*(.+)$/gm,
        ),
      ).map((match) => normalizeDependency(match[1]));
      const dependencies = Array.from(
        new Set([...readList(ruleBlock, "dependencies"), ...stageDependencies]),
      );

      // Any colon-free list item in the rule counts; purely numeric items are skipped.
      const scope = Array.from(
        new Set(
          Array.from(ruleBlock.matchAll(/^\s{4,}-\s*([^:\n]+)$/gm))
            .map((match) => stripYamlValue(match[1]))
            .filter((value) => !/^\d+$/.test(value)),
        ),
      );

      const prompts = readPrompts(ruleBlock);
      const subRules = readSubRules(ruleBlock);

      return {
        id: ruleId || `${group}-${name}`,
        ruleId,
        name,
        group,
        risk: readScalar(ruleBlock, RULE_FIELD_PREFIX, "risk", "medium"),
        score: readScalar(ruleBlock, RULE_FIELD_PREFIX, "score", "-"),
        type: readScalar(ruleBlock, RULE_FIELD_PREFIX, "type", "deterministic"),
        checkTypes,
        logic: readScalar(ruleBlock, RULE_FIELD_PREFIX, "logic", ""),
        subRules,
        subRuleIds: readList(ruleBlock, "rules"),
        scope: scope.slice(0, 8),
        dependencies: dependencies.slice(0, 8),
        stageCount: subRules.length,
        appliesIn: readFlexibleList(ruleBlock, "applies_in"),
        prompt: prompts.join("\n\n"),
        description: readScalar(ruleBlock, RULE_FIELD_PREFIX, "desc", ""),
      } satisfies RuleSummary;
    });
  });
}