This commit is contained in:
2025-12-05 00:09:32 +08:00
parent bb3d22eabf
commit 3d1dbb3f97
214 changed files with 113060 additions and 1232 deletions
+148
View File
@@ -0,0 +1,148 @@
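/**
 * DOCX document parsing utilities.
 * Extracts `{{...}}` placeholders from a .docx file by unzipping it with PizZip and
 * scanning word/document.xml as plain text (docxtemplater itself is not used here).
 */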
import PizZip from 'pizzip';
import type { PlaceholderField, PlaceholderSchema } from '~/types/contract-draft';
import { DOCUMENT_URL } from '../axios-client';
/** Replacement text for the five entity names predefined by XML. */
const XML_NAMED_ENTITIES: Record<string, string> = {
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'"
};

/**
 * Decodes predefined (`&amp;`, `&lt;`, ...) and numeric (`&#65;`, `&#x41;`) XML
 * references in a single pass, so already-decoded output is never decoded twice.
 * References that cannot be resolved are left untouched.
 */
function decodeXmlEntities(text: string): string {
  return text.replace(
    /&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g,
    (whole: string, body: string) => {
      if (body.charAt(0) !== '#') {
        return XML_NAMED_ENTITIES[body] ?? whole;
      }
      const codePoint =
        body.charAt(1) === 'x'
          ? parseInt(body.slice(2), 16)
          : parseInt(body.slice(1), 10);
      // String.fromCodePoint throws a RangeError above U+10FFFF; keep such input verbatim.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    }
  );
}

/**
 * Collects the unique, trimmed `{{...}}` placeholder names found in a document XML
 * string, in order of first appearance.
 */
function collectPlaceholdersFromXml(xmlContent: string): string[] {
  // Strip every XML tag first: Word may split one placeholder across several runs,
  // and removing the markup joins the pieces back into contiguous text.
  const strippedText = xmlContent.replace(/<[^>]+>/g, '');
  // Text nodes are XML-escaped (e.g. "&" is stored as "&amp;"); decode them so the
  // returned names match what the template author actually typed.
  const fullText = decodeXmlEntities(strippedText);

  const placeholders = new Set<string>();
  for (const match of fullText.matchAll(/\{\{([^}]+)\}\}/g)) {
    const placeholder = (match[1] ?? '').trim();
    if (placeholder) {
      placeholders.add(placeholder);
    }
  }
  return Array.from(placeholders);
}

/**
 * Extracts the placeholder names used in a .docx file.
 *
 * The file is downloaded from `${DOCUMENT_URL}${filePath}`, unzipped with PizZip, and
 * the text of `word/document.xml` is scanned for `{{...}}` markers. docxtemplater is
 * deliberately not used, which avoids problems with placeholders that formatting has
 * split across several XML tags.
 *
 * @param filePath Path appended to DOCUMENT_URL, e.g.
 *   `contract-template/买卖/买卖合同范本.docx` (described by the original author as a
 *   relative MinIO path).
 * @returns De-duplicated, trimmed placeholder names in order of first appearance.
 * @throws Error whose message starts with `解析文档失败: ` when the download fails, the
 *   archive has no `word/document.xml`, or unzipping/parsing throws.
 */
export async function extractPlaceholdersFromDocx(
  filePath: string
): Promise<string[]> {
  try {
    // Build the full download URL.
    const fileUrl = `${DOCUMENT_URL}${filePath}`;

    const response = await fetch(fileUrl);
    if (!response.ok) {
      throw new Error(`下载文件失败: ${response.status} ${response.statusText}`);
    }

    // Read the body as an ArrayBuffer and wrap it in a Buffer for PizZip.
    const arrayBuffer = await response.arrayBuffer();
    const content = Buffer.from(arrayBuffer);

    // Unzip the .docx container and locate the main document part.
    const zip = new PizZip(content);
    const documentXml = zip.file('word/document.xml');
    if (!documentXml) {
      throw new Error('无法找到 word/document.xml 文件');
    }

    return collectPlaceholdersFromXml(documentXml.asText());
  } catch (error) {
    // Log the original error, then rethrow with a uniform message prefix.
    console.error('[DOCX Parser] 解析文档失败:', error);
    throw new Error(`解析文档失败: ${error instanceof Error ? error.message : '未知错误'}`);
  }
}
/** Field input types that the default schema can assign. */
type DefaultFieldType = 'text' | 'number' | 'date' | 'textarea';

// Keyword tables shared by the group and type heuristics so the two cannot drift apart.
// All Latin keywords are lowercase because names are lowercased before matching.
const PARTY_A_KEYWORDS: readonly string[] = ['甲方', 'partya'];
const PARTY_B_KEYWORDS: readonly string[] = ['乙方', 'partyb'];
const NUMERIC_KEYWORDS: readonly string[] = ['金额', '数量', '价格', 'amount', 'price', 'quantity'];
const DATE_KEYWORDS: readonly string[] = ['日期', '时间', 'date', 'time'];
const LONG_TEXT_KEYWORDS: readonly string[] = ['地址', '说明', '备注', 'address', 'description', 'remark'];
const OPTIONAL_KEYWORDS: readonly string[] = ['可选', 'optional'];

/** Returns true when the (already lowercased) name contains at least one keyword. */
function containsAnyKeyword(lowerName: string, keywords: readonly string[]): boolean {
  return keywords.some(keyword => lowerName.includes(keyword));
}

/**
 * Builds a default PlaceholderSchema from a list of placeholder names.
 *
 * Each placeholder becomes one field whose `key` and `label` are the placeholder
 * itself. Group, type and required flag are guessed from keywords contained in the
 * name; matching is case-insensitive so camelCase names such as `totalAmount` or
 * `signDate` are recognised.
 *
 * - group: party A > party B > contract terms (numeric keywords) > date info,
 *   otherwise `基本信息`.
 * - type: number > date > textarea, otherwise `text`.
 * - required: true unless the name contains `可选` or `optional`.
 *
 * @param placeholders Placeholder names, e.g. as returned by the DOCX extractor.
 * @returns A schema with one field per entry of `placeholders`, in the same order.
 */
export function generateDefaultSchema(
  placeholders: string[]
): PlaceholderSchema {
  const fields: PlaceholderField[] = placeholders.map(placeholder => {
    // Lowercasing leaves the Chinese keywords unaffected and makes Latin ones case-insensitive.
    const lowerName = placeholder.toLowerCase();
    const isNumeric = containsAnyKeyword(lowerName, NUMERIC_KEYWORDS);
    const isDate = containsAnyKeyword(lowerName, DATE_KEYWORDS);

    // Guess the display group; party membership wins over the kind of value.
    let group = '基本信息';
    if (containsAnyKeyword(lowerName, PARTY_A_KEYWORDS)) {
      group = '甲方信息';
    } else if (containsAnyKeyword(lowerName, PARTY_B_KEYWORDS)) {
      group = '乙方信息';
    } else if (isNumeric) {
      group = '合同条款';
    } else if (isDate) {
      group = '日期信息';
    }

    // Guess the input type.
    let type: DefaultFieldType = 'text';
    if (isNumeric) {
      type = 'number';
    } else if (isDate) {
      type = 'date';
    } else if (containsAnyKeyword(lowerName, LONG_TEXT_KEYWORDS)) {
      type = 'textarea';
    }

    // Fields are required unless the name marks them as optional.
    const required = !containsAnyKeyword(lowerName, OPTIONAL_KEYWORDS);

    return {
      key: placeholder,
      label: placeholder, // the placeholder text doubles as the label
      type,
      required,
      group
    };
  });
  return { fields };
}