149 lines
4.5 KiB
TypeScript
149 lines
4.5 KiB
TypeScript
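/**
 * DOCX document parsing utilities.
 * Extracts {{...}} placeholders from a .docx file by unzipping it with PizZip
 * and scanning the text of word/document.xml (docxtemplater is not used).
 */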
|
||
|
||
import PizZip from 'pizzip';
|
||
import type { PlaceholderField, PlaceholderSchema } from '~/types/contract-draft';
|
||
import { DOCUMENT_URL } from '../axios-client';
|
||
|
||
/**
 * Decodes XML character references (`&amp;`, `&lt;`, `&gt;`, `&quot;`,
 * `&apos;`, `&#NNN;`, `&#xHHH;`) in a single pass so that already-decoded
 * output is never decoded a second time.
 */
function decodeXmlCharacterReferences(text: string): string {
  const namedEntities: Record<string, string> = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'"
  };

  return text.replace(
    /&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g,
    (whole: string, ref: string) => {
      if (ref.startsWith('#')) {
        const codePoint = ref.startsWith('#x')
          ? Number.parseInt(ref.slice(2), 16)
          : Number.parseInt(ref.slice(1), 10);
        // Leave out-of-range references untouched instead of letting
        // String.fromCodePoint throw a RangeError.
        const isValid =
          Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff;
        return isValid ? String.fromCodePoint(codePoint) : whole;
      }
      return namedEntities[ref] ?? whole;
    }
  );
}

/**
 * Collects the unique, trimmed names of all `{{...}}` placeholders found in
 * the text content of a WordprocessingML document, in first-seen order.
 */
function collectPlaceholdersFromDocumentXml(xmlContent: string): string[] {
  // Strip every XML tag first: Word may split one placeholder across several
  // formatted runs, and removing the tags joins the pieces back together.
  const rawText = xmlContent.replace(/<[^>]+>/g, '');

  // Text nodes are XML-escaped (e.g. "&" is stored as "&amp;"); decode them so
  // the returned names match what the template author actually typed.
  const fullText = decodeXmlCharacterReferences(rawText);

  // Braces are excluded from the name so that "{{{name}}}" yields "name" and
  // an unclosed "{{" cannot swallow the following placeholder.
  const placeholderRegex = /\{\{([^{}]+)\}\}/g;

  const placeholders = new Set<string>();
  for (const match of fullText.matchAll(placeholderRegex)) {
    const placeholder = (match[1] ?? '').trim();
    if (placeholder) {
      placeholders.add(placeholder);
    }
  }

  return Array.from(placeholders);
}

/**
 * Downloads a .docx file and extracts the names of its `{{...}}` placeholders.
 *
 * The file is fetched from `DOCUMENT_URL + filePath`, unzipped with PizZip, and
 * `word/document.xml` is scanned directly (docxtemplater is not used, which
 * avoids problems with placeholders split across formatted runs).
 *
 * @param filePath Path of the file relative to `DOCUMENT_URL`
 *   (e.g. `contract-template/买卖/买卖合同范本.docx`).
 * @returns The de-duplicated placeholder names, trimmed, in first-seen order.
 * @throws Error prefixed with `解析文档失败: ` when the download fails, the
 *   archive cannot be read, or `word/document.xml` is missing. The failure is
 *   also logged via `console.error`.
 */
export async function extractPlaceholdersFromDocx(
  filePath: string
): Promise<string[]> {
  try {
    // Build the full download URL.
    const fileUrl = `${DOCUMENT_URL}${filePath}`;

    const response = await fetch(fileUrl);
    if (!response.ok) {
      throw new Error(`下载文件失败: ${response.status} ${response.statusText}`);
    }

    // PizZip accepts an ArrayBuffer directly, so no Node-only Buffer
    // conversion is needed.
    const arrayBuffer = await response.arrayBuffer();
    const zip = new PizZip(arrayBuffer);

    const documentXml = zip.file('word/document.xml');
    if (!documentXml) {
      throw new Error('无法找到 word/document.xml 文件');
    }

    return collectPlaceholdersFromDocumentXml(documentXml.asText());
  } catch (error) {
    console.error('[DOCX Parser] 解析文档失败:', error);
    throw new Error(`解析文档失败: ${error instanceof Error ? error.message : '未知错误'}`);
  }
}
|
||
|
||
/**
 * Builds a default PlaceholderSchema from a list of placeholder names by
 * guessing each field's group, input type and required flag from keywords
 * contained in the name.
 *
 * Keyword matching (Chinese and English) is case-insensitive so that camelCase
 * keys such as `signDate` or `totalAmount` are recognised. The `partyA` /
 * `partyB` markers stay case-sensitive because lower-casing them would match
 * unrelated words (e.g. `partya` inside `partyaddress`).
 *
 * @param placeholders Placeholder names; each one becomes both the field key
 *   and its label.
 * @returns A schema with one field per placeholder, in input order.
 */
export function generateDefaultSchema(
  placeholders: string[]
): PlaceholderSchema {
  // The same keyword lists drive both group and type inference so the two
  // can no longer disagree about what counts as a number or a date.
  const numericKeywords = ['金额', '数量', '价格', 'amount', 'price', 'quantity'];
  const dateKeywords = ['日期', '时间', 'date', 'time'];
  const textareaKeywords = ['地址', '说明', '备注', 'address', 'description', 'remark'];
  const optionalKeywords = ['可选', 'optional'];

  const fields: PlaceholderField[] = placeholders.map(placeholder => {
    const lowered = placeholder.toLowerCase();
    const mentionsAny = (keywords: readonly string[]): boolean =>
      keywords.some(keyword => lowered.includes(keyword));

    // Group: party markers take precedence over numeric and date keywords.
    let group = '基本信息';
    if (placeholder.includes('甲方') || placeholder.includes('partyA')) {
      group = '甲方信息';
    } else if (placeholder.includes('乙方') || placeholder.includes('partyB')) {
      group = '乙方信息';
    } else if (mentionsAny(numericKeywords)) {
      group = '合同条款';
    } else if (mentionsAny(dateKeywords)) {
      group = '日期信息';
    }

    // Input type: number wins over date, which wins over textarea.
    let type: 'text' | 'number' | 'date' | 'textarea' = 'text';
    if (mentionsAny(numericKeywords)) {
      type = 'number';
    } else if (mentionsAny(dateKeywords)) {
      type = 'date';
    } else if (mentionsAny(textareaKeywords)) {
      type = 'textarea';
    }

    // Fields are required unless the name explicitly marks them optional.
    const required = !mentionsAny(optionalKeywords);

    return {
      key: placeholder,
      label: placeholder, // the placeholder itself doubles as the label
      type,
      required,
      group
    };
  });

  return { fields };
}
|