/**
 * DOCX document parsing utilities.
 *
 * Extracts `{{...}}` placeholders from a .docx file by unzipping it with PizZip
 * and scanning the text of word/document.xml directly, then derives a default
 * form schema from the placeholder names.
 */
import PizZip from 'pizzip';
import type { PlaceholderField, PlaceholderSchema } from '~/types/contract-draft';
import { DOCUMENT_URL } from '../axios-client';

/** The five entities predefined by XML, keyed by name (without `&` and `;`). */
const XML_NAMED_ENTITIES: Record<string, string> = {
  lt: '<',
  gt: '>',
  amp: '&',
  quot: '"',
  apos: "'"
};

/** Keywords matched case-sensitively (see `includesAny` callers for why). */
const PARTY_A_KEYWORDS = ['甲方', 'partyA'];
const PARTY_B_KEYWORDS = ['乙方', 'partyB'];

/** Keywords matched against the lower-cased placeholder; keep them lower-case. */
const NUMERIC_KEYWORDS = ['金额', '价格', '数量', 'amount', 'price', 'quantity'];
const DATE_KEYWORDS = ['日期', '时间', 'date', 'time'];
const TEXTAREA_KEYWORDS = ['地址', '说明', '备注', 'address', 'description', 'remark'];
const OPTIONAL_KEYWORDS = ['可选', 'optional'];

type FieldType = 'text' | 'number' | 'date' | 'textarea';

/** Returns true when `text` contains at least one of `keywords`. */
function includesAny(text: string, keywords: string[]): boolean {
  return keywords.some(keyword => text.includes(keyword));
}

/**
 * Decodes XML named entities and numeric character references in one pass.
 * A single pass is used so that `&amp;lt;` becomes `&lt;` and is not decoded twice.
 */
function decodeXmlEntities(text: string): string {
  return text.replace(
    /&(#x[0-9a-fA-F]+|#\d+|lt|gt|amp|quot|apos);/g,
    (entity: string, body: string) => {
      if (body.startsWith('#')) {
        const isHex = body[1] === 'x';
        const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
        // Leave out-of-range references untouched instead of throwing.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
      }
      return XML_NAMED_ENTITIES[body] ?? entity;
    }
  );
}

/** Returns the unique, trimmed `{{...}}` placeholder names found in the XML, in first-seen order. */
function collectPlaceholderNames(xmlContent: string): string[] {
  // Strip every XML tag first: Word may split one placeholder across several
  // formatted runs, and removing the tags joins the pieces back together.
  const fullText = decodeXmlEntities(xmlContent.replace(/<[^>]+>/g, ''));

  const names = new Set<string>();
  for (const match of fullText.matchAll(/\{\{([^}]+)\}\}/g)) {
    const name = (match[1] ?? '').trim();
    if (name) {
      names.add(name);
    }
  }
  return Array.from(names);
}

/**
 * Extracts the placeholders from a docx file.
 *
 * @param filePath MinIO file path, appended as-is to DOCUMENT_URL
 *   (relative path, e.g. contract-template/买卖/买卖合同范本.docx)
 * @returns The de-duplicated placeholder names, without the surrounding braces
 * @throws Error when the download fails, word/document.xml is missing, or the
 *   archive cannot be read; the original message is embedded in the new one
 */
export async function extractPlaceholdersFromDocx(
  filePath: string
): Promise<string[]> {
  try {
    // Build the full file URL.
    const fileUrl = `${DOCUMENT_URL}${filePath}`;

    // Download the file.
    const response = await fetch(fileUrl);
    if (!response.ok) {
      throw new Error(`下载文件失败: ${response.status} ${response.statusText}`);
    }

    // Read the body and hand it to PizZip as a Buffer.
    const arrayBuffer = await response.arrayBuffer();
    const content = Buffer.from(arrayBuffer);
    const zip = new PizZip(content);

    // Read document.xml directly (docxtemplater is deliberately not used, to
    // avoid problems with placeholders split by formatting tags).
    const documentXml = zip.file('word/document.xml');
    if (!documentXml) {
      throw new Error('无法找到 word/document.xml 文件');
    }

    return collectPlaceholderNames(documentXml.asText());
  } catch (error) {
    console.error('[DOCX Parser] 解析文档失败:', error);
    throw new Error(`解析文档失败: ${error instanceof Error ? error.message : '未知错误'}`);
  }
}

/** Guesses the form group of a placeholder; the first matching rule wins. */
function inferGroup(placeholder: string, lowered: string): string {
  // partyA / partyB stay case-sensitive: lower-casing would make "partya"
  // match unrelated keys such as "partyAddress".
  if (includesAny(placeholder, PARTY_A_KEYWORDS)) {
    return '甲方信息';
  }
  if (includesAny(placeholder, PARTY_B_KEYWORDS)) {
    return '乙方信息';
  }
  if (includesAny(lowered, NUMERIC_KEYWORDS)) {
    return '合同条款';
  }
  if (includesAny(lowered, DATE_KEYWORDS)) {
    return '日期信息';
  }
  return '基本信息';
}

/** Guesses the input type of a placeholder; number beats date beats textarea. */
function inferType(lowered: string): FieldType {
  if (includesAny(lowered, NUMERIC_KEYWORDS)) {
    return 'number';
  }
  if (includesAny(lowered, DATE_KEYWORDS)) {
    return 'date';
  }
  if (includesAny(lowered, TEXTAREA_KEYWORDS)) {
    return 'textarea';
  }
  return 'text';
}

/**
 * Generates a default PlaceholderSchema from a list of placeholders.
 *
 * Group, field type and the required flag are guessed from keywords in the
 * placeholder name. English keywords are matched case-insensitively so that
 * camelCase keys such as `signDate` or `totalAmount` are recognised.
 *
 * @param placeholders Placeholder names
 * @returns A schema with one field per placeholder, in input order
 */
export function generateDefaultSchema(
  placeholders: string[]
): PlaceholderSchema {
  const fields: PlaceholderField[] = placeholders.map(placeholder => {
    const lowered = placeholder.toLowerCase();

    return {
      key: placeholder,
      label: placeholder, // the placeholder itself doubles as the label
      type: inferType(lowered),
      // A field is required unless its name marks it as optional.
      required: !includesAny(lowered, OPTIONAL_KEYWORDS),
      group: inferGroup(placeholder, lowered)
    };
  });

  return { fields };
}