all in
This commit is contained in:
@@ -0,0 +1,150 @@
|
||||
/**
 * Tests extraction of placeholders from a DOCX file (local-file version).
 * Run: node scripts/test-docx-parser.cjs
 *
 * Note: this script tests a document on the local file system.
 * In the real application, files are downloaded from MinIO (using fetch).
 */
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const PizZip = require('pizzip');
|
||||
|
||||
// Location of the sample contract this script inspects, resolved relative to
// the directory that contains the script.
const testDocPath = path.join(__dirname, '..', 'public', 'testWork', '买卖合同 (1).docx');

// Opening banner: a 60-character rule, the title, another rule, then a blank line.
for (const line of [
  '='.repeat(60),
  '测试从 DOCX 文件提取占位符功能(本地文件)',
  '='.repeat(60),
  '',
]) {
  console.log(line);
}
|
||||
|
||||
// The five entities predefined by XML, keyed by name (without `&` and `;`).
const XML_NAMED_ENTITIES = Object.freeze({
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
  amp: '&',
});

// Keyword tables for the heuristics below. Order matters: the first rule whose
// keyword list matches wins.
const FIELD_TYPE_RULES = Object.freeze([
  { type: 'number', keywords: ['金额', '数量', '价格'] },
  { type: 'date', keywords: ['日期', '时间'] },
  { type: 'textarea', keywords: ['地址', '说明', '备注'] },
]);

const FIELD_GROUP_RULES = Object.freeze([
  { group: '甲方信息', keywords: ['甲方'] },
  { group: '乙方信息', keywords: ['乙方'] },
  { group: '合同条款', keywords: ['金额', '价格', '数量'] },
  { group: '日期信息', keywords: ['日期', '时间'] },
]);

/**
 * Reports whether `name` contains at least one of `keywords` as a substring.
 *
 * @param {string} name - Placeholder name to inspect.
 * @param {string[]} keywords - Substrings to look for.
 * @returns {boolean} True when any keyword occurs in `name`.
 */
function includesAny(name, keywords) {
  return keywords.some((keyword) => name.includes(keyword));
}

/**
 * Decodes XML character references in `text`: the five predefined named
 * entities and decimal / hexadecimal numeric references.
 *
 * Decoding happens in a single pass, so an escaped ampersand followed by
 * "lt;" (`&amp;lt;`) becomes the literal text `&lt;` rather than `<`.
 * References that are not recognised, or that name a code point above
 * U+10FFFF, are left unchanged.
 *
 * @param {string} text - Text that may contain XML character references.
 * @returns {string} The text with recognised references replaced.
 */
function decodeXmlEntities(text) {
  return text.replace(/&(#x[0-9a-fA-F]+|#[0-9]+|lt|gt|quot|apos|amp);/g, (whole, ref) => {
    if (ref[0] !== '#') {
      return XML_NAMED_ENTITIES[ref];
    }
    const codePoint =
      ref[1] === 'x' ? Number.parseInt(ref.slice(2), 16) : Number.parseInt(ref.slice(1), 10);
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
  });
}

/**
 * Reduces an XML string to its text content by removing every tag and then
 * decoding character references.
 *
 * Tags are removed without inserting separators, so a placeholder whose text is
 * split across several adjacent elements is joined back into one piece.
 * Entities are decoded only after the tags are gone, so a decoded `<` is never
 * mistaken for the start of a tag.
 *
 * @param {string} xml - Raw XML markup.
 * @returns {string} The concatenated text content.
 */
function extractTextFromXml(xml) {
  return decodeXmlEntities(xml.replace(/<[^>]+>/g, ''));
}

/**
 * Collects the distinct `{{name}}` placeholders found in `text`.
 *
 * Names are trimmed; placeholders that are empty after trimming are skipped.
 *
 * @param {string} text - Plain text to scan.
 * @returns {string[]} Unique placeholder names in order of first appearance.
 */
function extractPlaceholders(text) {
  const unique = new Set();
  for (const match of text.matchAll(/\{\{([^}]+)\}\}/g)) {
    const name = match[1].trim();
    if (name) {
      unique.add(name);
    }
  }
  return Array.from(unique);
}

/**
 * Guesses a form field type from keywords in the placeholder name.
 *
 * @param {string} name - Placeholder name.
 * @returns {string} 'number', 'date' or 'textarea' for the first matching rule,
 *   otherwise 'text'.
 */
function inferFieldType(name) {
  const rule = FIELD_TYPE_RULES.find(({ keywords }) => includesAny(name, keywords));
  return rule ? rule.type : 'text';
}

/**
 * Guesses which group a placeholder belongs to from keywords in its name.
 *
 * @param {string} name - Placeholder name.
 * @returns {string} The group label of the first matching rule, otherwise
 *   '基本信息'.
 */
function inferFieldGroup(name) {
  const rule = FIELD_GROUP_RULES.find(({ keywords }) => includesAny(name, keywords));
  return rule ? rule.group : '基本信息';
}

/**
 * Guesses whether a placeholder is mandatory: it is treated as required unless
 * its name contains '可选' or 'optional'.
 *
 * @param {string} name - Placeholder name.
 * @returns {boolean} True when the field is considered required.
 */
function isRequiredField(name) {
  return !includesAny(name, ['可选', 'optional']);
}

/**
 * Runs the check end to end: reads the DOCX at `testDocPath`, unzips it with
 * PizZip, extracts the text of word/document.xml, lists the placeholders found
 * and prints the inferred type / group / required flag for each one.
 *
 * Exits the process with status 1 when the file or word/document.xml is
 * missing, or when any step throws.
 */
function main() {
  try {
    // 1. Make sure the sample document exists.
    if (!fs.existsSync(testDocPath)) {
      console.error('❌ 文件不存在:', testDocPath);
      process.exit(1);
    }

    console.log('✅ 文件存在:', testDocPath);
    console.log('');

    // 2. Read the file from the local file system.
    const content = fs.readFileSync(testDocPath);
    console.log('✅ 文件读取成功, 大小:', (content.length / 1024).toFixed(2), 'KB');
    console.log('');

    // 3. Open the DOCX container with PizZip.
    const zip = new PizZip(content);
    console.log('✅ PizZip 解压成功');
    console.log('');

    // 4. Locate word/document.xml inside the archive.
    const documentXml = zip.file('word/document.xml');
    if (!documentXml) {
      console.error('❌ 无法找到 word/document.xml 文件');
      process.exit(1);
    }
    console.log('✅ document.xml 读取成功');
    console.log('');

    // 5. Get the XML and reduce it to plain text (tags removed, entities decoded).
    const xmlContent = documentXml.asText();
    console.log('✅ XML 内容长度:', xmlContent.length);

    const fullText = extractTextFromXml(xmlContent);
    console.log('✅ 提取纯文本成功');
    console.log('文本长度:', fullText.length);
    console.log('');

    // 6. Extract the placeholders.
    const placeholderList = extractPlaceholders(fullText);

    console.log('✅ 提取占位符成功');
    console.log('');
    console.log('找到', placeholderList.length, '个占位符:');
    console.log('='.repeat(60));
    placeholderList.forEach((p, index) => {
      console.log(`${index + 1}. {{${p}}}`);
    });
    console.log('='.repeat(60));
    console.log('');

    // 7. Print the inferred type, group and required flag of every placeholder.
    console.log('字段分析:');
    console.log('='.repeat(60));

    placeholderList.forEach((placeholder) => {
      const type = inferFieldType(placeholder);
      const group = inferFieldGroup(placeholder);
      const required = isRequiredField(placeholder);

      console.log(`📝 ${placeholder}`);
      console.log(` 类型: ${type}`);
      console.log(` 分组: ${group}`);
      console.log(` 必填: ${required ? '是' : '否'}`);
      console.log('');
    });

    console.log('='.repeat(60));
    console.log('');
    console.log('🎉 测试完成!所有功能正常工作。');
    console.log('');
  } catch (error) {
    console.error('');
    console.error('❌ 测试失败:');
    console.error(error);
    console.error('');
    process.exit(1);
  }
}

// Run only when this file is executed directly (node scripts/test-docx-parser.cjs).
// The typeof check keeps the helpers above loadable in a context where
// `require` is not defined.
if (typeof require !== 'undefined' && require.main === module) {
  main();
}
|
||||
Reference in New Issue
Block a user