This commit is contained in:
2025-11-25 11:02:40 +08:00
parent 93bae2de17
commit 0ed6f0aaf4
5 changed files with 194 additions and 156 deletions
+13 -72
View File
@@ -39,7 +39,7 @@ interface HighlightArea {
/**
 * Coordinate-based data for a single character.
 */
interface CharacterBox {
  /**
   * Corner points of the character's bounding box, as number pairs.
   * Four points in order: top-left, top-right, bottom-right, bottom-left.
   * NOTE(review): each pair is presumably [x, y] — confirm against the producer of this data.
   */
  box: [number, number][];
  /** The character itself. */
  char: string;
  /** Page number the character belongs to (NOTE(review): 0- or 1-based is not shown here — confirm). */
  page: number;
}
@@ -70,7 +70,7 @@ export default function PdfDemo() {
// PDF文件URL(使用示例PDF)
// const [pdfUrl] = useState('/testPDF/sample.pdf'); // 使用包含真实文本层的PDF
// const [pdfUrl] = useState('/api/pdf-proxy?path=documents/mz/行政处罚决定书/2025/11月13日/第71号--未在当地烟草专卖批发企业进货_02时58分36秒/第71号--未在当地烟草专卖批发企业进货.pdf'); // 使用项目中的示例PDF
const [pdfUrl] = useState('/api/pdf-proxy?path=documents/mz/行政处罚决定书/2025/11月22日/第35号--无烟草专卖品准运证运输烟草专卖品_15时15分24秒/第35号--无烟草专卖品准运证运输烟草专卖品.pdf')
const [pdfUrl] = useState('/api/pdf-proxy?path=documents/mz/测试示范类型/2025/11月24日/第37号--涉嫌生产、销售伪劣产品罪_12时19分10秒/第37号--涉嫌生产、销售伪劣产品罪.pdf')
// PDF状态
const [numPages, setNumPages] = useState<number | null>(null);
@@ -227,87 +227,28 @@ export default function PdfDemo() {
// 获取Page容器(SVG实际渲染的坐标空间)
const pageContainer = canvas?.closest('.react-pdf__Page') as HTMLElement;
if (canvas && pageContainer && pdfOriginalWidthPt) {
// Canvas 内部绘制尺寸(考虑了 devicePixelRatio)
const canvasInternalWidth = canvas.width;
const canvasInternalHeight = canvas.height;
if (canvas && pdfOriginalWidthPt) {
// Canvas 显示尺寸(浏览器中实际占用的像素)
const canvasDisplayWidth = canvas.offsetWidth;
const canvasDisplayHeight = canvas.offsetHeight;
// Page容器尺寸(SVG高亮渲染的实际坐标空间)
const pageContainerWidth = pageContainer.offsetWidth;
const pageContainerHeight = pageContainer.offsetHeight;
// 计算坐标缩放比例:Canvas显示尺寸 / PDF原始尺寸
const autoScale = canvasDisplayWidth / pdfOriginalWidthPt;
// 尝试多种计算方式
const scale1_canvasDisplay = canvasDisplayWidth / pdfOriginalWidthPt;
const scale2_canvasInternal = canvasInternalWidth / pdfOriginalWidthPt;
const scale3_pageContainer = pageContainerWidth / pdfOriginalWidthPt;
// 尝试反向计算:如果OCR尺寸比渲染尺寸大(需要缩小)
const scale4_inverseCanvasInternal = canvasDisplayWidth / canvasInternalWidth;
const scale5_inversePage = canvasDisplayWidth / pageContainerWidth;
// 计算如果要达到 0.83 的缩放比例,OCR原始尺寸应该是多少
const expectedOcrWidth = canvasDisplayWidth / 0.83;
console.log('📏 尺寸信息汇总:');
console.log(' 1️⃣ PDF原始尺寸 (page.view):', pdfOriginalWidthPt, 'x', pdfOriginalHeightPt, 'pt');
console.log(' 2️⃣ Page容器尺寸:', pageContainerWidth, 'x', pageContainerHeight, 'px');
console.log(' 3️⃣ Canvas显示尺寸:', canvasDisplayWidth, 'x', canvasDisplayHeight, 'px');
console.log(' 4️⃣ Canvas内部尺寸:', canvasInternalWidth, 'x', canvasInternalHeight, 'px');
console.log(' 5️⃣ 用户缩放 (scale):', scale);
console.log(' 6️⃣ devicePixelRatio:', window.devicePixelRatio || 1);
console.log('');
console.log('🎯 各种计算方式:');
console.log(' 方案1️⃣: Canvas显示 / PDF原始 =', scale1_canvasDisplay.toFixed(3), 'x');
console.log(' 方案2️⃣: Canvas内部 / PDF原始 =', scale2_canvasInternal.toFixed(3), 'x');
console.log(' 方案3️⃣: Page容器 / PDF原始 =', scale3_pageContainer.toFixed(3), 'x');
console.log(' 方案4️⃣: Canvas显示 / Canvas内部 =', scale4_inverseCanvasInternal.toFixed(3), 'x ⬅ 可能是这个!');
console.log(' 方案5️⃣: Canvas显示 / Page容器 =', scale5_inversePage.toFixed(3), 'x');
console.log('');
console.log('🔍 目标值分析:');
console.log(' - 手动校准的正确值: 0.83');
console.log(' - 反推OCR图像尺寸:', expectedOcrWidth.toFixed(0), 'x', (canvasDisplayHeight / 0.83).toFixed(0), 'px');
console.log(' - 比较: ', expectedOcrWidth.toFixed(0), 'vs Canvas内部', canvasInternalWidth);
// 使用最接近0.83的方案
let autoScale = scale1_canvasDisplay;
let scaleMethod = '方案1 (Canvas显示/PDF原始)';
// 检查哪个方案最接近0.83
const diff1 = Math.abs(scale1_canvasDisplay - 0.83);
const diff2 = Math.abs(scale2_canvasInternal - 0.83);
const diff3 = Math.abs(scale3_pageContainer - 0.83);
const diff4 = Math.abs(scale4_inverseCanvasInternal - 0.83);
const diff5 = Math.abs(scale5_inversePage - 0.83);
const minDiff = Math.min(diff1, diff2, diff3, diff4, diff5);
if (minDiff === diff4) {
autoScale = scale4_inverseCanvasInternal;
scaleMethod = '方案4 (Canvas显示/Canvas内部)';
} else if (minDiff === diff5) {
autoScale = scale5_inversePage;
scaleMethod = '方案5 (Canvas显示/Page容器)';
} else if (minDiff === diff2) {
autoScale = scale2_canvasInternal;
scaleMethod = '方案2 (Canvas内部/PDF原始)';
} else if (minDiff === diff3) {
autoScale = scale3_pageContainer;
scaleMethod = '方案3 (Page容器/PDF原始)';
}
console.log('');
console.log('✅ 自动选择:', scaleMethod, '=', autoScale.toFixed(3), 'x (最接近0.83)');
console.log('📏 PDF尺寸信息:');
console.log(' - PDF原始尺寸 (page.view):', pdfOriginalWidthPt, 'x', pdfOriginalHeightPt, 'pt');
console.log(' - Canvas显示尺寸 (offsetWidth):', canvasDisplayWidth, 'x', canvasDisplayHeight, 'px');
console.log(' - 用户缩放 (scale):', scale);
console.log(' - devicePixelRatio:', window.devicePixelRatio || 1);
console.log('🎯 自动计算坐标缩放:', autoScale.toFixed(3), 'x');
console.log(' 公式: Canvas显示宽度 / PDF原始宽度 =', canvasDisplayWidth, '/', pdfOriginalWidthPt);
// 保存原始宽度和自动计算的缩放比例
setPdfOriginalWidth(pdfOriginalWidthPt);
setCoordinateScale(autoScale);
setIsScaleAutoCalculated(true);
toastService.success(`自动校准完成: ${autoScale.toFixed(3)}x (${scaleMethod})`);
toastService.success(`自动校准完成: ${autoScale.toFixed(3)}x`);
} else {
console.warn('⚠️ 无法获取Canvas元素、Page容器或原始尺寸');
console.log('调试信息:', {