/**
 * React-PDF feature test demo.
 * Explores the built-in capabilities of the react-pdf library.
 */
import { useState, useRef, useCallback, useEffect } from 'react';
import { Document, Page, pdfjs } from 'react-pdf';
import { type MetaFunction } from '@remix-run/node';
import { Card } from '~/components/ui/Card';
import { Button } from '~/components/ui/Button';
import { toastService } from '~/components/ui/Toast';

// react-pdf stylesheets (required by the text layer and the annotation layer)
import 'react-pdf/dist/esm/Page/TextLayer.css';
import 'react-pdf/dist/esm/Page/AnnotationLayer.css';

// Location of the pdf.js worker script
pdfjs.GlobalWorkerOptions.workerSrc = '/pdf.worker.js';

/** Remix route metadata (document title and description). */
export const meta: MetaFunction = () => {
  return [
    { title: 'React-PDF 功能测试 - 文档审核系统' },
    { name: 'description', content: 'React-PDF库功能测试和演示' },
  ];
};

/** A highlight derived from a DOM text selection or a text search. */
interface HighlightArea {
  id: string;
  pageNumber: number;
  text: string;
  /** Rectangles relative to the page element, divided by the zoom scale. */
  rects: Array<{
    left: number;
    top: number;
    width: number;
    height: number;
  }>;
  color: string;
}

/** A single character with its coordinate box. */
interface CharacterBox {
  /** Corner points; index 0 is read as top-left and index 2 as bottom-right. */
  box: [number, number][];
  char: string;
  page: number;
}

/** One line of text made of character boxes. */
interface TextLine {
  /** Every character on this line. */
  chars: CharacterBox[];
  /** The concatenated text of the line. */
  text: string;
  /** Rectangle spanning from the first character to the last one. */
  rect: {
    x1: number; // top-left X
    y1: number; // top-left Y
    x2: number; // bottom-right X
    y2: number; // bottom-right Y
  };
}

/** A coordinate-based highlight, stored line by line. */
interface CoordinateHighlight {
  id: string;
  pageNumber: number;
  text: string;
  lines: TextLine[];
  color: string;
}

// Zoom limits and step shared by the zoom handlers.
const MIN_SCALE = 0.5;
const MAX_SCALE = 3.0;
const SCALE_STEP = 0.25;

/** Reads the `data-page-number` attribute of a page element (falls back to "1" when absent). */
function readPageNumber(pageElement: Element): number {
  return parseInt(pageElement.getAttribute('data-page-number') || '1', 10);
}

/** Converts viewport client rects into page-relative rects normalised by the zoom scale. */
function toPageRelativeRects(
  rects: DOMRectList,
  pageRect: DOMRect,
  scale: number
): HighlightArea['rects'] {
  return Array.from(rects).map(rect => ({
    left: (rect.left - pageRect.left) / scale,
    top: (rect.top - pageRect.top) / scale,
    width: rect.width / scale,
    height: rect.height / scale,
  }));
}

/**
 * Resolves the page and page-relative rects of the first range of a selection.
 * The page is looked up from the range's start container only.
 *
 * @returns `null` when the selection does not start inside a `[data-page-number]` element.
 * @throws if the selection has no range (callers wrap this in try/catch).
 */
function measureSelection(
  selection: Selection,
  scale: number
): { pageNumber: number; rects: HighlightArea['rects'] } | null {
  const range = selection.getRangeAt(0);
  const pageElement = range.startContainer.parentElement?.closest('[data-page-number]');
  if (!pageElement) return null;

  return {
    pageNumber: readPageNumber(pageElement),
    rects: toPageRelativeRects(
      range.getClientRects(),
      pageElement.getBoundingClientRect(),
      scale
    ),
  };
}

/**
 * Builds a line from a non-empty list of characters: the rectangle runs from
 * the first character's top-left corner to the last character's bottom-right corner.
 *
 * @throws if `chars` is empty or a box has fewer than three points.
 */
function buildTextLine(chars: CharacterBox[]): TextLine {
  const firstChar = chars[0];
  const lastChar = chars[chars.length - 1];
  if (!firstChar || !lastChar) {
    throw new Error('buildTextLine requires at least one character');
  }

  return {
    chars,
    text: chars.map(c => c.char).join(''),
    rect: {
      x1: firstChar.box[0][0], // top-left X
      y1: firstChar.box[0][1], // top-left Y
      x2: lastChar.box[2][0], // bottom-right X
      y2: lastChar.box[2][1], // bottom-right Y
    },
  };
}

export default function PdfDemo() {
  // PDF file URL. Alternatives that were used previously:
  //   sample PDF:                '/testPDF/sample.pdf'
  //   PDF with a real text layer: '/api/pdf-proxy?path=documents/mz/行政处罚决定书/2025/11月13日/第71号--未在当地烟草专卖批发企业进货_02时58分36秒/第71号--未在当地烟草专卖批发企业进货.pdf'
  // Currently: a sample PDF from the project.
  const [pdfUrl] = useState(
    '/api/pdf-proxy?path=documents/mz/测试示范类型/2025/11月24日/第37号--涉嫌生产、销售伪劣产品罪_12时19分10秒/第37号--涉嫌生产、销售伪劣产品罪.pdf'
  );

  // PDF state
  const [numPages, setNumPages] = useState<number | null>(null);
  const [currentPage, setCurrentPage] = useState(1);
  const [scale, setScale] = useState(1.0);
  const [rotation, setRotation] = useState(0);

  // Text layer / annotation layer toggles
  const [renderTextLayer, setRenderTextLayer] = useState(true);
  const [renderAnnotationLayer, setRenderAnnotationLayer] = useState(true);

  // Debug aid: inspect whether the text layer was rendered.
  useEffect(() => {
    if (!numPages || !renderTextLayer) return undefined;

    const checkTextLayer = () => {
      const textLayers1 = document.querySelectorAll('.react-pdf__Page__textContent');
      const textLayers2 = document.querySelectorAll('.textLayer');
      const canvasLayers = document.querySelectorAll('.react-pdf__Page__canvas');

      console.log('🔍 检测到的文本层数量:');
      console.log(' - .react-pdf__Page__textContent:', textLayers1.length);
      console.log(' - .textLayer:', textLayers2.length);
      console.log(' - .react-pdf__Page__canvas:', canvasLayers.length);

      const textLayer = textLayers1[0] || textLayers2[0];
      const canvas = canvasLayers[0];

      if (textLayer) {
        const styles = window.getComputedStyle(textLayer);
        console.log('✅ 文本层已渲染');
        console.log('📝 文本层关键样式:', {
          className: textLayer.className,
          pointerEvents: styles.pointerEvents,
          zIndex: styles.zIndex,
          opacity: styles.opacity,
          position: styles.position,
          userSelect: styles.userSelect,
        });

        // Inspect the span elements inside the text layer.
        const spans = textLayer.querySelectorAll('span');
        const firstSpan = spans[0];
        console.log('📝 文本层中的 span 数量:', spans.length);

        if (!firstSpan) {
          console.warn('⚠️⚠️⚠️ 关键问题:文本层容器存在,但里面没有 span 元素!');
          console.warn('这意味着 PDF.js 没有提取出文本内容。');
          console.log('🔍 文本层 HTML:', textLayer.innerHTML.substring(0, 500));
        } else {
          const spanStyles = window.getComputedStyle(firstSpan);
          console.log('📝 第一个 span 的样式:', {
            pointerEvents: spanStyles.pointerEvents,
            cursor: spanStyles.cursor,
            userSelect: spanStyles.userSelect,
          });
          console.log('📝 第一个 span 的文本内容:', firstSpan.textContent);
        }
      } else {
        console.warn('⚠️ 文本层未找到!');
      }

      if (canvas) {
        const canvasStyles = window.getComputedStyle(canvas);
        console.log('🎨 Canvas 层样式:', {
          pointerEvents: canvasStyles.pointerEvents,
          zIndex: canvasStyles.zIndex,
          position: canvasStyles.position,
        });
      }
    };

    // Check several times in case the text content is loaded late.
    const timers = [
      setTimeout(checkTextLayer, 1000),
      setTimeout(() => {
        console.log('🔄 2秒后再次检查...');
        checkTextLayer();
      }, 2000),
      setTimeout(() => {
        console.log('🔄 5秒后最后检查...');
        checkTextLayer();
      }, 5000),
    ];

    // Cancel pending checks when the effect re-runs or the component unmounts.
    return () => {
      timers.forEach(timer => clearTimeout(timer));
    };
  }, [numPages, renderTextLayer]);

  // Page render mode
  const [renderMode, setRenderMode] = useState<'canvas' | 'svg'>('canvas');

  // Manual highlights
  const [highlights, setHighlights] = useState<HighlightArea[]>([]);
  const [selectedText, setSelectedText] = useState('');

  // Text search
  const [searchText, setSearchText] = useState('');
  const [searchResults, setSearchResults] = useState<HighlightArea[]>([]);

  // Coordinate-based highlights (for scanned PDFs)
  const [coordinateHighlights, setCoordinateHighlights] = useState<CoordinateHighlight[]>([]);
  const [coordinateInput, setCoordinateInput] = useState('');

  // Coordinate calibration parameters
  const [coordinateScale, setCoordinateScale] = useState(0.83); // coordinate scale factor (default 0.83)
  const [coordinateOffsetX, setCoordinateOffsetX] = useState(0); // X offset
  const [coordinateOffsetY, setCoordinateOffsetY] = useState(0); // Y offset

  // Original PDF size
  const [pdfOriginalWidth, setPdfOriginalWidth] = useState(0);
  const [isScaleAutoCalculated, setIsScaleAutoCalculated] = useState(false); // whether auto-calibration already ran

  // Loading state
  const [isLoading, setIsLoading] = useState(false);
  const [loadError, setLoadError] = useState<string | null>(null);

  // Refs
  // NOTE(review): element types assume both refs are attached to <div> elements — confirm against the markup.
  const containerRef = useRef<HTMLDivElement>(null);
  const pageRefs = useRef<Map<number, HTMLDivElement>>(new Map());

  // ============ PDF load events ============

  /** Stores the page count and clears loading/error state once the document is loaded. */
  const onDocumentLoadSuccess = ({ numPages }: { numPages: number }) => {
    setNumPages(numPages);
    setIsLoading(false);
    setLoadError(null);
    toastService.success(`PDF加载成功!共 ${numPages} 页`);
    console.log('✅ PDF加载成功,总页数:', numPages);
  };

  /** Records and reports a document load failure. */
  const onDocumentLoadError = (error: Error) => {
    setIsLoading(false);
    setLoadError(error.message);
    toastService.error('PDF加载失败: ' + error.message);
    console.error('❌ PDF加载失败:', error);
  };

  /** Logs download progress as a percentage. */
  const onDocumentLoadProgress = ({ loaded, total }: { loaded: number; total: number }) => {
    // Without a positive total a percentage would be NaN/Infinity; report raw bytes instead.
    if (!(total > 0)) {
      console.log(`📥 PDF加载进度: ${loaded} bytes`);
      return;
    }
    const progress = Math.round((loaded / total) * 100);
    console.log(`📥 PDF加载进度: ${progress}%`);
  };

  // ============ Page render events ============

  /**
   * Logs the rendered page and, for the first page only, auto-calibrates the
   * coordinate scale as (canvas display width / PDF original width).
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- page shape comes from react-pdf; only probed defensively here
  const onPageLoadSuccess = (page: any) => {
    console.log('✅ 页面渲染成功:', page.pageNumber);

    // Only auto-calibrate once, when the first page loads.
    if (page.pageNumber === 1 && !isScaleAutoCalculated) {
      // Short delay so the DOM is fully rendered before measuring.
      setTimeout(() => {
        // Original PDF size; page.view is read as [x, y, width, height].
        const pdfOriginalWidthPt = page.view?.[2] || page.originalWidth || page.width;
        const pdfOriginalHeightPt = page.view?.[3] || page.originalHeight || page.height;

        // The rendered canvas and its enclosing page container.
        const canvas = document.querySelector<HTMLCanvasElement>('.react-pdf__Page__canvas');
        const pageContainer = canvas?.closest<HTMLElement>('.react-pdf__Page');

        if (canvas && pdfOriginalWidthPt) {
          // Canvas display size (CSS pixels actually occupied in the browser).
          const canvasDisplayWidth = canvas.offsetWidth;
          const canvasDisplayHeight = canvas.offsetHeight;

          // Coordinate scale: canvas display size / PDF original size.
          // NOTE(review): if the canvas width already includes the user zoom, calibrating
          // while scale !== 1 would bake the zoom into this factor — confirm.
          const autoScale = canvasDisplayWidth / pdfOriginalWidthPt;

          console.log('📏 PDF尺寸信息:');
          console.log(' - PDF原始尺寸 (page.view):', pdfOriginalWidthPt, 'x', pdfOriginalHeightPt, 'pt');
          console.log(' - Canvas显示尺寸 (offsetWidth):', canvasDisplayWidth, 'x', canvasDisplayHeight, 'px');
          console.log(' - 用户缩放 (scale):', scale);
          console.log(' - devicePixelRatio:', window.devicePixelRatio || 1);
          console.log('🎯 自动计算坐标缩放:', autoScale.toFixed(3), 'x');
          console.log(' 公式: Canvas显示宽度 / PDF原始宽度 =', canvasDisplayWidth, '/', pdfOriginalWidthPt);

          // Persist the original width and the computed scale.
          setPdfOriginalWidth(pdfOriginalWidthPt);
          setCoordinateScale(autoScale);
          setIsScaleAutoCalculated(true);

          toastService.success(`自动校准完成: ${autoScale.toFixed(3)}x`);
        } else {
          console.warn('⚠️ 无法获取Canvas元素、Page容器或原始尺寸');
          console.log('调试信息:', {
            hasCanvas: !!canvas,
            hasPageContainer: !!pageContainer,
            pdfOriginalWidthPt,
            pageWidth: page.width,
            pageHeight: page.height,
            pageView: page.view,
            pageOriginalWidth: page.originalWidth,
            pageObject: Object.keys(page),
          });
        }
      }, 200); // 200 ms delay to let rendering finish
    }
  };

  /** Logs a page render failure. */
  const onPageLoadError = (error: Error) => {
    console.error('❌ 页面渲染失败:', error);
  };

  // ============ Zoom controls ============

  /** Zooms in by one step, capped at MAX_SCALE. */
  const handleZoomIn = () => {
    if (scale >= MAX_SCALE) return;
    setScale(prev => Math.min(prev + SCALE_STEP, MAX_SCALE));
    // Report the clamped value so the toast never exceeds the real zoom.
    toastService.success(`放大至 ${Math.round(Math.min(scale + SCALE_STEP, MAX_SCALE) * 100)}%`);
  };

  /** Zooms out by one step, floored at MIN_SCALE. */
  const handleZoomOut = () => {
    if (scale <= MIN_SCALE) return;
    setScale(prev => Math.max(prev - SCALE_STEP, MIN_SCALE));
    toastService.success(`缩小至 ${Math.round(Math.max(scale - SCALE_STEP, MIN_SCALE) * 100)}%`);
  };

  /** Resets the zoom to 100%. */
  const handleResetZoom = () => {
    setScale(1.0);
    toastService.success('重置缩放至 100%');
  };

  // ============ Rotation controls ============

  /** Rotates 90° counter-clockwise, keeping the angle within [0, 360). */
  const handleRotateLeft = () => {
    // "+ 360" because JavaScript's % keeps the sign of the dividend (-90 % 360 === -90).
    setRotation(prev => (prev - 90 + 360) % 360);
  };

  /** Rotates 90° clockwise, keeping the angle within [0, 360). */
  const handleRotateRight = () => {
    setRotation(prev => (prev + 90) % 360);
  };

  // ============ Page navigation ============

  const handlePreviousPage = () => {
    if (currentPage > 1) {
      setCurrentPage(prev => prev - 1);
    }
  };

  const handleNextPage = () => {
    if (numPages && currentPage < numPages) {
      setCurrentPage(prev => prev + 1);
    }
  };

  /** Jumps to `pageNum` when it lies within [1, numPages]; otherwise does nothing. */
  const handleGoToPage = (pageNum: number) => {
    if (numPages && pageNum >= 1 && pageNum <= numPages) {
      setCurrentPage(pageNum);
    }
  };

  // ============ Text selection and highlighting ============

  /** Tracks the currently selected text and logs where it sits on its page. */
  const handleTextSelection = useCallback(() => {
    const selection = window.getSelection();
    if (!selection || selection.isCollapsed) {
      setSelectedText('');
      return;
    }

    const text = selection.toString();
    setSelectedText(text);
    console.log('📝 选中文本:', text);

    try {
      const measured = measureSelection(selection, scale);
      if (!measured) return;

      console.log('📍 高亮区域:', { pageNumber: measured.pageNumber, rects: measured.rects });
    } catch (error) {
      console.error('❌ 获取选区位置失败:', error);
    }
  }, [scale]);

  /** Turns the current selection into a yellow highlight and clears the selection. */
  const handleAddHighlight = () => {
    if (!selectedText) {
      toastService.warning('请先选择要高亮的文本');
      return;
    }

    const selection = window.getSelection();
    if (!selection || selection.isCollapsed) return;

    try {
      const measured = measureSelection(selection, scale);
      if (!measured) return;

      const newHighlight: HighlightArea = {
        id: `highlight-${Date.now()}`,
        pageNumber: measured.pageNumber,
        text: selectedText,
        rects: measured.rects,
        color: '#FFFF00', // yellow
      };

      setHighlights(prev => [...prev, newHighlight]);
      toastService.success('已添加高亮');
      selection.removeAllRanges();
      setSelectedText('');
    } catch (error) {
      console.error('❌ 添加高亮失败:', error);
      toastService.error('添加高亮失败');
    }
  };

  const handleClearHighlights = () => {
    setHighlights([]);
    toastService.success('已清除所有高亮');
  };

  // ============ Text search and highlighting ============

  /**
   * Case-insensitively searches every rendered text layer for `searchText`
   * and stores one highlight per match (overlapping matches included).
   */
  const handleSearchAndHighlight = () => {
    if (!searchText.trim()) {
      toastService.warning('请输入要搜索的文本');
      return;
    }

    // Drop the previous results.
    setSearchResults([]);

    const results: HighlightArea[] = [];
    const seenIds = new Set<string>();
    const needle = searchText.toLowerCase();

    // Walk the text layer of every rendered page.
    const textLayers = document.querySelectorAll('.textLayer, .react-pdf__Page__textContent');

    textLayers.forEach(textLayer => {
      const pageElement = textLayer.closest('[data-page-number]');
      if (!pageElement) return;

      // Read the real page number from the DOM: the position in the NodeList
      // is wrong whenever some page has no text layer rendered.
      const pageNumber = readPageNumber(pageElement);
      const pageRect = pageElement.getBoundingClientRect();

      // Lower-cased once per layer instead of once per loop iteration.
      const haystack = (textLayer.textContent || '').toLowerCase();

      let searchIndex = 0;
      while (searchIndex < haystack.length) {
        const foundIndex = haystack.indexOf(needle, searchIndex);
        if (foundIndex === -1) break;

        // A match was found in the flat text; map it back to DOM text nodes.
        try {
          const walker = document.createTreeWalker(textLayer, NodeFilter.SHOW_TEXT, null);

          const targetStart = foundIndex;
          const targetEnd = foundIndex + searchText.length;
          const matchedRanges: Range[] = [];

          let currentOffset = 0;
          for (let node = walker.nextNode(); node; node = walker.nextNode()) {
            const nodeLength = node.textContent?.length || 0;
            const nodeStart = currentOffset;
            const nodeEnd = currentOffset + nodeLength;

            // Does this node overlap the matched span?
            if (nodeEnd > targetStart && nodeStart < targetEnd) {
              const range = document.createRange();
              range.setStart(node, Math.max(0, targetStart - nodeStart));
              range.setEnd(node, Math.min(nodeLength, targetEnd - nodeStart));
              matchedRanges.push(range);
            }

            currentOffset = nodeEnd;
          }

          // Collect the rectangles of every matched range.
          const allRects = matchedRanges.flatMap(range =>
            toPageRelativeRects(range.getClientRects(), pageRect, scale)
          );

          const id = `search-${pageNumber}-${foundIndex}`;
          // Guard against duplicate ids should both selectors match nested elements of one page.
          if (allRects.length > 0 && !seenIds.has(id)) {
            seenIds.add(id);
            results.push({
              id,
              pageNumber,
              text: searchText,
              rects: allRects,
              color: '#FFFF00', // yellow highlight
            });
          }
        } catch (error) {
          console.error('搜索文本时出错:', error);
        }

        searchIndex = foundIndex + 1;
      }
    });

    setSearchResults(results);

    if (results.length > 0) {
      toastService.success(`找到 ${results.length} 处匹配的文本`);
      console.log('🔍 搜索结果:', results);
    } else {
      toastService.warning('未找到匹配的文本');
    }
  };

  const handleClearSearch = () => {
    setSearchResults([]);
    setSearchText('');
    toastService.success('已清除搜索结果');
  };

  // ============ Coordinate-based highlighting (scanned PDFs) ============

  /**
   * Parses `coordinateInput` as JSON and adds one green highlight per page.
   * Accepted shapes: nested `{ ocr_result: { <docType>: { single_char_boxes: { page_N: [[char, ...], ...] } } } }`,
   * a flat array of `{ box, char, page }`, or a single `{ box, char, page }` object.
   * Any parse or shape failure ends in an error toast.
   */
  const handleAddCoordinateHighlight = () => {
    if (!coordinateInput.trim()) {
      toastService.warning('请输入坐标数据');
      return;
    }

    try {
      // Untyped external input; shape is probed branch by branch below.
      const data = JSON.parse(coordinateInput);

      // Lines grouped by page number.
      const pageLineData: Record<number, TextLine[]> = {};

      // Detect the data format.
      if (data.ocr_result) {
        // New format: nested structure, processed line by line.
        console.log('🔍 检测到嵌套OCR格式(按行处理)');

        // Iterate over every document type.
        Object.keys(data.ocr_result).forEach(docType => {
          const docData = data.ocr_result[docType];
          if (!docData.single_char_boxes) return;

          // Iterate over every page key (e.g. "page_7").
          // eslint-disable-next-line @typescript-eslint/no-explicit-any -- unvalidated OCR payload
          Object.entries(docData.single_char_boxes).forEach(([pageKey, pageData]: [string, any]) => {
            // Extract the page number from the key; default to page 1.
            const pageMatch = pageKey.match(/page_(\d+)/);
            const pageNumber = pageMatch ? parseInt(pageMatch[1], 10) : 1;

            if (!pageLineData[pageNumber]) {
              pageLineData[pageNumber] = [];
            }

            // pageData is expected to be a 2-D array: one sub-array per line.
            if (!Array.isArray(pageData)) return;

            // eslint-disable-next-line @typescript-eslint/no-explicit-any -- unvalidated OCR payload
            pageData.forEach((line: any) => {
              if (!Array.isArray(line) || line.length === 0) return;

              // Keep only the entries that carry both a box and a character.
              const lineChars: CharacterBox[] = [];
              // eslint-disable-next-line @typescript-eslint/no-explicit-any -- unvalidated OCR payload
              line.forEach((charData: any) => {
                if (charData.box && charData.char) {
                  lineChars.push({
                    box: charData.box,
                    char: charData.char,
                    page: pageNumber,
                  });
                }
              });

              if (lineChars.length > 0) {
                pageLineData[pageNumber].push(buildTextLine(lineChars));
              }
            });
          });
        });
      } else if (Array.isArray(data)) {
        // Legacy format: flat array (kept for compatibility).
        console.log('🔍 检测到简单数组格式(按字符高亮)');
        const allBoxes: CharacterBox[] = data.filter(item => item.box && item.char && item.page);

        // Each character becomes its own line.
        allBoxes.forEach(box => {
          const page = box.page;
          if (!pageLineData[page]) {
            pageLineData[page] = [];
          }
          pageLineData[page].push(buildTextLine([box]));
        });
      } else if (data.box && data.char && data.page) {
        // A single character object.
        console.log('🔍 检测到单个字符对象');
        pageLineData[data.page] = [buildTextLine([data])];
      } else {
        toastService.error('无法识别的数据格式');
        return;
      }

      // Validate the parsed data.
      const allLines = Object.values(pageLineData);
      const totalPages = Object.keys(pageLineData).length;
      const totalLines = allLines.reduce((sum, lines) => sum + lines.length, 0);
      const totalChars = allLines.reduce(
        (sum, lines) => sum + lines.reduce((lineSum, line) => lineSum + line.chars.length, 0),
        0
      );

      if (totalPages === 0 || totalChars === 0) {
        toastService.error('坐标数据为空或格式不正确');
        return;
      }

      console.log(`✅ 解析成功: ${totalPages} 页, ${totalLines} 行, ${totalChars} 个字符`);

      // Create one highlight per page.
      const newHighlights: CoordinateHighlight[] = Object.entries(pageLineData).map(
        ([page, lines]) => ({
          id: `coord-${Date.now()}-page-${page}`,
          pageNumber: parseInt(page, 10),
          text: lines.map(line => line.text).join('\n'),
          lines,
          color: '#00FF00', // green, to distinguish from the other highlights
        })
      );

      setCoordinateHighlights(prev => [...prev, ...newHighlights]);
      toastService.success(`已添加 ${totalPages} 页坐标高亮,共 ${totalLines} 行 ${totalChars} 个字符`);
      console.log('📍 坐标高亮已添加:', newHighlights);
    } catch (error) {
      console.error('解析坐标数据失败:', error);
      toastService.error('坐标数据格式错误,请检查JSON格式');
    }
  };

  const handleClearCoordinateHighlights = () => {
    setCoordinateHighlights([]);
    toastService.success('已清除坐标高亮');
  };

  /** Fills the coordinate input with sample data in the nested OCR format. */
  const handleFillTestCoordinates = () => {
    const testData = {
      "ocr_result": {
        "现场笔录": {
          "single_char_boxes": {
            "page_7": [
              [
                { "box": [[184, 567], [202, 567], [202, 597], [184, 597]], "char": "站", "score": 0.99857 },
                { "box": [[209, 567], [227, 567], [227, 597], [209, 597]], "char": "民", "score": 0.99702 },
                { "box": [[234, 567], [252, 567], [252, 597], [234, 597]], "char": "善", "score": 0.33934 },
                { "box": [[259, 567], [278, 567], [278, 597], [259, 597]], "char": "在", "score": 0.98556 },
                { "box": [[279, 567], [298, 567], [298, 597], [279, 597]], "char": "车", "score": 0.92309 },
                { "box": [[304, 567], [323, 567], [323, 597], [304, 597]], "char": "牌", "score": 0.50887 }
              ],
              [
                { "box": [[110, 596], [132, 596], [132, 629], [110, 629]], "char": "轿", "score": 0.9266 },
                { "box": [[132, 596], [151, 596], [151, 629], [132, 629]], "char": "车", "score": 0.96376 },
                { "box": [[151, 596], [170, 596], [170, 629], [151, 629]], "char": "上", "score": 0.99372 },
                { "box": [[176, 596], [198, 596], [198, 629], [176, 629]], "char": "查", "score": 0.50258 },
                { "box": [[198, 596], [220, 596], [220, 629], [198, 629]], "char": "获", "score": 0.60755 }
              ]
            ]
          }
        }
      }
    };

    setCoordinateInput(JSON.stringify(testData, null, 2));
  };

  // ============ Render mode toggle ============

  /** Switches between canvas and SVG rendering and announces the new mode. */
  const handleToggleRenderMode = () => {
    setRenderMode(prev => prev === 'canvas' ? 'svg' : 'canvas');
    // `renderMode` still holds the pre-toggle value here, so the toast names the target mode.
    toastService.success(`切换到 ${renderMode === 'canvas' ? 'SVG' : 'Canvas'} 渲染模式`);
  };

  // ============ Render PDF ============
  const renderPdfPages = () => {
    if (!numPages) return null;

    return Array.from({ length: numPages }, (_, i) => i + 1).map(pageNum => (
{ if (el) pageRefs.current.set(pageNum, el); }} data-page-number={pageNum} className="mb-8 flex flex-col items-center" >
第 {pageNum} 页
{/* 渲染手动高亮层 */} {highlights .filter(h => h.pageNumber === pageNum) .map(highlight => (
{highlight.rects.map((rect, idx) => (
))}
))} {/* 渲染搜索结果高亮层 */} {searchResults .filter(h => h.pageNumber === pageNum) .map(highlight => (
{highlight.rects.map((rect, idx) => (
))}
))} {/* 渲染基于坐标的高亮层(扫描版PDF - 按行高亮)*/} {coordinateHighlights .filter(h => h.pageNumber === pageNum) .map(highlight => ( {highlight.lines.map((line, idx) => { // 应用校准参数:坐标缩放 + 偏移 + PDF缩放 const x = (line.rect.x1 * coordinateScale + coordinateOffsetX) * scale; const y = (line.rect.y1 * coordinateScale + coordinateOffsetY) * scale; const width = ((line.rect.x2 - line.rect.x1) * coordinateScale) * scale; const height = ((line.rect.y2 - line.rect.y1) * coordinateScale) * scale; return ( {`行高亮: ${line.text}`} ); })} ))}
)); }; return (
{/* 强制文本层样式 - 确保文本可以被选择 */}

React-PDF 功能测试 Demo

探索 react-pdf v9.2.1 的各种内置功能

{/* 左侧控制面板 */}
{/* 基础信息 */}

PDF信息

总页数: {numPages || '-'}
当前页: {currentPage}
缩放: {Math.round(scale * 100)}%
旋转: {rotation}°
渲染模式: {renderMode.toUpperCase()}
{/* 缩放控制 */}

缩放控制

{/* 旋转控制 */}

旋转控制

{/* 页面导航 */}

页面导航

handleGoToPage(parseInt(e.target.value) || 1)} className="w-full px-2 py-1 text-sm border border-gray-300 rounded" placeholder="跳转到页码" />
{/* 图层控制 */}

图层控制

{/* 渲染模式 */}

渲染模式

{/* 文本搜索 */}

文本搜索