feat(pdf): support GraphRAG text_bbox highlighting in PDF viewer

When documents are processed through the GraphRAG pipeline, coordinate
enrichment produces text_bbox (paragraph-level coordinates) instead of
char_positions (character-level OCR coordinates). Added a resolveCharPositions()
helper that converts text_bbox to the CharPosition[] format, enabling PDF
highlight rendering for GraphRAG-processed documents.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
DocAuditAI Dev
2026-03-23 16:44:22 +08:00
parent 32bee87998
commit 33fbd6b860
5 changed files with 113 additions and 39 deletions
@@ -38,6 +38,7 @@ interface PdfPreviewProps {
filePath: string; // PDF 文件路径
targetPage?: number; // 目标页码
charPositions?: Array<{ box: number[][], char: string, score: number }>; // 字符位置信息(用于高亮显示)
textBbox?: { x_min: number; y_min: number; x_max: number; y_max: number }; // GraphRAG段落级坐标
isStructuredView?: boolean; // 是否结构化视图
activeReviewPointResultId?: string | null; // 激活的评查点结果ID
pageOffset?: number; // 页码偏移量(用于调整 OCR 结果的页码)
@@ -49,6 +50,7 @@ export function PdfPreview({
filePath,
targetPage,
charPositions,
textBbox,
isStructuredView = false,
activeReviewPointResultId,
pageOffset = 0,
@@ -227,6 +229,18 @@ export function PdfPreview({
// ============ 处理字符位置数据,转换为高亮矩形 ============
const processCharPositionsToHighlights = () => {
// GraphRAG fallback: charPositions 为空但有 textBbox 时,用段落级坐标画高亮
if ((!charPositions || charPositions.length === 0) && textBbox && targetPage) {
const scale = zoomLevel / 100;
return {
x: textBbox.x_min * coordinateScale * scale,
y: textBbox.y_min * coordinateScale * scale,
width: (textBbox.x_max - textBbox.x_min) * coordinateScale * scale,
height: (textBbox.y_max - textBbox.y_min) * coordinateScale * scale,
text: ''
};
}
if (!charPositions || charPositions.length === 0 || !targetPage) {
return null;
}