feat(pdf): support GraphRAG text_bbox highlighting in PDF viewer

When documents are processed through the GraphRAG pipeline, coordinate
enrichment produces text_bbox (paragraph-level coordinates) instead of
char_positions (character-level OCR coordinates). This commit adds a
resolveCharPositions() helper that converts text_bbox to the CharPosition[]
format, enabling PDF highlight rendering for GraphRAG-processed documents.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
DocAuditAI Dev
2026-03-23 16:44:22 +08:00
parent 32bee87998
commit 33fbd6b860
5 changed files with 113 additions and 39 deletions
@@ -93,6 +93,32 @@ export interface CharPosition {
score: number; // OCR识别置信度
}
/**
 * Resolves the highlight geometry for one extraction result.
 *
 * Character-level `char_positions` are preferred and returned as-is when the
 * array is non-empty. GraphRAG extraction results carry only a paragraph-level
 * `text_bbox`, so as a fallback that box is converted into a single rectangular
 * CharPosition, letting PdfPreview's highlight logic be reused.
 *
 * @param data - Extraction payload; may carry `char_positions` and/or `text_bbox`.
 *   Any other value (null, undefined, primitives) yields `undefined`.
 * @returns The positions to highlight, or `undefined` when neither source is usable.
 */
function resolveCharPositions(data: unknown): CharPosition[] | undefined {
  if (typeof data !== 'object' || data === null) {
    return undefined;
  }
  const payload = data as Record<string, unknown>;

  // Prefer character-level positions when they are present.
  const charPositions = payload.char_positions;
  if (Array.isArray(charPositions) && charPositions.length > 0) {
    // Element shape is not validated here; entries are assumed to match CharPosition.
    return charPositions as CharPosition[];
  }

  // Fallback: text_bbox -> CharPosition[]
  const bbox = payload.text_bbox;
  if (typeof bbox !== 'object' || bbox === null) {
    return undefined;
  }
  const edges = bbox as Record<string, unknown>;

  // Accept numbers and numeric strings, but always hand back real finite numbers
  // so `box` honours its number[][] contract (no string concatenation downstream).
  const toFinite = (raw: unknown): number | undefined => {
    const parsed = typeof raw === 'string' && raw.trim() !== '' ? Number(raw) : raw;
    return typeof parsed === 'number' && Number.isFinite(parsed) ? parsed : undefined;
  };

  const xMin = toFinite(edges.x_min);
  const yMin = toFinite(edges.y_min);
  const xMax = toFinite(edges.x_max);
  const yMax = toFinite(edges.y_max);
  if (xMin === undefined || yMin === undefined || xMax === undefined || yMax === undefined) {
    return undefined;
  }

  // A zero- or negative-area box cannot be drawn as a highlight.
  if (xMax - xMin <= 0 || yMax - yMin <= 0) {
    return undefined;
  }

  // Corners in order: top-left, top-right, bottom-right, bottom-left.
  return [
    {
      box: [
        [xMin, yMin],
        [xMax, yMin],
        [xMax, yMax],
        [xMin, yMax],
      ],
      char: '',
      score: 1,
    },
  ];
}
/**
* 评查点类型定义
* 用于展示单个评查结果
@@ -1512,7 +1538,7 @@ export function ReviewPointsList({
for (const item of chain) {
if (item.data.page && typeof onReviewPointSelect === 'function') {
hasPage = true;
onReviewPointSelect(reviewPoint.id, Number(item.data.page), item.data.char_positions, item.data.value);
onReviewPointSelect(reviewPoint.id, Number(item.data.page), resolveCharPositions(item.data), item.data.value);
break;
}
}
@@ -1526,7 +1552,7 @@ export function ReviewPointsList({
// 遍历chain找到第一个有效的page
for (const item of chain) {
if (item.data.page && typeof onReviewPointSelect === 'function') {
onReviewPointSelect(reviewPoint.id, Number(item.data.page), item.data.char_positions, item.data.value);
onReviewPointSelect(reviewPoint.id, Number(item.data.page), resolveCharPositions(item.data), item.data.value);
break;
}
}
@@ -1566,11 +1592,11 @@ export function ReviewPointsList({
// 假设onReviewPointSelect在作用域内可用
const reviewPointId = reviewPoint.id as string;
if (reviewPointId && typeof onReviewPointSelect === 'function') {
onReviewPointSelect(reviewPointId, Number(item.data.page), item.data.char_positions, item.data.value);
onReviewPointSelect(reviewPointId, Number(item.data.page), resolveCharPositions(item.data), item.data.value);
}
}
else if(reviewPoint.contentPage && reviewPoint.contentPage[item.field]){
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[item.field]), item.data.char_positions, item.data.value);
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[item.field]), resolveCharPositions(item.data), item.data.value);
}
else{
toastService.error(`没有找到${item.field}对应的索引内容`);
@@ -1649,11 +1675,11 @@ export function ReviewPointsList({
if (chain[0].data.page) {
const reviewPointId = reviewPoint.id as string;
if (reviewPointId && typeof onReviewPointSelect === 'function') {
onReviewPointSelect(reviewPointId, chain[0].data.page, chain[0].data.char_positions, chain[0].data.value);
onReviewPointSelect(reviewPointId, chain[0].data.page, resolveCharPositions(chain[0].data), chain[0].data.value);
}
}
else if(reviewPoint.contentPage && reviewPoint.contentPage[chain[0].field]){
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[chain[0].field]), chain[0].data.char_positions, chain[0].data.value);
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[chain[0].field]), resolveCharPositions(chain[0].data), chain[0].data.value);
}
else{
toastService.error(`没有找到${chain[0].field}对应的索引内容`);
@@ -1675,11 +1701,11 @@ export function ReviewPointsList({
if (chain[1].data.page) {
const reviewPointId = reviewPoint.id as string;
if (reviewPointId && typeof onReviewPointSelect === 'function') {
onReviewPointSelect(reviewPointId, chain[1].data.page, chain[1].data.char_positions, chain[1].data.value);
onReviewPointSelect(reviewPointId, chain[1].data.page, resolveCharPositions(chain[1].data), chain[1].data.value);
}
}
else if(reviewPoint.contentPage && reviewPoint.contentPage[chain[1].field]){
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[chain[1].field]), chain[1].data.char_positions, chain[1].data.value);
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[chain[1].field]), resolveCharPositions(chain[1].data), chain[1].data.value);
}
else{
toastService.error(`没有找到${chain[1].field}对应的索引内容`);
@@ -1815,9 +1841,9 @@ export function ReviewPointsList({
onClick={(e) => {
e.stopPropagation();
if (mainTypeValue.page && typeof onReviewPointSelect === 'function') {
onReviewPointSelect(reviewPoint.id, Number(mainTypeValue.page), mainTypeValue.char_positions, mainTypeValue.value);
onReviewPointSelect(reviewPoint.id, Number(mainTypeValue.page), resolveCharPositions(mainTypeValue), mainTypeValue.value);
}else if(reviewPoint.contentPage && reviewPoint.contentPage[fieldKey]){
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[fieldKey]), mainTypeValue.char_positions, mainTypeValue.value);
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[fieldKey]), resolveCharPositions(mainTypeValue), mainTypeValue.value);
}else{
toastService.error(`没有找到${fieldKey}对应的索引内容`);
}
@@ -1826,9 +1852,9 @@ export function ReviewPointsList({
if (e.key === 'Enter' || e.key === ' ') {
e.preventDefault();
if (mainTypeValue.page && typeof onReviewPointSelect === 'function') {
onReviewPointSelect(reviewPoint.id, Number(mainTypeValue.page), mainTypeValue.char_positions, mainTypeValue.value);
onReviewPointSelect(reviewPoint.id, Number(mainTypeValue.page), resolveCharPositions(mainTypeValue), mainTypeValue.value);
}else if(reviewPoint.contentPage && reviewPoint.contentPage[fieldKey]){
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[fieldKey]), mainTypeValue.char_positions, mainTypeValue.value);
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[fieldKey]), resolveCharPositions(mainTypeValue), mainTypeValue.value);
}else{
toastService.error(`没有找到${fieldKey}对应的索引内容`);
}
@@ -1959,9 +1985,9 @@ export function ReviewPointsList({
onClick={(e) => {
e.stopPropagation();
if (value.page && typeof onReviewPointSelect === 'function') {
onReviewPointSelect(reviewPoint.id, Number(value.page), value.char_positions, value.value);
onReviewPointSelect(reviewPoint.id, Number(value.page), resolveCharPositions(value), value.value);
}else if(reviewPoint.contentPage && reviewPoint.contentPage[key]){
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[key]), value.char_positions, value.value);
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[key]), resolveCharPositions(value), value.value);
}else{
toastService.error(`没有找到${key}对应的索引内容`);
}
@@ -1971,9 +1997,9 @@ export function ReviewPointsList({
if (e.key === 'Enter' || e.key === ' ') {
e.preventDefault();
if (value.page && typeof onReviewPointSelect === 'function') {
onReviewPointSelect(reviewPoint.id, Number(value.page), value.char_positions, value.value);
onReviewPointSelect(reviewPoint.id, Number(value.page), resolveCharPositions(value), value.value);
}else if(reviewPoint.contentPage && reviewPoint.contentPage[key]){
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[key]), value.char_positions, value.value);
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[key]), resolveCharPositions(value), value.value);
}else{
toastService.error(`没有找到${key}对应的索引内容`);
}
+3 -1
View File
@@ -53,6 +53,7 @@ interface FilePreviewProps {
activeReviewPointResultId: string | null;
targetPage?: number; // 新增目标页码参数
charPositions?: Array<{ box: number[][], char: string, score: number }>; // 字符位置信息(仅用于PDF
textBbox?: { x_min: number; y_min: number; x_max: number; y_max: number }; // GraphRAG段落级坐标
highlightValue?: string; // 高亮文本值(用于DOCX
isStructuredView?: boolean; // 是否显示结构化视图
userInfo?: {
@@ -74,7 +75,7 @@ export interface FilePreviewHandle {
}
// export function FilePreview({ fileContent, reviewPoints, activeReviewPointResultId, targetPage }: FilePreviewProps) {
export const FilePreview = forwardRef<FilePreviewHandle, FilePreviewProps>(function FilePreview({ fileContent, activeReviewPointResultId, targetPage, charPositions, highlightValue, isStructuredView = false, userInfo, aiSuggestionReplace, isTemplate = false }, ref) {
export const FilePreview = forwardRef<FilePreviewHandle, FilePreviewProps>(function FilePreview({ fileContent, activeReviewPointResultId, targetPage, charPositions, textBbox, highlightValue, isStructuredView = false, userInfo, aiSuggestionReplace, isTemplate = false }, ref) {
// 获取文件类型
const real_path = fileContent.path || fileContent.template_contract_path || '';
const fileExtension = real_path.split('.').pop()?.toLowerCase();
@@ -236,6 +237,7 @@ export const FilePreview = forwardRef<FilePreviewHandle, FilePreviewProps>(funct
filePath={real_path}
targetPage={targetPage}
charPositions={charPositions}
textBbox={textBbox}
isStructuredView={isStructuredView}
activeReviewPointResultId={activeReviewPointResultId}
pageOffset={pageOffset}
+47 -21
View File
@@ -82,6 +82,32 @@ export interface CharPosition {
score: number; // OCR识别置信度
}
/**
 * Resolves the highlight geometry for one extraction result.
 *
 * Character-level `char_positions` are preferred and returned as-is when the
 * array is non-empty. GraphRAG extraction results carry only a paragraph-level
 * `text_bbox`, so as a fallback that box is converted into a single rectangular
 * CharPosition, letting PdfPreview's highlight logic be reused.
 *
 * @param data - Extraction payload; may carry `char_positions` and/or `text_bbox`.
 *   Any other value (null, undefined, primitives) yields `undefined`.
 * @returns The positions to highlight, or `undefined` when neither source is usable.
 */
function resolveCharPositions(data: unknown): CharPosition[] | undefined {
  if (typeof data !== 'object' || data === null) {
    return undefined;
  }
  const payload = data as Record<string, unknown>;

  // Prefer character-level positions when they are present.
  const charPositions = payload.char_positions;
  if (Array.isArray(charPositions) && charPositions.length > 0) {
    // Element shape is not validated here; entries are assumed to match CharPosition.
    return charPositions as CharPosition[];
  }

  // Fallback: text_bbox -> CharPosition[]
  const bbox = payload.text_bbox;
  if (typeof bbox !== 'object' || bbox === null) {
    return undefined;
  }
  const edges = bbox as Record<string, unknown>;

  // Accept numbers and numeric strings, but always hand back real finite numbers
  // so `box` honours its number[][] contract (no string concatenation downstream).
  const toFinite = (raw: unknown): number | undefined => {
    const parsed = typeof raw === 'string' && raw.trim() !== '' ? Number(raw) : raw;
    return typeof parsed === 'number' && Number.isFinite(parsed) ? parsed : undefined;
  };

  const xMin = toFinite(edges.x_min);
  const yMin = toFinite(edges.y_min);
  const xMax = toFinite(edges.x_max);
  const yMax = toFinite(edges.y_max);
  if (xMin === undefined || yMin === undefined || xMax === undefined || yMax === undefined) {
    return undefined;
  }

  // A zero- or negative-area box cannot be drawn as a highlight.
  if (xMax - xMin <= 0 || yMax - yMin <= 0) {
    return undefined;
  }

  // Corners in order: top-left, top-right, bottom-right, bottom-left.
  return [
    {
      box: [
        [xMin, yMin],
        [xMax, yMin],
        [xMax, yMax],
        [xMin, yMax],
      ],
      char: '',
      score: 1,
    },
  ];
}
/**
* 评查点类型定义
* 用于展示单个评查结果
@@ -1262,7 +1288,7 @@ export function ReviewPointsList({
for (const item of chain) {
if (item.data.page && typeof onReviewPointSelect === 'function') {
hasPage = true;
onReviewPointSelect(reviewPoint.id, Number(item.data.page), item.data.char_positions);
onReviewPointSelect(reviewPoint.id, Number(item.data.page), resolveCharPositions(item.data));
break;
}
}
@@ -1276,7 +1302,7 @@ export function ReviewPointsList({
// 遍历chain找到第一个有效的page
for (const item of chain) {
if (item.data.page && typeof onReviewPointSelect === 'function') {
onReviewPointSelect(reviewPoint.id, Number(item.data.page), item.data.char_positions);
onReviewPointSelect(reviewPoint.id, Number(item.data.page), resolveCharPositions(item.data));
break;
}
}
@@ -1312,15 +1338,15 @@ export function ReviewPointsList({
onClick={(e) => {
e.stopPropagation();
if (item.data.page) {
console.log('点击了长链条评查点', item.data.char_positions, item.data);
console.log('点击了长链条评查点', resolveCharPositions(item.data), item.data);
// 假设onReviewPointSelect在作用域内可用
const reviewPointId = reviewPoint.id as string;
if (reviewPointId && typeof onReviewPointSelect === 'function') {
onReviewPointSelect(reviewPointId, Number(item.data.page), item.data.char_positions, item.data.value);
onReviewPointSelect(reviewPointId, Number(item.data.page), resolveCharPositions(item.data), item.data.value);
}
}
else if(reviewPoint.contentPage && reviewPoint.contentPage[item.field]){
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[item.field]), item.data.char_positions, item.data.value);
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[item.field]), resolveCharPositions(item.data), item.data.value);
}
else{
toastService.error(`没有找到${item.field}对应的索引内容`);
@@ -1396,16 +1422,16 @@ export function ReviewPointsList({
${res ? 'hover:bg-[rgba(0,128,0,0.1)]' : 'hover:bg-[rgba(255,255,0,0.1)]'} transition-colors flex flex-col`}
onClick={(e) => {
e.stopPropagation();
console.log('点击了短链1左', chain[0].data.char_positions, chain[0].data)
console.log('点击了短链1左', resolveCharPositions(chain[0].data), chain[0].data)
if (chain[0].data.page) {
// console.log('点击了短链1左', chain[0].data.char_positions, chain[0].data)
// console.log('点击了短链1左', resolveCharPositions(chain[0].data), chain[0].data)
const reviewPointId = reviewPoint.id as string;
if (reviewPointId && typeof onReviewPointSelect === 'function') {
onReviewPointSelect(reviewPointId, chain[0].data.page, chain[0].data.char_positions, chain[0].data.value);
onReviewPointSelect(reviewPointId, chain[0].data.page, resolveCharPositions(chain[0].data), chain[0].data.value);
}
}
else if(reviewPoint.contentPage && reviewPoint.contentPage[chain[0].field]){
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[chain[0].field]), chain[0].data.char_positions,chain[0].data.value);
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[chain[0].field]), resolveCharPositions(chain[0].data),chain[0].data.value);
}
else{
toastService.error(`没有找到${chain[0].field}对应的索引内容`);
@@ -1425,14 +1451,14 @@ export function ReviewPointsList({
onClick={(e) => {
e.stopPropagation();
if (chain[1].data.page) {
console.log('点击了短链2右', chain[1].data.char_positions, chain[1].data)
console.log('点击了短链2右', resolveCharPositions(chain[1].data), chain[1].data)
const reviewPointId = reviewPoint.id as string;
if (reviewPointId && typeof onReviewPointSelect === 'function') {
onReviewPointSelect(reviewPointId, chain[1].data.page, chain[1].data.char_positions, chain[1].data.value);
onReviewPointSelect(reviewPointId, chain[1].data.page, resolveCharPositions(chain[1].data), chain[1].data.value);
}
}
else if(reviewPoint.contentPage && reviewPoint.contentPage[chain[1].field]){
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[chain[1].field]), chain[1].data.char_positions, chain[1].data.value);
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[chain[1].field]), resolveCharPositions(chain[1].data), chain[1].data.value);
}
else{
toastService.error(`没有找到${chain[1].field}对应的索引内容`);
@@ -1569,10 +1595,10 @@ export function ReviewPointsList({
e.stopPropagation();
if (mainTypeValue.page && typeof onReviewPointSelect === 'function') {
console.log("点击了其他评查点", mainTypeValue)
onReviewPointSelect(reviewPoint.id, Number(mainTypeValue.page), mainTypeValue.char_positions, mainTypeValue.value);
// onReviewPointSelect(reviewPoint.id, undefined, mainTypeValue.char_positions, mainTypeValue.value);
onReviewPointSelect(reviewPoint.id, Number(mainTypeValue.page), resolveCharPositions(mainTypeValue), mainTypeValue.value);
// onReviewPointSelect(reviewPoint.id, undefined, resolveCharPositions(mainTypeValue), mainTypeValue.value);
}else if(reviewPoint.contentPage && reviewPoint.contentPage[fieldKey]){
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[fieldKey]), mainTypeValue.char_positions, mainTypeValue.value);
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[fieldKey]), resolveCharPositions(mainTypeValue), mainTypeValue.value);
}else{
toastService.error(`没有找到${fieldKey}对应的索引内容`);
}
@@ -1581,7 +1607,7 @@ export function ReviewPointsList({
if (e.key === 'Enter' || e.key === ' ') {
e.preventDefault();
if (mainTypeValue.page && typeof onReviewPointSelect === 'function') {
onReviewPointSelect(reviewPoint.id, Number(mainTypeValue.page), mainTypeValue.char_positions, mainTypeValue.value);
onReviewPointSelect(reviewPoint.id, Number(mainTypeValue.page), resolveCharPositions(mainTypeValue), mainTypeValue.value);
}else{
toastService.error(`没有找到${fieldKey}对应的索引内容`);
}
@@ -1714,10 +1740,10 @@ export function ReviewPointsList({
onClick={(e) => {
e.stopPropagation();
if (value.page && typeof onReviewPointSelect === 'function') {
console.log("点击了大模型的评查点", value.char_positions, value)
onReviewPointSelect(reviewPoint.id, Number(value.page), value.char_positions, value.value);
console.log("点击了大模型的评查点", resolveCharPositions(value), value)
onReviewPointSelect(reviewPoint.id, Number(value.page), resolveCharPositions(value), value.value);
}else if(reviewPoint.contentPage && reviewPoint.contentPage[key]){
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[key]), value.char_positions,value.value);
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[key]), resolveCharPositions(value),value.value);
}else{
toastService.error(`没有找到${key}对应的索引内容`);
}
@@ -1727,9 +1753,9 @@ export function ReviewPointsList({
if (e.key === 'Enter' || e.key === ' ') {
e.preventDefault();
if (value.page && typeof onReviewPointSelect === 'function') {
onReviewPointSelect(reviewPoint.id, Number(value.page), value.char_positions, value.value);
onReviewPointSelect(reviewPoint.id, Number(value.page), resolveCharPositions(value), value.value);
}else if(reviewPoint.contentPage && reviewPoint.contentPage[key]){
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[key]), value.char_positions,value.value);
onReviewPointSelect(reviewPoint.id, Number(reviewPoint.contentPage[key]), resolveCharPositions(value),value.value);
}else{
toastService.error(`没有找到${key}对应的索引内容`);
}
@@ -38,6 +38,7 @@ interface PdfPreviewProps {
filePath: string; // PDF 文件路径
targetPage?: number; // 目标页码
charPositions?: Array<{ box: number[][], char: string, score: number }>; // 字符位置信息(用于高亮显示)
textBbox?: { x_min: number; y_min: number; x_max: number; y_max: number }; // GraphRAG段落级坐标
isStructuredView?: boolean; // 是否结构化视图
activeReviewPointResultId?: string | null; // 激活的评查点结果ID
pageOffset?: number; // 页码偏移量(用于调整 OCR 结果的页码)
@@ -49,6 +50,7 @@ export function PdfPreview({
filePath,
targetPage,
charPositions,
textBbox,
isStructuredView = false,
activeReviewPointResultId,
pageOffset = 0,
@@ -227,6 +229,18 @@ export function PdfPreview({
// ============ 处理字符位置数据,转换为高亮矩形 ============
const processCharPositionsToHighlights = () => {
// GraphRAG fallback: charPositions 为空但有 textBbox 时,用段落级坐标画高亮
if ((!charPositions || charPositions.length === 0) && textBbox && targetPage) {
const scale = zoomLevel / 100;
return {
x: textBbox.x_min * coordinateScale * scale,
y: textBbox.y_min * coordinateScale * scale,
width: (textBbox.x_max - textBbox.x_min) * coordinateScale * scale,
height: (textBbox.y_max - textBbox.y_min) * coordinateScale * scale,
text: ''
};
}
if (!charPositions || charPositions.length === 0 || !targetPage) {
return null;
}
+7 -1
View File
@@ -386,6 +386,7 @@ export default function ReviewDetails() {
const [targetPage, setTargetPage] = useState<number | undefined>(undefined);
const [templateTargetPage, setTemplateTargetPage] = useState<number | undefined>(undefined);
const [charPositions, setCharPositions] = useState<Array<{ box: number[][], char: string, score: number }> | undefined>(undefined);
const [textBbox, setTextBbox] = useState<{ x_min: number; y_min: number; x_max: number; y_max: number } | undefined>(undefined);
const [highlightValue, setHighlightValue] = useState<string | undefined>(undefined);
const [pendingUpdate, setPendingUpdate] = useState<{
reviewPointResultId: string;
@@ -551,17 +552,19 @@ export default function ReviewDetails() {
setActiveTab(tabKey);
};
const handleReviewPointSelect = (reviewPointId: string, page?: number, charPos?: Array<{ box: number[][], char: string, score: number }>, value?: string) => {
const handleReviewPointSelect = (reviewPointId: string, page?: number, charPos?: Array<{ box: number[][], char: string, score: number }>, value?: string, bbox?: { x_min: number; y_min: number; x_max: number; y_max: number }) => {
// 如果点击的是相同的评查点,但有page参数,先重置targetPage以确保useEffect能够触发
if (reviewPointId === activeReviewPointResultId && page) {
setTargetPage(undefined);
setCharPositions(undefined);
setTextBbox(undefined);
setHighlightValue(undefined);
// 使用setTimeout确保状态更新后再设置新的targetPage、charPositions和highlightValue
setTimeout(() => {
setActiveReviewPointResultId(reviewPointId);
setTargetPage(page);
setCharPositions(charPos);
setTextBbox(bbox);
setHighlightValue(value);
}, 0);
} else {
@@ -569,6 +572,7 @@ export default function ReviewDetails() {
setActiveReviewPointResultId(reviewPointId);
setTargetPage(page);
setCharPositions(charPos);
setTextBbox(bbox);
setHighlightValue(value);
}
};
@@ -966,6 +970,7 @@ export default function ReviewDetails() {
activeReviewPointResultId={activeReviewPointResultId}
targetPage={targetPage}
charPositions={charPositions}
textBbox={textBbox}
highlightValue={highlightValue}
userInfo={loaderData.userInfo}
aiSuggestionReplace={aiSuggestionReplace}
@@ -1019,6 +1024,7 @@ export default function ReviewDetails() {
activeReviewPointResultId={activeReviewPointResultId}
targetPage={targetPage}
charPositions={charPositions}
textBbox={textBbox}
/>
</div>