Files
leaudit-platform-frontend/app/routes/rules.new1.tsx
T
2025-04-17 16:34:20 +08:00

622 lines
24 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { useState, useEffect, useRef } from "react";
import { useLoaderData } from "@remix-run/react";
import { Document, Page, pdfjs } from "react-pdf";
import type { LoaderFunctionArgs } from "@remix-run/node";
import mammoth from "mammoth";
// Configure the pdf.js worker script: it is loaded from the cdnjs CDN through a
// protocol-relative URL whose version segment is taken from `pdfjs.version`.
pdfjs.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjs.version}/pdf.worker.js`;
// Mock of the extracted-content data a backend would return.
// The loader in this file passes it to the page unchanged.
const mockExtractedContent = [
{ id: 1, text: "合同条款", page: 2, position: { start: 50, end: 60 } },
{ id: 2, text: "签署日期", page: 5, position: { start: 120, end: 130 } },
{ id: 3, text: "责任划分", page: 3, position: { start: 80, end: 90 } },
];
/** One extracted snippet: listed in the side panel and used to navigate the document. */
interface ExtractedContent {
// Identifier; used as the React list key and as the key of the DOCX position map.
id: number;
// Snippet text; shown in the list and searched for inside the rendered DOCX HTML.
text: string;
// Page number; PDF navigation scrolls to the element whose id is `page-${page}`.
page: number;
// Start/end offsets. Not read by the visible code; unit is unknown — TODO confirm with the backend.
position: { start: number; end: number };
}
/** Shape of the data returned by the route `loader` and read via `useLoaderData`. */
interface LoaderData {
// URL of the document displayed first.
fileUrl: string;
// Page number parsed from the `page` query parameter of the request.
initialPage: number;
// Snippets rendered in the side panel.
extractedContent: ExtractedContent[];
// Viewer type derived from `fileUrl` by `getFileTypeFromUrl`.
fileType: "pdf" | "docx";
// All candidate document URLs, keyed by name, so the UI can switch between them.
urls: Record<string, string>;
}
/** Argument passed to the PDF document's load-success callback. */
interface DocumentLoadSuccess {
// Total number of pages in the loaded PDF.
numPages: number;
}
/**
 * Infers the viewer type from the file extension of a document URL.
 *
 * The query string and fragment are ignored, so signed URLs such as
 * `https://host/file.docx?Expires=...` are classified by their path only.
 *
 * @param url - Absolute or relative document URL.
 * @returns `"docx"` for paths ending in `.docx` or `.doc` (case-insensitive);
 *          `"pdf"` for everything else, including unknown extensions.
 */
function getFileTypeFromUrl(url: string): "pdf" | "docx" {
  // Cut at the first "?" or "#" so query parameters cannot hide or fake the extension.
  const cut = url.search(/[?#]/);
  const path = (cut === -1 ? url : url.slice(0, cut)).toLowerCase();
  if (path.endsWith(".docx") || path.endsWith(".doc")) {
    return "docx";
  }
  // PDF is the default for ".pdf" and for anything unrecognised.
  return "pdf";
}
/**
 * Remix loader for the document viewer route.
 *
 * Reads the optional `page` query parameter and returns the document URL to
 * show first, its inferred type, the mock extracted content and the full set
 * of candidate URLs so the client can switch between them.
 *
 * @param args.request - Incoming request; only its URL is read.
 * @returns Data matching `LoaderData`. `initialPage` is always a positive
 *          integer: missing, non-numeric, fractional or < 1 values become 1.
 */
export const loader = async ({ request }: LoaderFunctionArgs) => {
  const url = new URL(request.url);
  // Number(null) is 0 and Number("abc") is NaN; both fall back to page 1.
  const requestedPage = Number(url.searchParams.get("page"));
  const initialPage =
    Number.isInteger(requestedPage) && requestedPage >= 1 ? requestedPage : 1;

  // Sample document URLs.
  const urls = {
    // 1. Original document URL - may be subject to CORS restrictions
    original: "https://dev-xc-enroll.oss-cn-guangzhou.aliyuncs.com/uploads/7840-230620112939.docx",
    // 2. Public sample document - may still be subject to CORS restrictions
    public: "https://dev-xc-enroll.oss-cn-guangzhou.aliyuncs.com/uploads/7840-230620112939.docx",
    // 3. Via a CORS proxy (example)
    proxy: "https://dev-xc-enroll.oss-cn-guangzhou.aliyuncs.com/uploads/7840-230620112939.docx",
    // 4. Document on the local server (assumed to be deployed)
    local: "/uploads/sample.docx",
    // 5. PDF sample (fallback if the Word document cannot be displayed)
    pdf: "http://172.18.0.100:9000/docauditai/documents/%E5%90%88%E5%90%8C%E6%96%87%E6%A1%A3/2025/04%E6%9C%8816%E6%97%A5/%E7%AC%AC16%E5%8F%B7--%E9%94%80%E5%94%AE%E6%97%A0%E6%A0%87%E5%BF%97%E5%A4%96%E5%9B%BD%E5%8D%B7%E7%83%9F_10%E6%97%B626%E5%88%8632%E7%A7%92/%E7%AC%AC16%E5%8F%B7--%E9%94%80%E5%94%AE%E6%97%A0%E6%A0%87%E5%BF%97%E5%A4%96%E5%9B%BD%E5%8D%B7%E7%83%9F.pdf"
  };

  // Document shown first; switch the key to test another URL.
  const fileUrl = urls.public;
  const fileType = getFileTypeFromUrl(fileUrl);

  return {
    fileUrl,
    initialPage,
    extractedContent: mockExtractedContent,
    fileType,
    urls // every URL is passed along so the client can choose
  };
};
export default function Documents() {
// Loader data. The loader's `fileType` is deliberately not read here: it only
// describes the initial URL and would go stale once the user switches documents.
const { fileUrl, extractedContent, urls } = useLoaderData<LoaderData>();
// Page count of the loaded PDF; null until the PDF has loaded.
const [numPages, setNumPages] = useState<number | null>(null);
// Pending PDF page to scroll to; reset to null once handled.
const [scrollToPage, setScrollToPage] = useState<number | null>(null);
// Starts as false so no loading indicator is shown before a local render is requested.
const [docxLoading, setDocxLoading] = useState(false);
const [loadError, setLoadError] = useState<string | null>(null);
// Timestamped debug log lines shown in the loading and error panels.
const [debugInfo, setDebugInfo] = useState<string[]>([]);
// Scroll container that holds the locally rendered DOCX HTML.
const docxContainerRef = useRef<HTMLDivElement>(null);
// Maps an extracted-content id to its scroll offset inside the DOCX container.
const [docxContentPositions, setDocxContentPositions] = useState<{[id: number]: number}>({});
const [currentUrl, setCurrentUrl] = useState<string>(fileUrl);
// Derived from the URL currently displayed so that switching documents
// (e.g. from the DOCX sample to the PDF sample) also switches the renderer.
const fileType = getFileTypeFromUrl(currentUrl);
// iframe (embedded viewer) mode is the default.
const [showIframe, setShowIframe] = useState<boolean>(true);
// HTML produced by the local DOCX conversion.
const [docxHtml, setDocxHtml] = useState<string>("");
// Click handler for an extracted-content entry: records the target page, then
// navigates according to the document type and the active rendering mode.
const handleContentClick = (item: ExtractedContent) => {
  setScrollToPage(item.page);

  if (fileType === "pdf") {
    // Each PDF page wrapper carries the id `page-<n>`.
    document.getElementById(`page-${item.page}`)?.scrollIntoView({ behavior: 'smooth' });
    return;
  }
  if (fileType !== "docx") {
    return;
  }

  if (showIframe) {
    // The iframe's inner scroll position cannot be controlled from here,
    // so the user is only told to search manually.
    addDebugInfo(`在iframe中无法直接定位到"${item.text}",请在文档中手动查找`);
    return;
  }

  // Local rendering mode: jump to the offset recorded for this entry, if any.
  const container = docxContainerRef.current;
  const targetOffset = docxContentPositions[item.id];
  if (targetOffset === undefined || !container) {
    return;
  }
  container.scrollTop = targetOffset;
  // Highlight the matching region (approximate).
  highlightDocxContent(item);
};
// Approximate highlighting of an extracted snippet inside the rendered DOCX.
// No exact position data is available, so elements are matched by text content.
// Only the innermost matching elements are highlighted: `textContent` of an
// ancestor includes its descendants' text, so without this filter every
// wrapper up to the whole document container would be highlighted as well.
const highlightDocxContent = (item: ExtractedContent) => {
  // Remove any previous highlight.
  document
    .querySelectorAll('.docx-highlight')
    .forEach(el => el.classList.remove('docx-highlight'));

  const container = docxContainerRef.current;
  // An empty search text would match every element, so skip it.
  if (!container || !item.text) {
    return;
  }

  const matches = Array.from(container.querySelectorAll('p, span, div'))
    .filter(node => node.textContent?.includes(item.text));

  matches
    .filter(node => !matches.some(other => other !== node && node.contains(other)))
    .forEach(node => node.classList.add('docx-highlight'));
};
// Load-success callback for the PDF document: stores and logs the page count.
function onDocumentLoadSuccess(loaded: DocumentLoadSuccess) {
  const pageCount = loaded.numPages;
  setNumPages(pageCount);
  console.log("PDF加载成功,页数:", pageCount);
}
// Minimal debug logger: writes to the console and appends a line prefixed
// with the current UTC time (HH:mm:ss) to the on-screen debug log.
const addDebugInfo = (info: string) => {
  console.log(info);
  setDebugInfo(prev => {
    // Characters 11-18 of an ISO-8601 timestamp are its "HH:mm:ss" part.
    const clock = new Date().toISOString().slice(11, 19);
    return prev.concat(`${clock}: ${info}`);
  });
};
// Switches the viewer to another candidate URL and resets the transient state
// (debug log, error, loading flag), returning to iframe mode.
const switchDocumentUrl = (urlKey: keyof typeof urls) => {
  const nextUrl = urls[urlKey];
  setCurrentUrl(nextUrl);
  setDebugInfo([]);
  setLoadError(null);
  setDocxLoading(false);
  setShowIframe(true);
  addDebugInfo(`切换到新的文档URL: ${nextUrl}`);
};
// Switches to iframe (embedded viewer) mode, used when loading the document
// directly runs into CORS problems.
const switchToIframeMode = () => {
  // This is triggered from the error panel. The render checks `loadError`
  // first, so the error must be cleared or the iframe would never be shown.
  setLoadError(null);
  setShowIframe(true);
  setDocxLoading(false);
  addDebugInfo("切换到iframe嵌入模式");
};
// Local rendering of Word documents: downloads the file, converts it to HTML
// with mammoth and then records where each extracted snippet is located.
// Runs only for DOCX files in local (non-iframe) mode.
//
// NOTE(security): mammoth does not sanitise the source document and the
// resulting HTML is injected with dangerouslySetInnerHTML. This is only safe
// for trusted documents; sanitise the HTML before use with untrusted uploads.
useEffect(() => {
  if (fileType !== "docx" || !docxContainerRef.current || showIframe) {
    return;
  }

  // Set by the cleanup function so a superseded run stops touching state.
  let cancelled = false;
  let positionTimer: ReturnType<typeof setTimeout> | undefined;

  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);
  // Debug logging that goes silent once this run has been superseded.
  const log = (info: string) => {
    if (!cancelled) {
      addDebugInfo(info);
    }
  };

  setDocxLoading(true);
  setDebugInfo([]); // clear the previous run's debug log
  addDebugInfo(`准备加载Word文档: ${currentUrl}`);

  const loadDocx = async () => {
    try {
      // 1. Download the file.
      log(`开始获取文件...`);
      let response: Response;
      try {
        // No custom headers: Access-Control-Allow-Origin is a *response*
        // header. Sending it with the request only forces a CORS preflight,
        // which servers that do not allow that header will reject.
        response = await fetch(currentUrl, { mode: 'cors', credentials: 'omit' });
      } catch (fetchError) {
        log(`fetch请求失败: ${describeError(fetchError)}`);
        throw new Error(`网络请求失败: ${describeError(fetchError)}`);
      }
      if (cancelled) return;
      log(`fetch请求状态: ${response.status} ${response.statusText}`);
      if (!response.ok) {
        throw new Error(`文档无法访问,状态码: ${response.status}`);
      }
      log(`文档下载成功,状态码: ${response.status}`);

      // 2. Read the body as an ArrayBuffer.
      log(`开始读取响应内容为ArrayBuffer...`);
      let buffer: ArrayBuffer;
      try {
        buffer = await response.arrayBuffer();
      } catch (bufferError) {
        log(`读取为ArrayBuffer失败: ${describeError(bufferError)}`);
        throw new Error(`转换文档内容失败: ${describeError(bufferError)}`);
      }
      if (cancelled) return;
      log(`获取到文档数据,大小: ${buffer.byteLength} 字节`);

      // 3. Convert to HTML. The style map belongs in mammoth's second
      //    (options) argument; inside the input object it is ignored.
      log("使用mammoth开始转换文档为HTML...");
      const styleMap = [
        "p[style-name='Heading 1'] => h1:fresh",
        "p[style-name='Heading 2'] => h2:fresh",
        "p[style-name='Title'] => h1.title:fresh",
        "p[style-name='Subtitle'] => h2.subtitle:fresh",
        "table => table.docx-table"
      ];
      let result: Awaited<ReturnType<typeof mammoth.convertToHtml>>;
      try {
        result = await mammoth.convertToHtml(
          { arrayBuffer: buffer },
          { styleMap, includeDefaultStyleMap: true }
        );
      } catch (mammothError) {
        log(`Mammoth转换失败: ${describeError(mammothError)}`);
        throw new Error(`Word转HTML失败: ${describeError(mammothError)}`);
      }
      if (cancelled) return;

      // Surface conversion warnings in the debug log.
      result.messages.forEach(message => {
        log(`转换警告: [${message.type}] ${message.message}`);
      });
      log("文档转换成功,获取到HTML内容");

      // Wrap the generated HTML in a styled container plus a format note.
      const enhancedHtml = `
<div class="document-container">
${result.value}
<div class="format-note">
<p>注意:本地转换使用了简化版格式,一些高级格式(如页眉页脚、复杂表格格式)可能无法完全显示。</p>
<p>如需查看完整格式,请使用"嵌入模式"或下载文档。</p>
</div>
</div>
`;
      setDocxHtml(enhancedHtml);

      // 4. After the HTML has had time to render, locate every extracted
      //    snippet and remember its scroll offset inside the container.
      positionTimer = setTimeout(() => {
        if (cancelled) return;
        try {
          const container = docxContainerRef.current;
          if (!container) return;

          const containerTop = container.getBoundingClientRect().top;
          // Every element that may carry text; queried once for all snippets.
          const candidates = Array.from(
            container.querySelectorAll('p, h1, h2, h3, h4, h5, h6, li, td, th, span')
          );
          const positionsMap: {[id: number]: number} = {};

          extractedContent.forEach((item) => {
            // Use the first element (in document order) containing the text.
            const element = candidates.find(el => el.textContent?.includes(item.text));
            if (element) {
              // Offset relative to the container's scroll origin.
              positionsMap[item.id] =
                element.getBoundingClientRect().top - containerTop + container.scrollTop;
              // Mark the element that was found.
              element.classList.add('docx-content-found');
            }
          });

          setDocxContentPositions(positionsMap);
          addDebugInfo(`已创建 ${Object.keys(positionsMap).length} 个内容位置映射`);
        } catch (positionError) {
          addDebugInfo(`创建位置映射时出错: ${describeError(positionError)}`);
        }
      }, 500);

      setDocxLoading(false);
    } catch (error: unknown) {
      if (cancelled) return;
      const errorMessage = describeError(error);
      addDebugInfo(`文档处理错误: ${errorMessage}`);
      setLoadError(`加载Word文档失败: ${errorMessage}`);
      setDocxLoading(false);
    }
  };

  void loadDocx();

  return () => {
    cancelled = true;
    if (positionTimer !== undefined) {
      clearTimeout(positionTimer);
    }
    // A cancelled run never reaches its own setDocxLoading(false); reset it
    // here so that switching mode mid-load cannot leave the spinner stuck.
    setDocxLoading(false);
  };
}, [currentUrl, fileType, extractedContent, showIframe]);
// After a render, performs any pending PDF scroll request and then clears it.
useEffect(() => {
  if (!scrollToPage || fileType !== "pdf") {
    return;
  }
  document.getElementById(`page-${scrollToPage}`)?.scrollIntoView({ behavior: 'smooth' });
  setScrollToPage(null);
}, [scrollToPage, fileType]);
// Builds one wrapper per PDF page (1-based). Each wrapper carries the id
// `page-<n>` used as a scroll target. Returns null while the page count is unknown.
const renderAllPages = () => {
  if (!numPages) return null;
  return Array.from({ length: numPages }, (_, index) => {
    const pageNumber = index + 1;
    return (
      <div key={pageNumber} id={`page-${pageNumber}`} className="mb-6">
        <div className="text-center text-gray-500 text-sm mb-2"> {pageNumber} </div>
        <Page
          pageNumber={pageNumber}
          renderTextLayer={true}
          renderAnnotationLayer={true}
          className="border border-gray-300 shadow-md"
        />
      </div>
    );
  });
};
// Layout: document viewer on the left (error panel, PDF pages, or Word document
// as spinner / embedded iframe / locally rendered HTML), extracted-content list
// on the right, followed by an inline stylesheet for the rendered DOCX HTML.
return (
<div className="flex min-h-screen bg-gray-50 p-6">
{/* Document display area */}
<div className="flex-1 mr-6">
<div className="bg-white p-4 rounded-lg shadow-md">
<h1 className="text-2xl font-bold mb-4"> ({fileType.toUpperCase()})</h1>
{fileType === "docx" && (
<div className="bg-gray-100 p-3 mb-4 rounded flex flex-col">
<div className="flex justify-between items-center mb-2">
<p className="text-sm text-gray-600">Word文档预览模式</p>
<div className="flex gap-2">
<button
onClick={() => setShowIframe(!showIframe)}
className={`px-3 py-1 text-sm rounded ${showIframe ? 'bg-gray-200' : 'bg-blue-500 text-white'}`}
>
{showIframe ? "尝试本地渲染" : "使用嵌入模式"}
</button>
<button
onClick={() => window.open(currentUrl, '_blank')}
className="px-3 py-1 bg-gray-500 text-white text-sm rounded"
>
</button>
</div>
</div>
{!showIframe && (
<div className="text-xs text-gray-500 bg-yellow-50 p-2 rounded">
<p></p>
<ul className="list-disc pl-5 mt-1">
<li>使mammoth.js库将Word文档转换为HTML</li>
<li></li>
<li>使Google Docs提供原生渲染</li>
</ul>
</div>
)}
</div>
)}
<div className="w-full h-[80vh] overflow-auto bg-gray-100 rounded-lg p-4">
{loadError ? (
<div className="text-red-500 flex flex-col items-center justify-center h-full">
<p className="mb-4">:</p>
<p>{loadError}</p>
<div className="mt-6 p-4 bg-gray-800 text-green-400 rounded text-xs max-w-xl overflow-auto max-h-96">
<p className="font-bold mb-2">:</p>
{debugInfo.map((info, index) => (
<div key={index} className="mb-1">{info}</div>
))}
</div>
<div className="mt-4">
<p className="text-black mb-2">:</p>
<div className="flex flex-wrap gap-2">
<button onClick={() => switchDocumentUrl('public')} className="px-3 py-1 bg-green-500 text-white rounded">
使
</button>
<button onClick={() => switchDocumentUrl('proxy')} className="px-3 py-1 bg-blue-500 text-white rounded">
使CORS代理
</button>
<button onClick={() => switchToIframeMode()} className="px-3 py-1 bg-purple-500 text-white rounded">
使iframe嵌入
</button>
<button onClick={() => switchDocumentUrl('pdf')} className="px-3 py-1 bg-yellow-500 text-white rounded">
PDF
</button>
<a href={currentUrl} className="px-3 py-1 bg-gray-500 text-white rounded" download target="_blank" rel="noreferrer">
</a>
</div>
</div>
</div>
) : fileType === "pdf" ? (
<Document
file={currentUrl}
onLoadSuccess={onDocumentLoadSuccess}
onLoadError={(error) => {
console.error("PDF加载错误:", error);
setLoadError("PDF文档加载失败:" + (error.message || "未知错误"));
}}
className="flex flex-col items-center"
error={<div className="text-red-500">PDF文档加载失败</div>}
noData={<div></div>}
loading={<div className="text-center py-10">PDF加载中...</div>}
>
{renderAllPages()}
</Document>
) : (
<>
{docxLoading ? (
<div className="flex flex-col items-center justify-center h-full">
<div className="mb-6">
<div className="animate-spin rounded-full h-12 w-12 border-t-2 border-b-2 border-blue-500"></div>
</div>
<p className="mb-4 text-lg">Word文档加载中...</p>
{debugInfo.length > 0 && (
<div className="mt-4 p-4 bg-gray-800 text-green-400 rounded text-xs max-w-xl overflow-auto max-h-72">
<p className="font-bold mb-2">:</p>
{debugInfo.map((info, index) => (
<div key={index} className="mb-1">{info}</div>
))}
</div>
)}
</div>
) : showIframe ? (
// Embedded mode: show the Word document through the Google Docs viewer iframe
<div className="w-full h-full">
<iframe
src={`https://docs.google.com/viewer?url=${encodeURIComponent(currentUrl)}&embedded=true`}
className="w-full h-full"
frameBorder="0"
title="谷歌文档查看器"
/>
</div>
) : (
// Local rendering mode (shown only after the user explicitly clicks the toggle button)
<div
ref={docxContainerRef}
className="w-full h-full"
style={{
height: '100%',
overflowY: 'auto',
padding: '20px',
backgroundColor: 'white'
}}
dangerouslySetInnerHTML={{ __html: docxHtml }}
/>
)}
</>
)}
</div>
</div>
</div>
{/* Extracted-content area */}
<div className="w-80 bg-white p-4 rounded-lg shadow-md">
<h2 className="text-xl font-semibold mb-4"></h2>
<ul className="space-y-3">
{extractedContent.map((item) => (
<button
key={item.id}
onClick={() => handleContentClick(item)}
className="w-full text-left p-3 bg-gray-50 hover:bg-gray-100 cursor-pointer rounded-lg transition"
aria-label={`查看内容: ${item.text}`}
>
<p className="text-sm font-medium">{item.text}</p>
<p className="text-xs text-gray-500">: {item.page}</p>
</button>
))}
</ul>
</div>
{/* Custom styles for highlighting and for the locally rendered DOCX HTML */}
<style dangerouslySetInnerHTML={{
__html: `
.docx-highlight {
background-color: #ffff00;
outline: 2px solid orange;
position: relative;
}
/* 找到的内容高亮 */
.docx-content-found {
background-color: rgba(255, 230, 0, 0.3);
outline: 1px solid orange;
}
/* Mammoth.js生成的内容样式 */
.document-container {
font-family: "Microsoft YaHei", Arial, sans-serif;
line-height: 1.5;
color: #333;
max-width: 800px;
margin: 0 auto;
}
.document-container .format-note {
margin-top: 30px;
padding: 10px;
background-color: #f5f5f5;
border-left: 3px solid #ccc;
font-size: 12px;
color: #666;
}
.document-container h1 {
font-size: 24px;
margin-top: 24px;
margin-bottom: 16px;
font-weight: bold;
color: #222;
}
.document-container h1.title {
font-size: 28px;
text-align: center;
margin-bottom: 24px;
}
.document-container h2 {
font-size: 20px;
margin-top: 20px;
margin-bottom: 14px;
font-weight: bold;
color: #333;
}
.document-container h2.subtitle {
font-size: 18px;
text-align: center;
margin-bottom: 20px;
color: #555;
}
.document-container p {
margin-bottom: 16px;
text-align: justify;
overflow-wrap: break-word;
}
.document-container table {
border-collapse: collapse;
width: 100%;
margin-bottom: 16px;
}
.document-container table.docx-table {
border: 1px solid #ddd;
margin: 16px 0;
}
.document-container table.docx-table th,
.document-container table.docx-table td {
border: 1px solid #ddd;
padding: 8px;
text-align: left;
}
.document-container table.docx-table th {
background-color: #f2f2f2;
font-weight: bold;
}
.document-container ul, .document-container ol {
margin-left: 20px;
margin-bottom: 16px;
}
.document-container li {
margin-bottom: 5px;
}
.document-container img {
max-width: 100%;
height: auto;
margin: 10px 0;
}
.document-container span.underline {
text-decoration: underline;
}
.document-container span.strikethrough {
text-decoration: line-through;
}
/* 段落缩进 */
.document-container p:not(.no-indent) {
text-indent: 2em;
}
`
}} />
</div>
);
}