71 lines
2.3 KiB
Python
71 lines
2.3 KiB
Python
"""文档文件来源解析器。"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
from fastapi_common.fastapi_common_logger import logger
|
|
from fastapi_common.fastapi_common_storage.oss_client import OssClient
|
|
|
|
from fastapi_modules.fastapi_leaudit.models.leauditDocumentFile import LeauditDocumentFile
|
|
|
|
|
|
@dataclass(frozen=True)
class FileSourcePayload:
    """Immutable file payload that the execution chain can consume directly."""

    # Name of the file this payload represents.
    fileName: str
    # Raw bytes of the file.
    fileContent: bytes
    # Tag describing where the bytes came from (this module uses "local" and "oss").
    sourceType: str
    # Path or URL the bytes were loaded from; defaults to None when not supplied.
    sourcePath: str | None = None
|
|
|
|
|
|
class FileSourceResolver:
    """Work out where a document file's bytes live and load them."""

    def __init__(self, Oss: OssClient | None = None) -> None:
        """Keep the given OSS client, or build a default one if none is passed."""
        # Any falsy argument (including None) is replaced by a fresh OssClient.
        self.Oss = Oss or OssClient()

    async def ResolvePayload(self, DocumentFile: LeauditDocumentFile) -> FileSourcePayload:
        """Load a single document file into a byte payload.

        The local file wins when ``localPath`` is set and points at an existing
        file; otherwise the content is downloaded from ``ossUrl``.

        Args:
            DocumentFile: record exposing ``fileName``, ``localPath`` and ``ossUrl``.

        Returns:
            A payload tagged ``"local"`` or ``"oss"`` depending on the source used.

        Raises:
            ValueError: when there is neither a usable ``localPath`` nor an ``ossUrl``.
        """
        local_candidate = Path(DocumentFile.localPath) if DocumentFile.localPath else None

        # Prefer the on-disk copy, but only if it really is a regular file.
        if local_candidate is not None and local_candidate.is_file():
            return FileSourcePayload(
                fileName=DocumentFile.fileName,
                fileContent=local_candidate.read_bytes(),
                sourceType="local",
                sourcePath=str(local_candidate),
            )

        # No usable local copy and no remote location: nothing left to try.
        if not DocumentFile.ossUrl:
            raise ValueError("当前文档文件既无可用 localPath,也无可用 ossUrl")

        return await self._DownloadFromUrl(
            FileName=DocumentFile.fileName,
            Url=DocumentFile.ossUrl,
        )

    async def ResolvePayloads(self, DocumentFiles: list[LeauditDocumentFile]) -> list[FileSourcePayload]:
        """Resolve several document files one after another, preserving input order."""
        # Each file is awaited in turn; the first failure propagates to the caller.
        return [await self.ResolvePayload(entry) for entry in DocumentFiles]

    async def _DownloadFromUrl(self, FileName: str, Url: str) -> FileSourcePayload:
        """Fetch the file content for ``Url`` through the OSS client.

        Any exception from the client is logged and then re-raised unchanged.
        """
        try:
            blob = self.Oss.DownloadBytes(Url)
        except Exception as failure:
            logger.error(f"下载 OSS 文件失败: url={Url}, error={failure}")
            raise
        else:
            return FileSourcePayload(
                fileName=FileName,
                fileContent=blob,
                sourceType="oss",
                sourcePath=Url,
            )
|