feat: add backend rule group and permission support
This commit is contained in:
@@ -9,7 +9,9 @@ import tempfile
|
||||
import time
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import fitz
|
||||
from fastapi_common.fastapi_common_logger import logger
|
||||
from leaudit.converters import doc2pdf
|
||||
|
||||
from fastapi_admin.celery_app import celery_app
|
||||
from fastapi_admin.config import (
|
||||
@@ -70,6 +72,7 @@ def leaudit_process_document(
|
||||
rules_resolution = _resolve_rules_runtime(document_id, run_id, rules_path, loop)
|
||||
loop.run_until_complete(_update_run_status_safe(run_id, "running"))
|
||||
rules_path_resolved = rules_resolution["rules_path"]
|
||||
attachment_inputs = list((upload_info or {}).get("attachments") or [])
|
||||
|
||||
rules_file = None
|
||||
if rules_path_resolved:
|
||||
@@ -88,11 +91,31 @@ def leaudit_process_document(
|
||||
"will classify from document content after OCR"
|
||||
)
|
||||
|
||||
suffix = _get_suffix(filename)
|
||||
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as temp:
|
||||
temp.write(file_content)
|
||||
temp_path = temp.name
|
||||
temp_paths.append(temp_path)
|
||||
attachment_metadata: list[dict[str, Any]] = []
|
||||
for attachment in attachment_inputs:
|
||||
attachment_metadata.append(
|
||||
{
|
||||
"fileId": attachment.get("file_id"),
|
||||
"fileName": attachment.get("file_name") or attachment.get("filename"),
|
||||
"sourceType": attachment.get("source_type"),
|
||||
"sourcePath": attachment.get("source_path"),
|
||||
}
|
||||
)
|
||||
|
||||
temp_path, merged_attachment_metadata = _prepare_execution_input(
|
||||
filename=filename,
|
||||
file_content=file_content,
|
||||
attachments=attachment_inputs,
|
||||
temp_paths=temp_paths,
|
||||
)
|
||||
if merged_attachment_metadata:
|
||||
for item in attachment_metadata:
|
||||
for merged in merged_attachment_metadata:
|
||||
if item["fileId"] == merged["fileId"]:
|
||||
item["localPath"] = merged["localPath"]
|
||||
item["mergedPdfPath"] = merged["mergedPdfPath"]
|
||||
break
|
||||
log.info("[任务ID: %s] 已装配并合并附件 %s 个到单输入文件", task_id, len(merged_attachment_metadata))
|
||||
|
||||
runner = NativeRunner()
|
||||
|
||||
@@ -104,7 +127,11 @@ def leaudit_process_document(
|
||||
run_id=run_id,
|
||||
document_id=document_id,
|
||||
rule_version_id=_optional_int(upload_info, "rule_version_id", "ruleVersionId"),
|
||||
extras={"taskId": task_id},
|
||||
extras={
|
||||
"taskId": task_id,
|
||||
"attachments": attachment_metadata,
|
||||
"mergedInput": bool(merged_attachment_metadata),
|
||||
},
|
||||
),
|
||||
local_file_path=temp_path,
|
||||
rules_file=rules_file,
|
||||
@@ -203,6 +230,7 @@ def leaudit_process_document_by_run(
|
||||
"source_type": context["source_type"],
|
||||
"source_path": context["source_path"],
|
||||
"trigger_source": context["trigger_source"],
|
||||
"attachments": context.get("attachments") or [],
|
||||
},
|
||||
rules_path=rules_path,
|
||||
)
|
||||
@@ -443,6 +471,17 @@ async def _load_run_context(run_id: int) -> dict[str, Any]:
|
||||
|
||||
resolver = FileSourceResolver()
|
||||
payload = await resolver.ResolvePayload(document_file)
|
||||
attachmentResult = await session.execute(
|
||||
select(LeauditDocumentFile)
|
||||
.where(
|
||||
LeauditDocumentFile.documentId == document.Id,
|
||||
LeauditDocumentFile.isActive.is_(True),
|
||||
LeauditDocumentFile.fileRole == "attachment",
|
||||
)
|
||||
.order_by(LeauditDocumentFile.Id.asc())
|
||||
)
|
||||
attachmentFiles = list(attachmentResult.scalars().all())
|
||||
attachmentPayloads = await resolver.ResolvePayloads(attachmentFiles) if attachmentFiles else []
|
||||
|
||||
return {
|
||||
"document_id": document.Id,
|
||||
@@ -453,6 +492,16 @@ async def _load_run_context(run_id: int) -> dict[str, Any]:
|
||||
"rule_version_id": run.ruleVersionId,
|
||||
"rule_source_oss_url": run.ruleSourceOssUrl,
|
||||
"trigger_source": run.triggerSource,
|
||||
"attachments": [
|
||||
{
|
||||
"file_id": attachmentFile.Id,
|
||||
"file_name": attachmentPayload.fileName,
|
||||
"file_content": attachmentPayload.fileContent,
|
||||
"source_type": attachmentPayload.sourceType,
|
||||
"source_path": attachmentPayload.sourcePath,
|
||||
}
|
||||
for attachmentFile, attachmentPayload in zip(attachmentFiles, attachmentPayloads)
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
@@ -462,6 +511,92 @@ def _get_suffix(filename: str) -> str:
|
||||
return ext if ext else ".pdf"
|
||||
|
||||
|
||||
def _write_temp_file(*, filename: str, content: bytes, temp_paths: list[str]) -> str:
    """Write ``content`` to a new temporary file and return its path.

    The suffix of the temporary file is whatever ``_get_suffix(filename)``
    returns. The file is created with ``delete=False``, so it remains on disk
    after this function returns.

    Args:
        filename: Original file name; used only to pick the suffix.
        content: Raw bytes to write into the temporary file.
        temp_paths: List that receives the new path (mutated in place).

    Returns:
        The path of the temporary file.
    """
    with tempfile.NamedTemporaryFile(suffix=_get_suffix(filename), delete=False) as temp:
        temp_path = temp.name
        # Record the path BEFORE writing: the file already exists on disk at
        # this point, so a failing write must not leave it untracked.
        # NOTE(review): temp_paths is presumably what the caller uses for
        # cleanup -- the cleanup code is not visible here, confirm.
        temp_paths.append(temp_path)
        temp.write(content)
    return temp_path
|
||||
|
||||
|
||||
def _convert_to_pdf_path(*, source_path: str, temp_paths: list[str]) -> str:
    """Return a path to a PDF version of ``source_path``.

    When the suffix of ``source_path`` is already ``.pdf`` (compared
    case-insensitively) the input path is returned unchanged and nothing is
    created. Otherwise an empty ``.pdf`` temp file is created, its path is
    appended to ``temp_paths``, and ``doc2pdf.convert`` is asked to write the
    converted document to that path.

    Args:
        source_path: Path of the local file to convert.
        temp_paths: List that receives the path of any temp file created here.

    Returns:
        ``source_path`` itself for PDFs, otherwise the path of the new temp PDF.
    """
    origin = Path(source_path)
    if origin.suffix.lower() != ".pdf":
        # Reserve a unique output name; the handle is closed right away so the
        # converter can write to the path itself.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as placeholder:
            target_path = placeholder.name
        temp_paths.append(target_path)
        doc2pdf.convert(
            origin,
            target_path,
            soffice="auto",
            pdfa=False,
            force=True,
            verify=False,
        )
        return target_path
    return source_path
|
||||
|
||||
|
||||
def _merge_pdf_paths(*, pdf_paths: list[str], temp_paths: list[str]) -> str:
    """Concatenate several PDF files into one temporary PDF.

    The input files are inserted in the order given by ``pdf_paths``. The
    output path is appended to ``temp_paths`` before any merging happens.

    Args:
        pdf_paths: Paths of the PDF files to merge, in output order.
        temp_paths: List that receives the path of the merged temp file.

    Returns:
        Path of the merged PDF.
    """
    # Reserve a unique output name; the handle is closed so fitz can save to it.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as placeholder:
        merged_path = placeholder.name
    temp_paths.append(merged_path)

    # Both the output document and every source document are closed on exit,
    # including when insertion or saving raises.
    with fitz.open() as combined:
        for part_path in pdf_paths:
            with fitz.open(part_path) as part:
                combined.insert_pdf(part)
        combined.save(merged_path)
    return merged_path
|
||||
|
||||
|
||||
def _prepare_execution_input(
    *,
    filename: str,
    file_content: bytes,
    attachments: list[dict[str, Any]],
    temp_paths: list[str],
) -> tuple[str, list[dict[str, Any]]]:
    """Build the single local file that will be used as execution input.

    Without attachments the main file is written to a temp file and that path
    is returned together with an empty metadata list. With attachments, the
    main file and every attachment are written to temp files, converted to PDF
    where needed, and merged (main file first, attachments in input order)
    into one temp PDF.

    Args:
        filename: Name of the main file.
        file_content: Bytes of the main file.
        attachments: Attachment dicts; each must contain ``"file_content"`` and
            may contain ``"file_name"``/``"filename"`` and ``"file_id"``.
        temp_paths: List passed to the helper functions, which append every
            temp path they create.

    Returns:
        A ``(path, metadata)`` tuple. ``metadata`` has one dict per attachment
        with the keys ``fileId``, ``fileName``, ``localPath`` and
        ``mergedPdfPath``.

    Raises:
        KeyError: If an attachment has no ``"file_content"`` entry.
    """
    primary_path = _write_temp_file(filename=filename, content=file_content, temp_paths=temp_paths)
    if not attachments:
        return primary_path, []

    # The main document always comes first in the merged output.
    pdf_sequence: list[str] = [
        _convert_to_pdf_path(source_path=primary_path, temp_paths=temp_paths)
    ]
    metadata: list[dict[str, Any]] = []
    for entry in attachments:
        # Fall back to a generic name when the attachment carries none.
        entry_name = str(entry.get("file_name") or entry.get("filename") or "attachment.bin")
        entry_bytes = entry["file_content"]
        local_path = _write_temp_file(
            filename=entry_name,
            content=entry_bytes,
            temp_paths=temp_paths,
        )
        pdf_path = _convert_to_pdf_path(source_path=local_path, temp_paths=temp_paths)
        pdf_sequence.append(pdf_path)
        metadata.append(
            {
                "fileId": entry.get("file_id"),
                "fileName": entry_name,
                "localPath": local_path,
                "mergedPdfPath": pdf_path,
            }
        )

    combined_path = _merge_pdf_paths(pdf_paths=pdf_sequence, temp_paths=temp_paths)
    return combined_path, metadata
|
||||
|
||||
|
||||
def dispatch_leaudit_task(
|
||||
run_id: int,
|
||||
*,
|
||||
|
||||
Reference in New Issue
Block a user