feat: add tenant-scoped rule and permission management
This commit is contained in:
@@ -27,6 +27,64 @@ log = logging.getLogger(__name__)
|
||||
class StorageAdapter:
|
||||
"""Write leaudit pipeline results to leaudit_platform database."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._column_exists_cache: dict[str, bool] = {}
|
||||
|
||||
async def _column_exists(self, session, table_name: str, column_name: str) -> bool:
|
||||
cache_key = f"{table_name}.{column_name}"
|
||||
cached = self._column_exists_cache.get(cache_key)
|
||||
if cached is not None:
|
||||
return cached
|
||||
|
||||
exists = bool(
|
||||
(
|
||||
await session.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT EXISTS (
|
||||
SELECT 1
|
||||
FROM information_schema.columns
|
||||
WHERE table_schema = current_schema()
|
||||
AND table_name = :table_name
|
||||
AND column_name = :column_name
|
||||
)
|
||||
"""
|
||||
),
|
||||
{"table_name": table_name, "column_name": column_name},
|
||||
)
|
||||
).scalar_one()
|
||||
)
|
||||
self._column_exists_cache[cache_key] = exists
|
||||
return exists
|
||||
|
||||
async def _load_run_tenant_snapshot(self, session, run_id: int | None) -> dict[str, Any]:
|
||||
if run_id is None:
|
||||
return {}
|
||||
|
||||
select_fields = ["id"]
|
||||
if await self._column_exists(session, "leaudit_audit_runs", "tenant_code"):
|
||||
select_fields.append("tenant_code")
|
||||
if await self._column_exists(session, "leaudit_audit_runs", "tenant_name_snapshot"):
|
||||
select_fields.append("tenant_name_snapshot")
|
||||
|
||||
if len(select_fields) == 1:
|
||||
return {}
|
||||
|
||||
row = (
|
||||
await session.execute(
|
||||
text(
|
||||
f"""
|
||||
SELECT {", ".join(select_fields)}
|
||||
FROM leaudit_audit_runs
|
||||
WHERE id = :run_id
|
||||
LIMIT 1
|
||||
"""
|
||||
),
|
||||
{"run_id": run_id},
|
||||
)
|
||||
).mappings().first()
|
||||
return dict(row) if row else {}
|
||||
|
||||
async def _ensure_run_id(self, document_id: int, run_id: int | None) -> int | None:
|
||||
"""Return explicit ``run_id`` when given, otherwise fall back to latest run.
|
||||
|
||||
@@ -169,6 +227,9 @@ class StorageAdapter:
|
||||
resolved_run_id = await self._ensure_run_id(document_id, run_id)
|
||||
inferred_positions = _build_inferred_field_positions(bundle, ocr_result)
|
||||
async with GetAsyncSession() as session:
|
||||
run_snapshot = await self._load_run_tenant_snapshot(session, resolved_run_id)
|
||||
rule_results_has_tenant_code = await self._column_exists(session, "leaudit_rule_results", "tenant_code")
|
||||
rule_results_has_tenant_name = await self._column_exists(session, "leaudit_rule_results", "tenant_name_snapshot")
|
||||
# Delete existing results for this document+run
|
||||
await session.execute(
|
||||
text("DELETE FROM leaudit_rule_results WHERE document_id = :did AND run_id = :rid"),
|
||||
@@ -193,6 +254,10 @@ class StorageAdapter:
|
||||
)
|
||||
if rule_version_id is not None:
|
||||
row["rule_version_id"] = rule_version_id
|
||||
if rule_results_has_tenant_code:
|
||||
row["tenant_code"] = str(run_snapshot.get("tenant_code") or "") or None
|
||||
if rule_results_has_tenant_name:
|
||||
row["tenant_name_snapshot"] = str(run_snapshot.get("tenant_name_snapshot") or "") or None
|
||||
json_columns = {"stages", "extracted_fields", "field_positions", "remediation", "rule_meta"}
|
||||
serialized_row = {
|
||||
key: (json.dumps(value, ensure_ascii=False) if key in json_columns and value is not None else value)
|
||||
@@ -250,65 +315,43 @@ class StorageAdapter:
|
||||
metric = dict(timing or {})
|
||||
|
||||
async with GetAsyncSession() as session:
|
||||
run_snapshot = await self._load_run_tenant_snapshot(session, resolved_run_id)
|
||||
has_tenant_code = await self._column_exists(session, "leaudit_run_metrics", "tenant_code")
|
||||
await session.execute(
|
||||
text("DELETE FROM leaudit_run_metrics WHERE run_id = :rid"),
|
||||
{"rid": resolved_run_id},
|
||||
)
|
||||
payload = {
|
||||
"run_id": resolved_run_id,
|
||||
"ocr_seconds": metric.get("ocr"),
|
||||
"normalize_seconds": metric.get("normalize"),
|
||||
"extract_seconds": metric.get("extraction", metric.get("extract")),
|
||||
"evaluate_seconds": metric.get("evaluation", metric.get("evaluate")),
|
||||
"rescue_seconds": metric.get("rescue"),
|
||||
"total_seconds": metric.get("total"),
|
||||
"page_count": page_count,
|
||||
"sub_document_count": sub_document_count,
|
||||
"field_count": field_count,
|
||||
"rule_count": rule_count,
|
||||
"llm_call_count": None,
|
||||
"vlm_call_count": None,
|
||||
"rescue_rule_count": rescue_rule_count,
|
||||
"artifact_count": artifact_count,
|
||||
}
|
||||
if has_tenant_code:
|
||||
payload["tenant_code"] = str(run_snapshot.get("tenant_code") or "") or None
|
||||
columns = list(payload.keys())
|
||||
await session.execute(
|
||||
text(
|
||||
"""
|
||||
f"""
|
||||
INSERT INTO leaudit_run_metrics (
|
||||
run_id,
|
||||
ocr_seconds,
|
||||
normalize_seconds,
|
||||
extract_seconds,
|
||||
evaluate_seconds,
|
||||
rescue_seconds,
|
||||
total_seconds,
|
||||
page_count,
|
||||
sub_document_count,
|
||||
field_count,
|
||||
rule_count,
|
||||
llm_call_count,
|
||||
vlm_call_count,
|
||||
rescue_rule_count,
|
||||
artifact_count
|
||||
{", ".join(columns)}
|
||||
) VALUES (
|
||||
:run_id,
|
||||
:ocr_seconds,
|
||||
:normalize_seconds,
|
||||
:extract_seconds,
|
||||
:evaluate_seconds,
|
||||
:rescue_seconds,
|
||||
:total_seconds,
|
||||
:page_count,
|
||||
:sub_document_count,
|
||||
:field_count,
|
||||
:rule_count,
|
||||
:llm_call_count,
|
||||
:vlm_call_count,
|
||||
:rescue_rule_count,
|
||||
:artifact_count
|
||||
{", ".join(f":{column}" for column in columns)}
|
||||
)
|
||||
"""
|
||||
),
|
||||
{
|
||||
"run_id": resolved_run_id,
|
||||
"ocr_seconds": metric.get("ocr"),
|
||||
"normalize_seconds": metric.get("normalize"),
|
||||
"extract_seconds": metric.get("extraction", metric.get("extract")),
|
||||
"evaluate_seconds": metric.get("evaluation", metric.get("evaluate")),
|
||||
"rescue_seconds": metric.get("rescue"),
|
||||
"total_seconds": metric.get("total"),
|
||||
"page_count": page_count,
|
||||
"sub_document_count": sub_document_count,
|
||||
"field_count": field_count,
|
||||
"rule_count": rule_count,
|
||||
"llm_call_count": None,
|
||||
"vlm_call_count": None,
|
||||
"rescue_rule_count": rescue_rule_count,
|
||||
"artifact_count": artifact_count,
|
||||
},
|
||||
payload,
|
||||
)
|
||||
await session.commit()
|
||||
|
||||
@@ -329,40 +372,58 @@ class StorageAdapter:
|
||||
|
||||
resolved_run_id = await self._ensure_run_id(document_id, run_id)
|
||||
async with GetAsyncSession() as session:
|
||||
run_snapshot = await self._load_run_tenant_snapshot(session, resolved_run_id)
|
||||
has_tenant_code = await self._column_exists(session, "leaudit_run_errors", "tenant_code")
|
||||
has_tenant_name = await self._column_exists(session, "leaudit_run_errors", "tenant_name_snapshot")
|
||||
for message in messages:
|
||||
columns = [
|
||||
"run_id",
|
||||
"document_id",
|
||||
"stage",
|
||||
"level",
|
||||
"error_code",
|
||||
"message",
|
||||
"detail_json",
|
||||
]
|
||||
values = [
|
||||
":run_id",
|
||||
":document_id",
|
||||
":stage",
|
||||
":level",
|
||||
":error_code",
|
||||
":message",
|
||||
":detail_json",
|
||||
]
|
||||
payload = {
|
||||
"run_id": resolved_run_id,
|
||||
"document_id": document_id,
|
||||
"stage": stage,
|
||||
"level": level,
|
||||
"error_code": error_code,
|
||||
"message": message,
|
||||
"detail_json": json.dumps(detail_json, ensure_ascii=False)
|
||||
if detail_json is not None
|
||||
else None,
|
||||
}
|
||||
if has_tenant_code:
|
||||
columns.append("tenant_code")
|
||||
values.append(":tenant_code")
|
||||
payload["tenant_code"] = str(run_snapshot.get("tenant_code") or "") or None
|
||||
if has_tenant_name:
|
||||
columns.append("tenant_name_snapshot")
|
||||
values.append(":tenant_name_snapshot")
|
||||
payload["tenant_name_snapshot"] = str(run_snapshot.get("tenant_name_snapshot") or "") or None
|
||||
await session.execute(
|
||||
text(
|
||||
"""
|
||||
f"""
|
||||
INSERT INTO leaudit_run_errors (
|
||||
run_id,
|
||||
document_id,
|
||||
stage,
|
||||
level,
|
||||
error_code,
|
||||
message,
|
||||
detail_json
|
||||
{", ".join(columns)}
|
||||
) VALUES (
|
||||
:run_id,
|
||||
:document_id,
|
||||
:stage,
|
||||
:level,
|
||||
:error_code,
|
||||
:message,
|
||||
:detail_json
|
||||
{", ".join(values)}
|
||||
)
|
||||
"""
|
||||
),
|
||||
{
|
||||
"run_id": resolved_run_id,
|
||||
"document_id": document_id,
|
||||
"stage": stage,
|
||||
"level": level,
|
||||
"error_code": error_code,
|
||||
"message": message,
|
||||
"detail_json": json.dumps(detail_json, ensure_ascii=False)
|
||||
if detail_json is not None
|
||||
else None,
|
||||
},
|
||||
payload,
|
||||
)
|
||||
await session.commit()
|
||||
|
||||
|
||||
@@ -42,6 +42,9 @@ from fastapi_modules.fastapi_leaudit.models import (
|
||||
LeauditDocument,
|
||||
LeauditDocumentFile,
|
||||
)
|
||||
from fastapi_modules.fastapi_leaudit.services.impl.ruleTenantMaterializer import (
|
||||
GetRuleTenantMaterializerSingleton,
|
||||
)
|
||||
|
||||
log = logger
|
||||
|
||||
@@ -318,6 +321,24 @@ def leaudit_scan_stuck_documents_task(self) -> dict[str, Any]:
|
||||
loop.close()
|
||||
|
||||
|
||||
@celery_app.task(
|
||||
bind=True,
|
||||
name="leaudit.materialize_rule_tenants",
|
||||
)
|
||||
def leaudit_materialize_rule_tenants_task(self) -> dict[str, Any]:
|
||||
"""周期补齐真实租户的规则私有副本。"""
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
stats = loop.run_until_complete(
|
||||
GetRuleTenantMaterializerSingleton().MaterializeAllEnabledTenants()
|
||||
)
|
||||
log.info("规则租户物化完成: %s", stats)
|
||||
return {"status": "success", **stats}
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
|
||||
# type_id → rules directory mapping (only fixed-mapping types)
|
||||
# 行政许可 (type_id=2) has 9 sub-types, NOT mapped here —
|
||||
# must come from document metadata (rules_file_path) or content classification.
|
||||
|
||||
Reference in New Issue
Block a user