feat: add tenant-scoped rule and permission management

This commit is contained in:
wren
2026-05-21 22:03:08 +08:00
parent a2c2bf1969
commit 1f1bccf3b3
193 changed files with 64463 additions and 1771 deletions
@@ -27,6 +27,64 @@ log = logging.getLogger(__name__)
class StorageAdapter:
"""Write leaudit pipeline results to leaudit_platform database."""
def __init__(self) -> None:
self._column_exists_cache: dict[str, bool] = {}
async def _column_exists(self, session, table_name: str, column_name: str) -> bool:
cache_key = f"{table_name}.{column_name}"
cached = self._column_exists_cache.get(cache_key)
if cached is not None:
return cached
exists = bool(
(
await session.execute(
text(
"""
SELECT EXISTS (
SELECT 1
FROM information_schema.columns
WHERE table_schema = current_schema()
AND table_name = :table_name
AND column_name = :column_name
)
"""
),
{"table_name": table_name, "column_name": column_name},
)
).scalar_one()
)
self._column_exists_cache[cache_key] = exists
return exists
async def _load_run_tenant_snapshot(self, session, run_id: int | None) -> dict[str, Any]:
if run_id is None:
return {}
select_fields = ["id"]
if await self._column_exists(session, "leaudit_audit_runs", "tenant_code"):
select_fields.append("tenant_code")
if await self._column_exists(session, "leaudit_audit_runs", "tenant_name_snapshot"):
select_fields.append("tenant_name_snapshot")
if len(select_fields) == 1:
return {}
row = (
await session.execute(
text(
f"""
SELECT {", ".join(select_fields)}
FROM leaudit_audit_runs
WHERE id = :run_id
LIMIT 1
"""
),
{"run_id": run_id},
)
).mappings().first()
return dict(row) if row else {}
async def _ensure_run_id(self, document_id: int, run_id: int | None) -> int | None:
"""Return explicit ``run_id`` when given, otherwise fall back to latest run.
@@ -169,6 +227,9 @@ class StorageAdapter:
resolved_run_id = await self._ensure_run_id(document_id, run_id)
inferred_positions = _build_inferred_field_positions(bundle, ocr_result)
async with GetAsyncSession() as session:
run_snapshot = await self._load_run_tenant_snapshot(session, resolved_run_id)
rule_results_has_tenant_code = await self._column_exists(session, "leaudit_rule_results", "tenant_code")
rule_results_has_tenant_name = await self._column_exists(session, "leaudit_rule_results", "tenant_name_snapshot")
# Delete existing results for this document+run
await session.execute(
text("DELETE FROM leaudit_rule_results WHERE document_id = :did AND run_id = :rid"),
@@ -193,6 +254,10 @@ class StorageAdapter:
)
if rule_version_id is not None:
row["rule_version_id"] = rule_version_id
if rule_results_has_tenant_code:
row["tenant_code"] = str(run_snapshot.get("tenant_code") or "") or None
if rule_results_has_tenant_name:
row["tenant_name_snapshot"] = str(run_snapshot.get("tenant_name_snapshot") or "") or None
json_columns = {"stages", "extracted_fields", "field_positions", "remediation", "rule_meta"}
serialized_row = {
key: (json.dumps(value, ensure_ascii=False) if key in json_columns and value is not None else value)
@@ -250,65 +315,43 @@ class StorageAdapter:
metric = dict(timing or {})
async with GetAsyncSession() as session:
run_snapshot = await self._load_run_tenant_snapshot(session, resolved_run_id)
has_tenant_code = await self._column_exists(session, "leaudit_run_metrics", "tenant_code")
await session.execute(
text("DELETE FROM leaudit_run_metrics WHERE run_id = :rid"),
{"rid": resolved_run_id},
)
payload = {
"run_id": resolved_run_id,
"ocr_seconds": metric.get("ocr"),
"normalize_seconds": metric.get("normalize"),
"extract_seconds": metric.get("extraction", metric.get("extract")),
"evaluate_seconds": metric.get("evaluation", metric.get("evaluate")),
"rescue_seconds": metric.get("rescue"),
"total_seconds": metric.get("total"),
"page_count": page_count,
"sub_document_count": sub_document_count,
"field_count": field_count,
"rule_count": rule_count,
"llm_call_count": None,
"vlm_call_count": None,
"rescue_rule_count": rescue_rule_count,
"artifact_count": artifact_count,
}
if has_tenant_code:
payload["tenant_code"] = str(run_snapshot.get("tenant_code") or "") or None
columns = list(payload.keys())
await session.execute(
text(
"""
f"""
INSERT INTO leaudit_run_metrics (
run_id,
ocr_seconds,
normalize_seconds,
extract_seconds,
evaluate_seconds,
rescue_seconds,
total_seconds,
page_count,
sub_document_count,
field_count,
rule_count,
llm_call_count,
vlm_call_count,
rescue_rule_count,
artifact_count
{", ".join(columns)}
) VALUES (
:run_id,
:ocr_seconds,
:normalize_seconds,
:extract_seconds,
:evaluate_seconds,
:rescue_seconds,
:total_seconds,
:page_count,
:sub_document_count,
:field_count,
:rule_count,
:llm_call_count,
:vlm_call_count,
:rescue_rule_count,
:artifact_count
{", ".join(f":{column}" for column in columns)}
)
"""
),
{
"run_id": resolved_run_id,
"ocr_seconds": metric.get("ocr"),
"normalize_seconds": metric.get("normalize"),
"extract_seconds": metric.get("extraction", metric.get("extract")),
"evaluate_seconds": metric.get("evaluation", metric.get("evaluate")),
"rescue_seconds": metric.get("rescue"),
"total_seconds": metric.get("total"),
"page_count": page_count,
"sub_document_count": sub_document_count,
"field_count": field_count,
"rule_count": rule_count,
"llm_call_count": None,
"vlm_call_count": None,
"rescue_rule_count": rescue_rule_count,
"artifact_count": artifact_count,
},
payload,
)
await session.commit()
@@ -329,40 +372,58 @@ class StorageAdapter:
resolved_run_id = await self._ensure_run_id(document_id, run_id)
async with GetAsyncSession() as session:
run_snapshot = await self._load_run_tenant_snapshot(session, resolved_run_id)
has_tenant_code = await self._column_exists(session, "leaudit_run_errors", "tenant_code")
has_tenant_name = await self._column_exists(session, "leaudit_run_errors", "tenant_name_snapshot")
for message in messages:
columns = [
"run_id",
"document_id",
"stage",
"level",
"error_code",
"message",
"detail_json",
]
values = [
":run_id",
":document_id",
":stage",
":level",
":error_code",
":message",
":detail_json",
]
payload = {
"run_id": resolved_run_id,
"document_id": document_id,
"stage": stage,
"level": level,
"error_code": error_code,
"message": message,
"detail_json": json.dumps(detail_json, ensure_ascii=False)
if detail_json is not None
else None,
}
if has_tenant_code:
columns.append("tenant_code")
values.append(":tenant_code")
payload["tenant_code"] = str(run_snapshot.get("tenant_code") or "") or None
if has_tenant_name:
columns.append("tenant_name_snapshot")
values.append(":tenant_name_snapshot")
payload["tenant_name_snapshot"] = str(run_snapshot.get("tenant_name_snapshot") or "") or None
await session.execute(
text(
"""
f"""
INSERT INTO leaudit_run_errors (
run_id,
document_id,
stage,
level,
error_code,
message,
detail_json
{", ".join(columns)}
) VALUES (
:run_id,
:document_id,
:stage,
:level,
:error_code,
:message,
:detail_json
{", ".join(values)}
)
"""
),
{
"run_id": resolved_run_id,
"document_id": document_id,
"stage": stage,
"level": level,
"error_code": error_code,
"message": message,
"detail_json": json.dumps(detail_json, ensure_ascii=False)
if detail_json is not None
else None,
},
payload,
)
await session.commit()
@@ -42,6 +42,9 @@ from fastapi_modules.fastapi_leaudit.models import (
LeauditDocument,
LeauditDocumentFile,
)
from fastapi_modules.fastapi_leaudit.services.impl.ruleTenantMaterializer import (
GetRuleTenantMaterializerSingleton,
)
log = logger
@@ -318,6 +321,24 @@ def leaudit_scan_stuck_documents_task(self) -> dict[str, Any]:
loop.close()
@celery_app.task(
bind=True,
name="leaudit.materialize_rule_tenants",
)
def leaudit_materialize_rule_tenants_task(self) -> dict[str, Any]:
"""周期补齐真实租户的规则私有副本。"""
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
stats = loop.run_until_complete(
GetRuleTenantMaterializerSingleton().MaterializeAllEnabledTenants()
)
log.info("规则租户物化完成: %s", stats)
return {"status": "success", **stats}
finally:
loop.close()
# type_id → rules directory mapping (only fixed-mapping types)
# 行政许可 (type_id=2) has 9 sub-types, NOT mapped here —
# must come from document metadata (rules_file_path) or content classification.