"""Audit service implementation.

Orchestrates the LeAudit engine execution chain:
    document → OCR → Extract → Evaluate → Rescue → Persist
"""

from datetime import datetime
from typing import SupportsFloat

from fastapi_common.fastapi_common_logger import logger
from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession
from fastapi_common.fastapi_common_web.domain.responses import StatusCodeEnum
from fastapi_common.fastapi_common_web.exception.LeauditException import LeauditException
from sqlalchemy import select, text

from fastapi_modules.fastapi_leaudit.domian.vo.auditVo import (
    AuditArtifactVO,
    AuditFieldResultVO,
    AuditMetricsVO,
    AuditRescueOutcomeVO,
    AuditResultVO,
    AuditRunErrorVO,
    AuditRunVO,
)
from fastapi_modules.fastapi_leaudit.leaudit_bridge.fileSourceResolver import FileSourceResolver
from fastapi_modules.fastapi_leaudit.leaudit_bridge.tasks import dispatch_leaudit_task
from fastapi_modules.fastapi_leaudit.models import (
    LeauditAuditRun,
    LeauditDocument,
    LeauditDocumentFile,
)
from fastapi_modules.fastapi_leaudit.services import IAuditService

# Highest-priority active rule binding for a (doc type, region) pair, joined to
# the rule set and its current version.
_RULE_BINDING_SQL = """
    SELECT
        rs.id AS rule_set_id,
        rs.current_version_id AS rule_version_id,
        rv.oss_url AS rule_source_oss_url,
        rv.file_sha256 AS rule_source_sha256,
        rv.metadata_type_id AS rule_type_id
    FROM leaudit_rule_type_bindings b
    JOIN leaudit_rule_sets rs ON rs.id = b.rule_set_id
    LEFT JOIN leaudit_rule_versions rv ON rv.id = rs.current_version_id
    WHERE b.doc_type_id = :doc_type_id
      AND b.is_active = true
      AND b.region = :region
    ORDER BY b.priority DESC, b.id DESC
    LIMIT 1
"""

_RULE_RESULTS_SQL = """
    SELECT
        rule_id, rule_name, risk, score, passed, status, skip_reason,
        confidence, pass_message, fail_message, remediation,
        extracted_fields, field_positions, rescue_applied, rescue_passed
    FROM leaudit_rule_results
    WHERE run_id = :run_id
    ORDER BY id ASC
"""

_FIELD_RESULTS_SQL = """
    SELECT
        field_name, field_type, value_text, confidence, grounding_method,
        fallback_value, raw_value_json, meta_json
    FROM leaudit_field_results
    WHERE run_id = :run_id
    ORDER BY id ASC
"""

_RUN_ERRORS_SQL = """
    SELECT stage, level, error_code, message, detail_json, created_at
    FROM leaudit_run_errors
    WHERE run_id = :run_id
    ORDER BY id ASC
"""

_RESCUE_OUTCOMES_SQL = """
    SELECT
        rule_id, status, diagnosis, diagnosis_confidence, final_status,
        failure_reason, llm_calls, vlm_calls, duration_ms,
        requires_human_review, payload
    FROM leaudit_rescue_outcomes
    WHERE run_id = :run_id
    ORDER BY id ASC
"""

# Only the most recent metrics row of the run is reported.
_RUN_METRICS_SQL = """
    SELECT
        ocr_seconds, normalize_seconds, extract_seconds, evaluate_seconds,
        rescue_seconds, total_seconds, page_count, sub_document_count,
        field_count, rule_count, llm_call_count, vlm_call_count,
        rescue_rule_count, artifact_count
    FROM leaudit_run_metrics
    WHERE run_id = :run_id
    ORDER BY id DESC
    LIMIT 1
"""

_ARTIFACTS_SQL = """
    SELECT
        artifact_type, artifact_role, file_name, file_ext, mime_type,
        file_size, oss_url, is_persisted
    FROM leaudit_artifacts
    WHERE run_id = :run_id
    ORDER BY id ASC
"""


def _ToOptionalFloat(Value: SupportsFloat | None) -> float | None:
    """Convert ``Value`` to ``float``, passing ``None`` through unchanged.

    The check is ``is not None`` rather than truthiness so that a stored
    zero is reported as ``0.0`` instead of being mistaken for "no value".
    """
    return float(Value) if Value is not None else None


def _BuildRunVO(run: LeauditAuditRun) -> AuditRunVO:
    """Map a ``LeauditAuditRun`` row onto the ``AuditRunVO`` returned to callers."""
    return AuditRunVO(
        runId=run.Id,
        documentId=run.documentId,
        runNo=run.runNo,
        documentFileId=run.documentFileId,
        status=run.status,
        phase=run.phase,
        resultStatus=run.resultStatus,
        ruleSetId=run.ruleSetId,
        ruleVersionId=run.ruleVersionId,
        ruleTypeId=run.ruleTypeId,
        rescueApplied=run.rescueApplied or False,
        totalScore=_ToOptionalFloat(run.totalScore),
        passedCount=run.passedCount,
        failedCount=run.failedCount,
        skippedCount=run.skippedCount,
        startedAt=run.startedAt,
        finishedAt=run.finishedAt,
    )


async def _FetchRunRows(session, Sql: str, RunId: int):
    """Execute ``Sql`` with ``:run_id`` bound to ``RunId`` and return all rows as mappings.

    ``session`` is the open session yielded by ``GetAsyncSession``.
    """
    result = await session.execute(text(Sql), {"run_id": RunId})
    return result.mappings().all()


class AuditServiceImpl(IAuditService):
    """Audit service implementation."""

    async def Run(self, DocumentId: int, RuleType: str | None = None, Force: bool = False) -> AuditRunVO:
        """Trigger an audit run for a document.

        At this stage the bridge execution chain is triggered synchronously;
        it is planned to switch to Celery asynchronous dispatch later.

        Args:
            DocumentId: Primary key of the ``LeauditDocument`` to audit.
            RuleType: Forwarded to ``dispatch_leaudit_task`` as ``rules_path``.
                It does not take part in the rule-binding lookup.
            Force: When true the run is recorded with trigger source
                ``"retry"`` instead of ``"manual"``.

        Returns:
            The run as an ``AuditRunVO``, re-read from the database after dispatch.

        Raises:
            LeauditException: 404 when the document does not exist; 400 when
                it has no active file version or no usable rule binding; 500
                when the file payload cannot be resolved.
        """
        async with GetAsyncSession() as session:
            logger.info(f"触发评查: documentId={DocumentId}, ruleType={RuleType}")

            document = await session.get(LeauditDocument, DocumentId)
            if not document:
                raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "评查文档不存在")

            # Most recent active file version of the document.
            fileResult = await session.execute(
                select(LeauditDocumentFile)
                .where(
                    LeauditDocumentFile.documentId == DocumentId,
                    LeauditDocumentFile.isActive.is_(True),
                )
                .order_by(LeauditDocumentFile.Id.desc())
                .limit(1)
            )
            documentFile = fileResult.scalar_one_or_none()
            if not documentFile:
                raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "当前文档没有可执行文件版本")

            # Run numbers are per document; a document with no runs starts at 1.
            runNoResult = await session.execute(
                select(LeauditAuditRun.runNo)
                .where(LeauditAuditRun.documentId == DocumentId)
                .order_by(LeauditAuditRun.runNo.desc())
                .limit(1)
            )
            latestRunNo = runNoResult.scalar_one_or_none() or 0

            bindingResult = await session.execute(
                text(_RULE_BINDING_SQL),
                {"doc_type_id": document.typeId, "region": document.region},
            )
            binding = bindingResult.mappings().first()
            if not binding or not binding["rule_set_id"] or not binding["rule_version_id"]:
                raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "当前文档类型未绑定可用规则版本")

            run = LeauditAuditRun(
                documentId=DocumentId,
                documentFileId=documentFile.Id,
                runNo=int(latestRunNo) + 1,
                triggerSource="manual" if not Force else "retry",
                status="pending",
                ruleSetId=int(binding["rule_set_id"]),
                ruleVersionId=int(binding["rule_version_id"]),
                ruleTypeId=binding["rule_type_id"],
                ruleSourceOssUrl=binding["rule_source_oss_url"],
                ruleSourceSha256=binding["rule_source_sha256"],
                startedAt=datetime.now(),
            )
            session.add(run)
            # Flush so run.Id is populated before it is linked from the document.
            await session.flush()

            document.currentRunId = run.Id
            document.processingStatus = "running"
            await session.commit()
            await session.refresh(run)

            # NOTE(review): the run ("pending") and the document ("running") are
            # already committed at this point; if payload resolution fails below,
            # neither is reset. Confirm whether a failure status should be
            # recorded here.
            try:
                Resolver = FileSourceResolver()
                Payload = await Resolver.ResolvePayload(documentFile)
            except Exception as Error:
                raise LeauditException(
                    StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR,
                    f"读取评查文件失败: {Error}",
                ) from Error

            dispatch_leaudit_task(
                document_id=DocumentId,
                file_content=Payload.fileContent,
                filename=Payload.fileName,
                upload_info={
                    "run_id": run.Id,
                    "rule_version_id": run.ruleVersionId,
                    "rule_source_oss_url": run.ruleSourceOssUrl,
                    "source_type": Payload.sourceType,
                    "source_path": Payload.sourcePath,
                },
                rules_path=RuleType,
            )

            # Re-read the row after dispatch so the returned VO reflects the
            # current database state.
            await session.refresh(run)
            return _BuildRunVO(run)

    async def GetRunStatus(self, RunId: int) -> AuditRunVO:
        """Return the current status of an audit run.

        Args:
            RunId: Primary key of the ``LeauditAuditRun``.

        Raises:
            LeauditException: 404 when the run record does not exist.
        """
        async with GetAsyncSession() as session:
            run = await session.get(LeauditAuditRun, RunId)
            if not run:
                raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "评查运行记录不存在")
            return _BuildRunVO(run)

    async def GetResult(self, RunId: int) -> AuditResultVO:
        """Return the full result of an audit run.

        Collects the run row together with its rule results, field results,
        errors, rescue outcomes, latest metrics row and artifacts.

        Args:
            RunId: Primary key of the ``LeauditAuditRun``.

        Raises:
            LeauditException: 404 when the run record does not exist.
        """
        async with GetAsyncSession() as session:
            run = await session.get(LeauditAuditRun, RunId)
            if not run:
                raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "评查运行记录不存在")

            ruleRows = await _FetchRunRows(session, _RULE_RESULTS_SQL, RunId)
            fieldRows = await _FetchRunRows(session, _FIELD_RESULTS_SQL, RunId)
            errorRows = await _FetchRunRows(session, _RUN_ERRORS_SQL, RunId)
            rescueRows = await _FetchRunRows(session, _RESCUE_OUTCOMES_SQL, RunId)
            metricRows = await _FetchRunRows(session, _RUN_METRICS_SQL, RunId)
            artifactRows = await _FetchRunRows(session, _ARTIFACTS_SQL, RunId)

            # Rule results are returned as plain dicts keyed by column name.
            rules = [dict(row) for row in ruleRows]

            fields = [
                AuditFieldResultVO(
                    fieldName=row["field_name"],
                    fieldType=row["field_type"],
                    valueText=row["value_text"],
                    confidence=_ToOptionalFloat(row["confidence"]),
                    groundingMethod=row["grounding_method"],
                    fallbackValue=row["fallback_value"],
                    rawValueJson=row["raw_value_json"],
                    metaJson=row["meta_json"],
                )
                for row in fieldRows
            ]

            errors = [
                AuditRunErrorVO(
                    stage=row["stage"],
                    level=row["level"],
                    errorCode=row["error_code"],
                    message=row["message"],
                    detailJson=row["detail_json"],
                    createdAt=row["created_at"],
                )
                for row in errorRows
            ]

            rescueOutcomes = [
                AuditRescueOutcomeVO(
                    ruleId=row["rule_id"],
                    status=row["status"],
                    diagnosis=row["diagnosis"],
                    diagnosisConfidence=_ToOptionalFloat(row["diagnosis_confidence"]),
                    finalStatus=row["final_status"],
                    failureReason=row["failure_reason"],
                    llmCalls=row["llm_calls"],
                    vlmCalls=row["vlm_calls"],
                    durationMs=row["duration_ms"],
                    requiresHumanReview=bool(row["requires_human_review"]),
                    payload=row["payload"],
                )
                for row in rescueRows
            ]

            # The metrics query is limited to one row; a run may have none yet.
            metricRow = metricRows[0] if metricRows else None
            metrics = (
                AuditMetricsVO(
                    ocrSeconds=_ToOptionalFloat(metricRow["ocr_seconds"]),
                    normalizeSeconds=_ToOptionalFloat(metricRow["normalize_seconds"]),
                    extractSeconds=_ToOptionalFloat(metricRow["extract_seconds"]),
                    evaluateSeconds=_ToOptionalFloat(metricRow["evaluate_seconds"]),
                    rescueSeconds=_ToOptionalFloat(metricRow["rescue_seconds"]),
                    totalSeconds=_ToOptionalFloat(metricRow["total_seconds"]),
                    pageCount=metricRow["page_count"],
                    subDocumentCount=metricRow["sub_document_count"],
                    fieldCount=metricRow["field_count"],
                    ruleCount=metricRow["rule_count"],
                    llmCallCount=metricRow["llm_call_count"],
                    vlmCallCount=metricRow["vlm_call_count"],
                    rescueRuleCount=metricRow["rescue_rule_count"],
                    artifactCount=metricRow["artifact_count"],
                )
                if metricRow
                else None
            )

            artifacts = [
                AuditArtifactVO(
                    artifactType=row["artifact_type"],
                    artifactRole=row["artifact_role"],
                    fileName=row["file_name"],
                    fileExt=row["file_ext"],
                    mimeType=row["mime_type"],
                    fileSize=row["file_size"],
                    ossUrl=row["oss_url"],
                    isPersisted=row["is_persisted"],
                )
                for row in artifactRows
            ]

            return AuditResultVO(
                runId=run.Id,
                documentId=run.documentId,
                documentFileId=run.documentFileId,
                status=run.status,
                totalScore=_ToOptionalFloat(run.totalScore),
                passedCount=run.passedCount or 0,
                failedCount=run.failedCount or 0,
                skippedCount=run.skippedCount or 0,
                phase=run.phase,
                resultStatus=run.resultStatus,
                rescueApplied=run.rescueApplied or False,
                ruleSetId=run.ruleSetId,
                ruleVersionId=run.ruleVersionId,
                startedAt=run.startedAt,
                finishedAt=run.finishedAt,
                rules=rules,
                fields=fields,
                errors=errors,
                rescueOutcomes=rescueOutcomes,
                metrics=metrics,
                artifacts=artifacts,
            )