fix: remove sha256 duplicate check so re-upload always creates new version in group

This commit is contained in:
wren
2026-05-13 16:41:11 +08:00
parent 9eec45cbd0
commit 0a8f46008d
7 changed files with 297 additions and 84 deletions
@@ -173,65 +173,65 @@ class DocumentServiceImpl(IDocumentService):
else:
versionGroupKey = uuid.uuid4().hex
document = await LeauditDocument.create_new(
Session,
document = await LeauditDocument.create_new(
Session,
bizDocumentId=internalDocumentNo,
typeId=resolvedTypeId,
groupId=resolvedGroupId,
region=normalizedRegion,
processingStatus="waiting",
versionGroupKey=versionGroupKey,
versionNo=versionNo,
previousVersionId=previousVersionId,
rootVersionId=rootVersionId,
isLatestVersion=True,
normalizedName=normalizedName,
)
if document.rootVersionId is None:
document.rootVersionId = document.Id
rootVersionId = document.Id
else:
rootVersionId = document.rootVersionId
versionNo=versionNo,
previousVersionId=previousVersionId,
rootVersionId=rootVersionId,
isLatestVersion=True,
normalizedName=normalizedName,
)
if document.rootVersionId is None:
document.rootVersionId = document.Id
rootVersionId = document.Id
else:
rootVersionId = document.rootVersionId
versionLabel = f"v{document.versionNo}"
objectKey = OssPathUtils.BuildBusinessDocKey(
Region=normalizedRegion,
TypeCode=resolvedTypeCode,
DocumentId=document.Id,
Version=versionLabel,
FileRole=normalizedFileRole,
FileName=FileName,
Year=uploadedAt.year,
Month=uploadedAt.month,
)
ossUrl = await self.OssService.UploadBytes(
ObjectKey=objectKey,
Content=FileContent,
ContentType=mimeType,
)
versionLabel = f"v{document.versionNo}"
objectKey = OssPathUtils.BuildBusinessDocKey(
Region=normalizedRegion,
TypeCode=resolvedTypeCode,
DocumentId=document.Id,
Version=versionLabel,
FileRole=normalizedFileRole,
FileName=FileName,
Year=uploadedAt.year,
Month=uploadedAt.month,
)
ossUrl = await self.OssService.UploadBytes(
ObjectKey=objectKey,
Content=FileContent,
ContentType=mimeType,
)
versionCount = await LeauditDocumentFile.count_by_document(Session, document.Id)
_ = versionCount # single-version-per-document in current model; kept for future extension
await LeauditDocumentFile.deactivate_active_by_document(Session, document.Id)
documentFile = LeauditDocumentFile(
documentId=document.Id,
fileRole=normalizedFileRole,
fileName=FileName,
fileExt=fileExt,
mimeType=mimeType,
fileSize=fileSize,
sha256=fileSha256,
localPath=None,
ossUrl=ossUrl,
storageProvider="minio",
isActive=True,
createdBy=CreatedBy,
)
Session.add(documentFile)
await Session.flush()
await Session.commit()
await Session.refresh(document)
await Session.refresh(documentFile)
versionCount = await LeauditDocumentFile.count_by_document(Session, document.Id)
_ = versionCount # single-version-per-document in current model; kept for future extension
await LeauditDocumentFile.deactivate_active_by_document(Session, document.Id)
documentFile = LeauditDocumentFile(
documentId=document.Id,
fileRole=normalizedFileRole,
fileName=FileName,
fileExt=fileExt,
mimeType=mimeType,
fileSize=fileSize,
sha256=fileSha256,
localPath=None,
ossUrl=ossUrl,
storageProvider="minio",
isActive=True,
createdBy=CreatedBy,
)
Session.add(documentFile)
await Session.flush()
await Session.commit()
await Session.refresh(document)
await Session.refresh(documentFile)
if normalizedFileRole == "primary" and Attachments:
await self._appendAttachmentFiles(
@@ -96,8 +96,103 @@ class GovdocServiceImpl(IGovdocService):
# ── 结果与报告 ────────────────────────────────────────
async def GetRunResult(self, runId: int) -> dict[str, Any]:
    """Load the review result of one run from govdoc_runs and govdoc_rule_results.

    Args:
        runId: Value matched against ``govdoc_runs.id`` and
            ``govdoc_rule_results.run_id``.

    Returns:
        A dict with the keys ``runId``, ``summary``, ``checkedRules``,
        ``findings``, ``structure``, ``outline`` and ``entities``. When no
        run row exists, the same keys are returned with empty values.
    """
    import json as _json

    from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession
    from sqlalchemy import text

    async with GetAsyncSession() as session:
        run_row = await session.execute(
            text(
                "SELECT id, document_id, status, phase, total_score, passed_count,\n"
                "failed_count, skipped_count, result_status, result_summary_json,\n"
                "rules_path, started_at, finished_at\n"
                "FROM govdoc_runs WHERE id = :rid"
            ),
            {"rid": runId},
        )
        run_data = run_row.mappings().first()
        if not run_data:
            return {
                "runId": runId,
                "summary": {},
                "checkedRules": [],
                "findings": [],
                "structure": [],
                "outline": [],
                "entities": {},
            }
        rules_rows = await session.execute(
            text(
                "SELECT rule_id, rule_name, severity, category, result, skip_reason,\n"
                "message, suggestion, actual, expected, evidence,\n"
                "paragraph_index, paragraph_text, location_path, score\n"
                "FROM govdoc_rule_results WHERE run_id = :rid"
            ),
            {"rid": runId},
        )
        rule_results = [dict(r._mapping) for r in rules_rows.fetchall()]
        run = dict(run_data)

    def _value_or(column: str, fallback: Any) -> Any:
        # Every selected column is present in the row, so dict.get's default
        # would never apply; a NULL column must be coalesced explicitly.
        value = run.get(column)
        return fallback if value is None else value

    # result_summary_json may arrive as a JSON string or as an already-decoded
    # value; only a dict is usable for the structure/outline lookups below.
    aux: dict[str, Any] = {}
    aux_raw = run.get("result_summary_json")
    if aux_raw:
        try:
            parsed = _json.loads(aux_raw) if isinstance(aux_raw, str) else aux_raw
        except (TypeError, _json.JSONDecodeError):
            logger.warning(
                "[Govdoc] GetRunResult: result_summary_json is not valid JSON — runId=%s",
                runId,
            )
        else:
            if isinstance(parsed, dict):
                aux = parsed

    # NOTE(review): one finding is emitted per rule-result row regardless of
    # its `result` value — confirm whether pass/skipped rows belong here.
    findings = []
    for rr in rule_results:
        loc = {}
        if rr.get("paragraph_index") is not None:
            loc["paragraph_index"] = rr["paragraph_index"]
        if rr.get("paragraph_text"):
            loc["context"] = rr["paragraph_text"]
        if rr.get("location_path"):
            loc["role"] = rr["location_path"]
        findings.append({
            "finding_id": f"{rr['rule_id']}-{rr['paragraph_index'] or 0}",
            "rule_id": rr["rule_id"],
            "rule_name": rr["rule_name"],
            "severity": rr["severity"],
            "category": rr["category"],
            "location": loc if loc else None,
            "actual": rr.get("actual") or {},
            "expected": rr.get("expected") or {},
            "message": rr.get("message") or "",
            "suggestion": rr.get("suggestion") or "",
            "evidence": rr.get("evidence") or "",
            "confidence": 1.0,
        })

    # One entry per rule_id; the first row seen for a rule decides its status.
    checked_rules = []
    seen = set()
    for rr in rule_results:
        rid = rr["rule_id"]
        if rid in seen:
            continue
        seen.add(rid)
        status = rr.get("result", "pass")
        checked_rules.append({
            "rule_id": rid,
            "name": rr["rule_name"],
            "severity": rr["severity"],
            "category": rr["category"],
            # Any value outside the three known states is reported as "pass".
            "status": status if status in ("pass", "fail", "skipped") else "pass",
            "skip_reason": rr.get("skip_reason"),
        })

    return {
        "runId": runId,
        "summary": {
            "score": _value_or("total_score", 100),
            "total_findings": len(findings),
            # Returned empty; no per-severity/per-category aggregation is done here.
            "by_severity": {},
            "by_category": {},
            "passed_count": _value_or("passed_count", 0),
            "failed_count": _value_or("failed_count", 0),
            "skipped_count": _value_or("skipped_count", 0),
        },
        "checkedRules": checked_rules,
        "findings": findings,
        "structure": aux.get("structure", []),
        "outline": aux.get("outline", []),
        "entities": {},
    }
async def GetRunFindings(self, runId: int) -> dict[str, Any]:
logger.info("[Govdoc] GetRunFindings placeholder — runId=%s", runId)
@@ -133,6 +228,81 @@ class GovdocServiceImpl(IGovdocService):
# ── 规则 ──────────────────────────────────────────────
async def ListRules(self, rulesPath: str | None = None) -> dict[str, Any]:
    """Load the rule list from the govdoc rules YAML file.

    Args:
        rulesPath: Optional path to the rules file; passed through unchanged
            to ``_load_rules_list``.

    Returns:
        ``{"rules": [...], "total_rules": <number of rules>}``.
    """
    # The earlier placeholder definition (which always returned an empty list)
    # was shadowed by this one and has been dropped.
    rules = await self._load_rules_list(rulesPath)
    return {"rules": rules, "total_rules": len(rules)}
async def GetRuleDetail(self, ruleId: str, rulesPath: str | None = None) -> dict[str, Any]:
    """Return the full detail of a single rule (name, severity, stages, messages, ...).

    Args:
        ruleId: Identifier compared against each rule's ``rule_id``.
        rulesPath: Optional path to the rules file; passed to ``_load_ruleset``.

    Returns:
        The detail dict of the first rule whose ``rule_id`` equals ``ruleId``.
        When the ruleset cannot be loaded or no rule matches, a minimal stub
        that echoes ``ruleId`` as both id and name is returned instead.
    """
    ruleset = await self._load_ruleset(rulesPath)

    matched = None
    if ruleset is not None:
        matched = next(
            (candidate for candidate in ruleset.all_rules() if candidate.rule_id == ruleId),
            None,
        )

    if matched is None:
        # Same stub for "no ruleset" and "unknown rule id".
        return {"rule_id": ruleId, "name": ruleId, "severity": "info", "category": "", "group": ""}

    detail: dict[str, Any] = {
        "rule_id": matched.rule_id,
        "name": matched.name,
        "severity": matched.severity,
        "category": matched.category,
        "group": "",
    }
    detail["applies_to"] = matched.applies_to.model_dump() if matched.applies_to else None
    detail["target"] = matched.target
    detail["on_missing"] = matched.on_missing
    detail["stages"] = [
        stage.model_dump(exclude_none=True) for stage in (matched.stages or [])
    ]
    detail["messages"] = matched.messages.model_dump() if matched.messages else {}
    return detail
# ── 规则加载助手 ────────────────────────────────────
async def _resolve_rules_path(self, rulesPath: str | None = None) -> str | None:
"""解析规则 YAML 文件路径。
优先级:传入参数 > govdoc_runs 表记录 > None
"""
if rulesPath:
return rulesPath
# 尝试从最近的 completed run 中获取 rules_path
try:
from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession
from sqlalchemy import text
async with GetAsyncSession() as session:
row = await session.execute(
text(
"""SELECT rules_path FROM govdoc_runs
WHERE rules_path IS NOT NULL AND status = 'completed'
ORDER BY id DESC LIMIT 1"""
)
)
result = row.mappings().first()
if result and result.get("rules_path"):
return result["rules_path"]
except Exception:
pass
return None
async def _load_ruleset(self, rulesPath: str | None = None):
    """Load the rules YAML into a RuleSet object.

    Args:
        rulesPath: Optional explicit path; resolved via ``_resolve_rules_path``.

    Returns:
        Whatever ``load_rules`` returns for the resolved path, or ``None``
        (after logging a warning) when no path could be resolved.
    """
    rules_file = await self._resolve_rules_path(rulesPath)
    if rules_file:
        # Imported only once a path is known, as in the original flow.
        from fastapi_modules.fastapi_leaudit.govdoc_engine.dsl.loader import load_rules

        return load_rules(rules_file)

    logger.warning("[Govdoc] Cannot resolve rules path for GetRuleDetail/ListRules")
    return None
async def _load_rules_list(self, rulesPath: str | None = None) -> list[dict[str, Any]]:
    """Load the rule list as brief summary dicts.

    Args:
        rulesPath: Optional path to the rules file; passed to ``_load_ruleset``.

    Returns:
        One dict per rule with ``rule_id``, ``name``, ``severity``,
        ``category`` and an empty ``group``; an empty list when the ruleset
        cannot be loaded.
    """
    loaded = await self._load_ruleset(rulesPath)
    if loaded is None:
        return []
    return [
        {
            "rule_id": entry.rule_id,
            "name": entry.name,
            "severity": entry.severity,
            "category": entry.category,
            "group": "",
        }
        for entry in loaded.all_rules()
    ]