"""doc / wps → docx 转换。""" from __future__ import annotations import shutil import subprocess from pathlib import Path from fastapi_modules.fastapi_leaudit.govdoc_engine.config import get_settings class UnsupportedFormat(Exception): pass class ConversionError(Exception): pass _SUPPORTED_DIRECT = {".docx"} _SUPPORTED_CONVERT = {".doc", ".wps"} _SOFFICE_FALLBACK_PATHS = ( "/opt/homebrew/bin/soffice", "/usr/local/bin/soffice", "/Applications/LibreOffice.app/Contents/MacOS/soffice", "/usr/bin/soffice", ) def load_to_docx(src: Path) -> Path: """统一返回 .docx 路径。.doc/.wps 调 soffice 转换。""" ext = src.suffix.lower() if ext in _SUPPORTED_DIRECT: return src if ext in _SUPPORTED_CONVERT: return _convert_via_soffice(src) raise UnsupportedFormat(f"unsupported file type: {ext}") def _convert_via_soffice(src: Path) -> Path: soffice = _resolve_soffice_path(get_settings().soffice_path) out_dir = src.parent cmd = [ soffice, "--headless", "--convert-to", "docx", "--outdir", str(out_dir), str(src), ] try: result = subprocess.run( cmd, capture_output=True, timeout=60, ) except subprocess.TimeoutExpired as e: raise ConversionError("soffice timeout") from e if result.returncode != 0: raise ConversionError( f"soffice exit {result.returncode}: {result.stderr.decode(errors='ignore')}" ) out = out_dir / (src.stem + ".docx") if not out.exists(): raise ConversionError(f"expected output not found: {out}") return out def _resolve_soffice_path(configured: str) -> str: candidates = [configured, *_SOFFICE_FALLBACK_PATHS] checked: list[str] = [] for candidate in candidates: if candidate in checked: continue checked.append(candidate) resolved = shutil.which(candidate) if resolved: return resolved if Path(candidate).exists(): return candidate raise ConversionError( f"soffice not found; checked: {', '.join(checked)}. " "Install LibreOffice or set SOFFICE_PATH." )