Files

84 lines
2.2 KiB
Python

"""doc / wps → docx 转换。"""
from __future__ import annotations
import shutil
import subprocess
from pathlib import Path
from fastapi_modules.fastapi_leaudit.govdoc_engine.config import get_settings
class UnsupportedFormat(Exception):
    """Raised when a file's extension is not one this module can handle."""
class ConversionError(Exception):
    """Raised when converting a document to .docx via soffice fails."""
# Extensions that are already .docx and are handed back without conversion.
_SUPPORTED_DIRECT = {".docx"}

# Extensions that have to be converted to .docx by soffice.
_SUPPORTED_CONVERT = {".wps", ".doc"}

# Absolute soffice locations probed, in this order, after the configured path.
_SOFFICE_FALLBACK_PATHS = (
    "/opt/homebrew/bin/soffice",
    "/usr/local/bin/soffice",
    "/Applications/LibreOffice.app/Contents/MacOS/soffice",
    "/usr/bin/soffice",
)
def load_to_docx(src: Path) -> Path:
    """Return a .docx path for *src*, converting .doc/.wps through soffice.

    Args:
        src: Path of the input document; its suffix is compared
            case-insensitively.

    Returns:
        *src* itself when it is already a .docx, otherwise the path of the
        converted file.

    Raises:
        UnsupportedFormat: If the suffix is neither .docx, .doc nor .wps.
    """
    suffix = src.suffix.lower()
    if suffix not in _SUPPORTED_DIRECT and suffix not in _SUPPORTED_CONVERT:
        raise UnsupportedFormat(f"unsupported file type: {suffix}")
    # Already-.docx input is handed back untouched; everything else that
    # passed the guard above needs a soffice conversion.
    return src if suffix in _SUPPORTED_DIRECT else _convert_via_soffice(src)
def _convert_via_soffice(src: Path) -> Path:
    """Convert *src* to .docx with a headless soffice run.

    The result is placed next to *src* as ``<stem>.docx``, replacing any
    file already at that path.

    Args:
        src: Path of the document to convert.

    Returns:
        Path of the freshly converted ``.docx`` file.

    Raises:
        ConversionError: If soffice cannot be located or started, exceeds
            the 60-second timeout, exits with a non-zero status, or does
            not produce the expected output file.
    """
    import tempfile  # local import: only this function needs it

    soffice = _resolve_soffice_path(get_settings().soffice_path)
    out_dir = src.parent
    out = out_dir / (src.stem + ".docx")

    try:
        # Convert into a scratch directory rather than straight into out_dir:
        # a <stem>.docx left over from an earlier run would otherwise satisfy
        # the existence check below even if this run wrote nothing.
        with tempfile.TemporaryDirectory(dir=out_dir, prefix=".soffice-") as scratch:
            cmd = [
                soffice, "--headless", "--convert-to", "docx",
                "--outdir", scratch, str(src),
            ]
            result = subprocess.run(cmd, capture_output=True, timeout=60)
            if result.returncode != 0:
                raise ConversionError(
                    f"soffice exit {result.returncode}: "
                    f"{result.stderr.decode(errors='ignore')}"
                )
            produced = Path(scratch) / out.name
            if not produced.exists():
                raise ConversionError(f"expected output not found: {out}")
            # The scratch directory lives inside out_dir, so this is a
            # same-filesystem move that overwrites any previous output.
            produced.replace(out)
    except subprocess.TimeoutExpired as e:
        raise ConversionError("soffice timeout") from e
    except OSError as e:
        # Covers a binary that cannot be executed as well as filesystem
        # errors while creating the scratch directory or moving the result.
        raise ConversionError(f"soffice conversion failed: {e}") from e
    return out
def _resolve_soffice_path(configured: str) -> str:
    """Locate a usable soffice executable.

    The configured value is tried first, followed by each entry of
    ``_SOFFICE_FALLBACK_PATHS`` in order; duplicates are tried only once.

    Args:
        configured: Preferred soffice command name or path. An empty value
            is skipped so that the fallback locations are still tried.

    Returns:
        The path reported by ``shutil.which`` for the first candidate it can
        resolve, or the candidate itself when ``shutil.which`` rejects it
        but it names an existing regular file.

    Raises:
        ConversionError: If no candidate resolves; the message lists every
            candidate that was checked.
    """
    # Drop empty values and de-duplicate while keeping priority order.
    # Path("") is the current directory, which exists, so an empty setting
    # would otherwise "resolve" to an empty command.
    candidates = list(
        dict.fromkeys(c for c in (configured, *_SOFFICE_FALLBACK_PATHS) if c)
    )
    for candidate in candidates:
        resolved = shutil.which(candidate)
        if resolved:
            return resolved
        # Lenient fallback for files that `which` rejects; a directory can
        # never be executed, so require a regular file.
        if Path(candidate).is_file():
            return candidate
    raise ConversionError(
        f"soffice not found; checked: {', '.join(candidates)}. "
        "Install LibreOffice or set SOFFICE_PATH."
    )