84 lines
2.2 KiB
Python
84 lines
2.2 KiB
Python
"""doc / wps → docx 转换。"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import shutil
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
from fastapi_modules.fastapi_leaudit.govdoc_engine.config import get_settings
|
|
|
|
|
|
class UnsupportedFormat(Exception):
    """Raised when a file's extension is neither .docx nor a convertible type."""
|
|
|
|
|
|
class ConversionError(Exception):
    """Raised when soffice cannot be found or fails to produce a .docx file."""
|
|
|
|
|
|
# Extensions (lower-case, with leading dot) that are already .docx and are
# returned to the caller untouched.
_SUPPORTED_DIRECT = {".docx"}
# Extensions that must be converted to .docx through soffice first.
_SUPPORTED_CONVERT = {".doc", ".wps"}
# Locations probed, in order, after the configured soffice path fails to
# resolve.
_SOFFICE_FALLBACK_PATHS = (
    "/opt/homebrew/bin/soffice",
    "/usr/local/bin/soffice",
    "/Applications/LibreOffice.app/Contents/MacOS/soffice",
    "/usr/bin/soffice",
)
|
|
|
|
|
|
def load_to_docx(src: Path) -> Path:
    """Return a path to a .docx version of *src*.

    The extension is compared case-insensitively. A ``.docx`` file is
    returned unchanged; ``.doc`` and ``.wps`` files are converted with
    soffice and the path of the converted file is returned.

    Raises:
        UnsupportedFormat: the extension is none of the above.
        ConversionError: the soffice conversion failed.
    """
    ext = src.suffix.lower()
    if ext in _SUPPORTED_DIRECT:
        return src
    if ext in _SUPPORTED_CONVERT:
        return _convert_via_soffice(src)
    raise UnsupportedFormat(f"unsupported file type: {ext}")
|
|
|
|
|
|
def _convert_via_soffice(src: Path) -> Path:
    """Convert *src* to .docx with headless soffice and return the new path.

    The output is written into the directory that contains *src*, named
    ``<src stem>.docx``.

    Raises:
        ConversionError: soffice could not be located or launched, did not
            finish within 60 seconds, exited with a non-zero status, or the
            expected output file is missing afterwards.
    """
    soffice = _resolve_soffice_path(get_settings().soffice_path)

    out_dir = src.parent
    cmd = [
        soffice, "--headless", "--convert-to", "docx",
        "--outdir", str(out_dir), str(src),
    ]
    try:
        result = subprocess.run(
            cmd, capture_output=True, timeout=60,
        )
    except subprocess.TimeoutExpired as e:
        raise ConversionError("soffice timeout") from e
    except OSError as e:
        # The resolved path may exist yet not be launchable (e.g. missing
        # execute permission); report that through the same exception type
        # as every other conversion failure.
        raise ConversionError(f"failed to launch soffice ({soffice}): {e}") from e

    if result.returncode != 0:
        raise ConversionError(
            f"soffice exit {result.returncode}: {result.stderr.decode(errors='ignore')}"
        )

    # The exit status alone is not trusted: the output file must be present.
    # NOTE(review): a file left at this path by an earlier run also satisfies
    # this check — confirm callers use a fresh directory per conversion.
    out = out_dir / (src.stem + ".docx")
    if not out.exists():
        raise ConversionError(f"expected output not found: {out}")
    return out
|
|
|
|
|
|
def _resolve_soffice_path(configured: str) -> str:
    """Return the first usable soffice executable path.

    *configured* is tried first, then each entry of
    ``_SOFFICE_FALLBACK_PATHS`` in order; duplicates are probed only once.
    A candidate is accepted when ``shutil.which`` resolves it, or failing
    that, when it names an existing regular file.

    Raises:
        ConversionError: no candidate resolved; the message lists every
            path that was checked.
    """
    checked: list[str] = []
    for candidate in (configured, *_SOFFICE_FALLBACK_PATHS):
        # Skip an unset/empty setting: Path("") is the current directory,
        # which exists and would otherwise be returned as the executable.
        if not candidate or candidate in checked:
            continue
        checked.append(candidate)

        resolved = shutil.which(candidate)
        if resolved:
            return resolved
        # Accept a file that which() rejected, but never a directory,
        # because a directory cannot be launched.
        if Path(candidate).is_file():
            return candidate

    raise ConversionError(
        f"soffice not found; checked: {', '.join(checked)}. "
        "Install LibreOffice or set SOFFICE_PATH."
    )
|