feat(docx): 跨平台启动器替换 bash 包装,复用预装依赖免每次安装 (#21)

## 概述 / Summary

把 docx 技能对"客户端预装运行时依赖"的复用方式从 **bash 包装脚本**改为**跨平台 runtime 启动器**,实现
Win/macOS/Linux 一致、不依赖 Git Bash,并修复若干 POSIX 硬编码导致的 Windows 崩溃点。

Switch the docx skill's reuse of client-preinstalled runtime deps from a
**bash wrapper** to **cross-platform runtime launchers**, so it behaves
identically on Win/macOS/Linux without Git Bash, and fix several
POSIX-hardcoded crashes on Windows.

## 改动 / Changes

- **新增 / Add** `scripts/preload-deps.cjs`(Node 预加载,注入 `NODE_PATH`)与
`scripts/with-deps.py`(Python 启动器,按需切换到内置含 lxml 的 Python);**删除** bash 版
`with-deps.sh`。
- 生成走 `node -r preload-deps.cjs`,office 脚本走 `python with-deps.py` ——
离线复用预装的 docx-js / defusedxml / lxml,免每次 `npm`/`pip install`,且**不依赖
bash**。
- `comment.py` 补 defusedxml sys.path shim;`validate.py` 修临时目录泄漏(atexit
清理)。
- `accept_changes.py` 去除 `/tmp` 硬编码(`tempfile.gettempdir` +
`Path.as_uri`);`soffice.py` 仅 Linux 启用 AF_UNIX shim,避免 Windows 崩溃。
- `SKILL.md` / `SKILL.zh-CN.md` 同步命令形式、加 ESM
警告与外部工具(pandoc/LibreOffice/poppler)跨平台安装指引,`source_hash` 重算。

## 测试 / Testing

- 真实 dev 根目录端到端:生成 docx(免安装)+ 完整 XSD 校验(含 lxml)+ unpack/pack 往返均通过。
- 仓库 `validate-i18n.py` 校验通过;全 py 脚本 `py_compile` + `preload-deps.cjs`
`node --check` 通过。

---

- [x] 我已阅读并同意 CLA / I have read and agree to the CLA

Co-authored-by: 张馨元 <zhangxy@iynss.com>
Co-authored-by: Yige <a@wyr.me>
This commit is contained in:
Zxy-y
2026-06-04 11:14:36 +08:00
committed by GitHub
parent b15fce19bf
commit 17fe79ab49
10 changed files with 276 additions and 77 deletions

View File

@@ -11,15 +11,28 @@ Examples:
"""
import argparse
import os
import sys
import shutil
import tempfile
import zipfile
from pathlib import Path
# Reuse the shared Python dependencies preinstalled by the client (defusedxml, etc.).
# The four ".." segments walk up from this file's directory:
# office → scripts → docx → skills → <ROOT>, then into runtime-deps/python-libs.
_deps = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "runtime-deps", "python-libs")
# Only touch sys.path when the directory exists; inserting at index 0 makes it
# the first location searched by the imports that follow.
if os.path.isdir(_deps):
sys.path.insert(0, _deps)
import defusedxml.minidom
from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
# Guarded import: record whether the validators package could be loaded
# instead of letting an ImportError abort the module at import time.
try:
from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
_VALIDATORS_AVAILABLE = True
except ImportError as _e:
# validators depends on lxml (a compiled extension that is not preinstalled
# with the client). When it is missing, degrade gracefully: skip the full XSD
# validation instead of crashing — packing/editing itself does not need lxml.
_VALIDATORS_AVAILABLE = False
# Keep a reference to the import error; the except-bound name `_e` itself is
# unbound automatically when this clause ends.
_VALIDATORS_IMPORT_ERROR = _e
def pack(
input_directory: str,
@@ -39,15 +52,22 @@ def pack(
return None, f"Error: {output_file} must be a .docx, .pptx, or .xlsx file"
if validate and original_file:
original_path = Path(original_file)
if original_path.exists():
success, output = _run_validation(
input_dir, original_path, suffix, infer_author_func
if not _VALIDATORS_AVAILABLE:
print(
"Warning: lxml 未安装,已跳过 XSD 完整校验(文件仍正常打包)。"
"如需完整校验请安装 lxmlpip install lxml",
file=sys.stderr,
)
if output:
print(output)
if not success:
return None, f"Error: Validation failed for {input_dir}"
else:
original_path = Path(original_file)
if original_path.exists():
success, output = _run_validation(
input_dir, original_path, suffix, infer_author_func
)
if output:
print(output)
if not success:
return None, f"Error: Validation failed for {input_dir}"
with tempfile.TemporaryDirectory() as temp_dir:
temp_content_dir = Path(temp_dir) / "content"

View File

@@ -17,6 +17,7 @@ Usage:
import os
import socket
import subprocess
import sys
import tempfile
from pathlib import Path
@@ -42,11 +43,16 @@ _SHIM_SO = Path(tempfile.gettempdir()) / "lo_socket_shim.so"
# Report whether the AF_UNIX socket shim is needed: True only on Linux when an
# AF_UNIX stream socket cannot be created, False otherwise.
def _needs_shim() -> bool:
# The LD_PRELOAD + gcc-built (.so) fallback for blocked AF_UNIX sockets only
# makes sense in a Linux sandbox; macOS/Windows have neither the LD_PRELOAD
# mechanism nor that restriction, and socket.AF_UNIX does not exist on some
# Windows Python builds (AttributeError), so report "no shim needed" directly.
if sys.platform != "linux":
return False
# Probe: if an AF_UNIX stream socket can be created (and closed), no shim is needed.
try:
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
s.close()
return False
# NOTE(review): the next line appears to be the pre-change `except` clause shown
# by the diff, and the line after it its replacement — confirm against the file.
except OSError:
except (OSError, AttributeError):
# Socket creation failed (or AF_UNIX is missing): the shim is required.
return True

View File

@@ -14,10 +14,16 @@ Examples:
"""
import argparse
import os
import sys
import zipfile
from pathlib import Path
# Reuse the shared Python dependencies preinstalled by the client (defusedxml, etc.).
# The four ".." segments walk up from this file's directory:
# office → scripts → docx → skills → <ROOT>, then into runtime-deps/python-libs.
_deps = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "runtime-deps", "python-libs")
# Only touch sys.path when the directory exists; inserting at index 0 makes it
# the first location searched by the imports that follow.
if os.path.isdir(_deps):
sys.path.insert(0, _deps)
import defusedxml.minidom
from helpers.merge_runs import merge_runs as do_merge_runs

View File

@@ -14,15 +14,36 @@ Auto-repair fixes:
"""
import argparse
import atexit
import os
import shutil
import sys
import tempfile
import zipfile
from pathlib import Path
from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
# Reuse the shared Python dependencies preinstalled by the client (defusedxml, etc.).
# The four ".." segments walk up from this file's directory:
# office → scripts → docx → skills → <ROOT>, then into runtime-deps/python-libs.
_deps = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "runtime-deps", "python-libs")
# Only touch sys.path when the directory exists; inserting at index 0 makes it
# the first location searched by the imports that follow.
if os.path.isdir(_deps):
sys.path.insert(0, _deps)
# Guarded import: record whether the validators package could be loaded
# instead of letting an ImportError abort the module at import time.
try:
from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
_VALIDATORS_AVAILABLE = True
except ImportError:
# validators depends on lxml (a compiled extension that is not preinstalled
# with the client). When it is missing, degrade gracefully: print an install
# hint and exit successfully rather than crash — so that higher-level flows
# relying on this script are not blocked.
_VALIDATORS_AVAILABLE = False
def main():
if not _VALIDATORS_AVAILABLE:
print(
"Warning: lxml 未安装,已跳过 XSD 完整校验。如需完整校验请安装 lxmlpip install lxml",
file=sys.stderr,
)
return 0
parser = argparse.ArgumentParser(description="Validate Office document XML files")
parser.add_argument(
"path",
@@ -70,6 +91,8 @@ def main():
if path.is_file() and path.suffix.lower() in [".docx", ".pptx", ".xlsx"]:
temp_dir = tempfile.mkdtemp()
# 解包目录在进程退出时清理(含 sys.exit),避免每次校验泄漏 /tmp 目录
atexit.register(shutil.rmtree, temp_dir, ignore_errors=True)
with zipfile.ZipFile(path, "r") as zf:
zf.extractall(temp_dir)
unpacked_dir = Path(temp_dir)