feat(docx): 跨平台启动器替换 bash 包装,复用预装依赖免每次安装 (#21)

## 概述 / Summary

把 docx 技能对"客户端预装运行时依赖"的复用方式从 **bash 包装脚本**改为**跨平台 runtime 启动器**,实现
Win/macOS/Linux 一致、不依赖 Git Bash,并修复若干 POSIX 硬编码导致的 Windows 崩溃点。

Switch the docx skill's reuse of client-preinstalled runtime deps from a
**bash wrapper** to **cross-platform runtime launchers**, so it behaves
identically on Win/macOS/Linux without Git Bash, and fix several
POSIX-hardcoded crashes on Windows.

## 改动 / Changes

- **新增 / Add** `scripts/preload-deps.cjs`(Node 预加载,注入 `NODE_PATH`)与
`scripts/with-deps.py`(Python 启动器,按需切换到内置含 lxml 的 Python);**删除** bash 版
`with-deps.sh`。
- 生成走 `node -r preload-deps.cjs`,office 脚本走 `python with-deps.py` ——
离线复用预装的 docx-js / defusedxml / lxml,免每次 `npm`/`pip install`,且**不依赖
bash**。
- `comment.py` 补 defusedxml sys.path shim;`validate.py` 修临时目录泄漏(atexit
清理)。
- `accept_changes.py` 去除 `/tmp` 硬编码(`tempfile.gettempdir` +
`Path.as_uri`);`soffice.py` 仅 Linux 启用 AF_UNIX shim,避免 Windows 崩溃。
- `SKILL.md` / `SKILL.zh-CN.md` 同步命令形式、加 ESM
警告与外部工具(pandoc/LibreOffice/poppler)跨平台安装指引,`source_hash` 重算。

## 测试 / Testing

- 真实 dev 根目录端到端:生成 docx(免安装)+ 完整 XSD 校验(含 lxml)+ unpack/pack 往返均通过。
- 仓库 `validate-i18n.py` 校验通过;全 py 脚本 `py_compile` + `preload-deps.cjs`
`node --check` 通过。

---

- [x] 我已阅读并同意 CLA / I have read and agree to the CLA

Co-authored-by: 张馨元 <zhangxy@iynss.com>
Co-authored-by: Yige <a@wyr.me>
This commit is contained in:
Zxy-y
2026-06-04 11:14:36 +08:00
committed by GitHub
parent b15fce19bf
commit 17fe79ab49
10 changed files with 276 additions and 77 deletions

View File

@@ -5,16 +5,21 @@ Requires LibreOffice (soffice) to be installed.
import argparse
import logging
import os
import shutil
import subprocess
import tempfile
from pathlib import Path
from office.soffice import get_soffice_env
logger = logging.getLogger(__name__)
LIBREOFFICE_PROFILE = "/tmp/libreoffice_docx_profile"
MACRO_DIR = f"{LIBREOFFICE_PROFILE}/user/basic/Standard"
# 跨平台临时目录Windows 无 /tmpLibreOffice 的 UserInstallation 需 file:// URI
# Path.as_uri() 在 POSIX/Windows 上分别生成 file:///tmp/... 与 file:///C:/...
LIBREOFFICE_PROFILE = os.path.join(tempfile.gettempdir(), "libreoffice_docx_profile")
LIBREOFFICE_PROFILE_URI = Path(LIBREOFFICE_PROFILE).as_uri()
MACRO_DIR = os.path.join(LIBREOFFICE_PROFILE, "user", "basic", "Standard")
ACCEPT_CHANGES_MACRO = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE script:module PUBLIC "-//OpenOffice.org//DTD OfficeDocument 1.0//EN" "module.dtd">
@@ -58,7 +63,7 @@ def accept_changes(
cmd = [
"soffice",
"--headless",
f"-env:UserInstallation=file://{LIBREOFFICE_PROFILE}",
f"-env:UserInstallation={LIBREOFFICE_PROFILE_URI}",
"--norestore",
"vnd.sun.star.script:Standard.Module1.AcceptAllTrackedChanges?language=Basic&location=application",
str(output_path.absolute()),
@@ -100,7 +105,7 @@ def _setup_libreoffice_macro() -> bool:
[
"soffice",
"--headless",
f"-env:UserInstallation=file://{LIBREOFFICE_PROFILE}",
f"-env:UserInstallation={LIBREOFFICE_PROFILE_URI}",
"--terminate_after_init",
],
capture_output=True,

View File

@@ -14,12 +14,18 @@ After running, add markers to document.xml:
"""
import argparse
import os
import random
import shutil
import sys
from datetime import datetime, timezone
from pathlib import Path
# 复用客户端预装的共享 Python 依赖defusedxml 等scripts → docx → skills → <ROOT>
_deps = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "runtime-deps", "python-libs")
if os.path.isdir(_deps):
sys.path.insert(0, _deps)
import defusedxml.minidom
TEMPLATE_DIR = Path(__file__).parent / "templates"

View File

@@ -11,15 +11,28 @@ Examples:
"""
import argparse
import os
import sys
import shutil
import tempfile
import zipfile
from pathlib import Path
# 复用客户端预装的共享 Python 依赖defusedxml 等office → scripts → docx → skills → <ROOT>
_deps = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "runtime-deps", "python-libs")
if os.path.isdir(_deps):
sys.path.insert(0, _deps)
import defusedxml.minidom
from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
try:
from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
_VALIDATORS_AVAILABLE = True
except ImportError as _e:
# validators 依赖 lxml编译型扩展未随客户端预装。缺失时优雅降级
# 跳过 XSD 完整校验而非崩溃——打包/编辑本身不需要 lxml。
_VALIDATORS_AVAILABLE = False
_VALIDATORS_IMPORT_ERROR = _e
def pack(
input_directory: str,
@@ -39,15 +52,22 @@ def pack(
return None, f"Error: {output_file} must be a .docx, .pptx, or .xlsx file"
if validate and original_file:
original_path = Path(original_file)
if original_path.exists():
success, output = _run_validation(
input_dir, original_path, suffix, infer_author_func
if not _VALIDATORS_AVAILABLE:
print(
"Warning: lxml 未安装,已跳过 XSD 完整校验(文件仍正常打包)。"
"如需完整校验请安装 lxmlpip install lxml",
file=sys.stderr,
)
if output:
print(output)
if not success:
return None, f"Error: Validation failed for {input_dir}"
else:
original_path = Path(original_file)
if original_path.exists():
success, output = _run_validation(
input_dir, original_path, suffix, infer_author_func
)
if output:
print(output)
if not success:
return None, f"Error: Validation failed for {input_dir}"
with tempfile.TemporaryDirectory() as temp_dir:
temp_content_dir = Path(temp_dir) / "content"

View File

@@ -17,6 +17,7 @@ Usage:
import os
import socket
import subprocess
import sys
import tempfile
from pathlib import Path
@@ -42,11 +43,16 @@ _SHIM_SO = Path(tempfile.gettempdir()) / "lo_socket_shim.so"
def _needs_shim() -> bool:
# AF_UNIX socket 屏蔽的 LD_PRELOAD + gcc(.so) 兜底仅对 Linux 沙箱有意义;
# macOS/Windows 上既无 LD_PRELOAD 机制也无该限制,且 socket.AF_UNIX 在部分
# Windows Python 上不存在AttributeError直接判定为不需要 shim。
if sys.platform != "linux":
return False
try:
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
s.close()
return False
except OSError:
except (OSError, AttributeError):
return True

View File

@@ -14,10 +14,16 @@ Examples:
"""
import argparse
import os
import sys
import zipfile
from pathlib import Path
# 复用客户端预装的共享 Python 依赖defusedxml 等office → scripts → docx → skills → <ROOT>
_deps = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "runtime-deps", "python-libs")
if os.path.isdir(_deps):
sys.path.insert(0, _deps)
import defusedxml.minidom
from helpers.merge_runs import merge_runs as do_merge_runs

View File

@@ -14,15 +14,36 @@ Auto-repair fixes:
"""
import argparse
import atexit
import os
import shutil
import sys
import tempfile
import zipfile
from pathlib import Path
from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
# 复用客户端预装的共享 Python 依赖defusedxml 等office → scripts → docx → skills → <ROOT>
_deps = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "runtime-deps", "python-libs")
if os.path.isdir(_deps):
sys.path.insert(0, _deps)
try:
from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
_VALIDATORS_AVAILABLE = True
except ImportError:
# validators 依赖 lxml编译型扩展未随客户端预装。缺失时优雅降级
# 提示安装并以成功退出,而非崩溃——避免阻塞依赖本脚本的上层流程。
_VALIDATORS_AVAILABLE = False
def main():
if not _VALIDATORS_AVAILABLE:
print(
"Warning: lxml 未安装,已跳过 XSD 完整校验。如需完整校验请安装 lxmlpip install lxml",
file=sys.stderr,
)
return 0
parser = argparse.ArgumentParser(description="Validate Office document XML files")
parser.add_argument(
"path",
@@ -70,6 +91,8 @@ def main():
if path.is_file() and path.suffix.lower() in [".docx", ".pptx", ".xlsx"]:
temp_dir = tempfile.mkdtemp()
# 解包目录在进程退出时清理(含 sys.exit避免每次校验泄漏 /tmp 目录
atexit.register(shutil.rmtree, temp_dir, ignore_errors=True)
with zipfile.ZipFile(path, "r") as zf:
zf.extractall(temp_dir)
unpacked_dir = Path(temp_dir)

View File

@@ -0,0 +1,23 @@
// preload-deps.cjs —— 跨平台 Node 预加载(无需 bash让 docx 生成复用客户端预装依赖
//
// 客户端启动时会把 docx-js 预装到 <DESIRECORE_ROOT>/runtime-deps/node_modules/。
// 本文件通过 `node -r` 预加载,把该目录注入模块解析路径,使生成脚本里的
// require('docx') 无需联网 `npm install` 即可命中预装库。纯 Node 实现,在
// Windows / macOS / Linux 上用同一条命令运行,不依赖 bash / Git Bash
//
// node -r "<skill-dir>/scripts/preload-deps.cjs" generate.js
//
// 若预装目录不存在(老客户端 / 离线种子缺失),则不做任何事,由生成脚本自身
// 回退require 失败 → 提示 npm install -g docx。env 仅作用于本进程。
'use strict'
const path = require('path')
const fs = require('fs')
const Module = require('module')
// scripts → docx → skills → <ROOT>;预装 Node 依赖在 <ROOT>/runtime-deps/node_modules
const depsDir = path.resolve(__dirname, '..', '..', '..', 'runtime-deps', 'node_modules')
if (fs.existsSync(depsDir)) {
// path.delimiter 跨平台自动取 ';'(Windows) / ':'(POSIX)
process.env.NODE_PATH = depsDir + (process.env.NODE_PATH ? path.delimiter + process.env.NODE_PATH : '')
Module._initPaths() // 让随后运行的 generate.js 的 require('docx') 命中预装库
}

View File

@@ -0,0 +1,74 @@
#!/usr/bin/env python3
"""with-deps.py —— 跨平台 Python 启动器(无需 bash对应已废弃的 with-deps.sh
让 office 脚本复用客户端预装的共享依赖,免去运行时 pip install
- defusedxml纯 Python注入 PYTHONPATH<ROOT>/runtime-deps/python-libs
- lxml编译型扩展绑定具体解释器 → 若存在受控 Python<ROOT>/runtime-deps/
python-runtime已装 lxml用它运行目标脚本从而离线启用完整 XSD 校验;
受控 Python 不存在 / 无法执行(如 macOS 公证拦截)→ 自动退回当前 Python
(此时 lxml 缺失,校验会优雅降级跳过,不会崩)。
纯 Python 实现,在 Windows / macOS / Linux 上用同一条命令运行,不依赖 bash
python "<skill-dir>/scripts/with-deps.py" office/unpack.py document.docx unpacked/
python "<skill-dir>/scripts/with-deps.py" office/validate.py doc.docx
目标脚本以 [解释器, 目标, *参数] 直接拉起 —— 等价于 `python <目标>`,因此脚本目录
会被 Python 自动加入 sys.path、__name__ == "__main__"、argv 与直接运行完全一致。
"""
import os
import subprocess
import sys
_HERE = os.path.dirname(os.path.abspath(__file__)) # .../skills/docx/scripts
# scripts → docx → skills → <ROOT>
_ROOT = os.path.abspath(os.path.join(_HERE, "..", "..", ".."))
_DEPS = os.path.join(_ROOT, "runtime-deps")
_PYLIBS = os.path.join(_DEPS, "python-libs")
_BUNDLED = os.path.join(
_DEPS,
"python-runtime",
"python.exe" if os.name == "nt" else os.path.join("bin", "python3"),
)
def main() -> int:
if len(sys.argv) < 2:
sys.stderr.write("usage: with-deps.py <script.py> [args...]\n")
return 2
# 目标脚本相对 scripts/ 解析(如 office/validate.py也支持绝对路径
arg = sys.argv[1]
target = arg if os.path.isabs(arg) else os.path.join(_HERE, arg)
if not os.path.isfile(target):
sys.stderr.write(f"with-deps.py: target not found: {target}\n")
return 2
# 选解释器:有受控 Python含 lxml且当前不是它 → 用它;否则用当前/系统 Python
interp = sys.executable
if os.path.isfile(_BUNDLED) and os.path.realpath(_BUNDLED) != os.path.realpath(sys.executable):
interp = _BUNDLED
# 注入 defusedxmlos.pathsep 跨平台自动 ';' / ':'
env = dict(os.environ)
if os.path.isdir(_PYLIBS):
existing = env.get("PYTHONPATH")
env["PYTHONPATH"] = _PYLIBS + (os.pathsep + existing if existing else "")
cmd = [interp, target, *sys.argv[2:]]
try:
rc = subprocess.run(cmd, env=env).returncode
except OSError:
rc = 126 # 受控 Python 无法启动
# 受控 Python 跑不了rc<0=被信号杀,如 macOS Gatekeeper126/127=无法执行)
# → 退回系统 Python让脚本在缺 lxml 时优雅降级,而不是把整条命令判为失败
if interp != sys.executable and (rc < 0 or rc in (126, 127)):
rc = subprocess.run([sys.executable, target, *sys.argv[2:]], env=env).returncode
return rc
if __name__ == "__main__":
raise SystemExit(main())