Files
market/skills/docx/scripts/comment.py
Zxy-y 17fe79ab49 feat(docx): 跨平台启动器替换 bash 包装,复用预装依赖免每次安装 (#21)
## 概述 / Summary

把 docx 技能对"客户端预装运行时依赖"的复用方式从 **bash 包装脚本**改为**跨平台 runtime 启动器**,实现
Win/macOS/Linux 一致、不依赖 Git Bash,并修复若干 POSIX 硬编码导致的 Windows 崩溃点。

Switch the docx skill's reuse of client-preinstalled runtime deps from a
**bash wrapper** to **cross-platform runtime launchers**, so it behaves
identically on Win/macOS/Linux without Git Bash, and fix several
POSIX-hardcoded crashes on Windows.

## 改动 / Changes

- **新增 / Add** `scripts/preload-deps.cjs`(Node 预加载,注入 `NODE_PATH`)与
`scripts/with-deps.py`(Python 启动器,按需切换到内置含 lxml 的 Python);**删除** bash 版
`with-deps.sh`。
- 生成走 `node -r preload-deps.cjs`,office 脚本走 `python with-deps.py` ——
离线复用预装的 docx-js / defusedxml / lxml,免每次 `npm`/`pip install`,且**不依赖
bash**。
- `comment.py` 补 defusedxml sys.path shim;`validate.py` 修临时目录泄漏(atexit
清理)。
- `accept_changes.py` 去除 `/tmp` 硬编码(`tempfile.gettempdir` +
`Path.as_uri`);`soffice.py` 仅 Linux 启用 AF_UNIX shim,避免 Windows 崩溃。
- `SKILL.md` / `SKILL.zh-CN.md` 同步命令形式、加 ESM
警告与外部工具(pandoc/LibreOffice/poppler)跨平台安装指引,`source_hash` 重算。

## 测试 / Testing

- 真实 dev 根目录端到端:生成 docx(免安装)+ 完整 XSD 校验(含 lxml)+ unpack/pack 往返均通过。
- 仓库 `validate-i18n.py` 校验通过;全 py 脚本 `py_compile` + `preload-deps.cjs`
`node --check` 通过。

---

- [x] 我已阅读并同意 CLA / I have read and agree to the CLA

Co-authored-by: 张馨元 <zhangxy@iynss.com>
Co-authored-by: Yige <a@wyr.me>
2026-06-04 11:14:36 +08:00

325 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Add comments to DOCX documents.
Usage:
python comment.py unpacked/ 0 "Comment text"
python comment.py unpacked/ 1 "Reply text" --parent 0
Text should be pre-escaped XML (e.g., &amp; for &, &#x2019; for smart quotes).
After running, add markers to document.xml:
<w:commentRangeStart w:id="0"/>
... commented content ...
<w:commentRangeEnd w:id="0"/>
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="0"/></w:r>
"""
import argparse
import os
import random
import shutil
import sys
from datetime import datetime, timezone
from pathlib import Path
# 复用客户端预装的共享 Python 依赖defusedxml 等scripts → docx → skills → <ROOT>
_deps = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "runtime-deps", "python-libs")
if os.path.isdir(_deps):
sys.path.insert(0, _deps)
import defusedxml.minidom
TEMPLATE_DIR = Path(__file__).parent / "templates"
NS = {
"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
"w14": "http://schemas.microsoft.com/office/word/2010/wordml",
"w15": "http://schemas.microsoft.com/office/word/2012/wordml",
"w16cid": "http://schemas.microsoft.com/office/word/2016/wordml/cid",
"w16cex": "http://schemas.microsoft.com/office/word/2018/wordml/cex",
}
COMMENT_XML = """\
<w:comment w:id="{id}" w:author="{author}" w:date="{date}" w:initials="{initials}">
<w:p w14:paraId="{para_id}" w14:textId="77777777">
<w:r>
<w:rPr><w:rStyle w:val="CommentReference"/></w:rPr>
<w:annotationRef/>
</w:r>
<w:r>
<w:rPr>
<w:color w:val="000000"/>
<w:sz w:val="20"/>
<w:szCs w:val="20"/>
</w:rPr>
<w:t>{text}</w:t>
</w:r>
</w:p>
</w:comment>"""
COMMENT_MARKER_TEMPLATE = """
Add to document.xml (markers must be direct children of w:p, never inside w:r):
<w:commentRangeStart w:id="{cid}"/>
<w:r>...</w:r>
<w:commentRangeEnd w:id="{cid}"/>
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="{cid}"/></w:r>"""
REPLY_MARKER_TEMPLATE = """
Nest markers inside parent {pid}'s markers (markers must be direct children of w:p, never inside w:r):
<w:commentRangeStart w:id="{pid}"/><w:commentRangeStart w:id="{cid}"/>
<w:r>...</w:r>
<w:commentRangeEnd w:id="{cid}"/><w:commentRangeEnd w:id="{pid}"/>
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="{pid}"/></w:r>
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="{cid}"/></w:r>"""
def _generate_hex_id() -> str:
return f"{random.randint(0, 0x7FFFFFFE):08X}"
SMART_QUOTE_ENTITIES = {
"\u201c": "&#x201C;",
"\u201d": "&#x201D;",
"\u2018": "&#x2018;",
"\u2019": "&#x2019;",
}
def _encode_smart_quotes(text: str) -> str:
for char, entity in SMART_QUOTE_ENTITIES.items():
text = text.replace(char, entity)
return text
def _append_xml(xml_path: Path, root_tag: str, content: str) -> None:
dom = defusedxml.minidom.parseString(xml_path.read_text(encoding="utf-8"))
root = dom.getElementsByTagName(root_tag)[0]
ns_attrs = " ".join(f'xmlns:{k}="{v}"' for k, v in NS.items())
wrapper_dom = defusedxml.minidom.parseString(f"<root {ns_attrs}>{content}</root>")
for child in wrapper_dom.documentElement.childNodes:
if child.nodeType == child.ELEMENT_NODE:
root.appendChild(dom.importNode(child, True))
output = _encode_smart_quotes(dom.toxml(encoding="UTF-8").decode("utf-8"))
xml_path.write_text(output, encoding="utf-8")
def _find_para_id(comments_path: Path, comment_id: int) -> str | None:
dom = defusedxml.minidom.parseString(comments_path.read_text(encoding="utf-8"))
for c in dom.getElementsByTagName("w:comment"):
if c.getAttribute("w:id") == str(comment_id):
for p in c.getElementsByTagName("w:p"):
if pid := p.getAttribute("w14:paraId"):
return pid
return None
def _get_next_rid(rels_path: Path) -> int:
dom = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8"))
max_rid = 0
for rel in dom.getElementsByTagName("Relationship"):
rid = rel.getAttribute("Id")
if rid and rid.startswith("rId"):
try:
max_rid = max(max_rid, int(rid[3:]))
except ValueError:
pass
return max_rid + 1
def _has_relationship(rels_path: Path, target: str) -> bool:
dom = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8"))
for rel in dom.getElementsByTagName("Relationship"):
if rel.getAttribute("Target") == target:
return True
return False
def _has_content_type(ct_path: Path, part_name: str) -> bool:
dom = defusedxml.minidom.parseString(ct_path.read_text(encoding="utf-8"))
for override in dom.getElementsByTagName("Override"):
if override.getAttribute("PartName") == part_name:
return True
return False
def _ensure_comment_relationships(unpacked_dir: Path) -> None:
rels_path = unpacked_dir / "word" / "_rels" / "document.xml.rels"
if not rels_path.exists():
return
if _has_relationship(rels_path, "comments.xml"):
return
dom = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8"))
root = dom.documentElement
next_rid = _get_next_rid(rels_path)
rels = [
(
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments",
"comments.xml",
),
(
"http://schemas.microsoft.com/office/2011/relationships/commentsExtended",
"commentsExtended.xml",
),
(
"http://schemas.microsoft.com/office/2016/09/relationships/commentsIds",
"commentsIds.xml",
),
(
"http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible",
"commentsExtensible.xml",
),
]
for rel_type, target in rels:
rel = dom.createElement("Relationship")
rel.setAttribute("Id", f"rId{next_rid}")
rel.setAttribute("Type", rel_type)
rel.setAttribute("Target", target)
root.appendChild(rel)
next_rid += 1
rels_path.write_bytes(dom.toxml(encoding="UTF-8"))
def _ensure_comment_content_types(unpacked_dir: Path) -> None:
ct_path = unpacked_dir / "[Content_Types].xml"
if not ct_path.exists():
return
if _has_content_type(ct_path, "/word/comments.xml"):
return
dom = defusedxml.minidom.parseString(ct_path.read_text(encoding="utf-8"))
root = dom.documentElement
overrides = [
(
"/word/comments.xml",
"application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml",
),
(
"/word/commentsExtended.xml",
"application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml",
),
(
"/word/commentsIds.xml",
"application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml",
),
(
"/word/commentsExtensible.xml",
"application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml",
),
]
for part_name, content_type in overrides:
override = dom.createElement("Override")
override.setAttribute("PartName", part_name)
override.setAttribute("ContentType", content_type)
root.appendChild(override)
ct_path.write_bytes(dom.toxml(encoding="UTF-8"))
def add_comment(
unpacked_dir: str,
comment_id: int,
text: str,
author: str = "Claude",
initials: str = "C",
parent_id: int | None = None,
) -> tuple[str, str]:
word = Path(unpacked_dir) / "word"
if not word.exists():
return "", f"Error: {word} not found"
para_id, durable_id = _generate_hex_id(), _generate_hex_id()
ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
comments = word / "comments.xml"
first_comment = not comments.exists()
if first_comment:
shutil.copy(TEMPLATE_DIR / "comments.xml", comments)
_ensure_comment_relationships(Path(unpacked_dir))
_ensure_comment_content_types(Path(unpacked_dir))
_append_xml(
comments,
"w:comments",
COMMENT_XML.format(
id=comment_id,
author=author,
date=ts,
initials=initials,
para_id=para_id,
text=text,
),
)
ext = word / "commentsExtended.xml"
if not ext.exists():
shutil.copy(TEMPLATE_DIR / "commentsExtended.xml", ext)
if parent_id is not None:
parent_para = _find_para_id(comments, parent_id)
if not parent_para:
return "", f"Error: Parent comment {parent_id} not found"
_append_xml(
ext,
"w15:commentsEx",
f'<w15:commentEx w15:paraId="{para_id}" w15:paraIdParent="{parent_para}" w15:done="0"/>',
)
else:
_append_xml(
ext,
"w15:commentsEx",
f'<w15:commentEx w15:paraId="{para_id}" w15:done="0"/>',
)
ids = word / "commentsIds.xml"
if not ids.exists():
shutil.copy(TEMPLATE_DIR / "commentsIds.xml", ids)
_append_xml(
ids,
"w16cid:commentsIds",
f'<w16cid:commentId w16cid:paraId="{para_id}" w16cid:durableId="{durable_id}"/>',
)
extensible = word / "commentsExtensible.xml"
if not extensible.exists():
shutil.copy(TEMPLATE_DIR / "commentsExtensible.xml", extensible)
_append_xml(
extensible,
"w16cex:commentsExtensible",
f'<w16cex:commentExtensible w16cex:durableId="{durable_id}" w16cex:dateUtc="{ts}"/>',
)
action = "reply" if parent_id is not None else "comment"
return para_id, f"Added {action} {comment_id} (para_id={para_id})"
if __name__ == "__main__":
p = argparse.ArgumentParser(description="Add comments to DOCX documents")
p.add_argument("unpacked_dir", help="Unpacked DOCX directory")
p.add_argument("comment_id", type=int, help="Comment ID (must be unique)")
p.add_argument("text", help="Comment text")
p.add_argument("--author", default="Claude", help="Author name")
p.add_argument("--initials", default="C", help="Author initials")
p.add_argument("--parent", type=int, help="Parent comment ID (for replies)")
args = p.parse_args()
para_id, msg = add_comment(
args.unpacked_dir,
args.comment_id,
args.text,
args.author,
args.initials,
args.parent,
)
print(msg)
if "Error" in msg:
sys.exit(1)
cid = args.comment_id
if args.parent is not None:
print(REPLY_MARKER_TEMPLATE.format(pid=args.parent, cid=cid))
else:
print(COMMENT_MARKER_TEMPLATE.format(cid=cid))