mirror of
https://git.openapi.site/https://github.com/desirecore/market.git
synced 2026-06-06 05:30:39 +08:00
## 概述 / Summary 把 docx 技能对"客户端预装运行时依赖"的复用方式从 **bash 包装脚本**改为**跨平台 runtime 启动器**,实现 Win/macOS/Linux 一致、不依赖 Git Bash,并修复若干 POSIX 硬编码导致的 Windows 崩溃点。 Switch the docx skill's reuse of client-preinstalled runtime deps from a **bash wrapper** to **cross-platform runtime launchers**, so it behaves identically on Win/macOS/Linux without Git Bash, and fix several POSIX-hardcoded crashes on Windows. ## 改动 / Changes - **新增 / Add** `scripts/preload-deps.cjs`(Node 预加载,注入 `NODE_PATH`)与 `scripts/with-deps.py`(Python 启动器,按需切换到内置含 lxml 的 Python);**删除** bash 版 `with-deps.sh`。 - 生成走 `node -r preload-deps.cjs`,office 脚本走 `python with-deps.py` —— 离线复用预装的 docx-js / defusedxml / lxml,免每次 `npm`/`pip install`,且**不依赖 bash**。 - `comment.py` 补 defusedxml sys.path shim;`validate.py` 修临时目录泄漏(atexit 清理)。 - `accept_changes.py` 去除 `/tmp` 硬编码(`tempfile.gettempdir` + `Path.as_uri`);`soffice.py` 仅 Linux 启用 AF_UNIX shim,避免 Windows 崩溃。 - `SKILL.md` / `SKILL.zh-CN.md` 同步命令形式、加 ESM 警告与外部工具(pandoc/LibreOffice/poppler)跨平台安装指引,`source_hash` 重算。 ## 测试 / Testing - 真实 dev 根目录端到端:生成 docx(免安装)+ 完整 XSD 校验(含 lxml)+ unpack/pack 往返均通过。 - 仓库 `validate-i18n.py` 校验通过;全 py 脚本 `py_compile` + `preload-deps.cjs` `node --check` 通过。 --- - [x] 我已阅读并同意 CLA / I have read and agree to the CLA Co-authored-by: 张馨元 <zhangxy@iynss.com> Co-authored-by: Yige <a@wyr.me>
325 lines
11 KiB
Python
325 lines
11 KiB
Python
"""Add comments to DOCX documents.
|
||
|
||
Usage:
|
||
python comment.py unpacked/ 0 "Comment text"
|
||
python comment.py unpacked/ 1 "Reply text" --parent 0
|
||
|
||
Text should be pre-escaped XML (e.g., & for &, ’ for smart quotes).
|
||
|
||
After running, add markers to document.xml:
|
||
<w:commentRangeStart w:id="0"/>
|
||
... commented content ...
|
||
<w:commentRangeEnd w:id="0"/>
|
||
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="0"/></w:r>
|
||
"""
|
||
|
||
import argparse
|
||
import os
|
||
import random
|
||
import shutil
|
||
import sys
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
|
||
# 复用客户端预装的共享 Python 依赖(defusedxml 等):scripts → docx → skills → <ROOT>
|
||
_deps = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "runtime-deps", "python-libs")
|
||
if os.path.isdir(_deps):
|
||
sys.path.insert(0, _deps)
|
||
|
||
import defusedxml.minidom
|
||
|
||
TEMPLATE_DIR = Path(__file__).parent / "templates"
|
||
NS = {
|
||
"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
|
||
"w14": "http://schemas.microsoft.com/office/word/2010/wordml",
|
||
"w15": "http://schemas.microsoft.com/office/word/2012/wordml",
|
||
"w16cid": "http://schemas.microsoft.com/office/word/2016/wordml/cid",
|
||
"w16cex": "http://schemas.microsoft.com/office/word/2018/wordml/cex",
|
||
}
|
||
|
||
COMMENT_XML = """\
|
||
<w:comment w:id="{id}" w:author="{author}" w:date="{date}" w:initials="{initials}">
|
||
<w:p w14:paraId="{para_id}" w14:textId="77777777">
|
||
<w:r>
|
||
<w:rPr><w:rStyle w:val="CommentReference"/></w:rPr>
|
||
<w:annotationRef/>
|
||
</w:r>
|
||
<w:r>
|
||
<w:rPr>
|
||
<w:color w:val="000000"/>
|
||
<w:sz w:val="20"/>
|
||
<w:szCs w:val="20"/>
|
||
</w:rPr>
|
||
<w:t>{text}</w:t>
|
||
</w:r>
|
||
</w:p>
|
||
</w:comment>"""
|
||
|
||
COMMENT_MARKER_TEMPLATE = """
|
||
Add to document.xml (markers must be direct children of w:p, never inside w:r):
|
||
<w:commentRangeStart w:id="{cid}"/>
|
||
<w:r>...</w:r>
|
||
<w:commentRangeEnd w:id="{cid}"/>
|
||
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="{cid}"/></w:r>"""
|
||
|
||
REPLY_MARKER_TEMPLATE = """
|
||
Nest markers inside parent {pid}'s markers (markers must be direct children of w:p, never inside w:r):
|
||
<w:commentRangeStart w:id="{pid}"/><w:commentRangeStart w:id="{cid}"/>
|
||
<w:r>...</w:r>
|
||
<w:commentRangeEnd w:id="{cid}"/><w:commentRangeEnd w:id="{pid}"/>
|
||
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="{pid}"/></w:r>
|
||
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="{cid}"/></w:r>"""
|
||
|
||
|
||
def _generate_hex_id() -> str:
|
||
return f"{random.randint(0, 0x7FFFFFFE):08X}"
|
||
|
||
|
||
SMART_QUOTE_ENTITIES = {
|
||
"\u201c": "“",
|
||
"\u201d": "”",
|
||
"\u2018": "‘",
|
||
"\u2019": "’",
|
||
}
|
||
|
||
|
||
def _encode_smart_quotes(text: str) -> str:
|
||
for char, entity in SMART_QUOTE_ENTITIES.items():
|
||
text = text.replace(char, entity)
|
||
return text
|
||
|
||
|
||
def _append_xml(xml_path: Path, root_tag: str, content: str) -> None:
|
||
dom = defusedxml.minidom.parseString(xml_path.read_text(encoding="utf-8"))
|
||
root = dom.getElementsByTagName(root_tag)[0]
|
||
ns_attrs = " ".join(f'xmlns:{k}="{v}"' for k, v in NS.items())
|
||
wrapper_dom = defusedxml.minidom.parseString(f"<root {ns_attrs}>{content}</root>")
|
||
for child in wrapper_dom.documentElement.childNodes:
|
||
if child.nodeType == child.ELEMENT_NODE:
|
||
root.appendChild(dom.importNode(child, True))
|
||
output = _encode_smart_quotes(dom.toxml(encoding="UTF-8").decode("utf-8"))
|
||
xml_path.write_text(output, encoding="utf-8")
|
||
|
||
|
||
def _find_para_id(comments_path: Path, comment_id: int) -> str | None:
|
||
dom = defusedxml.minidom.parseString(comments_path.read_text(encoding="utf-8"))
|
||
for c in dom.getElementsByTagName("w:comment"):
|
||
if c.getAttribute("w:id") == str(comment_id):
|
||
for p in c.getElementsByTagName("w:p"):
|
||
if pid := p.getAttribute("w14:paraId"):
|
||
return pid
|
||
return None
|
||
|
||
|
||
def _get_next_rid(rels_path: Path) -> int:
|
||
dom = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8"))
|
||
max_rid = 0
|
||
for rel in dom.getElementsByTagName("Relationship"):
|
||
rid = rel.getAttribute("Id")
|
||
if rid and rid.startswith("rId"):
|
||
try:
|
||
max_rid = max(max_rid, int(rid[3:]))
|
||
except ValueError:
|
||
pass
|
||
return max_rid + 1
|
||
|
||
|
||
def _has_relationship(rels_path: Path, target: str) -> bool:
|
||
dom = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8"))
|
||
for rel in dom.getElementsByTagName("Relationship"):
|
||
if rel.getAttribute("Target") == target:
|
||
return True
|
||
return False
|
||
|
||
|
||
def _has_content_type(ct_path: Path, part_name: str) -> bool:
|
||
dom = defusedxml.minidom.parseString(ct_path.read_text(encoding="utf-8"))
|
||
for override in dom.getElementsByTagName("Override"):
|
||
if override.getAttribute("PartName") == part_name:
|
||
return True
|
||
return False
|
||
|
||
|
||
def _ensure_comment_relationships(unpacked_dir: Path) -> None:
|
||
rels_path = unpacked_dir / "word" / "_rels" / "document.xml.rels"
|
||
if not rels_path.exists():
|
||
return
|
||
|
||
if _has_relationship(rels_path, "comments.xml"):
|
||
return
|
||
|
||
dom = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8"))
|
||
root = dom.documentElement
|
||
next_rid = _get_next_rid(rels_path)
|
||
|
||
rels = [
|
||
(
|
||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments",
|
||
"comments.xml",
|
||
),
|
||
(
|
||
"http://schemas.microsoft.com/office/2011/relationships/commentsExtended",
|
||
"commentsExtended.xml",
|
||
),
|
||
(
|
||
"http://schemas.microsoft.com/office/2016/09/relationships/commentsIds",
|
||
"commentsIds.xml",
|
||
),
|
||
(
|
||
"http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible",
|
||
"commentsExtensible.xml",
|
||
),
|
||
]
|
||
|
||
for rel_type, target in rels:
|
||
rel = dom.createElement("Relationship")
|
||
rel.setAttribute("Id", f"rId{next_rid}")
|
||
rel.setAttribute("Type", rel_type)
|
||
rel.setAttribute("Target", target)
|
||
root.appendChild(rel)
|
||
next_rid += 1
|
||
|
||
rels_path.write_bytes(dom.toxml(encoding="UTF-8"))
|
||
|
||
|
||
def _ensure_comment_content_types(unpacked_dir: Path) -> None:
|
||
ct_path = unpacked_dir / "[Content_Types].xml"
|
||
if not ct_path.exists():
|
||
return
|
||
|
||
if _has_content_type(ct_path, "/word/comments.xml"):
|
||
return
|
||
|
||
dom = defusedxml.minidom.parseString(ct_path.read_text(encoding="utf-8"))
|
||
root = dom.documentElement
|
||
|
||
overrides = [
|
||
(
|
||
"/word/comments.xml",
|
||
"application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml",
|
||
),
|
||
(
|
||
"/word/commentsExtended.xml",
|
||
"application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml",
|
||
),
|
||
(
|
||
"/word/commentsIds.xml",
|
||
"application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml",
|
||
),
|
||
(
|
||
"/word/commentsExtensible.xml",
|
||
"application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml",
|
||
),
|
||
]
|
||
|
||
for part_name, content_type in overrides:
|
||
override = dom.createElement("Override")
|
||
override.setAttribute("PartName", part_name)
|
||
override.setAttribute("ContentType", content_type)
|
||
root.appendChild(override)
|
||
|
||
ct_path.write_bytes(dom.toxml(encoding="UTF-8"))
|
||
|
||
|
||
def add_comment(
|
||
unpacked_dir: str,
|
||
comment_id: int,
|
||
text: str,
|
||
author: str = "Claude",
|
||
initials: str = "C",
|
||
parent_id: int | None = None,
|
||
) -> tuple[str, str]:
|
||
word = Path(unpacked_dir) / "word"
|
||
if not word.exists():
|
||
return "", f"Error: {word} not found"
|
||
|
||
para_id, durable_id = _generate_hex_id(), _generate_hex_id()
|
||
ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||
|
||
comments = word / "comments.xml"
|
||
first_comment = not comments.exists()
|
||
if first_comment:
|
||
shutil.copy(TEMPLATE_DIR / "comments.xml", comments)
|
||
_ensure_comment_relationships(Path(unpacked_dir))
|
||
_ensure_comment_content_types(Path(unpacked_dir))
|
||
_append_xml(
|
||
comments,
|
||
"w:comments",
|
||
COMMENT_XML.format(
|
||
id=comment_id,
|
||
author=author,
|
||
date=ts,
|
||
initials=initials,
|
||
para_id=para_id,
|
||
text=text,
|
||
),
|
||
)
|
||
|
||
ext = word / "commentsExtended.xml"
|
||
if not ext.exists():
|
||
shutil.copy(TEMPLATE_DIR / "commentsExtended.xml", ext)
|
||
if parent_id is not None:
|
||
parent_para = _find_para_id(comments, parent_id)
|
||
if not parent_para:
|
||
return "", f"Error: Parent comment {parent_id} not found"
|
||
_append_xml(
|
||
ext,
|
||
"w15:commentsEx",
|
||
f'<w15:commentEx w15:paraId="{para_id}" w15:paraIdParent="{parent_para}" w15:done="0"/>',
|
||
)
|
||
else:
|
||
_append_xml(
|
||
ext,
|
||
"w15:commentsEx",
|
||
f'<w15:commentEx w15:paraId="{para_id}" w15:done="0"/>',
|
||
)
|
||
|
||
ids = word / "commentsIds.xml"
|
||
if not ids.exists():
|
||
shutil.copy(TEMPLATE_DIR / "commentsIds.xml", ids)
|
||
_append_xml(
|
||
ids,
|
||
"w16cid:commentsIds",
|
||
f'<w16cid:commentId w16cid:paraId="{para_id}" w16cid:durableId="{durable_id}"/>',
|
||
)
|
||
|
||
extensible = word / "commentsExtensible.xml"
|
||
if not extensible.exists():
|
||
shutil.copy(TEMPLATE_DIR / "commentsExtensible.xml", extensible)
|
||
_append_xml(
|
||
extensible,
|
||
"w16cex:commentsExtensible",
|
||
f'<w16cex:commentExtensible w16cex:durableId="{durable_id}" w16cex:dateUtc="{ts}"/>',
|
||
)
|
||
|
||
action = "reply" if parent_id is not None else "comment"
|
||
return para_id, f"Added {action} {comment_id} (para_id={para_id})"
|
||
|
||
|
||
if __name__ == "__main__":
|
||
p = argparse.ArgumentParser(description="Add comments to DOCX documents")
|
||
p.add_argument("unpacked_dir", help="Unpacked DOCX directory")
|
||
p.add_argument("comment_id", type=int, help="Comment ID (must be unique)")
|
||
p.add_argument("text", help="Comment text")
|
||
p.add_argument("--author", default="Claude", help="Author name")
|
||
p.add_argument("--initials", default="C", help="Author initials")
|
||
p.add_argument("--parent", type=int, help="Parent comment ID (for replies)")
|
||
args = p.parse_args()
|
||
|
||
para_id, msg = add_comment(
|
||
args.unpacked_dir,
|
||
args.comment_id,
|
||
args.text,
|
||
args.author,
|
||
args.initials,
|
||
args.parent,
|
||
)
|
||
print(msg)
|
||
if "Error" in msg:
|
||
sys.exit(1)
|
||
cid = args.comment_id
|
||
if args.parent is not None:
|
||
print(REPLY_MARKER_TEMPLATE.format(pid=args.parent, cid=cid))
|
||
else:
|
||
print(COMMENT_MARKER_TEMPLATE.format(cid=cid))
|