diff --git a/builtin-skills.json b/builtin-skills.json
index a18f829..e7318ac 100644
--- a/builtin-skills.json
+++ b/builtin-skills.json
@@ -3,10 +3,14 @@
"create-agent",
"delete-agent",
"discover-agent",
+ "docx",
"manage-skills",
"manage-teams",
+ "pdf",
+ "pptx",
"s3-storage-operations",
"skill-creator",
- "update-agent"
+ "update-agent",
+ "xlsx"
]
}
diff --git a/skills/docx/SKILL.md b/skills/docx/SKILL.md
new file mode 100644
index 0000000..4d6a1b0
--- /dev/null
+++ b/skills/docx/SKILL.md
@@ -0,0 +1,510 @@
+---
+name: Word 文档处理
+description: >-
+ Use this skill whenever the user wants to create, read, edit, or manipulate
+ Word documents (.docx files). Triggers include: any mention of "Word doc",
+ "word document", ".docx", or requests to produce professional documents with
+ formatting like tables of contents, headings, page numbers, or letterheads.
+ Also use when extracting or reorganizing content from .docx files, inserting
+ or replacing images in documents, performing find-and-replace in Word files,
+ working with tracked changes or comments, or converting content into a polished
+ Word document. If the user asks for a "report", "memo", "letter", "template",
+ or similar deliverable as a Word or .docx file, use this skill. Do NOT use for
+ PDFs, spreadsheets, Google Docs, or general coding tasks unrelated to document
+ generation.
+version: 1.0.0
+type: procedural
+risk_level: low
+status: enabled
+tags:
+ - docx
+ - word
+ - document
+ - office
+metadata:
+ author: anthropic
+ updated_at: '2026-04-04'
+market:
+ short_desc: 创建、编辑和处理 Word 文档(.docx)
+ category: productivity
+ maintainer:
+ name: DesireCore Official
+ verified: true
+ channel: latest
+---
+
+# DOCX creation, editing, and analysis
+
+## Overview
+
+A .docx file is a ZIP archive containing XML files.
+
+## Quick Reference
+
+| Task | Approach |
+|------|----------|
+| Read/analyze content | `pandoc` or unpack for raw XML |
+| Create new document | Use `docx-js` - see Creating New Documents below |
+| Edit existing document | Unpack → edit XML → repack - see Editing Existing Documents below |
+
+### Converting .doc to .docx
+
+Legacy `.doc` files must be converted before editing:
+
+```bash
+python scripts/office/soffice.py --headless --convert-to docx document.doc
+```
+
+### Reading Content
+
+```bash
+# Text extraction with tracked changes
+pandoc --track-changes=all document.docx -o output.md
+
+# Raw XML access
+python scripts/office/unpack.py document.docx unpacked/
+```
+
+### Converting to Images
+
+```bash
+python scripts/office/soffice.py --headless --convert-to pdf document.docx
+pdftoppm -jpeg -r 150 document.pdf page
+```
+
+### Accepting Tracked Changes
+
+To produce a clean document with all tracked changes accepted (requires LibreOffice):
+
+```bash
+python scripts/accept_changes.py input.docx output.docx
+```
+
+---
+
+## Creating New Documents
+
+Generate .docx files with JavaScript, then validate. Install: `npm install -g docx`
+
+### Setup
+```javascript
+const { Document, Packer, Paragraph, TextRun, Table, TableRow, TableCell, ImageRun,
+ Header, Footer, AlignmentType, PageOrientation, LevelFormat, ExternalHyperlink,
+ TableOfContents, HeadingLevel, BorderStyle, WidthType, ShadingType,
+ VerticalAlign, PageNumber, PageBreak } = require('docx');
+
+const doc = new Document({ sections: [{ children: [/* content */] }] });
+Packer.toBuffer(doc).then(buffer => fs.writeFileSync("doc.docx", buffer));
+```
+
+### Validation
+After creating the file, validate it. If validation fails, unpack, fix the XML, and repack.
+```bash
+python scripts/office/validate.py doc.docx
+```
+
+### Page Size
+
+```javascript
+// CRITICAL: docx-js defaults to A4, not US Letter
+// Always set page size explicitly for consistent results
+sections: [{
+ properties: {
+ page: {
+ size: {
+ width: 12240, // 8.5 inches in DXA
+ height: 15840 // 11 inches in DXA
+ },
+ margin: { top: 1440, right: 1440, bottom: 1440, left: 1440 } // 1 inch margins
+ }
+ },
+ children: [/* content */]
+}]
+```
+
+**Common page sizes (DXA units, 1440 DXA = 1 inch):**
+
+| Paper | Width | Height | Content Width (1" margins) |
+|-------|-------|--------|---------------------------|
+| US Letter | 12,240 | 15,840 | 9,360 |
+| A4 (default) | 11,906 | 16,838 | 9,026 |
+
+**Landscape orientation:** docx-js swaps width/height internally, so pass portrait dimensions and let it handle the swap:
+```javascript
+size: {
+ width: 12240, // Pass SHORT edge as width
+ height: 15840, // Pass LONG edge as height
+ orientation: PageOrientation.LANDSCAPE // docx-js swaps them in the XML
+},
+// Content width = 15840 - left margin - right margin (uses the long edge)
+```
+
+### Styles (Override Built-in Headings)
+
+Use Arial as the default font (universally supported). Keep titles black for readability.
+
+```javascript
+const doc = new Document({
+ styles: {
+ default: { document: { run: { font: "Arial", size: 24 } } }, // 12pt default
+ paragraphStyles: [
+ // IMPORTANT: Use exact IDs to override built-in styles
+ { id: "Heading1", name: "Heading 1", basedOn: "Normal", next: "Normal", quickFormat: true,
+ run: { size: 32, bold: true, font: "Arial" },
+ paragraph: { spacing: { before: 240, after: 240 }, outlineLevel: 0 } }, // outlineLevel required for TOC
+ { id: "Heading2", name: "Heading 2", basedOn: "Normal", next: "Normal", quickFormat: true,
+ run: { size: 28, bold: true, font: "Arial" },
+ paragraph: { spacing: { before: 180, after: 180 }, outlineLevel: 1 } },
+ ]
+ },
+ sections: [{
+ children: [
+ new Paragraph({ heading: HeadingLevel.HEADING_1, children: [new TextRun("Title")] }),
+ ]
+ }]
+});
+```
+
+### Lists (NEVER use unicode bullets)
+
+```javascript
+// ❌ WRONG - never manually insert bullet characters
+new Paragraph({ children: [new TextRun("• Item")] }) // BAD
+new Paragraph({ children: [new TextRun("\u2022 Item")] }) // BAD
+
+// ✅ CORRECT - use numbering config with LevelFormat.BULLET
+const doc = new Document({
+ numbering: {
+ config: [
+ { reference: "bullets",
+ levels: [{ level: 0, format: LevelFormat.BULLET, text: "•", alignment: AlignmentType.LEFT,
+ style: { paragraph: { indent: { left: 720, hanging: 360 } } } }] },
+ { reference: "numbers",
+ levels: [{ level: 0, format: LevelFormat.DECIMAL, text: "%1.", alignment: AlignmentType.LEFT,
+ style: { paragraph: { indent: { left: 720, hanging: 360 } } } }] },
+ ]
+ },
+ sections: [{
+ children: [
+ new Paragraph({ numbering: { reference: "bullets", level: 0 },
+ children: [new TextRun("Bullet item")] }),
+ new Paragraph({ numbering: { reference: "numbers", level: 0 },
+ children: [new TextRun("Numbered item")] }),
+ ]
+ }]
+});
+
+// ⚠️ Each reference creates INDEPENDENT numbering
+// Same reference = continues (1,2,3 then 4,5,6)
+// Different reference = restarts (1,2,3 then 1,2,3)
+```
+
+### Tables
+
+**CRITICAL: Tables need dual widths** - set both `columnWidths` on the table AND `width` on each cell. Without both, tables render incorrectly on some platforms.
+
+```javascript
+// CRITICAL: Always set table width for consistent rendering
+// CRITICAL: Use ShadingType.CLEAR (not SOLID) to prevent black backgrounds
+const border = { style: BorderStyle.SINGLE, size: 1, color: "CCCCCC" };
+const borders = { top: border, bottom: border, left: border, right: border };
+
+new Table({
+ width: { size: 9360, type: WidthType.DXA }, // Always use DXA (percentages break in Google Docs)
+ columnWidths: [4680, 4680], // Must sum to table width (DXA: 1440 = 1 inch)
+ rows: [
+ new TableRow({
+ children: [
+ new TableCell({
+ borders,
+ width: { size: 4680, type: WidthType.DXA }, // Also set on each cell
+ shading: { fill: "D5E8F0", type: ShadingType.CLEAR }, // CLEAR not SOLID
+ margins: { top: 80, bottom: 80, left: 120, right: 120 }, // Cell padding (internal, not added to width)
+ children: [new Paragraph({ children: [new TextRun("Cell")] })]
+ })
+ ]
+ })
+ ]
+})
+```
+
+**Table width calculation:**
+
+Always use `WidthType.DXA` — `WidthType.PERCENTAGE` breaks in Google Docs.
+
+```javascript
+// Table width = sum of columnWidths = content width
+// US Letter with 1" margins: 12240 - 2880 = 9360 DXA
+width: { size: 9360, type: WidthType.DXA },
+columnWidths: [7000, 2360] // Must sum to table width
+```
+
+**Width rules:**
+- **Always use `WidthType.DXA`** — never `WidthType.PERCENTAGE` (incompatible with Google Docs)
+- Table width must equal the sum of `columnWidths`
+- Cell `width` must match corresponding `columnWidth`
+- Cell `margins` are internal padding - they reduce content area, not add to cell width
+- For full-width tables: use content width (page width minus left and right margins)
+
+### Images
+
+```javascript
+// CRITICAL: type parameter is REQUIRED
+new Paragraph({
+ children: [new ImageRun({
+ type: "png", // Required: png, jpg, jpeg, gif, bmp, svg
+ data: fs.readFileSync("image.png"),
+ transformation: { width: 200, height: 150 },
+ altText: { title: "Title", description: "Desc", name: "Name" } // All three required
+ })]
+})
+```
+
+### Page Breaks
+
+```javascript
+// CRITICAL: PageBreak must be inside a Paragraph
+new Paragraph({ children: [new PageBreak()] })
+
+// Or use pageBreakBefore
+new Paragraph({ pageBreakBefore: true, children: [new TextRun("New page")] })
+```
+
+### Table of Contents
+
+```javascript
+// CRITICAL: Headings must use HeadingLevel ONLY - no custom styles
+new TableOfContents("Table of Contents", { hyperlink: true, headingStyleRange: "1-3" })
+```
+
+### Headers/Footers
+
+```javascript
+sections: [{
+ properties: {
+ page: { margin: { top: 1440, right: 1440, bottom: 1440, left: 1440 } } // 1440 = 1 inch
+ },
+ headers: {
+ default: new Header({ children: [new Paragraph({ children: [new TextRun("Header")] })] })
+ },
+ footers: {
+ default: new Footer({ children: [new Paragraph({
+ children: [new TextRun("Page "), new TextRun({ children: [PageNumber.CURRENT] })]
+ })] })
+ },
+ children: [/* content */]
+}]
+```
+
+### Critical Rules for docx-js
+
+- **Set page size explicitly** - docx-js defaults to A4; use US Letter (12240 x 15840 DXA) for US documents
+- **Landscape: pass portrait dimensions** - docx-js swaps width/height internally; pass short edge as `width`, long edge as `height`, and set `orientation: PageOrientation.LANDSCAPE`
+- **Never use `\n`** - use separate Paragraph elements
+- **Never use unicode bullets** - use `LevelFormat.BULLET` with numbering config
+- **PageBreak must be in Paragraph** - standalone creates invalid XML
+- **ImageRun requires `type`** - always specify png/jpg/etc
+- **Always set table `width` with DXA** - never use `WidthType.PERCENTAGE` (breaks in Google Docs)
+- **Tables need dual widths** - `columnWidths` array AND cell `width`, both must match
+- **Table width = sum of columnWidths** - for DXA, ensure they add up exactly
+- **Always add cell margins** - use `margins: { top: 80, bottom: 80, left: 120, right: 120 }` for readable padding
+- **Use `ShadingType.CLEAR`** - never SOLID for table shading
+- **TOC requires HeadingLevel only** - no custom styles on heading paragraphs
+- **Override built-in styles** - use exact IDs: "Heading1", "Heading2", etc.
+- **Include `outlineLevel`** - required for TOC (0 for H1, 1 for H2, etc.)
+
+---
+
+## Editing Existing Documents
+
+**Follow all 3 steps in order.**
+
+### Step 1: Unpack
+```bash
+python scripts/office/unpack.py document.docx unpacked/
+```
+Extracts XML, pretty-prints, merges adjacent runs, and converts smart quotes to XML entities (`&#8220;` etc.) so they survive editing. Use `--merge-runs false` to skip run merging.
+
+### Step 2: Edit XML
+
+Edit files in `unpacked/word/`. See XML Reference below for patterns.
+
+**Use "Claude" as the author** for tracked changes and comments, unless the user explicitly requests use of a different name.
+
+**Use the Edit tool directly for string replacement. Do not write Python scripts.** Scripts introduce unnecessary complexity. The Edit tool shows exactly what is being replaced.
+
+**CRITICAL: Use smart quotes for new content.** When adding text with apostrophes or quotes, use XML entities to produce smart quotes:
+```xml
+
+<w:t xml:space="preserve">Here&#8217;s a quote: &#8220;Hello&#8221;</w:t>
+```
+| Entity | Character |
+|--------|-----------|
+| `&#8216;` | ‘ (left single) |
+| `&#8217;` | ’ (right single / apostrophe) |
+| `&#8220;` | “ (left double) |
+| `&#8221;` | ” (right double) |
+
+**Adding comments:** Use `comment.py` to handle boilerplate across multiple XML files (text must be pre-escaped XML):
+```bash
+python scripts/comment.py unpacked/ 0 "Comment text with &amp; and &#8217;"
+python scripts/comment.py unpacked/ 1 "Reply text" --parent 0 # reply to comment 0
+python scripts/comment.py unpacked/ 0 "Text" --author "Custom Author" # custom author name
+```
+Then add markers to document.xml (see Comments in XML Reference).
+
+### Step 3: Pack
+```bash
+python scripts/office/pack.py unpacked/ output.docx --original document.docx
+```
+Validates with auto-repair, condenses XML, and creates DOCX. Use `--validate false` to skip.
+
+**Auto-repair will fix:**
+- `durableId` >= 0x7FFFFFFF (regenerates valid ID)
+- Missing `xml:space="preserve"` on `<w:t>` with whitespace
+
+**Auto-repair won't fix:**
+- Malformed XML, invalid element nesting, missing relationships, schema violations
+
+### Common Pitfalls
+
+- **Replace entire `<w:r>` elements**: When adding tracked changes, replace the whole `<w:r>...</w:r>` block with `<w:ins><w:r>...</w:r></w:ins><w:del><w:r>...</w:r></w:del>` as siblings. Don't inject tracked change tags inside a run.
+- **Preserve `<w:rPr>` formatting**: Copy the original run's `<w:rPr>` block into your tracked change runs to maintain bold, font size, etc.
+
+---
+
+## XML Reference
+
+### Schema Compliance
+
+- **Element order in ``**: ``, ``, ``, ``, ``, `` last
+- **Whitespace**: Add `xml:space="preserve"` to `<w:t>` with leading/trailing spaces
+- **RSIDs**: Must be 8-digit hex (e.g., `00AB1234`)
+
+### Tracked Changes
+
+**Insertion:**
+```xml
+<w:ins w:id="101" w:author="Claude" w:date="2025-01-01T00:00:00Z">
+  <w:r><w:t>inserted text</w:t></w:r>
+</w:ins>
+```
+
+**Deletion:**
+```xml
+<w:del w:id="102" w:author="Claude" w:date="2025-01-01T00:00:00Z">
+  <w:r><w:delText>deleted text</w:delText></w:r>
+</w:del>
+```
+
+**Inside `<w:del>`**: Use `<w:delText>` instead of `<w:t>`, and `<w:delInstrText>` instead of `<w:instrText>`.
+
+**Minimal edits** - only mark what changes:
+```xml
+
+The term is
+
+ 30
+
+
+ 60
+
+ days.
+```
+
+**Deleting entire paragraphs/list items** - when removing ALL content from a paragraph, also mark the paragraph mark as deleted so it merges with the next paragraph. Add `<w:rPr><w:del .../></w:rPr>` inside the paragraph's `<w:pPr>`:
+```xml
+
+
+ ...
+
+
+
+
+
+ Entire paragraph content being deleted...
+
+
+```
+Without the `<w:del>` in `<w:pPr>`, accepting changes leaves an empty paragraph/list item.
+
+**Rejecting another author's insertion** - nest deletion inside their insertion:
+```xml
+
+
+ their inserted text
+
+
+```
+
+**Restoring another author's deletion** - add insertion after (don't modify their deletion):
+```xml
+
+ deleted text
+
+
+ deleted text
+
+```
+
+### Comments
+
+After running `comment.py` (see Step 2), add markers to document.xml. For replies, use `--parent` flag and nest markers inside the parent's.
+
+**CRITICAL: `<w:commentRangeStart>` and `<w:commentRangeEnd>` are siblings of `<w:r>` (direct children of `<w:p>`), never inside `<w:r>`.**
+
+```xml
+
+
+
+ deleted
+
+ more text
+
+
+
+
+
+
+ text
+
+
+
+
+```
+
+### Images
+
+1. Add image file to `word/media/`
+2. Add relationship to `word/_rels/document.xml.rels`:
+```xml
+
+```
+3. Add content type to `[Content_Types].xml`:
+```xml
+
+```
+4. Reference in document.xml:
+```xml
+
+
+
+
+
+
+
+
+
+
+
+
+```
+
+---
+
+## Dependencies
+
+- **pandoc**: Text extraction
+- **docx**: `npm install -g docx` (new documents)
+- **LibreOffice**: PDF conversion (auto-configured for sandboxed environments via `scripts/office/soffice.py`)
+- **Poppler**: `pdftoppm` for images
diff --git a/skills/docx/scripts/__init__.py b/skills/docx/scripts/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/skills/docx/scripts/__init__.py
@@ -0,0 +1 @@
+
diff --git a/skills/docx/scripts/accept_changes.py b/skills/docx/scripts/accept_changes.py
new file mode 100644
index 0000000..8e36316
--- /dev/null
+++ b/skills/docx/scripts/accept_changes.py
@@ -0,0 +1,135 @@
+"""Accept all tracked changes in a DOCX file using LibreOffice.
+
+Requires LibreOffice (soffice) to be installed.
+"""
+
+import argparse
+import logging
+import shutil
+import subprocess
+from pathlib import Path
+
+from office.soffice import get_soffice_env
+
+logger = logging.getLogger(__name__)
+
+LIBREOFFICE_PROFILE = "/tmp/libreoffice_docx_profile"
+MACRO_DIR = f"{LIBREOFFICE_PROFILE}/user/basic/Standard"
+
+ACCEPT_CHANGES_MACRO = """<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE script:module PUBLIC "-//OpenOffice.org//DTD OfficeDocument 1.0//EN" "module.dtd">
+<script:module xmlns:script="http://openoffice.org/2000/script" script:name="Module1" script:language="StarBasic">
+    Sub AcceptAllTrackedChanges()
+        Dim document As Object
+        Dim dispatcher As Object
+
+        document = ThisComponent.CurrentController.Frame
+        dispatcher = createUnoService("com.sun.star.frame.DispatchHelper")
+
+        dispatcher.executeDispatch(document, ".uno:AcceptAllTrackedChanges", "", 0, Array())
+        ThisComponent.store()
+        ThisComponent.close(True)
+    End Sub
+</script:module>"""
+
+
+def accept_changes(
+ input_file: str,
+ output_file: str,
+) -> tuple[None, str]:
+ input_path = Path(input_file)
+ output_path = Path(output_file)
+
+ if not input_path.exists():
+ return None, f"Error: Input file not found: {input_file}"
+
+ if not input_path.suffix.lower() == ".docx":
+ return None, f"Error: Input file is not a DOCX file: {input_file}"
+
+ try:
+ output_path.parent.mkdir(parents=True, exist_ok=True)
+ shutil.copy2(input_path, output_path)
+ except Exception as e:
+ return None, f"Error: Failed to copy input file to output location: {e}"
+
+ if not _setup_libreoffice_macro():
+ return None, "Error: Failed to setup LibreOffice macro"
+
+ cmd = [
+ "soffice",
+ "--headless",
+ f"-env:UserInstallation=file://{LIBREOFFICE_PROFILE}",
+ "--norestore",
+ "vnd.sun.star.script:Standard.Module1.AcceptAllTrackedChanges?language=Basic&location=application",
+ str(output_path.absolute()),
+ ]
+
+ try:
+ result = subprocess.run(
+ cmd,
+ capture_output=True,
+ text=True,
+ timeout=30,
+ check=False,
+ env=get_soffice_env(),
+ )
+ except subprocess.TimeoutExpired:
+ return (
+ None,
+ f"Successfully accepted all tracked changes: {input_file} -> {output_file}",
+ )
+
+ if result.returncode != 0:
+ return None, f"Error: LibreOffice failed: {result.stderr}"
+
+ return (
+ None,
+ f"Successfully accepted all tracked changes: {input_file} -> {output_file}",
+ )
+
+
+def _setup_libreoffice_macro() -> bool:
+ macro_dir = Path(MACRO_DIR)
+ macro_file = macro_dir / "Module1.xba"
+
+ if macro_file.exists() and "AcceptAllTrackedChanges" in macro_file.read_text():
+ return True
+
+ if not macro_dir.exists():
+ subprocess.run(
+ [
+ "soffice",
+ "--headless",
+ f"-env:UserInstallation=file://{LIBREOFFICE_PROFILE}",
+ "--terminate_after_init",
+ ],
+ capture_output=True,
+ timeout=10,
+ check=False,
+ env=get_soffice_env(),
+ )
+ macro_dir.mkdir(parents=True, exist_ok=True)
+
+ try:
+ macro_file.write_text(ACCEPT_CHANGES_MACRO)
+ return True
+ except Exception as e:
+ logger.warning(f"Failed to setup LibreOffice macro: {e}")
+ return False
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description="Accept all tracked changes in a DOCX file"
+ )
+ parser.add_argument("input_file", help="Input DOCX file with tracked changes")
+ parser.add_argument(
+ "output_file", help="Output DOCX file (clean, no tracked changes)"
+ )
+ args = parser.parse_args()
+
+ _, message = accept_changes(args.input_file, args.output_file)
+ print(message)
+
+ if "Error" in message:
+ raise SystemExit(1)
diff --git a/skills/docx/scripts/comment.py b/skills/docx/scripts/comment.py
new file mode 100644
index 0000000..36e1c93
--- /dev/null
+++ b/skills/docx/scripts/comment.py
@@ -0,0 +1,318 @@
+"""Add comments to DOCX documents.
+
+Usage:
+ python comment.py unpacked/ 0 "Comment text"
+ python comment.py unpacked/ 1 "Reply text" --parent 0
+
+Text should be pre-escaped XML (e.g., &amp;amp; for &, &amp;#8217; for smart quotes).
+
+After running, add markers to document.xml:
+<w:commentRangeStart w:id="0"/>
+  ... commented content ...
+<w:commentRangeEnd w:id="0"/>
+<w:r><w:commentReference w:id="0"/></w:r>
+"""
+
+import argparse
+import random
+import shutil
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+
+import defusedxml.minidom
+
+TEMPLATE_DIR = Path(__file__).parent / "templates"
+NS = {
+ "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
+ "w14": "http://schemas.microsoft.com/office/word/2010/wordml",
+ "w15": "http://schemas.microsoft.com/office/word/2012/wordml",
+ "w16cid": "http://schemas.microsoft.com/office/word/2016/wordml/cid",
+ "w16cex": "http://schemas.microsoft.com/office/word/2018/wordml/cex",
+}
+
+COMMENT_XML = """\
+
+
+
+
+
+
+
+
+
+
+
+
+ {text}
+
+
+"""
+
+COMMENT_MARKER_TEMPLATE = """
+Add to document.xml (markers must be direct children of w:p, never inside w:r):
+<w:commentRangeStart w:id="{cid}"/>
+  ... runs being commented on ...
+<w:commentRangeEnd w:id="{cid}"/><w:r><w:commentReference w:id="{cid}"/></w:r>
+"""
+
+REPLY_MARKER_TEMPLATE = """
+Nest markers inside parent {pid}'s markers (markers must be direct children of w:p, never inside w:r):
+<w:commentRangeStart w:id="{pid}"/><w:commentRangeStart w:id="{cid}"/>
+  ... runs being commented on ...
+<w:commentRangeEnd w:id="{cid}"/><w:r><w:commentReference w:id="{cid}"/></w:r>
+<w:commentRangeEnd w:id="{pid}"/><w:r><w:commentReference w:id="{pid}"/></w:r>
+"""
+
+
+def _generate_hex_id() -> str:
+ return f"{random.randint(0, 0x7FFFFFFE):08X}"
+
+
+SMART_QUOTE_ENTITIES = {
+ "\u201c": "“",
+ "\u201d": "”",
+ "\u2018": "‘",
+ "\u2019": "’",
+}
+
+
+def _encode_smart_quotes(text: str) -> str:
+ for char, entity in SMART_QUOTE_ENTITIES.items():
+ text = text.replace(char, entity)
+ return text
+
+
+def _append_xml(xml_path: Path, root_tag: str, content: str) -> None:
+ dom = defusedxml.minidom.parseString(xml_path.read_text(encoding="utf-8"))
+ root = dom.getElementsByTagName(root_tag)[0]
+ ns_attrs = " ".join(f'xmlns:{k}="{v}"' for k, v in NS.items())
+    wrapper_dom = defusedxml.minidom.parseString(f"<wrapper {ns_attrs}>{content}</wrapper>")
+ for child in wrapper_dom.documentElement.childNodes:
+ if child.nodeType == child.ELEMENT_NODE:
+ root.appendChild(dom.importNode(child, True))
+ output = _encode_smart_quotes(dom.toxml(encoding="UTF-8").decode("utf-8"))
+ xml_path.write_text(output, encoding="utf-8")
+
+
+def _find_para_id(comments_path: Path, comment_id: int) -> str | None:
+ dom = defusedxml.minidom.parseString(comments_path.read_text(encoding="utf-8"))
+ for c in dom.getElementsByTagName("w:comment"):
+ if c.getAttribute("w:id") == str(comment_id):
+ for p in c.getElementsByTagName("w:p"):
+ if pid := p.getAttribute("w14:paraId"):
+ return pid
+ return None
+
+
+def _get_next_rid(rels_path: Path) -> int:
+ dom = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8"))
+ max_rid = 0
+ for rel in dom.getElementsByTagName("Relationship"):
+ rid = rel.getAttribute("Id")
+ if rid and rid.startswith("rId"):
+ try:
+ max_rid = max(max_rid, int(rid[3:]))
+ except ValueError:
+ pass
+ return max_rid + 1
+
+
+def _has_relationship(rels_path: Path, target: str) -> bool:
+ dom = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8"))
+ for rel in dom.getElementsByTagName("Relationship"):
+ if rel.getAttribute("Target") == target:
+ return True
+ return False
+
+
+def _has_content_type(ct_path: Path, part_name: str) -> bool:
+ dom = defusedxml.minidom.parseString(ct_path.read_text(encoding="utf-8"))
+ for override in dom.getElementsByTagName("Override"):
+ if override.getAttribute("PartName") == part_name:
+ return True
+ return False
+
+
+def _ensure_comment_relationships(unpacked_dir: Path) -> None:
+ rels_path = unpacked_dir / "word" / "_rels" / "document.xml.rels"
+ if not rels_path.exists():
+ return
+
+ if _has_relationship(rels_path, "comments.xml"):
+ return
+
+ dom = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8"))
+ root = dom.documentElement
+ next_rid = _get_next_rid(rels_path)
+
+ rels = [
+ (
+ "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments",
+ "comments.xml",
+ ),
+ (
+ "http://schemas.microsoft.com/office/2011/relationships/commentsExtended",
+ "commentsExtended.xml",
+ ),
+ (
+ "http://schemas.microsoft.com/office/2016/09/relationships/commentsIds",
+ "commentsIds.xml",
+ ),
+ (
+ "http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible",
+ "commentsExtensible.xml",
+ ),
+ ]
+
+ for rel_type, target in rels:
+ rel = dom.createElement("Relationship")
+ rel.setAttribute("Id", f"rId{next_rid}")
+ rel.setAttribute("Type", rel_type)
+ rel.setAttribute("Target", target)
+ root.appendChild(rel)
+ next_rid += 1
+
+ rels_path.write_bytes(dom.toxml(encoding="UTF-8"))
+
+
+def _ensure_comment_content_types(unpacked_dir: Path) -> None:
+ ct_path = unpacked_dir / "[Content_Types].xml"
+ if not ct_path.exists():
+ return
+
+ if _has_content_type(ct_path, "/word/comments.xml"):
+ return
+
+ dom = defusedxml.minidom.parseString(ct_path.read_text(encoding="utf-8"))
+ root = dom.documentElement
+
+ overrides = [
+ (
+ "/word/comments.xml",
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml",
+ ),
+ (
+ "/word/commentsExtended.xml",
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml",
+ ),
+ (
+ "/word/commentsIds.xml",
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml",
+ ),
+ (
+ "/word/commentsExtensible.xml",
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml",
+ ),
+ ]
+
+ for part_name, content_type in overrides:
+ override = dom.createElement("Override")
+ override.setAttribute("PartName", part_name)
+ override.setAttribute("ContentType", content_type)
+ root.appendChild(override)
+
+ ct_path.write_bytes(dom.toxml(encoding="UTF-8"))
+
+
+def add_comment(
+ unpacked_dir: str,
+ comment_id: int,
+ text: str,
+ author: str = "Claude",
+ initials: str = "C",
+ parent_id: int | None = None,
+) -> tuple[str, str]:
+ word = Path(unpacked_dir) / "word"
+ if not word.exists():
+ return "", f"Error: {word} not found"
+
+ para_id, durable_id = _generate_hex_id(), _generate_hex_id()
+ ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+ comments = word / "comments.xml"
+ first_comment = not comments.exists()
+ if first_comment:
+ shutil.copy(TEMPLATE_DIR / "comments.xml", comments)
+ _ensure_comment_relationships(Path(unpacked_dir))
+ _ensure_comment_content_types(Path(unpacked_dir))
+ _append_xml(
+ comments,
+ "w:comments",
+ COMMENT_XML.format(
+ id=comment_id,
+ author=author,
+ date=ts,
+ initials=initials,
+ para_id=para_id,
+ text=text,
+ ),
+ )
+
+ ext = word / "commentsExtended.xml"
+ if not ext.exists():
+ shutil.copy(TEMPLATE_DIR / "commentsExtended.xml", ext)
+ if parent_id is not None:
+ parent_para = _find_para_id(comments, parent_id)
+ if not parent_para:
+ return "", f"Error: Parent comment {parent_id} not found"
+ _append_xml(
+ ext,
+ "w15:commentsEx",
+            f'<w15:commentEx w15:paraId="{para_id}" w15:paraIdParent="{parent_para}" w15:done="0"/>',
+ )
+ else:
+ _append_xml(
+ ext,
+ "w15:commentsEx",
+            f'<w15:commentEx w15:paraId="{para_id}" w15:done="0"/>',
+ )
+
+ ids = word / "commentsIds.xml"
+ if not ids.exists():
+ shutil.copy(TEMPLATE_DIR / "commentsIds.xml", ids)
+ _append_xml(
+ ids,
+ "w16cid:commentsIds",
+        f'<w16cid:commentId w16cid:paraId="{para_id}" w16cid:durableId="{durable_id}"/>',
+ )
+
+ extensible = word / "commentsExtensible.xml"
+ if not extensible.exists():
+ shutil.copy(TEMPLATE_DIR / "commentsExtensible.xml", extensible)
+ _append_xml(
+ extensible,
+ "w16cex:commentsExtensible",
+        f'<w16cex:commentExtensible w16cex:durableId="{durable_id}" w16cex:dateUtc="{ts}"/>',
+ )
+
+ action = "reply" if parent_id is not None else "comment"
+ return para_id, f"Added {action} {comment_id} (para_id={para_id})"
+
+
+if __name__ == "__main__":
+ p = argparse.ArgumentParser(description="Add comments to DOCX documents")
+ p.add_argument("unpacked_dir", help="Unpacked DOCX directory")
+ p.add_argument("comment_id", type=int, help="Comment ID (must be unique)")
+ p.add_argument("text", help="Comment text")
+ p.add_argument("--author", default="Claude", help="Author name")
+ p.add_argument("--initials", default="C", help="Author initials")
+ p.add_argument("--parent", type=int, help="Parent comment ID (for replies)")
+ args = p.parse_args()
+
+ para_id, msg = add_comment(
+ args.unpacked_dir,
+ args.comment_id,
+ args.text,
+ args.author,
+ args.initials,
+ args.parent,
+ )
+ print(msg)
+ if "Error" in msg:
+ sys.exit(1)
+ cid = args.comment_id
+ if args.parent is not None:
+ print(REPLY_MARKER_TEMPLATE.format(pid=args.parent, cid=cid))
+ else:
+ print(COMMENT_MARKER_TEMPLATE.format(cid=cid))
diff --git a/skills/docx/scripts/office/helpers/__init__.py b/skills/docx/scripts/office/helpers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/skills/docx/scripts/office/helpers/merge_runs.py b/skills/docx/scripts/office/helpers/merge_runs.py
new file mode 100644
index 0000000..ad7c25e
--- /dev/null
+++ b/skills/docx/scripts/office/helpers/merge_runs.py
@@ -0,0 +1,199 @@
+"""Merge adjacent runs with identical formatting in DOCX.
+
+Merges adjacent <w:r> elements that have identical <w:rPr> properties.
+Works on runs in paragraphs and inside tracked changes (<w:ins>, <w:del>).
+
+Also:
+- Removes rsid attributes from runs (revision metadata that doesn't affect rendering)
+- Removes proofErr elements (spell/grammar markers that block merging)
+"""
+
+from pathlib import Path
+
+import defusedxml.minidom
+
+
+def merge_runs(input_dir: str) -> tuple[int, str]:
+ doc_xml = Path(input_dir) / "word" / "document.xml"
+
+ if not doc_xml.exists():
+ return 0, f"Error: {doc_xml} not found"
+
+ try:
+ dom = defusedxml.minidom.parseString(doc_xml.read_text(encoding="utf-8"))
+ root = dom.documentElement
+
+ _remove_elements(root, "proofErr")
+ _strip_run_rsid_attrs(root)
+
+ containers = {run.parentNode for run in _find_elements(root, "r")}
+
+ merge_count = 0
+ for container in containers:
+ merge_count += _merge_runs_in(container)
+
+ doc_xml.write_bytes(dom.toxml(encoding="UTF-8"))
+ return merge_count, f"Merged {merge_count} runs"
+
+ except Exception as e:
+ return 0, f"Error: {e}"
+
+
+
+
+def _find_elements(root, tag: str) -> list:
+ results = []
+
+ def traverse(node):
+ if node.nodeType == node.ELEMENT_NODE:
+ name = node.localName or node.tagName
+ if name == tag or name.endswith(f":{tag}"):
+ results.append(node)
+ for child in node.childNodes:
+ traverse(child)
+
+ traverse(root)
+ return results
+
+
+def _get_child(parent, tag: str):
+ for child in parent.childNodes:
+ if child.nodeType == child.ELEMENT_NODE:
+ name = child.localName or child.tagName
+ if name == tag or name.endswith(f":{tag}"):
+ return child
+ return None
+
+
+def _get_children(parent, tag: str) -> list:
+ results = []
+ for child in parent.childNodes:
+ if child.nodeType == child.ELEMENT_NODE:
+ name = child.localName or child.tagName
+ if name == tag or name.endswith(f":{tag}"):
+ results.append(child)
+ return results
+
+
+def _is_adjacent(elem1, elem2) -> bool:
+ node = elem1.nextSibling
+ while node:
+ if node == elem2:
+ return True
+ if node.nodeType == node.ELEMENT_NODE:
+ return False
+ if node.nodeType == node.TEXT_NODE and node.data.strip():
+ return False
+ node = node.nextSibling
+ return False
+
+
+
+
+def _remove_elements(root, tag: str):
+ for elem in _find_elements(root, tag):
+ if elem.parentNode:
+ elem.parentNode.removeChild(elem)
+
+
+def _strip_run_rsid_attrs(root):
+ for run in _find_elements(root, "r"):
+ for attr in list(run.attributes.values()):
+ if "rsid" in attr.name.lower():
+ run.removeAttribute(attr.name)
+
+
+
+
+def _merge_runs_in(container) -> int:
+ merge_count = 0
+ run = _first_child_run(container)
+
+ while run:
+ while True:
+ next_elem = _next_element_sibling(run)
+ if next_elem and _is_run(next_elem) and _can_merge(run, next_elem):
+ _merge_run_content(run, next_elem)
+ container.removeChild(next_elem)
+ merge_count += 1
+ else:
+ break
+
+ _consolidate_text(run)
+ run = _next_sibling_run(run)
+
+ return merge_count
+
+
+def _first_child_run(container):
+ for child in container.childNodes:
+ if child.nodeType == child.ELEMENT_NODE and _is_run(child):
+ return child
+ return None
+
+
+def _next_element_sibling(node):
+ sibling = node.nextSibling
+ while sibling:
+ if sibling.nodeType == sibling.ELEMENT_NODE:
+ return sibling
+ sibling = sibling.nextSibling
+ return None
+
+
+def _next_sibling_run(node):
+ sibling = node.nextSibling
+ while sibling:
+ if sibling.nodeType == sibling.ELEMENT_NODE:
+ if _is_run(sibling):
+ return sibling
+ sibling = sibling.nextSibling
+ return None
+
+
+def _is_run(node) -> bool:
+ name = node.localName or node.tagName
+ return name == "r" or name.endswith(":r")
+
+
+def _can_merge(run1, run2) -> bool:
+ rpr1 = _get_child(run1, "rPr")
+ rpr2 = _get_child(run2, "rPr")
+
+ if (rpr1 is None) != (rpr2 is None):
+ return False
+ if rpr1 is None:
+ return True
+ return rpr1.toxml() == rpr2.toxml()
+
+
+def _merge_run_content(target, source):
+ for child in list(source.childNodes):
+ if child.nodeType == child.ELEMENT_NODE:
+ name = child.localName or child.tagName
+ if name != "rPr" and not name.endswith(":rPr"):
+ target.appendChild(child)
+
+
+def _consolidate_text(run):
+ t_elements = _get_children(run, "t")
+
+ for i in range(len(t_elements) - 1, 0, -1):
+ curr, prev = t_elements[i], t_elements[i - 1]
+
+ if _is_adjacent(prev, curr):
+ prev_text = prev.firstChild.data if prev.firstChild else ""
+ curr_text = curr.firstChild.data if curr.firstChild else ""
+ merged = prev_text + curr_text
+
+ if prev.firstChild:
+ prev.firstChild.data = merged
+ else:
+ prev.appendChild(run.ownerDocument.createTextNode(merged))
+
+ if merged.startswith(" ") or merged.endswith(" "):
+ prev.setAttribute("xml:space", "preserve")
+ elif prev.hasAttribute("xml:space"):
+ prev.removeAttribute("xml:space")
+
+ run.removeChild(curr)
diff --git a/skills/docx/scripts/office/helpers/simplify_redlines.py b/skills/docx/scripts/office/helpers/simplify_redlines.py
new file mode 100644
index 0000000..db963bb
--- /dev/null
+++ b/skills/docx/scripts/office/helpers/simplify_redlines.py
@@ -0,0 +1,197 @@
+"""Simplify tracked changes by merging adjacent w:ins or w:del elements.
+
Merges adjacent w:ins elements from the same author into a single element.
Same applies to w:del elements. This makes heavily-redlined documents easier to
+work with by reducing the number of tracked change wrappers.
+
+Rules:
+- Only merges w:ins with w:ins, w:del with w:del (same element type)
+- Only merges if same author (ignores timestamp differences)
+- Only merges if truly adjacent (only whitespace between them)
+"""
+
+import xml.etree.ElementTree as ET
+import zipfile
+from pathlib import Path
+
+import defusedxml.minidom
+
+WORD_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
+
+
def simplify_redlines(input_dir: str) -> tuple[int, str]:
    """Merge adjacent same-author w:ins/w:del wrappers in word/document.xml.

    Returns (number of merges performed, human-readable status message).
    Any failure is reported in the message rather than raised.
    """
    doc_xml = Path(input_dir) / "word" / "document.xml"

    if not doc_xml.exists():
        return 0, f"Error: {doc_xml} not found"

    try:
        dom = defusedxml.minidom.parseString(doc_xml.read_text(encoding="utf-8"))
        root = dom.documentElement

        total = 0
        # Tracked-change wrappers only merge within a paragraph or table cell.
        for container in _find_elements(root, "p") + _find_elements(root, "tc"):
            for tag in ("ins", "del"):
                total += _merge_tracked_changes_in(container, tag)

        doc_xml.write_bytes(dom.toxml(encoding="UTF-8"))
        return total, f"Simplified {total} tracked changes"

    except Exception as e:
        return 0, f"Error: {e}"
+
+
def _merge_tracked_changes_in(container, tag: str) -> int:
    """Merge adjacent same-author <tag> children of ``container``.

    Returns the number of merges performed.
    """
    candidates = [
        node
        for node in container.childNodes
        if node.nodeType == node.ELEMENT_NODE and _is_element(node, tag)
    ]
    if len(candidates) < 2:
        return 0

    merged = 0
    idx = 0
    while idx + 1 < len(candidates):
        first = candidates[idx]
        second = candidates[idx + 1]

        if not _can_merge_tracked(first, second):
            idx += 1
            continue

        # Fold the second wrapper's content into the first, then drop it.
        _merge_tracked_content(first, second)
        container.removeChild(second)
        del candidates[idx + 1]
        merged += 1

    return merged
+
+
+def _is_element(node, tag: str) -> bool:
+ name = node.localName or node.tagName
+ return name == tag or name.endswith(f":{tag}")
+
+
+def _get_author(elem) -> str:
+ author = elem.getAttribute("w:author")
+ if not author:
+ for attr in elem.attributes.values():
+ if attr.localName == "author" or attr.name.endswith(":author"):
+ return attr.value
+ return author
+
+
def _can_merge_tracked(elem1, elem2) -> bool:
    """Mergeable iff same author and only whitespace sits between the two."""
    if _get_author(elem1) != _get_author(elem2):
        return False

    between = elem1.nextSibling
    while between is not None and between is not elem2:
        if between.nodeType == between.ELEMENT_NODE:
            return False
        if between.nodeType == between.TEXT_NODE and between.data.strip():
            return False
        between = between.nextSibling

    return True
+
+
+def _merge_tracked_content(target, source):
+ while source.firstChild:
+ child = source.firstChild
+ source.removeChild(child)
+ target.appendChild(child)
+
+
+def _find_elements(root, tag: str) -> list:
+ results = []
+
+ def traverse(node):
+ if node.nodeType == node.ELEMENT_NODE:
+ name = node.localName or node.tagName
+ if name == tag or name.endswith(f":{tag}"):
+ results.append(node)
+ for child in node.childNodes:
+ traverse(child)
+
+ traverse(root)
+ return results
+
+
def get_tracked_change_authors(doc_xml_path: Path) -> dict[str, int]:
    """Count w:ins/w:del tracked changes per author in a document.xml.

    Returns {author: change_count}; {} when the file is missing or is not
    well-formed XML.
    """
    if not doc_xml_path.exists():
        return {}

    try:
        root = ET.parse(doc_xml_path).getroot()
    except ET.ParseError:
        return {}

    author_attr = f"{{{WORD_NS}}}author"

    counts: dict[str, int] = {}
    for tag in ("ins", "del"):
        for elem in root.findall(f".//w:{tag}", {"w": WORD_NS}):
            author = elem.get(author_attr)
            if author:
                counts[author] = counts.get(author, 0) + 1

    return counts
+
+
def _get_authors_from_docx(docx_path: Path) -> dict[str, int]:
    """Count tracked-change authors inside a packed .docx archive.

    Returns {} when the archive is unreadable, lacks word/document.xml,
    or contains malformed XML.
    """
    try:
        with zipfile.ZipFile(docx_path, "r") as archive:
            if "word/document.xml" not in archive.namelist():
                return {}
            with archive.open("word/document.xml") as stream:
                root = ET.parse(stream).getroot()

            author_attr = f"{{{WORD_NS}}}author"
            counts: dict[str, int] = {}
            for tag in ("ins", "del"):
                for elem in root.findall(f".//w:{tag}", {"w": WORD_NS}):
                    author = elem.get(author_attr)
                    if author:
                        counts[author] = counts.get(author, 0) + 1
            return counts
    except (zipfile.BadZipFile, ET.ParseError):
        return {}
+
+
def infer_author(modified_dir: Path, original_docx: Path, default: str = "Claude") -> str:
    """Work out which author added tracked changes during this edit session.

    Compares per-author change counts in the modified (unpacked) document
    against the original .docx. Returns ``default`` when no author has a net
    increase; raises ValueError when more than one author added changes.
    """
    modified_counts = get_tracked_change_authors(
        modified_dir / "word" / "document.xml"
    )
    if not modified_counts:
        return default

    original_counts = _get_authors_from_docx(original_docx)

    # Keep only authors whose change count grew relative to the original.
    new_changes: dict[str, int] = {}
    for author, count in modified_counts.items():
        delta = count - original_counts.get(author, 0)
        if delta > 0:
            new_changes[author] = delta

    if not new_changes:
        return default
    if len(new_changes) == 1:
        return next(iter(new_changes))

    raise ValueError(
        f"Multiple authors added new changes: {new_changes}. "
        "Cannot infer which author to validate."
    )
diff --git a/skills/docx/scripts/office/pack.py b/skills/docx/scripts/office/pack.py
new file mode 100644
index 0000000..db29ed8
--- /dev/null
+++ b/skills/docx/scripts/office/pack.py
@@ -0,0 +1,159 @@
+"""Pack a directory into a DOCX, PPTX, or XLSX file.
+
+Validates with auto-repair, condenses XML formatting, and creates the Office file.
+
+Usage:
    python pack.py <input_directory> <output_file> [--original <file>] [--validate true|false]
+
+Examples:
+ python pack.py unpacked/ output.docx --original input.docx
+ python pack.py unpacked/ output.pptx --validate false
+"""
+
+import argparse
+import sys
+import shutil
+import tempfile
+import zipfile
+from pathlib import Path
+
+import defusedxml.minidom
+
+from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
+
def pack(
    input_directory: str,
    output_file: str,
    original_file: str | None = None,
    validate: bool = True,
    infer_author_func=None,
) -> tuple[None, str]:
    """Validate, whitespace-condense, and zip an unpacked Office directory.

    Returns (None, message); the message starts with "Error" on failure.
    """
    input_dir = Path(input_directory)
    output_path = Path(output_file)
    suffix = output_path.suffix.lower()

    if not input_dir.is_dir():
        return None, f"Error: {input_dir} is not a directory"

    if suffix not in {".docx", ".pptx", ".xlsx"}:
        return None, f"Error: {output_file} must be a .docx, .pptx, or .xlsx file"

    # Validation runs only when an original file is supplied and present.
    if validate and original_file:
        original_path = Path(original_file)
        if original_path.exists():
            ok, report = _run_validation(
                input_dir, original_path, suffix, infer_author_func
            )
            if report:
                print(report)
            if not ok:
                return None, f"Error: Validation failed for {input_dir}"

    with tempfile.TemporaryDirectory() as temp_dir:
        # Work on a copy so condensing never mutates the caller's directory.
        staging = Path(temp_dir) / "content"
        shutil.copytree(input_dir, staging)

        for pattern in ("*.xml", "*.rels"):
            for xml_file in staging.rglob(pattern):
                _condense_xml(xml_file)

        output_path.parent.mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as archive:
            for entry in staging.rglob("*"):
                if entry.is_file():
                    archive.write(entry, entry.relative_to(staging))

    return None, f"Successfully packed {input_dir} to {output_file}"
+
+
def _run_validation(
    unpacked_dir: Path,
    original_file: Path,
    suffix: str,
    infer_author_func=None,
) -> tuple[bool, str | None]:
    """Run schema (and, for .docx, redlining) validators with auto-repair.

    Returns (all_passed, printable report or None). Unknown suffixes pass
    trivially with no report.
    """
    report_lines = []

    if suffix == ".docx":
        # Redlining validation needs to know whose tracked changes to check.
        author = "Claude"
        if infer_author_func:
            try:
                author = infer_author_func(unpacked_dir, original_file)
            except ValueError as e:
                print(f"Warning: {e} Using default author 'Claude'.", file=sys.stderr)

        checks = [
            DOCXSchemaValidator(unpacked_dir, original_file),
            RedliningValidator(unpacked_dir, original_file, author=author),
        ]
    elif suffix == ".pptx":
        checks = [PPTXSchemaValidator(unpacked_dir, original_file)]
    else:
        checks = []

    if not checks:
        return True, None

    repaired = sum(v.repair() for v in checks)
    if repaired:
        report_lines.append(f"Auto-repaired {repaired} issue(s)")

    passed = all(v.validate() for v in checks)
    if passed:
        report_lines.append("All validations PASSED!")

    return passed, "\n".join(report_lines) if report_lines else None
+
+
def _condense_xml(xml_file: Path) -> None:
    """Strip ignorable whitespace and comments from an XML part, in place.

    Text elements keep their children untouched because their whitespace is
    document content. Parse failures are logged to stderr and re-raised.
    """
    try:
        with open(xml_file, encoding="utf-8") as f:
            dom = defusedxml.minidom.parse(f)

        for element in dom.getElementsByTagName("*"):
            # Fix: also match an unprefixed <t> element, not just <w:t> —
            # consistent with the name-matching used elsewhere in this skill.
            if element.tagName == "t" or element.tagName.endswith(":t"):
                continue

            for child in list(element.childNodes):
                is_blank_text = (
                    child.nodeType == child.TEXT_NODE
                    and child.nodeValue
                    and child.nodeValue.strip() == ""
                )
                if is_blank_text or child.nodeType == child.COMMENT_NODE:
                    element.removeChild(child)

        xml_file.write_bytes(dom.toxml(encoding="UTF-8"))
    except Exception as e:
        print(f"ERROR: Failed to parse {xml_file.name}: {e}", file=sys.stderr)
        raise
+
+
# CLI entry point: parse arguments, pack the directory, and report status.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Pack a directory into a DOCX, PPTX, or XLSX file"
    )
    parser.add_argument("input_directory", help="Unpacked Office document directory")
    parser.add_argument("output_file", help="Output Office file (.docx/.pptx/.xlsx)")
    parser.add_argument(
        "--original",
        help="Original file for validation comparison",
    )
    parser.add_argument(
        "--validate",
        # Any value other than "true" (case-insensitive) disables validation.
        type=lambda x: x.lower() == "true",
        default=True,
        metavar="true|false",
        help="Run validation with auto-repair (default: true)",
    )
    args = parser.parse_args()

    _, message = pack(
        args.input_directory,
        args.output_file,
        original_file=args.original,
        validate=args.validate,
    )
    print(message)

    # pack() signals failure via the message text; map it to exit code 1.
    if "Error" in message:
        sys.exit(1)
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd
new file mode 100644
index 0000000..6454ef9
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd
@@ -0,0 +1,1499 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd
new file mode 100644
index 0000000..afa4f46
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd
@@ -0,0 +1,146 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd
new file mode 100644
index 0000000..64e66b8
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd
@@ -0,0 +1,1085 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd
new file mode 100644
index 0000000..687eea8
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd
@@ -0,0 +1,11 @@
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd
new file mode 100644
index 0000000..6ac81b0
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd
@@ -0,0 +1,3081 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd
new file mode 100644
index 0000000..1dbf051
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd
@@ -0,0 +1,23 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd
new file mode 100644
index 0000000..f1af17d
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd
@@ -0,0 +1,185 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd
new file mode 100644
index 0000000..0a185ab
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd
@@ -0,0 +1,287 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd
new file mode 100644
index 0000000..14ef488
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd
@@ -0,0 +1,1676 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd
new file mode 100644
index 0000000..c20f3bf
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd
@@ -0,0 +1,28 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd
new file mode 100644
index 0000000..ac60252
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd
new file mode 100644
index 0000000..424b8ba
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd
@@ -0,0 +1,174 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd
new file mode 100644
index 0000000..2bddce2
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd
new file mode 100644
index 0000000..8a8c18b
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd
@@ -0,0 +1,18 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd
new file mode 100644
index 0000000..5c42706
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd
@@ -0,0 +1,59 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd
new file mode 100644
index 0000000..853c341
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd
@@ -0,0 +1,56 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd
new file mode 100644
index 0000000..da835ee
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd
@@ -0,0 +1,195 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd
new file mode 100644
index 0000000..87ad265
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd
@@ -0,0 +1,582 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd
new file mode 100644
index 0000000..9e86f1b
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd
new file mode 100644
index 0000000..d0be42e
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd
@@ -0,0 +1,4439 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd
new file mode 100644
index 0000000..8821dd1
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd
@@ -0,0 +1,570 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd
new file mode 100644
index 0000000..ca2575c
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd
@@ -0,0 +1,509 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd
new file mode 100644
index 0000000..dd079e6
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd
new file mode 100644
index 0000000..3dd6cf6
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd
@@ -0,0 +1,108 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd
new file mode 100644
index 0000000..f1041e3
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd
@@ -0,0 +1,96 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd
new file mode 100644
index 0000000..9c5b7a6
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd
@@ -0,0 +1,3646 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd
new file mode 100644
index 0000000..0f13678
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd
@@ -0,0 +1,116 @@
+
+
+
+
+
+ See http://www.w3.org/XML/1998/namespace.html and
+ http://www.w3.org/TR/REC-xml for information about this namespace.
+
+ This schema document describes the XML namespace, in a form
+ suitable for import by other schema documents.
+
+ Note that local names in this namespace are intended to be defined
+ only by the World Wide Web Consortium or its subgroups. The
+ following names are currently defined in this namespace and should
+ not be used with conflicting semantics by any Working Group,
+ specification, or document instance:
+
+ base (as an attribute name): denotes an attribute whose value
+ provides a URI to be used as the base for interpreting any
+ relative URIs in the scope of the element on which it
+ appears; its value is inherited. This name is reserved
+ by virtue of its definition in the XML Base specification.
+
+ lang (as an attribute name): denotes an attribute whose value
+ is a language code for the natural language of the content of
+ any element; its value is inherited. This name is reserved
+ by virtue of its definition in the XML specification.
+
+ space (as an attribute name): denotes an attribute whose
+ value is a keyword indicating what whitespace processing
+ discipline is intended for the content of the element; its
+ value is inherited. This name is reserved by virtue of its
+ definition in the XML specification.
+
+ Father (in any context at all): denotes Jon Bosak, the chair of
+ the original XML Working Group. This name is reserved by
+ the following decision of the W3C XML Plenary and
+ XML Coordination groups:
+
+ In appreciation for his vision, leadership and dedication
+ the W3C XML Plenary on this 10th day of February, 2000
+ reserves for Jon Bosak in perpetuity the XML name
+ xml:Father
+
+
+
+
+ This schema defines attributes and an attribute group
+ suitable for use by
+ schemas wishing to allow xml:base, xml:lang or xml:space attributes
+ on elements they define.
+
+ To enable this, such a schema must import this schema
+ for the XML namespace, e.g. as follows:
+ <schema . . .>
+ . . .
+ <import namespace="http://www.w3.org/XML/1998/namespace"
+ schemaLocation="http://www.w3.org/2001/03/xml.xsd"/>
+
+ Subsequently, qualified reference to any of the attributes
+ or the group defined below will have the desired effect, e.g.
+
+ <type . . .>
+ . . .
+ <attributeGroup ref="xml:specialAttrs"/>
+
+ will define a type which will schema-validate an instance
+ element with any of those attributes
+
+
+
+ In keeping with the XML Schema WG's standard versioning
+ policy, this schema document will persist at
+ http://www.w3.org/2001/03/xml.xsd.
+ At the date of issue it can also be found at
+ http://www.w3.org/2001/xml.xsd.
+ The schema document at that URI may however change in the future,
+ in order to remain compatible with the latest version of XML Schema
+ itself. In other words, if the XML Schema namespace changes, the version
+ of this document at
+ http://www.w3.org/2001/xml.xsd will change
+ accordingly; the version at
+ http://www.w3.org/2001/03/xml.xsd will not change.
+
+
+
+
+
+ In due course, we should install the relevant ISO 2- and 3-letter
+ codes as the enumerated possible values . . .
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ See http://www.w3.org/TR/xmlbase/ for
+ information about this attribute.
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd b/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd
new file mode 100644
index 0000000..a6de9d2
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd
@@ -0,0 +1,42 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd b/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd
new file mode 100644
index 0000000..10e978b
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd
@@ -0,0 +1,50 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd b/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd
new file mode 100644
index 0000000..4248bf7
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd
@@ -0,0 +1,49 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd b/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd
new file mode 100644
index 0000000..5649746
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd
@@ -0,0 +1,33 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/mce/mc.xsd b/skills/docx/scripts/office/schemas/mce/mc.xsd
new file mode 100644
index 0000000..ef72545
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/mce/mc.xsd
@@ -0,0 +1,75 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd b/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd
new file mode 100644
index 0000000..f65f777
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd
@@ -0,0 +1,560 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd b/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd
new file mode 100644
index 0000000..6b00755
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd
@@ -0,0 +1,67 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd b/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd
new file mode 100644
index 0000000..f321d33
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd b/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd
new file mode 100644
index 0000000..364c6a9
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd b/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd
new file mode 100644
index 0000000..fed9d15
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd b/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd
new file mode 100644
index 0000000..680cf15
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd b/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd
new file mode 100644
index 0000000..89ada90
--- /dev/null
+++ b/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
diff --git a/skills/docx/scripts/office/soffice.py b/skills/docx/scripts/office/soffice.py
new file mode 100644
index 0000000..c7f7e32
--- /dev/null
+++ b/skills/docx/scripts/office/soffice.py
@@ -0,0 +1,183 @@
+"""
+Helper for running LibreOffice (soffice) in environments where AF_UNIX
+sockets may be blocked (e.g., sandboxed VMs). Detects the restriction
+at runtime and applies an LD_PRELOAD shim if needed.
+
+Usage:
+ from office.soffice import run_soffice, get_soffice_env
+
+ # Option 1 – run soffice directly
+ result = run_soffice(["--headless", "--convert-to", "pdf", "input.docx"])
+
+ # Option 2 – get env dict for your own subprocess calls
+ env = get_soffice_env()
+ subprocess.run(["soffice", ...], env=env)
+"""
+
+import os
+import socket
+import subprocess
+import tempfile
+from pathlib import Path
+
+
+def get_soffice_env() -> dict:
+ env = os.environ.copy()
+ env["SAL_USE_VCLPLUGIN"] = "svp"
+
+ if _needs_shim():
+ shim = _ensure_shim()
+ env["LD_PRELOAD"] = str(shim)
+
+ return env
+
+
+def run_soffice(args: list[str], **kwargs) -> subprocess.CompletedProcess:
+ env = get_soffice_env()
+ return subprocess.run(["soffice"] + args, env=env, **kwargs)
+
+
+
+# Compiled LD_PRELOAD shim cached in the system temp dir and reused across runs.
+_SHIM_SO = Path(tempfile.gettempdir()) / "lo_socket_shim.so"
+
+
+def _needs_shim() -> bool:
+ try:
+ s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ s.close()
+ return False
+ except OSError:
+ return True
+
+
+def _ensure_shim() -> Path:
+    """Compile (once) and return the LD_PRELOAD shim shared object.
+
+    The compiled .so is cached in the temp dir; later calls reuse it.
+    Requires gcc on PATH; raises subprocess.CalledProcessError on failure.
+    """
+    if _SHIM_SO.exists():
+        return _SHIM_SO
+
+    src = Path(tempfile.gettempdir()) / "lo_socket_shim.c"
+    src.write_text(_SHIM_SOURCE)
+    # -ldl is needed for dlsym/RTLD_NEXT; output goes straight to the cache path.
+    subprocess.run(
+        ["gcc", "-shared", "-fPIC", "-o", str(_SHIM_SO), str(src), "-ldl"],
+        check=True,
+        capture_output=True,
+    )
+    src.unlink()
+    return _SHIM_SO
+
+
+
+_SHIM_SOURCE = r"""
+#define _GNU_SOURCE
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static int (*real_socket)(int, int, int);
+static int (*real_socketpair)(int, int, int, int[2]);
+static int (*real_listen)(int, int);
+static int (*real_accept)(int, struct sockaddr *, socklen_t *);
+static int (*real_close)(int);
+static int (*real_read)(int, void *, size_t);
+
+/* Per-FD bookkeeping (FDs >= 1024 are passed through unshimmed). */
+static int is_shimmed[1024];
+static int peer_of[1024];
+static int wake_r[1024]; /* accept() blocks reading this */
+static int wake_w[1024]; /* close() writes to this */
+static int listener_fd = -1; /* FD that received listen() */
+
+__attribute__((constructor))
+static void init(void) {
+ real_socket = dlsym(RTLD_NEXT, "socket");
+ real_socketpair = dlsym(RTLD_NEXT, "socketpair");
+ real_listen = dlsym(RTLD_NEXT, "listen");
+ real_accept = dlsym(RTLD_NEXT, "accept");
+ real_close = dlsym(RTLD_NEXT, "close");
+ real_read = dlsym(RTLD_NEXT, "read");
+ for (int i = 0; i < 1024; i++) {
+ peer_of[i] = -1;
+ wake_r[i] = -1;
+ wake_w[i] = -1;
+ }
+}
+
+/* ---- socket ---------------------------------------------------------- */
+int socket(int domain, int type, int protocol) {
+ if (domain == AF_UNIX) {
+ int fd = real_socket(domain, type, protocol);
+ if (fd >= 0) return fd;
+ /* socket(AF_UNIX) blocked – fall back to socketpair(). */
+ int sv[2];
+ if (real_socketpair(domain, type, protocol, sv) == 0) {
+ if (sv[0] >= 0 && sv[0] < 1024) {
+ is_shimmed[sv[0]] = 1;
+ peer_of[sv[0]] = sv[1];
+ int wp[2];
+ if (pipe(wp) == 0) {
+ wake_r[sv[0]] = wp[0];
+ wake_w[sv[0]] = wp[1];
+ }
+ }
+ return sv[0];
+ }
+ errno = EPERM;
+ return -1;
+ }
+ return real_socket(domain, type, protocol);
+}
+
+/* ---- listen ---------------------------------------------------------- */
+int listen(int sockfd, int backlog) {
+ if (sockfd >= 0 && sockfd < 1024 && is_shimmed[sockfd]) {
+ listener_fd = sockfd;
+ return 0;
+ }
+ return real_listen(sockfd, backlog);
+}
+
+/* ---- accept ---------------------------------------------------------- */
+int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen) {
+ if (sockfd >= 0 && sockfd < 1024 && is_shimmed[sockfd]) {
+ /* Block until close() writes to the wake pipe. */
+ if (wake_r[sockfd] >= 0) {
+ char buf;
+ real_read(wake_r[sockfd], &buf, 1);
+ }
+ errno = ECONNABORTED;
+ return -1;
+ }
+ return real_accept(sockfd, addr, addrlen);
+}
+
+/* ---- close ----------------------------------------------------------- */
+int close(int fd) {
+ if (fd >= 0 && fd < 1024 && is_shimmed[fd]) {
+ int was_listener = (fd == listener_fd);
+ is_shimmed[fd] = 0;
+
+ if (wake_w[fd] >= 0) { /* unblock accept() */
+ char c = 0;
+ write(wake_w[fd], &c, 1);
+ real_close(wake_w[fd]);
+ wake_w[fd] = -1;
+ }
+ if (wake_r[fd] >= 0) { real_close(wake_r[fd]); wake_r[fd] = -1; }
+ if (peer_of[fd] >= 0) { real_close(peer_of[fd]); peer_of[fd] = -1; }
+
+ if (was_listener)
+ _exit(0); /* conversion done – exit */
+ }
+ return real_close(fd);
+}
+"""
+
+
+
+if __name__ == "__main__":
+    # CLI passthrough: `python soffice.py <args...>` runs soffice with the
+    # shim-aware environment and mirrors its exit code.
+    import sys
+    result = run_soffice(sys.argv[1:])
+    sys.exit(result.returncode)
diff --git a/skills/docx/scripts/office/unpack.py b/skills/docx/scripts/office/unpack.py
new file mode 100644
index 0000000..0015253
--- /dev/null
+++ b/skills/docx/scripts/office/unpack.py
@@ -0,0 +1,132 @@
+"""Unpack Office files (DOCX, PPTX, XLSX) for editing.
+
+Extracts the ZIP archive, pretty-prints XML files, and optionally:
+- Merges adjacent runs with identical formatting (DOCX only)
+- Simplifies adjacent tracked changes from same author (DOCX only)
+
+Usage:
+ python unpack.py <input_file> <output_directory> [options]
+
+Examples:
+ python unpack.py document.docx unpacked/
+ python unpack.py presentation.pptx unpacked/
+ python unpack.py document.docx unpacked/ --merge-runs false
+"""
+
+import argparse
+import sys
+import zipfile
+from pathlib import Path
+
+import defusedxml.minidom
+
+from helpers.merge_runs import merge_runs as do_merge_runs
+from helpers.simplify_redlines import simplify_redlines as do_simplify_redlines
+
+SMART_QUOTE_REPLACEMENTS = {
+ "\u201c": "“",
+ "\u201d": "”",
+ "\u2018": "‘",
+ "\u2019": "’",
+}
+
+
+def unpack(
+    input_file: str,
+    output_directory: str,
+    merge_runs: bool = True,
+    simplify_redlines: bool = True,
+) -> tuple[None, str]:
+    """Extract an Office file and pretty-print its XML parts for editing.
+
+    Args:
+        input_file: Path to a .docx/.pptx/.xlsx file.
+        output_directory: Destination directory (created if missing).
+        merge_runs: DOCX only - merge adjacent runs with identical formatting.
+        simplify_redlines: DOCX only - merge adjacent tracked changes by author.
+
+    Returns:
+        (None, message) - message describes the result, or starts with "Error"
+        on failure (the first tuple slot is unused by design).
+    """
+    input_path = Path(input_file)
+    output_path = Path(output_directory)
+    suffix = input_path.suffix.lower()
+
+    if not input_path.exists():
+        return None, f"Error: {input_file} does not exist"
+
+    if suffix not in {".docx", ".pptx", ".xlsx"}:
+        return None, f"Error: {input_file} must be a .docx, .pptx, or .xlsx file"
+
+    try:
+        output_path.mkdir(parents=True, exist_ok=True)
+
+        with zipfile.ZipFile(input_path, "r") as zf:
+            zf.extractall(output_path)
+
+        # Pretty-print both XML parts and relationship (.rels) files.
+        xml_files = list(output_path.rglob("*.xml")) + list(output_path.rglob("*.rels"))
+        for xml_file in xml_files:
+            _pretty_print_xml(xml_file)
+
+        message = f"Unpacked {input_file} ({len(xml_files)} XML files)"
+
+        # DOCX-only cleanup passes; each reports how many nodes it touched.
+        if suffix == ".docx":
+            if simplify_redlines:
+                simplify_count, _ = do_simplify_redlines(str(output_path))
+                message += f", simplified {simplify_count} tracked changes"
+
+            if merge_runs:
+                merge_count, _ = do_merge_runs(str(output_path))
+                message += f", merged {merge_count} runs"
+
+        # Run last so replacements also cover text rewritten by the passes above.
+        for xml_file in xml_files:
+            _escape_smart_quotes(xml_file)
+
+        return None, message
+
+    except zipfile.BadZipFile:
+        return None, f"Error: {input_file} is not a valid Office file"
+    except Exception as e:
+        return None, f"Error unpacking: {e}"
+
+
+def _pretty_print_xml(xml_file: Path) -> None:
+ try:
+ content = xml_file.read_text(encoding="utf-8")
+ dom = defusedxml.minidom.parseString(content)
+ xml_file.write_bytes(dom.toprettyxml(indent=" ", encoding="utf-8"))
+ except Exception:
+ pass
+
+
+def _escape_smart_quotes(xml_file: Path) -> None:
+ try:
+ content = xml_file.read_text(encoding="utf-8")
+ for char, entity in SMART_QUOTE_REPLACEMENTS.items():
+ content = content.replace(char, entity)
+ xml_file.write_text(content, encoding="utf-8")
+ except Exception:
+ pass
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Unpack an Office file (DOCX, PPTX, XLSX) for editing"
+    )
+    parser.add_argument("input_file", help="Office file to unpack")
+    parser.add_argument("output_directory", help="Output directory")
+    # Boolean flags accept the literal strings "true"/"false"; any other
+    # value parses as False.
+    parser.add_argument(
+        "--merge-runs",
+        type=lambda x: x.lower() == "true",
+        default=True,
+        metavar="true|false",
+        help="Merge adjacent runs with identical formatting (DOCX only, default: true)",
+    )
+    parser.add_argument(
+        "--simplify-redlines",
+        type=lambda x: x.lower() == "true",
+        default=True,
+        metavar="true|false",
+        help="Merge adjacent tracked changes from same author (DOCX only, default: true)",
+    )
+    args = parser.parse_args()
+
+    _, message = unpack(
+        args.input_file,
+        args.output_directory,
+        merge_runs=args.merge_runs,
+        simplify_redlines=args.simplify_redlines,
+    )
+    print(message)
+
+    # unpack() signals failure via an "Error..." message rather than raising.
+    if "Error" in message:
+        sys.exit(1)
diff --git a/skills/docx/scripts/office/validate.py b/skills/docx/scripts/office/validate.py
new file mode 100644
index 0000000..03b01f6
--- /dev/null
+++ b/skills/docx/scripts/office/validate.py
@@ -0,0 +1,111 @@
+"""
+Command line tool to validate Office document XML files against XSD schemas and tracked changes.
+
+Usage:
+ python validate.py <path> [--original <file>] [--auto-repair] [--author NAME]
+
+The first argument can be either:
+- An unpacked directory containing the Office document XML files
+- A packed Office file (.docx/.pptx/.xlsx) which will be unpacked to a temp directory
+
+Auto-repair fixes:
+- paraId/durableId values that exceed OOXML limits
+- Missing xml:space="preserve" on w:t elements with whitespace
+"""
+
+import argparse
+import sys
+import tempfile
+import zipfile
+from pathlib import Path
+
+from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
+
+
+def main():
+    """CLI entry point: validate an unpacked directory or packed Office file.
+
+    Exits 0 when all validators pass, 1 otherwise.
+    """
+    parser = argparse.ArgumentParser(description="Validate Office document XML files")
+    parser.add_argument(
+        "path",
+        help="Path to unpacked directory or packed Office file (.docx/.pptx/.xlsx)",
+    )
+    parser.add_argument(
+        "--original",
+        required=False,
+        default=None,
+        help="Path to original file (.docx/.pptx/.xlsx). If omitted, all XSD errors are reported and redlining validation is skipped.",
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Enable verbose output",
+    )
+    parser.add_argument(
+        "--auto-repair",
+        action="store_true",
+        help="Automatically repair common issues (hex IDs, whitespace preservation)",
+    )
+    parser.add_argument(
+        "--author",
+        default="Claude",
+        help="Author name for redlining validation (default: Claude)",
+    )
+    args = parser.parse_args()
+
+    path = Path(args.path)
+    # NOTE(review): CLI validation via assert disappears under `python -O`;
+    # explicit checks with sys.exit would be more robust - confirm intent.
+    assert path.exists(), f"Error: {path} does not exist"
+
+    original_file = None
+    if args.original:
+        original_file = Path(args.original)
+        assert original_file.is_file(), f"Error: {original_file} is not a file"
+        assert original_file.suffix.lower() in [".docx", ".pptx", ".xlsx"], (
+            f"Error: {original_file} must be a .docx, .pptx, or .xlsx file"
+        )
+
+    # File type comes from --original when given, else from the path itself.
+    file_extension = (original_file or path).suffix.lower()
+    assert file_extension in [".docx", ".pptx", ".xlsx"], (
+        f"Error: Cannot determine file type from {path}. Use --original or provide a .docx/.pptx/.xlsx file."
+    )
+
+    # A packed file is unpacked to a temp dir; a directory is used in place.
+    if path.is_file() and path.suffix.lower() in [".docx", ".pptx", ".xlsx"]:
+        temp_dir = tempfile.mkdtemp()
+        with zipfile.ZipFile(path, "r") as zf:
+            zf.extractall(temp_dir)
+        unpacked_dir = Path(temp_dir)
+    else:
+        assert path.is_dir(), f"Error: {path} is not a directory or Office file"
+        unpacked_dir = path
+
+    match file_extension:
+        case ".docx":
+            validators = [
+                DOCXSchemaValidator(unpacked_dir, original_file, verbose=args.verbose),
+            ]
+            # Redlining checks need the original document to diff against.
+            if original_file:
+                validators.append(
+                    RedliningValidator(unpacked_dir, original_file, verbose=args.verbose, author=args.author)
+                )
+        case ".pptx":
+            validators = [
+                PPTXSchemaValidator(unpacked_dir, original_file, verbose=args.verbose),
+            ]
+        case _:
+            print(f"Error: Validation not supported for file type {file_extension}")
+            sys.exit(1)
+
+    if args.auto_repair:
+        total_repairs = sum(v.repair() for v in validators)
+        if total_repairs:
+            print(f"Auto-repaired {total_repairs} issue(s)")
+
+    # all() short-circuits: a failing validator stops later ones from running.
+    success = all(v.validate() for v in validators)
+
+    if success:
+        print("All validations PASSED!")
+
+    sys.exit(0 if success else 1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/skills/docx/scripts/office/validators/__init__.py b/skills/docx/scripts/office/validators/__init__.py
new file mode 100644
index 0000000..db092ec
--- /dev/null
+++ b/skills/docx/scripts/office/validators/__init__.py
@@ -0,0 +1,15 @@
+"""
+Validation modules for Word document processing.
+"""
+
+from .base import BaseSchemaValidator
+from .docx import DOCXSchemaValidator
+from .pptx import PPTXSchemaValidator
+from .redlining import RedliningValidator
+
+__all__ = [
+ "BaseSchemaValidator",
+ "DOCXSchemaValidator",
+ "PPTXSchemaValidator",
+ "RedliningValidator",
+]
diff --git a/skills/docx/scripts/office/validators/base.py b/skills/docx/scripts/office/validators/base.py
new file mode 100644
index 0000000..db4a06a
--- /dev/null
+++ b/skills/docx/scripts/office/validators/base.py
@@ -0,0 +1,847 @@
+"""
+Base validator with common validation logic for document files.
+"""
+
+import re
+from pathlib import Path
+
+import defusedxml.minidom
+import lxml.etree
+
+
+class BaseSchemaValidator:
+    """Shared validation logic for unpacked OOXML packages (DOCX/PPTX/XLSX).
+
+    Subclasses implement validate(); this base supplies well-formedness,
+    namespace, ID-uniqueness, relationship, content-type, and XSD checks.
+    """
+
+    # Substrings of XSD error messages treated as known noise and ignored.
+    IGNORED_VALIDATION_ERRORS = [
+        "hyphenationZone",
+        "purl.org/dc/terms",
+    ]
+
+    # element tag (lowercased, namespace stripped) -> (attribute, scope).
+    # scope "file" = unique within one part; "global" = unique package-wide.
+    UNIQUE_ID_REQUIREMENTS = {
+        "comment": ("id", "file"),
+        "commentrangestart": ("id", "file"),
+        "commentrangeend": ("id", "file"),
+        "bookmarkstart": ("id", "file"),
+        "bookmarkend": ("id", "file"),
+        "sldid": ("id", "file"),
+        "sldmasterid": ("id", "global"),
+        "sldlayoutid": ("id", "global"),
+        "cm": ("authorid", "file"),
+        "sheet": ("sheetid", "file"),
+        "definedname": ("id", "file"),
+        "cxnsp": ("id", "file"),
+        "sp": ("id", "file"),
+        "pic": ("id", "file"),
+        "grpsp": ("id", "file"),
+    }
+
+    # Ancestor tags whose descendants are exempt from the uniqueness checks.
+    EXCLUDED_ID_CONTAINERS = {
+        "sectionlst",
+    }
+
+    # element name -> expected relationship type; empty here, subclasses may fill.
+    ELEMENT_RELATIONSHIP_TYPES = {}
+
+    # File/folder name -> XSD path relative to the schemas directory.
+    # NOTE(review): "fouth-edition" looks like a typo for "fourth-edition" -
+    # verify it matches the actual schemas directory name before changing.
+    SCHEMA_MAPPINGS = {
+        "word": "ISO-IEC29500-4_2016/wml.xsd",
+        "ppt": "ISO-IEC29500-4_2016/pml.xsd",
+        "xl": "ISO-IEC29500-4_2016/sml.xsd",
+        "[Content_Types].xml": "ecma/fouth-edition/opc-contentTypes.xsd",
+        "app.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd",
+        "core.xml": "ecma/fouth-edition/opc-coreProperties.xsd",
+        "custom.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd",
+        ".rels": "ecma/fouth-edition/opc-relationships.xsd",
+        "people.xml": "microsoft/wml-2012.xsd",
+        "commentsIds.xml": "microsoft/wml-cid-2016.xsd",
+        "commentsExtensible.xml": "microsoft/wml-cex-2018.xsd",
+        "commentsExtended.xml": "microsoft/wml-2012.xsd",
+        "chart": "ISO-IEC29500-4_2016/dml-chart.xsd",
+        "theme": "ISO-IEC29500-4_2016/dml-main.xsd",
+        "drawing": "ISO-IEC29500-4_2016/dml-main.xsd",
+    }
+
+    MC_NAMESPACE = "http://schemas.openxmlformats.org/markup-compatibility/2006"
+    XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
+
+    PACKAGE_RELATIONSHIPS_NAMESPACE = (
+        "http://schemas.openxmlformats.org/package/2006/relationships"
+    )
+    OFFICE_RELATIONSHIPS_NAMESPACE = (
+        "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
+    )
+    CONTENT_TYPES_NAMESPACE = (
+        "http://schemas.openxmlformats.org/package/2006/content-types"
+    )
+
+    # Top-level package folders whose parts use the app's main schema.
+    MAIN_CONTENT_FOLDERS = {"word", "ppt", "xl"}
+
+    # Namespaces considered "core OOXML"; attributes/elements outside this set
+    # are stripped before strict XSD validation.
+    OOXML_NAMESPACES = {
+        "http://schemas.openxmlformats.org/officeDocument/2006/math",
+        "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
+        "http://schemas.openxmlformats.org/schemaLibrary/2006/main",
+        "http://schemas.openxmlformats.org/drawingml/2006/main",
+        "http://schemas.openxmlformats.org/drawingml/2006/chart",
+        "http://schemas.openxmlformats.org/drawingml/2006/chartDrawing",
+        "http://schemas.openxmlformats.org/drawingml/2006/diagram",
+        "http://schemas.openxmlformats.org/drawingml/2006/picture",
+        "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing",
+        "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
+        "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
+        "http://schemas.openxmlformats.org/presentationml/2006/main",
+        "http://schemas.openxmlformats.org/spreadsheetml/2006/main",
+        "http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes",
+        "http://www.w3.org/XML/1998/namespace",
+    }
+
+ def __init__(self, unpacked_dir, original_file=None, verbose=False):
+ self.unpacked_dir = Path(unpacked_dir).resolve()
+ self.original_file = Path(original_file) if original_file else None
+ self.verbose = verbose
+
+ self.schemas_dir = Path(__file__).parent.parent / "schemas"
+
+ patterns = ["*.xml", "*.rels"]
+ self.xml_files = [
+ f for pattern in patterns for f in self.unpacked_dir.rglob(pattern)
+ ]
+
+ if not self.xml_files:
+ print(f"Warning: No XML files found in {self.unpacked_dir}")
+
+    def validate(self):
+        """Run all checks for this document type; subclasses must override."""
+        raise NotImplementedError("Subclasses must implement the validate method")
+
+    def repair(self) -> int:
+        """Apply automatic fixes; returns the number of repairs performed."""
+        return self.repair_whitespace_preservation()
+
+    def repair_whitespace_preservation(self) -> int:
+        """Add xml:space="preserve" to text elements with edge whitespace.
+
+        Word drops leading/trailing spaces and tabs in <w:t>-style elements
+        unless xml:space="preserve" is set. Returns the number of elements
+        repaired; files that fail to parse are skipped silently (best-effort).
+        """
+        repairs = 0
+
+        for xml_file in self.xml_files:
+            try:
+                content = xml_file.read_text(encoding="utf-8")
+                dom = defusedxml.minidom.parseString(content)
+                modified = False
+
+                for elem in dom.getElementsByTagName("*"):
+                    # Matches any namespaced text element, e.g. w:t / a:t.
+                    if elem.tagName.endswith(":t") and elem.firstChild:
+                        text = elem.firstChild.nodeValue
+                        if text and (text.startswith((' ', '\t')) or text.endswith((' ', '\t'))):
+                            if elem.getAttribute("xml:space") != "preserve":
+                                elem.setAttribute("xml:space", "preserve")
+                                text_preview = repr(text[:30]) + "..." if len(text) > 30 else repr(text)
+                                print(f"  Repaired: {xml_file.name}: Added xml:space='preserve' to {elem.tagName}: {text_preview}")
+                                repairs += 1
+                                modified = True
+
+                # Only rewrite files that actually changed.
+                if modified:
+                    xml_file.write_bytes(dom.toxml(encoding="UTF-8"))
+
+            except Exception:
+                pass
+
+        return repairs
+
+ def validate_xml(self):
+ errors = []
+
+ for xml_file in self.xml_files:
+ try:
+ lxml.etree.parse(str(xml_file))
+ except lxml.etree.XMLSyntaxError as e:
+ errors.append(
+ f" {xml_file.relative_to(self.unpacked_dir)}: "
+ f"Line {e.lineno}: {e.msg}"
+ )
+ except Exception as e:
+ errors.append(
+ f" {xml_file.relative_to(self.unpacked_dir)}: "
+ f"Unexpected error: {str(e)}"
+ )
+
+ if errors:
+ print(f"FAILED - Found {len(errors)} XML violations:")
+ for error in errors:
+ print(error)
+ return False
+ else:
+ if self.verbose:
+ print("PASSED - All XML files are well-formed")
+ return True
+
+ def validate_namespaces(self):
+ errors = []
+
+ for xml_file in self.xml_files:
+ try:
+ root = lxml.etree.parse(str(xml_file)).getroot()
+ declared = set(root.nsmap.keys()) - {None}
+
+ for attr_val in [
+ v for k, v in root.attrib.items() if k.endswith("Ignorable")
+ ]:
+ undeclared = set(attr_val.split()) - declared
+ errors.extend(
+ f" {xml_file.relative_to(self.unpacked_dir)}: "
+ f"Namespace '{ns}' in Ignorable but not declared"
+ for ns in undeclared
+ )
+ except lxml.etree.XMLSyntaxError:
+ continue
+
+ if errors:
+ print(f"FAILED - {len(errors)} namespace issues:")
+ for error in errors:
+ print(error)
+ return False
+ if self.verbose:
+ print("PASSED - All namespace prefixes properly declared")
+ return True
+
+    def validate_unique_ids(self):
+        """Check ID uniqueness per UNIQUE_ID_REQUIREMENTS.
+
+        "file"-scoped IDs must be unique within a part; "global"-scoped IDs
+        must be unique across the whole package. Elements inside
+        mc:AlternateContent or EXCLUDED_ID_CONTAINERS are exempt.
+        Returns True when no duplicates are found.
+        """
+        errors = []
+        global_ids = {}
+
+        for xml_file in self.xml_files:
+            try:
+                root = lxml.etree.parse(str(xml_file)).getroot()
+                file_ids = {}
+
+                # Drop mc:AlternateContent subtrees - their fallbacks may
+                # legitimately repeat IDs of the primary content.
+                mc_elements = root.xpath(
+                    ".//mc:AlternateContent", namespaces={"mc": self.MC_NAMESPACE}
+                )
+                for elem in mc_elements:
+                    elem.getparent().remove(elem)
+
+                for elem in root.iter():
+                    # Local tag name, lowercased, namespace stripped.
+                    tag = (
+                        elem.tag.split("}")[-1].lower()
+                        if "}" in elem.tag
+                        else elem.tag.lower()
+                    )
+
+                    if tag in self.UNIQUE_ID_REQUIREMENTS:
+                        in_excluded_container = any(
+                            ancestor.tag.split("}")[-1].lower() in self.EXCLUDED_ID_CONTAINERS
+                            for ancestor in elem.iterancestors()
+                        )
+                        if in_excluded_container:
+                            continue
+
+                        attr_name, scope = self.UNIQUE_ID_REQUIREMENTS[tag]
+
+                        # Find the ID attribute regardless of its namespace.
+                        id_value = None
+                        for attr, value in elem.attrib.items():
+                            attr_local = (
+                                attr.split("}")[-1].lower()
+                                if "}" in attr
+                                else attr.lower()
+                            )
+                            if attr_local == attr_name:
+                                id_value = value
+                                break
+
+                        if id_value is not None:
+                            if scope == "global":
+                                if id_value in global_ids:
+                                    prev_file, prev_line, prev_tag = global_ids[
+                                        id_value
+                                    ]
+                                    errors.append(
+                                        f"  {xml_file.relative_to(self.unpacked_dir)}: "
+                                        f"Line {elem.sourceline}: Global ID '{id_value}' in <{tag}> "
+                                        f"already used in {prev_file} at line {prev_line} in <{prev_tag}>"
+                                    )
+                                else:
+                                    global_ids[id_value] = (
+                                        xml_file.relative_to(self.unpacked_dir),
+                                        elem.sourceline,
+                                        tag,
+                                    )
+                            elif scope == "file":
+                                # Track first-seen line per (tag, attr) pair.
+                                key = (tag, attr_name)
+                                if key not in file_ids:
+                                    file_ids[key] = {}
+
+                                if id_value in file_ids[key]:
+                                    prev_line = file_ids[key][id_value]
+                                    errors.append(
+                                        f"  {xml_file.relative_to(self.unpacked_dir)}: "
+                                        f"Line {elem.sourceline}: Duplicate {attr_name}='{id_value}' in <{tag}> "
+                                        f"(first occurrence at line {prev_line})"
+                                    )
+                                else:
+                                    file_ids[key][id_value] = elem.sourceline
+
+            except (lxml.etree.XMLSyntaxError, Exception) as e:
+                errors.append(
+                    f"  {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
+                )
+
+        if errors:
+            print(f"FAILED - Found {len(errors)} ID uniqueness violations:")
+            for error in errors:
+                print(error)
+            return False
+        else:
+            if self.verbose:
+                print("PASSED - All required IDs are unique")
+            return True
+
+    def validate_file_references(self):
+        """Cross-check .rels relationship targets against files on disk.
+
+        Fails on (a) relationships pointing at files that do not exist and
+        (b) package files never referenced by any relationship. External
+        (http/mailto) targets are skipped. Returns True when consistent.
+        """
+        errors = []
+
+        rels_files = list(self.unpacked_dir.rglob("*.rels"))
+
+        if not rels_files:
+            if self.verbose:
+                print("PASSED - No .rels files found")
+            return True
+
+        # Every content file (excluding metadata) should be referenced somewhere.
+        all_files = []
+        for file_path in self.unpacked_dir.rglob("*"):
+            if (
+                file_path.is_file()
+                and file_path.name != "[Content_Types].xml"
+                and not file_path.name.endswith(".rels")
+            ):
+                all_files.append(file_path.resolve())
+
+        all_referenced_files = set()
+
+        if self.verbose:
+            print(
+                f"Found {len(rels_files)} .rels files and {len(all_files)} target files"
+            )
+
+        for rels_file in rels_files:
+            try:
+                rels_root = lxml.etree.parse(str(rels_file)).getroot()
+
+                rels_dir = rels_file.parent
+
+                referenced_files = set()
+                broken_refs = []
+
+                for rel in rels_root.findall(
+                    ".//ns:Relationship",
+                    namespaces={"ns": self.PACKAGE_RELATIONSHIPS_NAMESPACE},
+                ):
+                    target = rel.get("Target")
+                    if target and not target.startswith(
+                        ("http", "mailto:")
+                    ):
+                        # Resolve the target: absolute from package root,
+                        # package-level .rels from root, otherwise relative
+                        # to the _rels folder's parent.
+                        if target.startswith("/"):
+                            target_path = self.unpacked_dir / target.lstrip("/")
+                        elif rels_file.name == ".rels":
+                            target_path = self.unpacked_dir / target
+                        else:
+                            base_dir = rels_dir.parent
+                            target_path = base_dir / target
+
+                        try:
+                            target_path = target_path.resolve()
+                            if target_path.exists() and target_path.is_file():
+                                referenced_files.add(target_path)
+                                all_referenced_files.add(target_path)
+                            else:
+                                broken_refs.append((target, rel.sourceline))
+                        except (OSError, ValueError):
+                            broken_refs.append((target, rel.sourceline))
+
+                if broken_refs:
+                    rel_path = rels_file.relative_to(self.unpacked_dir)
+                    for broken_ref, line_num in broken_refs:
+                        errors.append(
+                            f"  {rel_path}: Line {line_num}: Broken reference to {broken_ref}"
+                        )
+
+            except Exception as e:
+                rel_path = rels_file.relative_to(self.unpacked_dir)
+                errors.append(f"  Error parsing {rel_path}: {e}")
+
+        unreferenced_files = set(all_files) - all_referenced_files
+
+        if unreferenced_files:
+            for unref_file in sorted(unreferenced_files):
+                unref_rel_path = unref_file.relative_to(self.unpacked_dir)
+                errors.append(f"  Unreferenced file: {unref_rel_path}")
+
+        if errors:
+            print(f"FAILED - Found {len(errors)} relationship validation errors:")
+            for error in errors:
+                print(error)
+            print(
+                "CRITICAL: These errors will cause the document to appear corrupt. "
+                + "Broken references MUST be fixed, "
+                + "and unreferenced files MUST be referenced or removed."
+            )
+            return False
+        else:
+            if self.verbose:
+                print(
+                    "PASSED - All references are valid and all files are properly referenced"
+                )
+            return True
+
+    def validate_all_relationship_ids(self):
+        """Check r:id / r:embed / r:link references resolve in the part's .rels.
+
+        Also flags duplicate relationship IDs and (when a subclass fills
+        ELEMENT_RELATIONSHIP_TYPES) r:id values pointing at a relationship
+        of the wrong type. Returns True when all references are valid.
+        """
+        # NOTE(review): redundant local import - lxml.etree is already
+        # imported at module level.
+        import lxml.etree
+
+        errors = []
+
+        for xml_file in self.xml_files:
+            if xml_file.suffix == ".rels":
+                continue
+
+            # A part's relationships live at _rels/<part-name>.rels.
+            rels_dir = xml_file.parent / "_rels"
+            rels_file = rels_dir / f"{xml_file.name}.rels"
+
+            if not rels_file.exists():
+                continue
+
+            try:
+                rels_root = lxml.etree.parse(str(rels_file)).getroot()
+                rid_to_type = {}
+
+                for rel in rels_root.findall(
+                    f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
+                ):
+                    rid = rel.get("Id")
+                    rel_type = rel.get("Type", "")
+                    if rid:
+                        if rid in rid_to_type:
+                            rels_rel_path = rels_file.relative_to(self.unpacked_dir)
+                            errors.append(
+                                f"  {rels_rel_path}: Line {rel.sourceline}: "
+                                f"Duplicate relationship ID '{rid}' (IDs must be unique)"
+                            )
+                        # Keep only the last path segment of the type URI.
+                        type_name = (
+                            rel_type.split("/")[-1] if "/" in rel_type else rel_type
+                        )
+                        rid_to_type[rid] = type_name
+
+                xml_root = lxml.etree.parse(str(xml_file)).getroot()
+
+                r_ns = self.OFFICE_RELATIONSHIPS_NAMESPACE
+                rid_attrs_to_check = ["id", "embed", "link"]
+                for elem in xml_root.iter():
+                    for attr_name in rid_attrs_to_check:
+                        rid_attr = elem.get(f"{{{r_ns}}}{attr_name}")
+                        if not rid_attr:
+                            continue
+                        xml_rel_path = xml_file.relative_to(self.unpacked_dir)
+                        elem_name = (
+                            elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
+                        )
+
+                        if rid_attr not in rid_to_type:
+                            errors.append(
+                                f"  {xml_rel_path}: Line {elem.sourceline}: "
+                                f"<{elem_name}> r:{attr_name} references non-existent relationship '{rid_attr}' "
+                                f"(valid IDs: {', '.join(sorted(rid_to_type.keys())[:5])}{'...' if len(rid_to_type) > 5 else ''})"
+                            )
+                        elif attr_name == "id" and self.ELEMENT_RELATIONSHIP_TYPES:
+                            # Type check only applies when a subclass declares
+                            # expected relationship types.
+                            expected_type = self._get_expected_relationship_type(
+                                elem_name
+                            )
+                            if expected_type:
+                                actual_type = rid_to_type[rid_attr]
+                                if expected_type not in actual_type.lower():
+                                    errors.append(
+                                        f"  {xml_rel_path}: Line {elem.sourceline}: "
+                                        f"<{elem_name}> references '{rid_attr}' which points to '{actual_type}' "
+                                        f"but should point to a '{expected_type}' relationship"
+                                    )
+
+            except Exception as e:
+                xml_rel_path = xml_file.relative_to(self.unpacked_dir)
+                errors.append(f"  Error processing {xml_rel_path}: {e}")
+
+        if errors:
+            print(f"FAILED - Found {len(errors)} relationship ID reference errors:")
+            for error in errors:
+                print(error)
+            print("\nThese ID mismatches will cause the document to appear corrupt!")
+            return False
+        else:
+            if self.verbose:
+                print("PASSED - All relationship ID references are valid")
+            return True
+
+ def _get_expected_relationship_type(self, element_name):
+ elem_lower = element_name.lower()
+
+ if elem_lower in self.ELEMENT_RELATIONSHIP_TYPES:
+ return self.ELEMENT_RELATIONSHIP_TYPES[elem_lower]
+
+ if elem_lower.endswith("id") and len(elem_lower) > 2:
+ prefix = elem_lower[:-2]
+ if prefix.endswith("master"):
+ return prefix.lower()
+ elif prefix.endswith("layout"):
+ return prefix.lower()
+ else:
+ if prefix == "sld":
+ return "slide"
+ return prefix.lower()
+
+ if elem_lower.endswith("reference") and len(elem_lower) > 9:
+ prefix = elem_lower[:-9]
+ return prefix.lower()
+
+ return None
+
+ def validate_content_types(self):
+ errors = []
+
+ content_types_file = self.unpacked_dir / "[Content_Types].xml"
+ if not content_types_file.exists():
+ print("FAILED - [Content_Types].xml file not found")
+ return False
+
+ try:
+ root = lxml.etree.parse(str(content_types_file)).getroot()
+ declared_parts = set()
+ declared_extensions = set()
+
+ for override in root.findall(
+ f".//{{{self.CONTENT_TYPES_NAMESPACE}}}Override"
+ ):
+ part_name = override.get("PartName")
+ if part_name is not None:
+ declared_parts.add(part_name.lstrip("/"))
+
+ for default in root.findall(
+ f".//{{{self.CONTENT_TYPES_NAMESPACE}}}Default"
+ ):
+ extension = default.get("Extension")
+ if extension is not None:
+ declared_extensions.add(extension.lower())
+
+ declarable_roots = {
+ "sld",
+ "sldLayout",
+ "sldMaster",
+ "presentation",
+ "document",
+ "workbook",
+ "worksheet",
+ "theme",
+ }
+
+ media_extensions = {
+ "png": "image/png",
+ "jpg": "image/jpeg",
+ "jpeg": "image/jpeg",
+ "gif": "image/gif",
+ "bmp": "image/bmp",
+ "tiff": "image/tiff",
+ "wmf": "image/x-wmf",
+ "emf": "image/x-emf",
+ }
+
+ all_files = list(self.unpacked_dir.rglob("*"))
+ all_files = [f for f in all_files if f.is_file()]
+
+ for xml_file in self.xml_files:
+ path_str = str(xml_file.relative_to(self.unpacked_dir)).replace(
+ "\\", "/"
+ )
+
+ if any(
+ skip in path_str
+ for skip in [".rels", "[Content_Types]", "docProps/", "_rels/"]
+ ):
+ continue
+
+ try:
+ root_tag = lxml.etree.parse(str(xml_file)).getroot().tag
+ root_name = root_tag.split("}")[-1] if "}" in root_tag else root_tag
+
+ if root_name in declarable_roots and path_str not in declared_parts:
+ errors.append(
+ f" {path_str}: File with <{root_name}> root not declared in [Content_Types].xml"
+ )
+
+ except Exception:
+ continue
+
+ for file_path in all_files:
+ if file_path.suffix.lower() in {".xml", ".rels"}:
+ continue
+ if file_path.name == "[Content_Types].xml":
+ continue
+ if "_rels" in file_path.parts or "docProps" in file_path.parts:
+ continue
+
+ extension = file_path.suffix.lstrip(".").lower()
+ if extension and extension not in declared_extensions:
+ if extension in media_extensions:
+ relative_path = file_path.relative_to(self.unpacked_dir)
+ errors.append(
+ f' {relative_path}: File with extension \'{extension}\' not declared in [Content_Types].xml - should add: '
+ )
+
+ except Exception as e:
+ errors.append(f" Error parsing [Content_Types].xml: {e}")
+
+ if errors:
+ print(f"FAILED - Found {len(errors)} content type declaration errors:")
+ for error in errors:
+ print(error)
+ return False
+ else:
+ if self.verbose:
+ print(
+ "PASSED - All content files are properly declared in [Content_Types].xml"
+ )
+ return True
+
+    def validate_file_against_xsd(self, xml_file, verbose=False):
+        """Validate one file against its XSD, ignoring pre-existing errors.
+
+        Returns (None, set()) when no schema applies, (True, set()) when
+        clean or only pre-existing/ignored errors remain, or
+        (False, new_errors) when the edit introduced new violations.
+        """
+        xml_file = Path(xml_file).resolve()
+        unpacked_dir = self.unpacked_dir.resolve()
+
+        is_valid, current_errors = self._validate_single_file_xsd(
+            xml_file, unpacked_dir
+        )
+
+        if is_valid is None:
+            return None, set()
+        elif is_valid:
+            return True, set()
+
+        # Errors already present in the original document are not our fault.
+        original_errors = self._get_original_file_errors(xml_file)
+
+        assert current_errors is not None
+        new_errors = current_errors - original_errors
+
+        # Drop known-noise error patterns.
+        new_errors = {
+            e for e in new_errors
+            if not any(pattern in e for pattern in self.IGNORED_VALIDATION_ERRORS)
+        }
+
+        if new_errors:
+            if verbose:
+                relative_path = xml_file.relative_to(unpacked_dir)
+                print(f"FAILED - {relative_path}: {len(new_errors)} new error(s)")
+                for error in list(new_errors)[:3]:
+                    truncated = error[:250] + "..." if len(error) > 250 else error
+                    print(f"    - {truncated}")
+            return False, new_errors
+        else:
+            if verbose:
+                print(
+                    f"PASSED - No new errors (original had {len(current_errors)} errors)"
+                )
+            return True, set()
+
+    def validate_against_xsd(self):
+        """Run the per-file XSD check over every collected XML file.
+
+        Only *new* errors (not present in the original document) fail the
+        run. Returns True when no new violations were introduced.
+        """
+        new_errors = []
+        original_error_count = 0
+        valid_count = 0
+        skipped_count = 0
+
+        for xml_file in self.xml_files:
+            relative_path = str(xml_file.relative_to(self.unpacked_dir))
+            is_valid, new_file_errors = self.validate_file_against_xsd(
+                xml_file, verbose=False
+            )
+
+            if is_valid is None:
+                # No schema mapped to this part.
+                skipped_count += 1
+                continue
+            elif is_valid and not new_file_errors:
+                valid_count += 1
+                continue
+            elif is_valid:
+                # Valid relative to the original, which itself had errors.
+                original_error_count += 1
+                valid_count += 1
+                continue
+
+            # Report at most 3 errors per file, truncated for readability.
+            new_errors.append(f"  {relative_path}: {len(new_file_errors)} new error(s)")
+            for error in list(new_file_errors)[:3]:
+                new_errors.append(
+                    f"    - {error[:250]}..." if len(error) > 250 else f"    - {error}"
+                )
+
+        if self.verbose:
+            print(f"Validated {len(self.xml_files)} files:")
+            print(f"  - Valid: {valid_count}")
+            print(f"  - Skipped (no schema): {skipped_count}")
+            if original_error_count:
+                print(f"  - With original errors (ignored): {original_error_count}")
+            # Counts only the per-file header lines, not the detail lines.
+            print(
+                f"  - With NEW errors: {len(new_errors) > 0 and len([e for e in new_errors if not e.startswith('    ')]) or 0}"
+            )
+
+        if new_errors:
+            print("\nFAILED - Found NEW validation errors:")
+            for error in new_errors:
+                print(error)
+            return False
+        else:
+            if self.verbose:
+                print("\nPASSED - No new XSD validation errors introduced")
+            return True
+
+ def _get_schema_path(self, xml_file):
+ if xml_file.name in self.SCHEMA_MAPPINGS:
+ return self.schemas_dir / self.SCHEMA_MAPPINGS[xml_file.name]
+
+ if xml_file.suffix == ".rels":
+ return self.schemas_dir / self.SCHEMA_MAPPINGS[".rels"]
+
+ if "charts/" in str(xml_file) and xml_file.name.startswith("chart"):
+ return self.schemas_dir / self.SCHEMA_MAPPINGS["chart"]
+
+ if "theme/" in str(xml_file) and xml_file.name.startswith("theme"):
+ return self.schemas_dir / self.SCHEMA_MAPPINGS["theme"]
+
+ if xml_file.parent.name in self.MAIN_CONTENT_FOLDERS:
+ return self.schemas_dir / self.SCHEMA_MAPPINGS[xml_file.parent.name]
+
+ return None
+
+ def _clean_ignorable_namespaces(self, xml_doc):
+ xml_string = lxml.etree.tostring(xml_doc, encoding="unicode")
+ xml_copy = lxml.etree.fromstring(xml_string)
+
+ for elem in xml_copy.iter():
+ attrs_to_remove = []
+
+ for attr in elem.attrib:
+ if "{" in attr:
+ ns = attr.split("}")[0][1:]
+ if ns not in self.OOXML_NAMESPACES:
+ attrs_to_remove.append(attr)
+
+ for attr in attrs_to_remove:
+ del elem.attrib[attr]
+
+ self._remove_ignorable_elements(xml_copy)
+
+ return lxml.etree.ElementTree(xml_copy)
+
+ def _remove_ignorable_elements(self, root):
+ elements_to_remove = []
+
+ for elem in list(root):
+ if not hasattr(elem, "tag") or callable(elem.tag):
+ continue
+
+ tag_str = str(elem.tag)
+ if tag_str.startswith("{"):
+ ns = tag_str.split("}")[0][1:]
+ if ns not in self.OOXML_NAMESPACES:
+ elements_to_remove.append(elem)
+ continue
+
+ self._remove_ignorable_elements(elem)
+
+ for elem in elements_to_remove:
+ root.remove(elem)
+
+ def _preprocess_for_mc_ignorable(self, xml_doc):
+ root = xml_doc.getroot()
+
+ if f"{{{self.MC_NAMESPACE}}}Ignorable" in root.attrib:
+ del root.attrib[f"{{{self.MC_NAMESPACE}}}Ignorable"]
+
+ return xml_doc
+
+ def _validate_single_file_xsd(self, xml_file, base_path):
+ schema_path = self._get_schema_path(xml_file)
+ if not schema_path:
+ return None, None
+
+ try:
+ with open(schema_path, "rb") as xsd_file:
+ parser = lxml.etree.XMLParser()
+ xsd_doc = lxml.etree.parse(
+ xsd_file, parser=parser, base_url=str(schema_path)
+ )
+ schema = lxml.etree.XMLSchema(xsd_doc)
+
+ with open(xml_file, "r") as f:
+ xml_doc = lxml.etree.parse(f)
+
+ xml_doc, _ = self._remove_template_tags_from_text_nodes(xml_doc)
+ xml_doc = self._preprocess_for_mc_ignorable(xml_doc)
+
+ relative_path = xml_file.relative_to(base_path)
+ if (
+ relative_path.parts
+ and relative_path.parts[0] in self.MAIN_CONTENT_FOLDERS
+ ):
+ xml_doc = self._clean_ignorable_namespaces(xml_doc)
+
+ if schema.validate(xml_doc):
+ return True, set()
+ else:
+ errors = set()
+ for error in schema.error_log:
+ errors.add(error.message)
+ return False, errors
+
+ except Exception as e:
+ return False, {str(e)}
+
+ def _get_original_file_errors(self, xml_file):
+ if self.original_file is None:
+ return set()
+
+ import tempfile
+ import zipfile
+
+ xml_file = Path(xml_file).resolve()
+ unpacked_dir = self.unpacked_dir.resolve()
+ relative_path = xml_file.relative_to(unpacked_dir)
+
+ with tempfile.TemporaryDirectory() as temp_dir:
+ temp_path = Path(temp_dir)
+
+ with zipfile.ZipFile(self.original_file, "r") as zip_ref:
+ zip_ref.extractall(temp_path)
+
+ original_xml_file = temp_path / relative_path
+
+ if not original_xml_file.exists():
+ return set()
+
+ is_valid, errors = self._validate_single_file_xsd(
+ original_xml_file, temp_path
+ )
+ return errors if errors else set()
+
+ def _remove_template_tags_from_text_nodes(self, xml_doc):
+ warnings = []
+ template_pattern = re.compile(r"\{\{[^}]*\}\}")
+
+ xml_string = lxml.etree.tostring(xml_doc, encoding="unicode")
+ xml_copy = lxml.etree.fromstring(xml_string)
+
+ def process_text_content(text, content_type):
+ if not text:
+ return text
+ matches = list(template_pattern.finditer(text))
+ if matches:
+ for match in matches:
+ warnings.append(
+ f"Found template tag in {content_type}: {match.group()}"
+ )
+ return template_pattern.sub("", text)
+ return text
+
+ for elem in xml_copy.iter():
+ if not hasattr(elem, "tag") or callable(elem.tag):
+ continue
+ tag_str = str(elem.tag)
+ if tag_str.endswith("}t") or tag_str == "t":
+ continue
+
+ elem.text = process_text_content(elem.text, "text content")
+ elem.tail = process_text_content(elem.tail, "tail content")
+
+ return lxml.etree.ElementTree(xml_copy), warnings
+
+
+if __name__ == "__main__":
+ raise RuntimeError("This module should not be run directly.")
diff --git a/skills/docx/scripts/office/validators/docx.py b/skills/docx/scripts/office/validators/docx.py
new file mode 100644
index 0000000..fec405e
--- /dev/null
+++ b/skills/docx/scripts/office/validators/docx.py
@@ -0,0 +1,446 @@
+"""
+Validator for Word document XML files against XSD schemas.
+"""
+
+import random
+import re
+import tempfile
+import zipfile
+
+import defusedxml.minidom
+import lxml.etree
+
+from .base import BaseSchemaValidator
+
+
+class DOCXSchemaValidator(BaseSchemaValidator):
+
+ WORD_2006_NAMESPACE = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
+ W14_NAMESPACE = "http://schemas.microsoft.com/office/word/2010/wordml"
+ W16CID_NAMESPACE = "http://schemas.microsoft.com/office/word/2016/wordml/cid"
+
+ ELEMENT_RELATIONSHIP_TYPES = {}
+
+ def validate(self):
+ if not self.validate_xml():
+ return False
+
+ all_valid = True
+ if not self.validate_namespaces():
+ all_valid = False
+
+ if not self.validate_unique_ids():
+ all_valid = False
+
+ if not self.validate_file_references():
+ all_valid = False
+
+ if not self.validate_content_types():
+ all_valid = False
+
+ if not self.validate_against_xsd():
+ all_valid = False
+
+ if not self.validate_whitespace_preservation():
+ all_valid = False
+
+ if not self.validate_deletions():
+ all_valid = False
+
+ if not self.validate_insertions():
+ all_valid = False
+
+ if not self.validate_all_relationship_ids():
+ all_valid = False
+
+ if not self.validate_id_constraints():
+ all_valid = False
+
+ if not self.validate_comment_markers():
+ all_valid = False
+
+ self.compare_paragraph_counts()
+
+ return all_valid
+
+ def validate_whitespace_preservation(self):
+ errors = []
+
+ for xml_file in self.xml_files:
+ if xml_file.name != "document.xml":
+ continue
+
+ try:
+ root = lxml.etree.parse(str(xml_file)).getroot()
+
+ for elem in root.iter(f"{{{self.WORD_2006_NAMESPACE}}}t"):
+ if elem.text:
+ text = elem.text
+ if re.search(r"^[ \t\n\r]", text) or re.search(
+ r"[ \t\n\r]$", text
+ ):
+ xml_space_attr = f"{{{self.XML_NAMESPACE}}}space"
+ if (
+ xml_space_attr not in elem.attrib
+ or elem.attrib[xml_space_attr] != "preserve"
+ ):
+ text_preview = (
+ repr(text)[:50] + "..."
+ if len(repr(text)) > 50
+ else repr(text)
+ )
+ errors.append(
+ f" {xml_file.relative_to(self.unpacked_dir)}: "
+ f"Line {elem.sourceline}: w:t element with whitespace missing xml:space='preserve': {text_preview}"
+ )
+
+ except (lxml.etree.XMLSyntaxError, Exception) as e:
+ errors.append(
+ f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
+ )
+
+ if errors:
+ print(f"FAILED - Found {len(errors)} whitespace preservation violations:")
+ for error in errors:
+ print(error)
+ return False
+ else:
+ if self.verbose:
+ print("PASSED - All whitespace is properly preserved")
+ return True
+
+ def validate_deletions(self):
+ errors = []
+
+ for xml_file in self.xml_files:
+ if xml_file.name != "document.xml":
+ continue
+
+ try:
+ root = lxml.etree.parse(str(xml_file)).getroot()
+ namespaces = {"w": self.WORD_2006_NAMESPACE}
+
+ for t_elem in root.xpath(".//w:del//w:t", namespaces=namespaces):
+ if t_elem.text:
+ text_preview = (
+ repr(t_elem.text)[:50] + "..."
+ if len(repr(t_elem.text)) > 50
+ else repr(t_elem.text)
+ )
+ errors.append(
+ f" {xml_file.relative_to(self.unpacked_dir)}: "
+ f"Line {t_elem.sourceline}: <w:t> found within <w:del>: {text_preview}"
+ )
+
+ for instr_elem in root.xpath(
+ ".//w:del//w:instrText", namespaces=namespaces
+ ):
+ text_preview = (
+ repr(instr_elem.text or "")[:50] + "..."
+ if len(repr(instr_elem.text or "")) > 50
+ else repr(instr_elem.text or "")
+ )
+ errors.append(
+ f" {xml_file.relative_to(self.unpacked_dir)}: "
+ f"Line {instr_elem.sourceline}: <w:instrText> found within <w:del> (use <w:delText>): {text_preview}"
+ )
+
+ except (lxml.etree.XMLSyntaxError, Exception) as e:
+ errors.append(
+ f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
+ )
+
+ if errors:
+ print(f"FAILED - Found {len(errors)} deletion validation violations:")
+ for error in errors:
+ print(error)
+ return False
+ else:
+ if self.verbose:
+ print("PASSED - No w:t elements found within w:del elements")
+ return True
+
+ def count_paragraphs_in_unpacked(self):
+ count = 0
+
+ for xml_file in self.xml_files:
+ if xml_file.name != "document.xml":
+ continue
+
+ try:
+ root = lxml.etree.parse(str(xml_file)).getroot()
+ paragraphs = root.findall(f".//{{{self.WORD_2006_NAMESPACE}}}p")
+ count = len(paragraphs)
+ except Exception as e:
+ print(f"Error counting paragraphs in unpacked document: {e}")
+
+ return count
+
+ def count_paragraphs_in_original(self):
+ original = self.original_file
+ if original is None:
+ return 0
+
+ count = 0
+
+ try:
+ with tempfile.TemporaryDirectory() as temp_dir:
+ with zipfile.ZipFile(original, "r") as zip_ref:
+ zip_ref.extractall(temp_dir)
+
+ doc_xml_path = temp_dir + "/word/document.xml"
+ root = lxml.etree.parse(doc_xml_path).getroot()
+
+ paragraphs = root.findall(f".//{{{self.WORD_2006_NAMESPACE}}}p")
+ count = len(paragraphs)
+
+ except Exception as e:
+ print(f"Error counting paragraphs in original document: {e}")
+
+ return count
+
+ def validate_insertions(self):
+ errors = []
+
+ for xml_file in self.xml_files:
+ if xml_file.name != "document.xml":
+ continue
+
+ try:
+ root = lxml.etree.parse(str(xml_file)).getroot()
+ namespaces = {"w": self.WORD_2006_NAMESPACE}
+
+ invalid_elements = root.xpath(
+ ".//w:ins//w:delText[not(ancestor::w:del)]", namespaces=namespaces
+ )
+
+ for elem in invalid_elements:
+ text_preview = (
+ repr(elem.text or "")[:50] + "..."
+ if len(repr(elem.text or "")) > 50
+ else repr(elem.text or "")
+ )
+ errors.append(
+ f" {xml_file.relative_to(self.unpacked_dir)}: "
+ f"Line {elem.sourceline}: <w:delText> within <w:ins> without <w:del> ancestor: {text_preview}"
+ )
+
+ except (lxml.etree.XMLSyntaxError, Exception) as e:
+ errors.append(
+ f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
+ )
+
+ if errors:
+ print(f"FAILED - Found {len(errors)} insertion validation violations:")
+ for error in errors:
+ print(error)
+ return False
+ else:
+ if self.verbose:
+ print("PASSED - No w:delText elements within w:ins elements")
+ return True
+
+ def compare_paragraph_counts(self):
+ original_count = self.count_paragraphs_in_original()
+ new_count = self.count_paragraphs_in_unpacked()
+
+ diff = new_count - original_count
+ diff_str = f"+{diff}" if diff > 0 else str(diff)
+ print(f"\nParagraphs: {original_count} → {new_count} ({diff_str})")
+
+ def _parse_id_value(self, val: str, base: int = 16) -> int:
+ return int(val, base)
+
+ def validate_id_constraints(self):
+ errors = []
+ para_id_attr = f"{{{self.W14_NAMESPACE}}}paraId"
+ durable_id_attr = f"{{{self.W16CID_NAMESPACE}}}durableId"
+
+ for xml_file in self.xml_files:
+ try:
+ for elem in lxml.etree.parse(str(xml_file)).iter():
+ if val := elem.get(para_id_attr):
+ if self._parse_id_value(val, base=16) >= 0x80000000:
+ errors.append(
+ f" {xml_file.name}:{elem.sourceline}: paraId={val} >= 0x80000000"
+ )
+
+ if val := elem.get(durable_id_attr):
+ if xml_file.name == "numbering.xml":
+ try:
+ if self._parse_id_value(val, base=10) >= 0x7FFFFFFF:
+ errors.append(
+ f" {xml_file.name}:{elem.sourceline}: "
+ f"durableId={val} >= 0x7FFFFFFF"
+ )
+ except ValueError:
+ errors.append(
+ f" {xml_file.name}:{elem.sourceline}: "
+ f"durableId={val} must be decimal in numbering.xml"
+ )
+ else:
+ if self._parse_id_value(val, base=16) >= 0x7FFFFFFF:
+ errors.append(
+ f" {xml_file.name}:{elem.sourceline}: "
+ f"durableId={val} >= 0x7FFFFFFF"
+ )
+ except Exception:
+ pass
+
+ if errors:
+ print(f"FAILED - {len(errors)} ID constraint violations:")
+ for e in errors:
+ print(e)
+ elif self.verbose:
+ print("PASSED - All paraId/durableId values within constraints")
+ return not errors
+
+ def validate_comment_markers(self):
+ errors = []
+
+ document_xml = None
+ comments_xml = None
+ for xml_file in self.xml_files:
+ if xml_file.name == "document.xml" and "word" in str(xml_file):
+ document_xml = xml_file
+ elif xml_file.name == "comments.xml":
+ comments_xml = xml_file
+
+ if not document_xml:
+ if self.verbose:
+ print("PASSED - No document.xml found (skipping comment validation)")
+ return True
+
+ try:
+ doc_root = lxml.etree.parse(str(document_xml)).getroot()
+ namespaces = {"w": self.WORD_2006_NAMESPACE}
+
+ range_starts = {
+ elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id")
+ for elem in doc_root.xpath(
+ ".//w:commentRangeStart", namespaces=namespaces
+ )
+ }
+ range_ends = {
+ elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id")
+ for elem in doc_root.xpath(
+ ".//w:commentRangeEnd", namespaces=namespaces
+ )
+ }
+ references = {
+ elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id")
+ for elem in doc_root.xpath(
+ ".//w:commentReference", namespaces=namespaces
+ )
+ }
+
+ orphaned_ends = range_ends - range_starts
+ for comment_id in sorted(
+ orphaned_ends, key=lambda x: int(x) if x and x.isdigit() else 0
+ ):
+ errors.append(
+ f' document.xml: commentRangeEnd id="{comment_id}" has no matching commentRangeStart'
+ )
+
+ orphaned_starts = range_starts - range_ends
+ for comment_id in sorted(
+ orphaned_starts, key=lambda x: int(x) if x and x.isdigit() else 0
+ ):
+ errors.append(
+ f' document.xml: commentRangeStart id="{comment_id}" has no matching commentRangeEnd'
+ )
+
+ comment_ids = set()
+ if comments_xml and comments_xml.exists():
+ comments_root = lxml.etree.parse(str(comments_xml)).getroot()
+ comment_ids = {
+ elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id")
+ for elem in comments_root.xpath(
+ ".//w:comment", namespaces=namespaces
+ )
+ }
+
+ marker_ids = range_starts | range_ends | references
+ invalid_refs = marker_ids - comment_ids
+ for comment_id in sorted(
+ invalid_refs, key=lambda x: int(x) if x and x.isdigit() else 0
+ ):
+ if comment_id:
+ errors.append(
+ f' document.xml: marker id="{comment_id}" references non-existent comment'
+ )
+
+ except (lxml.etree.XMLSyntaxError, Exception) as e:
+ errors.append(f" Error parsing XML: {e}")
+
+ if errors:
+ print(f"FAILED - {len(errors)} comment marker violations:")
+ for error in errors:
+ print(error)
+ return False
+ else:
+ if self.verbose:
+ print("PASSED - All comment markers properly paired")
+ return True
+
+ def repair(self) -> int:
+ repairs = super().repair()
+ repairs += self.repair_durableId()
+ return repairs
+
+ def repair_durableId(self) -> int:
+ repairs = 0
+
+ for xml_file in self.xml_files:
+ try:
+ content = xml_file.read_text(encoding="utf-8")
+ dom = defusedxml.minidom.parseString(content)
+ modified = False
+
+ for elem in dom.getElementsByTagName("*"):
+ if not elem.hasAttribute("w16cid:durableId"):
+ continue
+
+ durable_id = elem.getAttribute("w16cid:durableId")
+ needs_repair = False
+
+ if xml_file.name == "numbering.xml":
+ try:
+ needs_repair = (
+ self._parse_id_value(durable_id, base=10) >= 0x7FFFFFFF
+ )
+ except ValueError:
+ needs_repair = True
+ else:
+ try:
+ needs_repair = (
+ self._parse_id_value(durable_id, base=16) >= 0x7FFFFFFF
+ )
+ except ValueError:
+ needs_repair = True
+
+ if needs_repair:
+ value = random.randint(1, 0x7FFFFFFE)
+ if xml_file.name == "numbering.xml":
+ new_id = str(value)
+ else:
+ new_id = f"{value:08X}"
+
+ elem.setAttribute("w16cid:durableId", new_id)
+ print(
+ f" Repaired: {xml_file.name}: durableId {durable_id} → {new_id}"
+ )
+ repairs += 1
+ modified = True
+
+ if modified:
+ xml_file.write_bytes(dom.toxml(encoding="UTF-8"))
+
+ except Exception:
+ pass
+
+ return repairs
+
+
+if __name__ == "__main__":
+ raise RuntimeError("This module should not be run directly.")
diff --git a/skills/docx/scripts/office/validators/pptx.py b/skills/docx/scripts/office/validators/pptx.py
new file mode 100644
index 0000000..09842aa
--- /dev/null
+++ b/skills/docx/scripts/office/validators/pptx.py
@@ -0,0 +1,275 @@
+"""
+Validator for PowerPoint presentation XML files against XSD schemas.
+"""
+
+import re
+
+from .base import BaseSchemaValidator
+
+
+class PPTXSchemaValidator(BaseSchemaValidator):
+
+ PRESENTATIONML_NAMESPACE = (
+ "http://schemas.openxmlformats.org/presentationml/2006/main"
+ )
+
+ ELEMENT_RELATIONSHIP_TYPES = {
+ "sldid": "slide",
+ "sldmasterid": "slidemaster",
+ "notesmasterid": "notesmaster",
+ "sldlayoutid": "slidelayout",
+ "themeid": "theme",
+ "tablestyleid": "tablestyles",
+ }
+
+ def validate(self):
+ if not self.validate_xml():
+ return False
+
+ all_valid = True
+ if not self.validate_namespaces():
+ all_valid = False
+
+ if not self.validate_unique_ids():
+ all_valid = False
+
+ if not self.validate_uuid_ids():
+ all_valid = False
+
+ if not self.validate_file_references():
+ all_valid = False
+
+ if not self.validate_slide_layout_ids():
+ all_valid = False
+
+ if not self.validate_content_types():
+ all_valid = False
+
+ if not self.validate_against_xsd():
+ all_valid = False
+
+ if not self.validate_notes_slide_references():
+ all_valid = False
+
+ if not self.validate_all_relationship_ids():
+ all_valid = False
+
+ if not self.validate_no_duplicate_slide_layouts():
+ all_valid = False
+
+ return all_valid
+
+ def validate_uuid_ids(self):
+ import lxml.etree
+
+ errors = []
+ uuid_pattern = re.compile(
+ r"^[\{\(]?[0-9A-Fa-f]{8}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{12}[\}\)]?$"
+ )
+
+ for xml_file in self.xml_files:
+ try:
+ root = lxml.etree.parse(str(xml_file)).getroot()
+
+ for elem in root.iter():
+ for attr, value in elem.attrib.items():
+ attr_name = attr.split("}")[-1].lower()
+ if attr_name == "id" or attr_name.endswith("id"):
+ if self._looks_like_uuid(value):
+ if not uuid_pattern.match(value):
+ errors.append(
+ f" {xml_file.relative_to(self.unpacked_dir)}: "
+ f"Line {elem.sourceline}: ID '{value}' appears to be a UUID but contains invalid hex characters"
+ )
+
+ except (lxml.etree.XMLSyntaxError, Exception) as e:
+ errors.append(
+ f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
+ )
+
+ if errors:
+ print(f"FAILED - Found {len(errors)} UUID ID validation errors:")
+ for error in errors:
+ print(error)
+ return False
+ else:
+ if self.verbose:
+ print("PASSED - All UUID-like IDs contain valid hex values")
+ return True
+
+ def _looks_like_uuid(self, value):
+ clean_value = value.strip("{}()").replace("-", "")
+ return len(clean_value) == 32 and all(c.isalnum() for c in clean_value)
+
+ def validate_slide_layout_ids(self):
+ import lxml.etree
+
+ errors = []
+
+ slide_masters = list(self.unpacked_dir.glob("ppt/slideMasters/*.xml"))
+
+ if not slide_masters:
+ if self.verbose:
+ print("PASSED - No slide masters found")
+ return True
+
+ for slide_master in slide_masters:
+ try:
+ root = lxml.etree.parse(str(slide_master)).getroot()
+
+ rels_file = slide_master.parent / "_rels" / f"{slide_master.name}.rels"
+
+ if not rels_file.exists():
+ errors.append(
+ f" {slide_master.relative_to(self.unpacked_dir)}: "
+ f"Missing relationships file: {rels_file.relative_to(self.unpacked_dir)}"
+ )
+ continue
+
+ rels_root = lxml.etree.parse(str(rels_file)).getroot()
+
+ valid_layout_rids = set()
+ for rel in rels_root.findall(
+ f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
+ ):
+ rel_type = rel.get("Type", "")
+ if "slideLayout" in rel_type:
+ valid_layout_rids.add(rel.get("Id"))
+
+ for sld_layout_id in root.findall(
+ f".//{{{self.PRESENTATIONML_NAMESPACE}}}sldLayoutId"
+ ):
+ r_id = sld_layout_id.get(
+ f"{{{self.OFFICE_RELATIONSHIPS_NAMESPACE}}}id"
+ )
+ layout_id = sld_layout_id.get("id")
+
+ if r_id and r_id not in valid_layout_rids:
+ errors.append(
+ f" {slide_master.relative_to(self.unpacked_dir)}: "
+ f"Line {sld_layout_id.sourceline}: sldLayoutId with id='{layout_id}' "
+ f"references r:id='{r_id}' which is not found in slide layout relationships"
+ )
+
+ except (lxml.etree.XMLSyntaxError, Exception) as e:
+ errors.append(
+ f" {slide_master.relative_to(self.unpacked_dir)}: Error: {e}"
+ )
+
+ if errors:
+ print(f"FAILED - Found {len(errors)} slide layout ID validation errors:")
+ for error in errors:
+ print(error)
+ print(
+ "Remove invalid references or add missing slide layouts to the relationships file."
+ )
+ return False
+ else:
+ if self.verbose:
+ print("PASSED - All slide layout IDs reference valid slide layouts")
+ return True
+
+ def validate_no_duplicate_slide_layouts(self):
+ import lxml.etree
+
+ errors = []
+ slide_rels_files = list(self.unpacked_dir.glob("ppt/slides/_rels/*.xml.rels"))
+
+ for rels_file in slide_rels_files:
+ try:
+ root = lxml.etree.parse(str(rels_file)).getroot()
+
+ layout_rels = [
+ rel
+ for rel in root.findall(
+ f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
+ )
+ if "slideLayout" in rel.get("Type", "")
+ ]
+
+ if len(layout_rels) > 1:
+ errors.append(
+ f" {rels_file.relative_to(self.unpacked_dir)}: has {len(layout_rels)} slideLayout references"
+ )
+
+ except Exception as e:
+ errors.append(
+ f" {rels_file.relative_to(self.unpacked_dir)}: Error: {e}"
+ )
+
+ if errors:
+ print("FAILED - Found slides with duplicate slideLayout references:")
+ for error in errors:
+ print(error)
+ return False
+ else:
+ if self.verbose:
+ print("PASSED - All slides have exactly one slideLayout reference")
+ return True
+
+ def validate_notes_slide_references(self):
+ import lxml.etree
+
+ errors = []
+ notes_slide_references = {}
+
+ slide_rels_files = list(self.unpacked_dir.glob("ppt/slides/_rels/*.xml.rels"))
+
+ if not slide_rels_files:
+ if self.verbose:
+ print("PASSED - No slide relationship files found")
+ return True
+
+ for rels_file in slide_rels_files:
+ try:
+ root = lxml.etree.parse(str(rels_file)).getroot()
+
+ for rel in root.findall(
+ f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
+ ):
+ rel_type = rel.get("Type", "")
+ if "notesSlide" in rel_type:
+ target = rel.get("Target", "")
+ if target:
+ normalized_target = target.replace("../", "")
+
+ slide_name = rels_file.stem.replace(
+ ".xml", ""
+ )
+
+ if normalized_target not in notes_slide_references:
+ notes_slide_references[normalized_target] = []
+ notes_slide_references[normalized_target].append(
+ (slide_name, rels_file)
+ )
+
+ except (lxml.etree.XMLSyntaxError, Exception) as e:
+ errors.append(
+ f" {rels_file.relative_to(self.unpacked_dir)}: Error: {e}"
+ )
+
+ for target, references in notes_slide_references.items():
+ if len(references) > 1:
+ slide_names = [ref[0] for ref in references]
+ errors.append(
+ f" Notes slide '{target}' is referenced by multiple slides: {', '.join(slide_names)}"
+ )
+ for slide_name, rels_file in references:
+ errors.append(f" - {rels_file.relative_to(self.unpacked_dir)}")
+
+ if errors:
+ print(
+ f"FAILED - Found {len([e for e in errors if not e.startswith(' ')])} notes slide reference validation errors:"
+ )
+ for error in errors:
+ print(error)
+ print("Each slide may optionally have its own notes slide file.")
+ return False
+ else:
+ if self.verbose:
+ print("PASSED - All notes slide references are unique")
+ return True
+
+
+if __name__ == "__main__":
+ raise RuntimeError("This module should not be run directly.")
diff --git a/skills/docx/scripts/office/validators/redlining.py b/skills/docx/scripts/office/validators/redlining.py
new file mode 100644
index 0000000..71c81b6
--- /dev/null
+++ b/skills/docx/scripts/office/validators/redlining.py
@@ -0,0 +1,247 @@
+"""
+Validator for tracked changes in Word documents.
+"""
+
+import subprocess
+import tempfile
+import zipfile
+from pathlib import Path
+
+
+class RedliningValidator:
+
+ def __init__(self, unpacked_dir, original_docx, verbose=False, author="Claude"):
+ self.unpacked_dir = Path(unpacked_dir)
+ self.original_docx = Path(original_docx)
+ self.verbose = verbose
+ self.author = author
+ self.namespaces = {
+ "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
+ }
+
+ def repair(self) -> int:
+ return 0
+
+ def validate(self):
+ modified_file = self.unpacked_dir / "word" / "document.xml"
+ if not modified_file.exists():
+ print(f"FAILED - Modified document.xml not found at {modified_file}")
+ return False
+
+ try:
+ import xml.etree.ElementTree as ET
+
+ tree = ET.parse(modified_file)
+ root = tree.getroot()
+
+ del_elements = root.findall(".//w:del", self.namespaces)
+ ins_elements = root.findall(".//w:ins", self.namespaces)
+
+ author_del_elements = [
+ elem
+ for elem in del_elements
+ if elem.get(f"{{{self.namespaces['w']}}}author") == self.author
+ ]
+ author_ins_elements = [
+ elem
+ for elem in ins_elements
+ if elem.get(f"{{{self.namespaces['w']}}}author") == self.author
+ ]
+
+ if not author_del_elements and not author_ins_elements:
+ if self.verbose:
+ print(f"PASSED - No tracked changes by {self.author} found.")
+ return True
+
+ except Exception:
+ pass
+
+ with tempfile.TemporaryDirectory() as temp_dir:
+ temp_path = Path(temp_dir)
+
+ try:
+ with zipfile.ZipFile(self.original_docx, "r") as zip_ref:
+ zip_ref.extractall(temp_path)
+ except Exception as e:
+ print(f"FAILED - Error unpacking original docx: {e}")
+ return False
+
+ original_file = temp_path / "word" / "document.xml"
+ if not original_file.exists():
+ print(
+ f"FAILED - Original document.xml not found in {self.original_docx}"
+ )
+ return False
+
+ try:
+ import xml.etree.ElementTree as ET
+
+ modified_tree = ET.parse(modified_file)
+ modified_root = modified_tree.getroot()
+ original_tree = ET.parse(original_file)
+ original_root = original_tree.getroot()
+ except ET.ParseError as e:
+ print(f"FAILED - Error parsing XML files: {e}")
+ return False
+
+ self._remove_author_tracked_changes(original_root)
+ self._remove_author_tracked_changes(modified_root)
+
+ modified_text = self._extract_text_content(modified_root)
+ original_text = self._extract_text_content(original_root)
+
+ if modified_text != original_text:
+ error_message = self._generate_detailed_diff(
+ original_text, modified_text
+ )
+ print(error_message)
+ return False
+
+ if self.verbose:
+ print(f"PASSED - All changes by {self.author} are properly tracked")
+ return True
+
+ def _generate_detailed_diff(self, original_text, modified_text):
+ error_parts = [
+ f"FAILED - Document text doesn't match after removing {self.author}'s tracked changes",
+ "",
+ "Likely causes:",
+ " 1. Modified text inside another author's <w:ins> or <w:del> tags",
+ " 2. Made edits without proper tracked changes",
+ " 3. Didn't nest <w:del> inside <w:ins> when deleting another's insertion",
+ "",
+ "For pre-redlined documents, use correct patterns:",
+ " - To reject another's INSERTION: Nest <w:del> inside their <w:ins>",
+ " - To restore another's DELETION: Add new <w:ins> AFTER their <w:del>",
+ "",
+ ]
+
+ git_diff = self._get_git_word_diff(original_text, modified_text)
+ if git_diff:
+ error_parts.extend(["Differences:", "============", git_diff])
+ else:
+ error_parts.append("Unable to generate word diff (git not available)")
+
+ return "\n".join(error_parts)
+
+ def _get_git_word_diff(self, original_text, modified_text):
+ try:
+ with tempfile.TemporaryDirectory() as temp_dir:
+ temp_path = Path(temp_dir)
+
+ original_file = temp_path / "original.txt"
+ modified_file = temp_path / "modified.txt"
+
+ original_file.write_text(original_text, encoding="utf-8")
+ modified_file.write_text(modified_text, encoding="utf-8")
+
+ result = subprocess.run(
+ [
+ "git",
+ "diff",
+ "--word-diff=plain",
+ "--word-diff-regex=.",
+ "-U0",
+ "--no-index",
+ str(original_file),
+ str(modified_file),
+ ],
+ capture_output=True,
+ text=True,
+ )
+
+ if result.stdout.strip():
+ lines = result.stdout.split("\n")
+ content_lines = []
+ in_content = False
+ for line in lines:
+ if line.startswith("@@"):
+ in_content = True
+ continue
+ if in_content and line.strip():
+ content_lines.append(line)
+
+ if content_lines:
+ return "\n".join(content_lines)
+
+ result = subprocess.run(
+ [
+ "git",
+ "diff",
+ "--word-diff=plain",
+ "-U0",
+ "--no-index",
+ str(original_file),
+ str(modified_file),
+ ],
+ capture_output=True,
+ text=True,
+ )
+
+ if result.stdout.strip():
+ lines = result.stdout.split("\n")
+ content_lines = []
+ in_content = False
+ for line in lines:
+ if line.startswith("@@"):
+ in_content = True
+ continue
+ if in_content and line.strip():
+ content_lines.append(line)
+ return "\n".join(content_lines)
+
+ except (subprocess.CalledProcessError, FileNotFoundError, Exception):
+ pass
+
+ return None
+
+ def _remove_author_tracked_changes(self, root):
+ ins_tag = f"{{{self.namespaces['w']}}}ins"
+ del_tag = f"{{{self.namespaces['w']}}}del"
+ author_attr = f"{{{self.namespaces['w']}}}author"
+
+ for parent in root.iter():
+ to_remove = []
+ for child in parent:
+ if child.tag == ins_tag and child.get(author_attr) == self.author:
+ to_remove.append(child)
+ for elem in to_remove:
+ parent.remove(elem)
+
+ deltext_tag = f"{{{self.namespaces['w']}}}delText"
+ t_tag = f"{{{self.namespaces['w']}}}t"
+
+ for parent in root.iter():
+ to_process = []
+ for child in parent:
+ if child.tag == del_tag and child.get(author_attr) == self.author:
+ to_process.append((child, list(parent).index(child)))
+
+ for del_elem, del_index in reversed(to_process):
+ for elem in del_elem.iter():
+ if elem.tag == deltext_tag:
+ elem.tag = t_tag
+
+ for child in reversed(list(del_elem)):
+ parent.insert(del_index, child)
+ parent.remove(del_elem)
+
+ def _extract_text_content(self, root):
+ p_tag = f"{{{self.namespaces['w']}}}p"
+ t_tag = f"{{{self.namespaces['w']}}}t"
+
+ paragraphs = []
+ for p_elem in root.findall(f".//{p_tag}"):
+ text_parts = []
+ for t_elem in p_elem.findall(f".//{t_tag}"):
+ if t_elem.text:
+ text_parts.append(t_elem.text)
+ paragraph_text = "".join(text_parts)
+ if paragraph_text:
+ paragraphs.append(paragraph_text)
+
+ return "\n".join(paragraphs)
+
+
+if __name__ == "__main__":
+ raise RuntimeError("This module should not be run directly.")
diff --git a/skills/docx/scripts/templates/comments.xml b/skills/docx/scripts/templates/comments.xml
new file mode 100644
index 0000000..cd01a7d
--- /dev/null
+++ b/skills/docx/scripts/templates/comments.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/skills/docx/scripts/templates/commentsExtended.xml b/skills/docx/scripts/templates/commentsExtended.xml
new file mode 100644
index 0000000..411003c
--- /dev/null
+++ b/skills/docx/scripts/templates/commentsExtended.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/skills/docx/scripts/templates/commentsExtensible.xml b/skills/docx/scripts/templates/commentsExtensible.xml
new file mode 100644
index 0000000..f5572d7
--- /dev/null
+++ b/skills/docx/scripts/templates/commentsExtensible.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/skills/docx/scripts/templates/commentsIds.xml b/skills/docx/scripts/templates/commentsIds.xml
new file mode 100644
index 0000000..32f1629
--- /dev/null
+++ b/skills/docx/scripts/templates/commentsIds.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/skills/docx/scripts/templates/people.xml b/skills/docx/scripts/templates/people.xml
new file mode 100644
index 0000000..3803d2d
--- /dev/null
+++ b/skills/docx/scripts/templates/people.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/skills/pdf/SKILL.md b/skills/pdf/SKILL.md
new file mode 100644
index 0000000..d145faf
--- /dev/null
+++ b/skills/pdf/SKILL.md
@@ -0,0 +1,338 @@
+---
+name: PDF 文档处理
+description: >-
+ Use this skill whenever the user wants to do anything with PDF files. This
+ includes reading or extracting text/tables from PDFs, combining or merging
+ multiple PDFs into one, splitting PDFs apart, rotating pages, adding
+ watermarks, creating new PDFs, filling PDF forms, encrypting/decrypting PDFs,
+ extracting images, and OCR on scanned PDFs to make them searchable. If the
+ user mentions a .pdf file or asks to produce one, use this skill.
+version: 1.0.0
+type: procedural
+risk_level: low
+status: enabled
+tags:
+ - pdf
+ - document
+ - form
+ - ocr
+metadata:
+ author: anthropic
+ updated_at: '2026-04-04'
+market:
+ short_desc: 读取、创建、合并、拆分和填写 PDF 文档
+ category: productivity
+ maintainer:
+ name: DesireCore Official
+ verified: true
+ channel: latest
+---
+
+# PDF Processing Guide
+
+## Overview
+
+This guide covers essential PDF processing operations using Python libraries and command-line tools. For advanced features, JavaScript libraries, and detailed examples, see REFERENCE.md. If you need to fill out a PDF form, read FORMS.md and follow its instructions.
+
+## Quick Start
+
+```python
+from pypdf import PdfReader, PdfWriter
+
+# Read a PDF
+reader = PdfReader("document.pdf")
+print(f"Pages: {len(reader.pages)}")
+
+# Extract text
+text = ""
+for page in reader.pages:
+ text += page.extract_text()
+```
+
+## Python Libraries
+
+### pypdf - Basic Operations
+
+#### Merge PDFs
+```python
+from pypdf import PdfWriter, PdfReader
+
+writer = PdfWriter()
+for pdf_file in ["doc1.pdf", "doc2.pdf", "doc3.pdf"]:
+ reader = PdfReader(pdf_file)
+ for page in reader.pages:
+ writer.add_page(page)
+
+with open("merged.pdf", "wb") as output:
+ writer.write(output)
+```
+
+#### Split PDF
+```python
+reader = PdfReader("input.pdf")
+for i, page in enumerate(reader.pages):
+ writer = PdfWriter()
+ writer.add_page(page)
+ with open(f"page_{i+1}.pdf", "wb") as output:
+ writer.write(output)
+```
+
+#### Extract Metadata
+```python
+reader = PdfReader("document.pdf")
+meta = reader.metadata
+print(f"Title: {meta.title}")
+print(f"Author: {meta.author}")
+print(f"Subject: {meta.subject}")
+print(f"Creator: {meta.creator}")
+```
+
+#### Rotate Pages
+```python
+reader = PdfReader("input.pdf")
+writer = PdfWriter()
+
+page = reader.pages[0]
+page.rotate(90) # Rotate 90 degrees clockwise
+writer.add_page(page)
+
+with open("rotated.pdf", "wb") as output:
+ writer.write(output)
+```
+
+### pdfplumber - Text and Table Extraction
+
+#### Extract Text with Layout
+```python
+import pdfplumber
+
+with pdfplumber.open("document.pdf") as pdf:
+ for page in pdf.pages:
+ text = page.extract_text()
+ print(text)
+```
+
+#### Extract Tables
+```python
+with pdfplumber.open("document.pdf") as pdf:
+ for i, page in enumerate(pdf.pages):
+ tables = page.extract_tables()
+ for j, table in enumerate(tables):
+ print(f"Table {j+1} on page {i+1}:")
+ for row in table:
+ print(row)
+```
+
+#### Advanced Table Extraction
+```python
+import pandas as pd
+
+with pdfplumber.open("document.pdf") as pdf:
+ all_tables = []
+ for page in pdf.pages:
+ tables = page.extract_tables()
+ for table in tables:
+ if table: # Check if table is not empty
+ df = pd.DataFrame(table[1:], columns=table[0])
+ all_tables.append(df)
+
+# Combine all tables
+if all_tables:
+ combined_df = pd.concat(all_tables, ignore_index=True)
+ combined_df.to_excel("extracted_tables.xlsx", index=False)
+```
+
+### reportlab - Create PDFs
+
+#### Basic PDF Creation
+```python
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas
+
+c = canvas.Canvas("hello.pdf", pagesize=letter)
+width, height = letter
+
+# Add text
+c.drawString(100, height - 100, "Hello World!")
+c.drawString(100, height - 120, "This is a PDF created with reportlab")
+
+# Add a line
+c.line(100, height - 140, 400, height - 140)
+
+# Save
+c.save()
+```
+
+#### Create PDF with Multiple Pages
+```python
+from reportlab.lib.pagesizes import letter
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
+from reportlab.lib.styles import getSampleStyleSheet
+
+doc = SimpleDocTemplate("report.pdf", pagesize=letter)
+styles = getSampleStyleSheet()
+story = []
+
+# Add content
+title = Paragraph("Report Title", styles['Title'])
+story.append(title)
+story.append(Spacer(1, 12))
+
+body = Paragraph("This is the body of the report. " * 20, styles['Normal'])
+story.append(body)
+story.append(PageBreak())
+
+# Page 2
+story.append(Paragraph("Page 2", styles['Heading1']))
+story.append(Paragraph("Content for page 2", styles['Normal']))
+
+# Build PDF
+doc.build(story)
+```
+
+#### Subscripts and Superscripts
+
+**IMPORTANT**: Never use Unicode subscript/superscript characters (₀₁₂₃₄₅₆₇₈₉, ⁰¹²³⁴⁵⁶⁷⁸⁹) in ReportLab PDFs. The built-in fonts do not include these glyphs, causing them to render as solid black boxes.
+
+Instead, use ReportLab's XML markup tags in Paragraph objects:
+```python
+from reportlab.platypus import Paragraph
+from reportlab.lib.styles import getSampleStyleSheet
+
+styles = getSampleStyleSheet()
+
+# Subscripts: use <sub> tag
+chemical = Paragraph("H<sub>2</sub>O", styles['Normal'])
+
+# Superscripts: use <super> tag
+squared = Paragraph("x<super>2</super> + y<super>2</super>", styles['Normal'])
+```
+
+For canvas-drawn text (not Paragraph objects), manually adjust the font size and position rather than using Unicode subscripts/superscripts.
+
+## Command-Line Tools
+
+### pdftotext (poppler-utils)
+```bash
+# Extract text
+pdftotext input.pdf output.txt
+
+# Extract text preserving layout
+pdftotext -layout input.pdf output.txt
+
+# Extract specific pages
+pdftotext -f 1 -l 5 input.pdf output.txt # Pages 1-5
+```
+
+### qpdf
+```bash
+# Merge PDFs
+qpdf --empty --pages file1.pdf file2.pdf -- merged.pdf
+
+# Split pages
+qpdf input.pdf --pages . 1-5 -- pages1-5.pdf
+qpdf input.pdf --pages . 6-10 -- pages6-10.pdf
+
+# Rotate pages
+qpdf input.pdf output.pdf --rotate=+90:1 # Rotate page 1 by 90 degrees
+
+# Remove password
+qpdf --password=mypassword --decrypt encrypted.pdf decrypted.pdf
+```
+
+### pdftk (if available)
+```bash
+# Merge
+pdftk file1.pdf file2.pdf cat output merged.pdf
+
+# Split
+pdftk input.pdf burst
+
+# Rotate
+pdftk input.pdf rotate 1east output rotated.pdf
+```
+
+## Common Tasks
+
+### Extract Text from Scanned PDFs
+```python
+# Requires: pip install pytesseract pdf2image
+import pytesseract
+from pdf2image import convert_from_path
+
+# Convert PDF to images
+images = convert_from_path('scanned.pdf')
+
+# OCR each page
+text = ""
+for i, image in enumerate(images):
+ text += f"Page {i+1}:\n"
+ text += pytesseract.image_to_string(image)
+ text += "\n\n"
+
+print(text)
+```
+
+### Add Watermark
+```python
+from pypdf import PdfReader, PdfWriter
+
+# Create watermark (or load existing)
+watermark = PdfReader("watermark.pdf").pages[0]
+
+# Apply to all pages
+reader = PdfReader("document.pdf")
+writer = PdfWriter()
+
+for page in reader.pages:
+ page.merge_page(watermark)
+ writer.add_page(page)
+
+with open("watermarked.pdf", "wb") as output:
+ writer.write(output)
+```
+
+### Extract Images
+```bash
+# Using pdfimages (poppler-utils)
+pdfimages -j input.pdf output_prefix
+
+# This extracts all images as output_prefix-000.jpg, output_prefix-001.jpg, etc.
+```
+
+### Password Protection
+```python
+from pypdf import PdfReader, PdfWriter
+
+reader = PdfReader("input.pdf")
+writer = PdfWriter()
+
+for page in reader.pages:
+ writer.add_page(page)
+
+# Add password
+writer.encrypt("userpassword", "ownerpassword")
+
+with open("encrypted.pdf", "wb") as output:
+ writer.write(output)
+```
+
+## Quick Reference
+
+| Task | Best Tool | Command/Code |
+|------|-----------|--------------|
+| Merge PDFs | pypdf | `writer.add_page(page)` |
+| Split PDFs | pypdf | One page per file |
+| Extract text | pdfplumber | `page.extract_text()` |
+| Extract tables | pdfplumber | `page.extract_tables()` |
+| Create PDFs | reportlab | Canvas or Platypus |
+| Command line merge | qpdf | `qpdf --empty --pages ...` |
+| OCR scanned PDFs | pytesseract | Convert to image first |
+| Fill PDF forms | pdf-lib or pypdf (see FORMS.md) | See FORMS.md |
+
+## Next Steps
+
+- For advanced pypdfium2 usage, see REFERENCE.md
+- For JavaScript libraries (pdf-lib), see REFERENCE.md
+- If you need to fill out a PDF form, follow the instructions in FORMS.md
+- For troubleshooting guides, see REFERENCE.md
diff --git a/skills/pdf/forms.md b/skills/pdf/forms.md
new file mode 100644
index 0000000..6e7e1e0
--- /dev/null
+++ b/skills/pdf/forms.md
@@ -0,0 +1,294 @@
+**CRITICAL: You MUST complete these steps in order. Do not skip ahead to writing code.**
+
+If you need to fill out a PDF form, first check to see if the PDF has fillable form fields. Run this script from this file's directory:
+ `python scripts/check_fillable_fields.py <path to PDF>`, and depending on the result go to either the "Fillable fields" or "Non-fillable fields" section and follow those instructions.
+
+# Fillable fields
+If the PDF has fillable form fields:
+- Run this script from this file's directory: `python scripts/extract_form_field_info.py <path to PDF>`. It will create a JSON file with a list of fields in this format:
+```
+[
+ {
+ "field_id": (unique ID for the field),
+ "page": (page number, 1-based),
+ "rect": ([left, bottom, right, top] bounding box in PDF coordinates, y=0 is the bottom of the page),
+ "type": ("text", "checkbox", "radio_group", or "choice"),
+ },
+ // Checkboxes have "checked_value" and "unchecked_value" properties:
+ {
+ "field_id": (unique ID for the field),
+ "page": (page number, 1-based),
+ "type": "checkbox",
+ "checked_value": (Set the field to this value to check the checkbox),
+ "unchecked_value": (Set the field to this value to uncheck the checkbox),
+ },
+ // Radio groups have a "radio_options" list with the possible choices.
+ {
+ "field_id": (unique ID for the field),
+ "page": (page number, 1-based),
+ "type": "radio_group",
+ "radio_options": [
+ {
+ "value": (set the field to this value to select this radio option),
+ "rect": (bounding box for the radio button for this option)
+ },
+ // Other radio options
+ ]
+ },
+ // Multiple choice fields have a "choice_options" list with the possible choices:
+ {
+ "field_id": (unique ID for the field),
+ "page": (page number, 1-based),
+ "type": "choice",
+ "choice_options": [
+ {
+ "value": (set the field to this value to select this option),
+ "text": (display text of the option)
+ },
+ // Other choice options
+ ],
+ }
+]
+```
+- Convert the PDF to PNGs (one image for each page) with this script (run from this file's directory):
+`python scripts/convert_pdf_to_images.py <path to PDF>`
+Then analyze the images to determine the purpose of each form field (make sure to convert the bounding box PDF coordinates to image coordinates).
+- Create a `field_values.json` file in this format with the values to be entered for each field:
+```
+[
+ {
+ "field_id": "last_name", // Must match the field_id from `extract_form_field_info.py`
+ "description": "The user's last name",
+ "page": 1, // Must match the "page" value in field_info.json
+ "value": "Simpson"
+ },
+ {
+ "field_id": "Checkbox12",
+ "description": "Checkbox to be checked if the user is 18 or over",
+ "page": 1,
+ "value": "/On" // If this is a checkbox, use its "checked_value" value to check it. If it's a radio button group, use one of the "value" values in "radio_options".
+ },
+ // more fields
+]
+```
+- Run the `fill_fillable_fields.py` script from this file's directory to create a filled-in PDF:
+`python scripts/fill_fillable_fields.py