diff --git a/.fdignore b/.fdignore new file mode 100644 index 000000000..41bdd3828 --- /dev/null +++ b/.fdignore @@ -0,0 +1,7 @@ +.tox +Session.vim +build/ +docs/.build +features/_scratch +__pycache__/ +src/*.egg-info diff --git a/.gitignore b/.gitignore index de25a6f76..5aabfd8cc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,10 @@ +/build/ .coverage /dist/ /docs/.build/ -/*.egg-info +/src/*.egg-info *.pyc +.pytest_cache/ _scratch/ Session.vim /.tox/ diff --git a/.projections.json b/.projections.json new file mode 100644 index 000000000..7d68dd4c5 --- /dev/null +++ b/.projections.json @@ -0,0 +1,14 @@ +{ + "src/docx/*.py" : { + "alternate" : [ + "tests/{dirname}/test_{basename}.py" + ], + "type" : "source" + }, + "tests/**/test_*.py" : { + "alternate" : [ + "src/docx/{dirname}/{basename}.py" + ], + "type" : "test" + } +} diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 000000000..125538586 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,19 @@ +version: 2 + +# -- set the OS, Python version and other tools you might need -- +build: + os: ubuntu-22.04 + tools: + python: "3.9" + +# -- build documentation in the "docs/" directory with Sphinx -- +sphinx: + configuration: docs/conf.py + # -- fail on all warnings to avoid broken references -- + # fail_on_warning: true + +# -- package versions required to build your documentation -- +# -- see https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html -- +python: + install: + - requirements: requirements-docs.txt diff --git a/.rgignore b/.rgignore new file mode 100644 index 000000000..12d71b5b4 --- /dev/null +++ b/.rgignore @@ -0,0 +1,9 @@ +.tox +Session.vim +build/ +docs/.build +features/_scratch +__pycache__/ +ref/ +src/*.egg-info +tests/test_files diff --git a/.travis.yml b/.travis.yml index 3345ff24f..6ce09e8e6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,8 @@ language: python python: - - "3.4" - - "3.3" + - "3.8" + - "3.6" - "2.7" - - "2.6" # command to install 
dependencies, e.g. pip install -r requirements.txt --use-mirrors install: pip install -r requirements.txt # command to run tests, e.g. python setup.py test diff --git a/HISTORY.rst b/HISTORY.rst index 925cd95be..69bba4161 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,207 @@ Release History --------------- +1.2.0 (2025-06-16) +++++++++++++++++++ + +- Add support for comments +- Drop support for Python 3.8, add testing for Python 3.13 + + +1.1.2 (2024-05-01) +++++++++++++++++++ + +- Fix #1383 Revert lxml<=4.9.2 pin that breaks Python 3.12 install +- Fix #1385 Support use of Part._rels by python-docx-template +- Add support and testing for Python 3.12 + + +1.1.1 (2024-04-29) +++++++++++++++++++ + +- Fix #531, #1146 Index error on table with misaligned borders +- Fix #1335 Tolerate invalid float value in bottom-margin +- Fix #1337 Do not require typing-extensions at runtime + + +1.1.0 (2023-11-03) +++++++++++++++++++ + +- Add BlockItemContainer.iter_inner_content() + + +1.0.1 (2023-10-12) +++++++++++++++++++ + +- Fix #1256: parse_xml() and OxmlElement moved. +- Add Hyperlink.fragment and .url + + +1.0.0 (2023-10-01) ++++++++++++++++++++ + +- Remove Python 2 support. Supported versions are 3.7+ +- Fix #85: Paragraph.text includes hyperlink text +- Add #1113: Hyperlink.address +- Add Hyperlink.contains_page_break +- Add Hyperlink.runs +- Add Hyperlink.text +- Add Paragraph.contains_page_break +- Add Paragraph.hyperlinks +- Add Paragraph.iter_inner_content() +- Add Paragraph.rendered_page_breaks +- Add RenderedPageBreak.following_paragraph_fragment +- Add RenderedPageBreak.preceding_paragraph_fragment +- Add Run.contains_page_break +- Add Run.iter_inner_content() +- Add Section.iter_inner_content() + + +0.8.11 (2021-05-15) ++++++++++++++++++++ + +- Small build changes and Python 3.8 version changes like collections.abc location. 
+ + +0.8.10 (2019-01-08) ++++++++++++++++++++ + +- Revert use of expanded package directory for default.docx to work around setup.py + problem with filenames containing square brackets. + + +0.8.9 (2019-01-08) +++++++++++++++++++ + +- Fix gap in MANIFEST.in that excluded default document template directory + + +0.8.8 (2019-01-07) +++++++++++++++++++ + +- Add support for headers and footers + + +0.8.7 (2018-08-18) +++++++++++++++++++ + +- Add _Row.height_rule +- Add _Row.height +- Add _Cell.vertical_alignment +- Fix #455: increment next_id, don't fill gaps +- Add #375: import docx failure on --OO optimization +- Add #254: remove default zoom percentage +- Add #266: miscellaneous documentation fixes +- Add #175: refine MANIFEST.ini +- Add #168: Unicode error on core-props in Python 2 + + +0.8.6 (2016-06-22) +++++++++++++++++++ + +- Add #257: add Font.highlight_color +- Add #261: add ParagraphFormat.tab_stops +- Add #303: disallow XML entity expansion + + +0.8.5 (2015-02-21) +++++++++++++++++++ + +- Fix #149: KeyError on Document.add_table() +- Fix #78: feature: add_table() sets cell widths +- Add #106: feature: Table.direction (i.e. right-to-left) +- Add #102: feature: add CT_Row.trPr + + +0.8.4 (2015-02-20) +++++++++++++++++++ + +- Fix #151: tests won't run on PyPI distribution +- Fix #124: default to inches on no TIFF resolution unit + + +0.8.3 (2015-02-19) +++++++++++++++++++ + +- Add #121, #135, #139: feature: Font.color + + +0.8.2 (2015-02-16) +++++++++++++++++++ + +- Fix #94: picture prints at wrong size when scaled +- Extract `docx.document.Document` object from `DocumentPart` + + Refactor `docx.Document` from an object into a factory function for new + `docx.document.Document object`. Extract methods from prior `docx.Document` + and `docx.parts.document.DocumentPart` to form the new API class and retire + `docx.Document` class. + +- Migrate `Document.numbering_part` to `DocumentPart.numbering_part`. 
The + `numbering_part` property is not part of the published API and is an + interim internal feature to be replaced in a future release, perhaps with + something like `Document.numbering_definitions`. In the meantime, it can + now be accessed using ``Document.part.numbering_part``. + + +0.8.1 (2015-02-10) +++++++++++++++++++ + +- Fix #140: Warning triggered on Document.add_heading/table() + + +0.8.0 (2015-02-08) +++++++++++++++++++ + +- Add styles. Provides general capability to access and manipulate paragraph, + character, and table styles. + +- Add ParagraphFormat object, accessible on Paragraph.paragraph_format, and + providing the following paragraph formatting properties: + + + paragraph alignment (justification) + + space before and after paragraph + + line spacing + + indentation + + keep together, keep with next, page break before, and widow control + +- Add Font object, accessible on Run.font, providing character-level + formatting including: + + + typeface (e.g. 'Arial') + + point size + + underline + + italic + + bold + + superscript and subscript + +The following issues were retired: + +- Add feature #56: superscript/subscript +- Add feature #67: lookup style by UI name +- Add feature #98: Paragraph indentation +- Add feature #120: Document.styles + +**Backward incompatibilities** + +Paragraph.style now returns a Style object. Previously it returned the style +name as a string. The name can now be retrieved using the Style.name +property, for example, `paragraph.style.name`. 
+ + +0.7.6 (2014-12-14) +++++++++++++++++++ + +- Add feature #69: Table.alignment +- Add feature #29: Document.core_properties + + +0.7.5 (2014-11-29) +++++++++++++++++++ + +- Add feature #65: _Cell.merge() + + 0.7.4 (2014-07-18) ++++++++++++++++++ diff --git a/MANIFEST.in b/MANIFEST.in index 2c4f97c0d..b2d3fadcf 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,10 @@ include HISTORY.rst LICENSE README.rst tox.ini -include tests/*.py -recursive-include features * -recursive-include docx/templates * -recursive-include tests/test_files * - +include requirements*.txt +graft src/docx/templates +graft features +graft tests +graft docs +prune docs/.build +global-exclude .DS_Store +global-exclude __pycache__ +global-exclude *.py[co] diff --git a/Makefile b/Makefile index 2e8969154..2b2fb4121 100644 --- a/Makefile +++ b/Makefile @@ -1,47 +1,64 @@ BEHAVE = behave MAKE = make PYTHON = python -SETUP = $(PYTHON) ./setup.py +TWINE = $(PYTHON) -m twine -.PHONY: accept clean coverage docs readme register sdist test upload +.PHONY: accept build clean cleandocs coverage docs install opendocs sdist test +.PHONY: test-upload wheel help: @echo "Please use \`make ' where is one or more of" - @echo " accept run acceptance tests using behave" - @echo " clean delete intermediate work product and start fresh" - @echo " coverage run nosetests with coverage" - @echo " docs generate documentation" - @echo " readme update README.html from README.rst" - @echo " register update metadata (README.rst) on PyPI" - @echo " test run tests using setup.py" - @echo " sdist generate a source distribution into dist/" - @echo " upload upload distribution tarball to PyPI" + @echo " accept run acceptance tests using behave" + @echo " build generate both sdist and wheel suitable for upload to PyPI" + @echo " clean delete intermediate work product and start fresh" + @echo " cleandocs delete intermediate documentation files" + @echo " coverage run pytest with coverage" + @echo " docs generate documentation" 
+ @echo " opendocs open browser to local version of documentation" + @echo " register update metadata (README.rst) on PyPI" + @echo " sdist generate a source distribution into dist/" + @echo " test run unit tests using pytest" + @echo " test-upload upload distribution to TestPyPI" + @echo " upload upload distribution tarball to PyPI" + @echo " wheel generate a binary distribution into dist/" accept: - $(BEHAVE) --stop + uv run $(BEHAVE) --stop + +build: + uv build clean: - find . -type f -name \*.pyc -exec rm {} \; + # find . -type f -name \*.pyc -exec rm {} \; + fd -e pyc -I -x rm rm -rf dist *.egg-info .coverage .DS_Store +cleandocs: + $(MAKE) -C docs clean + coverage: - py.test --cov-report term-missing --cov=docx tests/ + uv run pytest --cov-report term-missing --cov=docx tests/ docs: - $(MAKE) -C docs clean html + $(MAKE) -C docs html -readme: - rst2html README.rst >README.html - open README.html +install: + pip install -Ue . -register: - $(SETUP) register +opendocs: + open docs/.build/html/index.html sdist: - $(SETUP) sdist + uv build --sdist test: - $(SETUP) test + uv run pytest -x + +test-upload: sdist wheel + uv run $(TWINE) upload --repository testpypi dist/* + +upload: clean sdist wheel + uv run $(TWINE) upload dist/* -upload: - $(SETUP) sdist upload +wheel: + uv build --wheel diff --git a/README.md b/README.md new file mode 100644 index 000000000..c35cf0200 --- /dev/null +++ b/README.md @@ -0,0 +1,26 @@ +# python-docx + +*python-docx* is a Python library for reading, creating, and updating Microsoft Word 2007+ (.docx) files. + +## Installation + +``` +pip install python-docx +``` + +## Example + +```python +>>> from docx import Document + +>>> document = Document() +>>> document.add_paragraph("It was a dark and stormy night.") + +>>> document.save("dark-and-stormy.docx") + +>>> document = Document("dark-and-stormy.docx") +>>> document.paragraphs[0].text +'It was a dark and stormy night.' 
+``` + +More information is available in the [python-docx documentation](https://python-docx.readthedocs.org/en/latest/) diff --git a/README.rst b/README.rst deleted file mode 100644 index 82d1f0bd7..000000000 --- a/README.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. image:: https://travis-ci.org/python-openxml/python-docx.svg?branch=master - :target: https://travis-ci.org/python-openxml/python-docx - -*python-docx* is a Python library for creating and updating Microsoft Word -(.docx) files. - -More information is available in the `python-docx documentation`_. - -.. _`python-docx documentation`: - https://python-docx.readthedocs.org/en/latest/ diff --git a/docs/_static/img/comment-parts.png b/docs/_static/img/comment-parts.png new file mode 100644 index 000000000..c7db1be54 Binary files /dev/null and b/docs/_static/img/comment-parts.png differ diff --git a/docs/_static/img/hdrftr-01.png b/docs/_static/img/hdrftr-01.png new file mode 100644 index 000000000..5309ef2a3 Binary files /dev/null and b/docs/_static/img/hdrftr-01.png differ diff --git a/docs/_static/img/hdrftr-02.png b/docs/_static/img/hdrftr-02.png new file mode 100644 index 000000000..01af2981a Binary files /dev/null and b/docs/_static/img/hdrftr-02.png differ diff --git a/docs/api/comments.rst b/docs/api/comments.rst new file mode 100644 index 000000000..a54ecc9ce --- /dev/null +++ b/docs/api/comments.rst @@ -0,0 +1,27 @@ + +.. _comments_api: + +Comment-related objects +======================= + +.. currentmodule:: docx.comments + + +|Comments| objects +------------------ + +.. autoclass:: Comments() + :members: + :inherited-members: + :exclude-members: + part + + +|Comment| objects +------------------ + +.. autoclass:: Comment() + :members: + :inherited-members: + :exclude-members: + part diff --git a/docs/api/dml.rst b/docs/api/dml.rst new file mode 100644 index 000000000..79b314844 --- /dev/null +++ b/docs/api/dml.rst @@ -0,0 +1,16 @@ + +.. 
_dml_api: + +DrawingML objects +================= + +Low-level drawing elements like color that appear in various document +contexts. + + +|ColorFormat| objects +--------------------- + +.. autoclass:: docx.dml.color.ColorFormat() + :members: + :undoc-members: diff --git a/docs/api/document.rst b/docs/api/document.rst index accab05b3..42ec0211f 100644 --- a/docs/api/document.rst +++ b/docs/api/document.rst @@ -7,24 +7,111 @@ Document objects The main Document and related objects. -.. currentmodule:: docx.api +|Document| constructor +---------------------- + +.. autofunction:: docx.Document |Document| objects ------------------ - -.. autoclass:: Document +.. autoclass:: docx.document.Document() :members: - :exclude-members: numbering_part, styles_part + :exclude-members: styles_part -.. currentmodule:: docx.parts.document +|CoreProperties| objects +------------------------- +Each |Document| object provides access to its |CoreProperties| object via its +:attr:`core_properties` attribute. A |CoreProperties| object provides +read/write access to the so-called *core properties* for the document. The +core properties are author, category, comments, content_status, created, +identifier, keywords, language, last_modified_by, last_printed, modified, +revision, subject, title, and version. -|Sections| objects ------------------- +Each property is one of three types, |str|, |datetime|, or |int|. String +properties are limited in length to 255 characters and return an empty string +('') if not set. Date properties are assigned and returned as |datetime| +objects without timezone, i.e. in UTC. Any timezone conversions are the +responsibility of the client. Date properties return |None| if not set. +|docx| does not automatically set any of the document core properties other +than to add a core properties part to a document that doesn't have one +(very uncommon). 
If |docx| adds a core properties part, it contains default +values for the title, last_modified_by, revision, and modified properties. +Client code should update properties like revision and last_modified_by +if that behavior is desired. -.. autoclass:: Sections - :members: +.. currentmodule:: docx.opc.coreprops + +.. class:: CoreProperties + + .. attribute:: author + + `string` -- An entity primarily responsible for making the content of the + resource. + + .. attribute:: category + + `string` -- A categorization of the content of this package. Example + values might include: Resume, Letter, Financial Forecast, Proposal, + or Technical Presentation. + + .. attribute:: comments + + `string` -- An account of the content of the resource. + + .. attribute:: content_status + + `string` -- completion status of the document, e.g. 'draft' + + .. attribute:: created + + `datetime` -- time of initial creation of the document + + .. attribute:: identifier + + `string` -- An unambiguous reference to the resource within a given + context, e.g. ISBN. + + .. attribute:: keywords + + `string` -- descriptive words or short phrases likely to be used as + search terms for this document + + .. attribute:: language + + `string` -- language the document is written in + + .. attribute:: last_modified_by + + `string` -- name or other identifier (such as email address) of person + who last modified the document + + .. attribute:: last_printed + + `datetime` -- time the document was last printed + + .. attribute:: modified + + `datetime` -- time the document was last modified + + .. attribute:: revision + + `int` -- number of this revision, incremented by Word each time the + document is saved. Note however |docx| does not automatically increment + the revision number when it saves a document. + + .. attribute:: subject + + `string` -- The topic of the content of the resource. + + .. attribute:: title + + `string` -- The name given to the resource. + + .. 
attribute:: version + + `string` -- free-form version string diff --git a/docs/api/enum/MsoColorType.rst b/docs/api/enum/MsoColorType.rst new file mode 100644 index 000000000..62a94d6aa --- /dev/null +++ b/docs/api/enum/MsoColorType.rst @@ -0,0 +1,23 @@ +.. _MsoColorType: + +``MSO_COLOR_TYPE`` +================== + +Specifies the color specification scheme + +Example:: + + from docx.enum.dml import MSO_COLOR_TYPE + + assert font.color.type == MSO_COLOR_TYPE.THEME + +---- + +RGB + Color is specified by an |RGBColor| value. + +THEME + Color is one of the preset theme colors. + +AUTO + Color is determined automatically by the application. diff --git a/docs/api/enum/MsoThemeColorIndex.rst b/docs/api/enum/MsoThemeColorIndex.rst new file mode 100644 index 000000000..02436f2c1 --- /dev/null +++ b/docs/api/enum/MsoThemeColorIndex.rst @@ -0,0 +1,71 @@ +.. _MsoThemeColorIndex: + +``MSO_THEME_COLOR_INDEX`` +========================= + +Indicates the Office theme color, one of those shown in the color gallery on +the formatting ribbon. + +Alias: ``MSO_THEME_COLOR`` + +Example:: + + from docx.enum.dml import MSO_THEME_COLOR + + font.color.theme_color = MSO_THEME_COLOR.ACCENT_1 + +---- + +NOT_THEME_COLOR + Indicates the color is not a theme color. + +ACCENT_1 + Specifies the Accent 1 theme color. + +ACCENT_2 + Specifies the Accent 2 theme color. + +ACCENT_3 + Specifies the Accent 3 theme color. + +ACCENT_4 + Specifies the Accent 4 theme color. + +ACCENT_5 + Specifies the Accent 5 theme color. + +ACCENT_6 + Specifies the Accent 6 theme color. + +BACKGROUND_1 + Specifies the Background 1 theme color. + +BACKGROUND_2 + Specifies the Background 2 theme color. + +DARK_1 + Specifies the Dark 1 theme color. + +DARK_2 + Specifies the Dark 2 theme color. + +FOLLOWED_HYPERLINK + Specifies the theme color for a clicked hyperlink. + +HYPERLINK + Specifies the theme color for a hyperlink. + +LIGHT_1 + Specifies the Light 1 theme color. + +LIGHT_2 + Specifies the Light 2 theme color. 
+ +TEXT_1 + Specifies the Text 1 theme color. + +TEXT_2 + Specifies the Text 2 theme color. + +MIXED + Indicates multiple theme colors are used. diff --git a/docs/api/enum/WdBuiltinStyle.rst b/docs/api/enum/WdBuiltinStyle.rst new file mode 100644 index 000000000..b7aa682d4 --- /dev/null +++ b/docs/api/enum/WdBuiltinStyle.rst @@ -0,0 +1,415 @@ +.. _WdBuiltinStyle: + +``WD_BUILTIN_STYLE`` +==================== + +alias: **WD_STYLE** + +Specifies a built-in Microsoft Word style. + +Example:: + + from docx import Document + from docx.enum.style import WD_STYLE + + document = Document() + styles = document.styles + style = styles[WD_STYLE.BODY_TEXT] + +---- + +BLOCK_QUOTATION + Block Text. + +BODY_TEXT + Body Text. + +BODY_TEXT_2 + Body Text 2. + +BODY_TEXT_3 + Body Text 3. + +BODY_TEXT_FIRST_INDENT + Body Text First Indent. + +BODY_TEXT_FIRST_INDENT_2 + Body Text First Indent 2. + +BODY_TEXT_INDENT + Body Text Indent. + +BODY_TEXT_INDENT_2 + Body Text Indent 2. + +BODY_TEXT_INDENT_3 + Body Text Indent 3. + +BOOK_TITLE + Book Title. + +CAPTION + Caption. + +CLOSING + Closing. + +COMMENT_REFERENCE + Comment Reference. + +COMMENT_TEXT + Comment Text. + +DATE + Date. + +DEFAULT_PARAGRAPH_FONT + Default Paragraph Font. + +EMPHASIS + Emphasis. + +ENDNOTE_REFERENCE + Endnote Reference. + +ENDNOTE_TEXT + Endnote Text. + +ENVELOPE_ADDRESS + Envelope Address. + +ENVELOPE_RETURN + Envelope Return. + +FOOTER + Footer. + +FOOTNOTE_REFERENCE + Footnote Reference. + +FOOTNOTE_TEXT + Footnote Text. + +HEADER + Header. + +HEADING_1 + Heading 1. + +HEADING_2 + Heading 2. + +HEADING_3 + Heading 3. + +HEADING_4 + Heading 4. + +HEADING_5 + Heading 5. + +HEADING_6 + Heading 6. + +HEADING_7 + Heading 7. + +HEADING_8 + Heading 8. + +HEADING_9 + Heading 9. + +HTML_ACRONYM + HTML Acronym. + +HTML_ADDRESS + HTML Address. + +HTML_CITE + HTML Cite. + +HTML_CODE + HTML Code. + +HTML_DFN + HTML Definition. + +HTML_KBD + HTML Keyboard. + +HTML_NORMAL + Normal (Web). + +HTML_PRE + HTML Preformatted. 
+ +HTML_SAMP + HTML Sample. + +HTML_TT + HTML Typewriter. + +HTML_VAR + HTML Variable. + +HYPERLINK + Hyperlink. + +HYPERLINK_FOLLOWED + Followed Hyperlink. + +INDEX_1 + Index 1. + +INDEX_2 + Index 2. + +INDEX_3 + Index 3. + +INDEX_4 + Index 4. + +INDEX_5 + Index 5. + +INDEX_6 + Index 6. + +INDEX_7 + Index 7. + +INDEX_8 + Index 8. + +INDEX_9 + Index 9. + +INDEX_HEADING + Index Heading + +INTENSE_EMPHASIS + Intense Emphasis. + +INTENSE_QUOTE + Intense Quote. + +INTENSE_REFERENCE + Intense Reference. + +LINE_NUMBER + Line Number. + +LIST + List. + +LIST_2 + List 2. + +LIST_3 + List 3. + +LIST_4 + List 4. + +LIST_5 + List 5. + +LIST_BULLET + List Bullet. + +LIST_BULLET_2 + List Bullet 2. + +LIST_BULLET_3 + List Bullet 3. + +LIST_BULLET_4 + List Bullet 4. + +LIST_BULLET_5 + List Bullet 5. + +LIST_CONTINUE + List Continue. + +LIST_CONTINUE_2 + List Continue 2. + +LIST_CONTINUE_3 + List Continue 3. + +LIST_CONTINUE_4 + List Continue 4. + +LIST_CONTINUE_5 + List Continue 5. + +LIST_NUMBER + List Number. + +LIST_NUMBER_2 + List Number 2. + +LIST_NUMBER_3 + List Number 3. + +LIST_NUMBER_4 + List Number 4. + +LIST_NUMBER_5 + List Number 5. + +LIST_PARAGRAPH + List Paragraph. + +MACRO_TEXT + Macro Text. + +MESSAGE_HEADER + Message Header. + +NAV_PANE + Document Map. + +NORMAL + Normal. + +NORMAL_INDENT + Normal Indent. + +NORMAL_OBJECT + Normal (applied to an object). + +NORMAL_TABLE + Normal (applied within a table). + +NOTE_HEADING + Note Heading. + +PAGE_NUMBER + Page Number. + +PLAIN_TEXT + Plain Text. + +QUOTE + Quote. + +SALUTATION + Salutation. + +SIGNATURE + Signature. + +STRONG + Strong. + +SUBTITLE + Subtitle. + +SUBTLE_EMPHASIS + Subtle Emphasis. + +SUBTLE_REFERENCE + Subtle Reference. + +TABLE_COLORFUL_GRID + Colorful Grid. + +TABLE_COLORFUL_LIST + Colorful List. + +TABLE_COLORFUL_SHADING + Colorful Shading. + +TABLE_DARK_LIST + Dark List. + +TABLE_LIGHT_GRID + Light Grid. + +TABLE_LIGHT_GRID_ACCENT_1 + Light Grid Accent 1. + +TABLE_LIGHT_LIST + Light List. 
+ +TABLE_LIGHT_LIST_ACCENT_1 + Light List Accent 1. + +TABLE_LIGHT_SHADING + Light Shading. + +TABLE_LIGHT_SHADING_ACCENT_1 + Light Shading Accent 1. + +TABLE_MEDIUM_GRID_1 + Medium Grid 1. + +TABLE_MEDIUM_GRID_2 + Medium Grid 2. + +TABLE_MEDIUM_GRID_3 + Medium Grid 3. + +TABLE_MEDIUM_LIST_1 + Medium List 1. + +TABLE_MEDIUM_LIST_1_ACCENT_1 + Medium List 1 Accent 1. + +TABLE_MEDIUM_LIST_2 + Medium List 2. + +TABLE_MEDIUM_SHADING_1 + Medium Shading 1. + +TABLE_MEDIUM_SHADING_1_ACCENT_1 + Medium Shading 1 Accent 1. + +TABLE_MEDIUM_SHADING_2 + Medium Shading 2. + +TABLE_MEDIUM_SHADING_2_ACCENT_1 + Medium Shading 2 Accent 1. + +TABLE_OF_AUTHORITIES + Table of Authorities. + +TABLE_OF_FIGURES + Table of Figures. + +TITLE + Title. + +TOAHEADING + TOA Heading. + +TOC_1 + TOC 1. + +TOC_2 + TOC 2. + +TOC_3 + TOC 3. + +TOC_4 + TOC 4. + +TOC_5 + TOC 5. + +TOC_6 + TOC 6. + +TOC_7 + TOC 7. + +TOC_8 + TOC 8. + +TOC_9 + TOC 9. diff --git a/docs/api/enum/WdCellVerticalAlignment.rst b/docs/api/enum/WdCellVerticalAlignment.rst new file mode 100644 index 000000000..8a2f76022 --- /dev/null +++ b/docs/api/enum/WdCellVerticalAlignment.rst @@ -0,0 +1,32 @@ +.. _WdCellVerticalAlignment: + +``WD_CELL_VERTICAL_ALIGNMENT`` +============================== + +alias: **WD_ALIGN_VERTICAL** + +Specifies the vertical alignment of text in one or more cells of a table. + +Example:: + + from docx.enum.table import WD_ALIGN_VERTICAL + + table = document.add_table(3, 3) + table.cell(0, 0).vertical_alignment = WD_ALIGN_VERTICAL.BOTTOM + +---- + +TOP + Text is aligned to the top border of the cell. + +CENTER + Text is aligned to the center of the cell. + +BOTTOM + Text is aligned to the bottom border of the cell. + +BOTH + This is an option in the OpenXml spec, but not in Word itself. It's not + clear what Word behavior this setting produces. If you find out please let + us know and we'll update this documentation. Otherwise, probably best to + avoid this option. 
diff --git a/docs/api/enum/WdColorIndex.rst b/docs/api/enum/WdColorIndex.rst new file mode 100644 index 000000000..f8adb2d51 --- /dev/null +++ b/docs/api/enum/WdColorIndex.rst @@ -0,0 +1,62 @@ +.. _WdColorIndex: + +``WD_COLOR_INDEX`` +================== + +alias: **WD_COLOR** + +Specifies a standard preset color to apply. Used for font highlighting and +perhaps other applications. + +---- + +AUTO + Automatic color. Default; usually black. + +BLACK + Black color. + +BLUE + Blue color + +BRIGHT_GREEN + Bright green color. + +DARK_BLUE + Dark blue color. + +DARK_RED + Dark red color. + +DARK_YELLOW + Dark yellow color. + +GRAY_25 + 25% shade of gray color. + +GRAY_50 + 50% shade of gray color. + +GREEN + Green color. + +PINK + Pink color. + +RED + Red color. + +TEAL + Teal color. + +TURQUOISE + Turquoise color. + +VIOLET + Violet color. + +WHITE + White color. + +YELLOW + Yellow color. diff --git a/docs/api/enum/WdLineSpacing.rst b/docs/api/enum/WdLineSpacing.rst new file mode 100644 index 000000000..f28142e2d --- /dev/null +++ b/docs/api/enum/WdLineSpacing.rst @@ -0,0 +1,36 @@ +.. _WdLineSpacing: + +``WD_LINE_SPACING`` +=================== + +Specifies a line spacing format to be applied to a paragraph. + +Example:: + + from docx.enum.text import WD_LINE_SPACING + + paragraph = document.add_paragraph() + paragraph.paragraph_format.line_spacing_rule = WD_LINE_SPACING.EXACTLY + +---- + +ONE_POINT_FIVE + Space-and-a-half line spacing. + +AT_LEAST + Line spacing is always at least the specified amount. The amount is + specified separately. + +DOUBLE + Double spaced. + +EXACTLY + Line spacing is exactly the specified amount. The amount is specified + separately. + +MULTIPLE + Line spacing is specified as a multiple of line heights. Changing the font + size will change the line spacing proportionately. + +SINGLE + Single spaced (default). 
diff --git a/docs/api/enum/WdRowAlignment.rst b/docs/api/enum/WdRowAlignment.rst new file mode 100644 index 000000000..4459df5d3 --- /dev/null +++ b/docs/api/enum/WdRowAlignment.rst @@ -0,0 +1,24 @@ +.. _WdRowAlignment: + +``WD_TABLE_ALIGNMENT`` +====================== + +Specifies table justification type. + +Example:: + + from docx.enum.table import WD_TABLE_ALIGNMENT + + table = document.add_table(3, 3) + table.alignment = WD_TABLE_ALIGNMENT.CENTER + +---- + +LEFT + Left-aligned + +CENTER + Center-aligned. + +RIGHT + Right-aligned. diff --git a/docs/api/enum/WdRowHeightRule.rst b/docs/api/enum/WdRowHeightRule.rst new file mode 100644 index 000000000..a72d4bdae --- /dev/null +++ b/docs/api/enum/WdRowHeightRule.rst @@ -0,0 +1,26 @@ +.. _WdRowHeightRule: + +``WD_ROW_HEIGHT_RULE`` +====================== + +alias: **WD_ROW_HEIGHT** + +Specifies the rule for determining the height of a table row + +Example:: + + from docx.enum.table import WD_ROW_HEIGHT_RULE + + table = document.add_table(3, 3) + table.rows[0].height_rule = WD_ROW_HEIGHT_RULE.EXACTLY + +---- + +AUTO + The row height is adjusted to accommodate the tallest value in the row. + +AT_LEAST + The row height is at least a minimum specified value. + +EXACTLY + The row height is an exact value. diff --git a/docs/api/enum/WdStyleType.rst b/docs/api/enum/WdStyleType.rst new file mode 100644 index 000000000..4a4a3213b --- /dev/null +++ b/docs/api/enum/WdStyleType.rst @@ -0,0 +1,29 @@ +.. _WdStyleType: + +``WD_STYLE_TYPE`` +================= + +Specifies one of the four style types: paragraph, character, list, or +table. + +Example:: + + from docx import Document + from docx.enum.style import WD_STYLE_TYPE + + styles = Document().styles + assert styles[0].type == WD_STYLE_TYPE.PARAGRAPH + +---- + +CHARACTER + Character style. + +LIST + List style. + +PARAGRAPH + Paragraph style. + +TABLE + Table style. 
diff --git a/docs/api/enum/WdTabAlignment.rst b/docs/api/enum/WdTabAlignment.rst new file mode 100644 index 000000000..a4adb0fc9 --- /dev/null +++ b/docs/api/enum/WdTabAlignment.rst @@ -0,0 +1,38 @@ +.. _WdTabAlignment: + +``WD_TAB_ALIGNMENT`` +==================== + +Specifies the tab stop alignment to apply. + +---- + +LEFT + Left-aligned. + +CENTER + Center-aligned. + +RIGHT + Right-aligned. + +DECIMAL + Decimal-aligned. + +BAR + Bar-aligned. + +LIST + List-aligned. (deprecated) + +CLEAR + Clear an inherited tab stop. + +END + Right-aligned. (deprecated) + +NUM + Left-aligned. (deprecated) + +START + Left-aligned. (deprecated) diff --git a/docs/api/enum/WdTabLeader.rst b/docs/api/enum/WdTabLeader.rst new file mode 100644 index 000000000..73990eeef --- /dev/null +++ b/docs/api/enum/WdTabLeader.rst @@ -0,0 +1,26 @@ +.. _WdTabLeader: + +``WD_TAB_LEADER`` +================= + +Specifies the character to use as the leader with formatted tabs. + +---- + +SPACES + Spaces. Default. + +DOTS + Dots. + +DASHES + Dashes. + +LINES + Double lines. + +HEAVY + A heavy line. + +MIDDLE_DOT + A vertically-centered dot. diff --git a/docs/api/enum/WdTableDirection.rst b/docs/api/enum/WdTableDirection.rst new file mode 100644 index 000000000..9a7b66c45 --- /dev/null +++ b/docs/api/enum/WdTableDirection.rst @@ -0,0 +1,24 @@ +.. _WdTableDirection: + +``WD_TABLE_DIRECTION`` +====================== + +Specifies the direction in which an application orders cells in the +specified table or row. + +Example:: + + from docx.enum.table import WD_TABLE_DIRECTION + + table = document.add_table(3, 3) + table.direction = WD_TABLE_DIRECTION.RTL + +---- + +LTR + The table or row is arranged with the first column in the leftmost + position. + +RTL + The table or row is arranged with the first column in the rightmost + position. 
diff --git a/docs/api/enum/index.rst b/docs/api/enum/index.rst index 576f45856..ce76e7f51 100644 --- a/docs/api/enum/index.rst +++ b/docs/api/enum/index.rst @@ -8,7 +8,19 @@ can be found here: .. toctree:: :titlesonly: + MsoColorType + MsoThemeColorIndex WdAlignParagraph + WdBuiltinStyle + WdCellVerticalAlignment + WdColorIndex + WdLineSpacing WdOrientation + WdRowAlignment + WdRowHeightRule WdSectionStart + WdStyleType + WdTabAlignment + WdTabLeader + WdTableDirection WdUnderline diff --git a/docs/api/section.rst b/docs/api/section.rst index 478f80423..e2d547c75 100644 --- a/docs/api/section.rst +++ b/docs/api/section.rst @@ -1,18 +1,41 @@ .. _section_api: + Section objects =============== Provides access to section properties such as margins and page orientation. +|Sections| objects +------------------ + .. currentmodule:: docx.section +.. autoclass:: Sections + :members: + |Section| objects ----------------- .. autoclass:: Section - :members: + :members: + + +|_Header| and |_Footer| objects +------------------------------- + + +.. autoclass:: _Header() + :inherited-members: + :members: + :exclude-members: part + + +.. autoclass:: _Footer() + :inherited-members: + :members: + :exclude-members: part diff --git a/docs/api/settings.rst b/docs/api/settings.rst new file mode 100644 index 000000000..509b925b5 --- /dev/null +++ b/docs/api/settings.rst @@ -0,0 +1,13 @@ + +.. _settings_api: + +Document |Settings| objects +--------------------------- + +.. currentmodule:: docx.settings + +.. autoclass:: Settings() + :members: + :inherited-members: + :exclude-members: + part diff --git a/docs/api/shape.rst b/docs/api/shape.rst index 0ce406b3d..200b34977 100644 --- a/docs/api/shape.rst +++ b/docs/api/shape.rst @@ -4,7 +4,7 @@ Shape-related objects ===================== -.. currentmodule:: docx.parts.document +.. currentmodule:: docx.shape |InlineShapes| objects @@ -12,9 +12,7 @@ Shape-related objects .. autoclass:: InlineShapes :members: - - -.. 
currentmodule:: docx.shape + :exclude-members: add_picture |InlineShape| objects diff --git a/docs/api/shared.rst b/docs/api/shared.rst index 161abfb4f..161b8bac4 100644 --- a/docs/api/shared.rst +++ b/docs/api/shared.rst @@ -35,5 +35,25 @@ allowing values to be expressed in the units most appropriate to the context. .. autoclass:: Mm :members: +.. autoclass:: Pt + :members: + +.. autoclass:: Twips + :members: + .. autoclass:: Emu :members: + + +|RGBColor| objects +------------------ + +.. autoclass:: RGBColor(r, g, b) + :members: + :undoc-members: + + `r`, `g`, and `b` are each an integer in the range 0-255 inclusive. Using + the hexadecimal integer notation, e.g. `0x42`, may enhance readability + where hex RGB values are in use:: + + >>> lavender = RGBColor(0xff, 0x99, 0xcc) diff --git a/docs/api/style.rst b/docs/api/style.rst new file mode 100644 index 000000000..afee95c00 --- /dev/null +++ b/docs/api/style.rst @@ -0,0 +1,97 @@ + +.. _style_api: + +Style-related objects +===================== + +A style is used to collect a set of formatting properties under a single name +and apply those properties to a content object all at once. This promotes +formatting consistency throughout a document and across related documents +and allows formatting changes to be made globally by changing the definition +in the appropriate style. + + +|Styles| objects +---------------- + +.. currentmodule:: docx.styles.styles + +.. autoclass:: Styles() + :members: + :inherited-members: + :exclude-members: + get_by_id, get_style_id, part + + +|BaseStyle| objects +------------------- + +.. currentmodule:: docx.styles.style + +.. autoclass:: BaseStyle() + :members: + :inherited-members: + :exclude-members: + part, style_id + + +|CharacterStyle| objects +------------------------- + +.. autoclass:: CharacterStyle() + :show-inheritance: + :members: + :inherited-members: + :exclude-members: + element, part, style_id, type + + +|ParagraphStyle| objects +------------------------- + +..
autoclass:: ParagraphStyle() + :show-inheritance: + :members: + :inherited-members: + :exclude-members: + element, part, style_id, type + + +|_TableStyle| objects +--------------------- + +.. autoclass:: _TableStyle() + :show-inheritance: + :members: + :inherited-members: + :exclude-members: + element, part, style_id, type + + +|_NumberingStyle| objects +------------------------- + +.. autoclass:: _NumberingStyle() + :members: + + +|LatentStyles| objects +---------------------- + +.. currentmodule:: docx.styles.latent + +.. autoclass:: LatentStyles() + :members: + :inherited-members: + :exclude-members: + part + + +|_LatentStyle| objects +---------------------- + +.. autoclass:: _LatentStyle() + :members: + :inherited-members: + :exclude-members: + part diff --git a/docs/api/table.rst b/docs/api/table.rst index e3c9da952..6f27670fa 100644 --- a/docs/api/table.rst +++ b/docs/api/table.rst @@ -15,13 +15,16 @@ Table objects are constructed using the ``add_table()`` method on |Document|. .. autoclass:: Table :members: + :exclude-members: table |_Cell| objects ------------------------ .. autoclass:: _Cell + :inherited-members: :members: + :exclude-members: part |_Row| objects diff --git a/docs/api/text.rst b/docs/api/text.rst index cdb55ff61..f76e3ba33 100644 --- a/docs/api/text.rst +++ b/docs/api/text.rst @@ -4,18 +4,60 @@ Text-related objects ==================== -.. currentmodule:: docx.text - |Paragraph| objects ------------------- -.. autoclass:: Paragraph +.. autoclass:: docx.text.paragraph.Paragraph() + :members: + + +|ParagraphFormat| objects +------------------------- + +.. autoclass:: docx.text.parfmt.ParagraphFormat() + :members: + + +|Hyperlink| objects +------------------- + +.. autoclass:: docx.text.hyperlink.Hyperlink() :members: |Run| objects ------------- -.. autoclass:: Run +.. autoclass:: docx.text.run.Run() + :members: + + +|Font| objects +-------------- + +.. 
autoclass:: docx.text.run.Font() :members: + + +|RenderedPageBreak| objects +--------------------------- + +.. autoclass:: docx.text.pagebreak.RenderedPageBreak() + :members: + + +|TabStop| objects +----------------- + +.. autoclass:: docx.text.tabstops.TabStop() + :members: + + +|TabStops| objects +------------------ + +.. autoclass:: docx.text.tabstops.TabStops() + :members: clear_all + + .. automethod:: docx.text.tabstops.TabStops.add_tab_stop(position, alignment=WD_TAB_ALIGNMENT.LEFT, leader=WD_TAB_LEADER.SPACES) diff --git a/docs/conf.py b/docs/conf.py index 5fb91ca12..883ecb81d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,9 +18,9 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, os.path.abspath('..')) +sys.path.insert(0, os.path.abspath("..")) -from docx import __version__ +from docx import __version__ # noqa # -- General configuration --------------------------------------------------- @@ -31,28 +31,28 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.intersphinx', - 'sphinx.ext.todo', - 'sphinx.ext.coverage', - 'sphinx.ext.viewcode' + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", + "sphinx.ext.todo", + "sphinx.ext.coverage", + "sphinx.ext.viewcode", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. 
-project = u'python-docx' -copyright = u'2013, Steve Canny' +project = "python-docx" +copyright = "2013, Steve Canny" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -69,61 +69,135 @@ rst_epilog = """ .. |api-Document| replace:: :class:`docx.api.Document` -.. |_Body| replace:: :class:`_Body` +.. |AttributeError| replace:: :exc:`.AttributeError` -.. |_Cell| replace:: :class:`_Cell` +.. |BaseStyle| replace:: :class:`.BaseStyle` -.. |_Column| replace:: :class:`_Column` +.. |BlockItemContainer| replace:: :class:`.BlockItemContainer` -.. |_Columns| replace:: :class:`_Columns` +.. |_Body| replace:: :class:`._Body` + +.. |_Cell| replace:: :class:`._Cell` + +.. |_CharacterStyle| replace:: :class:`.CharacterStyle` + +.. |CharacterStyle| replace:: :class:`.CharacterStyle` + +.. |Cm| replace:: :class:`.Cm` + +.. |ColorFormat| replace:: :class:`.ColorFormat` + +.. |_Column| replace:: :class:`._Column` + +.. |_Columns| replace:: :class:`._Columns` + +.. |Comment| replace:: :class:`.Comment` + +.. |Comments| replace:: :class:`.Comments` + +.. |CoreProperties| replace:: :class:`.CoreProperties` + +.. |datetime| replace:: :class:`.datetime.datetime` .. |Document| replace:: :class:`.Document` +.. |DocumentPart| replace:: :class:`.DocumentPart` + .. |docx| replace:: ``python-docx`` .. |Emu| replace:: :class:`.Emu` -.. |False| replace:: ``False`` +.. |False| replace:: :class:`False` + +.. |float| replace:: :class:`.float` + +.. |Font| replace:: :class:`.Font` + +.. |_Footer| replace:: :class:`._Footer` + +.. |FooterPart| replace:: :class:`.FooterPart` + +.. |_Header| replace:: :class:`._Header` + +.. |HeaderPart| replace:: :class:`.HeaderPart` + +.. |Hyperlink| replace:: :class:`.Hyperlink` + +.. |ImageParts| replace:: :class:`.ImageParts` + +.. |Inches| replace:: :class:`.Inches` .. |InlineShape| replace:: :class:`.InlineShape` .. 
|InlineShapes| replace:: :class:`.InlineShapes` -.. |int| replace:: :class:`int` +.. |InvalidSpanError| replace:: :class:`.InvalidSpanError` + +.. |int| replace:: :class:`.int` + +.. |_LatentStyle| replace:: :class:`._LatentStyle` + +.. |LatentStyles| replace:: :class:`.LatentStyles` .. |Length| replace:: :class:`.Length` -.. |OpcPackage| replace:: :class:`OpcPackage` +.. |None| replace:: :class:`.None` -.. |None| replace:: ``None`` +.. |NumberingPart| replace:: :class:`.NumberingPart` -.. |NumberingPart| replace:: :class:`NumberingPart` +.. |_NumberingStyle| replace:: :class:`._NumberingStyle` + +.. |OpcPackage| replace:: :class:`.OpcPackage` .. |Paragraph| replace:: :class:`.Paragraph` -.. |Part| replace:: :class:`Part` +.. |ParagraphFormat| replace:: :class:`.ParagraphFormat` + +.. |_ParagraphStyle| replace:: :class:`.ParagraphStyle` + +.. |ParagraphStyle| replace:: :class:`.ParagraphStyle` + +.. |Part| replace:: :class:`.Part` + +.. |Pt| replace:: :class:`.Pt` + +.. |_Relationship| replace:: :class:`._Relationship` + +.. |Relationships| replace:: :class:`._Relationships` -.. |_Relationship| replace:: :class:`_Relationship` +.. |RenderedPageBreak| replace:: :class:`.RenderedPageBreak` -.. |Relationships| replace:: :class:`_Relationships` +.. |RGBColor| replace:: :class:`.RGBColor` -.. |_Row| replace:: :class:`_Row` +.. |_Row| replace:: :class:`._Row` -.. |_Rows| replace:: :class:`_Rows` +.. |_Rows| replace:: :class:`._Rows` -.. |Run| replace:: :class:`Run` +.. |Run| replace:: :class:`.Run` .. |Section| replace:: :class:`.Section` .. |Sections| replace:: :class:`.Sections` +.. |Settings| replace:: :class:`.Settings` + +.. |str| replace:: :class:`.str` + +.. |Styles| replace:: :class:`.Styles` + .. |StylesPart| replace:: :class:`.StylesPart` .. |Table| replace:: :class:`.Table` -.. |Text| replace:: :class:`Text` +.. |_TableStyle| replace:: :class:`._TableStyle` + +.. |TabStop| replace:: :class:`.TabStop` + +.. |TabStops| replace:: :class:`.TabStops` -.. 
|True| replace:: ``True`` +.. |_Text| replace:: :class:`._Text` + +.. |True| replace:: :class:`True` .. |ValueError| replace:: :class:`ValueError` """ @@ -131,7 +205,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['.build'] +exclude_patterns = [".build"] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -149,7 +223,7 @@ # show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] @@ -159,7 +233,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'armstrong' +html_theme = "armstrong" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -167,7 +241,7 @@ # html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -html_theme_path = ['_themes'] +html_theme_path = ["_themes"] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". @@ -188,7 +262,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. @@ -200,10 +274,7 @@ # Custom sidebar templates, maps document names to template names. 
# html_sidebars = {} -html_sidebars = { - '**': ['localtoc.html', 'relations.html', 'sidebarlinks.html', - 'searchbox.html'] -} +html_sidebars = {"**": ["localtoc.html", "relations.html", "sidebarlinks.html", "searchbox.html"]} # Additional templates that should be rendered to pages, maps page names to # template names. @@ -236,7 +307,7 @@ # html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'python-docxdoc' +htmlhelp_basename = "python-docxdoc" # -- Options for LaTeX output ----------------------------------------------- @@ -244,10 +315,8 @@ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # 'preamble': '', } @@ -259,8 +328,7 @@ # author, # documentclass [howto/manual]). latex_documents = [ - ('index', 'python-docx.tex', u'python-docx Documentation', - u'Steve Canny', 'manual'), + ("index", "python-docx.tex", "python-docx Documentation", "Steve Canny", "manual"), ] # The name of an image file (relative to this directory) to place at the top of @@ -288,10 +356,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'python-docx', u'python-docx Documentation', - [u'Steve Canny'], 1) -] +man_pages = [("index", "python-docx", "python-docx Documentation", ["Steve Canny"], 1)] # If true, show URL addresses after external links. 
# man_show_urls = False @@ -303,9 +368,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'python-docx', u'python-docx Documentation', - u'Steve Canny', 'python-docx', 'One line description of project.', - 'Miscellaneous'), + ( + "index", + "python-docx", + "python-docx Documentation", + "Steve Canny", + "python-docx", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. @@ -319,4 +390,4 @@ # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'http://docs.python.org/': None} +intersphinx_mapping = {"http://docs.python.org/3/": None} diff --git a/docs/dev/analysis/features/char-style.rst b/docs/dev/analysis/features/char-style.rst deleted file mode 100644 index 9b62b9983..000000000 --- a/docs/dev/analysis/features/char-style.rst +++ /dev/null @@ -1,138 +0,0 @@ - -Character Style -=============== - -Word allows a set of run-level properties to be given a name. The set of -properties is called a *character style*. All the settings may be applied to -a run in a single action by setting the style of the run. - -Example: - - The normal font of a document is 10 point Times Roman. From time to time, - a Python class name appears in-line in the text. These short runs of - Python text are to appear in 9 point Courier. A character style named "Code" - is defined such that these words or phrases can be set to the distinctive - font and size in a single step. - - Later, it is decided that 10 point Menlo should be used for inline code - instead. The "Code" character style is updated to the new settings and all - instances of inline code in the document immediately appear in the new - font. - - -Protocol --------- - -There are two call protocols related to character style: getting and setting -the character style of a run, and specifying a style when creating a run. 
- -Getting and setting the style of a run:: - - >>> run = p.add_run() - >>> run.style - None - >>> run.style = 'Emphasis' - >>> run.style - 'Emphasis' - >>> run.style = None - >>> run.style - None - -Assigning |None| to ``Run.style`` causes any applied character style to be -removed. A run without a character style inherits the character style of its -containing paragraph. - -Specifying the style of a run on creation:: - - >>> run = p.add_run() - >>> run.style - None - >>> run = p.add_run(style='Emphasis') - >>> run.style - 'Emphasis' - >>> run = p.add_run('text in this run', 'Strong') - >>> run.style - 'Strong' - - - -Specimen XML ------------- - -.. highlight:: xml - -A baseline regular run:: - - - - This is a regular paragraph. - - - -Adding *Emphasis* character style:: - - - - - - - This paragraph appears in Emphasis character style. - - - -A style that appears in the Word user interface (UI) with one or more spaces -in its name, such as "Subtle Emphasis", will generally have a style ID with -those spaces removed. In this example, "Subtle Emphasis" becomes -"SubtleEmphasis":: - - - - - - - a few words in Subtle Emphasis style - - - - - -Schema excerpt --------------- - -.. highlight:: xml - -:: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/dev/analysis/features/comments.rst b/docs/dev/analysis/features/comments.rst new file mode 100644 index 000000000..153079caf --- /dev/null +++ b/docs/dev/analysis/features/comments.rst @@ -0,0 +1,419 @@ + +Comments +======== + +Word allows *comments* to be added to a document. This is an aspect of the *reviewing* +feature-set and is typically used by a second party to provide feedback to the author +without changing the document itself. + +The procedure is simple: + +- You select some range of text with the mouse or Shift+Arrow keys +- You press the *New Comment* button (Review toolbar) +- You type or paste in your comment + +.. 
image:: /_static/img/comment-parts.png + +**Comment Anatomy.** Each comment has two parts, the *comment-reference* and the +*comment-content*: + +The *comment-reference*, sometimes *comment-anchor*, is the text you selected before +pressing the *New Comment* button. It is a *range* in the document content delimited by +a start marker and an end marker, and containing the *id* of the comment that refers to +it. + +The *comment-content* is whatever content you typed or pasted in. The content for each +comment is stored in the separate *comments-part* (part-name ``word/comments.xml``) as a +distinct comment object. Each comment has a unique id, allowing a comment reference to +be associated with its content and vice versa. + +**Comment Reference.** The comment-reference is a *range*. A range must both start and +end at an even *run* boundary. Intuitively, a range corresponds to a *selection* of text +in the Word UI, one formed by dragging with the mouse or using the *Shift-Arrow* keys. + +In general a range can span "run containers", such as paragraphs, such that the range +begins in one paragraph and ends in a later paragraph. However, a range must enclose +*contiguous* runs, such that a range that contains only two vertically adjacent cells in +a multi-column table is not possible (even though such a selection with the mouse is +possible). + +**Comment Content.** Interestingly, although commonly used to contain a single line of +plain text, the comment-content can contain essentially any content that can appear in +the document body. This includes rich text with emphasis, runs with a different typeface +and size, both paragraph and character styles, hyperlinks, images, and tables. Note that +tables do not appear in the comment as displayed in the *comment-sidebar* although they +do appear in the *reviewing-pane*. + +**Comment Metadata.** Each comment can be assigned *author*, *initials*, and *date* +metadata.
In Word, these fields are assigned automatically based on values in ``Settings +> User`` of the installed Word application. These may be configured automatically in an +enterprise installation, based on the user account, but by default they are empty. + +*author* metadata is required, although silently assigned the empty string by Word if +the user name is not configured. *initials* is optional, but always set by Word, to the +empty string if not configured. *date* is also optional, but always set by Word to the +date and time the comment was added (seconds resolution, UTC). + +**Additional Features.** Later versions of Word allow a comment to be *resolved*. A +comment in this state will appear grayed-out in the Word UI. Later versions of Word also +allow a comment to be *replied to*, forming a *comment thread*. Neither of these +features is supported by the initial implementation of comments in *python-docx*. + +The resolved-status and replies features are implemented as *extensions* and involve two +additional comment-related parts: + +- `commentsExtended.xml` - contains completion (resolved) status and parent-id for + threading comment responses; keys to `w15:paraId` of comment paragraph in + `comments.xml` +- `commentsIds.xml` - maps `w16cid:paraId` to `w16cid:durableId`, not sure what that is + exactly. + +**Applicability.** Note that comments cannot be added to a header or footer and cannot +be nested inside a comment itself. In general the *python-docx* API will not allow these +operations but if you outsmart it then the resulting comment will either be silently +removed or trigger a repair error when the document is loaded by Word. + + +Word Behavior +------------- + +- A DOCX package does not contain a ``comments.xml`` part by default. It is added to the + package when the first comment is added to the document. + +- A newly-created comment contains a single paragraph + +- Word starts `w:id` at 0 and increments from there. 
It appears to use a + `max(comment_ids) + 1` algorithm rather than aggressively filling in id numbering + gaps. + +- Word-behavior: looks like Word doesn't allow a "zero-length" comment reference; if you + insert a comment when no text is selected, the word prior to the insertion-point is + selected. + +- Word allows a comment to be applied to a range that starts before any character and + ends after any later character. However, the XML range-markers can only be placed + between runs. Word accommodates this by breaking runs as necessary to start and stop + at the desired character positions. + + +MS API +------ + +.. highlight:: python + +**Document**:: + + Document.Comments + +**Comments** + +https://learn.microsoft.com/en-us/office/vba/api/word.comments:: + + Comments.Add(Range, Text) -> Comment + + # -- retrieve comment by array idx, not comment_id key -- + Comments.Item(idx: Long) -> Comment + + Comments.Count() -> Long + + # -- restrict visible comments to those by a particular reviewer -- + Comments.ShowBy = "Travis McGuillicuddy" + +**Comment** + +https://learn.microsoft.com/en-us/office/vba/api/word.comment:: + + # -- delete comment and all replies to it -- + Comment.DeleteRecursively() -> void + + # -- open OLE object embedded in comment for editing -- + Comment.Edit() -> void + + # -- get the "parent" comment when this comment is a reply -- + Comment.Ancestor() -> Comment | Nothing + + # -- author of this comment, with email and name fields -- + Comment.Contact -> CoAuthor + + Comment.Date -> Date + Comment.Done -> bool + Comment.IsInk -> bool + + # -- content of the comment, contrast with `Reference` below -- + Comment.Range -> Range + + # -- content within document this comment refers to -- + Comment.Reference -> Range + + Comment.Replies -> Comments + + # -- described in API docs like the same thing as `Reference` -- + Comment.Scope -> Range + + +Candidate Protocol +------------------ + +..
highlight:: python + +The critical required reference for adding a comment is the *range* referred to by the +comment; i.e. the "selection" of text that is being commented on. Because this range +must start and end at an even run boundary, it is enough to specify the first and last +run in the range, where a single run can be both the start and end run:: + + >>> paragraph = document.add_paragraph("Hello, world!") + >>> document.add_comment( + ... runs=paragraph.runs, + ... text="I have this to say about that", + ... author="Steve Canny", + ... initials="SC", + ... ) + + +A single run can be provided when that is more convenient:: + + >>> paragraph = document.add_paragraph("Summary: ") + >>> run = paragraph.add_run("{{place-summary-here}}") + >>> document.add_comment( + ... run, text="The AI model will replace this placeholder with a summary" + ... ) + + +Note that `author` and `initials` are optional parameters; both default to the empty +string. + +`text` is also an optional parameter and also defaults to the empty string. Omitting a +`text` argument (or passing `text=""`) produces a comment containing a single paragraph +you can immediately add runs to and add additional paragraphs after:: + + >>> paragraph = document.add_paragraph("Summary: ") + >>> run = paragraph.add_run("{{place-summary-here}}") + >>> comment = document.add_comment(run) + >>> paragraph = comment.paragraphs[0] + >>> paragraph.add_run("The ") + >>> paragraph.add_run("AI model").bold = True + >>> paragraph.add_run(" will replace this placeholder with a ") + >>> paragraph.add_run("summary").bold = True + + +A method directly on |Run| may also be convenient, since you will always have the first +run of the range in hand when adding a comment but may not have ready access to the +``document`` object:: + + >>> runs = find_sequence_of_one_or_more_runs_to_comment_on() + >>> runs[0].add_comment( + ... last_run=runs[-1], + ... text="The AI model will replace this placeholder with a summary", + ...
) + + +However, in this situation we would need to qualify the runs as being inside the +document part and not in a header or footer or comment, and perhaps other invalid +comment locations. I believe comments can be applied to footnotes and endnotes though. + + +Specimen XML +------------ + +.. highlight:: xml + +``comments.xml`` (namespace declarations may vary):: + + + + > + + + + + + + + + + I have this to say about that + + + + + + +Comment reference in document body:: + + + + + Hello, world! + + + + + + + + + + + +**Notes** + +- `w:comment` is a *block-item* container, and can contain any content that can appear + in a document body or table cell, including both paragraphs and tables (and whatever + can go inside those, like images, hyperlinks, etc.). + +- Word places the `w:annotationRef`-containing run as the first run in the first + paragraph of the comment. I haven't been able to detect any behavior change caused by + leaving this out or placing it elsewhere in the comment content. + +- Relationships referenced from within `w:comment` content are relationships *from the + comments part* to the image part, hyperlink, etc. + +- `w:commentRangeStart` and `w:commentRangeEnd` elements are *optional*. The + authoritative position of the comment is the required `w:commentReference` element. + This means the *ending* location of a comment anchor can be efficiently found using + XPath. + + +Schema Excerpt +-------------- + +**Notes:** + +- `commentRangeStart` and `commentRangeEnd` are both type `CT_MarkupRange` and both + belong to `EG_RunLevelElts` (peers of `w:r`) which gives them their positioning in the + document structure. + +- These two markers can occur at the *block* level, at the *run* level, or at the *table + row* or *cell* level. However Word only seems to use them as peers of `w:r`. These can + occur as a sibling to: + + - a *paragraph* (`w:p`) + - a *table* (`w:tbl`) + - a *run* (`w:r`) + - a *table row* (`w:tr`) + - a *table cell* (`w:tc`) + +..
code-block:: xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/coreprops.rst b/docs/dev/analysis/features/coreprops.rst new file mode 100644 index 000000000..d4100864d --- /dev/null +++ b/docs/dev/analysis/features/coreprops.rst @@ -0,0 +1,199 @@ + +Core Document Properties +======================== + +The Open XML format provides for a set of descriptive properties to be +maintained with each document. One of these is the *core file properties*. +The core properties are common to all Open XML formats and appear in +document, presentation, and spreadsheet files. The 'Core' in core document +properties refers to `Dublin Core`_, a metadata standard that defines a core +set of elements to describe resources. + +The core properties are described in Part 2 of the ISO/IEC 29500 spec, in +Section 11. The names of some core properties in |docx| are changed from +those in the spec to conform to the MS API. + +Other properties such as company name are custom properties, held in +``app.xml``. + + +Candidate Protocol +------------------ + +:: + + >>> document = Document() + >>> core_properties = document.core_properties + >>> core_properties.author + 'python-docx' + >>> core_properties.author = 'Brian' + >>> core_properties.author + 'Brian' + + +Properties +---------- + +15 properties are supported. All unicode values are limited to 255 characters +(not bytes). + +author *(unicode)* + Note: named 'creator' in spec. An entity primarily responsible for making + the content of the resource. (Dublin Core) + +category *(unicode)* + A categorization of the content of this package. Example values for this + property might include: Resume, Letter, Financial Forecast, Proposal, + Technical Presentation, and so on. 
(Open Packaging Conventions) + +comments *(unicode)* + Note: named 'description' in spec. An explanation of the content of the + resource. Values might include an abstract, table of contents, reference + to a graphical representation of content, and a free-text account of the + content. (Dublin Core) + +content_status *(unicode)* + The status of the content. Values might include “Draft”, “Reviewed”, and + “Final”. (Open Packaging Conventions) + +created *(datetime)* + Date of creation of the resource. (Dublin Core) + +identifier *(unicode)* + An unambiguous reference to the resource within a given context. + (Dublin Core) + +keywords *(unicode)* + A delimited set of keywords to support searching and indexing. This is + typically a list of terms that are not available elsewhere in the + properties. (Open Packaging Conventions) + +language *(unicode)* + The language of the intellectual content of the resource. (Dublin Core) + +last_modified_by *(unicode)* + The user who performed the last modification. The identification is + environment-specific. Examples include a name, email address, or employee + ID. It is recommended that this value be as concise as possible. + (Open Packaging Conventions) + +last_printed *(datetime)* + The date and time of the last printing. (Open Packaging Conventions) + +modified *(datetime)* + Date on which the resource was changed. (Dublin Core) + +revision *(int)* + The revision number. This value might indicate the number of saves or + revisions, provided the application updates it after each revision. + (Open Packaging Conventions) + +subject *(unicode)* + The topic of the content of the resource. (Dublin Core) + +title *(unicode)* + The name given to the resource. (Dublin Core) + +version *(unicode)* + The version designator. This value is set by the user or by the + application. (Open Packaging Conventions) + + +Specimen XML +------------ + +.. 
highlight:: xml + +core.xml produced by Microsoft Word:: + + + + Core Document Properties Exploration + PowerPoint core document properties + Steve Canny + powerpoint; open xml; dublin core; microsoft office + + One thing I'd like to discover is just how line wrapping is handled + in the comments. This paragraph is all on a single + line._x000d__x000d_This is a second paragraph separated from the + first by two line feeds. + + Steve Canny + 2 + 2013-04-06T06:03:36Z + 2013-06-15T06:09:18Z + analysis + + + +Schema Excerpt +-------------- + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. _Dublin Core: + http://en.wikipedia.org/wiki/Dublin_Core diff --git a/docs/dev/analysis/features/header.rst b/docs/dev/analysis/features/header.rst new file mode 100644 index 000000000..1fe75f316 --- /dev/null +++ b/docs/dev/analysis/features/header.rst @@ -0,0 +1,291 @@ +.. _header: + +Header and Footer +================= + +In a WordprocessingML document, a page header is text that is separated from the main +body of text and appears at the top of a printed page. The page headers in a document +are often the same from page to page, with only small differences in content, such as +a section title or page number. Such a header is also known as a running head. + +A page footer is analogous in every way to a page header except that it appears at the +bottom of a page. It should not be confused with a footnote, which is not uniform +between pages. For brevity's sake, the term `header` is often used here to refer to what +may be either a header or footer object, trusting the reader to understand its +applicability to both object types. + +In book-printed documents, where pages are printed on both sides, when opened, the front +or `recto` side of each page appears to the right of the bound edge and the back or +`verso` side of each page appears on the left. 
The first printed page receives the +page-number "1", and is always a recto page. Because pages are numbered consecutively, +each recto page receives an `odd` page number and each verso page receives an `even` +page number. + +The header appearing on a recto page often differs from that on a verso page. Supporting +this difference gives rise to the option to have an even-page header that differs from +the default odd-page header in a document. This "both odd-and-even headers" option is +applied at the document level and affects all sections of the document. + +The header appearing on the first page of a section (e.g. a chapter) may differ from +that appearing on subsequent pages. Supporting this difference gives rise to the option +to set a distinct first-page header. This "different first-page-header" option is +applied at the section level and may differ from section-to-section in the document. + +In WordprocessingML, a header or footer appears within the margin area of a page. With +a few exceptions, a header or footer may contain all the types of content that can +appear in the main body, including text and images. Each header and footer has access to +the styles defined in ``/word/styles.xml``. + +Each section has its own set of headers and footers, although a section can be +configured to "inherit" headers and footers from the prior section. Each section can +have three header definitions, the default header, even header, and first page header. +When different even/odd headers are not enabled, the default header appears on both even +and odd numbered pages. If even/odd headers are enabled, the default header is used for +odd pages. A corresponding set of three footer definitions are also possible. All +header/footer definitions are optional. + + +Open Questions +-------------- + +* What about a continuous section break? What is the header/footer behavior there? 
+ + +Candidate Protocol +------------------ + +Every section has a header; it is never None:: + + >>> header = section.header + >>> header + + + +There are three header properties on |Section|: `.header`, +`.even_page_header`, and `.first_page_header`. All header objects share the +same properties and methods. There are three corresponding properties for the +footers. + +Header is a subclass of |BlockItemContainer|, from which it inherits the same +content editing capabilities as |Document|, such as `.add_paragraph()`. + +If the `w:headerReference` element for a header is not present, the +definition for that header is "inherited" from the prior section. This action +is recursive, such that, for example, the header definition from the first +section could be applied to the third section. A header that inherits its +definition is said to be "linked to previous". Perhaps counterintuitively, +a header for the first section can be "linked to previous", even though no +previous section exists. The `.is_linked_to_previous` property is simply +a test for the existence of a header definition in the current section:: + + >>> header.is_linked_to_previous + True + +Editing operations transparently operate on the source header, the one in the +first prior section having a header of that type (when one is not present in +the current section). 
If no prior sections have a header, one is created in +the first section of the document on the first constructive edit call:: + + >>> header = document.sections[0].header + >>> header.is_linked_to_previous + True + >>> header.text = 'foobar' + >>> header.is_linked_to_previous + False + +Assigning False to `.is_linked_to_previous` creates a blank header for that +section when one does not already exist:: + + >>> header.is_linked_to_previous + True + >>> header.is_linked_to_previous = False + >>> header.is_linked_to_previous + False + +Conversely, an existing header is deleted from a section by assigning True to +`.is_linked_to_previous`:: + + >>> header.is_linked_to_previous + False + >>> header.is_linked_to_previous = True + >>> header.is_linked_to_previous + True + +The document settings object has a read/write `.odd_and_even_pages_header_footer` +property that indicates verso and recto pages will have a different header. Any existing +even page header definitions are preserved when `.odd_and_even_pages_header_footer` is +False; they are simply not rendered by Word. Assigning `True` to +`.odd_and_even_pages_header_footer` does not automatically create new even header +definitions:: + + >>> document.settings.odd_and_even_pages_header_footer + False + >>> document.settings.odd_and_even_pages_header_footer = True + >>> section.even_page_header.is_linked_to_previous + True + +`Section` has a read/write `.different_first_page_header_footer` property +that indicates whether the first page of the section should have a distinct +header. Assigning `True` to `.different_first_page_header_footer` does not +automatically create a new first page header definition:: + + >>> section.different_first_page_header_footer + False + >>> section.different_first_page_header_footer = True + >>> section.different_first_page_header_footer + True + >>> section.first_page_header.is_linked_to_previous + True + + +Specimen XML +------------ + +.. 
highlight:: xml + +There are seven different permutations of headers: + +The same header on all pages of the document:: + + + + ... + + + +Only an odd header. The section is exactly the same as above but +`settings.xml` has the `<w:evenAndOddHeaders>` property:: + + + ... + + ... + + +Different even and odd headers:: + + + + + ... + + +Distinct first page header, subsequent pages all have the same header:: + + + + + + ... + + +Distinct first, even, and odd page headers:: + + + + + + + ... + + +A header part:: + + + + + + + + Header for section-1 + + + + + +Word Behavior +------------- + +* When you turn off even/odd headers, Word sets the value of + `w:evenAndOddHeaders` to 0, but does not actually remove the even header. + +* When you turn off first page header, Word sets the value of `w:titlePg` to + 0, but does not actually remove the first page header. + +* Word will load a file with an even page header but no odd page header. + + +MS API +------ + +.. highlight:: python + +WdHeaderFooterIndex Enumeration:: + + EVEN_PAGES = 3 + FIRST_PAGE = 2 + PRIMARY = 1 + +Create footer in MS API:: + + section = Document.Sections(1) + footers = section.Footers # a HeadersFooters collection object + default_footer = footers(wdHeaderFooterPrimary) + default_footer.Range.Text = "Footer text" + +PageSetup object:: + + DifferentFirstPageHeaderFooter: Read/write {True, False, WD_UNDEFINED} + OddAndEvenPagesHeaderFooter: Read/write {True, False, WD_UNDEFINED} + + +Schema Excerpt +-------------- + +.. 
code-block:: xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/par-alignment.rst b/docs/dev/analysis/features/par-alignment.rst deleted file mode 100644 index 48c29cf67..000000000 --- a/docs/dev/analysis/features/par-alignment.rst +++ /dev/null @@ -1,174 +0,0 @@ - -Paragraph alignment -=================== - -In Word, each paragraph has an *alignment* attribute that specifies how to -justify the lines of the paragraph when the paragraph is laid out on the -page. Common values are left, right, centered, and justified. - - -Protocol --------- - -The protocol for getting and setting paragraph alignment is illustrated in -this interactive session:: - - >>> paragraph = body.add_paragraph() - >>> paragraph.alignment - None - >>> paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT - >>> paragraph.alignment - RIGHT (2) - >>> paragraph.alignment = None - >>> paragraph.alignment - None - - -Semantics ---------- - -If the ```` element is not present on a paragraph, the alignment value -for that paragraph is inherited from its style hierarchy. If the element is -present, its value overrides any inherited value. From the API, a value of -|None| on the ``Paragraph.alignment`` property corresponds to no ```` -element being present. If |None| is assigned to ``Paragraph.alignment``, the -```` element is removed. 
- - -Enumerations ------------- - -WD_ALIGN_PARAGRAPH -~~~~~~~~~~~~~~~~~~ - -`WdParagraphAlignment Enumeration on MSDN`_ - -+--------------+------+----------------+ -| Name | enum | attr | -+==============+======+================+ -| LEFT | 0 | left | -+--------------+------+----------------+ -| CENTER | 1 | center | -+--------------+------+----------------+ -| RIGHT | 2 | right | -+--------------+------+----------------+ -| JUSTIFY | 3 | both | -+--------------+------+----------------+ -| DISTRIBUTE | 4 | distribute | -+--------------+------+----------------+ -| JUSTIFY_MED | 5 | mediumKashida | -+--------------+------+----------------+ -| JUSTIFY_HI | 7 | highKashida | -+--------------+------+----------------+ -| JUSTIFY_LOW | 8 | lowKashida | -+--------------+------+----------------+ -| THAI_JUSTIFY | 9 | thaiDistribute | -+--------------+------+----------------+ - -.. _WdParagraphAlignment Enumeration on MSDN: - http://msdn.microsoft.com/en-us/library/office/ff835817(v=office.15).aspx - - -Specimen XML ------------- - -.. highlight:: xml - -A paragraph with inherited alignment:: - - - - Inherited paragraph alignment. - - - -A right-aligned paragraph:: - - - - - - - Right-aligned paragraph. - - - - -Schema excerpt --------------- - -:: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/dev/analysis/features/sections.rst b/docs/dev/analysis/features/sections.rst index f57c0b4bf..7f9dce91f 100644 --- a/docs/dev/analysis/features/sections.rst +++ b/docs/dev/analysis/features/sections.rst @@ -2,7 +2,7 @@ Sections ======== -Word supports the notion of a *section*, having distinct page layout settings. +Word supports the notion of a `section`, having distinct page layout settings. This is how, for example, a document can contain some pages in portrait layout and others in landscape. 
Section breaks are implemented completely differently from line, page, and column breaks. The former adds a ```` diff --git a/docs/dev/analysis/features/settings.rst b/docs/dev/analysis/features/settings.rst new file mode 100644 index 000000000..46c816fba --- /dev/null +++ b/docs/dev/analysis/features/settings.rst @@ -0,0 +1,196 @@ + +Settings part +============= + +In WordprocessingML, document-level settings are defined in the +`settings.xml` part. There are 98 distinct settings, all of which are +optional (according to the spec at least). + +The API does not provide for direct access to the settings part. A |Settings| +proxy object is available on the :attr:`.Document.settings` property and +provides access to the document-level settings. The |Document| object obtains +access via its document part. |DocumentPart| brokers all access to the +settings part. + + +Candidate Protocol +------------------ + +.. highlight:: python + +:: + + >>> document = Document() + >>> document.settings + + + +Specimen XML +------------ + +.. highlight:: xml + +Default `settings.xml` part for a new document in Word 2016:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Schema Excerpts +--------------- + +.. highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/shapes.rst b/docs/dev/analysis/features/shapes/index.rst similarity index 87% rename from docs/dev/analysis/features/shapes.rst rename to docs/dev/analysis/features/shapes/index.rst index 6cb6c077e..19e42de0e 100644 --- a/docs/dev/analysis/features/shapes.rst +++ b/docs/dev/analysis/features/shapes/index.rst @@ -2,12 +2,8 @@ Shapes (in general) =================== - -Overview --------- - -A graphical object that appears in a Word document is known as a *shape*. 
-A shape can be *inline* or *floating*. An inline shape appears on a text +A graphical object that appears in a Word document is known as a `shape`. +A shape can be `inline` or `floating`. An inline shape appears on a text baseline as though it were a character glyph and affects the line height. A floating shape appears at an arbitrary location on the document and text may wrap around it. Several types of shape can be placed, including a picture, a @@ -18,6 +14,16 @@ that determines the placement of the graphic. The same graphical object can be placed inline or floating by changing its container. The graphic itself is unaffected. +In addition to this overview, there are the following more specialized +feature analyses: + +.. toctree:: + :titlesonly: + + shapes-inline + shapes-inline-size + picture + MS API ------ diff --git a/docs/dev/analysis/features/picture.rst b/docs/dev/analysis/features/shapes/picture.rst similarity index 98% rename from docs/dev/analysis/features/picture.rst rename to docs/dev/analysis/features/shapes/picture.rst index e98fed4bc..ca327512a 100644 --- a/docs/dev/analysis/features/picture.rst +++ b/docs/dev/analysis/features/shapes/picture.rst @@ -2,10 +2,6 @@ Picture ======= - -Overview --------- - Word allows a picture to be placed in a graphical object container, either an inline shape or a floating shape. 
@@ -16,7 +12,7 @@ Candidate protocol :: >>> run = paragraph.add_run() - >>> inline_shape = run.add_inline_picture(file_like_image, MIME_type=None) + >>> inline_shape = run.add_picture(file_like_image, MIME_type=None) >>> inline_shape.width = width >>> inline_shape.height = height diff --git a/docs/dev/analysis/features/shapes-inline-size.rst b/docs/dev/analysis/features/shapes/shapes-inline-size.rst similarity index 99% rename from docs/dev/analysis/features/shapes-inline-size.rst rename to docs/dev/analysis/features/shapes/shapes-inline-size.rst index dd69c03c8..6e21e14ad 100644 --- a/docs/dev/analysis/features/shapes-inline-size.rst +++ b/docs/dev/analysis/features/shapes/shapes-inline-size.rst @@ -2,10 +2,6 @@ Inline shape size ================= - -Overview --------- - The position of an inline shape is completely determined by the text it is inline with, however its dimensions can be specified. For some shape types, both the contained shape and the shape container specify a width and height. diff --git a/docs/dev/analysis/features/shapes-inline.rst b/docs/dev/analysis/features/shapes/shapes-inline.rst similarity index 99% rename from docs/dev/analysis/features/shapes-inline.rst rename to docs/dev/analysis/features/shapes/shapes-inline.rst index c6e952dfb..cb6a0bbfb 100644 --- a/docs/dev/analysis/features/shapes-inline.rst +++ b/docs/dev/analysis/features/shapes/shapes-inline.rst @@ -2,10 +2,6 @@ Inline shape ============ - -Overview --------- - Word allows a graphical object to be placed into a document as an inline object. An inline shape appears as a ```` element as a child of a ```` element. diff --git a/docs/dev/analysis/features/styles/character-style.rst b/docs/dev/analysis/features/styles/character-style.rst new file mode 100644 index 000000000..1779872fa --- /dev/null +++ b/docs/dev/analysis/features/styles/character-style.rst @@ -0,0 +1,161 @@ + +Character Style +=============== + +Word allows a set of run-level properties to be given a name. 
The set of +properties is called a *character style*. All the settings may be applied to +a run in a single action by setting the style of the run. + + +Protocol +-------- + +There are two call protocols related to character style: getting and setting +the character style of a run, and specifying a style when creating a run. + +Get run style:: + + >>> run = p.add_run() + + >>> run.style + + >>> run.style.name + 'Default Paragraph Font' + +Set run style using character style name:: + + >>> run.style = 'Emphasis' + >>> run.style.name + 'Emphasis' + +Set run style using character style object:: + + >>> run.style = document.styles['Strong'] + >>> run.style.name + 'Strong' + +Assigning |None| to :attr:`.Run.style` causes any applied character style to +be removed. A run without a character style inherits the default character +style of the document:: + + >>> run.style = None + >>> run.style.name + 'Default Paragraph Font' + +Specifying the style of a run on creation:: + + >>> run = p.add_run(style='Strong') + >>> run.style.name + 'Strong' + + +Specimen XML +------------ + +.. highlight:: xml + +A baseline regular run:: + + + + This is a regular paragraph. + + + +Adding `Emphasis` character style:: + + + + + + + This paragraph appears in Emphasis character style. + + + +A style that appears in the Word user interface (UI) with one or more spaces +in its name, such as "Subtle Emphasis", will generally have a style ID with +those spaces removed. In this example, "Subtle Emphasis" becomes +"SubtleEmphasis":: + + + + + + + a few words in Subtle Emphasis style + + + + +Schema excerpt +-------------- + +.. 
highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/styles/index.rst b/docs/dev/analysis/features/styles/index.rst new file mode 100644 index 000000000..ddcec1c1b --- /dev/null +++ b/docs/dev/analysis/features/styles/index.rst @@ -0,0 +1,330 @@ + +Styles +====== + +.. toctree:: + :titlesonly: + + styles + style + paragraph-style + character-style + latent-styles + +Word supports the definition of `styles` to allow a group of formatting +properties to be easily and consistently applied to a paragraph, run, table, +or numbering scheme, all at once. The mechanism is similar to how Cascading +Style Sheets (CSS) works with HTML. + +Styles are defined in the ``styles.xml`` package part and are keyed to +a paragraph, run, or table using the `styleId` string. + +Style visual behavior +--------------------- + +* **Sort order.** Built-in styles appear in order of the effective value of + their `uiPriority` attribute. By default, a custom style will not receive + a `uiPriority` attribute, causing its effective value to default to 0. This + will generlly place custom styles at the top of the sort order. A set of + styles having the same `uiPriority` value will be sub-sorted in + alphabetical order. + + If a `uiPriority` attribute is defined for a custom style, that style is + interleaved with the built-in styles, according to their `uiPriority` + value. The `uiPriority` attribute takes a signed integer, and accepts + negative numbers. Note that Word does not allow the use of negative + integers via its UI; rather it allows the `uiPriority` number of built-in + types to be increased to produce the desired sorting behavior. + +* **Identification.** A style is identified by its name, not its styleId + attribute. The styleId is used only for internal linking of an object like + a paragraph to a style. 
The styleId may be changed by the application, and + in fact is routinely changed by Word on each save to be a transformation of + the name. + + *Hypothesis.* Word calculates the `styleId` by removing all spaces from the + style name. + +* **List membership.** There are four style list options in the styles panel: + + + *Recommended.* The recommended list contains all latent and defined + styles that have `semiHidden` == |False|. + + + *Styles in Use.* The styles-in-use list contains all styles that have + been applied to content in the document (implying they are defined) that + also have `semiHidden` == |False|. + + + *In Current Document.* The in-current-document list contains all defined + styles in the document having `semiHidden` == |False|. + + + *All Styles.* The all-styles list contains all latent and defined + styles in the document. + +* **Definition of built-in style.** When a built-in style is added to + a document (upon first use), the value of each of the `locked`, + `uiPriority` and `qFormat` attributes from its latent style definition (the + `latentStyles` attributes overridden by those of any `lsdException` + element) is used to override the corresponding value in the inserted style + definition from their built-in defaults. + +* Each built-in style has default attributes that can be revealed by setting + the `latentStyles/@count` attribute to 0 and inspecting the style in the + style manager. This may include default behavioral properties. + +* Anomaly. Style "No Spacing" does not appear in the recommended list even + though its behavioral attributes indicate it should. (Google indicates it + may be a legacy style from Word 2003). + +* Word has 267 built-in styles, listed here: + http://www.thedoctools.com/downloads/DocTools_List_Of_Built-in_Style_English_Danish_German_French.pdf + + Note that at least one other source has the number at 276 rather than 267. 
+ +* **Appearance in the Style Gallery.** A style appears in the style gallery + when: `semiHidden` == |False| and `qFormat` == |True| + + +Glossary +-------- + +built-in style + One of a set of standard styles known to Word, such as "Heading 1". + Built-in styles are presented in Word's style panel whether or not they + are actually defined in the styles part. + +latent style + A built-in style having no definition in a particular document is known + as a *latent style* in that document. + +style definition + A ``<w:style>`` element in the styles part that explicitly defines the + attributes of a style. + +recommended style list + A list of styles that appears in the styles toolbox or panel when + "Recommended" is selected from the "List:" dropdown box. + + +Word behavior +------------- + +If no style having an assigned style id is defined in the styles part, the +style application has no effect. + +Word does not add a formatting definition (``<w:style>`` element) for a +built-in style until it is used. + +Once present in the styles part, Word does not remove a built-in style +definition if it is no longer applied to any content. The definition of each +of the styles ever used in a document is accumulated in its ``styles.xml``. + + +Related MS API *(partial)* +-------------------------- + +* Document.Styles +* Styles.Add, .Item, .Count, access by name, e.g. Styles("Foobar") +* Style.BaseStyle +* Style.Builtin +* Style.Delete() +* Style.Description +* Style.Font +* Style.Linked +* Style.LinkStyle +* Style.LinkToListTemplate() +* Style.ListLevelNumber +* Style.ListTemplate +* Style.Locked +* Style.NameLocal +* Style.NameParagraphStyle +* Style.NoSpaceBetweenParagraphsOfSameStyle +* Style.ParagraphFormat +* Style.Priority +* Style.QuickStyle +* Style.Shading +* Style.Table(Style) +* Style.Type +* Style.UnhideWhenUsed +* Style.Visibility + + +Enumerations +------------ + +* WdBuiltinStyle + + +Example XML +----------- + +.. 
highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Schema excerpt +-------------- + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/styles/latent-styles.rst b/docs/dev/analysis/features/styles/latent-styles.rst new file mode 100644 index 000000000..497b0b9f9 --- /dev/null +++ b/docs/dev/analysis/features/styles/latent-styles.rst @@ -0,0 +1,266 @@ + +Latent Styles +============= + +Latent style definitions are a "stub" style definition specifying behavioral +(UI display) attributes for built-in styles. + + +Latent style collection +----------------------- + +The latent style collection for a document is accessed using the +:attr:`~.Styles.latent_styles` property on |Styles|:: + + >>> latent_styles = document.styles.latent_styles + >>> latent_styles + + +**Iteration.** |LatentStyles| should support iteration of contained +|_LatentStyle| objects in document order. + +**Latent style access.** A latent style can be accessed by name using +dictionary-style notation. + +**len().** |LatentStyles| supports :meth:`len`, reporting the number of +|_LatentStyle| objects it contains. + + +|LatentStyles| properties +------------------------- + + +default_priority +~~~~~~~~~~~~~~~~ + +**XML semantics**. According to ISO 29500, the default value if the +`w:defUIPriority` attribute is omitted is 99. 99 is explictly set in the +default Word `styles.xml`, so will generally be what one finds. 
+ +**Protocol**:: + + >>> # return None if attribute is omitted + >>> latent_styles.default_priority + None + >>> # but expect it will almost always be explicitly 99 + >>> latent_styles.default_priority + 99 + >>> latent_styles.default_priority = 42 + >>> latent_styles.default_priority + 42 + + +load_count +~~~~~~~~~~ + +**XML semantics**. No default is stated in the spec. Don't allow assignment +of |None|. + +**Protocol**:: + + >>> latent_styles.load_count + 276 + >>> latent_styles.load_count = 242 + >>> latent_styles.load_count + 242 + + +Boolean properties +~~~~~~~~~~~~~~~~~~ + +There are four boolean properties that all share the same protocol: + +* default_to_hidden +* default_to_locked +* default_to_quick_style +* default_to_unhide_when_used + +**XML semantics**. Defaults to |False| if the attribute is omitted. However, +the attribute should always be written explicitly on update. + +**Protocol**:: + + >>> latent_styles.default_to_hidden + False + >>> latent_styles.default_to_hidden = True + >>> latent_styles.default_to_hidden + True + + +Specimen XML +~~~~~~~~~~~~ + +.. highlight:: xml + +The `w:latentStyles` element used in the default Word 2011 template:: + + + + +|_LatentStyle| properties +------------------------- + +.. highlight:: python + +:: + + >>> latent_style = latent_styles.latent_styles[0] + + >>> latent_style.name + 'Normal' + + >>> latent_style.priority + None + >>> latent_style.priority = 10 + >>> latent_style.priority + 10 + + >>> latent_style.locked + None + >>> latent_style.locked = True + >>> latent_style.locked + True + + >>> latent_style.quick_style + None + >>> latent_style.quick_style = True + >>> latent_style.quick_style + True + + +Latent style behavior +--------------------- + +* A style has two categories of attribute, `behavioral` and `formatting`. + Behavioral attributes specify where and when the style should appear in the + user interface. 
Behavioral attributes can be specified for latent styles + using the ``<w:latentStyles>`` element and its ``<w:lsdException>`` child + elements. The 5 behavioral attributes are: + + + locked + + uiPriority + + semiHidden + + unhideWhenUsed + + qFormat + +* **locked**. The `locked` attribute specifies that the style should not + appear in any list or the gallery and may not be applied to content. This + behavior is only active when restricted formatting is turned on. + + Locking is turned on via the menu: Developer Tab > Protect Document > + Formatting Restrictions (Windows only). + +* **uiPriority**. The `uiPriority` attribute acts as a sort key for + sequencing style names in the user interface. Both the lists in the styles + panel and the Style Gallery are sensitive to this setting. Its effective + value is 0 if not specified. + +* **semiHidden**. The `semiHidden` attribute causes the style to be excluded + from the recommended list. The notion of `semi` in this context is that + while the style is hidden from the recommended list, it still appears in + the "All Styles" list. This attribute is removed on first application of + the style if an `unhideWhenUsed` attribute set |True| is also present. + +* **unhideWhenUsed**. The `unhideWhenUsed` attribute causes any `semiHidden` + attribute to be removed when the style is first applied to content. Word + does `not` remove the `semiHidden` attribute just because there exists an + object in the document having that style. The `unhideWhenUsed` attribute is + not removed along with the `semiHidden` attribute when the style is + applied. + + The `semiHidden` and `unhideWhenUsed` attributes operate in combination to + produce *hide-until-used* behavior. + + *Hypothesis.* The persistence of the `unhideWhenUsed` attribute after + removing the `semiHidden` attribute on first application of the style is + necessary to produce appropriate behavior in style inheritance situations. 
+ In that case, the `semiHidden` attribute may be explictly set to |False| to + override an inherited value. Or it could allow the `semiHidden` attribute + to be re-set to |True| later while preserving the hide-until-used behavior. + +* **qFormat**. The `qFormat` attribute specifies whether the style should + appear in the Style Gallery when it appears in the recommended list. + A style will never appear in the gallery unless it also appears in the + recommended list. + +* Latent style attributes are only operative for latent styles. Once a style + is defined, the attributes of the definition exclusively determine style + behavior; no attributes are inherited from its corresponding latent style + definition. + + +Specimen XML +------------ + +.. highlight:: xml + +:: + + + + + + + + + + + +Schema excerpt +-------------- + +.. highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/styles/paragraph-style.rst b/docs/dev/analysis/features/styles/paragraph-style.rst new file mode 100644 index 000000000..cc134b236 --- /dev/null +++ b/docs/dev/analysis/features/styles/paragraph-style.rst @@ -0,0 +1,142 @@ + +Paragraph Style +=============== + +A paragraph style provides character formatting (font) as well as paragraph +formatting properties. Character formatting is inherited from +|_CharacterStyle| and is predominantly embodied in the :attr:`font` property. +Likewise, most paragraph-specific properties come from the |ParagraphFormat| +object available on the :attr:`paragraph_format` property. + +A handful of other properties are specific to a paragraph style. + + +next_paragraph_style +-------------------- + +The `next_paragraph_style` property provides access to the style that will +automatically be assigned by Word to a new paragraph inserted after +a paragraph with this style. 
This property is most useful for a style that +would normally appear only once in a sequence, such as a heading. + +The default is to use the same style for an inserted paragraph. This +addresses the most common case; for example, a body paragraph having `Body +Text` style would normally be followed by a paragraph of the same style. + + +Expected usage +~~~~~~~~~~~~~~ + +The priority use case for this property is to provide a working style that +can be assigned to a paragraph. The property will always provide a valid +paragraph style, defaulting to the current style whenever a more specific one +cannot be determined. + +While this obscures some specifics of the situation from the API, it +addresses the expected most common use case. Developers needing to detect, +for example, missing styles can readily use the oxml layer to inspect the +XML and further features can be added if those use cases turn out to be more +common than expected. + + +Behavior +~~~~~~~~ + +**Default.** The default next paragraph style is the same paragraph style. + +The default is used whenever the next paragraph style is not specified or is +invalid, including these conditions: + +* No `w:next` child element is present +* A style having the styleId specified in `w:next/@w:val` is not present in + the document. +* The style specified in `w:next/@w:val` is not a paragraph style. + +In all these cases the current style (`self`) is returned. + + +Example XML +~~~~~~~~~~~ + +.. highlight:: xml + +paragraph_style.next_paragraph_style is styles['Bar']:: + + + + + + +**Semantics.** The `w:next` child element is optional. + +* When omitted, the next style is the same as the current style. +* If no style with a matching styleId exists, the `w:next` element is ignored + and the next style is the same as the current style. +* If a style is found but is of a style type other than paragraph, the + `w:next` element is ignored and the next style is the same as the current + style. 
+ + +Candidate protocol +~~~~~~~~~~~~~~~~~~ + +.. highlight:: python + +:: + + >>> styles = document.styles + + >>> paragraph_style = styles['Foo'] + >>> paragraph_style.next_paragraph_style == paragraph_style + True + + >>> paragraph_style.next_paragraph_style = styles['Bar'] + >>> paragraph_style.next_paragraph_style == styles['Bar'] + True + + >>> paragraph_style.next_paragraph_style = None + >>> paragraph_style.next_paragraph_style == paragraph_style + True + + +Schema excerpt +-------------- + +.. highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/styles/style.rst b/docs/dev/analysis/features/styles/style.rst new file mode 100644 index 000000000..a00ede05d --- /dev/null +++ b/docs/dev/analysis/features/styles/style.rst @@ -0,0 +1,501 @@ + +Style objects +============= + +A style is one of four types; character, paragraph, table, or numbering. All +style objects have behavioral properties and formatting properties. The set of +formatting properties varies depending on the style type. In general, +formatting properties are inherited along this hierarchy: character -> +paragraph -> table. A numbering style has no formatting properties and does +not inherit. + +Behavioral properties +--------------------- + +There are six behavior properties: + +hidden + Style operates to assign formatting properties, but does not appear in + the UI under any circumstances. Used for `internal` styles assigned by an + application that should not be under the control of an end-user. + +priority + Determines the sort order of the style in sequences presented by the UI. + +semi-hidden + The style is hidden from the so-called "main" user interface. In Word + this means the *recommended list* and the style gallery. The style still + appears in the *all styles* list. + +unhide_when_used + Flag to the application to set semi-hidden False when the style is next + used. 
+ +quick_style + Show the style in the style gallery when it is not hidden. + +locked + Style is hidden and cannot be applied when document formatting protection + is active. + + +hidden +------ + +The `hidden` attribute doesn't work on built-in styles and its behavior on +custom styles is spotty. Skipping this attribute for now. Will reconsider if +someone requests it and can provide a specific use case. + +Behavior +~~~~~~~~ + +**Scope.** `hidden` doesn't work at all on 'Normal' or 'Heading 1' style. It +doesn't work on Salutation either. There is no `w:defHidden` attribute on +`w:latentStyles`, lending credence to the hypothesis it is not enabled for +built-in styles. *Hypothesis:* Doesn't work on built-in styles. + +**UI behavior.** A custom style having `w:hidden` set |True| is hidden from +the gallery and all styles pane lists. It does however appear in the "Current +style of selected text" box in the styles pane when the cursor is on +a paragraph of that style. The style can be modified by the user from this +current style UI element. The user can assign a new style to a paragraph +having a hidden style. + + +priority +-------- + +The `priority` attribute is the integer primary sort key determining the +position of a style in a UI list. The secondary sort is alphabetical by name. +Negative values are valid, although not assigned by Word itself and appear to +be treated as 0. + +Behavior +~~~~~~~~ + +**Default.** Word behavior appears to default priority to 0 for custom +styles. The spec indicates the effective default value is conceptually +infinity, such that the style appears at the end of the styles list, +presumably alphabetically among other styles having no priority assigned. 
+ +Candidate protocol +~~~~~~~~~~~~~~~~~~ + +:: + + >>> style = document.styles['Foobar'] + >>> style.priority + None + >>> style.priority = 7 + >>> style.priority + 7 + >>> style.priority = -42 + >>> style.priority + 0 + + +semi-hidden +----------- + +The `w:semiHidden` element specifies visibility of the style in the so-called +`main` user interface. For Word, this means the style gallery and the +recommended, styles-in-use, and in-current-document lists. The all-styles +list and current-style dropdown in the styles pane would then be considered +part of an `advanced` user interface. + +Behavior +~~~~~~~~ + +**Default.** If the `w:semiHidden` element is omitted, its effective value is +|False|. There is no inheritance of this value. + +**Scope.** Works on both built-in and custom styles. + +**Word behavior.** Word does not use the `@w:val` attribute. It writes +``<w:semiHidden/>`` for |True| and omits the element for |False|. + +Candidate protocol +~~~~~~~~~~~~~~~~~~ + +:: + + >>> style = document.styles['Foo'] + >>> style.hidden + False + >>> style.hidden = True + >>> style.hidden + True + +Example XML +~~~~~~~~~~~ + +.. highlight:: xml + +style.hidden = True:: + + + + + + +style.hidden = False:: + + + + + +Alternate constructions should also report the proper value but not be +used when writing XML:: + + + + + + + + + + + + +unhide-when-used +---------------- + +The `w:unhideWhenUsed` element signals an application that this style should +be made visible the next time it is used. + +Behavior +~~~~~~~~ + +**Default.** If the `w:unhideWhenUsed` element is omitted, its effective +value is |False|. There is no inheritance of this value. + +**Word behavior.** The `w:unhideWhenUsed` element is not changed or removed +when the style is next used. Only the `w:semiHidden` element is affected, if +present. Presumably this is so a style can be re-hidden, to be unhidden on +the subsequent use. + +Note that this behavior in Word is only triggered by a user actually applying +a style.
Merely loading a document having the style applied somewhere in its +contents does not cause the `w:semiHidden` element to be removed. + +Candidate protocol +~~~~~~~~~~~~~~~~~~ + +.. highlight:: python + +:: + + >>> style = document.styles['Foo'] + >>> style.unhide_when_used + False + >>> style.unhide_when_used = True + >>> style.unhide_when_used + True + +Example XML +~~~~~~~~~~~ + +.. highlight:: xml + +style.unhide_when_used = True:: + + + + + + + +style.unhide_when_used = False:: + + + + + +Alternate constructions should also report the proper value but not be +used when writing XML:: + + + + + + + + + + + + +quick-style +----------- + +The `w:qFormat` element specifies whether Word should display this style in +the style gallery. In order to appear in the gallery, this attribute must be +|True| and `hidden` must be |False|. + +Behavior +~~~~~~~~ + +**Default.** If the `w:qFormat` element is omitted, its effective value is +|False|. There is no inheritance of this value. + +**Word behavior.** If `w:qFormat` is |True| and the style is not hidden, it +will appear in the gallery in the order specified by `w:uiPriority`. + +Candidate protocol +~~~~~~~~~~~~~~~~~~ + +.. highlight:: python + +:: + + >>> style = document.styles['Foo'] + >>> style.quick_style + False + >>> style.quick_style = True + >>> style.quick_style + True + +Example XML +~~~~~~~~~~~ + +.. highlight:: xml + +style.quick_style = True:: + + + + + + +style.quick_style = False:: + + + + + +Alternate constructions should also report the proper value but not be +used when writing XML:: + + + + + + + + + + + + +locked +------ + +The `w:locked` element specifies whether Word should prevent this style from +being applied to content. This behavior is only active if formatting +protection is turned on. + +Behavior +~~~~~~~~ + +**Default.** If the `w:locked` element is omitted, its effective value is +|False|. There is no inheritance of this value. + +Candidate protocol +~~~~~~~~~~~~~~~~~~ + +.. 
highlight:: python + +:: + + >>> style = document.styles['Foo'] + >>> style.locked + False + >>> style.locked = True + >>> style.locked + True + +Example XML +~~~~~~~~~~~ + +.. highlight:: xml + +style.locked = True:: + + + + + + +style.locked = False:: + + + + + +Alternate constructions should also report the proper value but not be +used when writing XML:: + + + + + + + + + + + + +Candidate protocols +------------------- + +.. highlight:: python + +Identification:: + + >>> style = document.styles['Body Text'] + >>> style.name + 'Body Text' + >>> style.style_id + 'BodyText' + >>> style.type + WD_STYLE_TYPE.PARAGRAPH (1) + +`delete()`:: + + >>> len(styles) + 6 + >>> style.delete() + >>> len(styles) + 5 + >>> styles['Citation'] + KeyError: no style with id or name 'Citation' + +Style.base_style:: + + >>> style = styles.add_style('Citation', WD_STYLE_TYPE.PARAGRAPH) + >>> style.base_style + None + >>> style.base_style = styles['Normal'] + >>> style.base_style + + >>> style.base_style.name + 'Normal' + + +Example XML +----------- + +.. 
highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Schema excerpt +-------------- + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/styles/styles.rst b/docs/dev/analysis/features/styles/styles.rst new file mode 100644 index 000000000..96bdd3243 --- /dev/null +++ b/docs/dev/analysis/features/styles/styles.rst @@ -0,0 +1,222 @@ + +Styles collection +================= + + +Candidate protocols +------------------- + +Access:: + + >>> styles = document.styles # default styles part added if not present + >>> styles + + +Iteration and length:: + + >>> len(styles) + 10 + >>> list_styles = [s for s in styles if s.type == WD_STYLE_TYPE.LIST] + >>> len(list_styles) + 3 + +Access style by name (or style id):: + + >>> styles['Normal'] + + + >>> styles['undefined-style'] + KeyError: no style with id or name 'undefined-style' + +:meth:`.Styles.add_style()`:: + + >>> style = styles.add_style('Citation', WD_STYLE_TYPE.PARAGRAPH) + >>> style.name + 'Citation' + >>> style.type + PARAGRAPH (1) + >>> style.builtin + False + + +Feature Notes +------------- + +* could add a default builtin style from known specs on first access via + WD_BUILTIN_STYLE enumeration:: + + >>> style = document.styles['Heading1'] + KeyError: no style with id or name 'Heading1' + >>> style = document.styles[WD_STYLE.HEADING_1] + >>> assert style == document.styles['Heading1'] + + +Example XML +----------- + +.. 
highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Schema excerpt +-------------- + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/table/cell-merge.rst b/docs/dev/analysis/features/table/cell-merge.rst new file mode 100644 index 000000000..31451cd5b --- /dev/null +++ b/docs/dev/analysis/features/table/cell-merge.rst @@ -0,0 +1,572 @@ + +Table - Merge Cells +=================== + +Word allows contiguous table cells to be merged, such that two or more cells +appear to be a single cell. Cells can be merged horizontally (spanning +multple columns) or vertically (spanning multiple rows). Cells can also be +merged both horizontally and vertically at the same time, producing a cell +that spans both rows and columns. Only rectangular ranges of cells can be +merged. + + +Table diagrams +-------------- + +Diagrams like the one below are used to depict tables in this analysis. +Horizontal spans are depicted as a continuous horizontal cell without +vertical dividers within the span. Vertical spans are depicted as a vertical +sequence of cells of the same width where continuation cells are separated by +a dashed top border and contain a caret ('^') to symbolize the continuation +of the cell above. Cell 'addresses' are depicted at the column and row grid +lines. This is conceptually convenient as it reuses the notion of list +indices (and slices) and makes certain operations more intuitive to specify. 
+The merged cell `A` below has top, left, bottom, and right values of 0, 0, 2, +and 2 respectively:: + + \ 0 1 2 3 + 0 +---+---+---+ + | A | | + 1 + - - - +---+ + | ^ | | + 2 +---+---+---+ + | | | | + 3 +---+---+---+ + + +Basic cell access protocol +-------------------------- + +There are three ways to access a table cell: + +* ``Table.cell(row_idx, col_idx)`` +* ``Row.cells[col_idx]`` +* ``Column.cells[row_idx]`` + + +Accessing the middle cell of a 3 x 3 table:: + + >>> table = document.add_table(3, 3) + >>> middle_cell = table.cell(1, 1) + >>> table.rows[1].cells[1] == middle_cell + True + >>> table.columns[1].cells[1] == middle_cell + True + + +Basic merge protocol +-------------------- + +A merge is specified using two diagonal cells:: + + >>> table = document.add_table(3, 3) + >>> a = table.cell(0, 0) + >>> b = table.cell(1, 1) + >>> A = a.merge(b) + +:: + + \ 0 1 2 3 + 0 +---+---+---+ +---+---+---+ + | a | | | | A | | + 1 +---+---+---+ + - - - +---+ + | | b | | --> | ^ | | + 2 +---+---+---+ +---+---+---+ + | | | | | | | | + 3 +---+---+---+ +---+---+---+ + + +Accessing a merged cell +----------------------- + +A cell is accessed by its "layout grid" position regardless of any spans that +may be present. A grid address that falls in a span returns the top-leftmost +cell in that span. This means a span has as many addresses as layout grid +cells it spans. For example, the merged cell `A` above can be addressed as +(0, 0), (0, 1), (1, 0), or (1, 1). This addressing scheme leads to desirable +access behaviors when spans are present in the table. + +The length of Row.cells is always equal to the number of grid columns, +regardless of any spans that are present. Likewise, the length of +Column.cells is always equal to the number of table rows, regardless of any +spans.
+ +:: + + >>> table = document.add_table(2, 3) + >>> row = table.rows[0] + >>> len(row.cells) + 3 + >>> row.cells[0] == row.cells[1] + False + + >>> a, b = row.cells[:2] + >>> a.merge(b) + + >>> len(row.cells) + 3 + >>> row.cells[0] == row.cells[1] + True + +:: + + \ 0 1 2 3 + 0 +---+---+---+ +---+---+---+ + | a | b | | | A | | + 1 +---+---+---+ --> +---+---+---+ + | | | | | | | | + 2 +---+---+---+ +---+---+---+ + + +Cell content behavior on merge +------------------------------ + +When two or more cells are merged, any existing content is concatenated and +placed in the resulting merged cell. Content from each original cell is +separated from that in the prior original cell by a paragraph mark. An +original cell having no content is skipped in the concatenation process. In +Python, the procedure would look roughly like this:: + + merged_cell_text = '\n'.join( + cell.text for cell in original_cells if cell.text + ) + +Merging four cells with content ``'a'``, ``'b'``, ``''``, and ``'d'`` +respectively results in a merged cell having text ``'a\nb\nd'``. + + +Cell size behavior on merge +--------------------------- + +Cell width and height, if present, are added when cells are merged:: + + >>> a, b = row.cells[:2] + >>> a.width.inches, b.width.inches + (1.0, 1.0) + >>> A = a.merge(b) + >>> A.width.inches + 2.0 + + +Removing a redundant row or column +---------------------------------- + +**Collapsing a column.** When all cells in a grid column share the same +``w:gridSpan`` specification, the spanned columns can be collapsed into +a single column by removing the ``w:gridSpan`` attributes. + + +Word behavior +------------- + +* Row and Column access in the MS API just plain breaks when the table is not + uniform. `Table.Rows(n)` and `Cell.Row` raise `EnvironmentError` when + a table contains a vertical span, and `Table.Columns(n)` and `Cell.Column` + unconditionally raise `EnvironmentError` when the table contains + a horizontal span. We can do better.
+ +* `Table.Cell(n, m)` works on any non-uniform table, although it uses + a *visual grid* that greatly complicates access. It raises an error for `n` + or `m` out of visual range, and provides no way other than try/except to + determine what that visual range is, since `Row.Count` and `Column.Count` + are unavailable. + +* In a merge operation, the text of the continuation cells is appended to + that of the origin cell as separate paragraph(s). + +* If a merge range contains previously merged cells, the range must + completely enclose the merged cells. + +* Word resizes a table (adds rows) when a cell is referenced by an + out-of-bounds row index. If the column identifier is out of bounds, an + exception is raised. This behavior will not be implemented in |docx|. + + +Glossary +-------- + +layout grid + The regular two-dimensional matrix of rows and columns that determines + the layout of cells in the table. The grid is primarily defined by the + `w:gridCol` elements that define the layout columns for the table. Each + row essentially duplicates that layout for an additional row, although + its height can differ from other rows. Every actual cell in the table + must begin and end on a layout grid "line", whether the cell is merged or + not. + +span + The single "combined" cell occupying the area of a set of merged cells. + +skipped cell + The WordprocessingML (WML) spec allows for 'skipped' cells, where + a layout cell location contains no actual cell. I can't find a way to + make a table like this using the Word UI and haven't experimented yet to + see whether Word will load one constructed by hand in the XML. + +uniform table + A table in which each cell corresponds exactly to a layout cell. + A uniform table contains no spans or skipped cells. + +non-uniform table + A table that contains one or more spans, such that not every cell + corresponds to a single layout cell. 
I suppose it would apply when there + was one or more skipped cells too, but in this analysis the term is only + used to indicate a table with one or more spans. + +uniform cell + A cell not part of a span, occupying a single cell in the layout grid. + +origin cell + The top-leftmost cell in a span. Contrast with *continuation cell*. + +continuation cell + A layout cell that has been subsumed into a span. A continuation cell is + mostly an abstract concept, although a actual `w:tc` element will always + exist in the XML for each continuation cell in a vertical span. + + +Understanding merge XML intuitively +----------------------------------- + +A key insight is that merged cells always look like the diagram below. +Horizontal spans are accomplished with a single `w:tc` element in each row, +using the `gridSpan` attribute to span additional grid columns. Vertical +spans are accomplished with an identical cell in each continuation row, +having the same `gridSpan` value, and having vMerge set to `continue` (the +default). These vertical continuation cells are depicted in the diagrams +below with a dashed top border and a caret ('^') in the left-most grid column +to symbolize the continuation of the cell above.:: + + \ 0 1 2 3 + 0 +---+---+---+ + | A | | + 1 + - - - +---+ + | ^ | | + 2 +---+---+---+ + | | | | + 3 +---+---+---+ + +.. highlight:: xml + +The table depicted above corresponds to this XML (minimized for clarity):: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +XML Semantics +------------- + +In a horizontal merge, the ```` attribute indicates the +number of columns the cell should span. Only the leftmost cell is preserved; +the remaining cells in the merge are deleted. + +For merging vertically, the ``w:vMerge`` table cell property of the uppermost +cell of the column is set to the value "restart" of type ``w:ST_Merge``. 
The +following, lower cells included in the vertical merge must have the +``w:vMerge`` element present in their cell property (``w:tcPr``) element. Its +value should be set to "continue", although it is not necessary to +explicitly define it, as it is the default value. A vertical merge ends as +soon as a cell ``w:tcPr`` element lacks the ``w:vMerge`` element. Similarly +to the ``w:gridSpan`` element, the ``w:vMerge`` elements are only required +when the table's layout is not uniform across its different columns. In the +case it is, only the topmost cell is kept; the other lower cells in the +merged area are deleted along with their ``w:vMerge`` elements and the +``w:trHeight`` table row property is used to specify the combined height of +the merged cells. + + +len() implementation for Row.cells and Column.cells +--------------------------------------------------- + +Each ``Row`` and ``Column`` object provides access to the collection of cells +it contains. The length of these cell collections is unaffected by the +presence of merged cells. + +`len()` always bases its count on the layout grid, as though there were no +merged cells. + +* ``len(Table.columns)`` is the number of `w:gridCol` elements, representing + the number of grid columns, without regard to the presence of merged cells + in the table. + +* ``len(Table.rows)`` is the number of `w:tr` elements, regardless of any + merged cells that may be present in the table. + +* ``len(Row.cells)`` is the number of grid columns, regardless of whether any + cells in the row are merged. + +* ``len(Column.cells)`` is the number of rows in the table, regardless of + whether any cells in the column are merged. + + +Merging a cell already containing a span +---------------------------------------- + +One or both of the "diagonal corner" cells in a merge operation may itself be +a merged cell, as long as the specified region is rectangular.
+ +For example:: + + \ 0 1 2 3 + +---+---+---+---+ +---+---+---+---+ + 0 | a | b | | | a\nb\nC | | + + - - - +---+---+ + - - - - - +---+ + 1 | ^ | C | | | ^ | | + +---+---+---+---+ --> +---+---+---+---+ + 2 | | | | | | | | | | + +---+---+---+---+ +---+---+---+---+ + 3 | | | | | | | | | | + +---+---+---+---+ +---+---+---+---+ + + cell(0, 0).merge(cell(1, 2)) + +or:: + + 0 1 2 3 4 + +---+---+---+---+---+ +---+---+---+---+---+ + 0 | a | b | c | | | abcD | | + + - - - +---+---+---+ + - - - - - - - +---+ + 1 | ^ | D | | | ^ | | + +---+---+---+---+---+ --> +---+---+---+---+---+ + 2 | | | | | | | | | | | | + +---+ - - - +---+---+ +---+---+---+---+---+ + 3 | | | | | | | | | | | | + +---+---+---+---+---+ +---+---+---+---+---+ + + cell(0, 0).merge(cell(1, 2)) + + +Conversely, either of these two merge operations would be illegal:: + + \ 0 1 2 3 4 0 1 2 3 4 + 0 +---+---+---+---+ 0 +---+---+---+---+ + | | | b | | | | | | | + 1 +---+---+ - +---+ 1 +---+---+---+---+ + | | a | ^ | | | | a | | | + 2 +---+---+ - +---+ 2 +---+---+---+---+ + | | | ^ | | | b | | + 3 +---+---+---+---+ 3 +---+---+---+---+ + | | | | | | | | | | + 4 +---+---+---+---+ 4 +---+---+---+---+ + + a.merge(b) + + +General algorithm +~~~~~~~~~~~~~~~~~ + +* find top-left and target width, height +* for each tr in target height, tc.grow_right(target_width) + + +Specimen XML +------------ + +.. highlight:: xml + +A 3 x 3 table where an area defined by the 2 x 2 topleft cells has been +merged, demonstrating the combined use of the ``w:gridSpan`` as well as the +``w:vMerge`` elements, as produced by Word:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Schema excerpt +-------------- + +.. 
highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Open Issues +----------- + +* Does Word allow "skipped" cells at the beginning of a row (`w:gridBefore` + element)? These are described in the spec, but I don't see a way in the + Word UI to create such a table. + + +Ressources +---------- + +* `Cell.Merge Method on MSDN`_ + +.. _`Cell.Merge Method on MSDN`: + http://msdn.microsoft.com/en-us/library/office/ff821310%28v=office.15%29.aspx + +Relevant sections in the ISO Spec +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* 17.4.17 gridSpan (Grid Columns Spanned by Current Table Cell) +* 17.4.84 vMerge (Vertically Merged Cell) +* 17.18.57 ST_Merge (Merged Cell Type) diff --git a/docs/dev/analysis/features/table.rst b/docs/dev/analysis/features/table/index.rst similarity index 96% rename from docs/dev/analysis/features/table.rst rename to docs/dev/analysis/features/table/index.rst index c934ddbb9..e1212b7be 100644 --- a/docs/dev/analysis/features/table.rst +++ b/docs/dev/analysis/features/table/index.rst @@ -8,6 +8,17 @@ correspond directly to the visual columns. All table content is contained in its cells. +In addition to this overview, there are the following more specialized +feature analyses: + +.. toctree:: + :titlesonly: + + table-props + table-row + table-cell + cell-merge + Specimen XML ------------ @@ -199,19 +210,3 @@ Schema Definitions - - -Resources ---------- - -* `Tables.Add Method on MSDN`_ - -.. _Tables.Add Method on MSDN: - http://msdn.microsoft.com/en-us/library/office/microsoft.office.interop.wo - rd.tables.add(v=office.14).aspx - -* `Table Members on MSDN`_ - -.. 
_`Table Members on MSDN`: - http://msdn.microsoft.com/en-us/library/office/microsoft.office.interop.wo - od.table_members(v=office.14).aspx diff --git a/docs/dev/analysis/features/table-cell.rst b/docs/dev/analysis/features/table/table-cell.rst similarity index 82% rename from docs/dev/analysis/features/table-cell.rst rename to docs/dev/analysis/features/table/table-cell.rst index 40be36b32..e7d177719 100644 --- a/docs/dev/analysis/features/table-cell.rst +++ b/docs/dev/analysis/features/table/table-cell.rst @@ -7,6 +7,22 @@ properties affecting its size, appearance, and how the content it contains is formatted. +Candidate protocol +------------------ + +Cell.vertical_alignment:: + + >>> from docx.enum.table import WD_CELL_ALIGN_VERTICAL + >>> cell = table.add_row().cells[0] + >>> cell + + >>> cell.vertical_alignment + None + >>> cell.vertical_alignment = WD_CELL_ALIGN_VERTICAL.CENTER + >>> print(cell.vertical_alignment) + CENTER (1) + + MS API - Partial Summary ------------------------ @@ -29,6 +45,25 @@ MS API - Partial Summary * WordWrap +WD_ALIGN_VERTICAL Enumeration +--------------------------------- + +wdAlignVerticalBoth (101) + This is an option in the OpenXml spec, but not in Word itself. It's not + clear what Word behavior this setting produces. If you find out please let + us know and we'll update the documentation. Otherwise, probably best to + avoid this option. + +wdAlignVerticalBottom (3) + Text is aligned to the bottom border of the cell. + +wdAlignVerticalCenter (1) + Text is aligned to the center of the cell. + +wdAlignVerticalTop (0) + Text is aligned to the top border of the cell. + + Specimen XML ------------ @@ -39,6 +74,7 @@ Specimen XML + @@ -127,22 +163,18 @@ Schema Definitions - - - + + + + + - - - - - - - - + + @@ -160,6 +192,25 @@ Schema Definitions + + + + + + + + + + + + + + + + + + + .. 
_`WdRowHeightRule`: http://msdn.microsoft.com/en-us/library/office/ff193620(v=office.15).aspx diff --git a/docs/dev/analysis/features/table-props.rst b/docs/dev/analysis/features/table/table-props.rst similarity index 87% rename from docs/dev/analysis/features/table-props.rst rename to docs/dev/analysis/features/table/table-props.rst index b2f8fbeba..73e97449e 100644 --- a/docs/dev/analysis/features/table-props.rst +++ b/docs/dev/analysis/features/table/table-props.rst @@ -3,10 +3,27 @@ Table Properties ================ +Alignment +--------- + +Word allows a table to be aligned between the page margins either left, +right, or center. + +The read/write :attr:`Table.alignment` property specifies the alignment for +a table:: + + >>> table = document.add_table(rows=2, cols=2) + >>> table.alignment + None + >>> table.alignment = WD_TABLE_ALIGNMENT.RIGHT + >>> table.alignment + RIGHT (2) + + Autofit ------- -Word has two algorithms for laying out a table, *fixed-width* or *autofit*. +Word has two algorithms for laying out a table, *fixed-width* or `autofit`. The default is autofit. Word will adjust column widths in an autofit table based on cell contents. A fixed-width table retains its column widths regardless of the contents. Either algorithm will adjust column widths @@ -28,12 +45,13 @@ Specimen XML .. highlight:: xml -The following XML is generated by Word when inserting a 2x2 table:: +The following XML represents a 2x2 table:: + @@ -151,6 +169,22 @@ Schema Definitions + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/table/table-row.rst b/docs/dev/analysis/features/table/table-row.rst new file mode 100644 index 000000000..9593e6db3 --- /dev/null +++ b/docs/dev/analysis/features/table/table-row.rst @@ -0,0 +1,133 @@ + +Table Row +========= + +A table row has certain properties such as height. 
+ + +Row.height +---------- + +Candidate protocol:: + + >>> from docx.enum.table import WD_ROW_HEIGHT + >>> row = table.add_row() + >>> row + + >>> row.height_rule + None + >>> row.height_rule = WD_ROW_HEIGHT.EXACTLY + >>> row.height + None + >>> row.height = Pt(24) + + +MS API +------ + +https://msdn.microsoft.com/en-us/library/office/ff193915.aspx + +Methods +~~~~~~~ + +* Delete() +* SetHeight() +* SetLeftIndent() + +Properties +~~~~~~~~~~ + +* Alignment +* AllowBreakAcrossPages +* Borders +* Cells +* HeadingFormat +* Height +* HeightRule +* Index +* IsFirst +* IsLast +* LeftIndent +* NestingLevel +* Next +* Previous +* Shading +* SpaceBetweenColumns + + +WD_ROW_HEIGHT_RULE Enumeration +------------------------------ + +Alias: WD_ROW_HEIGHT + +* wdRowHeightAtLeast (1) The row height is at least a minimum specified value. +* wdRowHeightAuto (0) The row height is adjusted to accommodate the tallest + value in the row. +* wdRowHeightExactly (2) The row height is an exact value. + + +Schema Definitions +------------------ + +.. highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/breaks.rst b/docs/dev/analysis/features/text/breaks.rst similarity index 100% rename from docs/dev/analysis/features/breaks.rst rename to docs/dev/analysis/features/text/breaks.rst diff --git a/docs/dev/analysis/features/text/font-color.rst b/docs/dev/analysis/features/text/font-color.rst new file mode 100644 index 000000000..443bc3af0 --- /dev/null +++ b/docs/dev/analysis/features/text/font-color.rst @@ -0,0 +1,288 @@ + +Font Color +========== + +Color, as a topic, extends beyond the |Font| object; font color is just the +first place it's come up. 
Accordingly, it bears a little deeper thought than +usual since we'll want to reuse the same objects and protocol to specify +color in the other contexts; it makes sense to craft a general solution that +will bear the expected reuse. + +There are three historical sources to draw from for this API. + +1. The `w:rPr/w:color` element. This is used by default when applying color + directly to text or when setting the text color of a style. This + corresponds to the `Font.Color` property (undocumented, unfortunately). + This element supports RGB colors, theme colors, and a tint or shade of + a theme color. + +2. The `w:rPr/w14:textFill` element. This is used by Word for fancy text like + gradient and shadow effects. This corresponds to the `Font.Fill` property. + +3. The PowerPoint font color UI. This seems like a reasonable compromise + between the prior two, allowing direct-ish access to common color options + while holding the door open for the `Font.fill` operations to be added + later if required. + +Candidate Protocol +~~~~~~~~~~~~~~~~~~ + +:class:`docx.text.run.Run` has a font property:: + + >>> from docx import Document + >>> from docx.text.run import Font, Run + >>> run = Document().add_paragraph().add_run() + >>> isinstance(run, Run) + True + >>> font = run.font + >>> isinstance(font, Font) + True + +:class:`docx.text.run.Font` has a read-only color property, returning +a :class:`docx.dml.color.ColorFormat` object:: + + >>> from docx.dml.color import ColorFormat + >>> color = font.color + >>> isinstance(font.color, ColorFormat) + True + >>> font.color = 'anything' + AttributeError: can't set attribute + + +:class:`docx.dml.color.ColorFormat` has a read-only :attr:`type` property and +read/write :attr:`rgb`, :attr:`theme_color`, and :attr:`brightness` +properties. 
+ +:attr:`ColorFormat.type` returns one of `MSO_COLOR_TYPE.RGB`, +`MSO_COLOR_TYPE.THEME`, `MSO_COLOR_TYPE.AUTO`, or |None|, the latter +indicating font has no directly-applied color:: + + >>> font.color.type + None + +:attr:`ColorFormat.rgb` returns an |RGBColor| object when `type` is +`MSO_COLOR_TYPE.RGB`. It may also report an RGBColor value when `type` is +`MSO_COLOR_TYPE.THEME`, since an RGB color may also be present in that case. +According to the spec, the RGB color value is ignored when a theme color is +specified, but Word writes the current RGB value of the theme color along +with the theme color name (e.g. 'accent1') when assigning a theme color; +perhaps as a convenient value for a file browser to use. The value of `.type` +must be consulted to determine whether the RGB value is operative or +a "best-guess":: + + >>> font.color.type + RGB (1) + >>> font.color.rgb + RGBColor(0x3f, 0x2c, 0x36) + +Assigning an |RGBColor| value to :attr:`ColorFormat.rgb` causes +:attr:`ColorFormat.type` to become `MSO_COLOR_TYPE.RGB`:: + + >>> font.color.type + None + >>> font.color.rgb = RGBColor(0x3f, 0x2c, 0x36) + >>> font.color.type + RGB (1) + >>> font.color.rgb + RGBColor(0x3f, 0x2c, 0x36) + +:attr:`ColorFormat.theme_color` returns a member of :ref:`MsoThemeColorIndex` +when `type` is `MSO_COLOR_TYPE.THEME`:: + + >>> font.color.type + THEME (2) + >>> font.color.theme_color + ACCENT_1 (5) + +Assigning a member of :ref:`MsoThemeColorIndex` to +:attr:`ColorFormat.theme_color` causes :attr:`ColorFormat.type` to become +`MSO_COLOR_TYPE.THEME`:: + + >>> font.color.type + RGB (1) + >>> font.color.theme_color = MSO_THEME_COLOR.ACCENT_2 + >>> font.color.type + THEME (2) + >>> font.color.theme_color + ACCENT_2 (6) + +The :attr:`ColorFormat.brightness` attribute can be used to select a tint or +shade of a theme color. 
Assigning the value 0.1 produces a color 10% brighter +(a tint); assigning -0.1 produces a color 10% darker (a shade):: + + >>> font.color.type + None + >>> font.color.brightness + 0.0 + >>> font.color.brightness = 0.4 + ValueError: not a theme color + + >>> font.color.theme_color = MSO_THEME_COLOR.TEXT_1 + >>> font.color.brightness = 0.4 + >>> font.color.brightness + 0.4 + + +Specimen XML +------------ + +.. highlight:: xml + +Baseline paragraph with no font color:: + + + + Text with no color. + + + +Paragraph with directly-applied RGB color:: + + + + + + + + + + + + Directly-applied color Blue. + + + +Run with directly-applied theme color:: + + + + + + Theme color Accent 1. + + +Run with 40% tint of Text 2 theme color:: + + + + + + Theme color with 40% tint. + + +Run with 25% shade of Accent 2 theme color:: + + + + + + Theme color with 25% shade. + + + +Schema excerpt +-------------- + +.. highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/text/font-highlight-color.rst b/docs/dev/analysis/features/text/font-highlight-color.rst new file mode 100644 index 000000000..81dc1acc5 --- /dev/null +++ b/docs/dev/analysis/features/text/font-highlight-color.rst @@ -0,0 +1,177 @@ + +Font highlight color +==================== + +Text in a Word document can be "highlighted" with a number of colors, +providing text background color. The visual effect is similar to that +produced using a highlighter (often fluorescent yellow) on a printed page. + + +Protocol +-------- + +Text is highlighted by assigning a member of `WD_COLOR_INDEX` to +`Font.highlight_color`. 
+ + >>> font = paragraph.add_run().font + >>> font.highlight_color + None + >>> font.highlight_color = WD_COLOR_INDEX.YELLOW + >>> font.highlight_color + YELLOW (7) + >>> font.highlight_color = WD_COLOR_INDEX.TURQUOISE + >>> font.highlight_color + TURQUOISE (3) + >>> font.highlight_color = None + >>> font.highlight_color + None + + +Enumerations +------------ + +* `WdColorIndex Enumeration on MSDN`_ + +.. _WdColorIndex Enumeration on MSDN: https://msdn.microsoft.com/EN-US/library/office/ff195343.aspx + + +XML Semantics +------------- + +Mapping of `WD_COLOR_INDEX` members to `ST_Highlight` values:: + + AUTO = 'default' + BLACK = 'black' + BLUE = 'blue' + BRIGHTGREEN = 'green' + DARKBLUE = 'darkBlue' + DARKRED = 'darkRed' + DARKYELLOW = 'darkYellow' + GRAY25 = 'lightGray' + GRAY50 = 'darkGray' + GREEN = 'darkGreen' + PINK = 'magenta' + RED = 'red' + TEAL = 'darkCyan' + TURQUOISE = 'cyan' + VIOLET = 'darkMagenta' + WHITE = 'white' + YELLOW = 'yellow' + + +Specimen XML +------------ + +.. highlight:: xml + +Baseline run:: + + + Black text on white background + + +Blue text, Bright Green Highlight:: + + + + + + Blue text on bright green background + + +Red text, Green Highlight:: + + + + + + Red text on green background + + + +Schema excerpt +-------------- + +.. highlight:: xml + +According to the schema, run properties may appear in any order and may +appear multiple times each. Not sure what the semantics of that would be or +why one would want to do it, but something to note. 
Word seems to place them +in the order below when it writes the file.:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/bool-run-props.rst b/docs/dev/analysis/features/text/font.rst similarity index 65% rename from docs/dev/analysis/features/bool-run-props.rst rename to docs/dev/analysis/features/text/font.rst index d811f2e49..626065006 100644 --- a/docs/dev/analysis/features/bool-run-props.rst +++ b/docs/dev/analysis/features/text/font.rst @@ -1,6 +1,61 @@ -Boolean Run properties -====================== +Font +==== + +Word supports a rich variety of character formatting. Character formatting +can be applied at various levels in the *style hierarchy*. At the lowest +level, it can be applied directly to a run of text content. Above that, it +can be applied to character, paragraph and table styles. It can also be +applied to an abstract numbering definition. At the highest levels it can be +applied via a theme or document defaults. + + +Typeface name +------------- + +Word allows multiple typefaces to be specified for character content in +a single run. This allows different Unicode character ranges such as ASCII +and Arabic to be used in a single run, each being rendered in the typeface +specified for that range. + +Up to eight distinct typefaces may be specified for a font. Four are used to +specify a typeface for a distinct code point range. These are: + +* `w:ascii` - used for the first 128 Unicode code points +* `w:cs` - used for complex script code points +* `w:eastAsia` - used for East Asian code points +* `w:hAnsi` - standing for *high ANSI*, but effectively the catch-all for any + code points not specified by one of the other three. + +The other four, `w:asciiTheme`, `w:csTheme`, `w:eastAsiaTheme`, and +`w:hAnsiTheme` are used to indirectly specify a theme-defined font. 
This +allows the typeface to be set centrally in the document. These four attributes +have lower precedence than the first four, so for example the value of +`w:asciiTheme` is ignored if a `w:ascii` attribute is also present. + +The typeface name used for a run is specified in the `w:rPr/w:rFonts` +element. There are 8 attributes that in combination specify the typeface to +be used. + +Protocol +~~~~~~~~ + +Initially, only the base typeface name is supported by the API, using the +:attr:`~.Font.name` property. Its value is that of the `w:rFonts/@w:ascii` +attribute or |None| if not present. Assignment to this property sets both the +`w:ascii` and the `w:hAnsi` attribute to the assigned string or removes them +both if |None| is assigned:: + + >>> font = document.styles['Normal'].font + >>> font.name + None + >>> font.name = 'Arial' + >>> font.name + 'Arial' + + +Boolean run properties +---------------------- Character formatting that is either on or off, such as bold, italic, and small caps. Certain of these properties are *toggle properties* that may @@ -83,19 +138,68 @@ The semantics of the three values are as follows: +-------+---------------------------------------------------------------+ | value | meaning | +=======+===============================================================+ -| True | The effective value of the property is unconditionally *on*. | +| True | The effective value of the property is unconditionally `on`. | | | Contrary settings in the style hierarchy have no effect. | +-------+---------------------------------------------------------------+ -| False | The effective value of the property is unconditionally *off*. | +| False | The effective value of the property is unconditionally `off`. | | | Contrary settings in the style hierarchy have no effect. | +-------+---------------------------------------------------------------+ | None | The element is not present. The effective value is | | | inherited from the style hierarchy. 
If no value for this | | | property is present in the style hierarchy, the effective | -| | value is *off*. | +| | value is `off`. | +-------+---------------------------------------------------------------+ +Toggle properties +----------------- + +Certain of the boolean run properties are *toggle properties*. A toggle +property is one that behaves like a `toggle` at certain places in the style +hierarchy. Toggle here means that setting the property on has the effect of +reversing the prior setting rather than unconditionally setting the property +on. + +This behavior allows these properties to be overridden (turned off) in +inheriting styles. For example, consider a character style `emphasized` that +sets bold on. Another style, `strong` inherits from `emphasized`, but should +display in italic rather than bold. Setting bold off in `strong` has no effect because it +is overridden by the bold in `emphasized` (I think). Because bold is a toggle +property, setting bold on in `strong` causes its value to be toggled, to +False, achieving the desired effect. See §17.7.3 for more details on toggle +properties. 
+ +The following run properties are toggle properties: + ++----------------+------------+-------------------------------------------+ +| element | spec | name | ++================+============+===========================================+ +| `<b/>` | §17.3.2.1 | Bold | ++----------------+------------+-------------------------------------------+ +| `<bCs/>` | §17.3.2.2 | Complex Script Bold | ++----------------+------------+-------------------------------------------+ +| `<caps/>` | §17.3.2.5 | Display All Characters as Capital Letters | ++----------------+------------+-------------------------------------------+ +| `<emboss/>` | §17.3.2.13 | Embossing | ++----------------+------------+-------------------------------------------+ +| `<i/>` | §17.3.2.16 | Italics | ++----------------+------------+-------------------------------------------+ +| `<iCs/>` | §17.3.2.17 | Complex Script Italics | ++----------------+------------+-------------------------------------------+ +| `<imprint/>` | §17.3.2.18 | Imprinting | ++----------------+------------+-------------------------------------------+ +| `<outline/>` | §17.3.2.23 | Display Character Outline | ++----------------+------------+-------------------------------------------+ +| `<shadow/>` | §17.3.2.31 | Shadow | ++----------------+------------+-------------------------------------------+ +| `<smallCaps/>` | §17.3.2.33 | Small Caps | ++----------------+------------+-------------------------------------------+ +| `<strike/>` | §17.3.2.37 | Single Strikethrough | ++----------------+------------+-------------------------------------------+ +| `<vanish/>` | §17.3.2.41 | Hidden Text | ++----------------+------------+-------------------------------------------+ + + Specimen XML ------------ @@ -103,7 +207,7 @@ Specimen XML :: - + @@ -113,8 +217,7 @@ Specimen XML - bold, italic, small caps, strike, size, and underline, applied in - reverse order but not to paragraph mark + bold, italic, small caps, strike, 14 pt, and underline @@ -128,16 +231,6 @@ times each. 
Not sure what the semantics of that would be or why one would want to do it, but something to note. Word seems to place them in the order below when it writes the file.:: - - - - - - - - - - @@ -185,10 +278,61 @@ below when it writes the file.:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -200,64 +344,60 @@ below when it writes the file.:: + + + + + -Toggle properties ------------------ - -Certain of the boolean run properties are *toggle properties*. A toggle -property is one that behaves like a *toggle* at certain places in the style -hierarchy. Toggle here means that setting the property on has the effect of -reversing the prior setting rather than unconditionally setting the property -on. - -This behavior allows these properties to be overridden (turned off) in -inheriting styles. For example, consider a character style `emphasized` that -sets bold on. Another style, `strong` inherits from `emphasized`, but should -display in italic rather than bold. Setting bold off has no effect because it -is overridden by the bold in `strong` (I think). Because bold is a toggle -property, setting bold on in `emphasized` causes its value to be toggled, to -False, achieving the desired effect. See §17.7.3 for more details on toggle -properties. 
- -The following run properties are toggle properties: - -+----------------+------------+-------------------------------------------+ -| element | spec | name | -+================+============+===========================================+ -| `` | §17.3.2.1 | Bold | -+----------------+------------+-------------------------------------------+ -| `` | §17.3.2.2 | Complex Script Bold | -+----------------+------------+-------------------------------------------+ -| `` | §17.3.2.5 | Display All Characters as Capital Letters | -+----------------+------------+-------------------------------------------+ -| `` | §17.3.2.13 | Embossing | -+----------------+------------+-------------------------------------------+ -| `` | §17.3.2.16 | Italics | -+----------------+------------+-------------------------------------------+ -| `` | §17.3.2.17 | Complex Script Italics | -+----------------+------------+-------------------------------------------+ -| `` | §17.3.2.18 | Imprinting | -+----------------+------------+-------------------------------------------+ -| `` | §17.3.2.23 | Display Character Outline | -+----------------+------------+-------------------------------------------+ -| `` | §17.3.2.31 | Shadow | -+----------------+------------+-------------------------------------------+ -| `` | §17.3.2.33 | Small Caps | -+----------------+------------+-------------------------------------------+ -| `` | §17.3.2.37 | Single Strikethrough | -+----------------+------------+-------------------------------------------+ -| `` | §17.3.2.41 | Hidden Text | -+----------------+------------+-------------------------------------------+ - + + + -Resources ---------- + + + + + + + + + + + + -* `WdBreakType Enumeration on MSDN`_ -* `Range.InsertBreak Method (Word) on MSDN`_ + + + + + + + + + + + + + + + + + + + + + + -.. _WdBreakType Enumeration on MSDN: - http://msdn.microsoft.com/en-us/library/office/ff195905.aspx + + + -.. 
_Range.InsertBreak Method (Word) on MSDN: - http://msdn.microsoft.com/en-us/library/office/ff835132.aspx + + + + + + + diff --git a/docs/dev/analysis/features/text/hyperlink.rst b/docs/dev/analysis/features/text/hyperlink.rst new file mode 100644 index 000000000..cfd451fe1 --- /dev/null +++ b/docs/dev/analysis/features/text/hyperlink.rst @@ -0,0 +1,383 @@ + +Hyperlink +========= + +Word allows a hyperlink to be placed in a document wherever a paragraph can appear. The +actual hyperlink element is a peer of |Run|. + +The link may be to an external resource such as a web site, or internal, to another +location in the document. The link may also be a `mailto:` URI or a reference to a file +on an accessible local or network filesystem. + +The visible text of a hyperlink is held in one or more runs. Technically a hyperlink can +have zero runs, but this occurs only in contrived cases (otherwise there would be +nothing to click on). As usual, each run can have its own distinct text formatting +(font), so for example one word in the hyperlink can be bold, etc. By default, Word +applies the built-in `Hyperlink` character style to a newly inserted hyperlink. Like +other text, the hyperlink text may often be broken into multiple runs as a result of +edits in different "revision-save" editing sessions (between "Save" commands). + +Note that rendered page-breaks can occur in the middle of a hyperlink. + +A |Hyperlink| is a child of |Paragraph|, a peer of |Run|. + + +TODO: What about URL-encoding/decoding (like %20) behaviors, if any? + + +Candidate protocol +------------------ + +An external hyperlink has an address and an optional anchor. An internal hyperlink has +only an anchor. An anchor is more precisely known as a *URI fragment* in a web URL and +follows a hash mark ("#"). The fragment-separator hash character is not stored in the +XML. 
+ +Note that the anchor and address are stored in two distinct attributes, so you need to +concatenate `.address` and `.anchor` like `f"{address}#{anchor}"` if you want the whole +thing. + +Also note that Word does not rigorously separate a fragment in a web URI so it may +appear as part of the address or separately in the anchor attribute, depending on how +the hyperlink was authored. Hyperlinks inserted using the dialog-box seem to separate it +and addresses typed into the document directly don't, based on my limited experience. + +.. highlight:: python + +**Access hyperlinks in a paragraph**:: + + >>> hyperlinks = paragraph.hyperlinks + [] + +**Access hyperlinks in a paragraph in document order with runs**:: + + >>> list(paragraph.iter_inner_content()) + [ + + + + ] + +**Access hyperlink address**:: + + >>> hyperlink.address + 'https://google.com/' + +**Access hyperlink fragment**:: + + >>> hyperlink.fragment + 'introduction' + +**Access hyperlink history (visited or not, True means not visited yet)**:: + + >>> hyperlink.history + True + +**Access hyperlinks runs**:: + + >>> hyperlink.runs + [ + + + + ] + +**Access hyperlink URL**:: + + >>> hyperlink.url + 'https://us.com#introduction' + +**Determine whether a hyperlink contains a rendered page-break**:: + + >>> hyperlink.contains_page_break + False + +**Access visible text of a hyperlink**:: + + >>> hyperlink.text + 'an excellent Wikipedia article on ferrets' + +**Add an external hyperlink** (not yet implemented):: + + >>> hyperlink = paragraph.add_hyperlink( + ... 'About', address='http://us.com', fragment='about' + ... 
) + >>> hyperlink + + >>> hyperlink.text + 'About' + >>> hyperlink.address + 'http://us.com' + >>> hyperlink.fragment + 'about' + >>> hyperlink.url + 'http://us.com#about' + +**Add an internal hyperlink (to a bookmark)**:: + + >>> hyperlink = paragraph.add_hyperlink('Section 1', fragment='Section_1') + >>> hyperlink.text + 'Section 1' + >>> hyperlink.fragment + 'Section_1' + >>> hyperlink.address + '' + +**Modify hyperlink properties**:: + + >>> hyperlink.text = 'Froogle' + >>> hyperlink.text + 'Froogle' + >>> hyperlink.address = 'mailto:info@froogle.com?subject=sup dawg?' + >>> hyperlink.address + 'mailto:info@froogle.com?subject=sup%20dawg%3F' + >>> hyperlink.anchor = None + >>> hyperlink.anchor + None + +**Add additional runs to a hyperlink**:: + + >>> hyperlink.text = 'A ' + >>> # .insert_run inserts a new run at idx, defaults to idx=-1 + >>> hyperlink.insert_run(' link').bold = True + >>> hyperlink.insert_run('formatted', idx=1).bold = True + >>> hyperlink.text + 'A formatted link' + >>> [r for r in hyperlink.iter_runs()] + [, + , + ] + +**Iterate over the run-level items a paragraph contains**:: + + >>> paragraph = document.add_paragraph('A paragraph having a link to: ') + >>> paragraph.add_hyperlink(text='github', address='http://github.com') + >>> [item for item in paragraph.iter_run_level_items()] + [, ] + +**Paragraph.text now includes text contained in a hyperlink**:: + + >>> paragraph.text + 'A paragraph having a link to: github' + + +Word Behaviors +-------------- + +* What are the semantics of the w:history attribute on w:hyperlink? I'm + suspecting this indicates whether the link should show up blue (unvisited) + or purple (visited). I'm inclined to think we need that as a read/write + property on hyperlink. We should see what the MS API does on this count. + +* We probably need to enforce some character-set restrictions on w:anchor. + Word doesn't seem to like spaces or hyphens, for example. 
The simple type + ST_String doesn't look like it takes care of this. + +* We'll need to test URL escaping of special characters like spaces and + question marks in Hyperlink.address. + +* What does Word do when loading a document containing an internal hyperlink + having an anchor value that doesn't match an existing bookmark? We'll want + to know because we're sure to get support inquiries from folks who don't + match those up and wonder why they get a repair error or whatever. + + +Specimen XML +------------ + +.. highlight:: xml + + +External links +~~~~~~~~~~~~~~ + +The address (URL) of an external hyperlink is stored in the document.xml.rels +file, keyed by the w:hyperlink@r:id attribute:: + + + + This is an external link to + + + + + + + Google + + + + +... mapping to relationship in document.xml.rels:: + + + + + +A hyperlink can contain multiple runs of text (and a whole lot of other stuff, at least +as far as the schema indicates):: + + + + + + + + A hyperlink containing an + + + + + + + italicized + + + + + + word + + + + + +Internal links +~~~~~~~~~~~~~~ + +An internal link provides "jump to another document location" behavior in the +Word UI. An internal link is distinguished by the absence of an r:id +attribute. In this case, the w:anchor attribute is required. The value of the +anchor attribute is the name of a bookmark in the document. + +Example:: + + + + See + + + + + + + Section 4 + + + + for more details. + + + +... referring to this bookmark elsewhere in the document:: + + + + + Section 4 + + + + + +Schema excerpt +-------------- + +.. 
highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/text/index.rst b/docs/dev/analysis/features/text/index.rst new file mode 100644 index 000000000..b1e2fa7f8 --- /dev/null +++ b/docs/dev/analysis/features/text/index.rst @@ -0,0 +1,16 @@ + +Text +==== + +.. toctree:: + :titlesonly: + + hyperlink + tab-stops + font-highlight-color + paragraph-format + font + font-color + underline + run-content + breaks diff --git a/docs/dev/analysis/features/text/paragraph-format.rst b/docs/dev/analysis/features/text/paragraph-format.rst new file mode 100644 index 000000000..6e5398a13 --- /dev/null +++ b/docs/dev/analysis/features/text/paragraph-format.rst @@ -0,0 +1,473 @@ + +Paragraph formatting +==================== + +WordprocessingML supports a variety of paragraph formatting attributes to +control layout characteristics such as justification, indentation, line +spacing, space before and after, and widow/orphan control. + + +Alignment (justification) +------------------------- + +In Word, each paragraph has an `alignment` attribute that specifies how to +justify the lines of the paragraph when the paragraph is laid out on the +page. Common values are left, right, centered, and justified. + +Protocol +~~~~~~~~ + +Getting and setting paragraph alignment:: + + >>> paragraph = body.add_paragraph() + >>> paragraph.alignment + None + >>> paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT + >>> paragraph.alignment + RIGHT (2) + >>> paragraph.alignment = None + >>> paragraph.alignment + None + +XML Semantics +~~~~~~~~~~~~~ + +If the ```` element is not present on a paragraph, the alignment value +for that paragraph is inherited from its style hierarchy. If the element is +present, its value overrides any inherited value. 
From the API, a value of +|None| on the ``Paragraph.alignment`` property corresponds to no ```` +element being present. If |None| is assigned to ``Paragraph.alignment``, the +```` element is removed. + + +Paragraph spacing +----------------- + +Spacing between subsequent paragraphs is controlled by the paragraph spacing +attributes. Spacing can be applied either before the paragraph, after it, or +both. The concept is similar to that of `padding` or `margin` in CSS. +WordprocessingML supports paragraph spacing specified as either a length +value or as a multiple of the line height; however only a length value is +supported via the Word UI. Inter-paragraph spacing "overlaps", such that the +rendered spacing between two paragraphs is the maximum of the space after the +first paragraph and the space before the second. + +Protocol +~~~~~~~~ + +Getting and setting paragraph spacing:: + + >>> paragraph_format = document.styles['Normal'].paragraph_format + >>> paragraph_format.space_before + None + >>> paragraph_format.space_before = Pt(12) + >>> paragraph_format.space_before.pt + 12.0 + +XML Semantics +~~~~~~~~~~~~~ + +* Paragraph spacing is specified using the `w:pPr/w:spacing` element, which + also controls line spacing. Spacing is specified in twips. +* If the `w:spacing` element is not present, paragraph spacing is inherited + from the style hierarchy. +* If not present in the style hierarchy, the paragraph will have no spacing. +* If the `w:spacing` element is present but the specific attribute (e.g. + `w:before`) is not, its value is inherited. + +Specimen XML +~~~~~~~~~~~~ + +.. highlight:: xml + +12 pt space before, 0 after:: + + + + + + +Line spacing +------------ + +Line spacing can be specified either as a specific length or as a multiple of +the line height (font size). Line spacing is specified by the combination of +values in `w:spacing/@w:line` and `w:spacing/@w:lineRule`. 
The +:attr:`.ParagraphFormat.line_spacing` property determines which method to use +based on whether the assigned value is an instance of |Length|. + +Protocol +~~~~~~~~ + +.. highlight:: python + +Getting and setting line spacing:: + + >>> paragraph_format.line_spacing, paragraph_format.line_spacing_rule + (None, None) + + >>> paragraph_format.line_spacing = Pt(18) + >>> paragraph_format.line_spacing, paragraph_format.line_spacing_rule + (228600, WD_LINE_SPACING.EXACTLY (4)) + + >>> paragraph_format.line_spacing = 1 + >>> paragraph_format.line_spacing, paragraph_format.line_spacing_rule + (152400, WD_LINE_SPACING.SINGLE (0)) + + >>> paragraph_format.line_spacing = 0.9 + >>> paragraph_format.line_spacing, paragraph_format.line_spacing_rule + (137160, WD_LINE_SPACING.MULTIPLE (5)) + +XML Semantics +~~~~~~~~~~~~~ + +* Line spacing is specified by the combination of the values in + `w:spacing/@w:line` and `w:spacing/@w:lineRule`. +* `w:spacing/@w:line` is specified in twips. If `@w:lineRule` is 'auto' (or + missing), `@w:line` is interpreted as 240ths of a line. For all other + values of `@w:lineRule`, the value of `@w:line` is interpreted as + a specific length in twips. +* If the `w:spacing` element is not present, line spacing is inherited. +* If `@w:line` is not present, line spacing is inherited. +* If not present, `@w:lineRule` defaults to 'auto'. +* If not present in the style hierarchy, line spacing defaults to single + spaced. +* The 'atLeast' value for `@w:lineRule` indicates the line spacing will be + `@w:line` twips or single spaced, whichever is greater. + +Specimen XML +~~~~~~~~~~~~ + +.. highlight:: xml + +14 points:: + + + + + +double-spaced:: + + + + + + +Indentation +----------- + +Paragraph indentation is specified using the `w:pPr/w:ind` element. Left, +right, first line, and hanging indent can be specified. Indentation can be +specified as a length or in hundredths of a character width. Only length is +supported by |docx|. 
Both first line indent and hanging indent are specified +using the :attr:`.ParagraphFormat.first_line_indent` property. Assigning +a positive value produces an indented first line. A negative value produces +a hanging indent. + +Protocol +~~~~~~~~ + +.. highlight:: python + +Getting and setting indentation:: + + >>> paragraph_format.left_indent + None + >>> paragraph_format.right_indent + None + >>> paragraph_format.first_line_indent + None + + >>> paragraph_format.left_indent = Pt(36) + >>> paragraph_format.left_indent.pt + 36.0 + + >>> paragraph_format.right_indent = Inches(0.25) + >>> paragraph_format.right_indent.pt + 18.0 + + >>> paragraph_format.first_line_indent = Pt(-18) + >>> paragraph_format.first_line_indent.pt + -18.0 + +XML Semantics +~~~~~~~~~~~~~ + +* Indentation is specified by `w:ind/@w:start`, `w:ind/@w:end`, + `w:ind/@w:firstLine`, and `w:ind/@w:hanging`. + +* `w:firstLine` and `w:hanging` are mutually exclusive, if both are + specified, `w:firstLine` is ignored. + +* All four attributes are specified in twips. + +* `w:start` controls left indent for a left-to-right paragraph or right + indent for a right-to-left paragraph. `w:end` controls the other side. If + mirrorIndents is specified, `w:start` controls the inside margin and + `w:end` the outside. Negative values are permitted and cause the text to + move past the text margin. + +* If `w:ind` is not present, indentation is inherited. + +* Any omitted attributes are inherited. + +* If not present in the style hierarchy, indentation values default to zero. + +Specimen XML +~~~~~~~~~~~~ + +.. 
highlight:: xml + +1 inch left, 0.5 inch (additional) first line, 0.5 inch right:: + + + + + +0.5 inch left, 0.5 inch hanging indent:: + + + + + + +Page placement +-------------- + +There are a handful of page placement properties that control such things as +keeping the lines of a paragraph together on the same page, keeping +a paragraph (such as a heading) on the same page as the subsequent paragraph, +and placing the paragraph at the top of a new page. Each of these is a +tri-state boolean property where |None| indicates "inherit". + +Protocol +~~~~~~~~ + +.. highlight:: python + +Getting and setting page placement properties:: + + >>> paragraph_format.keep_with_next + None + >>> paragraph_format.keep_together + None + >>> paragraph_format.page_break_before + None + >>> paragraph_format.widow_control + None + + >>> paragraph_format.keep_with_next = True + >>> paragraph_format.keep_with_next + True + + >>> paragraph_format.keep_together = False + >>> paragraph_format.keep_together + False + + >>> paragraph_format.page_break_before = True + >>> paragraph_format.widow_control = None + + +XML Semantics +~~~~~~~~~~~~~ + +* All four elements have "On/Off" semantics. + +* If not present, their value is inherited. + +* If not present in the style hierarchy, values default to False. + +Specimen XML +~~~~~~~~~~~~ + +.. highlight:: xml + +keep with next, keep together, no page break before, and widow/orphan +control:: + + + + + + + + + +Enumerations +------------ + +* :ref:`WdLineSpacing` +* :ref:`WdParagraphAlignment` + + +Specimen XML +------------ + +.. highlight:: xml + +A paragraph with inherited alignment:: + + + + Inherited paragraph alignment. + + + +A right-aligned paragraph:: + + + + + + + Right-aligned paragraph. 
+ + + + + +Schema excerpt +-------------- + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/run-content.rst b/docs/dev/analysis/features/text/run-content.rst similarity index 100% rename from docs/dev/analysis/features/run-content.rst rename to docs/dev/analysis/features/text/run-content.rst diff --git a/docs/dev/analysis/features/text/tab-stops.rst b/docs/dev/analysis/features/text/tab-stops.rst new file mode 100644 index 000000000..c29496e85 --- /dev/null +++ b/docs/dev/analysis/features/text/tab-stops.rst @@ -0,0 +1,256 @@ + +Tab Stops +========= + +WordprocessingML allows for custom specification of tab stops at the +paragraph level. Tab stop spacing is a subset of paragraph formatting in +this system, so will be implemented within the +docx.text.parfmt.ParagraphFormatting object. Tab stops will be handled as +a List-like TabStops object made up of TabStop objects. + +A TabStop object has three properties, alignment, leader, and position. +Alignment is a WD_TAB_ALIGNMENT member and position is a Length() object. + +Tab stops are always sorted in position order. Alignment defaults to +WD_TAB_ALIGNMENT.LEFT, and leader defaults to WD_TAB_LEADER.SPACES. + +Tab stops specify how tab characters in a paragraph are rendered. Insertion +of tab characters is accomplished using the Run object. + + +Protocol +-------- + +.. 
highlight:: python + +Getting and setting tab stops:: + + >>> tab_stops = paragraph.paragraph_format.tab_stops + >>> tab_stops + <docx.text.tabstops.TabStops object at 0x106b802d8> + + >>> tab_stop = tab_stops.add_tab_stop(Inches(2), WD_TAB_ALIGNMENT.LEFT, WD_TAB_LEADER.DOTS) + + # add_tab_stop defaults to WD_TAB_ALIGNMENT.LEFT, WD_TAB_LEADER.SPACES + + >>> tab_stop = tab_stops.add_tab_stop(Inches(0.5)) + >>> tab_stop.alignment + WD_TAB_ALIGNMENT.LEFT + >>> tab_stop.leader + WD_TAB_LEADER.SPACES + + # TabStop properties are read/write + + >>> tab_stop.position = Inches(2.5) + >>> tab_stop.alignment = WD_TAB_ALIGNMENT.CENTER + >>> tab_stop.leader = WD_TAB_LEADER.DASHES + + # Tab stops are sorted into position order as created or modified + + >>> [(t.position, t.alignment) for t in tab_stops] + [(1828800, WD_TAB_ALIGNMENT.LEFT), (2286000, WD_TAB_ALIGNMENT.CENTER)] + + # A tab stop is deleted using del statement + + >>> len(tab_stops) + 2 + >>> del tab_stops[1] + >>> len(tab_stops) + 1 + + # Restore default tabs + + >>> tab_stops.clear() + + +Word Behavior +------------- + +When the w:tabs element is empty or not present, Word uses default tab stops +(typically every half inch). + +Word resumes using default tab stops following the last specified tab stop. + +TabStops must be in position order within the XML. If they are not, the +out-of-order tab stop will appear in the ruler and in the properties dialog, but +will not actually be used by Word. + + +XML Semantics +------------- + +* Both "num" and "list" alignment are a legacy from early versions of Word + before hanging indents were available. Both are deprecated. + +* "start" alignment is equivalent to "left", and "end" alignment is equivalent + to "right". (Confirmed with manually edited XML.) + +* A "clear" tab stop is not shown in Word's tab bar and default tab behavior + is followed in the document. That is, Word ignores that tab stop + specification completely, acting as if it were not there at all.
This + allows a tab stop inherited from a style, for example, to be ignored. + +* The w:pos attribute uses twips rather than EMU. + +* The w:tabs element must be removed when empty. If present, it must contain + at least one w:tab element. + + +Specimen XML +------------ + +.. highlight:: xml + +:: + + + + + + + + + +Enumerations +------------ + +* `WdTabAlignment Enumeration on MSDN`_ + +.. _WdTabAlignment Enumeration on MSDN: + https://msdn.microsoft.com/EN-US/library/office/ff195609.aspx + +================= ======== ===== +Name XML Value +================= ======== ===== +wdAlignTabBar bar 4 +wdAlignTabCenter center 1 +wdAlignTabDecimal decimal 3 +wdAlignTabLeft left 0 +wdAlignTabList list 6 +wdAlignTabRight right 2 +================= ======== ===== + +Additional Enumeration values not appearing in WdTabAlignment + +=============== ======== ===== +Name XML Value +=============== ======== ===== +wdAlignTabClear clear 101 +wdAlignTabEnd end 102 +wdAlignTabNum num 103 +wdAlignTabStart start 104 +=============== ======== ===== + + +* `WdTabLeader Enumeration on MSDN`_ + +.. _WdTabLeader Enumeration on MSDN: + https://msdn.microsoft.com/en-us/library/office/ff845050.aspx + +==================== ========== ===== +Name XML Value +==================== ========== ===== +wdTabLeaderDashes hyphen 2 +wdTabLeaderDots dot 1 +wdTabLeaderHeavy heavy 4 +wdTabLeaderLines underscore 3 +wdTabLeaderMiddleDot middleDot 5 +wdTabLeaderSpaces none 0 +==================== ========== ===== + + +MS API Protocol +--------------- + +The MS API defines a `TabStops object`_ which is a collection of +`TabStop objects`_. + +.. _TabStops object: + https://msdn.microsoft.com/EN-US/library/office/ff192806.aspx + +.. 
_TabStop objects: + https://msdn.microsoft.com/EN-US/library/office/ff195736.aspx + + +Schema excerpt +-------------- + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/underline.rst b/docs/dev/analysis/features/text/underline.rst similarity index 99% rename from docs/dev/analysis/features/underline.rst rename to docs/dev/analysis/features/text/underline.rst index 6cad0c25e..4ed1b7652 100644 --- a/docs/dev/analysis/features/underline.rst +++ b/docs/dev/analysis/features/text/underline.rst @@ -1,6 +1,6 @@ -Run underline -============= +Underline +========= Text in a Word document can be underlined in a variety of styles. diff --git a/docs/dev/analysis/index.rst b/docs/dev/analysis/index.rst index 7e4d7589e..25bf5fb4e 100644 --- a/docs/dev/analysis/index.rst +++ b/docs/dev/analysis/index.rst @@ -8,23 +8,18 @@ Feature Analysis ---------------- .. toctree:: - :maxdepth: 1 - - features/table - features/table-props - features/table-cell - features/par-alignment - features/run-content + :titlesonly: + + features/comments + features/header + features/settings + features/text/index + features/table/index + features/styles/index + features/shapes/index + features/coreprops features/numbering - features/underline - features/char-style - features/breaks features/sections - features/shapes - features/shapes-inline - features/shapes-inline-size - features/picture - features/bool-run-props Schema Analysis @@ -39,5 +34,3 @@ ISO/IEC 29500 spec. schema/ct_document schema/ct_body schema/ct_p - schema/ct_ppr - schema/ct_styles diff --git a/docs/dev/analysis/schema/ct_ppr.rst b/docs/dev/analysis/schema/ct_ppr.rst deleted file mode 100644 index a872dcca9..000000000 --- a/docs/dev/analysis/schema/ct_ppr.rst +++ /dev/null @@ -1,189 +0,0 @@ -########## -``CT_PPr`` -########## - -.. highlight:: xml - -.. 
csv-table:: - :header-rows: 0 - :stub-columns: 1 - :widths: 15, 50 - - Schema Name , CT_PPr - Spec Name , Paragraph Properties - Tag(s) , w:pPr - Namespace , wordprocessingml (wml.xsd) - Spec Section , 17.3.1.26 - - -Analysis -======== - - - -attributes -^^^^^^^^^^ - -None. - - -child elements -^^^^^^^^^^^^^^ - -========= === ================ -name # type -========= === ================ -xyz ? CT_abc -abc ? CT_TextListStyle -p ? CT_TextParagraph -========= === ================ - - -Spec text -^^^^^^^^^ - - This element specifies a set of paragraph properties which shall be applied - to the contents of the parent paragraph after all style/numbering/table - properties have been applied to the text. These properties are defined as - direct formatting, since they are directly applied to the paragraph and - supersede any formatting from styles. - - Consider a paragraph which should have a set of paragraph formatting - properties. This set of properties is specified in the paragraph properties - as follows:: - - - - - - - - - - - - The pPr element specifies the properties which are applied to the current - paragraph - in this case, a bottom paragraph border using the bottom - element (§17.3.1.7), spacing after the paragraph using the spacing element - (§17.3.1.33), and that spacing should be ignored for paragraphs above/below - of the same style using the contextualSpacing element (§17.3.1.9). - - -Schema excerpt -^^^^^^^^^^^^^^ - -:: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/dev/analysis/schema/ct_styles.rst b/docs/dev/analysis/schema/ct_styles.rst deleted file mode 100644 index 1977acc8a..000000000 --- a/docs/dev/analysis/schema/ct_styles.rst +++ /dev/null @@ -1,120 +0,0 @@ - -``CT_Styles`` -============= - -.. highlight:: xml - -.. 
csv-table:: - :header-rows: 0 - :stub-columns: 1 - :widths: 15, 50 - - Schema Name, CT_Styles - Spec Name, Styles - Tag(s), w:styles - Namespace, wordprocessingml (wml.xsd) - Spec Section, 17.7.4.18 - - -Analysis --------- - -Only styles with an explicit ```` definition affect the formatting -of paragraphs that are assigned that style. - -Word includes behavior definitions (```` elements) for the -"latent" styles that are built in to the Word client. These are present in a -new document created from install defaults. - -Word does not add a formatting definition (```` element) for a -built-in style until it is used. - -Once present in ``styles.xml``, Word does not remove a style element when it -is no longer used by any paragraphs. The definition of each of the styles -ever used in a document are accumulated in ``styles.xml``. - - -Spec text ---------- - - This element specifies all of the style information stored in the - WordprocessingML document: style definitions as well as latent style - information. - - Example: The Normal paragraph style in a word processing document can have - any number of formatting properties, e.g. font face = Times New Roman; font - size = 12pt; paragraph justification = left. All paragraphs which reference - this paragraph style would automatically inherit these properties. 
- - -Schema excerpt --------------- - -:: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/index.rst b/docs/index.rst index a79fa9644..aee0acfbf 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -31,27 +31,33 @@ Here's an example of what |docx| can do: p.add_run('italic.').italic = True document.add_heading('Heading, level 1', level=1) - document.add_paragraph('Intense quote', style='IntenseQuote') + document.add_paragraph('Intense quote', style='Intense Quote') document.add_paragraph( - 'first item in unordered list', style='ListBullet' + 'first item in unordered list', style='List Bullet' ) document.add_paragraph( - 'first item in ordered list', style='ListNumber' + 'first item in ordered list', style='List Number' ) document.add_picture('monty-truth.png', width=Inches(1.25)) + records = ( + (3, '101', 'Spam'), + (7, '422', 'Eggs'), + (4, '631', 'Spam, spam, eggs, and spam') + ) + table = document.add_table(rows=1, cols=3) hdr_cells = table.rows[0].cells hdr_cells[0].text = 'Qty' hdr_cells[1].text = 'Id' hdr_cells[2].text = 'Desc' - for item in recordset: + for qty, id, desc in records: row_cells = table.add_row().cells - row_cells[0].text = str(item.qty) - row_cells[1].text = str(item.id) - row_cells[2].text = item.desc + row_cells[0].text = str(qty) + row_cells[1].text = id + row_cells[2].text = desc document.add_page_break() @@ -68,11 +74,15 @@ User Guide user/install user/quickstart user/documents + user/tables + user/text user/sections + user/hdrftr user/api-concepts - user/styles + user/styles-understanding + user/styles-using + user/comments user/shapes - user/text API Documentation @@ -82,10 +92,14 @@ API Documentation :maxdepth: 2 api/document - api/table + api/settings + api/style api/text + api/table api/section + api/comments api/shape + api/dml api/shared api/enum/index diff --git a/docs/user/comments.rst b/docs/user/comments.rst new file mode 
100644 index 000000000..869d6f5f1 --- /dev/null +++ b/docs/user/comments.rst @@ -0,0 +1,168 @@ +.. _comments: + +Working with Comments +===================== + +Word allows *comments* to be added to a document. This is an aspect of the *reviewing* +feature-set and is typically used by a second party to provide feedback to the author +without changing the document itself. + +The procedure is simple: + +- You select some range of text with the mouse or Shift+Arrow keys +- You press the *New Comment* button (Review toolbar) +- You type or paste in your comment + +.. image:: /_static/img/comment-parts.png + +A comment can only be added to the main document. A comment cannot be added in a header, +a footer, or within a comment. A comment *can* be added to a footnote or endnote, but +those are not yet supported by *python-docx*. + +**Comment Anatomy.** Each comment has two parts, the *comment-reference* and the +*comment-content*: + +The **comment-reference**, sometimes *comment-anchor*, is the text in the main +document you selected before pressing the *New Comment* button. It is a so-called +*range* in the main document that starts at the first selected character and ends after +the last one. + +The **comment-content**, sometimes just *comment*, is whatever content you typed or +pasted in. The content for each comment is stored in a separate comment object, and +these comment objects are stored in a separate *comments-part* (part-name +``word/comments.xml``), not in the main document. Each comment is assigned a unique id +when it is created, allowing the comment reference to be associated with its content and +vice versa. + +**Comment Reference.** The comment-reference is a *range*. A range must both start and +end at an even *run* boundary. Intuitively, a range corresponds to a *selection* of text +in the Word UI, one formed by dragging with the mouse or using the *Shift-Arrow* keys.
+ +In the XML, this range is delimited by a start marker ``<w:commentRangeStart>`` and an +end marker ``<w:commentRangeEnd>``, both of which contain the *id* of the comment they +delimit. The start marker appears before the run starting with the first character of +the range and the end marker appears immediately after the run ending with the last +character of the range. Adding a comment that references an arbitrary range of text in +an existing document may require splitting runs on the desired character boundaries. + +In general a range can span paragraphs, such that the range begins in one paragraph and +ends in a later paragraph. However, a range must enclose *contiguous* runs, such that a +range that contains only two vertically adjacent cells in a multi-column table is not +possible (even though Word allows such a selection with the mouse). + +**Comment Content.** Interestingly, although commonly used to contain a single line of +plain text, the comment-content can contain essentially any content that can appear in +the document body. This includes rich text with emphasis, runs with a different typeface +and size, both paragraph and character styles, hyperlinks, images, and tables. Note that +tables do not appear in the comment as displayed in the *comment-sidebar* although they +do appear in the *reviewing-pane*. + +**Comment Metadata.** Each comment can be assigned *author*, *initials*, and *date* +metadata. In Word, these fields are assigned automatically based on values in ``Settings +> User`` of the installed Word application. These might be configured automatically in +an enterprise installation, based on the user account, but by default they are empty. + +*author* metadata is required, although silently assigned the empty string by Word if +the user name is not configured. *initials* is optional, but always set by Word, to the +empty string if not configured.
*date* is also optional, but always set by Word to the +UTC date and time the comment was added, with seconds resolution (no milliseconds or +microseconds). + +**Additional Features.** Later versions of Word allow a comment to be *resolved*. A +comment in this state will appear grayed-out in the Word UI. Later versions of Word also +allow a comment to be *replied to*, forming a *comment thread*. Neither of these +features is supported by the initial implementation of comments in *python-docx*. + +**Applicability.** Note that comments cannot be added to a header or footer and cannot +be nested inside a comment itself. In general the *python-docx* API will not allow these +operations but if you outsmart it then the resulting comment will either be silently +removed or trigger a repair error when the document is loaded by Word. + + +Adding a Comment +---------------- + +A simple example is adding a comment to a paragraph:: + + >>> from docx import Document + >>> document = Document() + >>> paragraph = document.add_paragraph("Hello, world!") + + >>> comment = document.add_comment( + ... runs=paragraph.runs, + ... text="I have this to say about that", + ... author="Steve Canny", + ... initials="SC", + ... ) + >>> comment + <docx.comments.Comment object at 0x02468ACE> + >>> comment.id + 0 + >>> comment.author + 'Steve Canny' + >>> comment.initials + 'SC' + >>> comment.date + datetime.datetime(2025, 6, 11, 20, 42, 30, tzinfo=datetime.timezone.utc) + >>> comment.text + 'I have this to say about that' + +The API documentation for :meth:`.Document.add_comment` provides further details.
+ + +Accessing and using the Comments collection +------------------------------------------- + +The comments collection is accessed via the :attr:`.Document.comments` property:: + + >>> comments = document.comments + >>> comments + <docx.comments.Comments object at 0x02468ACE> + >>> len(comments) + 1 + +The comments collection supports random access to a comment by its id:: + + >>> comment = comments.get(0) + >>> comment + <docx.comments.Comment object at 0x02468ACE> + + +Adding rich content to a comment +-------------------------------- + +A comment is a *block-item container*, just like the document body or a table cell, so +it can contain any content that can appear in those places. It does not contain +page-layout sections and cannot contain a comment reference, but it can contain multiple +paragraphs and/or tables, and runs within paragraphs can have emphasis such as bold or +italic, and have images or hyperlinks. + +A comment created with ``text=""`` will contain a single paragraph with a single empty run +containing the so-called *annotation reference* but no text. It's probably best to leave +this run as it is but you can freely add additional runs to the paragraph that contain +whatever content you like. + +The methods for adding this content are the same as those used for the document and +table cells:: + + >>> paragraph = document.add_paragraph("The rain in Spain.") + >>> comment = document.add_comment( + ... runs=paragraph.runs, + ... text="", + ... ) + >>> cmt_para = comment.paragraphs[0] + >>> cmt_para.add_run("Please finish this thought.
I believe it should be ") + >>> cmt_para.add_run("falls mainly in the plain.").bold = True + + +Updating comment metadata +------------------------- + +The author and initials metadata can be updated as desired:: + + >>> comment.author = "John Smith" + >>> comment.initials = "JS" + >>> comment.author + 'John Smith' + >>> comment.initials + 'JS' diff --git a/docs/user/documents.rst b/docs/user/documents.rst index ecdd82e95..ecdfefab1 100644 --- a/docs/user/documents.rst +++ b/docs/user/documents.rst @@ -1,3 +1,4 @@ +.. _documents: Working with Documents ========================== @@ -70,13 +71,13 @@ to (or aren't allowed to) interact with the file system. In practice this means you can pass an open file or StringIO/BytesIO stream object to open or save a document like so:: - f = open('foobar.docx') + f = open('foobar.docx', 'rb') document = Document(f) f.close() # or - with open('foobar.docx') as f: + with open('foobar.docx', 'rb') as f: source_stream = StringIO(f.read()) document = Document(source_stream) source_stream.close() @@ -84,6 +85,10 @@ a document like so:: target_stream = StringIO() document.save(target_stream) +The ``'rb'`` file open mode parameter isn't required on all operating +systems. It defaults to ``'r'`` which is enough sometimes, but the 'b' +(selecting binary mode) is required on Windows and at least some versions of +Linux to allow Zipfile to open the file. Okay, so you've got a document open and are pretty sure you can save it somewhere later. Next step is to get some content in there ... diff --git a/docs/user/hdrftr.rst b/docs/user/hdrftr.rst new file mode 100644 index 000000000..ae378536b --- /dev/null +++ b/docs/user/hdrftr.rst @@ -0,0 +1,166 @@ +.. _hdrftr: + +Working with Headers and Footers +================================ + +Word supports *page headers* and *page footers*. 
A page header is text that appears in +the top margin area of each page, separated from the main body of text, and usually +conveying context information, such as the document title, author, creation date, or the +page number. The page headers in a document are the same from page to page, with only +small differences in content, such as a changing section title or page number. A page +header is also known as a *running head*. + +A *page footer* is analogous in every way to a page header except that it appears at the +bottom of a page. It should not be confused with a footnote, which is not uniform +between pages. For brevity's sake, the term `header` is often used here to refer to what +may be either a header or footer object, trusting the reader to understand its +applicability to both object types. + + +Accessing the header for a section +---------------------------------- + +Headers and footers are linked to a `section`; this allows each section to have +a distinct header and/or footer. For example, a landscape section might have a wider +header than a portrait section. + +Each section object has a ``.header`` property providing access to a |_Header| object +for that section:: + + >>> document = Document() + >>> section = document.sections[0] + >>> header = section.header + >>> header + <docx.section._Header object at 0x...> + +A |_Header| object is `always` present on ``Section.header``, even when no header is +defined for that section. The presence of an actual header definition is indicated by +``_Header.is_linked_to_previous``:: + + >>> header.is_linked_to_previous + True + +A value of ``True`` indicates the |_Header| object contains no header definition and the +section will display the same header as the previous section. This "inheritance" +behavior is recursive, such that a "linked" header actually gets its definition from the +first prior section having a header definition. This "linked" state is indicated as +*"Same as previous"* in the Word UI.
+ +A new document does not have a header (on the single section it contains) and so +``.is_linked_to_previous`` is ``True`` in that case. Note this case may be a bit +counterintuitive in that there *is no previous section header* to link to. In +this "no previous header" case, no header is displayed. + + +Adding a header (simple case) +----------------------------- + +A header can be added to a new document simply by editing the content of the |_Header| +object. A |_Header| object is a "story" container and its content is edited just like +a |Document| object. Note that like a new document, a new header already contains +a single (empty) paragraph:: + + >>> paragraph = header.paragraphs[0] + >>> paragraph.text = "Title of my document" + +.. image:: /_static/img/hdrftr-01.png + :scale: 50% + +Note also that the act of adding content (or even just accessing ``header.paragraphs``) +added a header definition and changed the state of ``.is_linked_to_previous``:: + + >>> header.is_linked_to_previous + False + + +Adding "zoned" header content +----------------------------- + +A header with multiple "zones" is often accomplished using carefully placed tab stops. + +The required tab-stops for a center and right-aligned "zone" are part of the ``Header`` +and ``Footer`` styles in Word. If you're using a custom template rather than the +`python-docx` default, it probably makes sense to define that style in your template. + +Inserted tab characters (``"\t"``) are used to separate left, center, and right-aligned +header content:: + + >>> paragraph = header.paragraphs[0] + >>> paragraph.text = "Left Text\tCenter Text\tRight Text" + >>> paragraph.style = document.styles["Header"] + +.. image:: /_static/img/hdrftr-02.png + :scale: 75% + +The ``Header`` style is automatically applied to a new header, so the third line just +above (applying the ``Header`` style) is unnecessary in this case, but included here to +illustrate the general case. 
+ + +Removing a header +----------------- + +An unwanted header can be removed by assigning ``True`` to its +``.is_linked_to_previous`` attribute:: + + >>> header.is_linked_to_previous = True + >>> header.is_linked_to_previous + True + +The content for a header is irreversibly deleted when ``True`` is assigned to +``.is_linked_to_previous``. + + +Understanding headers in a multi-section document +------------------------------------------------- + +The "just start editing" approach works fine for the simple case, but to make sense of +header behaviors in a multi-section document, a few simple concepts will be helpful. +Here they are in a nutshell: + +1. Each section can have its own header definition (but doesn't have to). + +2. A section that lacks a header definition inherits the header of the section before + it. The ``_Header.is_linked_to_previous`` property simply reflects the presence of + a header definition, ``False`` when a definition is present and ``True`` when not. + +3. Lacking a header definition is the default state. A new document has no defined + header and neither does a newly-inserted section. ``.is_linked_to_previous`` reports + ``True`` in both those cases. + +4. The content of a ``_Header`` object is its own content if it has a header definition. + If not, its content is that of the first prior section that `does` have a header + definition. If no sections have a header definition, a new one is added on the first + section and all other sections inherit that one. This adding of a header definition + happens the first time header content is accessed, perhaps by referencing + ``header.paragraphs``.
+ + +Adding a header definition (general case) +----------------------------------------- + +An explicit header definition can be given to a section that lacks one by assigning +``False`` to its ``.is_linked_to_previous`` property:: + + >>> header.is_linked_to_previous + True + >>> header.is_linked_to_previous = False + >>> header.is_linked_to_previous + False + +The newly added header definition contains a single empty paragraph. Note that leaving +the header this way is occasionally useful as it effectively "turns-off" a header for +that section and those after it until the next section with a defined header. + +Assigning ``False`` to ``.is_linked_to_previous`` on a header that already has a header +definition does nothing. + + +Inherited content is automatically located +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Editing the content of a header edits the content of the `source` header, taking into +account any "inheritance". So for example, if the section 2 header inherits from section +1 and you edit the section 2 header, you actually change the contents of the section +1 header. A new header definition is not added for section 2 unless you first explicitly +assign ``False`` to its ``.is_linked_to_previous`` property. diff --git a/docs/user/quickstart.rst b/docs/user/quickstart.rst index 01c5f2729..0d6982ee0 100644 --- a/docs/user/quickstart.rst +++ b/docs/user/quickstart.rst @@ -115,9 +115,9 @@ supports indexed access, like a list:: row.cells[0].text = 'Foo bar to you.' row.cells[1].text = 'And a hearty foo bar to you too sir!' -The ``.rows`` and ``.columns`` collections on a table are iterable, so you can -use them directly in a ``for`` loop. Same with the ``.cells`` sequences on -a row or column:: +The ``.rows`` and ``.columns`` collections on a table are iterable, so you +can use them directly in a ``for`` loop. 
Same with the ``.cells`` sequences +on a row or column:: for row in table.rows: for cell in row.cells: @@ -132,12 +132,16 @@ the sequence:: You can also add rows to a table incrementally like so:: row = table.add_row() - + This can be very handy for the variable length table scenario we mentioned above:: # get table data ------------- - items = get_things_from_database_or_something() + items = ( + (7, '1024', 'Plush kittens'), + (3, '2042', 'Furbees'), + (1, '1288', 'French Poodle Collars, Deluxe'), + ) # add table ------------------ table = document.add_table(1, 3) @@ -185,7 +189,7 @@ or over a network and don't want to get the filesystem involved. Image size ~~~~~~~~~~ -By default, the added image appears at *native* size. This is often bigger than +By default, the added image appears at `native` size. This is often bigger than you want. Native size is calculated as ``pixels / dpi``. So a 300x300 pixel image having 300 dpi resolution appears in a one inch square. The problem is most images don't contain a dpi property and it defaults to 72 dpi. This would @@ -229,14 +233,11 @@ thing. You can also apply a style afterward. These two lines are equivalent to the one above:: paragraph = document.add_paragraph('Lorem ipsum dolor sit amet.') - paragraph.style = 'ListBullet' + paragraph.style = 'List Bullet' -The style is specified using its style ID, 'ListBullet' in this example. -Generally, the style ID is formed by removing the spaces in the style name as -it appears in the Word user interface (UI). So the style 'List Number 3' -would be specified as ``'ListNumber3'``. However, note that if you are using -a localized version of Word, the style ID may be derived from the English -style name and may not correspond so neatly to its style name in the Word UI. +The style is specified using its style name, 'List Bullet' in this example. +Generally, the style name is exactly as it appears in the Word user interface +(UI). 
Applying bold and italic @@ -249,7 +250,7 @@ a little about what goes on inside a paragraph. The short version is this: height, tabs, and so forth. #. Character-level formatting, such as bold and italic, are applied at the - *run* level. All content within a paragraph must be within a run, but there + `run` level. All content within a paragraph must be within a run, but there can be more than one. So a paragraph with a bold word in the middle would need three runs, a normal one, a bold one containing the word, and another normal one for the text after. @@ -282,14 +283,14 @@ Note that you can set bold or italic right on the result of ``.add_run()`` if you don't need it for anything else:: paragraph.add_run('dolor').bold = True - + # is equivalent to: run = paragraph.add_run('dolor') run.bold = True # except you don't have a reference to `run` afterward - + It's not necessary to provide text to the ``.add_paragraph()`` method. This can make your code simpler if you're building the paragraph up from runs anyway:: @@ -298,7 +299,7 @@ make your code simpler if you're building the paragraph up from runs anyway:: paragraph.add_run('Lorem ipsum ') paragraph.add_run('dolor').bold = True paragraph.add_run(' sit amet.') - + Applying a character style -------------------------- @@ -308,7 +309,9 @@ settings, Word has *character styles* which specify a group of run-level settings. In general you can think of a character style as specifying a font, including its typeface, size, color, bold, italic, etc. -Like paragraph styles, a character style must already be defined in the document you open with the ``Document()`` call (*see* :doc:`styles`). +Like paragraph styles, a character style must already be defined in the +document you open with the ``Document()`` call (`see` +:ref:`understanding_styles`). 
A character style can be specified when adding a new run:: @@ -322,8 +325,4 @@ the same result as the lines above:: run = paragraph.add_run('text with emphasis.') run.style = 'Emphasis' -As with a paragraph style, the style ID is formed by removing the spaces in -the name as it appears in the Word UI. So the style 'Subtle Emphasis' would -be specified as ``'SubtleEmphasis'``. Note that if you are using -a localized version of Word, the style ID may be derived from the English -style name and may not correspond to its style name in the Word UI. +As with a paragraph style, the style name is as it appears in the Word UI. diff --git a/docs/user/sections.rst b/docs/user/sections.rst index 6ab37015b..895021874 100644 --- a/docs/user/sections.rst +++ b/docs/user/sections.rst @@ -3,15 +3,14 @@ Working with Sections ===================== -Word supports the notion of a *section*, a division of a document having the -same page layout settings, such as margins and page orientation. This is how, -for example, a document can contain some pages in portrait layout and others in -landscape. +Word supports the notion of a `section`, a division of a document having the same page +layout settings, such as margins and page orientation. This is how, for example, a +document can contain some pages in portrait layout and others in landscape. Each section +also defines the headers and footers that apply to the pages of that section. -Most Word documents have only the single section that comes by default and -further, most of those have no reason to change the default margins or other -page layout. But when you *do* need to change the page layout, you'll need -to understand sections to get it done. +Most Word documents have only the single section that comes by default and further, most +of those have no reason to change the default margins or other page layout. But when you +`do` need to change the page layout, you'll need to understand sections to get it done. 
Accessing sections @@ -52,7 +51,7 @@ The :meth:`Document.add_section` method allows a new section to be started at the end of the document. Paragraphs and tables added after calling this method will appear in the new section:: - >>> current_section = document.section[-1] # last section in document + >>> current_section = document.sections[-1] # last section in document >>> current_section.start_type NEW_PAGE (2) >>> new_section = document.add_section(WD_SECTION.ODD_PAGE) @@ -98,7 +97,7 @@ from portrait to landscape:: >>> section.page_width = new_width >>> section.page_height = new_height >>> section.orientation, section.page_width, section.page_height - (LANDSCAPE (1), 10058400, 7772400) + (LANDSCAPE (1), 10058400, 7772400) Page margins diff --git a/docs/user/shapes.rst b/docs/user/shapes.rst index ec5d22797..5dcefbf61 100644 --- a/docs/user/shapes.rst +++ b/docs/user/shapes.rst @@ -2,11 +2,11 @@ Understanding pictures and other shapes ======================================= -Conceptually, Word documents have two *layers*, a *text layer* and a *drawing +Conceptually, Word documents have two `layers`, a *text layer* and a *drawing layer*. In the text layer, text objects are flowed from left to right and from top to bottom, starting a new page when the prior one is filled. In the drawing -layer, drawing objects, called *shapes*, are placed at arbitrary positions. -These are sometimes referred to as *floating* shapes. +layer, drawing objects, called `shapes`, are placed at arbitrary positions. +These are sometimes referred to as `floating` shapes. A picture is a shape that can appear in either the text or drawing layer. When it appears in the text layer it is called an *inline shape*, or more diff --git a/docs/user/styles-understanding.rst b/docs/user/styles-understanding.rst new file mode 100644 index 000000000..114b7ad6a --- /dev/null +++ b/docs/user/styles-understanding.rst @@ -0,0 +1,382 @@ +.. 
_understanding_styles: + +Understanding Styles +==================== + +**Grasshopper:** + *"Master, why doesn't my paragraph appear with the style I specified?"* + +**Master:** + *"You have come to the right page Grasshopper; read on ..."* + + +What is a style in Word? +------------------------ + +Documents communicate better when like elements are formatted consistently. To +achieve that consistency, professional document designers develop a *style +sheet* which defines the document element types and specifies how each should +be formatted. For example, perhaps body paragraphs are to be set in 9 pt Times +Roman with a line height of 11 pt, justified flush left, ragged right. When +these specifications are applied to each of the elements of the document, +a consistent and polished look is achieved. + +A style in Word is such a set of specifications that may be applied, all at +once, to a document element. Word has paragraph styles, character styles, table +styles, and numbering definitions. These are applied to a paragraph, a span of +text, a table, and a list, respectively. + +Experienced programmers will recognize styles as a level of indirection. The +great thing about those is it allows you to define something once, then apply +that definition many times. This saves the work of defining the same thing +over and over; but more importantly it allows you to change the definition and +have that change reflected in all the places you have applied it. + + +Why doesn't the style I applied show up? +---------------------------------------- + +This is likely to show up quite a bit until I can add some fancier features to +work around it, so here it is up top. + +#. When you're working in Word, there are all these styles you can apply to + things, pretty good looking ones that look all the better because you don't + have to make them yourself. Most folks never look further than the built-in + styles. + +#. 
Although those styles show up in the UI, they're not actually in the + document you're creating, at least not until you use it for the first time. + That's kind of a good thing. They take up room and there's a lot of them. + The file would get a little bloated if it contained all the style + definitions you could use but haven't. + +#. If you apply a style using |docx| that's not defined in your file (in the + styles.xml part if you're curious), Word just ignores it. It doesn't + complain, it just doesn't change how things are formatted. I'm sure + there's a good reason for this. But it can present as a bit of a puzzle if + you don't understand how Word works that way. + +#. When you use a style, Word adds it to the file. Once there, it stays. + I imagine there's a way to get rid of it, but you have to work at it. If + you apply a style, delete the content you applied it to, and then save the + document; the style definition stays in the saved file. + +All this adds up to the following: If you want to use a style in a document you +create with |docx|, the document you start with must contain the style +definition. Otherwise it just won't work. It won't raise an exception, it just +won't work. + +If you use the "default" template document, it contains the styles listed +below, most of the ones you're likely to want if you're not designing your own. +If you're using your own starting document, you need to use each of the styles +you want at least once in it. You don't have to keep the content, but you need +to apply the style to something at least once before saving the document. +Creating a one-word paragraph, applying five styles to it in succession and +then deleting the paragraph works fine. That's how I got the ones below into +the default template :). + + +Glossary +-------- + +style definition + A ``<w:style>`` element in the styles part of a document that explicitly + defines the attributes of a style. 
+ +defined style + A style that is explicitly defined in a document. Contrast with *latent + style*. + +built-in style + One of the set of 276 pre-set styles built into Word, such as "Heading + 1". A built-in style can be either defined or latent. A built-in style + that is not yet defined is known as a *latent style*. Both defined and + latent built-in styles may appear as options in Word's style panel and + style gallery. + +custom style + Also known as a *user defined style*, any style defined in a Word + document that is not a built-in style. Note that a custom style cannot be + a latent style. + +latent style + A built-in style having no definition in a particular document is known + as a *latent style* in that document. A latent style can appear as an + option in the Word UI depending on the settings in the |LatentStyles| + object for the document. + +recommended style list + A list of styles that appears in the styles toolbox or panel when + "Recommended" is selected from the "List:" dropdown box. + +Style Gallery + The selection of example styles that appear in the ribbon of the Word UI + and which may be applied by clicking on one of them. + + +Identifying a style +------------------- + +A style has three identifying properties, `name`, `style_id`, and `type`. + +Each style's :attr:`name` property is its stable, unique identifier for +access purposes. + +A style's :attr:`style_id` is used internally to key a content object such as +a paragraph to its style. However this value is generated automatically by +Word and is not guaranteed to be stable across saves. In general, the style +id is formed simply by removing spaces from the `localized` style name, +however there are exceptions. Users of |docx| should generally avoid using +the style id unless they are confident with the internals involved. + +A style's :attr:`type` is set at creation time and cannot be changed. + + +.. 
_builtin_styles: + +Built-in styles +--------------- + +Word comes with almost 300 so-called *built-in* styles like `Normal`, +`Heading 1`, and `List Bullet`. Style definitions are stored in the +`styles.xml` part of a .docx package, but built-in style definitions are +stored in the Word application itself and are not written to `styles.xml` +until they are actually used. This is a sensible strategy because they take +up considerable room and would be largely redundant and useless overhead in +every .docx file otherwise. + +The fact that built-in styles are not written to the .docx package until used +gives rise to the need for *latent style* definitions, explained below. + + +.. _style_behavior: + +Style Behavior +-------------- + +In addition to collecting a set of formatting properties, a style has five +properties that specify its `behavior`. This behavior is relatively simple, +basically amounting to when and where the style appears in the Word or +LibreOffice UI. + +The key notion to understanding style behavior is the recommended list. In +the style pane in Word, the user can select which list of styles they want to +see. One of these is named `Recommended` and is known as the *recommended +list*. All five behavior properties affect some aspect of the style’s +appearance in this list and in the style gallery. + +In brief, a style appears in the recommended list if its :attr:`hidden` +property is |False| (the default). If a style is not hidden and its +:attr:`quick_style` property is |True|, it also appears in the style gallery. +If a hidden style's :attr:`unhide_when_used` property is |True|, its hidden +property is set |False| the first time it is used. Styles in the style lists +and style gallery are sorted in :attr:`priority` order, then alphabetically +for styles of the same priority. 
If a style's :attr:`locked` property is +|True| and formatting restrictions are turned on for the document, the style +will not appear in any list or the style gallery and cannot be applied to +content. + + +.. _latent_styles: + +Latent styles +------------- + +The need to specify the UI behavior of built-in styles not defined in +`styles.xml` gives rise to the need for *latent style* definitions. A latent +style definition is basically a stub style definition that has at most the +five behavior attributes in addition to the style name. Additional space is +saved by defining defaults for each of the behavior attributes, so only those +that differ from the default need be defined and styles that match all +defaults need no latent style definition. + +Latent style definitions are specified using the `w:latentStyles` and +`w:lsdException` elements appearing in `styles.xml`. + +A latent style definition is only required for a built-in style because only +a built-in style can appear in the UI without a style definition in +`styles.xml`. + + +Style inheritance +----------------- + +A style can inherit properties from another style, somewhat similarly to how +Cascading Style Sheets (CSS) works. Inheritance is specified using the +:attr:`~.BaseStyle.base_style` attribute. By basing one style on another, an +inheritance hierarchy of arbitrary depth can be formed. A style having no +base style inherits properties from the document defaults. 
+ + +Paragraph styles in default template +------------------------------------ + +* Normal +* Body Text +* Body Text 2 +* Body Text 3 +* Caption +* Heading 1 +* Heading 2 +* Heading 3 +* Heading 4 +* Heading 5 +* Heading 6 +* Heading 7 +* Heading 8 +* Heading 9 +* Intense Quote +* List +* List 2 +* List 3 +* List Bullet +* List Bullet 2 +* List Bullet 3 +* List Continue +* List Continue 2 +* List Continue 3 +* List Number +* List Number 2 +* List Number 3 +* List Paragraph +* Macro Text +* No Spacing +* Quote +* Subtitle +* TOCHeading +* Title + + +Character styles in default template +------------------------------------ + +* Body Text Char +* Body Text 2 Char +* Body Text 3 Char +* Book Title +* Default Paragraph Font +* Emphasis +* Heading 1 Char +* Heading 2 Char +* Heading 3 Char +* Heading 4 Char +* Heading 5 Char +* Heading 6 Char +* Heading 7 Char +* Heading 8 Char +* Heading 9 Char +* Intense Emphasis +* Intense Quote Char +* Intense Reference +* Macro Text Char +* Quote Char +* Strong +* Subtitle Char +* Subtle Emphasis +* Subtle Reference +* Title Char + + +Table styles in default template +-------------------------------- + +* Table Normal +* Colorful Grid +* Colorful Grid Accent 1 +* Colorful Grid Accent 2 +* Colorful Grid Accent 3 +* Colorful Grid Accent 4 +* Colorful Grid Accent 5 +* Colorful Grid Accent 6 +* Colorful List +* Colorful List Accent 1 +* Colorful List Accent 2 +* Colorful List Accent 3 +* Colorful List Accent 4 +* Colorful List Accent 5 +* Colorful List Accent 6 +* Colorful Shading +* Colorful Shading Accent 1 +* Colorful Shading Accent 2 +* Colorful Shading Accent 3 +* Colorful Shading Accent 4 +* Colorful Shading Accent 5 +* Colorful Shading Accent 6 +* Dark List +* Dark List Accent 1 +* Dark List Accent 2 +* Dark List Accent 3 +* Dark List Accent 4 +* Dark List Accent 5 +* Dark List Accent 6 +* Light Grid +* Light Grid Accent 1 +* Light Grid Accent 2 +* Light Grid Accent 3 +* Light Grid Accent 4 +* Light Grid Accent 5 +* Light Grid 
Accent 6 +* Light List +* Light List Accent 1 +* Light List Accent 2 +* Light List Accent 3 +* Light List Accent 4 +* Light List Accent 5 +* Light List Accent 6 +* Light Shading +* Light Shading Accent 1 +* Light Shading Accent 2 +* Light Shading Accent 3 +* Light Shading Accent 4 +* Light Shading Accent 5 +* Light Shading Accent 6 +* Medium Grid 1 +* Medium Grid 1 Accent 1 +* Medium Grid 1 Accent 2 +* Medium Grid 1 Accent 3 +* Medium Grid 1 Accent 4 +* Medium Grid 1 Accent 5 +* Medium Grid 1 Accent 6 +* Medium Grid 2 +* Medium Grid 2 Accent 1 +* Medium Grid 2 Accent 2 +* Medium Grid 2 Accent 3 +* Medium Grid 2 Accent 4 +* Medium Grid 2 Accent 5 +* Medium Grid 2 Accent 6 +* Medium Grid 3 +* Medium Grid 3 Accent 1 +* Medium Grid 3 Accent 2 +* Medium Grid 3 Accent 3 +* Medium Grid 3 Accent 4 +* Medium Grid 3 Accent 5 +* Medium Grid 3 Accent 6 +* Medium List 1 +* Medium List 1 Accent 1 +* Medium List 1 Accent 2 +* Medium List 1 Accent 3 +* Medium List 1 Accent 4 +* Medium List 1 Accent 5 +* Medium List 1 Accent 6 +* Medium List 2 +* Medium List 2 Accent 1 +* Medium List 2 Accent 2 +* Medium List 2 Accent 3 +* Medium List 2 Accent 4 +* Medium List 2 Accent 5 +* Medium List 2 Accent 6 +* Medium Shading 1 +* Medium Shading 1 Accent 1 +* Medium Shading 1 Accent 2 +* Medium Shading 1 Accent 3 +* Medium Shading 1 Accent 4 +* Medium Shading 1 Accent 5 +* Medium Shading 1 Accent 6 +* Medium Shading 2 +* Medium Shading 2 Accent 1 +* Medium Shading 2 Accent 2 +* Medium Shading 2 Accent 3 +* Medium Shading 2 Accent 4 +* Medium Shading 2 Accent 5 +* Medium Shading 2 Accent 6 +* Table Grid diff --git a/docs/user/styles-using.rst b/docs/user/styles-using.rst new file mode 100644 index 000000000..93dd7a344 --- /dev/null +++ b/docs/user/styles-using.rst @@ -0,0 +1,391 @@ + +Working with Styles +=================== + +This page uses concepts developed in the prior page without introduction. If +a term is unfamiliar, consult the prior page :ref:`understanding_styles` for +a definition. 
+ + +Access a style +-------------- + +Styles are accessed using the :attr:`.Document.styles` attribute:: + + >>> document = Document() + >>> styles = document.styles + >>> styles + + +The |Styles| object provides dictionary-style access to defined styles by +name:: + + >>> styles['Normal'] + + +.. note:: Built-in styles are stored in a WordprocessingML file using their + English name, e.g. 'Heading 1', even though users working on a localized + version of Word will see native language names in the UI, e.g. 'Kop 1'. + Because |docx| operates on the WordprocessingML file, style lookups must + use the English name. A document available on this external site allows + you to create a mapping between local language names and English style + names: + http://www.thedoctools.com/index.php?show=mt_create_style_name_list + + User-defined styles, also known as *custom styles*, are not localized and + are accessed with the name exactly as it appears in the Word UI. + +The |Styles| object is also iterable. By using the identification properties +on |BaseStyle|, various subsets of the defined styles can be generated. For +example, this code will produce a list of the defined paragraph styles:: + + >>> from docx.enum.style import WD_STYLE_TYPE + >>> styles = document.styles + >>> paragraph_styles = [ + ... s for s in styles if s.type == WD_STYLE_TYPE.PARAGRAPH + ... ] + >>> for style in paragraph_styles: + ... print(style.name) + ... + Normal + Body Text + List Bullet + + +Apply a style +------------- + +The |Paragraph|, |Run|, and |Table| objects each have a :attr:`style` +attribute. 
Assigning a style object to this attribute applies that style:: + + >>> document = Document() + >>> paragraph = document.add_paragraph() + >>> paragraph.style + + >>> paragraph.style.name + 'Normal' + >>> paragraph.style = document.styles['Heading 1'] + >>> paragraph.style.name + 'Heading 1' + +A style name can also be assigned directly, in which case |docx| will do the +lookup for you:: + + >>> paragraph.style = 'List Bullet' + >>> paragraph.style + + >>> paragraph.style.name + 'List Bullet' + +A style can also be applied at creation time using either the style object or +its name:: + + >>> paragraph = document.add_paragraph(style='Body Text') + >>> paragraph.style.name + 'Body Text' + >>> body_text_style = document.styles['Body Text'] + >>> paragraph = document.add_paragraph(style=body_text_style) + >>> paragraph.style.name + 'Body Text' + + +Add or delete a style +--------------------- + +A new style can be added to the document by specifying a unique name and +a style type:: + + >>> from docx.enum.style import WD_STYLE_TYPE + >>> styles = document.styles + >>> style = styles.add_style('Citation', WD_STYLE_TYPE.PARAGRAPH) + >>> style.name + 'Citation' + >>> style.type + PARAGRAPH (1) + +Use the :attr:`~.BaseStyle.base_style` property to specify a style the new +style should inherit formatting settings from:: + + >>> style.base_style + None + >>> style.base_style = styles['Normal'] + >>> style.base_style + + >>> style.base_style.name + 'Normal' + +A style can be removed from the document simply by calling its +:meth:`~.BaseStyle.delete` method:: + + >>> styles = document.styles + >>> len(styles) + 10 + >>> styles['Citation'].delete() + >>> len(styles) + 9 + +.. note:: The :meth:`.Style.delete` method removes the style's definition + from the document. It does not affect content in the document to which + that style is applied. Content having a style not defined in the document + is rendered using the default style for that content object, e.g. 
+ 'Normal' in the case of a paragraph. + + +Define character formatting +--------------------------- + +Character, paragraph, and table styles can all specify character formatting +to be applied to content with that style. All the character formatting that +can be applied directly to text can be specified in a style. Examples include +font typeface and size, bold, italic, and underline. + +Each of these three style types has a :attr:`~._CharacterStyle.font` +attribute providing access to a |Font| object. A style's |Font| object +provides properties for getting and setting the character formatting for that +style. + +Several examples are provided here. For a complete set of the available +properties, see the |Font| API documentation. + +The font for a style can be accessed like this:: + + >>> from docx import Document + >>> document = Document() + >>> style = document.styles['Normal'] + >>> font = style.font + +Typeface and size are set like this:: + + >>> from docx.shared import Pt + >>> font.name = 'Calibri' + >>> font.size = Pt(12) + +Many font properties are *tri-state*, meaning they can take the values +|True|, |False|, and |None|. |True| means the property is "on", |False| means +it is "off". Conceptually, the |None| value means "inherit". Because a style +exists in an inheritance hierarchy, it is important to have the ability to +specify a property at the right place in the hierarchy, generally as far up +the hierarchy as possible. For example, if all headings should be in the +Arial typeface, it makes more sense to set that property on the `Heading 1` +style and have `Heading 2` inherit from `Heading 1`. + +Bold and italic are tri-state properties, as are all-caps, strikethrough, +superscript, and many others. 
See the |Font| API documentation for a full +list:: + + >>> font.bold, font.italic + (None, None) + >>> font.italic = True + >>> font.italic + True + >>> font.italic = False + >>> font.italic + False + >>> font.italic = None + >>> font.italic + None + +Underline is a bit of a special case. It is a hybrid of a tri-state property +and an enumerated value property. |True| means single underline, by far the +most common. |False| means no underline, but more often |None| is the right +choice if no underlining is wanted since it is rare to inherit it from a base +style. The other forms of underlining, such as double or dashed, are +specified with a member of the :ref:`WdUnderline` enumeration:: + + >>> font.underline + None + >>> font.underline = True + >>> # or perhaps + >>> font.underline = WD_UNDERLINE.DOT_DASH + + +Define paragraph formatting +--------------------------- + +Both a paragraph style and a table style allow paragraph formatting to be +specified. These styles provide access to a |ParagraphFormat| object via +their :attr:`~._ParagraphStyle.paragraph_format` property. + +Paragraph formatting includes layout behaviors such as justification, +indentation, space before and after, page break before, and widow/orphan +control. For a complete list of the available properties, consult the API +documentation page for the |ParagraphFormat| object. 
+ +Here's an example of how you would create a paragraph style having hanging +indentation of 1/4 inch, 12 points spacing above, and widow/orphan control:: + + >>> from docx.enum.style import WD_STYLE_TYPE + >>> from docx.shared import Inches, Pt + >>> document = Document() + >>> style = document.styles.add_style('Indent', WD_STYLE_TYPE.PARAGRAPH) + >>> paragraph_format = style.paragraph_format + >>> paragraph_format.left_indent = Inches(0.25) + >>> paragraph_format.first_line_indent = Inches(-0.25) + >>> paragraph_format.space_before = Pt(12) + >>> paragraph_format.widow_control = True + + +Use paragraph-specific style properties +--------------------------------------- + +A paragraph style has a :attr:`~._ParagraphStyle.next_paragraph_style` +property that specifies the style to be applied to new paragraphs inserted +after a paragraph of that style. This is most useful when the style would +normally appear only once in a sequence, such as a heading. In that case, the +paragraph style can automatically be set back to a body style after +completing the heading. + +In the most common case (body paragraphs), subsequent paragraphs should +receive the same style as the current paragraph. The default handles this +case well by applying the same style if a next paragraph style is not +specified. 
+ +Here's an example of how you would change the next paragraph style of the +*Heading 1* style to *Body Text*:: + + >>> from docx import Document + >>> document = Document() + >>> styles = document.styles + + >>> styles['Heading 1'].next_paragraph_style = styles['Body Text'] + +The default behavior can be restored by assigning |None| or the style itself:: + + >>> heading_1_style = styles['Heading 1'] + >>> heading_1_style.next_paragraph_style.name + 'Body Text' + + >>> heading_1_style.next_paragraph_style = heading_1_style + >>> heading_1_style.next_paragraph_style.name + 'Heading 1' + + >>> heading_1_style.next_paragraph_style = None + >>> heading_1_style.next_paragraph_style.name + 'Heading 1' + + +Control how a style appears in the Word UI +------------------------------------------ + +The properties of a style fall into two categories, *behavioral properties* +and *formatting properties*. Its behavioral properties control when and where +the style appears in the Word UI. Its formatting properties determine the +formatting of content to which the style is applied, such as the size of the +font and its paragraph indentation. + +There are five behavioral properties of a style: + +* :attr:`~.BaseStyle.hidden` +* :attr:`~.BaseStyle.unhide_when_used` +* :attr:`~.BaseStyle.priority` +* :attr:`~.BaseStyle.quick_style` +* :attr:`~.BaseStyle.locked` + +See the :ref:`style_behavior` section in :ref:`understanding_styles` for +a description of how these behavioral properties interact to determine when +and where a style appears in the Word UI. + +The :attr:`priority` property takes an integer value. The other four style +behavior properties are *tri-state*, meaning they can take the value |True| +(on), |False| (off), or |None| (inherit). 
+ +Display a style in the style gallery +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following code will cause the 'Body Text' paragraph style to appear first +in the style gallery:: + + >>> from docx import Document + >>> document = Document() + >>> style = document.styles['Body Text'] + + >>> style.hidden = False + >>> style.quick_style = True + >>> style.priority = 1 + +Remove a style from the style gallery +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This code will remove the 'Normal' paragraph style from the style gallery, +but allow it to remain in the recommended list:: + + >>> style = document.styles['Normal'] + + >>> style.hidden = False + >>> style.quick_style = False + + +Working with Latent Styles +-------------------------- + +See the :ref:`builtin_styles` and :ref:`latent_styles` sections in +:ref:`understanding_styles` for a description of how latent styles define the +behavioral properties of built-in styles that are not yet defined in the +`styles.xml` part of a .docx file. + +Access the latent styles in a document +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The latent styles in a document are accessed from the styles object:: + + >>> document = Document() + >>> latent_styles = document.styles.latent_styles + +A |LatentStyles| object supports :meth:`len`, iteration, and dictionary-style +access by style name:: + + >>> len(latent_styles) + 161 + + >>> latent_style_names = [ls.name for ls in latent_styles] + >>> latent_style_names + ['Normal', 'Heading 1', 'Heading 2', ... 'TOC Heading'] + + >>> latent_quote = latent_styles['Quote'] + >>> latent_quote + + >>> latent_quote.priority + 29 + +Change latent style defaults +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The |LatentStyles| object also provides access to the default behavioral +properties for built-in styles in the current document. 
These defaults +provide the value for any undefined attributes of the |_LatentStyle| +definitions and to all behavioral properties of built-in styles having no +explicit latent style definition. See the API documentation for the +|LatentStyles| object for the complete set of available properties:: + + >>> latent_styles.default_to_locked + False + >>> latent_styles.default_to_locked = True + >>> latent_styles.default_to_locked + True + +Add a latent style definition +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A new latent style can be added using the +:meth:`~.LatentStyles.add_latent_style` method on |LatentStyles|. This code +adds a new latent style for the builtin style 'List Bullet', setting it to +appear in the style gallery:: + + >>> latent_style = latent_styles['List Bullet'] + KeyError: no latent style with name 'List Bullet' + >>> latent_style = latent_styles.add_latent_style('List Bullet') + >>> latent_style.hidden = False + >>> latent_style.priority = 2 + >>> latent_style.quick_style = True + +Delete a latent style definition +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A latent style definition can be deleted by calling its +:meth:`~.LatentStyle.delete` method:: + + >>> latent_styles['Light Grid'] + + >>> latent_styles['Light Grid'].delete() + >>> latent_styles['Light Grid'] + KeyError: no latent style with name 'Light Grid' diff --git a/docs/user/styles.rst b/docs/user/styles.rst deleted file mode 100644 index 87e34272d..000000000 --- a/docs/user/styles.rst +++ /dev/null @@ -1,249 +0,0 @@ - -Understanding Styles -==================== - -**Grasshopper:** - *"Master, why doesn't my paragraph appear with the style I specified?"* - -**Master:** - *"You have come to the right page Grasshopper; read on ..."* - - -What is a style in Word? ------------------------- - -Documents communicate better when like elements are formatted consistently. 
To -achieve that consistency, professional document designers develop a *style -sheet* which defines the document element types and specifies how each should -be formatted. For example, perhaps body paragraphs are to be set in 9 pt Times -Roman with a line height of 11 pt, justified flush left, ragged right. When -these specifications are applied to each of the elements of the document, -a consistent and polished look is achieved. - -A style in Word is such a set of specifications that may be applied, all at -once, to a document element. Word has paragraph styles, character styles, table -styles, and numbering definitions. These are applied to a paragraph, a span of -text, a table, and a list, respectively. - -Experienced programmers will recognize styles as a level of indirection. The -great thing about those is it allows you to define something once, then apply -that definition many times. This saves the work of defining the same thing over -an over; but more importantly it allows you to change it the definition and -have that change reflected in all the places you originally applied it. - - -Why doesn't the style I applied show up? ----------------------------------------- - -This is likely to show up quite a bit until I can add some fancier features to -work around it, so here it is up top. - -#. When you're working in Word, there are all these styles you can apply to - things, pretty good looking ones that look all the better because you don't - have to make them yourself. Most folks never look further than the built-in - styles. - -#. Although those styles show up in the UI, they're not actually in the - document you're creating, at least not until you use it for the first time. - That's kind of a good thing. They take up room and there's a lot of them. - The file would get a little bloated if it contained all the style - definitions you could use but haven't. - -#. 
If you apply a style that's not defined in your file (in the styles.xml part - if you're curious), Word just ignores it. It doesn't complain, it just - doesn't change how things are formatted. I'm sure there's a good reason for - this. But it can present as a bit of a puzzle if you don't understand how - Word works that way. - -#. When you use a style, Word adds it to the file. Once there, it stays. - I imagine there's a way to get rid of it, but you have to work at it. If - you apply a style, delete the content you applied it to, and then save the - document; the style definition stays in the saved file. - -All this adds up to the following: If you want to use a style in a document you -create with |docx|, the document you start with must contain the style -definition. Otherwise it just won't work. It won't raise an exception, it just -won't work. - -If you use the "default" template document, it contains the styles listed -below, most of the ones you're likely to want if you're not designing your own. -If you're using your own starting document, you need to use each of the styles -you want at least once in it. You don't have to keep the content, but you need -to apply the style to something at least once before saving the document. -Creating a one-word paragraph, applying five styles to it in succession and -then deleting the paragraph works fine. That's how I got the ones below into -the default template :). 
- - -Paragraph styles in default template -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* Normal -* BodyText -* BodyText2 -* BodyText3 -* Caption -* Heading1 -* Heading2 -* Heading3 -* Heading4 -* Heading5 -* Heading6 -* Heading7 -* Heading8 -* Heading9 -* IntenseQuote -* List -* List2 -* List3 -* ListBullet -* ListBullet2 -* ListBullet3 -* ListContinue -* ListContinue2 -* ListContinue3 -* ListNumber -* ListNumber2 -* ListNumber3 -* ListParagraph -* MacroText -* NoSpacing -* Quote -* Subtitle -* TOCHeading -* Title - - -Table styles in default template -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* TableNormal -* ColorfulGrid -* ColorfulGrid-Accent1 -* ColorfulGrid-Accent2 -* ColorfulGrid-Accent3 -* ColorfulGrid-Accent4 -* ColorfulGrid-Accent5 -* ColorfulGrid-Accent6 -* ColorfulList -* ColorfulList-Accent1 -* ColorfulList-Accent2 -* ColorfulList-Accent3 -* ColorfulList-Accent4 -* ColorfulList-Accent5 -* ColorfulList-Accent6 -* ColorfulShading -* ColorfulShading-Accent1 -* ColorfulShading-Accent2 -* ColorfulShading-Accent3 -* ColorfulShading-Accent4 -* ColorfulShading-Accent5 -* ColorfulShading-Accent6 -* DarkList -* DarkList-Accent1 -* DarkList-Accent2 -* DarkList-Accent3 -* DarkList-Accent4 -* DarkList-Accent5 -* DarkList-Accent6 -* LightGrid -* LightGrid-Accent1 -* LightGrid-Accent2 -* LightGrid-Accent3 -* LightGrid-Accent4 -* LightGrid-Accent5 -* LightGrid-Accent6 -* LightList -* LightList-Accent1 -* LightList-Accent2 -* LightList-Accent3 -* LightList-Accent4 -* LightList-Accent5 -* LightList-Accent6 -* LightShading -* LightShading-Accent1 -* LightShading-Accent2 -* LightShading-Accent3 -* LightShading-Accent4 -* LightShading-Accent5 -* LightShading-Accent6 -* MediumGrid1 -* MediumGrid1-Accent1 -* MediumGrid1-Accent2 -* MediumGrid1-Accent3 -* MediumGrid1-Accent4 -* MediumGrid1-Accent5 -* MediumGrid1-Accent6 -* MediumGrid2 -* MediumGrid2-Accent1 -* MediumGrid2-Accent2 -* MediumGrid2-Accent3 -* MediumGrid2-Accent4 -* MediumGrid2-Accent5 -* MediumGrid2-Accent6 -* MediumGrid3 -* 
MediumGrid3-Accent1 -* MediumGrid3-Accent2 -* MediumGrid3-Accent3 -* MediumGrid3-Accent4 -* MediumGrid3-Accent5 -* MediumGrid3-Accent6 -* MediumList1 -* MediumList1-Accent1 -* MediumList1-Accent2 -* MediumList1-Accent3 -* MediumList1-Accent4 -* MediumList1-Accent5 -* MediumList1-Accent6 -* MediumList2 -* MediumList2-Accent1 -* MediumList2-Accent2 -* MediumList2-Accent3 -* MediumList2-Accent4 -* MediumList2-Accent5 -* MediumList2-Accent6 -* MediumShading1 -* MediumShading1-Accent1 -* MediumShading1-Accent2 -* MediumShading1-Accent3 -* MediumShading1-Accent4 -* MediumShading1-Accent5 -* MediumShading1-Accent6 -* MediumShading2 -* MediumShading2-Accent1 -* MediumShading2-Accent2 -* MediumShading2-Accent3 -* MediumShading2-Accent4 -* MediumShading2-Accent5 -* MediumShading2-Accent6 -* TableGrid - - -Character styles in default template -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* BodyText2Char -* BodyText3Char -* BodyTextChar -* BookTitle -* DefaultParagraphFont -* Emphasis -* Heading1Char -* Heading2Char -* Heading3Char -* Heading4Char -* Heading5Char -* Heading6Char -* Heading7Char -* Heading8Char -* Heading9Char -* IntenseEmphasis -* IntenseQuoteChar -* IntenseReference -* MacroTextChar -* QuoteChar -* Strong -* SubtitleChar -* SubtleEmphasis -* SubtleReference -* TitleChar diff --git a/docs/user/tables.rst b/docs/user/tables.rst new file mode 100644 index 000000000..40ef20933 --- /dev/null +++ b/docs/user/tables.rst @@ -0,0 +1,202 @@ +.. _tables: + +Working with Tables +=================== + +Word provides sophisticated capabilities to create tables. As usual, this power comes with +additional conceptual complexity. + +This complexity becomes most apparent when *reading* tables, in particular from documents drawn from +the wild where there is limited or no prior knowledge as to what the tables might contain or how +they might be structured. + +These are some of the important concepts you'll need to understand. 
+ + +Concept: Simple (uniform) tables +-------------------------------- + +:: + + +---+---+---+ + | a | b | c | + +---+---+---+ + | d | e | f | + +---+---+---+ + | g | h | i | + +---+---+---+ + +The basic concept of a table is intuitive enough. You have *rows* and *columns*, and at each (row, +column) position is a different *cell*. It can be described as a *grid* or a *matrix*. Let's call +this concept a *uniform table*. A relational database table and a Pandas dataframe are both examples +of a uniform table. + +The following invariants apply to uniform tables: + +* Each row has the same number of cells, one for each column. +* Each column has the same number of cells, one for each row. + + +Complication 1: Merged Cells +---------------------------- + +:: + + +---+---+---+ +---+---+---+ + | a | b | | | b | c | + +---+---+---+ + a +---+---+ + | c | d | e | | | d | e | + +---+---+---+ +---+---+---+ + | f | g | h | | f | g | h | + +---+---+---+ +---+---+---+ + +While very suitable for data processing, a uniform table lacks expressive power desirable for +tables intended for a human reader. + +Perhaps the most important characteristic a uniform table lacks is *merged cells*. It is very common +to want to group multiple cells into one, for example to form a column-group heading or provide the +same value for a sequence of cells rather than repeat it for each cell. These make a rendered table +more *readable* by reducing the cognitive load on the human reader and make certain relationships +explicit that might easily be missed otherwise. + +Unfortunately, accommodating merged cells breaks both the invariants of a uniform table: + +* Each row can have a different number of cells. +* Each column can have a different number of cells. + +This challenges reading table contents programmatically.
One might naturally want to read the table +into a uniform matrix data structure like a 3 x 3 "2D array" (list of lists perhaps), but this is +not directly possible when the table is not known to be uniform. + + +Concept: The layout grid +------------------------ + +:: + + + - + - + - + + | | | | + + - + - + - + + | | | | + + - + - + - + + | | | | + + - + - + - + + +In Word, each table has a *layout grid*. + +- The layout grid is *uniform*. There is a layout position for every (layout-row, layout-column) + pair. +- The layout grid itself is not visible. However, it is represented and referenced by certain + elements and attributes within the table XML. +- Each table cell is located at a layout-grid position; i.e. the top-left corner of each cell is the + top-left corner of a layout-grid cell. +- Each table cell occupies one or more whole layout-grid cells. A merged cell will occupy multiple + layout-grid cells. No table cell can occupy a partial layout-grid cell. +- Another way of saying this is that every vertical boundary (left and right) of a cell aligns with + a layout-grid vertical boundary, likewise for horizontal boundaries. But not all layout-grid + boundaries need be occupied by a cell boundary of the table. + + +Complication 2: Omitted Cells +----------------------------- + +:: + + +---+---+ +---+---+---+ + | a | b | | a | b | c | + +---+---+---+ +---+---+---+ + | c | d | | d | + +---+---+ +---+---+---+ + | e | | e | f | g | + +---+ +---+---+---+ + +Word is unusual in that it allows cells to be omitted from the beginning or end (but not the middle) +of a row. A typical practical example is a table with both a row of column headings and a column of +row headings, but no top-left cell (position 0, 0), such as this XOR truth table.
+ +:: + + +---+---+ + | T | F | + +---+---+---+ + | T | F | T | + +---+---+---+ + | F | T | F | + +---+---+---+ + +In `python-docx`, omitted cells in a |_Row| object are represented by the ``.grid_cols_before`` and +``.grid_cols_after`` properties. In the example above, for the first row, ``.grid_cols_before`` +would equal ``1`` and ``.grid_cols_after`` would equal ``0``. + +Note that omitted cells are not just "empty" cells. They represent layout-grid positions that are +unoccupied by a cell and they cannot be represented by a |_Cell| object. This distinction becomes +important when trying to produce a uniform representation (e.g. a 2D array) for an arbitrary Word +table. + + +Concept: `python-docx` approximates uniform tables by default +------------------------------------------------------------- + +To accurately represent an arbitrary table would require a complex graph data structure. Navigating +this data structure would be at least as complex as navigating the `python-docx` object graph for a +table. When extracting content from a collection of arbitrary Word files, such as for indexing the +document, it is common to choose a simpler data structure and *approximate* the table in that +structure. + +Reflecting on how a relational table or dataframe represents tabular information, a straightforward +approximation would simply repeat merged-cell values for each layout-grid cell occupied by the +merged cell:: + + + +---+---+---+ +---+---+---+ + | a | b | -> | a | a | b | + +---+---+---+ +---+---+---+ + | | d | e | -> | c | d | e | + + c +---+---+ +---+---+---+ + | | f | g | -> | c | f | g | + +---+---+---+ +---+---+---+ + +This is what ``_Row.cells`` does by default. Conceptually:: + + >>> [tuple(c.text for c in r.cells) for r in table.rows] + [ + (a, a, b), + (c, d, e), + (c, f, g), + ] + +Note this only produces a uniform "matrix" of cells when there are no omitted cells. 
Dealing with +omitted cells requires a more sophisticated approach when maintaining column integrity is required:: + + # +---+---+ + # | a | b | + # +---+---+---+ + # | c | d | + # +---+---+ + # | e | + # +---+ + + def iter_row_cell_texts(row: _Row) -> Iterator[str]: + for _ in range(row.grid_cols_before): + yield "" + for c in row.cells: + yield c.text + for _ in range(row.grid_cols_after): + yield "" + + >>> [tuple(iter_row_cell_texts(r)) for r in table.rows] + [ + ("", "a", "b"), + ("c", "d", ""), + ("", "e", ""), + ] + + +Complication 3: Tables are Recursive +------------------------------------ + +Further complicating table processing is their recursive nature. In Word, as in HTML, a table cell +can itself include one or more tables. + +These can be detected using ``_Cell.tables`` or ``_Cell.iter_inner_content()``. The latter preserves +the document order of the table with respect to paragraphs also in the cell. diff --git a/docs/user/text.rst b/docs/user/text.rst index 25ab8f742..f2e54f3b4 100644 --- a/docs/user/text.rst +++ b/docs/user/text.rst @@ -1,28 +1,389 @@ -Low-level text API -================== +Working with Text +================= + +To work effectively with text, it's important to first understand a little +about block-level elements like paragraphs and inline-level objects like +runs. -For the greatest control over inserted text, an understanding of the low-level -text API is required. Block-level vs. inline text objects ----------------------------------- -The paragraph is the primary block-level object in Word. A table is also -a block-level object, however its acts primarily as a container rather than -content. Each cell of a table is a block-level container, much like the -document body itself. Its rows and columns simply provide structure to the -cells. +The paragraph is the primary block-level object in Word. 
+ +A block-level item flows the text it contains between its left and right +edges, adding an additional line each time the text extends beyond its right +boundary. For a paragraph, the boundaries are generally the page margins, but +they can also be column boundaries if the page is laid out in columns, or +cell boundaries if the paragraph occurs inside a table cell. + +A table is also a block-level object. + +An inline object is a portion of the content that occurs inside a block-level +item. An example would be a word that appears in bold or a sentence in +all-caps. The most common inline object is a `run`. All content within +a block container is inside of an inline object. Typically, a paragraph +contains one or more runs, each of which contains some part of the paragraph's +text. + +The attributes of a block-level item specify its placement on the page, such +items as indentation and space before and after a paragraph. The attributes +of an inline item generally specify the font in which the content appears, +things like typeface, font size, bold, and italic. + + +Paragraph properties +-------------------- + +A paragraph has a variety of properties that specify its placement within its +container (typically a page) and the way it divides its content into separate +lines. + +In general, it's best to define a *paragraph style* collecting these +attributes into a meaningful group and apply the appropriate style to each +paragraph, rather than repeatedly apply those properties directly to each +paragraph. This is analogous to how Cascading Style Sheets (CSS) work with +HTML. All the paragraph properties described here can be set using a style as +well as applied directly to a paragraph. + +The formatting properties of a paragraph are accessed using the +|ParagraphFormat| object available using the paragraph's +:attr:`~.Paragraph.paragraph_format` property.
+ + +Horizontal alignment (justification) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Also known as `justification`, the horizontal alignment of a paragraph can be +set to left, centered, right, or fully justified (aligned on both the left +and right sides) using values from the enumeration +:ref:`WdParagraphAlignment`:: + + >>> from docx.enum.text import WD_ALIGN_PARAGRAPH + >>> document = Document() + >>> paragraph = document.add_paragraph() + >>> paragraph_format = paragraph.paragraph_format + + >>> paragraph_format.alignment + None # indicating alignment is inherited from the style hierarchy + >>> paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER + >>> paragraph_format.alignment + CENTER (1) + + +Indentation +~~~~~~~~~~~ + +Indentation is the horizontal space between a paragraph and edge of its +container, typically the page margin. A paragraph can be indented separately +on the left and right side. The first line can also have a different +indentation than the rest of the paragraph. A first line indented further +than the rest of the paragraph has *first line indent*. A first line indented +less has a *hanging indent*. + +Indentation is specified using a |Length| value, such as |Inches|, |Pt|, or +|Cm|. Negative values are valid and cause the paragraph to overlap the margin +by the specified amount. A value of |None| indicates the indentation value is +inherited from the style hierarchy. 
Assigning |None| to an indentation +property removes any directly-applied indentation setting and restores +inheritance from the style hierarchy:: + + >>> from docx.shared import Inches + >>> paragraph = document.add_paragraph() + >>> paragraph_format = paragraph.paragraph_format + + >>> paragraph_format.left_indent + None # indicating indentation is inherited from the style hierarchy + >>> paragraph_format.left_indent = Inches(0.5) + >>> paragraph_format.left_indent + 457200 + >>> paragraph_format.left_indent.inches + 0.5 + + +Right-side indent works in a similar way:: + + >>> from docx.shared import Pt + >>> paragraph_format.right_indent + None + >>> paragraph_format.right_indent = Pt(24) + >>> paragraph_format.right_indent + 304800 + >>> paragraph_format.right_indent.pt + 24.0 + + + + +First-line indent is specified using the +:attr:`~.ParagraphFormat.first_line_indent` property and is interpreted +relative to the left indent. A negative value indicates a hanging indent:: + + >>> paragraph_format.first_line_indent + None + >>> paragraph_format.first_line_indent = Inches(-0.25) + >>> paragraph_format.first_line_indent + -228600 + >>> paragraph_format.first_line_indent.inches + -0.25 + + +Tab stops +~~~~~~~~~ + +A tab stop determines the rendering of a tab character in the text of +a paragraph. In particular, it specifies the position where the text +following the tab character will start, how it will be aligned to that +position, and an optional leader character that will fill the horizontal +space spanned by the tab. 
+ +The tab stops for a paragraph or style are contained in a |TabStops| object +accessed using the :attr:`~.ParagraphFormat.tab_stops` property on +|ParagraphFormat|:: + + >>> tab_stops = paragraph_format.tab_stops + >>> tab_stops + + +A new tab stop is added using the :meth:`~.TabStops.add_tab_stop` method:: + + >>> tab_stop = tab_stops.add_tab_stop(Inches(1.5)) + >>> tab_stop.position + 1371600 + >>> tab_stop.position.inches + 1.5 + +Alignment defaults to left, but may be specified by providing a member of the +:ref:`WdTabAlignment` enumeration. The leader character defaults to spaces, +but may be specified by providing a member of the :ref:`WdTabLeader` +enumeration:: + + >>> from docx.enum.text import WD_TAB_ALIGNMENT, WD_TAB_LEADER + >>> tab_stop = tab_stops.add_tab_stop(Inches(1.5), WD_TAB_ALIGNMENT.RIGHT, WD_TAB_LEADER.DOTS) + >>> print(tab_stop.alignment) + RIGHT (2) + >>> print(tab_stop.leader) + DOTS (1) + +Existing tab stops are accessed using sequence semantics on |TabStops|:: + + >>> tab_stops[0] + + +More details are available in the |TabStops| and |TabStop| API documentation + + +Paragraph spacing +~~~~~~~~~~~~~~~~~ + +The :attr:`~.ParagraphFormat.space_before` and +:attr:`~.ParagraphFormat.space_after` properties control the spacing between +subsequent paragraphs, controlling the spacing before and after a paragraph, +respectively. Inter-paragraph spacing is `collapsed` during page layout, +meaning the spacing between two paragraphs is the maximum of the +`space_after` for the first paragraph and the `space_before` of the second +paragraph. 
Paragraph spacing is specified as a |Length| value, often using +|Pt|:: + + >>> paragraph_format.space_before, paragraph_format.space_after + (None, None) # inherited by default + + >>> paragraph_format.space_before = Pt(18) + >>> paragraph_format.space_before.pt + 18.0 + + >>> paragraph_format.space_after = Pt(12) + >>> paragraph_format.space_after.pt + 12.0 + + +Line spacing +~~~~~~~~~~~~ + +Line spacing is the distance between subsequent baselines in the lines of +a paragraph. Line spacing can be specified either as an absolute distance or +relative to the line height (essentially the point size of the font used). +A typical absolute measure would be 18 points. A typical relative measure +would be double-spaced (2.0 line heights). The default line spacing is +single-spaced (1.0 line heights). + +Line spacing is controlled by the interaction of the +:attr:`~.ParagraphFormat.line_spacing` and +:attr:`~.ParagraphFormat.line_spacing_rule` properties. +:attr:`~.ParagraphFormat.line_spacing` is either a |Length| value, +a (small-ish) |float|, or None. A |Length| value indicates an absolute +distance. A |float| indicates a number of line heights. |None| indicates line +spacing is inherited. 
:attr:`~.ParagraphFormat.line_spacing_rule` is a member +of the :ref:`WdLineSpacing` enumeration or |None|:: + + >>> from docx.shared import Length + >>> paragraph_format.line_spacing + None + >>> paragraph_format.line_spacing_rule + None + + >>> paragraph_format.line_spacing = Pt(18) + >>> isinstance(paragraph_format.line_spacing, Length) + True + >>> paragraph_format.line_spacing.pt + 18.0 + >>> paragraph_format.line_spacing_rule + EXACTLY (4) + + >>> paragraph_format.line_spacing = 1.75 + >>> paragraph_format.line_spacing + 1.75 + >>> paragraph_format.line_spacing_rule + MULTIPLE (5) + + +Pagination properties +~~~~~~~~~~~~~~~~~~~~~ + +Four paragraph properties, :attr:`~.ParagraphFormat.keep_together`, +:attr:`~.ParagraphFormat.keep_with_next`, +:attr:`~.ParagraphFormat.page_break_before`, and +:attr:`~.ParagraphFormat.widow_control` control aspects of how the paragraph +behaves near page boundaries. + +:attr:`~.ParagraphFormat.keep_together` causes the entire paragraph to appear +on the same page, issuing a page break before the paragraph if it would +otherwise be broken across two pages. + +:attr:`~.ParagraphFormat.keep_with_next` keeps a paragraph on the same page +as the subsequent paragraph. This can be used, for example, to keep a section +heading on the same page as the first paragraph of the section. + +:attr:`~.ParagraphFormat.page_break_before` causes a paragraph to be placed +at the top of a new page. This could be used on a chapter heading to ensure +chapters start on a new page. + +:attr:`~.ParagraphFormat.widow_control` breaks a page to avoid placing the +first or last line of the paragraph on a separate page from the rest of the +paragraph. + +All four of these properties are *tri-state*, meaning they can take the value +|True|, |False|, or |None|. |None| indicates the property value is inherited +from the style hierarchy. 
|True| means "on" and |False| means "off":: + + >>> paragraph_format.keep_together + None # all four inherit by default + >>> paragraph_format.keep_with_next = True + >>> paragraph_format.keep_with_next + True + >>> paragraph_format.page_break_before = False + >>> paragraph_format.page_break_before + False + + +Apply character formatting +-------------------------- + +Character formatting is applied at the Run level. Examples include font +typeface and size, bold, italic, and underline. + +A |Run| object has a read-only :attr:`~.Run.font` property providing access +to a |Font| object. A run's |Font| object provides properties for getting +and setting the character formatting for that run. + +Several examples are provided here. For a complete set of the available +properties, see the |Font| API documentation. + +The font for a run can be accessed like this:: + + >>> from docx import Document + >>> document = Document() + >>> run = document.add_paragraph().add_run() + >>> font = run.font + +Typeface and size are set like this:: + + >>> from docx.shared import Pt + >>> font.name = 'Calibri' + >>> font.size = Pt(12) + +Many font properties are *tri-state*, meaning they can take the values +|True|, |False|, and |None|. |True| means the property is "on", |False| means +it is "off". Conceptually, the |None| value means "inherit". A run exists in +the style inheritance hierarchy and by default inherits its character +formatting from that hierarchy. Any character formatting directly applied +using the |Font| object overrides the inherited values. + +Bold and italic are tri-state properties, as are all-caps, strikethrough, +superscript, and many others. See the |Font| API documentation for a full +list:: + + >>> font.bold, font.italic + (None, None) + >>> font.italic = True + >>> font.italic + True + >>> font.italic = False + >>> font.italic + False + >>> font.italic = None + >>> font.italic + None + +Underline is a bit of a special case. 
It is a hybrid of a tri-state property +and an enumerated value property. |True| means single underline, by far the +most common. |False| means no underline, but more often |None| is the right +choice if no underlining is wanted. The other forms of underlining, such as +double or dashed, are specified with a member of the :ref:`WdUnderline` +enumeration:: + + >>> font.underline + None + >>> font.underline = True + >>> # or perhaps + >>> font.underline = WD_UNDERLINE.DOT_DASH + +Font color +~~~~~~~~~~ + +Each |Font| object has a |ColorFormat| object that provides access to its +color, accessed via its read-only :attr:`~.Font.color` property. + +Apply a specific RGB color to a font:: + + >>> from docx.shared import RGBColor + >>> font.color.rgb = RGBColor(0x42, 0x24, 0xE9) + +A font can also be set to a theme color by assigning a member of the +:ref:`MsoThemeColorIndex` enumeration:: + + >>> from docx.enum.dml import MSO_THEME_COLOR + >>> font.color.theme_color = MSO_THEME_COLOR.ACCENT_1 + +A font's color can be restored to its default (inherited) value by assigning +|None| to either the :attr:`~.ColorFormat.rgb` or +:attr:`~.ColorFormat.theme_color` attribute of |ColorFormat|:: + + >>> font.color.rgb = None + +Determining the color of a font begins with determining its color type:: + + >>> font.color.type + RGB (1) + +The value of the :attr:`~.ColorFormat.type` property can be a member of the +:ref:`MsoColorType` enumeration or None. `MSO_COLOR_TYPE.RGB` indicates it is +an RGB color. `MSO_COLOR_TYPE.THEME` indicates a theme color. +`MSO_COLOR_TYPE.AUTO` indicates its value is determined automatically by the +application, usually set to black. (This value is relatively rare.) |None| +indicates no color is applied and the color is inherited from the style +hierarchy; this is the most common case. 
+ +When the color type is `MSO_COLOR_TYPE.RGB`, the :attr:`~.ColorFormat.rgb` +property will be an |RGBColor| value indicating the RGB color:: -A paragraph contains one or more inline elements called *runs*. It is the -run that actually contains text content. + >>> font.color.rgb + RGBColor(0x42, 0x24, 0xe9) -The main purpose of a run it to carry character formatting information, such as -font typeface and size. Bold, italic, and underline formatting are also -examples. All text within a run shares the same character formatting. So -a three-word paragraph having the middle word bold would require three runs. +When the color type is `MSO_COLOR_TYPE.THEME`, the +:attr:`~.ColorFormat.theme_color` property will be a member of +:ref:`MsoThemeColorIndex` indicating the theme color:: -Producing paragraphs containing so-called "rich" text requires building the -paragraph up out of multiple runs. Runs can also contain other content objects -such as line breaks and fields, so there are other reasons you may need to use -the low-level text API. 
+ >>> font.color.theme_color + ACCENT_1 (5) diff --git a/docx/__init__.py b/docx/__init__.py deleted file mode 100644 index 4e4fdfda0..000000000 --- a/docx/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -# encoding: utf-8 - -from docx.api import Document # noqa - -__version__ = '0.7.4' - - -# register custom Part classes with opc package reader - -from docx.opc.constants import CONTENT_TYPE as CT, RELATIONSHIP_TYPE as RT -from docx.opc.package import PartFactory - -from docx.parts.document import DocumentPart -from docx.parts.image import ImagePart -from docx.parts.numbering import NumberingPart -from docx.parts.styles import StylesPart - - -def part_class_selector(content_type, reltype): - if reltype == RT.IMAGE: - return ImagePart - return None - - -PartFactory.part_class_selector = part_class_selector -PartFactory.part_type_for[CT.WML_DOCUMENT_MAIN] = DocumentPart -PartFactory.part_type_for[CT.WML_NUMBERING] = NumberingPart -PartFactory.part_type_for[CT.WML_STYLES] = StylesPart - -del CT, DocumentPart, PartFactory, part_class_selector diff --git a/docx/api.py b/docx/api.py deleted file mode 100644 index c1ac093b7..000000000 --- a/docx/api.py +++ /dev/null @@ -1,190 +0,0 @@ -# encoding: utf-8 - -""" -Directly exposed API functions and classes, :func:`Document` for now. -Provides a syntactically more convenient API for interacting with the -OpcPackage graph. 
-""" - -from __future__ import absolute_import, division, print_function - -import os - -from docx.enum.section import WD_SECTION -from docx.enum.text import WD_BREAK -from docx.opc.constants import CONTENT_TYPE as CT, RELATIONSHIP_TYPE as RT -from docx.package import Package -from docx.parts.numbering import NumberingPart -from docx.parts.styles import StylesPart -from docx.shared import lazyproperty - - -_thisdir = os.path.split(__file__)[0] -_default_docx_path = os.path.join(_thisdir, 'templates', 'default.docx') - - -class Document(object): - """ - Return a |Document| instance loaded from *docx*, where *docx* can be - either a path to a ``.docx`` file (a string) or a file-like object. If - *docx* is missing or ``None``, the built-in default document "template" - is loaded. - """ - def __init__(self, docx=None): - super(Document, self).__init__() - document_part, package = self._open(docx) - self._document_part = document_part - self._package = package - - def add_heading(self, text='', level=1): - """ - Return a heading paragraph newly added to the end of the document, - populated with *text* and having the heading paragraph style - determined by *level*. If *level* is 0, the style is set to - ``'Title'``. If *level* is 1 (or not present), ``'Heading1'`` is used. - Otherwise the style is set to ``'Heading{level}'``. If *level* is - outside the range 0-9, |ValueError| is raised. - """ - if not 0 <= level <= 9: - raise ValueError("level must be in range 0-9, got %d" % level) - style = 'Title' if level == 0 else 'Heading%d' % level - return self.add_paragraph(text, style) - - def add_page_break(self): - """ - Return a paragraph newly added to the end of the document and - containing only a page break. 
- """ - p = self._document_part.add_paragraph() - r = p.add_run() - r.add_break(WD_BREAK.PAGE) - return p - - def add_paragraph(self, text='', style=None): - """ - Return a paragraph newly added to the end of the document, populated - with *text* and having paragraph style *style*. *text* can contain - tab (``\\t``) characters, which are converted to the appropriate XML - form for a tab. *text* can also include newline (``\\n``) or carriage - return (``\\r``) characters, each of which is converted to a line - break. - """ - return self._document_part.add_paragraph(text, style) - - def add_picture(self, image_path_or_stream, width=None, height=None): - """ - Return a new picture shape added in its own paragraph at the end of - the document. The picture contains the image at - *image_path_or_stream*, scaled based on *width* and *height*. If - neither width nor height is specified, the picture appears at its - native size. If only one is specified, it is used to compute - a scaling factor that is then applied to the unspecified dimension, - preserving the aspect ratio of the image. The native size of the - picture is calculated using the dots-per-inch (dpi) value specified - in the image file, defaulting to 72 dpi if no value is specified, as - is often the case. - """ - run = self.add_paragraph().add_run() - picture = run.add_picture(image_path_or_stream, width, height) - return picture - - def add_section(self, start_type=WD_SECTION.NEW_PAGE): - """ - Return a |Section| object representing a new section added at the end - of the document. The optional *start_type* argument must be a member - of the :ref:`WdSectionStart` enumeration defaulting to - ``WD_SECTION.NEW_PAGE`` if not provided. - """ - return self._document_part.add_section(start_type) - - def add_table(self, rows, cols, style='LightShading-Accent1'): - """ - Add a table having row and column counts of *rows* and *cols* - respectively and table style of *style*. 
If *style* is |None|, a - table with no style is produced. - """ - table = self._document_part.add_table(rows, cols) - if style: - table.style = style - return table - - @property - def inline_shapes(self): - """ - Return a reference to the |InlineShapes| instance for this document. - """ - return self._document_part.inline_shapes - - @lazyproperty - def numbering_part(self): - """ - Instance of |NumberingPart| for this document. Creates an empty - numbering part if one is not present. - """ - try: - return self._document_part.part_related_by(RT.NUMBERING) - except KeyError: - numbering_part = NumberingPart.new() - self._document_part.relate_to(numbering_part, RT.NUMBERING) - return numbering_part - - @property - def paragraphs(self): - """ - A list of |Paragraph| instances corresponding to the paragraphs in - the document, in document order. Note that paragraphs within revision - marks such as ```` or ```` do not appear in this list. - """ - return self._document_part.paragraphs - - def save(self, path_or_stream): - """ - Save this document to *path_or_stream*, which can be either a path to - a filesystem location (a string) or a file-like object. - """ - self._package.save(path_or_stream) - - @property - def sections(self): - """ - Return a reference to the |Sections| instance for this document. - """ - return self._document_part.sections - - @lazyproperty - def styles_part(self): - """ - Instance of |StylesPart| for this document. Creates an empty styles - part if one is not present. - """ - try: - return self._document_part.part_related_by(RT.STYLES) - except KeyError: - styles_part = StylesPart.new() - self._document_part.relate_to(styles_part, RT.STYLES) - return styles_part - - @property - def tables(self): - """ - A list of |Table| instances corresponding to the tables in the - document, in document order. Note that tables within revision marks - such as ```` or ```` do not appear in this list. 
- """ - return self._document_part.tables - - @staticmethod - def _open(docx): - """ - Return a (document_part, package) 2-tuple loaded from *docx*, where - *docx* can be either a path to a ``.docx`` file (a string) or a - file-like object. If *docx* is ``None``, the built-in default - document "template" is loaded. - """ - docx = _default_docx_path if docx is None else docx - package = Package.open(docx) - document_part = package.main_document - if document_part.content_type != CT.WML_DOCUMENT_MAIN: - tmpl = "file '%s' is not a Word file, content type is '%s'" - raise ValueError(tmpl % (docx, document_part.content_type)) - return document_part, package diff --git a/docx/blkcntnr.py b/docx/blkcntnr.py deleted file mode 100644 index b11f3a50d..000000000 --- a/docx/blkcntnr.py +++ /dev/null @@ -1,70 +0,0 @@ -# encoding: utf-8 - -""" -Block item container, used by body, cell, header, etc. Block level items are -things like paragraph and table, although there are a few other specialized -ones like structured document tags. -""" - -from __future__ import absolute_import, print_function - -from .shared import Parented -from .text import Paragraph - - -class BlockItemContainer(Parented): - """ - Base class for proxy objects that can contain block items, such as _Body, - _Cell, header, footer, footnote, endnote, comment, and text box objects. - Provides the shared functionality to add a block item like a paragraph or - table. - """ - def __init__(self, element, parent): - super(BlockItemContainer, self).__init__(parent) - self._element = element - - def add_paragraph(self, text='', style=None): - """ - Return a paragraph newly added to the end of the content in this - container, having *text* in a single run if present, and having - paragraph style *style*. If *style* is |None|, no paragraph style is - applied, which has the same effect as applying the 'Normal' style. 
- """ - p = self._element.add_p() - paragraph = Paragraph(p, self) - if text: - paragraph.add_run(text) - if style is not None: - paragraph.style = style - return paragraph - - def add_table(self, rows, cols): - """ - Return a newly added table having *rows* rows and *cols* cols, - appended to the content in this container. - """ - from .table import Table - tbl = self._element.add_tbl() - table = Table(tbl, self) - for i in range(cols): - table.add_column() - for i in range(rows): - table.add_row() - return table - - @property - def paragraphs(self): - """ - A list containing the paragraphs in this container, in document - order. Read-only. - """ - return [Paragraph(p, self) for p in self._element.p_lst] - - @property - def tables(self): - """ - A list containing the tables in this container, in document order. - Read-only. - """ - from .table import Table - return [Table(tbl, self) for tbl in self._element.tbl_lst] diff --git a/docx/compat.py b/docx/compat.py deleted file mode 100644 index dc9e20e39..000000000 --- a/docx/compat.py +++ /dev/null @@ -1,43 +0,0 @@ -# encoding: utf-8 - -""" -Provides Python 2/3 compatibility objects -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -import sys - -# =========================================================================== -# Python 3 versions -# =========================================================================== - -if sys.version_info >= (3, 0): - - from io import BytesIO - - def is_string(obj): - """ - Return True if *obj* is a string, False otherwise. - """ - return isinstance(obj, str) - - Unicode = str - -# =========================================================================== -# Python 2 versions -# =========================================================================== - -else: - - from StringIO import StringIO as BytesIO # noqa - - def is_string(obj): - """ - Return True if *obj* is a string, False otherwise. 
- """ - return isinstance(obj, basestring) - - Unicode = unicode diff --git a/docx/enum/__init__.py b/docx/enum/__init__.py deleted file mode 100644 index dd49faafd..000000000 --- a/docx/enum/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# encoding: utf-8 - -""" -Enumerations used in python-docx -""" - -from __future__ import absolute_import, print_function, unicode_literals - - -class Enumeration(object): - - @classmethod - def from_xml(cls, xml_val): - return cls._xml_to_idx[xml_val] - - @classmethod - def to_xml(cls, enum_val): - return cls._idx_to_xml[enum_val] diff --git a/docx/enum/base.py b/docx/enum/base.py deleted file mode 100644 index aad44e9c8..000000000 --- a/docx/enum/base.py +++ /dev/null @@ -1,360 +0,0 @@ -# encoding: utf-8 - -""" -Base classes and other objects used by enumerations -""" - -from __future__ import absolute_import, print_function - -import sys -import textwrap - -from ..exceptions import InvalidXmlError - - -def alias(*aliases): - """ - Decorating a class with @alias('FOO', 'BAR', ..) allows the class to - be referenced by each of the names provided as arguments. - """ - def decorator(cls): - # alias must be set in globals from caller's frame - caller = sys._getframe(1) - globals_dict = caller.f_globals - for alias in aliases: - globals_dict[alias] = cls - return cls - return decorator - - -class _DocsPageFormatter(object): - """ - Formats a RestructuredText documention page (string) for the enumeration - class parts passed to the constructor. An immutable one-shot service - object. - """ - def __init__(self, clsname, clsdict): - self._clsname = clsname - self._clsdict = clsdict - - @property - def page_str(self): - """ - The RestructuredText documentation page for the enumeration. This is - the only API member for the class. - """ - tmpl = '.. 
_%s:\n\n%s\n\n%s\n\n----\n\n%s' - components = ( - self._ms_name, self._page_title, self._intro_text, - self._member_defs - ) - return tmpl % components - - @property - def _intro_text(self): - """ - The docstring of the enumeration, formatted for use at the top of the - documentation page - """ - try: - cls_docstring = self._clsdict['__doc__'] - except KeyError: - cls_docstring = '' - return textwrap.dedent(cls_docstring).strip() - - def _member_def(self, member): - """ - Return an individual member definition formatted as an RST glossary - entry, wrapped to fit within 78 columns. - """ - member_docstring = textwrap.dedent(member.docstring).strip() - member_docstring = textwrap.fill( - member_docstring, width=78, initial_indent=' '*4, - subsequent_indent=' '*4 - ) - return '%s\n%s\n' % (member.name, member_docstring) - - @property - def _member_defs(self): - """ - A single string containing the aggregated member definitions section - of the documentation page - """ - members = self._clsdict['__members__'] - member_defs = [ - self._member_def(member) for member in members - if member.name is not None - ] - return '\n'.join(member_defs) - - @property - def _ms_name(self): - """ - The Microsoft API name for this enumeration - """ - return self._clsdict['__ms_name__'] - - @property - def _page_title(self): - """ - The title for the documentation page, formatted as code (surrounded - in double-backtics) and underlined with '=' characters - """ - title_underscore = '=' * (len(self._clsname)+4) - return '``%s``\n%s' % (self._clsname, title_underscore) - - -class MetaEnumeration(type): - """ - The metaclass for Enumeration and its subclasses. 
Adds a name for each - named member and compiles state needed by the enumeration class to - respond to other attribute gets - """ - def __new__(meta, clsname, bases, clsdict): - meta._add_enum_members(clsdict) - meta._collect_valid_settings(clsdict) - meta._generate_docs_page(clsname, clsdict) - return type.__new__(meta, clsname, bases, clsdict) - - @classmethod - def _add_enum_members(meta, clsdict): - """ - Dispatch ``.add_to_enum()`` call to each member so it can do its - thing to properly add itself to the enumeration class. This - delegation allows member sub-classes to add specialized behaviors. - """ - enum_members = clsdict['__members__'] - for member in enum_members: - member.add_to_enum(clsdict) - - @classmethod - def _collect_valid_settings(meta, clsdict): - """ - Return a sequence containing the enumeration values that are valid - assignment values. Return-only values are excluded. - """ - enum_members = clsdict['__members__'] - valid_settings = [] - for member in enum_members: - valid_settings.extend(member.valid_settings) - clsdict['_valid_settings'] = valid_settings - - @classmethod - def _generate_docs_page(meta, clsname, clsdict): - """ - Return the RST documentation page for the enumeration. - """ - clsdict['__docs_rst__'] = ( - _DocsPageFormatter(clsname, clsdict).page_str - ) - - -class EnumerationBase(object): - """ - Base class for all enumerations, used directly for enumerations requiring - only basic behavior. It's __dict__ is used below in the Python 2+3 - compatible metaclass definition. - """ - __members__ = () - __ms_name__ = '' - - @classmethod - def validate(cls, value): - """ - Raise |ValueError| if *value* is not an assignable value. 
- """ - if value not in cls._valid_settings: - raise ValueError( - "%s not a member of %s enumeration" % (value, cls.__name__) - ) - - -Enumeration = MetaEnumeration( - 'Enumeration', (object,), dict(EnumerationBase.__dict__) -) - - -class XmlEnumeration(Enumeration): - """ - Provides ``to_xml()`` and ``from_xml()`` methods in addition to base - enumeration features - """ - __members__ = () - __ms_name__ = '' - - @classmethod - def from_xml(cls, xml_val): - """ - Return the enumeration member corresponding to the XML value - *xml_val*. - """ - if xml_val not in cls._xml_to_member: - raise InvalidXmlError( - "attribute value '%s' not valid for this type" % xml_val - ) - return cls._xml_to_member[xml_val] - - @classmethod - def to_xml(cls, enum_val): - """ - Return the XML value of the enumeration value *enum_val*. - """ - if enum_val not in cls._member_to_xml: - raise ValueError( - "value '%s' not in enumeration %s" % (enum_val, cls.__name__) - ) - return cls._member_to_xml[enum_val] - - -class EnumMember(object): - """ - Used in the enumeration class definition to define a member value and its - mappings - """ - def __init__(self, name, value, docstring): - self._name = name - if isinstance(value, int): - value = EnumValue(name, value, docstring) - self._value = value - self._docstring = docstring - - def add_to_enum(self, clsdict): - """ - Add a name to *clsdict* for this member. - """ - self.register_name(clsdict) - - @property - def docstring(self): - """ - The description of this member - """ - return self._docstring - - @property - def name(self): - """ - The distinguishing name of this member within the enumeration class, - e.g. 'MIDDLE' for MSO_VERTICAL_ANCHOR.MIDDLE, if this is a named - member. Otherwise the primitive value such as |None|, |True| or - |False|. - """ - return self._name - - def register_name(self, clsdict): - """ - Add a member name to the class dict *clsdict* containing the value of - this member object. 
Where the name of this object is None, do - nothing; this allows out-of-band values to be defined without adding - a name to the class dict. - """ - if self.name is None: - return - clsdict[self.name] = self.value - - @property - def valid_settings(self): - """ - A sequence containing the values valid for assignment for this - member. May be zero, one, or more in number. - """ - return (self._value,) - - @property - def value(self): - """ - The enumeration value for this member, often an instance of - EnumValue, but may be a primitive value such as |None|. - """ - return self._value - - -class EnumValue(int): - """ - A named enumeration value, providing __str__ and __doc__ string values - for its symbolic name and description, respectively. Subclasses int, so - behaves as a regular int unless the strings are asked for. - """ - def __new__(cls, member_name, int_value, docstring): - return super(EnumValue, cls).__new__(cls, int_value) - - def __init__(self, member_name, int_value, docstring): - super(EnumValue, self).__init__() - self._member_name = member_name - self._docstring = docstring - - @property - def __doc__(self): - """ - The description of this enumeration member - """ - return self._docstring.strip() - - def __str__(self): - """ - The symbolic name and string value of this member, e.g. 'MIDDLE (3)' - """ - return "%s (%d)" % (self._member_name, int(self)) - - -class ReturnValueOnlyEnumMember(EnumMember): - """ - Used to define a member of an enumeration that is only valid as a query - result and is not valid as a setting, e.g. MSO_VERTICAL_ANCHOR.MIXED (-2) - """ - @property - def valid_settings(self): - """ - No settings are valid for a return-only value. - """ - return () - - -class XmlMappedEnumMember(EnumMember): - """ - Used to define a member whose value maps to an XML attribute value. 
- """ - def __init__(self, name, value, xml_value, docstring): - super(XmlMappedEnumMember, self).__init__(name, value, docstring) - self._xml_value = xml_value - - def add_to_enum(self, clsdict): - """ - Compile XML mappings in addition to base add behavior. - """ - super(XmlMappedEnumMember, self).add_to_enum(clsdict) - self.register_xml_mapping(clsdict) - - def register_xml_mapping(self, clsdict): - """ - Add XML mappings to the enumeration class state for this member. - """ - member_to_xml = self._get_or_add_member_to_xml(clsdict) - member_to_xml[self.value] = self.xml_value - xml_to_member = self._get_or_add_xml_to_member(clsdict) - xml_to_member[self.xml_value] = self.value - - @property - def xml_value(self): - """ - The XML attribute value that corresponds to this enumeration value - """ - return self._xml_value - - @staticmethod - def _get_or_add_member_to_xml(clsdict): - """ - Add the enum -> xml value mapping to the enumeration class state - """ - if '_member_to_xml' not in clsdict: - clsdict['_member_to_xml'] = dict() - return clsdict['_member_to_xml'] - - @staticmethod - def _get_or_add_xml_to_member(clsdict): - """ - Add the xml -> enum value mapping to the enumeration class state - """ - if '_xml_to_member' not in clsdict: - clsdict['_xml_to_member'] = dict() - return clsdict['_xml_to_member'] diff --git a/docx/enum/section.py b/docx/enum/section.py deleted file mode 100644 index b16ddbe72..000000000 --- a/docx/enum/section.py +++ /dev/null @@ -1,76 +0,0 @@ -# encoding: utf-8 - -""" -Enumerations related to the main document in WordprocessingML files -""" - -from __future__ import absolute_import, print_function, unicode_literals - -from .base import alias, XmlEnumeration, XmlMappedEnumMember - - -@alias('WD_ORIENT') -class WD_ORIENTATION(XmlEnumeration): - """ - alias: **WD_ORIENT** - - Specifies the page layout orientation. 
- - Example:: - - from docx.enum.section import WD_ORIENT - - section = document.sections[-1] - section.orientation = WD_ORIENT.LANDSCAPE - """ - - __ms_name__ = 'WdOrientation' - - __url__ = 'http://msdn.microsoft.com/en-us/library/office/ff837902.aspx' - - __members__ = ( - XmlMappedEnumMember( - 'PORTRAIT', 0, 'portrait', 'Portrait orientation.' - ), - XmlMappedEnumMember( - 'LANDSCAPE', 1, 'landscape', 'Landscape orientation.' - ), - ) - - -@alias('WD_SECTION') -class WD_SECTION_START(XmlEnumeration): - """ - alias: **WD_SECTION** - - Specifies the start type of a section break. - - Example:: - - from docx.enum.section import WD_SECTION - - section = document.sections[0] - section.start_type = WD_SECTION.NEW_PAGE - """ - - __ms_name__ = 'WdSectionStart' - - __url__ = 'http://msdn.microsoft.com/en-us/library/office/ff840975.aspx' - - __members__ = ( - XmlMappedEnumMember( - 'CONTINUOUS', 0, 'continuous', 'Continuous section break.' - ), - XmlMappedEnumMember( - 'NEW_COLUMN', 1, 'nextColumn', 'New column section break.' - ), - XmlMappedEnumMember( - 'NEW_PAGE', 2, 'nextPage', 'New page section break.' - ), - XmlMappedEnumMember( - 'EVEN_PAGE', 3, 'evenPage', 'Even pages section break.' - ), - XmlMappedEnumMember( - 'ODD_PAGE', 4, 'oddPage', 'Section begins on next odd page.' 
- ), - ) diff --git a/docx/enum/shape.py b/docx/enum/shape.py deleted file mode 100644 index f1d6ffd8c..000000000 --- a/docx/enum/shape.py +++ /dev/null @@ -1,21 +0,0 @@ -# encoding: utf-8 - -""" -Enumerations related to DrawingML shapes in WordprocessingML files -""" - -from __future__ import absolute_import, print_function, unicode_literals - - -class WD_INLINE_SHAPE_TYPE(object): - """ - Corresponds to WdInlineShapeType enumeration - http://msdn.microsoft.com/en-us/library/office/ff192587.aspx - """ - CHART = 12 - LINKED_PICTURE = 4 - PICTURE = 3 - SMART_ART = 15 - NOT_IMPLEMENTED = -6 - -WD_INLINE_SHAPE = WD_INLINE_SHAPE_TYPE diff --git a/docx/enum/text.py b/docx/enum/text.py deleted file mode 100644 index 713597fc6..000000000 --- a/docx/enum/text.py +++ /dev/null @@ -1,166 +0,0 @@ -# encoding: utf-8 - -""" -Enumerations related to text in WordprocessingML files -""" - -from __future__ import absolute_import, print_function, unicode_literals - -from .base import alias, XmlEnumeration, XmlMappedEnumMember - - -@alias('WD_ALIGN_PARAGRAPH') -class WD_PARAGRAPH_ALIGNMENT(XmlEnumeration): - """ - alias: **WD_ALIGN_PARAGRAPH** - - Specifies paragraph justification type. - - Example:: - - from docx.enum.text import WD_ALIGN_PARAGRAPH - - paragraph = document.add_paragraph() - paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER - """ - - __ms_name__ = 'WdParagraphAlignment' - - __url__ = 'http://msdn.microsoft.com/en-us/library/office/ff835817.aspx' - - __members__ = ( - XmlMappedEnumMember( - 'LEFT', 0, 'left', 'Left-aligned' - ), - XmlMappedEnumMember( - 'CENTER', 1, 'center', 'Center-aligned.' - ), - XmlMappedEnumMember( - 'RIGHT', 2, 'right', 'Right-aligned.' - ), - XmlMappedEnumMember( - 'JUSTIFY', 3, 'both', 'Fully justified.' - ), - XmlMappedEnumMember( - 'DISTRIBUTE', 4, 'distribute', 'Paragraph characters are distrib' - 'uted to fill the entire width of the paragraph.' 
- ), - XmlMappedEnumMember( - 'JUSTIFY_MED', 5, 'mediumKashida', 'Justified with a medium char' - 'acter compression ratio.' - ), - XmlMappedEnumMember( - 'JUSTIFY_HI', 7, 'highKashida', 'Justified with a high character' - ' compression ratio.' - ), - XmlMappedEnumMember( - 'JUSTIFY_LOW', 8, 'lowKashida', 'Justified with a low character ' - 'compression ratio.' - ), - XmlMappedEnumMember( - 'THAI_JUSTIFY', 9, 'thaiDistribute', 'Justified according to Tha' - 'i formatting layout.' - ), - ) - - -class WD_BREAK_TYPE(object): - """ - Corresponds to WdBreakType enumeration - http://msdn.microsoft.com/en-us/library/office/ff195905.aspx - """ - COLUMN = 8 - LINE = 6 - LINE_CLEAR_LEFT = 9 - LINE_CLEAR_RIGHT = 10 - LINE_CLEAR_ALL = 11 # added for consistency, not in MS version - PAGE = 7 - SECTION_CONTINUOUS = 3 - SECTION_EVEN_PAGE = 4 - SECTION_NEXT_PAGE = 2 - SECTION_ODD_PAGE = 5 - TEXT_WRAPPING = 11 - -WD_BREAK = WD_BREAK_TYPE - - -class WD_UNDERLINE(XmlEnumeration): - """ - Specifies the style of underline applied to a run of characters. - """ - - __ms_name__ = 'WdUnderline' - - __url__ = 'http://msdn.microsoft.com/en-us/library/office/ff822388.aspx' - - __members__ = ( - XmlMappedEnumMember( - None, None, None, 'Inherit underline setting from containing par' - 'agraph.' - ), - XmlMappedEnumMember( - 'NONE', 0, 'none', 'No underline. This setting overrides any inh' - 'erited underline value, so can be used to remove underline from' - ' a run that inherits underlining from its containing paragraph.' - ' Note this is not the same as assigning |None| to Run.underline' - '. |None| is a valid assignment value, but causes the run to inh' - 'erit its underline value. Assigning ``WD_UNDERLINE.NONE`` cause' - 's underlining to be unconditionally turned off.' - ), - XmlMappedEnumMember( - 'SINGLE', 1, 'single', 'A single line. Note that this setting is' - 'write-only in the sense that |True| (rather than ``WD_UNDERLINE' - '.SINGLE``) is returned for a run having this setting.' 
- ), - XmlMappedEnumMember( - 'WORDS', 2, 'words', 'Underline individual words only.' - ), - XmlMappedEnumMember( - 'DOUBLE', 3, 'double', 'A double line.' - ), - XmlMappedEnumMember( - 'DOTTED', 4, 'dotted', 'Dots.' - ), - XmlMappedEnumMember( - 'THICK', 6, 'thick', 'A single thick line.' - ), - XmlMappedEnumMember( - 'DASH', 7, 'dash', 'Dashes.' - ), - XmlMappedEnumMember( - 'DOT_DASH', 9, 'dotDash', 'Alternating dots and dashes.' - ), - XmlMappedEnumMember( - 'DOT_DOT_DASH', 10, 'dotDotDash', 'An alternating dot-dot-dash p' - 'attern.' - ), - XmlMappedEnumMember( - 'WAVY', 11, 'wave', 'A single wavy line.' - ), - XmlMappedEnumMember( - 'DOTTED_HEAVY', 20, 'dottedHeavy', 'Heavy dots.' - ), - XmlMappedEnumMember( - 'DASH_HEAVY', 23, 'dashedHeavy', 'Heavy dashes.' - ), - XmlMappedEnumMember( - 'DOT_DASH_HEAVY', 25, 'dashDotHeavy', 'Alternating heavy dots an' - 'd heavy dashes.' - ), - XmlMappedEnumMember( - 'DOT_DOT_DASH_HEAVY', 26, 'dashDotDotHeavy', 'An alternating hea' - 'vy dot-dot-dash pattern.' - ), - XmlMappedEnumMember( - 'WAVY_HEAVY', 27, 'wavyHeavy', 'A heavy wavy line.' - ), - XmlMappedEnumMember( - 'DASH_LONG', 39, 'dashLong', 'Long dashes.' - ), - XmlMappedEnumMember( - 'WAVY_DOUBLE', 43, 'wavyDouble', 'A double wavy line.' - ), - XmlMappedEnumMember( - 'DASH_LONG_HEAVY', 55, 'dashLongHeavy', 'Long heavy dashes.' - ), - ) diff --git a/docx/exceptions.py b/docx/exceptions.py deleted file mode 100644 index 00215615b..000000000 --- a/docx/exceptions.py +++ /dev/null @@ -1,20 +0,0 @@ -# encoding: utf-8 - -""" -Exceptions used with python-docx. - -The base exception class is PythonDocxError. -""" - - -class PythonDocxError(Exception): - """ - Generic error class. 
- """ - - -class InvalidXmlError(PythonDocxError): - """ - Raised when invalid XML is encountered, such as on attempt to access a - missing required child element - """ diff --git a/docx/image/__init__.py b/docx/image/__init__.py deleted file mode 100644 index 8ab3ada68..000000000 --- a/docx/image/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -# encoding: utf-8 - -""" -Provides objects that can characterize image streams as to content type and -size, as a required step in including them in a document. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from docx.image.bmp import Bmp -from docx.image.gif import Gif -from docx.image.jpeg import Exif, Jfif -from docx.image.png import Png -from docx.image.tiff import Tiff - - -SIGNATURES = ( - # class, offset, signature_bytes - (Png, 0, b'\x89PNG\x0D\x0A\x1A\x0A'), - (Jfif, 6, b'JFIF'), - (Exif, 6, b'Exif'), - (Gif, 0, b'GIF87a'), - (Gif, 0, b'GIF89a'), - (Tiff, 0, b'MM\x00*'), # big-endian (Motorola) TIFF - (Tiff, 0, b'II*\x00'), # little-endian (Intel) TIFF - (Bmp, 0, b'BM'), -) diff --git a/docx/image/constants.py b/docx/image/constants.py deleted file mode 100644 index 90b469705..000000000 --- a/docx/image/constants.py +++ /dev/null @@ -1,169 +0,0 @@ -# encoding: utf-8 - -""" -Constants specific the the image sub-package -""" - - -class JPEG_MARKER_CODE(object): - """ - JPEG marker codes - """ - TEM = b'\x01' - DHT = b'\xC4' - DAC = b'\xCC' - JPG = b'\xC8' - - SOF0 = b'\xC0' - SOF1 = b'\xC1' - SOF2 = b'\xC2' - SOF3 = b'\xC3' - SOF5 = b'\xC5' - SOF6 = b'\xC6' - SOF7 = b'\xC7' - SOF9 = b'\xC9' - SOFA = b'\xCA' - SOFB = b'\xCB' - SOFD = b'\xCD' - SOFE = b'\xCE' - SOFF = b'\xCF' - - RST0 = b'\xD0' - RST1 = b'\xD1' - RST2 = b'\xD2' - RST3 = b'\xD3' - RST4 = b'\xD4' - RST5 = b'\xD5' - RST6 = b'\xD6' - RST7 = b'\xD7' - - SOI = b'\xD8' - EOI = b'\xD9' - SOS = b'\xDA' - DQT = b'\xDB' # Define Quantization Table(s) - DNL = b'\xDC' - DRI = b'\xDD' - DHP = b'\xDE' - EXP = b'\xDF' - 
- APP0 = b'\xE0' - APP1 = b'\xE1' - APP2 = b'\xE2' - APP3 = b'\xE3' - APP4 = b'\xE4' - APP5 = b'\xE5' - APP6 = b'\xE6' - APP7 = b'\xE7' - APP8 = b'\xE8' - APP9 = b'\xE9' - APPA = b'\xEA' - APPB = b'\xEB' - APPC = b'\xEC' - APPD = b'\xED' - APPE = b'\xEE' - APPF = b'\xEF' - - STANDALONE_MARKERS = ( - TEM, SOI, EOI, RST0, RST1, RST2, RST3, RST4, RST5, RST6, RST7 - ) - - SOF_MARKER_CODES = ( - SOF0, SOF1, SOF2, SOF3, SOF5, SOF6, SOF7, SOF9, SOFA, SOFB, SOFD, - SOFE, SOFF - ) - - marker_names = { - b'\x00': 'UNKNOWN', - b'\xC0': 'SOF0', - b'\xC2': 'SOF2', - b'\xC4': 'DHT', - b'\xDA': 'SOS', # start of scan - b'\xD8': 'SOI', # start of image - b'\xD9': 'EOI', # end of image - b'\xDB': 'DQT', - b'\xE0': 'APP0', - b'\xE1': 'APP1', - b'\xE2': 'APP2', - b'\xED': 'APP13', - b'\xEE': 'APP14', - } - - @classmethod - def is_standalone(cls, marker_code): - return marker_code in cls.STANDALONE_MARKERS - - -class MIME_TYPE(object): - """ - Image content types - """ - BMP = 'image/bmp' - GIF = 'image/gif' - JPEG = 'image/jpeg' - PNG = 'image/png' - TIFF = 'image/tiff' - - -class PNG_CHUNK_TYPE(object): - """ - PNG chunk type names - """ - IHDR = 'IHDR' - pHYs = 'pHYs' - IEND = 'IEND' - - -class TIFF_FLD_TYPE(object): - """ - Tag codes for TIFF Image File Directory (IFD) entries. - """ - BYTE = 1 - ASCII = 2 - SHORT = 3 - LONG = 4 - RATIONAL = 5 - - field_type_names = { - 1: 'BYTE', 2: 'ASCII char', 3: 'SHORT', 4: 'LONG', - 5: 'RATIONAL' - } - - -TIFF_FLD = TIFF_FLD_TYPE - - -class TIFF_TAG(object): - """ - Tag codes for TIFF Image File Directory (IFD) entries. 
- """ - IMAGE_WIDTH = 0x0100 - IMAGE_LENGTH = 0x0101 - X_RESOLUTION = 0x011A - Y_RESOLUTION = 0x011B - RESOLUTION_UNIT = 0x0128 - - tag_names = { - 0x00FE: 'NewSubfileType', - 0x0100: 'ImageWidth', - 0x0101: 'ImageLength', - 0x0102: 'BitsPerSample', - 0x0103: 'Compression', - 0x0106: 'PhotometricInterpretation', - 0x010E: 'ImageDescription', - 0x010F: 'Make', - 0x0110: 'Model', - 0x0111: 'StripOffsets', - 0x0112: 'Orientation', - 0x0115: 'SamplesPerPixel', - 0x0117: 'StripByteCounts', - 0x011A: 'XResolution', - 0x011B: 'YResolution', - 0x011C: 'PlanarConfiguration', - 0x0128: 'ResolutionUnit', - 0x0131: 'Software', - 0x0132: 'DateTime', - 0x0213: 'YCbCrPositioning', - 0x8769: 'ExifTag', - 0x8825: 'GPS IFD', - 0xC4A5: 'PrintImageMatching', - } diff --git a/docx/image/exceptions.py b/docx/image/exceptions.py deleted file mode 100644 index f233edc4e..000000000 --- a/docx/image/exceptions.py +++ /dev/null @@ -1,23 +0,0 @@ -# encoding: utf-8 - -""" -Exceptions specific the the image sub-package -""" - - -class InvalidImageStreamError(Exception): - """ - The recognized image stream appears to be corrupted - """ - - -class UnexpectedEndOfFileError(Exception): - """ - EOF was unexpectedly encountered while reading an image stream. - """ - - -class UnrecognizedImageError(Exception): - """ - The provided image stream could not be recognized. - """ diff --git a/docx/image/gif.py b/docx/image/gif.py deleted file mode 100644 index 57f037d80..000000000 --- a/docx/image/gif.py +++ /dev/null @@ -1,47 +0,0 @@ -# encoding: utf-8 - -from __future__ import absolute_import, division, print_function - -from struct import Struct - -from .constants import MIME_TYPE -from .image import BaseImageHeader - - -class Gif(BaseImageHeader): - """ - Image header parser for GIF images. Note that the GIF format does not - support resolution (DPI) information. Both horizontal and vertical DPI - default to 72. 
- """ - @classmethod - def from_stream(cls, stream): - """ - Return |Gif| instance having header properties parsed from GIF image - in *stream*. - """ - px_width, px_height = cls._dimensions_from_stream(stream) - return cls(px_width, px_height, 72, 72) - - @property - def content_type(self): - """ - MIME content type for this image, unconditionally `image/gif` for - GIF images. - """ - return MIME_TYPE.GIF - - @property - def default_ext(self): - """ - Default filename extension, always 'gif' for GIF images. - """ - return 'gif' - - @classmethod - def _dimensions_from_stream(cls, stream): - stream.seek(6) - bytes_ = stream.read(4) - struct = Struct('L' - return self._read_int(fmt, base, offset) - - def read_short(self, base, offset=0): - """ - Return the int value of the two bytes at the file position determined - by *base* and *offset*, similarly to ``read_long()`` above. - """ - fmt = b'H' - return self._read_int(fmt, base, offset) - - def read_str(self, char_count, base, offset=0): - """ - Return a string containing the *char_count* bytes at the file - position determined by self._base_offset + *base* + *offset*. 
- """ - def str_struct(char_count): - format_ = '%ds' % char_count - return Struct(format_) - struct = str_struct(char_count) - chars = self._unpack_item(struct, base, offset) - unicode_str = chars.decode('UTF-8') - return unicode_str - - def seek(self, base, offset=0): - location = self._base_offset + base + offset - self._stream.seek(location) - - def tell(self): - """ - Allow pass-through tell() call - """ - return self._stream.tell() - - def _read_bytes(self, byte_count, base, offset): - self.seek(base, offset) - bytes_ = self._stream.read(byte_count) - if len(bytes_) < byte_count: - raise UnexpectedEndOfFileError - return bytes_ - - def _read_int(self, fmt, base, offset): - struct = Struct(fmt) - return self._unpack_item(struct, base, offset) - - def _unpack_item(self, struct, base, offset): - bytes_ = self._read_bytes(struct.size, base, offset) - return struct.unpack(bytes_)[0] diff --git a/docx/image/image.py b/docx/image/image.py deleted file mode 100644 index 692ea5860..000000000 --- a/docx/image/image.py +++ /dev/null @@ -1,220 +0,0 @@ -# encoding: utf-8 - -""" -Provides objects that can characterize image streams as to content type and -size, as a required step in including them in a document. -""" - -from __future__ import absolute_import, division, print_function - -import hashlib -import os - -from ..compat import BytesIO, is_string -from ..shared import lazyproperty -from .exceptions import UnrecognizedImageError - - -class Image(object): - """ - Graphical image stream such as JPEG, PNG, or GIF with properties and - methods required by ImagePart. - """ - def __init__(self, blob, filename, image_header): - super(Image, self).__init__() - self._blob = blob - self._filename = filename - self._image_header = image_header - - @classmethod - def from_blob(cls, blob): - """ - Return a new |Image| subclass instance parsed from the image binary - contained in *blob*. 
- """ - stream = BytesIO(blob) - return cls._from_stream(stream, blob) - - @classmethod - def from_file(cls, image_descriptor): - """ - Return a new |Image| subclass instance loaded from the image file - identified by *image_descriptor*, a path or file-like object. - """ - if is_string(image_descriptor): - path = image_descriptor - with open(path, 'rb') as f: - blob = f.read() - stream = BytesIO(blob) - filename = os.path.basename(path) - else: - stream = image_descriptor - stream.seek(0) - blob = stream.read() - filename = None - return cls._from_stream(stream, blob, filename) - - @property - def blob(self): - """ - The bytes of the image 'file' - """ - return self._blob - - @property - def content_type(self): - """ - MIME content type for this image, e.g. ``'image/jpeg'`` for a JPEG - image - """ - return self._image_header.content_type - - @lazyproperty - def ext(self): - """ - The file extension for the image. If an actual one is available from - a load filename it is used. Otherwise a canonical extension is - assigned based on the content type. Does not contain the leading - period, e.g. 'jpg', not '.jpg'. - """ - return os.path.splitext(self._filename)[1][1:] - - @property - def filename(self): - """ - Original image file name, if loaded from disk, or a generic filename - if loaded from an anonymous stream. - """ - return self._filename - - @property - def px_width(self): - """ - The horizontal pixel dimension of the image - """ - return self._image_header.px_width - - @property - def px_height(self): - """ - The vertical pixel dimension of the image - """ - return self._image_header.px_height - - @property - def horz_dpi(self): - """ - Integer dots per inch for the width of this image. Defaults to 72 - when not present in the file, as is often the case. - """ - return self._image_header.horz_dpi - - @property - def vert_dpi(self): - """ - Integer dots per inch for the height of this image. Defaults to 72 - when not present in the file, as is often the case. 
- """ - return self._image_header.vert_dpi - - @lazyproperty - def sha1(self): - """ - SHA1 hash digest of the image blob - """ - return hashlib.sha1(self._blob).hexdigest() - - @classmethod - def _from_stream(cls, stream, blob, filename=None): - """ - Return an instance of the |Image| subclass corresponding to the - format of the image in *stream*. - """ - image_header = _ImageHeaderFactory(stream) - if filename is None: - filename = 'image.%s' % image_header.default_ext - return cls(blob, filename, image_header) - - -def _ImageHeaderFactory(stream): - """ - Return a |BaseImageHeader| subclass instance that knows how to parse the - headers of the image in *stream*. - """ - from docx.image import SIGNATURES - - def read_32(stream): - stream.seek(0) - return stream.read(32) - - header = read_32(stream) - for cls, offset, signature_bytes in SIGNATURES: - end = offset + len(signature_bytes) - found_bytes = header[offset:end] - if found_bytes == signature_bytes: - return cls.from_stream(stream) - raise UnrecognizedImageError - - -class BaseImageHeader(object): - """ - Base class for image header subclasses like |Jpeg| and |Tiff|. - """ - def __init__(self, px_width, px_height, horz_dpi, vert_dpi): - self._px_width = px_width - self._px_height = px_height - self._horz_dpi = horz_dpi - self._vert_dpi = vert_dpi - - @property - def content_type(self): - """ - Abstract property definition, must be implemented by all subclasses. - """ - msg = ( - 'content_type property must be implemented by all subclasses of ' - 'BaseImageHeader' - ) - raise NotImplementedError(msg) - - @property - def default_ext(self): - """ - Default filename extension for images of this type. An abstract - property definition, must be implemented by all subclasses. 
- """ - msg = ( - 'default_ext property must be implemented by all subclasses of ' - 'BaseImageHeader' - ) - raise NotImplementedError(msg) - - @property - def px_width(self): - """ - The horizontal pixel dimension of the image - """ - return self._px_width - - @property - def px_height(self): - """ - The vertical pixel dimension of the image - """ - return self._px_height - - @property - def horz_dpi(self): - """ - Integer dots per inch for the width of this image. Defaults to 72 - when not present in the file, as is often the case. - """ - return self._horz_dpi - - @property - def vert_dpi(self): - """ - Integer dots per inch for the height of this image. Defaults to 72 - when not present in the file, as is often the case. - """ - return self._vert_dpi diff --git a/docx/image/tiff.py b/docx/image/tiff.py deleted file mode 100644 index d6561eca8..000000000 --- a/docx/image/tiff.py +++ /dev/null @@ -1,337 +0,0 @@ -# encoding: utf-8 - -from __future__ import absolute_import, division, print_function - -from .constants import MIME_TYPE, TIFF_FLD, TIFF_TAG -from .helpers import BIG_ENDIAN, LITTLE_ENDIAN, StreamReader -from .image import BaseImageHeader - - -class Tiff(BaseImageHeader): - """ - Image header parser for TIFF images. Handles both big and little endian - byte ordering. - """ - @property - def content_type(self): - """ - Return the MIME type of this TIFF image, unconditionally the string - ``image/tiff``. - """ - return MIME_TYPE.TIFF - - @property - def default_ext(self): - """ - Default filename extension, always 'tiff' for TIFF images. - """ - return 'tiff' - - @classmethod - def from_stream(cls, stream): - """ - Return a |Tiff| instance containing the properties of the TIFF image - in *stream*. 
- """ - parser = _TiffParser.parse(stream) - - px_width = parser.px_width - px_height = parser.px_height - horz_dpi = parser.horz_dpi - vert_dpi = parser.vert_dpi - - return cls(px_width, px_height, horz_dpi, vert_dpi) - - -class _TiffParser(object): - """ - Parses a TIFF image stream to extract the image properties found in its - main image file directory (IFD) - """ - def __init__(self, ifd_entries): - super(_TiffParser, self).__init__() - self._ifd_entries = ifd_entries - - @classmethod - def parse(cls, stream): - """ - Return an instance of |_TiffParser| containing the properties parsed - from the TIFF image in *stream*. - """ - stream_rdr = cls._make_stream_reader(stream) - ifd0_offset = stream_rdr.read_long(4) - ifd_entries = _IfdEntries.from_stream(stream_rdr, ifd0_offset) - return cls(ifd_entries) - - @property - def horz_dpi(self): - """ - The horizontal dots per inch value calculated from the XResolution - and ResolutionUnit tags of the IFD; defaults to 72 if those tags are - not present. - """ - return self._dpi(TIFF_TAG.X_RESOLUTION) - - @property - def vert_dpi(self): - """ - The vertical dots per inch value calculated from the XResolution and - ResolutionUnit tags of the IFD; defaults to 72 if those tags are not - present. - """ - return self._dpi(TIFF_TAG.Y_RESOLUTION) - - @property - def px_height(self): - """ - The number of stacked rows of pixels in the image, |None| if the IFD - contains no ``ImageLength`` tag, the expected case when the TIFF is - embeded in an Exif image. - """ - return self._ifd_entries.get(TIFF_TAG.IMAGE_LENGTH) - - @property - def px_width(self): - """ - The number of pixels in each row in the image, |None| if the IFD - contains no ``ImageWidth`` tag, the expected case when the TIFF is - embeded in an Exif image. 
- """ - return self._ifd_entries.get(TIFF_TAG.IMAGE_WIDTH) - - @classmethod - def _detect_endian(cls, stream): - """ - Return either BIG_ENDIAN or LITTLE_ENDIAN depending on the endian - indicator found in the TIFF *stream* header, either 'MM' or 'II'. - """ - stream.seek(0) - endian_str = stream.read(2) - return BIG_ENDIAN if endian_str == b'MM' else LITTLE_ENDIAN - - def _dpi(self, resolution_tag): - """ - Return the dpi value calculated for *resolution_tag*, which can be - either TIFF_TAG.X_RESOLUTION or TIFF_TAG.Y_RESOLUTION. The - calculation is based on the values of both that tag and the - TIFF_TAG.RESOLUTION_UNIT tag in this parser's |_IfdEntries| instance. - """ - if resolution_tag not in self._ifd_entries: - return 72 - resolution_unit = self._ifd_entries[TIFF_TAG.RESOLUTION_UNIT] - if resolution_unit == 1: # aspect ratio only - return 72 - # resolution_unit == 2 for inches, 3 for centimeters - units_per_inch = 1 if resolution_unit == 2 else 2.54 - dots_per_unit = self._ifd_entries[resolution_tag] - return int(round(dots_per_unit * units_per_inch)) - - @classmethod - def _make_stream_reader(cls, stream): - """ - Return a |StreamReader| instance with wrapping *stream* and having - "endian-ness" determined by the 'MM' or 'II' indicator in the TIFF - stream header. - """ - endian = cls._detect_endian(stream) - return StreamReader(stream, endian) - - -class _IfdEntries(object): - """ - Image File Directory for a TIFF image, having mapping (dict) semantics - allowing "tag" values to be retrieved by tag code. - """ - def __init__(self, entries): - super(_IfdEntries, self).__init__() - self._entries = entries - - def __contains__(self, key): - """ - Provides ``in`` operator, e.g. ``tag in ifd_entries`` - """ - return self._entries.__contains__(key) - - def __getitem__(self, key): - """ - Provides indexed access, e.g. 
``tag_value = ifd_entries[tag_code]`` - """ - return self._entries.__getitem__(key) - - @classmethod - def from_stream(cls, stream, offset): - """ - Return a new |_IfdEntries| instance parsed from *stream* starting at - *offset*. - """ - ifd_parser = _IfdParser(stream, offset) - entries = dict((e.tag, e.value) for e in ifd_parser.iter_entries()) - return cls(entries) - - def get(self, tag_code, default=None): - """ - Return value of IFD entry having tag matching *tag_code*, or - *default* if no matching tag found. - """ - return self._entries.get(tag_code, default) - - -class _IfdParser(object): - """ - Service object that knows how to extract directory entries from an Image - File Directory (IFD) - """ - def __init__(self, stream_rdr, offset): - super(_IfdParser, self).__init__() - self._stream_rdr = stream_rdr - self._offset = offset - - def iter_entries(self): - """ - Generate an |_IfdEntry| instance corresponding to each entry in the - directory. - """ - for idx in range(self._entry_count): - dir_entry_offset = self._offset + 2 + (idx*12) - ifd_entry = _IfdEntryFactory(self._stream_rdr, dir_entry_offset) - yield ifd_entry - - @property - def _entry_count(self): - """ - The count of directory entries, read from the top of the IFD header - """ - return self._stream_rdr.read_short(self._offset) - - -def _IfdEntryFactory(stream_rdr, offset): - """ - Return an |_IfdEntry| subclass instance containing the value of the - directory entry at *offset* in *stream_rdr*. - """ - ifd_entry_classes = { - TIFF_FLD.ASCII: _AsciiIfdEntry, - TIFF_FLD.SHORT: _ShortIfdEntry, - TIFF_FLD.LONG: _LongIfdEntry, - TIFF_FLD.RATIONAL: _RationalIfdEntry, - } - field_type = stream_rdr.read_short(offset, 2) - if field_type in ifd_entry_classes: - entry_cls = ifd_entry_classes[field_type] - else: - entry_cls = _IfdEntry - return entry_cls.from_stream(stream_rdr, offset) - - -class _IfdEntry(object): - """ - Base class for IFD entry classes. Subclasses are differentiated by value - type, e.g. 
ASCII, long int, etc. - """ - def __init__(self, tag_code, value): - super(_IfdEntry, self).__init__() - self._tag_code = tag_code - self._value = value - - @classmethod - def from_stream(cls, stream_rdr, offset): - """ - Return an |_IfdEntry| subclass instance containing the tag and value - of the tag parsed from *stream_rdr* at *offset*. Note this method is - common to all subclasses. Override the ``_parse_value()`` method to - provide distinctive behavior based on field type. - """ - tag_code = stream_rdr.read_short(offset, 0) - value_count = stream_rdr.read_long(offset, 4) - value_offset = stream_rdr.read_long(offset, 8) - value = cls._parse_value( - stream_rdr, offset, value_count, value_offset - ) - return cls(tag_code, value) - - @classmethod - def _parse_value(cls, stream_rdr, offset, value_count, value_offset): - """ - Return the value of this field parsed from *stream_rdr* at *offset*. - Intended to be overridden by subclasses. - """ - return 'UNIMPLEMENTED FIELD TYPE' # pragma: no cover - - @property - def tag(self): - """ - Short int code that identifies this IFD entry - """ - return self._tag_code - - @property - def value(self): - """ - Value of this tag, its type being dependent on the tag. - """ - return self._value - - -class _AsciiIfdEntry(_IfdEntry): - """ - IFD entry having the form of a NULL-terminated ASCII string - """ - @classmethod - def _parse_value(cls, stream_rdr, offset, value_count, value_offset): - """ - Return the ASCII string parsed from *stream_rdr* at *value_offset*. - The length of the string, including a terminating '\x00' (NUL) - character, is in *value_count*. - """ - return stream_rdr.read_str(value_count-1, value_offset) - - -class _ShortIfdEntry(_IfdEntry): - """ - IFD entry expressed as a short (2-byte) integer - """ - @classmethod - def _parse_value(cls, stream_rdr, offset, value_count, value_offset): - """ - Return the short int value contained in the *value_offset* field of - this entry. 
Only supports single values at present. - """ - if value_count == 1: - return stream_rdr.read_short(offset, 8) - else: # pragma: no cover - return 'Multi-value short integer NOT IMPLEMENTED' - - -class _LongIfdEntry(_IfdEntry): - """ - IFD entry expressed as a long (4-byte) integer - """ - @classmethod - def _parse_value(cls, stream_rdr, offset, value_count, value_offset): - """ - Return the long int value contained in the *value_offset* field of - this entry. Only supports single values at present. - """ - if value_count == 1: - return stream_rdr.read_long(offset, 8) - else: # pragma: no cover - return 'Multi-value long integer NOT IMPLEMENTED' - - -class _RationalIfdEntry(_IfdEntry): - """ - IFD entry expressed as a numerator, denominator pair - """ - @classmethod - def _parse_value(cls, stream_rdr, offset, value_count, value_offset): - """ - Return the rational (numerator / denominator) value at *value_offset* - in *stream_rdr* as a floating-point number. Only supports single - values at present. - """ - if value_count == 1: - numerator = stream_rdr.read_long(value_offset) - denominator = stream_rdr.read_long(value_offset, 4) - return numerator / denominator - else: # pragma: no cover - return 'Multi-value Rational NOT IMPLEMENTED' diff --git a/docx/opc/compat.py b/docx/opc/compat.py deleted file mode 100644 index d944fe43b..000000000 --- a/docx/opc/compat.py +++ /dev/null @@ -1,50 +0,0 @@ -# encoding: utf-8 - -""" -Provides Python 2/3 compatibility objects -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -import sys - -# =========================================================================== -# Python 3 versions -# =========================================================================== - -if sys.version_info >= (3, 0): - - def cls_method_fn(cls, method_name): - """ - Return the function object associated with the method of *cls* having - *method_name*. 
- """ - return getattr(cls, method_name) - - def is_string(obj): - """ - Return True if *obj* is a string, False otherwise. - """ - return isinstance(obj, str) - -# =========================================================================== -# Python 2 versions -# =========================================================================== - -else: - - def cls_method_fn(cls, method_name): - """ - Return the function object associated with the method of *cls* having - *method_name*. - """ - unbound_method = getattr(cls, method_name) - return unbound_method.__func__ - - def is_string(obj): - """ - Return True if *obj* is a string, False otherwise. - """ - return isinstance(obj, basestring) diff --git a/docx/opc/constants.py b/docx/opc/constants.py deleted file mode 100644 index b90aa394a..000000000 --- a/docx/opc/constants.py +++ /dev/null @@ -1,658 +0,0 @@ -# encoding: utf-8 - -""" -Constant values related to the Open Packaging Convention, in particular, -content types and relationship types. 
-""" - - -class CONTENT_TYPE(object): - """ - Content type URIs (like MIME-types) that specify a part's format - """ - BMP = ( - 'image/bmp' - ) - DML_CHART = ( - 'application/vnd.openxmlformats-officedocument.drawingml.chart+xml' - ) - DML_CHARTSHAPES = ( - 'application/vnd.openxmlformats-officedocument.drawingml.chartshapes' - '+xml' - ) - DML_DIAGRAM_COLORS = ( - 'application/vnd.openxmlformats-officedocument.drawingml.diagramColo' - 'rs+xml' - ) - DML_DIAGRAM_DATA = ( - 'application/vnd.openxmlformats-officedocument.drawingml.diagramData' - '+xml' - ) - DML_DIAGRAM_LAYOUT = ( - 'application/vnd.openxmlformats-officedocument.drawingml.diagramLayo' - 'ut+xml' - ) - DML_DIAGRAM_STYLE = ( - 'application/vnd.openxmlformats-officedocument.drawingml.diagramStyl' - 'e+xml' - ) - GIF = ( - 'image/gif' - ) - JPEG = ( - 'image/jpeg' - ) - MS_PHOTO = ( - 'image/vnd.ms-photo' - ) - OFC_CUSTOM_PROPERTIES = ( - 'application/vnd.openxmlformats-officedocument.custom-properties+xml' - ) - OFC_CUSTOM_XML_PROPERTIES = ( - 'application/vnd.openxmlformats-officedocument.customXmlProperties+x' - 'ml' - ) - OFC_DRAWING = ( - 'application/vnd.openxmlformats-officedocument.drawing+xml' - ) - OFC_EXTENDED_PROPERTIES = ( - 'application/vnd.openxmlformats-officedocument.extended-properties+x' - 'ml' - ) - OFC_OLE_OBJECT = ( - 'application/vnd.openxmlformats-officedocument.oleObject' - ) - OFC_PACKAGE = ( - 'application/vnd.openxmlformats-officedocument.package' - ) - OFC_THEME = ( - 'application/vnd.openxmlformats-officedocument.theme+xml' - ) - OFC_THEME_OVERRIDE = ( - 'application/vnd.openxmlformats-officedocument.themeOverride+xml' - ) - OFC_VML_DRAWING = ( - 'application/vnd.openxmlformats-officedocument.vmlDrawing' - ) - OPC_CORE_PROPERTIES = ( - 'application/vnd.openxmlformats-package.core-properties+xml' - ) - OPC_DIGITAL_SIGNATURE_CERTIFICATE = ( - 'application/vnd.openxmlformats-package.digital-signature-certificat' - 'e' - ) - OPC_DIGITAL_SIGNATURE_ORIGIN = ( - 
'application/vnd.openxmlformats-package.digital-signature-origin' - ) - OPC_DIGITAL_SIGNATURE_XMLSIGNATURE = ( - 'application/vnd.openxmlformats-package.digital-signature-xmlsignatu' - 're+xml' - ) - OPC_RELATIONSHIPS = ( - 'application/vnd.openxmlformats-package.relationships+xml' - ) - PML_COMMENTS = ( - 'application/vnd.openxmlformats-officedocument.presentationml.commen' - 'ts+xml' - ) - PML_COMMENT_AUTHORS = ( - 'application/vnd.openxmlformats-officedocument.presentationml.commen' - 'tAuthors+xml' - ) - PML_HANDOUT_MASTER = ( - 'application/vnd.openxmlformats-officedocument.presentationml.handou' - 'tMaster+xml' - ) - PML_NOTES_MASTER = ( - 'application/vnd.openxmlformats-officedocument.presentationml.notesM' - 'aster+xml' - ) - PML_NOTES_SLIDE = ( - 'application/vnd.openxmlformats-officedocument.presentationml.notesS' - 'lide+xml' - ) - PML_PRESENTATION_MAIN = ( - 'application/vnd.openxmlformats-officedocument.presentationml.presen' - 'tation.main+xml' - ) - PML_PRES_PROPS = ( - 'application/vnd.openxmlformats-officedocument.presentationml.presPr' - 'ops+xml' - ) - PML_PRINTER_SETTINGS = ( - 'application/vnd.openxmlformats-officedocument.presentationml.printe' - 'rSettings' - ) - PML_SLIDE = ( - 'application/vnd.openxmlformats-officedocument.presentationml.slide+' - 'xml' - ) - PML_SLIDESHOW_MAIN = ( - 'application/vnd.openxmlformats-officedocument.presentationml.slides' - 'how.main+xml' - ) - PML_SLIDE_LAYOUT = ( - 'application/vnd.openxmlformats-officedocument.presentationml.slideL' - 'ayout+xml' - ) - PML_SLIDE_MASTER = ( - 'application/vnd.openxmlformats-officedocument.presentationml.slideM' - 'aster+xml' - ) - PML_SLIDE_UPDATE_INFO = ( - 'application/vnd.openxmlformats-officedocument.presentationml.slideU' - 'pdateInfo+xml' - ) - PML_TABLE_STYLES = ( - 'application/vnd.openxmlformats-officedocument.presentationml.tableS' - 'tyles+xml' - ) - PML_TAGS = ( - 'application/vnd.openxmlformats-officedocument.presentationml.tags+x' - 'ml' - ) - PML_TEMPLATE_MAIN 
= ( - 'application/vnd.openxmlformats-officedocument.presentationml.templa' - 'te.main+xml' - ) - PML_VIEW_PROPS = ( - 'application/vnd.openxmlformats-officedocument.presentationml.viewPr' - 'ops+xml' - ) - PNG = ( - 'image/png' - ) - SML_CALC_CHAIN = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.calcCha' - 'in+xml' - ) - SML_CHARTSHEET = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.chartsh' - 'eet+xml' - ) - SML_COMMENTS = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.comment' - 's+xml' - ) - SML_CONNECTIONS = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.connect' - 'ions+xml' - ) - SML_CUSTOM_PROPERTY = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.customP' - 'roperty' - ) - SML_DIALOGSHEET = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.dialogs' - 'heet+xml' - ) - SML_EXTERNAL_LINK = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.externa' - 'lLink+xml' - ) - SML_PIVOT_CACHE_DEFINITION = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCa' - 'cheDefinition+xml' - ) - SML_PIVOT_CACHE_RECORDS = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCa' - 'cheRecords+xml' - ) - SML_PIVOT_TABLE = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.pivotTa' - 'ble+xml' - ) - SML_PRINTER_SETTINGS = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.printer' - 'Settings' - ) - SML_QUERY_TABLE = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.queryTa' - 'ble+xml' - ) - SML_REVISION_HEADERS = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.revisio' - 'nHeaders+xml' - ) - SML_REVISION_LOG = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.revisio' - 'nLog+xml' - ) - SML_SHARED_STRINGS = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sharedS' - 'trings+xml' - ) - SML_SHEET = ( - 
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' - ) - SML_SHEET_MAIN = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.m' - 'ain+xml' - ) - SML_SHEET_METADATA = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheetMe' - 'tadata+xml' - ) - SML_STYLES = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.styles+' - 'xml' - ) - SML_TABLE = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.table+x' - 'ml' - ) - SML_TABLE_SINGLE_CELLS = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.tableSi' - 'ngleCells+xml' - ) - SML_TEMPLATE_MAIN = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.templat' - 'e.main+xml' - ) - SML_USER_NAMES = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.userNam' - 'es+xml' - ) - SML_VOLATILE_DEPENDENCIES = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.volatil' - 'eDependencies+xml' - ) - SML_WORKSHEET = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.workshe' - 'et+xml' - ) - TIFF = ( - 'image/tiff' - ) - WML_COMMENTS = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.comm' - 'ents+xml' - ) - WML_DOCUMENT = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.docu' - 'ment' - ) - WML_DOCUMENT_GLOSSARY = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.docu' - 'ment.glossary+xml' - ) - WML_DOCUMENT_MAIN = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.docu' - 'ment.main+xml' - ) - WML_ENDNOTES = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.endn' - 'otes+xml' - ) - WML_FONT_TABLE = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.font' - 'Table+xml' - ) - WML_FOOTER = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.foot' - 'er+xml' - ) - WML_FOOTNOTES = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.foot' - 'notes+xml' - ) 
- WML_HEADER = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.head' - 'er+xml' - ) - WML_NUMBERING = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.numb' - 'ering+xml' - ) - WML_PRINTER_SETTINGS = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.prin' - 'terSettings' - ) - WML_SETTINGS = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.sett' - 'ings+xml' - ) - WML_STYLES = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.styl' - 'es+xml' - ) - WML_WEB_SETTINGS = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.webS' - 'ettings+xml' - ) - XML = ( - 'application/xml' - ) - X_EMF = ( - 'image/x-emf' - ) - X_FONTDATA = ( - 'application/x-fontdata' - ) - X_FONT_TTF = ( - 'application/x-font-ttf' - ) - X_WMF = ( - 'image/x-wmf' - ) - - -class NAMESPACE(object): - """Constant values for OPC XML namespaces""" - DML_WORDPROCESSING_DRAWING = ( - 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDraw' - 'ing' - ) - OFC_RELATIONSHIPS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - ) - OPC_RELATIONSHIPS = ( - 'http://schemas.openxmlformats.org/package/2006/relationships' - ) - OPC_CONTENT_TYPES = ( - 'http://schemas.openxmlformats.org/package/2006/content-types' - ) - WML_MAIN = ( - 'http://schemas.openxmlformats.org/wordprocessingml/2006/main' - ) - - -class RELATIONSHIP_TARGET_MODE(object): - """Open XML relationship target modes""" - EXTERNAL = 'External' - INTERNAL = 'Internal' - - -class RELATIONSHIP_TYPE(object): - AUDIO = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/audio' - ) - A_F_CHUNK = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/aFChunk' - ) - CALC_CHAIN = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/calcChain' - ) - CERTIFICATE = ( - 'http://schemas.openxmlformats.org/package/2006/relationships/digita' - 
'l-signature/certificate' - ) - CHART = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/chart' - ) - CHARTSHEET = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/chartsheet' - ) - CHART_USER_SHAPES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/chartUserShapes' - ) - COMMENTS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/comments' - ) - COMMENT_AUTHORS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/commentAuthors' - ) - CONNECTIONS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/connections' - ) - CONTROL = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/control' - ) - CORE_PROPERTIES = ( - 'http://schemas.openxmlformats.org/package/2006/relationships/metada' - 'ta/core-properties' - ) - CUSTOM_PROPERTIES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/custom-properties' - ) - CUSTOM_PROPERTY = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/customProperty' - ) - CUSTOM_XML = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/customXml' - ) - CUSTOM_XML_PROPS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/customXmlProps' - ) - DIAGRAM_COLORS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/diagramColors' - ) - DIAGRAM_DATA = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/diagramData' - ) - DIAGRAM_LAYOUT = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/diagramLayout' - ) - DIAGRAM_QUICK_STYLE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/diagramQuickStyle' - ) - DIALOGSHEET = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/dialogsheet' - ) - DRAWING = ( - 
'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/drawing' - ) - ENDNOTES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/endnotes' - ) - EXTENDED_PROPERTIES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/extended-properties' - ) - EXTERNAL_LINK = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/externalLink' - ) - FONT = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/font' - ) - FONT_TABLE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/fontTable' - ) - FOOTER = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/footer' - ) - FOOTNOTES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/footnotes' - ) - GLOSSARY_DOCUMENT = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/glossaryDocument' - ) - HANDOUT_MASTER = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/handoutMaster' - ) - HEADER = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/header' - ) - HYPERLINK = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/hyperlink' - ) - IMAGE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/image' - ) - NOTES_MASTER = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/notesMaster' - ) - NOTES_SLIDE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/notesSlide' - ) - NUMBERING = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/numbering' - ) - OFFICE_DOCUMENT = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/officeDocument' - ) - OLE_OBJECT = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/oleObject' - ) - ORIGIN = ( - 
'http://schemas.openxmlformats.org/package/2006/relationships/digita' - 'l-signature/origin' - ) - PACKAGE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/package' - ) - PIVOT_CACHE_DEFINITION = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/pivotCacheDefinition' - ) - PIVOT_CACHE_RECORDS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/spreadsheetml/pivotCacheRecords' - ) - PIVOT_TABLE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/pivotTable' - ) - PRES_PROPS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/presProps' - ) - PRINTER_SETTINGS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/printerSettings' - ) - QUERY_TABLE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/queryTable' - ) - REVISION_HEADERS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/revisionHeaders' - ) - REVISION_LOG = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/revisionLog' - ) - SETTINGS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/settings' - ) - SHARED_STRINGS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/sharedStrings' - ) - SHEET_METADATA = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/sheetMetadata' - ) - SIGNATURE = ( - 'http://schemas.openxmlformats.org/package/2006/relationships/digita' - 'l-signature/signature' - ) - SLIDE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/slide' - ) - SLIDE_LAYOUT = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/slideLayout' - ) - SLIDE_MASTER = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/slideMaster' - ) - SLIDE_UPDATE_INFO = ( - 
'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/slideUpdateInfo' - ) - STYLES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/styles' - ) - TABLE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/table' - ) - TABLE_SINGLE_CELLS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/tableSingleCells' - ) - TABLE_STYLES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/tableStyles' - ) - TAGS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/tags' - ) - THEME = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/theme' - ) - THEME_OVERRIDE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/themeOverride' - ) - THUMBNAIL = ( - 'http://schemas.openxmlformats.org/package/2006/relationships/metada' - 'ta/thumbnail' - ) - USERNAMES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/usernames' - ) - VIDEO = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/video' - ) - VIEW_PROPS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/viewProps' - ) - VML_DRAWING = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/vmlDrawing' - ) - VOLATILE_DEPENDENCIES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/volatileDependencies' - ) - WEB_SETTINGS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/webSettings' - ) - WORKSHEET_SOURCE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/worksheetSource' - ) - XML_MAPS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/xmlMaps' - ) diff --git a/docx/opc/exceptions.py b/docx/opc/exceptions.py deleted file mode 100644 index b8e6de43f..000000000 --- a/docx/opc/exceptions.py +++ /dev/null @@ 
-1,19 +0,0 @@ -# encoding: utf-8 - -""" -Exceptions specific to python-opc - -The base exception class is OpcError. -""" - - -class OpcError(Exception): - """ - Base error class for python-opc - """ - - -class PackageNotFoundError(OpcError): - """ - Raised when a package cannot be found at the specified path. - """ diff --git a/docx/opc/oxml.py b/docx/opc/oxml.py deleted file mode 100644 index 0c09312b5..000000000 --- a/docx/opc/oxml.py +++ /dev/null @@ -1,292 +0,0 @@ -# encoding: utf-8 - -""" -Temporary stand-in for main oxml module that came across with the -PackageReader transplant. Probably much will get replaced with objects from -the pptx.oxml.core and then this module will either get deleted or only hold -the package related custom element classes. -""" - -from __future__ import absolute_import, print_function, unicode_literals - -from lxml import etree - -from .constants import NAMESPACE as NS, RELATIONSHIP_TARGET_MODE as RTM - - -# configure XML parser -element_class_lookup = etree.ElementNamespaceClassLookup() -oxml_parser = etree.XMLParser(remove_blank_text=True) -oxml_parser.set_element_class_lookup(element_class_lookup) - -nsmap = { - 'ct': NS.OPC_CONTENT_TYPES, - 'pr': NS.OPC_RELATIONSHIPS, - 'r': NS.OFC_RELATIONSHIPS, -} - - -# =========================================================================== -# functions -# =========================================================================== - -def parse_xml(text): - """ - ``etree.fromstring()`` replacement that uses oxml parser - """ - return etree.fromstring(text, oxml_parser) - - -def qn(tag): - """ - Stands for "qualified name", a utility function to turn a namespace - prefixed tag name into a Clark-notation qualified tag name for lxml. For - example, ``qn('p:cSld')`` returns ``'{http://schemas.../main}cSld'``. 
- """ - prefix, tagroot = tag.split(':') - uri = nsmap[prefix] - return '{%s}%s' % (uri, tagroot) - - -def serialize_part_xml(part_elm): - """ - Serialize *part_elm* etree element to XML suitable for storage as an XML - part. That is to say, no insignificant whitespace added for readability, - and an appropriate XML declaration added with UTF-8 encoding specified. - """ - return etree.tostring(part_elm, encoding='UTF-8', standalone=True) - - -def serialize_for_reading(element): - """ - Serialize *element* to human-readable XML suitable for tests. No XML - declaration. - """ - return etree.tostring(element, encoding='unicode', pretty_print=True) - - -# =========================================================================== -# Custom element classes -# =========================================================================== - -class BaseOxmlElement(etree.ElementBase): - """ - Base class for all custom element classes, to add standardized behavior - to all classes in one place. - """ - @property - def xml(self): - """ - Return XML string for this element, suitable for testing purposes. - Pretty printed for readability and without an XML declaration at the - top. - """ - return serialize_for_reading(self) - - -class CT_Default(BaseOxmlElement): - """ - ```` element, specifying the default content type to be applied - to a part with the specified extension. - """ - @property - def content_type(self): - """ - String held in the ``ContentType`` attribute of this ```` - element. - """ - return self.get('ContentType') - - @property - def extension(self): - """ - String held in the ``Extension`` attribute of this ```` - element. - """ - return self.get('Extension') - - @staticmethod - def new(ext, content_type): - """ - Return a new ```` element with attributes set to parameter - values. 
- """ - xml = '' % nsmap['ct'] - default = parse_xml(xml) - default.set('Extension', ext) - default.set('ContentType', content_type) - return default - - -class CT_Override(BaseOxmlElement): - """ - ```` element, specifying the content type to be applied for a - part with the specified partname. - """ - @property - def content_type(self): - """ - String held in the ``ContentType`` attribute of this ```` - element. - """ - return self.get('ContentType') - - @staticmethod - def new(partname, content_type): - """ - Return a new ```` element with attributes set to parameter - values. - """ - xml = '' % nsmap['ct'] - override = parse_xml(xml) - override.set('PartName', partname) - override.set('ContentType', content_type) - return override - - @property - def partname(self): - """ - String held in the ``PartName`` attribute of this ```` - element. - """ - return self.get('PartName') - - -class CT_Relationship(BaseOxmlElement): - """ - ```` element, representing a single relationship from a - source to a target part. - """ - @staticmethod - def new(rId, reltype, target, target_mode=RTM.INTERNAL): - """ - Return a new ```` element. - """ - xml = '' % nsmap['pr'] - relationship = parse_xml(xml) - relationship.set('Id', rId) - relationship.set('Type', reltype) - relationship.set('Target', target) - if target_mode == RTM.EXTERNAL: - relationship.set('TargetMode', RTM.EXTERNAL) - return relationship - - @property - def rId(self): - """ - String held in the ``Id`` attribute of this ```` - element. - """ - return self.get('Id') - - @property - def reltype(self): - """ - String held in the ``Type`` attribute of this ```` - element. - """ - return self.get('Type') - - @property - def target_ref(self): - """ - String held in the ``Target`` attribute of this ```` - element. - """ - return self.get('Target') - - @property - def target_mode(self): - """ - String held in the ``TargetMode`` attribute of this - ```` element, either ``Internal`` or ``External``. 
- Defaults to ``Internal``. - """ - return self.get('TargetMode', RTM.INTERNAL) - - -class CT_Relationships(BaseOxmlElement): - """ - ```` element, the root element in a .rels file. - """ - def add_rel(self, rId, reltype, target, is_external=False): - """ - Add a child ```` element with attributes set according - to parameter values. - """ - target_mode = RTM.EXTERNAL if is_external else RTM.INTERNAL - relationship = CT_Relationship.new(rId, reltype, target, target_mode) - self.append(relationship) - - @staticmethod - def new(): - """ - Return a new ```` element. - """ - xml = '' % nsmap['pr'] - relationships = parse_xml(xml) - return relationships - - @property - def Relationship_lst(self): - """ - Return a list containing all the ```` child elements. - """ - return self.findall(qn('pr:Relationship')) - - @property - def xml(self): - """ - Return XML string for this element, suitable for saving in a .rels - stream, not pretty printed and with an XML declaration at the top. - """ - return serialize_part_xml(self) - - -class CT_Types(BaseOxmlElement): - """ - ```` element, the container element for Default and Override - elements in [Content_Types].xml. - """ - def add_default(self, ext, content_type): - """ - Add a child ```` element with attributes set to parameter - values. - """ - default = CT_Default.new(ext, content_type) - self.append(default) - - def add_override(self, partname, content_type): - """ - Add a child ```` element with attributes set to parameter - values. - """ - override = CT_Override.new(partname, content_type) - self.append(override) - - @property - def defaults(self): - return self.findall(qn('ct:Default')) - - @staticmethod - def new(): - """ - Return a new ```` element. 
- """ - xml = '' % nsmap['ct'] - types = parse_xml(xml) - return types - - @property - def overrides(self): - return self.findall(qn('ct:Override')) - - -ct_namespace = element_class_lookup.get_namespace(nsmap['ct']) -ct_namespace['Default'] = CT_Default -ct_namespace['Override'] = CT_Override -ct_namespace['Types'] = CT_Types - -pr_namespace = element_class_lookup.get_namespace(nsmap['pr']) -pr_namespace['Relationship'] = CT_Relationship -pr_namespace['Relationships'] = CT_Relationships diff --git a/docx/opc/package.py b/docx/opc/package.py deleted file mode 100644 index 6c44453ce..000000000 --- a/docx/opc/package.py +++ /dev/null @@ -1,577 +0,0 @@ -# encoding: utf-8 - -""" -The :mod:`pptx.packaging` module coheres around the concerns of reading and -writing presentations to and from a .pptx file. -""" - -from __future__ import absolute_import, print_function, unicode_literals - -from .compat import cls_method_fn -from .constants import RELATIONSHIP_TYPE as RT -from .oxml import CT_Relationships, serialize_part_xml -from ..oxml import parse_xml -from .packuri import PACKAGE_URI, PackURI -from .pkgreader import PackageReader -from .pkgwriter import PackageWriter -from .shared import lazyproperty - - -class OpcPackage(object): - """ - Main API class for |python-opc|. A new instance is constructed by calling - the :meth:`open` class method with a path to a package file or file-like - object containing one. - """ - def __init__(self): - super(OpcPackage, self).__init__() - - def after_unmarshal(self): - """ - Entry point for any post-unmarshaling processing. May be overridden - by subclasses without forwarding call to super. - """ - # don't place any code here, just catch call if not overridden by - # subclass - pass - - def iter_rels(self): - """ - Generate exactly one reference to each relationship in the package by - performing a depth-first traversal of the rels graph. 
- """ - def walk_rels(source, visited=None): - visited = [] if visited is None else visited - for rel in source.rels.values(): - yield rel - if rel.is_external: - continue - part = rel.target_part - if part in visited: - continue - visited.append(part) - new_source = part - for rel in walk_rels(new_source, visited): - yield rel - - for rel in walk_rels(self): - yield rel - - def iter_parts(self): - """ - Generate exactly one reference to each of the parts in the package by - performing a depth-first traversal of the rels graph. - """ - def walk_parts(source, visited=list()): - for rel in source.rels.values(): - if rel.is_external: - continue - part = rel.target_part - if part in visited: - continue - visited.append(part) - yield part - new_source = part - for part in walk_parts(new_source, visited): - yield part - - for part in walk_parts(self): - yield part - - def load_rel(self, reltype, target, rId, is_external=False): - """ - Return newly added |_Relationship| instance of *reltype* between this - part and *target* with key *rId*. Target mode is set to - ``RTM.EXTERNAL`` if *is_external* is |True|. Intended for use during - load from a serialized package, where the rId is well known. Other - methods exist for adding a new relationship to the package during - processing. - """ - return self.rels.add_relationship(reltype, target, rId, is_external) - - @property - def main_document(self): - """ - Return a reference to the main document part for this package. - Examples include a document part for a WordprocessingML package, a - presentation part for a PresentationML package, or a workbook part - for a SpreadsheetML package. - """ - return self.part_related_by(RT.OFFICE_DOCUMENT) - - @classmethod - def open(cls, pkg_file): - """ - Return an |OpcPackage| instance loaded with the contents of - *pkg_file*. 
- """ - pkg_reader = PackageReader.from_file(pkg_file) - package = cls() - Unmarshaller.unmarshal(pkg_reader, package, PartFactory) - return package - - def part_related_by(self, reltype): - """ - Return part to which this package has a relationship of *reltype*. - Raises |KeyError| if no such relationship is found and |ValueError| - if more than one such relationship is found. - """ - return self.rels.part_with_reltype(reltype) - - @property - def parts(self): - """ - Return a list containing a reference to each of the parts in this - package. - """ - return [part for part in self.iter_parts()] - - def relate_to(self, part, reltype): - """ - Return rId key of relationship to *part*, from the existing - relationship if there is one, otherwise a newly created one. - """ - rel = self.rels.get_or_add(reltype, part) - return rel.rId - - @lazyproperty - def rels(self): - """ - Return a reference to the |Relationships| instance holding the - collection of relationships for this package. - """ - return Relationships(PACKAGE_URI.baseURI) - - def save(self, pkg_file): - """ - Save this package to *pkg_file*, where *file* can be either a path to - a file (a string) or a file-like object. - """ - for part in self.parts: - part.before_marshal() - PackageWriter.write(pkg_file, self.rels, self.parts) - - -class Part(object): - """ - Base class for package parts. Provides common properties and methods, but - intended to be subclassed in client code to implement specific part - behaviors. - """ - def __init__(self, partname, content_type, blob=None, package=None): - super(Part, self).__init__() - self._partname = partname - self._content_type = content_type - self._blob = blob - self._package = package - - def after_unmarshal(self): - """ - Entry point for post-unmarshaling processing, for example to parse - the part XML. May be overridden by subclasses without forwarding call - to super. 
- """ - # don't place any code here, just catch call if not overridden by - # subclass - pass - - def before_marshal(self): - """ - Entry point for pre-serialization processing, for example to finalize - part naming if necessary. May be overridden by subclasses without - forwarding call to super. - """ - # don't place any code here, just catch call if not overridden by - # subclass - pass - - @property - def blob(self): - """ - Contents of this package part as a sequence of bytes. May be text or - binary. Intended to be overridden by subclasses. Default behavior is - to return load blob. - """ - return self._blob - - @property - def content_type(self): - """ - Content type of this part. - """ - return self._content_type - - def drop_rel(self, rId): - """ - Remove the relationship identified by *rId* if its reference count - is less than 2. Relationships with a reference count of 0 are - implicit relationships. - """ - if self._rel_ref_count(rId) < 2: - del self.rels[rId] - - @classmethod - def load(cls, partname, content_type, blob, package): - return cls(partname, content_type, blob, package) - - def load_rel(self, reltype, target, rId, is_external=False): - """ - Return newly added |_Relationship| instance of *reltype* between this - part and *target* with key *rId*. Target mode is set to - ``RTM.EXTERNAL`` if *is_external* is |True|. Intended for use during - load from a serialized package, where the rId is well-known. Other - methods exist for adding a new relationship to a part when - manipulating a part. - """ - return self.rels.add_relationship(reltype, target, rId, is_external) - - @property - def partname(self): - """ - |PackURI| instance holding partname of this part, e.g. 
- '/ppt/slides/slide1.xml' - """ - return self._partname - - @partname.setter - def partname(self, partname): - if not isinstance(partname, PackURI): - tmpl = "partname must be instance of PackURI, got '%s'" - raise TypeError(tmpl % type(partname).__name__) - self._partname = partname - - @property - def package(self): - """ - |OpcPackage| instance this part belongs to. - """ - return self._package - - def part_related_by(self, reltype): - """ - Return part to which this part has a relationship of *reltype*. - Raises |KeyError| if no such relationship is found and |ValueError| - if more than one such relationship is found. Provides ability to - resolve implicitly related part, such as Slide -> SlideLayout. - """ - return self.rels.part_with_reltype(reltype) - - def relate_to(self, target, reltype, is_external=False): - """ - Return rId key of relationship of *reltype* to *target*, from an - existing relationship if there is one, otherwise a newly created one. - """ - if is_external: - return self.rels.get_or_add_ext_rel(reltype, target) - else: - rel = self.rels.get_or_add(reltype, target) - return rel.rId - - @property - def related_parts(self): - """ - Dictionary mapping related parts by rId, so child objects can resolve - explicit relationships present in the part XML, e.g. sldIdLst to a - specific |Slide| instance. - """ - return self.rels.related_parts - - @lazyproperty - def rels(self): - """ - |Relationships| instance holding the relationships for this part. - """ - return Relationships(self._partname.baseURI) - - def target_ref(self, rId): - """ - Return URL contained in target ref of relationship identified by - *rId*. - """ - rel = self.rels[rId] - return rel.target_ref - - def _rel_ref_count(self, rId): - """ - Return the count of references in this part's XML to the relationship - identified by *rId*. 
- """ - rIds = self._element.xpath('//@r:id') - return len([_rId for _rId in rIds if _rId == rId]) - - -class XmlPart(Part): - """ - Base class for package parts containing an XML payload, which is most of - them. Provides additional methods to the |Part| base class that take care - of parsing and reserializing the XML payload and managing relationships - to other parts. - """ - def __init__(self, partname, content_type, element, package): - super(XmlPart, self).__init__( - partname, content_type, package=package - ) - self._element = element - - @property - def blob(self): - return serialize_part_xml(self._element) - - @classmethod - def load(cls, partname, content_type, blob, package): - element = parse_xml(blob) - return cls(partname, content_type, element, package) - - @property - def part(self): - """ - Part of the parent protocol, "children" of the document will not know - the part that contains them so must ask their parent object. That - chain of delegation ends here for child objects. - """ - return self - - -class PartFactory(object): - """ - Provides a way for client code to specify a subclass of |Part| to be - constructed by |Unmarshaller| based on its content type and/or a custom - callable. Setting ``PartFactory.part_class_selector`` to a callable - object will cause that object to be called with the parameters - ``content_type, reltype``, once for each part in the package. If the - callable returns an object, it is used as the class for that part. If it - returns |None|, part class selection falls back to the content type map - defined in ``PartFactory.part_type_for``. If no class is returned from - either of these, the class contained in ``PartFactory.default_part_type`` - is used to construct the part, which is by default ``opc.package.Part``. 
- """ - part_class_selector = None - part_type_for = {} - default_part_type = Part - - def __new__(cls, partname, content_type, reltype, blob, package): - PartClass = None - if cls.part_class_selector is not None: - part_class_selector = cls_method_fn(cls, 'part_class_selector') - PartClass = part_class_selector(content_type, reltype) - if PartClass is None: - PartClass = cls._part_cls_for(content_type) - return PartClass.load(partname, content_type, blob, package) - - @classmethod - def _part_cls_for(cls, content_type): - """ - Return the custom part class registered for *content_type*, or the - default part class if no custom class is registered for - *content_type*. - """ - if content_type in cls.part_type_for: - return cls.part_type_for[content_type] - return cls.default_part_type - - -class Relationships(dict): - """ - Collection object for |_Relationship| instances, having list semantics. - """ - def __init__(self, baseURI): - super(Relationships, self).__init__() - self._baseURI = baseURI - self._target_parts_by_rId = {} - - def add_relationship(self, reltype, target, rId, is_external=False): - """ - Return a newly added |_Relationship| instance. - """ - rel = _Relationship(rId, reltype, target, self._baseURI, is_external) - self[rId] = rel - if not is_external: - self._target_parts_by_rId[rId] = target - return rel - - def get_or_add(self, reltype, target_part): - """ - Return relationship of *reltype* to *target_part*, newly added if not - already present in collection. - """ - rel = self._get_matching(reltype, target_part) - if rel is None: - rId = self._next_rId - rel = self.add_relationship(reltype, target_part, rId) - return rel - - def get_or_add_ext_rel(self, reltype, target_ref): - """ - Return rId of external relationship of *reltype* to *target_ref*, - newly added if not already present in collection. 
- """ - rel = self._get_matching(reltype, target_ref, is_external=True) - if rel is None: - rId = self._next_rId - rel = self.add_relationship( - reltype, target_ref, rId, is_external=True - ) - return rel.rId - - def part_with_reltype(self, reltype): - """ - Return target part of rel with matching *reltype*, raising |KeyError| - if not found and |ValueError| if more than one matching relationship - is found. - """ - rel = self._get_rel_of_type(reltype) - return rel.target_part - - @property - def related_parts(self): - """ - dict mapping rIds to target parts for all the internal relationships - in the collection. - """ - return self._target_parts_by_rId - - @property - def xml(self): - """ - Serialize this relationship collection into XML suitable for storage - as a .rels file in an OPC package. - """ - rels_elm = CT_Relationships.new() - for rel in self.values(): - rels_elm.add_rel( - rel.rId, rel.reltype, rel.target_ref, rel.is_external - ) - return rels_elm.xml - - def _get_matching(self, reltype, target, is_external=False): - """ - Return relationship of matching *reltype*, *target*, and - *is_external* from collection, or None if not found. - """ - def matches(rel, reltype, target, is_external): - if rel.reltype != reltype: - return False - if rel.is_external != is_external: - return False - rel_target = rel.target_ref if rel.is_external else rel.target_part - if rel_target != target: - return False - return True - - for rel in self.values(): - if matches(rel, reltype, target, is_external): - return rel - return None - - def _get_rel_of_type(self, reltype): - """ - Return single relationship of type *reltype* from the collection. - Raises |KeyError| if no matching relationship is found. Raises - |ValueError| if more than one matching relationship is found. 
- """ - matching = [rel for rel in self.values() if rel.reltype == reltype] - if len(matching) == 0: - tmpl = "no relationship of type '%s' in collection" - raise KeyError(tmpl % reltype) - if len(matching) > 1: - tmpl = "multiple relationships of type '%s' in collection" - raise ValueError(tmpl % reltype) - return matching[0] - - @property - def _next_rId(self): - """ - Next available rId in collection, starting from 'rId1' and making use - of any gaps in numbering, e.g. 'rId2' for rIds ['rId1', 'rId3']. - """ - for n in range(1, len(self)+2): - rId_candidate = 'rId%d' % n # like 'rId19' - if rId_candidate not in self: - return rId_candidate - - -class Unmarshaller(object): - """ - Hosts static methods for unmarshalling a package from a |PackageReader| - instance. - """ - @staticmethod - def unmarshal(pkg_reader, package, part_factory): - """ - Construct graph of parts and realized relationships based on the - contents of *pkg_reader*, delegating construction of each part to - *part_factory*. Package relationships are added to *pkg*. - """ - parts = Unmarshaller._unmarshal_parts( - pkg_reader, package, part_factory - ) - Unmarshaller._unmarshal_relationships(pkg_reader, package, parts) - for part in parts.values(): - part.after_unmarshal() - package.after_unmarshal() - - @staticmethod - def _unmarshal_parts(pkg_reader, package, part_factory): - """ - Return a dictionary of |Part| instances unmarshalled from - *pkg_reader*, keyed by partname. Side-effect is that each part in - *pkg_reader* is constructed using *part_factory*. - """ - parts = {} - for partname, content_type, reltype, blob in pkg_reader.iter_sparts(): - parts[partname] = part_factory( - partname, content_type, reltype, blob, package - ) - return parts - - @staticmethod - def _unmarshal_relationships(pkg_reader, package, parts): - """ - Add a relationship to the source object corresponding to each of the - relationships in *pkg_reader* with its target_part set to the actual - target part in *parts*. 
- """ - for source_uri, srel in pkg_reader.iter_srels(): - source = package if source_uri == '/' else parts[source_uri] - target = (srel.target_ref if srel.is_external - else parts[srel.target_partname]) - source.load_rel(srel.reltype, target, srel.rId, srel.is_external) - - -class _Relationship(object): - """ - Value object for relationship to part. - """ - def __init__(self, rId, reltype, target, baseURI, external=False): - super(_Relationship, self).__init__() - self._rId = rId - self._reltype = reltype - self._target = target - self._baseURI = baseURI - self._is_external = bool(external) - - @property - def is_external(self): - return self._is_external - - @property - def reltype(self): - return self._reltype - - @property - def rId(self): - return self._rId - - @property - def target_part(self): - if self._is_external: - raise ValueError("target_part property on _Relationship is undef" - "ined when target mode is External") - return self._target - - @property - def target_ref(self): - if self._is_external: - return self._target - else: - return self._target.partname.relative_ref(self._baseURI) diff --git a/docx/opc/packuri.py b/docx/opc/packuri.py deleted file mode 100644 index 621ed92e5..000000000 --- a/docx/opc/packuri.py +++ /dev/null @@ -1,117 +0,0 @@ -# encoding: utf-8 - -""" -Provides the PackURI value type along with some useful known pack URI strings -such as PACKAGE_URI. -""" - -import posixpath -import re - - -class PackURI(str): - """ - Provides access to pack URI components such as the baseURI and the - filename slice. Behaves as |str| otherwise. 
- """ - _filename_re = re.compile('([a-zA-Z]+)([1-9][0-9]*)?') - - def __new__(cls, pack_uri_str): - if not pack_uri_str[0] == '/': - tmpl = "PackURI must begin with slash, got '%s'" - raise ValueError(tmpl % pack_uri_str) - return str.__new__(cls, pack_uri_str) - - @staticmethod - def from_rel_ref(baseURI, relative_ref): - """ - Return a |PackURI| instance containing the absolute pack URI formed by - translating *relative_ref* onto *baseURI*. - """ - joined_uri = posixpath.join(baseURI, relative_ref) - abs_uri = posixpath.abspath(joined_uri) - return PackURI(abs_uri) - - @property - def baseURI(self): - """ - The base URI of this pack URI, the directory portion, roughly - speaking. E.g. ``'/ppt/slides'`` for ``'/ppt/slides/slide1.xml'``. - For the package pseudo-partname '/', baseURI is '/'. - """ - return posixpath.split(self)[0] - - @property - def ext(self): - """ - The extension portion of this pack URI, e.g. ``'xml'`` for - ``'/word/document.xml'``. Note the period is not included. - """ - # raw_ext is either empty string or starts with period, e.g. '.xml' - raw_ext = posixpath.splitext(self)[1] - return raw_ext[1:] if raw_ext.startswith('.') else raw_ext - - @property - def filename(self): - """ - The "filename" portion of this pack URI, e.g. ``'slide1.xml'`` for - ``'/ppt/slides/slide1.xml'``. For the package pseudo-partname '/', - filename is ''. - """ - return posixpath.split(self)[1] - - @property - def idx(self): - """ - Return partname index as integer for tuple partname or None for - singleton partname, e.g. ``21`` for ``'/ppt/slides/slide21.xml'`` and - |None| for ``'/ppt/presentation.xml'``. 
- """ - filename = self.filename - if not filename: - return None - name_part = posixpath.splitext(filename)[0] # filename w/ext removed - match = self._filename_re.match(name_part) - if match is None: - return None - if match.group(2): - return int(match.group(2)) - return None - - @property - def membername(self): - """ - The pack URI with the leading slash stripped off, the form used as - the Zip file membername for the package item. Returns '' for the - package pseudo-partname '/'. - """ - return self[1:] - - def relative_ref(self, baseURI): - """ - Return string containing relative reference to package item from - *baseURI*. E.g. PackURI('/ppt/slideLayouts/slideLayout1.xml') would - return '../slideLayouts/slideLayout1.xml' for baseURI '/ppt/slides'. - """ - # workaround for posixpath bug in 2.6, doesn't generate correct - # relative path when *start* (second) parameter is root ('/') - if baseURI == '/': - relpath = self[1:] - else: - relpath = posixpath.relpath(self, baseURI) - return relpath - - @property - def rels_uri(self): - """ - The pack URI of the .rels part corresponding to the current pack URI. - Only produces sensible output if the pack URI is a partname or the - package pseudo-partname '/'. - """ - rels_filename = '%s.rels' % self.filename - rels_uri_str = posixpath.join(self.baseURI, '_rels', rels_filename) - return PackURI(rels_uri_str) - - -PACKAGE_URI = PackURI('/') -CONTENT_TYPES_URI = PackURI('/[Content_Types].xml') diff --git a/docx/opc/phys_pkg.py b/docx/opc/phys_pkg.py deleted file mode 100644 index c86a51994..000000000 --- a/docx/opc/phys_pkg.py +++ /dev/null @@ -1,155 +0,0 @@ -# encoding: utf-8 - -""" -Provides a general interface to a *physical* OPC package, such as a zip file. 
-""" - -from __future__ import absolute_import - -import os - -from zipfile import ZipFile, is_zipfile, ZIP_DEFLATED - -from .compat import is_string -from .exceptions import PackageNotFoundError -from .packuri import CONTENT_TYPES_URI - - -class PhysPkgReader(object): - """ - Factory for physical package reader objects. - """ - def __new__(cls, pkg_file): - # if *pkg_file* is a string, treat it as a path - if is_string(pkg_file): - if os.path.isdir(pkg_file): - reader_cls = _DirPkgReader - elif is_zipfile(pkg_file): - reader_cls = _ZipPkgReader - else: - raise PackageNotFoundError( - "Package not found at '%s'" % pkg_file - ) - else: # assume it's a stream and pass it to Zip reader to sort out - reader_cls = _ZipPkgReader - - return super(PhysPkgReader, cls).__new__(reader_cls) - - -class PhysPkgWriter(object): - """ - Factory for physical package writer objects. - """ - def __new__(cls, pkg_file): - return super(PhysPkgWriter, cls).__new__(_ZipPkgWriter) - - -class _DirPkgReader(PhysPkgReader): - """ - Implements |PhysPkgReader| interface for an OPC package extracted into a - directory. - """ - def __init__(self, path): - """ - *path* is the path to a directory containing an expanded package. - """ - super(_DirPkgReader, self).__init__() - self._path = os.path.abspath(path) - - def blob_for(self, pack_uri): - """ - Return contents of file corresponding to *pack_uri* in package - directory. - """ - path = os.path.join(self._path, pack_uri.membername) - with open(path, 'rb') as f: - blob = f.read() - return blob - - def close(self): - """ - Provides interface consistency with |ZipFileSystem|, but does - nothing, a directory file system doesn't need closing. - """ - pass - - @property - def content_types_xml(self): - """ - Return the `[Content_Types].xml` blob from the package. - """ - return self.blob_for(CONTENT_TYPES_URI) - - def rels_xml_for(self, source_uri): - """ - Return rels item XML for source with *source_uri*, or None if the - item has no rels item. 
- """ - try: - rels_xml = self.blob_for(source_uri.rels_uri) - except IOError: - rels_xml = None - return rels_xml - - -class _ZipPkgReader(PhysPkgReader): - """ - Implements |PhysPkgReader| interface for a zip file OPC package. - """ - def __init__(self, pkg_file): - super(_ZipPkgReader, self).__init__() - self._zipf = ZipFile(pkg_file, 'r') - - def blob_for(self, pack_uri): - """ - Return blob corresponding to *pack_uri*. Raises |ValueError| if no - matching member is present in zip archive. - """ - return self._zipf.read(pack_uri.membername) - - def close(self): - """ - Close the zip archive, releasing any resources it is using. - """ - self._zipf.close() - - @property - def content_types_xml(self): - """ - Return the `[Content_Types].xml` blob from the zip package. - """ - return self.blob_for(CONTENT_TYPES_URI) - - def rels_xml_for(self, source_uri): - """ - Return rels item XML for source with *source_uri* or None if no rels - item is present. - """ - try: - rels_xml = self.blob_for(source_uri.rels_uri) - except KeyError: - rels_xml = None - return rels_xml - - -class _ZipPkgWriter(PhysPkgWriter): - """ - Implements |PhysPkgWriter| interface for a zip file OPC package. - """ - def __init__(self, pkg_file): - super(_ZipPkgWriter, self).__init__() - self._zipf = ZipFile(pkg_file, 'w', compression=ZIP_DEFLATED) - - def close(self): - """ - Close the zip archive, flushing any pending physical writes and - releasing any resources it's using. - """ - self._zipf.close() - - def write(self, pack_uri, blob): - """ - Write *blob* to this zip package with the membername corresponding to - *pack_uri*. 
- """ - self._zipf.writestr(pack_uri.membername, blob) diff --git a/docx/opc/pkgwriter.py b/docx/opc/pkgwriter.py deleted file mode 100644 index fccda6cd8..000000000 --- a/docx/opc/pkgwriter.py +++ /dev/null @@ -1,125 +0,0 @@ -# encoding: utf-8 - -""" -Provides a low-level, write-only API to a serialized Open Packaging -Convention (OPC) package, essentially an implementation of OpcPackage.save() -""" - -from __future__ import absolute_import - -from .constants import CONTENT_TYPE as CT -from .oxml import CT_Types, serialize_part_xml -from .packuri import CONTENT_TYPES_URI, PACKAGE_URI -from .phys_pkg import PhysPkgWriter -from .shared import CaseInsensitiveDict -from .spec import default_content_types - - -class PackageWriter(object): - """ - Writes a zip-format OPC package to *pkg_file*, where *pkg_file* can be - either a path to a zip file (a string) or a file-like object. Its single - API method, :meth:`write`, is static, so this class is not intended to - be instantiated. - """ - @staticmethod - def write(pkg_file, pkg_rels, parts): - """ - Write a physical package (.pptx file) to *pkg_file* containing - *pkg_rels* and *parts* and a content types stream based on the - content types of the parts. - """ - phys_writer = PhysPkgWriter(pkg_file) - PackageWriter._write_content_types_stream(phys_writer, parts) - PackageWriter._write_pkg_rels(phys_writer, pkg_rels) - PackageWriter._write_parts(phys_writer, parts) - phys_writer.close() - - @staticmethod - def _write_content_types_stream(phys_writer, parts): - """ - Write ``[Content_Types].xml`` part to the physical package with an - appropriate content type lookup target for each part in *parts*. - """ - cti = _ContentTypesItem.from_parts(parts) - phys_writer.write(CONTENT_TYPES_URI, cti.blob) - - @staticmethod - def _write_parts(phys_writer, parts): - """ - Write the blob of each part in *parts* to the package, along with a - rels item for its relationships if and only if it has any. 
- """ - for part in parts: - phys_writer.write(part.partname, part.blob) - if len(part._rels): - phys_writer.write(part.partname.rels_uri, part._rels.xml) - - @staticmethod - def _write_pkg_rels(phys_writer, pkg_rels): - """ - Write the XML rels item for *pkg_rels* ('/_rels/.rels') to the - package. - """ - phys_writer.write(PACKAGE_URI.rels_uri, pkg_rels.xml) - - -class _ContentTypesItem(object): - """ - Service class that composes a content types item ([Content_Types].xml) - based on a list of parts. Not meant to be instantiated directly, its - single interface method is xml_for(), e.g. - ``_ContentTypesItem.xml_for(parts)``. - """ - def __init__(self): - self._defaults = CaseInsensitiveDict() - self._overrides = dict() - - @property - def blob(self): - """ - Return XML form of this content types item, suitable for storage as - ``[Content_Types].xml`` in an OPC package. - """ - return serialize_part_xml(self._element) - - @classmethod - def from_parts(cls, parts): - """ - Return content types XML mapping each part in *parts* to the - appropriate content type and suitable for storage as - ``[Content_Types].xml`` in an OPC package. - """ - cti = cls() - cti._defaults['rels'] = CT.OPC_RELATIONSHIPS - cti._defaults['xml'] = CT.XML - for part in parts: - cti._add_content_type(part.partname, part.content_type) - return cti - - def _add_content_type(self, partname, content_type): - """ - Add a content type for the part with *partname* and *content_type*, - using a default or override as appropriate. - """ - ext = partname.ext - if (ext.lower(), content_type) in default_content_types: - self._defaults[ext] = content_type - else: - self._overrides[partname] = content_type - - @property - def _element(self): - """ - Return XML form of this content types item, suitable for storage as - ``[Content_Types].xml`` in an OPC package. 
Although the sequence of - elements is not strictly significant, as an aid to testing and - readability Default elements are sorted by extension and Override - elements are sorted by partname. - """ - _types_elm = CT_Types.new() - for ext in sorted(self._defaults.keys()): - _types_elm.add_default(ext, self._defaults[ext]) - for partname in sorted(self._overrides.keys()): - _types_elm.add_override(partname, self._overrides[partname]) - return _types_elm diff --git a/docx/opc/shared.py b/docx/opc/shared.py deleted file mode 100644 index 55344483d..000000000 --- a/docx/opc/shared.py +++ /dev/null @@ -1,47 +0,0 @@ -# encoding: utf-8 - -""" -Objects shared by opc modules. -""" - -from __future__ import absolute_import, print_function, unicode_literals - - -class CaseInsensitiveDict(dict): - """ - Mapping type that behaves like dict except that it matches without respect - to the case of the key. E.g. cid['A'] == cid['a']. Note this is not - general-purpose, just complete enough to satisfy opc package needs. It - assumes str keys, and that it is created empty; keys passed in constructor - are not accounted for - """ - def __contains__(self, key): - return super(CaseInsensitiveDict, self).__contains__(key.lower()) - - def __getitem__(self, key): - return super(CaseInsensitiveDict, self).__getitem__(key.lower()) - - def __setitem__(self, key, value): - return super(CaseInsensitiveDict, self).__setitem__( - key.lower(), value - ) - - -def lazyproperty(f): - """ - @lazyprop decorator. Decorated method will be called only on first access - to calculate a cached property value. After that, the cached value is - returned. 
- """ - cache_attr_name = '_%s' % f.__name__ # like '_foobar' for prop 'foobar' - docstring = f.__doc__ - - def get_prop_value(obj): - try: - return getattr(obj, cache_attr_name) - except AttributeError: - value = f(obj) - setattr(obj, cache_attr_name, value) - return value - - return property(get_prop_value, doc=docstring) diff --git a/docx/opc/spec.py b/docx/opc/spec.py deleted file mode 100644 index 60fc38564..000000000 --- a/docx/opc/spec.py +++ /dev/null @@ -1,29 +0,0 @@ -# encoding: utf-8 - -""" -Provides mappings that embody aspects of the Open XML spec ISO/IEC 29500. -""" - -from .constants import CONTENT_TYPE as CT - - -default_content_types = ( - ('bin', CT.PML_PRINTER_SETTINGS), - ('bin', CT.SML_PRINTER_SETTINGS), - ('bin', CT.WML_PRINTER_SETTINGS), - ('bmp', CT.BMP), - ('emf', CT.X_EMF), - ('fntdata', CT.X_FONTDATA), - ('gif', CT.GIF), - ('jpe', CT.JPEG), - ('jpeg', CT.JPEG), - ('jpg', CT.JPEG), - ('png', CT.PNG), - ('rels', CT.OPC_RELATIONSHIPS), - ('tif', CT.TIFF), - ('tiff', CT.TIFF), - ('wdp', CT.MS_PHOTO), - ('wmf', CT.X_WMF), - ('xlsx', CT.SML_SHEET), - ('xml', CT.XML), -) diff --git a/docx/oxml/__init__.py b/docx/oxml/__init__.py deleted file mode 100644 index c5938c7c8..000000000 --- a/docx/oxml/__init__.py +++ /dev/null @@ -1,163 +0,0 @@ -# encoding: utf-8 - -""" -Initializes oxml sub-package, including registering custom element classes -corresponding to Open XML elements. -""" - -from __future__ import absolute_import - -from lxml import etree - -from .ns import NamespacePrefixedTag, nsmap - - -# configure XML parser -element_class_lookup = etree.ElementNamespaceClassLookup() -oxml_parser = etree.XMLParser(remove_blank_text=True) -oxml_parser.set_element_class_lookup(element_class_lookup) - - -def parse_xml(xml): - """ - Return root lxml element obtained by parsing XML character string in - *xml*, which can be either a Python 2.x string or unicode. 
The custom - parser is used, so custom element classes are produced for elements in - *xml* that have them. - """ - root_element = etree.fromstring(xml, oxml_parser) - return root_element - - -def register_element_cls(tag, cls): - """ - Register *cls* to be constructed when the oxml parser encounters an - element with matching *tag*. *tag* is a string of the form - ``nspfx:tagroot``, e.g. ``'w:document'``. - """ - nspfx, tagroot = tag.split(':') - namespace = element_class_lookup.get_namespace(nsmap[nspfx]) - namespace[tagroot] = cls - - -def OxmlElement(nsptag_str, attrs=None, nsdecls=None): - """ - Return a 'loose' lxml element having the tag specified by *nsptag_str*. - *nsptag_str* must contain the standard namespace prefix, e.g. 'a:tbl'. - The resulting element is an instance of the custom element class for this - tag name if one is defined. A dictionary of attribute values may be - provided as *attrs*; they are set if present. All namespaces defined in - the dict *nsdecls* are declared in the element using the key as the - prefix and the value as the namespace name. If *nsdecls* is not provided, - a single namespace declaration is added based on the prefix on - *nsptag_str*. 
- """ - nsptag = NamespacePrefixedTag(nsptag_str) - if nsdecls is None: - nsdecls = nsptag.nsmap - return oxml_parser.makeelement( - nsptag.clark_name, attrib=attrs, nsmap=nsdecls - ) - - -# =========================================================================== -# custom element class mappings -# =========================================================================== - -from docx.oxml.shared import CT_DecimalNumber, CT_OnOff, CT_String - -from docx.oxml.shape import ( - CT_Blip, CT_BlipFillProperties, CT_GraphicalObject, - CT_GraphicalObjectData, CT_Inline, CT_NonVisualDrawingProps, CT_Picture, - CT_PictureNonVisual, CT_Point2D, CT_PositiveSize2D, CT_ShapeProperties, - CT_Transform2D -) -register_element_cls('a:blip', CT_Blip) -register_element_cls('a:ext', CT_PositiveSize2D) -register_element_cls('a:graphic', CT_GraphicalObject) -register_element_cls('a:graphicData', CT_GraphicalObjectData) -register_element_cls('a:off', CT_Point2D) -register_element_cls('a:xfrm', CT_Transform2D) -register_element_cls('pic:blipFill', CT_BlipFillProperties) -register_element_cls('pic:cNvPr', CT_NonVisualDrawingProps) -register_element_cls('pic:nvPicPr', CT_PictureNonVisual) -register_element_cls('pic:pic', CT_Picture) -register_element_cls('pic:spPr', CT_ShapeProperties) -register_element_cls('wp:docPr', CT_NonVisualDrawingProps) -register_element_cls('wp:extent', CT_PositiveSize2D) -register_element_cls('wp:inline', CT_Inline) - -from docx.oxml.parts.document import CT_Body, CT_Document -register_element_cls('w:body', CT_Body) -register_element_cls('w:document', CT_Document) - -from docx.oxml.parts.numbering import ( - CT_Num, CT_Numbering, CT_NumLvl, CT_NumPr -) -register_element_cls('w:abstractNumId', CT_DecimalNumber) -register_element_cls('w:ilvl', CT_DecimalNumber) -register_element_cls('w:lvlOverride', CT_NumLvl) -register_element_cls('w:num', CT_Num) -register_element_cls('w:numId', CT_DecimalNumber) -register_element_cls('w:numPr', CT_NumPr) 
-register_element_cls('w:numbering', CT_Numbering) -register_element_cls('w:startOverride', CT_DecimalNumber) - -from docx.oxml.parts.styles import CT_Style, CT_Styles -register_element_cls('w:style', CT_Style) -register_element_cls('w:styles', CT_Styles) - -from docx.oxml.section import CT_PageMar, CT_PageSz, CT_SectPr, CT_SectType -register_element_cls('w:pgMar', CT_PageMar) -register_element_cls('w:pgSz', CT_PageSz) -register_element_cls('w:sectPr', CT_SectPr) -register_element_cls('w:type', CT_SectType) - -from docx.oxml.table import ( - CT_Row, CT_Tbl, CT_TblGrid, CT_TblGridCol, CT_TblLayoutType, CT_TblPr, - CT_TblWidth, CT_Tc, CT_TcPr -) -register_element_cls('w:gridCol', CT_TblGridCol) -register_element_cls('w:tbl', CT_Tbl) -register_element_cls('w:tblGrid', CT_TblGrid) -register_element_cls('w:tblLayout', CT_TblLayoutType) -register_element_cls('w:tblPr', CT_TblPr) -register_element_cls('w:tblStyle', CT_String) -register_element_cls('w:tc', CT_Tc) -register_element_cls('w:tcPr', CT_TcPr) -register_element_cls('w:tcW', CT_TblWidth) -register_element_cls('w:tr', CT_Row) - -from docx.oxml.text import ( - CT_Br, CT_Jc, CT_P, CT_PPr, CT_R, CT_RPr, CT_Text, CT_Underline -) -register_element_cls('w:b', CT_OnOff) -register_element_cls('w:bCs', CT_OnOff) -register_element_cls('w:br', CT_Br) -register_element_cls('w:caps', CT_OnOff) -register_element_cls('w:cs', CT_OnOff) -register_element_cls('w:dstrike', CT_OnOff) -register_element_cls('w:emboss', CT_OnOff) -register_element_cls('w:i', CT_OnOff) -register_element_cls('w:iCs', CT_OnOff) -register_element_cls('w:imprint', CT_OnOff) -register_element_cls('w:jc', CT_Jc) -register_element_cls('w:noProof', CT_OnOff) -register_element_cls('w:oMath', CT_OnOff) -register_element_cls('w:outline', CT_OnOff) -register_element_cls('w:p', CT_P) -register_element_cls('w:pPr', CT_PPr) -register_element_cls('w:pStyle', CT_String) -register_element_cls('w:r', CT_R) -register_element_cls('w:rPr', CT_RPr) 
-register_element_cls('w:rStyle', CT_String) -register_element_cls('w:rtl', CT_OnOff) -register_element_cls('w:shadow', CT_OnOff) -register_element_cls('w:smallCaps', CT_OnOff) -register_element_cls('w:snapToGrid', CT_OnOff) -register_element_cls('w:specVanish', CT_OnOff) -register_element_cls('w:strike', CT_OnOff) -register_element_cls('w:t', CT_Text) -register_element_cls('w:u', CT_Underline) -register_element_cls('w:vanish', CT_OnOff) -register_element_cls('w:webHidden', CT_OnOff) diff --git a/docx/oxml/exceptions.py b/docx/oxml/exceptions.py deleted file mode 100644 index 4696f1e93..000000000 --- a/docx/oxml/exceptions.py +++ /dev/null @@ -1,16 +0,0 @@ -# encoding: utf-8 - -""" -Exceptions for oxml sub-package -""" - - -class XmlchemyError(Exception): - """Generic error class.""" - - -class InvalidXmlError(XmlchemyError): - """ - Raised when invalid XML is encountered, such as on attempt to access a - missing required child element - """ diff --git a/docx/oxml/ns.py b/docx/oxml/ns.py deleted file mode 100644 index d4b3014db..000000000 --- a/docx/oxml/ns.py +++ /dev/null @@ -1,108 +0,0 @@ -# encoding: utf-8 - -""" -Namespace-related objects. 
-""" - -from __future__ import absolute_import, print_function, unicode_literals - - -nsmap = { - 'a': ('http://schemas.openxmlformats.org/drawingml/2006/main'), - 'c': ('http://schemas.openxmlformats.org/drawingml/2006/chart'), - 'dgm': ('http://schemas.openxmlformats.org/drawingml/2006/diagram'), - 'pic': ('http://schemas.openxmlformats.org/drawingml/2006/picture'), - 'r': ('http://schemas.openxmlformats.org/officeDocument/2006/relations' - 'hips'), - 'w': ('http://schemas.openxmlformats.org/wordprocessingml/2006/main'), - 'wp': ('http://schemas.openxmlformats.org/drawingml/2006/wordprocessing' - 'Drawing'), - 'xml': ('http://www.w3.org/XML/1998/namespace') -} - -pfxmap = dict((value, key) for key, value in nsmap.items()) - - -class NamespacePrefixedTag(str): - """ - Value object that knows the semantics of an XML tag having a namespace - prefix. - """ - def __new__(cls, nstag, *args): - return super(NamespacePrefixedTag, cls).__new__(cls, nstag) - - def __init__(self, nstag): - self._pfx, self._local_part = nstag.split(':') - self._ns_uri = nsmap[self._pfx] - - @property - def clark_name(self): - return '{%s}%s' % (self._ns_uri, self._local_part) - - @classmethod - def from_clark_name(cls, clark_name): - nsuri, local_name = clark_name[1:].split('}') - nstag = '%s:%s' % (pfxmap[nsuri], local_name) - return cls(nstag) - - @property - def local_part(self): - """ - Return the local part of the tag as a string. E.g. 'foobar' is - returned for tag 'f:foobar'. - """ - return self._local_part - - @property - def nsmap(self): - """ - Return a dict having a single member, mapping the namespace prefix of - this tag to it's namespace name (e.g. {'f': 'http://foo/bar'}). This - is handy for passing to xpath calls and other uses. - """ - return {self._pfx: self._ns_uri} - - @property - def nspfx(self): - """ - Return the string namespace prefix for the tag, e.g. 'f' is returned - for tag 'f:foobar'. 
- """ - return self._pfx - - @property - def nsuri(self): - """ - Return the namespace URI for the tag, e.g. 'http://foo/bar' would be - returned for tag 'f:foobar' if the 'f' prefix maps to - 'http://foo/bar' in nsmap. - """ - return self._ns_uri - - -def nsdecls(*prefixes): - """ - Return a string containing a namespace declaration for each of the - namespace prefix strings, e.g. 'p', 'ct', passed as *prefixes*. - """ - return ' '.join(['xmlns:%s="%s"' % (pfx, nsmap[pfx]) for pfx in prefixes]) - - -def nspfxmap(*nspfxs): - """ - Return a dict containing the subset namespace prefix mappings specified by - *nspfxs*. Any number of namespace prefixes can be supplied, e.g. - namespaces('a', 'r', 'p'). - """ - return dict((pfx, nsmap[pfx]) for pfx in nspfxs) - - -def qn(tag): - """ - Stands for "qualified name", a utility function to turn a namespace - prefixed tag name into a Clark-notation qualified tag name for lxml. For - example, ``qn('p:cSld')`` returns ``'{http://schemas.../main}cSld'``. - """ - prefix, tagroot = tag.split(':') - uri = nsmap[prefix] - return '{%s}%s' % (uri, tagroot) diff --git a/docx/oxml/parts/document.py b/docx/oxml/parts/document.py deleted file mode 100644 index ff5eedb91..000000000 --- a/docx/oxml/parts/document.py +++ /dev/null @@ -1,63 +0,0 @@ -# encoding: utf-8 - -""" -Custom element classes that correspond to the document part, e.g. -. -""" - -from ..table import CT_Tbl -from ..xmlchemy import BaseOxmlElement, ZeroOrOne, ZeroOrMore - - -class CT_Document(BaseOxmlElement): - """ - ```` element, the root element of a document.xml file. - """ - body = ZeroOrOne('w:body') - - @property - def sectPr_lst(self): - """ - Return a list containing a reference to each ```` element - in the document, in the order encountered. - """ - return self.xpath('.//w:sectPr') - - -class CT_Body(BaseOxmlElement): - """ - ````, the container element for the main document story in - ``document.xml``. 
- """ - p = ZeroOrMore('w:p', successors=('w:sectPr',)) - tbl = ZeroOrMore('w:tbl', successors=('w:sectPr',)) - sectPr = ZeroOrOne('w:sectPr', successors=()) - - def add_section_break(self): - """ - Return the current ```` element after adding a clone of it - in a new ```` element appended to the block content elements. - Note that the "current" ```` will always be the sentinel - sectPr in this case since we're always working at the end of the - block content. - """ - sentinel_sectPr = self.get_or_add_sectPr() - cloned_sectPr = sentinel_sectPr.clone() - p = self.add_p() - p.set_sectPr(cloned_sectPr) - return sentinel_sectPr - - def _new_tbl(self): - return CT_Tbl.new() - - def clear_content(self): - """ - Remove all content child elements from this element. Leave - the element if it is present. - """ - if self.sectPr is not None: - content_elms = self[:-1] - else: - content_elms = self[:] - for content_elm in content_elms: - self.remove(content_elm) diff --git a/docx/oxml/parts/numbering.py b/docx/oxml/parts/numbering.py deleted file mode 100644 index 31d97dbce..000000000 --- a/docx/oxml/parts/numbering.py +++ /dev/null @@ -1,131 +0,0 @@ -# encoding: utf-8 - -""" -Custom element classes related to the numbering part -""" - -from .. import OxmlElement -from ..shared import CT_DecimalNumber -from ..simpletypes import ST_DecimalNumber -from ..xmlchemy import ( - BaseOxmlElement, OneAndOnlyOne, RequiredAttribute, ZeroOrMore, ZeroOrOne -) - - -class CT_Num(BaseOxmlElement): - """ - ```` element, which represents a concrete list definition - instance, having a required child that references an - abstract numbering definition that defines most of the formatting details. - """ - abstractNumId = OneAndOnlyOne('w:abstractNumId') - lvlOverride = ZeroOrMore('w:lvlOverride') - numId = RequiredAttribute('w:numId', ST_DecimalNumber) - - def add_lvlOverride(self, ilvl): - """ - Return a newly added CT_NumLvl () element having its - ``ilvl`` attribute set to *ilvl*. 
- """ - return self._add_lvlOverride(ilvl=ilvl) - - @classmethod - def new(cls, num_id, abstractNum_id): - """ - Return a new ```` element having numId of *num_id* and having - a ```` child with val attribute set to - *abstractNum_id*. - """ - num = OxmlElement('w:num') - num.numId = num_id - abstractNumId = CT_DecimalNumber.new( - 'w:abstractNumId', abstractNum_id - ) - num.append(abstractNumId) - return num - - -class CT_NumLvl(BaseOxmlElement): - """ - ```` element, which identifies a level in a list - definition to override with settings it contains. - """ - startOverride = ZeroOrOne('w:startOverride', successors=('w:lvl',)) - ilvl = RequiredAttribute('w:ilvl', ST_DecimalNumber) - - def add_startOverride(self, val): - """ - Return a newly added CT_DecimalNumber element having tagname - ``w:startOverride`` and ``val`` attribute set to *val*. - """ - return self._add_startOverride(val=val) - - -class CT_NumPr(BaseOxmlElement): - """ - A ```` element, a container for numbering properties applied to - a paragraph. - """ - ilvl = ZeroOrOne('w:ilvl', successors=( - 'w:numId', 'w:numberingChange', 'w:ins' - )) - numId = ZeroOrOne('w:numId', successors=('w:numberingChange', 'w:ins')) - - # @ilvl.setter - # def _set_ilvl(self, val): - # """ - # Get or add a child and set its ``w:val`` attribute to *val*. - # """ - # ilvl = self.get_or_add_ilvl() - # ilvl.val = val - - # @numId.setter - # def numId(self, val): - # """ - # Get or add a child and set its ``w:val`` attribute to - # *val*. - # """ - # numId = self.get_or_add_numId() - # numId.val = val - - -class CT_Numbering(BaseOxmlElement): - """ - ```` element, the root element of a numbering part, i.e. - numbering.xml - """ - num = ZeroOrMore('w:num', successors=('w:numIdMacAtCleanup',)) - - def add_num(self, abstractNum_id): - """ - Return a newly added CT_Num () element referencing the - abstract numbering definition identified by *abstractNum_id*. 
- """ - next_num_id = self._next_numId - num = CT_Num.new(next_num_id, abstractNum_id) - return self._insert_num(num) - - def num_having_numId(self, numId): - """ - Return the ```` child element having ``numId`` attribute - matching *numId*. - """ - xpath = './w:num[@w:numId="%d"]' % numId - try: - return self.xpath(xpath)[0] - except IndexError: - raise KeyError('no element with numId %d' % numId) - - @property - def _next_numId(self): - """ - The first ``numId`` unused by a ```` element, starting at - 1 and filling any gaps in numbering between existing ```` - elements. - """ - numId_strs = self.xpath('./w:num/@w:numId') - num_ids = [int(numId_str) for numId_str in numId_strs] - for num in range(1, len(num_ids)+2): - if num not in num_ids: - break - return num diff --git a/docx/oxml/parts/styles.py b/docx/oxml/parts/styles.py deleted file mode 100644 index ed3054f13..000000000 --- a/docx/oxml/parts/styles.py +++ /dev/null @@ -1,35 +0,0 @@ -# encoding: utf-8 - -""" -Custom element classes related to the styles part -""" - -from ..xmlchemy import BaseOxmlElement, ZeroOrMore, ZeroOrOne - - -class CT_Style(BaseOxmlElement): - """ - A ```` element, representing a style definition - """ - pPr = ZeroOrOne('w:pPr', successors=( - 'w:rPr', 'w:tblPr', 'w:trPr', 'w:tcPr', 'w:tblStylePr' - )) - - -class CT_Styles(BaseOxmlElement): - """ - ```` element, the root element of a styles part, i.e. - styles.xml - """ - style = ZeroOrMore('w:style', successors=()) - - def style_having_styleId(self, styleId): - """ - Return the ```` child element having ``styleId`` attribute - matching *styleId*. 
- """ - xpath = './w:style[@w:styleId="%s"]' % styleId - try: - return self.xpath(xpath)[0] - except IndexError: - raise KeyError('no element with styleId %d' % styleId) diff --git a/docx/oxml/section.py b/docx/oxml/section.py deleted file mode 100644 index cf76b67ed..000000000 --- a/docx/oxml/section.py +++ /dev/null @@ -1,264 +0,0 @@ -# encoding: utf-8 - -""" -Section-related custom element classes. -""" - -from __future__ import absolute_import, print_function - -from copy import deepcopy - -from ..enum.section import WD_ORIENTATION, WD_SECTION_START -from .simpletypes import ST_SignedTwipsMeasure, ST_TwipsMeasure -from .xmlchemy import BaseOxmlElement, OptionalAttribute, ZeroOrOne - - -class CT_PageMar(BaseOxmlElement): - """ - ```` element, defining page margins. - """ - top = OptionalAttribute('w:top', ST_SignedTwipsMeasure) - right = OptionalAttribute('w:right', ST_TwipsMeasure) - bottom = OptionalAttribute('w:bottom', ST_SignedTwipsMeasure) - left = OptionalAttribute('w:left', ST_TwipsMeasure) - header = OptionalAttribute('w:header', ST_TwipsMeasure) - footer = OptionalAttribute('w:footer', ST_TwipsMeasure) - gutter = OptionalAttribute('w:gutter', ST_TwipsMeasure) - - -class CT_PageSz(BaseOxmlElement): - """ - ```` element, defining page dimensions and orientation. - """ - w = OptionalAttribute('w:w', ST_TwipsMeasure) - h = OptionalAttribute('w:h', ST_TwipsMeasure) - orient = OptionalAttribute( - 'w:orient', WD_ORIENTATION, default=WD_ORIENTATION.PORTRAIT - ) - - -class CT_SectPr(BaseOxmlElement): - """ - ```` element, the container element for section properties. 
- """ - __child_sequence__ = ( - 'w:footnotePr', 'w:endnotePr', 'w:type', 'w:pgSz', 'w:pgMar', - 'w:paperSrc', 'w:pgBorders', 'w:lnNumType', 'w:pgNumType', 'w:cols', - 'w:formProt', 'w:vAlign', 'w:noEndnote', 'w:titlePg', - 'w:textDirection', 'w:bidi', 'w:rtlGutter', 'w:docGrid', - 'w:printerSettings', 'w:sectPrChange', - ) - type = ZeroOrOne('w:type', successors=( - __child_sequence__[__child_sequence__.index('w:type')+1:] - )) - pgSz = ZeroOrOne('w:pgSz', successors=( - __child_sequence__[__child_sequence__.index('w:pgSz')+1:] - )) - pgMar = ZeroOrOne('w:pgMar', successors=( - __child_sequence__[__child_sequence__.index('w:pgMar')+1:] - )) - - @property - def bottom_margin(self): - """ - The value of the ``w:bottom`` attribute in the ```` child - element, as a |Length| object, or |None| if either the element or the - attribute is not present. - """ - pgMar = self.pgMar - if pgMar is None: - return None - return pgMar.bottom - - @bottom_margin.setter - def bottom_margin(self, value): - pgMar = self.get_or_add_pgMar() - pgMar.bottom = value - - def clone(self): - """ - Return an exact duplicate of this ```` element tree - suitable for use in adding a section break. All rsid* attributes are - removed from the root ```` element. - """ - clone_sectPr = deepcopy(self) - clone_sectPr.attrib.clear() - return clone_sectPr - - @property - def footer(self): - """ - The value of the ``w:footer`` attribute in the ```` child - element, as a |Length| object, or |None| if either the element or the - attribute is not present. - """ - pgMar = self.pgMar - if pgMar is None: - return None - return pgMar.footer - - @footer.setter - def footer(self, value): - pgMar = self.get_or_add_pgMar() - pgMar.footer = value - - @property - def gutter(self): - """ - The value of the ``w:gutter`` attribute in the ```` child - element, as a |Length| object, or |None| if either the element or the - attribute is not present. 
- """ - pgMar = self.pgMar - if pgMar is None: - return None - return pgMar.gutter - - @gutter.setter - def gutter(self, value): - pgMar = self.get_or_add_pgMar() - pgMar.gutter = value - - @property - def header(self): - """ - The value of the ``w:header`` attribute in the ```` child - element, as a |Length| object, or |None| if either the element or the - attribute is not present. - """ - pgMar = self.pgMar - if pgMar is None: - return None - return pgMar.header - - @header.setter - def header(self, value): - pgMar = self.get_or_add_pgMar() - pgMar.header = value - - @property - def left_margin(self): - """ - The value of the ``w:left`` attribute in the ```` child - element, as a |Length| object, or |None| if either the element or the - attribute is not present. - """ - pgMar = self.pgMar - if pgMar is None: - return None - return pgMar.left - - @left_margin.setter - def left_margin(self, value): - pgMar = self.get_or_add_pgMar() - pgMar.left = value - - @property - def right_margin(self): - """ - The value of the ``w:right`` attribute in the ```` child - element, as a |Length| object, or |None| if either the element or the - attribute is not present. - """ - pgMar = self.pgMar - if pgMar is None: - return None - return pgMar.right - - @right_margin.setter - def right_margin(self, value): - pgMar = self.get_or_add_pgMar() - pgMar.right = value - - @property - def orientation(self): - """ - The member of the ``WD_ORIENTATION`` enumeration corresponding to the - value of the ``orient`` attribute of the ```` child element, - or ``WD_ORIENTATION.PORTRAIT`` if not present. - """ - pgSz = self.pgSz - if pgSz is None: - return WD_ORIENTATION.PORTRAIT - return pgSz.orient - - @orientation.setter - def orientation(self, value): - pgSz = self.get_or_add_pgSz() - pgSz.orient = value - - @property - def page_height(self): - """ - Value in EMU of the ``h`` attribute of the ```` child - element, or |None| if not present. 
- """ - pgSz = self.pgSz - if pgSz is None: - return None - return pgSz.h - - @page_height.setter - def page_height(self, value): - pgSz = self.get_or_add_pgSz() - pgSz.h = value - - @property - def page_width(self): - """ - Value in EMU of the ``w`` attribute of the ```` child - element, or |None| if not present. - """ - pgSz = self.pgSz - if pgSz is None: - return None - return pgSz.w - - @page_width.setter - def page_width(self, value): - pgSz = self.get_or_add_pgSz() - pgSz.w = value - - @property - def start_type(self): - """ - The member of the ``WD_SECTION_START`` enumeration corresponding to - the value of the ``val`` attribute of the ```` child element, - or ``WD_SECTION_START.NEW_PAGE`` if not present. - """ - type = self.type - if type is None or type.val is None: - return WD_SECTION_START.NEW_PAGE - return type.val - - @start_type.setter - def start_type(self, value): - if value is None or value is WD_SECTION_START.NEW_PAGE: - self._remove_type() - return - type = self.get_or_add_type() - type.val = value - - @property - def top_margin(self): - """ - The value of the ``w:top`` attribute in the ```` child - element, as a |Length| object, or |None| if either the element or the - attribute is not present. - """ - pgMar = self.pgMar - if pgMar is None: - return None - return pgMar.top - - @top_margin.setter - def top_margin(self, value): - pgMar = self.get_or_add_pgMar() - pgMar.top = value - - -class CT_SectType(BaseOxmlElement): - """ - ```` element, defining the section start type. - """ - val = OptionalAttribute('w:val', WD_SECTION_START) diff --git a/docx/oxml/shape.py b/docx/oxml/shape.py deleted file mode 100644 index ae58dd59d..000000000 --- a/docx/oxml/shape.py +++ /dev/null @@ -1,272 +0,0 @@ -# encoding: utf-8 - -""" -Custom element classes for shape-related elements like ```` -""" - -from . 
import parse_xml -from .ns import nsdecls -from .simpletypes import ( - ST_Coordinate, ST_DrawingElementId, ST_PositiveCoordinate, - ST_RelationshipId, XsdString, XsdToken -) -from .xmlchemy import ( - BaseOxmlElement, OneAndOnlyOne, OptionalAttribute, RequiredAttribute, - ZeroOrOne -) - - -class CT_Blip(BaseOxmlElement): - """ - ```` element, specifies image source and adjustments such as - alpha and tint. - """ - embed = OptionalAttribute('r:embed', ST_RelationshipId) - link = OptionalAttribute('r:link', ST_RelationshipId) - - -class CT_BlipFillProperties(BaseOxmlElement): - """ - ```` element, specifies picture properties - """ - blip = ZeroOrOne('a:blip', successors=( - 'a:srcRect', 'a:tile', 'a:stretch' - )) - - -class CT_GraphicalObject(BaseOxmlElement): - """ - ```` element, container for a DrawingML object - """ - graphicData = OneAndOnlyOne('a:graphicData') - - -class CT_GraphicalObjectData(BaseOxmlElement): - """ - ```` element, container for the XML of a DrawingML object - """ - pic = ZeroOrOne('pic:pic') - uri = RequiredAttribute('uri', XsdToken) - - -class CT_Inline(BaseOxmlElement): - """ - ```` element, container for an inline shape. - """ - extent = OneAndOnlyOne('wp:extent') - docPr = OneAndOnlyOne('wp:docPr') - graphic = OneAndOnlyOne('a:graphic') - - @classmethod - def new(cls, cx, cy, shape_id, pic): - """ - Return a new ```` element populated with the values passed - as parameters. 
- """ - inline = parse_xml(cls._inline_xml()) - inline.extent.cx = cx - inline.extent.cy = cy - inline.docPr.id = shape_id - inline.docPr.name = 'Picture %d' % shape_id - inline.graphic.graphicData.uri = ( - 'http://schemas.openxmlformats.org/drawingml/2006/picture' - ) - inline.graphic.graphicData._insert_pic(pic) - return inline - - @classmethod - def _inline_xml(cls): - return ( - '\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '' % nsdecls('wp', 'a', 'pic', 'r') - ) - - -class CT_NonVisualDrawingProps(BaseOxmlElement): - """ - Used for ```` element, and perhaps others. Specifies the id and - name of a DrawingML drawing. - """ - id = RequiredAttribute('id', ST_DrawingElementId) - name = RequiredAttribute('name', XsdString) - - -class CT_NonVisualPictureProperties(BaseOxmlElement): - """ - ```` element, specifies picture locking and resize - behaviors. - """ - - -class CT_Picture(BaseOxmlElement): - """ - ```` element, a DrawingML picture - """ - nvPicPr = OneAndOnlyOne('pic:nvPicPr') - blipFill = OneAndOnlyOne('pic:blipFill') - spPr = OneAndOnlyOne('pic:spPr') - - @classmethod - def new(cls, pic_id, filename, rId, cx, cy): - """ - Return a new ```` element populated with the minimal - contents required to define a viable picture element, based on the - values passed as parameters. - """ - pic = parse_xml(cls._pic_xml()) - pic.nvPicPr.cNvPr.id = pic_id - pic.nvPicPr.cNvPr.name = filename - pic.blipFill.blip.embed = rId - pic.spPr.cx = cx - pic.spPr.cy = cy - return pic - - @classmethod - def _pic_xml(cls): - return ( - '\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '' % nsdecls('pic', 'a', 'r') - ) - - -class CT_PictureNonVisual(BaseOxmlElement): - """ - ```` element, non-visual picture properties - """ - cNvPr = OneAndOnlyOne('pic:cNvPr') - - -class CT_Point2D(BaseOxmlElement): - """ - Used for ```` element, and perhaps others. 
Specifies an x, y - coordinate (point). - """ - x = RequiredAttribute('x', ST_Coordinate) - y = RequiredAttribute('y', ST_Coordinate) - - -class CT_PositiveSize2D(BaseOxmlElement): - """ - Used for ```` element, and perhaps others later. Specifies the - size of a DrawingML drawing. - """ - cx = RequiredAttribute('cx', ST_PositiveCoordinate) - cy = RequiredAttribute('cy', ST_PositiveCoordinate) - - -class CT_PresetGeometry2D(BaseOxmlElement): - """ - ```` element, specifies an preset autoshape geometry, such - as ``rect``. - """ - - -class CT_RelativeRect(BaseOxmlElement): - """ - ```` element, specifying picture should fill containing - rectangle shape. - """ - - -class CT_ShapeProperties(BaseOxmlElement): - """ - ```` element, specifies size and shape of picture container. - """ - xfrm = ZeroOrOne('a:xfrm', successors=( - 'a:custGeom', 'a:prstGeom', 'a:ln', 'a:effectLst', 'a:effectDag', - 'a:scene3d', 'a:sp3d', 'a:extLst' - )) - - @property - def cx(self): - """ - Shape width as an instance of Emu, or None if not present. - """ - xfrm = self.xfrm - if xfrm is None: - return None - return xfrm.cx - - @cx.setter - def cx(self, value): - xfrm = self.get_or_add_xfrm() - xfrm.cx = value - - @property - def cy(self): - """ - Shape height as an instance of Emu, or None if not present. - """ - xfrm = self.xfrm - if xfrm is None: - return None - return xfrm.cy - - @cy.setter - def cy(self, value): - xfrm = self.get_or_add_xfrm() - xfrm.cy = value - - -class CT_StretchInfoProperties(BaseOxmlElement): - """ - ```` element, specifies how picture should fill its containing - shape. - """ - - -class CT_Transform2D(BaseOxmlElement): - """ - ```` element, specifies size and shape of picture container. 
- """ - off = ZeroOrOne('a:off', successors=('a:ext',)) - ext = ZeroOrOne('a:ext', successors=()) - - @property - def cx(self): - ext = self.ext - if ext is None: - return None - return ext.cx - - @cx.setter - def cx(self, value): - ext = self.get_or_add_ext() - ext.cx = value - - @property - def cy(self): - ext = self.ext - if ext is None: - return None - return ext.cy - - @cy.setter - def cy(self, value): - ext = self.get_or_add_ext() - ext.cy = value diff --git a/docx/oxml/shared.py b/docx/oxml/shared.py deleted file mode 100644 index 1e21ba366..000000000 --- a/docx/oxml/shared.py +++ /dev/null @@ -1,55 +0,0 @@ -# encoding: utf-8 - -""" -Objects shared by modules in the docx.oxml subpackage. -""" - -from __future__ import absolute_import - -from . import OxmlElement -from .ns import qn -from .simpletypes import ST_DecimalNumber, ST_OnOff, ST_String -from .xmlchemy import BaseOxmlElement, OptionalAttribute, RequiredAttribute - - -class CT_DecimalNumber(BaseOxmlElement): - """ - Used for ````, ````, ```` and several - others, containing a text representation of a decimal number (e.g. 42) in - its ``val`` attribute. - """ - val = RequiredAttribute('w:val', ST_DecimalNumber) - - @classmethod - def new(cls, nsptagname, val): - """ - Return a new ``CT_DecimalNumber`` element having tagname *nsptagname* - and ``val`` attribute set to *val*. - """ - return OxmlElement(nsptagname, attrs={qn('w:val'): str(val)}) - - -class CT_OnOff(BaseOxmlElement): - """ - Used for ````, ```` elements and others, containing a bool-ish - string in its ``val`` attribute, xsd:boolean plus 'on' and 'off'. - """ - val = OptionalAttribute('w:val', ST_OnOff, default=True) - - -class CT_String(BaseOxmlElement): - """ - Used for ```` and ```` elements and others, - containing a style name in its ``val`` attribute. 
- """ - val = RequiredAttribute('w:val', ST_String) - - @classmethod - def new(cls, nsptagname, val): - """ - Return a new ``CT_String`` element with tagname *nsptagname* and - ``val`` attribute set to *val*. - """ - elm = OxmlElement(nsptagname) - elm.val = val - return elm diff --git a/docx/oxml/simpletypes.py b/docx/oxml/simpletypes.py deleted file mode 100644 index 07b51d533..000000000 --- a/docx/oxml/simpletypes.py +++ /dev/null @@ -1,317 +0,0 @@ -# encoding: utf-8 - -""" -Simple type classes, providing validation and format translation for values -stored in XML element attributes. Naming generally corresponds to the simple -type in the associated XML schema. -""" - -from __future__ import absolute_import, print_function - -from ..exceptions import InvalidXmlError -from ..shared import Emu, Twips - - -class BaseSimpleType(object): - - @classmethod - def from_xml(cls, str_value): - return cls.convert_from_xml(str_value) - - @classmethod - def to_xml(cls, value): - cls.validate(value) - str_value = cls.convert_to_xml(value) - return str_value - - @classmethod - def validate_int(cls, value): - if not isinstance(value, int): - raise TypeError( - "value must be , got %s" % type(value) - ) - - @classmethod - def validate_int_in_range(cls, value, min_inclusive, max_inclusive): - cls.validate_int(value) - if value < min_inclusive or value > max_inclusive: - raise ValueError( - "value must be in range %d to %d inclusive, got %d" % - (min_inclusive, max_inclusive, value) - ) - - @classmethod - def validate_string(cls, value): - if isinstance(value, str): - return value - try: - if isinstance(value, basestring): - return value - except NameError: # means we're on Python 3 - pass - raise TypeError( - "value must be a string, got %s" % type(value) - ) - - -class BaseStringType(BaseSimpleType): - - @classmethod - def convert_from_xml(cls, str_value): - return str_value - - @classmethod - def convert_to_xml(cls, value): - return value - - @classmethod - def validate(cls, 
value): - cls.validate_string(value) - - -class BaseIntType(BaseSimpleType): - - @classmethod - def convert_from_xml(cls, str_value): - return int(str_value) - - @classmethod - def convert_to_xml(cls, value): - return str(value) - - @classmethod - def validate(cls, value): - cls.validate_int(value) - - -class XsdAnyUri(BaseStringType): - """ - There's a regular expression this is supposed to meet but so far thinking - spending cycles on validating wouldn't be worth it for the number of - programming errors it would catch. - """ - - -class XsdBoolean(BaseSimpleType): - - @classmethod - def convert_from_xml(cls, str_value): - if str_value not in ('1', '0', 'true', 'false'): - raise InvalidXmlError( - "value must be one of '1', '0', 'true' or 'false', got '%s'" - % str_value - ) - return str_value in ('1', 'true') - - @classmethod - def convert_to_xml(cls, value): - return {True: '1', False: '0'}[value] - - @classmethod - def validate(cls, value): - if value not in (True, False): - raise TypeError( - "only True or False (and possibly None) may be assigned, got" - " '%s'" % value - ) - - -class XsdId(BaseStringType): - """ - String that must begin with a letter or underscore and cannot contain any - colons. Not fully validated because not used in external API. - """ - pass - - -class XsdInt(BaseIntType): - - @classmethod - def validate(cls, value): - cls.validate_int_in_range(value, -2147483648, 2147483647) - - -class XsdLong(BaseIntType): - - @classmethod - def validate(cls, value): - cls.validate_int_in_range( - value, -9223372036854775808, 9223372036854775807 - ) - - -class XsdString(BaseStringType): - pass - - -class XsdToken(BaseStringType): - """ - xsd:string with whitespace collapsing, e.g. multiple spaces reduced to - one, leading and trailing space stripped. 
- """ - pass - - -class XsdUnsignedInt(BaseIntType): - - @classmethod - def validate(cls, value): - cls.validate_int_in_range(value, 0, 4294967295) - - -class XsdUnsignedLong(BaseIntType): - - @classmethod - def validate(cls, value): - cls.validate_int_in_range(value, 0, 18446744073709551615) - - -class ST_BrClear(XsdString): - - @classmethod - def validate(cls, value): - cls.validate_string(value) - valid_values = ('none', 'left', 'right', 'all') - if value not in valid_values: - raise ValueError( - "must be one of %s, got '%s'" % (valid_values, value) - ) - - -class ST_BrType(XsdString): - - @classmethod - def validate(cls, value): - cls.validate_string(value) - valid_values = ('page', 'column', 'textWrapping') - if value not in valid_values: - raise ValueError( - "must be one of %s, got '%s'" % (valid_values, value) - ) - - -class ST_Coordinate(BaseIntType): - - @classmethod - def convert_from_xml(cls, str_value): - if 'i' in str_value or 'm' in str_value or 'p' in str_value: - return ST_UniversalMeasure.convert_from_xml(str_value) - return Emu(int(str_value)) - - @classmethod - def validate(cls, value): - ST_CoordinateUnqualified.validate(value) - - -class ST_CoordinateUnqualified(XsdLong): - - @classmethod - def validate(cls, value): - cls.validate_int_in_range(value, -27273042329600, 27273042316900) - - -class ST_DecimalNumber(XsdInt): - pass - - -class ST_DrawingElementId(XsdUnsignedInt): - pass - - -class ST_OnOff(XsdBoolean): - - @classmethod - def convert_from_xml(cls, str_value): - if str_value not in ('1', '0', 'true', 'false', 'on', 'off'): - raise InvalidXmlError( - "value must be one of '1', '0', 'true', 'false', 'on', or 'o" - "ff', got '%s'" % str_value - ) - return str_value in ('1', 'true', 'on') - - -class ST_PositiveCoordinate(XsdLong): - - @classmethod - def convert_from_xml(cls, str_value): - return Emu(int(str_value)) - - @classmethod - def validate(cls, value): - cls.validate_int_in_range(value, 0, 27273042316900) - - -class 
ST_RelationshipId(XsdString): - pass - - -class ST_SignedTwipsMeasure(XsdInt): - - @classmethod - def convert_from_xml(cls, str_value): - if 'i' in str_value or 'm' in str_value or 'p' in str_value: - return ST_UniversalMeasure.convert_from_xml(str_value) - return Twips(int(str_value)) - - @classmethod - def convert_to_xml(cls, value): - emu = Emu(value) - twips = emu.twips - return str(twips) - - -class ST_String(XsdString): - pass - - -class ST_TblLayoutType(XsdString): - - @classmethod - def validate(cls, value): - cls.validate_string(value) - valid_values = ('fixed', 'autofit') - if value not in valid_values: - raise ValueError( - "must be one of %s, got '%s'" % (valid_values, value) - ) - - -class ST_TblWidth(XsdString): - - @classmethod - def validate(cls, value): - cls.validate_string(value) - valid_values = ('auto', 'dxa', 'nil', 'pct') - if value not in valid_values: - raise ValueError( - "must be one of %s, got '%s'" % (valid_values, value) - ) - - -class ST_TwipsMeasure(XsdUnsignedLong): - - @classmethod - def convert_from_xml(cls, str_value): - if 'i' in str_value or 'm' in str_value or 'p' in str_value: - return ST_UniversalMeasure.convert_from_xml(str_value) - return Twips(int(str_value)) - - @classmethod - def convert_to_xml(cls, value): - emu = Emu(value) - twips = emu.twips - return str(twips) - - -class ST_UniversalMeasure(BaseSimpleType): - - @classmethod - def convert_from_xml(cls, str_value): - float_part, units_part = str_value[:-2], str_value[-2:] - quantity = float(float_part) - multiplier = { - 'mm': 36000, 'cm': 360000, 'in': 914400, 'pt': 12700, - 'pc': 152400, 'pi': 152400 - }[units_part] - emu_value = Emu(int(round(quantity * multiplier))) - return emu_value diff --git a/docx/oxml/table.py b/docx/oxml/table.py deleted file mode 100644 index f2fbd540f..000000000 --- a/docx/oxml/table.py +++ /dev/null @@ -1,250 +0,0 @@ -# encoding: utf-8 - -""" -Custom element classes for tables -""" - -from __future__ import absolute_import, 
print_function, unicode_literals - -from . import parse_xml -from .ns import nsdecls -from ..shared import Emu, Twips -from .simpletypes import ( - ST_TblLayoutType, ST_TblWidth, ST_TwipsMeasure, XsdInt -) -from .xmlchemy import ( - BaseOxmlElement, OneAndOnlyOne, OneOrMore, OptionalAttribute, - RequiredAttribute, ZeroOrOne, ZeroOrMore -) - - -class CT_Row(BaseOxmlElement): - """ - ```` element - """ - tc = ZeroOrMore('w:tc') - - def _new_tc(self): - return CT_Tc.new() - - -class CT_Tbl(BaseOxmlElement): - """ - ```` element - """ - tblPr = OneAndOnlyOne('w:tblPr') - tblGrid = OneAndOnlyOne('w:tblGrid') - tr = ZeroOrMore('w:tr') - - @classmethod - def new(cls): - """ - Return a new ```` element, containing the required - ```` and ```` child elements. - """ - tbl = parse_xml(cls._tbl_xml()) - return tbl - - @classmethod - def _tbl_xml(cls): - return ( - '\n' - ' \n' - ' \n' - ' \n' - ' \n' - '' % nsdecls('w') - ) - - -class CT_TblGrid(BaseOxmlElement): - """ - ```` element, child of ````, holds ```` - elements that define column count, width, etc. - """ - gridCol = ZeroOrMore('w:gridCol', successors=('w:tblGridChange',)) - - -class CT_TblGridCol(BaseOxmlElement): - """ - ```` element, child of ````, defines a table - column. - """ - w = OptionalAttribute('w:w', ST_TwipsMeasure) - - -class CT_TblLayoutType(BaseOxmlElement): - """ - ```` element, specifying whether column widths are fixed or - can be automatically adjusted based on content. - """ - type = OptionalAttribute('w:type', ST_TblLayoutType) - - -class CT_TblPr(BaseOxmlElement): - """ - ```` element, child of ````, holds child elements that - define table properties such as style and borders. 
- """ - tblStyle = ZeroOrOne('w:tblStyle', successors=( - 'w:tblpPr', 'w:tblOverlap', 'w:bidiVisual', 'w:tblStyleRowBandSize', - 'w:tblStyleColBandSize', 'w:tblW', 'w:jc', 'w:tblCellSpacing', - 'w:tblInd', 'w:tblBorders', 'w:shd', 'w:tblLayout', 'w:tblCellMar', - 'w:tblLook', 'w:tblCaption', 'w:tblDescription', 'w:tblPrChange' - )) - tblLayout = ZeroOrOne('w:tblLayout', successors=( - 'w:tblLayout', 'w:tblCellMar', 'w:tblLook', 'w:tblCaption', - 'w:tblDescription', 'w:tblPrChange' - )) - - @property - def autofit(self): - """ - Return |False| if there is a ```` child with ``w:type`` - attribute set to ``'fixed'``. Otherwise return |True|. - """ - tblLayout = self.tblLayout - if tblLayout is None: - return True - return False if tblLayout.type == 'fixed' else True - - @autofit.setter - def autofit(self, value): - tblLayout = self.get_or_add_tblLayout() - tblLayout.type = 'autofit' if value else 'fixed' - - @property - def style(self): - """ - Return the value of the ``val`` attribute of the ```` - child or |None| if not present. - """ - tblStyle = self.tblStyle - if tblStyle is None: - return None - return tblStyle.val - - @style.setter - def style(self, value): - self._remove_tblStyle() - if value is None: - return - self._add_tblStyle(val=value) - - -class CT_TblWidth(BaseOxmlElement): - """ - Used for ```` and ```` elements and many others, to - specify a table-related width. - """ - # the type for `w` attr is actually ST_MeasurementOrPercent, but using - # XsdInt for now because only dxa (twips) values are being used. It's not - # entirely clear what the semantics are for other values like -01.4mm - w = RequiredAttribute('w:w', XsdInt) - type = RequiredAttribute('w:type', ST_TblWidth) - - @property - def width(self): - """ - Return the EMU length value represented by the combined ``w:w`` and - ``w:type`` attributes. 
- """ - if self.type != 'dxa': - return None - return Twips(self.w) - - @width.setter - def width(self, value): - self.type = 'dxa' - self.w = Emu(value).twips - - -class CT_Tc(BaseOxmlElement): - """ - ```` table cell element - """ - tcPr = ZeroOrOne('w:tcPr') # bunches of successors, overriding insert - p = OneOrMore('w:p') - tbl = OneOrMore('w:tbl') - - def _insert_tcPr(self, tcPr): - """ - ``tcPr`` has a bunch of successors, but it comes first if it appears, - so just overriding and using insert(0, ...) rather than spelling out - successors. - """ - self.insert(0, tcPr) - return tcPr - - def _new_tbl(self): - return CT_Tbl.new() - - def clear_content(self): - """ - Remove all content child elements, preserving the ```` - element if present. Note that this leaves the ```` element in - an invalid state because it doesn't contain at least one block-level - element. It's up to the caller to add a ````child element as the - last content element. - """ - new_children = [] - tcPr = self.tcPr - if tcPr is not None: - new_children.append(tcPr) - self[:] = new_children - - @classmethod - def new(cls): - """ - Return a new ```` element, containing an empty paragraph as the - required EG_BlockLevelElt. - """ - return parse_xml( - '\n' - ' \n' - '' % nsdecls('w') - ) - - @property - def width(self): - """ - Return the EMU length value represented in the ``./w:tcPr/w:tcW`` - child element or |None| if not present. 
- """ - tcPr = self.tcPr - if tcPr is None: - return None - return tcPr.width - - @width.setter - def width(self, value): - tcPr = self.get_or_add_tcPr() - tcPr.width = value - - -class CT_TcPr(BaseOxmlElement): - """ - ```` element, defining table cell properties - """ - tcW = ZeroOrOne('w:tcW', successors=( - 'w:gridSpan', 'w:hMerge', 'w:vMerge', 'w:tcBorders', 'w:shd', - 'w:noWrap', 'w:tcMar', 'w:textDirection', 'w:tcFitText', 'w:vAlign', - 'w:hideMark', 'w:headers', 'w:cellIns', 'w:cellDel', 'w:cellMerge', - 'w:tcPrChange' - )) - - @property - def width(self): - """ - Return the EMU length value represented in the ```` child - element or |None| if not present or its type is not 'dxa'. - """ - tcW = self.tcW - if tcW is None: - return None - return tcW.width - - @width.setter - def width(self, value): - tcW = self.get_or_add_tcW() - tcW.width = value diff --git a/docx/oxml/text.py b/docx/oxml/text.py deleted file mode 100644 index 9fdd1d64b..000000000 --- a/docx/oxml/text.py +++ /dev/null @@ -1,431 +0,0 @@ -# encoding: utf-8 - -""" -Custom element classes related to text, such as paragraph (CT_P) and runs -(CT_R). -""" - -from ..enum.text import WD_ALIGN_PARAGRAPH, WD_UNDERLINE -from .ns import qn -from .simpletypes import ST_BrClear, ST_BrType -from .xmlchemy import ( - BaseOxmlElement, OptionalAttribute, OxmlElement, RequiredAttribute, - ZeroOrMore, ZeroOrOne -) - - -class CT_Br(BaseOxmlElement): - """ - ```` element, indicating a line, page, or column break in a run. - """ - type = OptionalAttribute('w:type', ST_BrType) - clear = OptionalAttribute('w:clear', ST_BrClear) - - -class CT_Jc(BaseOxmlElement): - """ - ```` element, specifying paragraph justification. - """ - val = RequiredAttribute('w:val', WD_ALIGN_PARAGRAPH) - - -class CT_P(BaseOxmlElement): - """ - ```` element, containing the properties and text for a paragraph. 
- """ - pPr = ZeroOrOne('w:pPr') - r = ZeroOrMore('w:r') - - def _insert_pPr(self, pPr): - self.insert(0, pPr) - return pPr - - def add_p_before(self): - """ - Return a new ```` element inserted directly prior to this one. - """ - new_p = OxmlElement('w:p') - self.addprevious(new_p) - return new_p - - @property - def alignment(self): - """ - The value of the ```` grandchild element or |None| if not - present. - """ - pPr = self.pPr - if pPr is None: - return None - return pPr.alignment - - @alignment.setter - def alignment(self, value): - pPr = self.get_or_add_pPr() - pPr.alignment = value - - def clear_content(self): - """ - Remove all child elements, except the ```` element if present. - """ - for child in self[:]: - if child.tag == qn('w:pPr'): - continue - self.remove(child) - - def set_sectPr(self, sectPr): - """ - Unconditionally replace or add *sectPr* as a grandchild in the - correct sequence. - """ - pPr = self.get_or_add_pPr() - pPr._remove_sectPr() - pPr._insert_sectPr(sectPr) - - @property - def style(self): - """ - String contained in w:val attribute of ./w:pPr/w:pStyle grandchild, - or |None| if not present. - """ - pPr = self.pPr - if pPr is None: - return None - return pPr.style - - @style.setter - def style(self, style): - pPr = self.get_or_add_pPr() - pPr.style = style - - -class CT_PPr(BaseOxmlElement): - """ - ```` element, containing the properties for a paragraph. 
- """ - __child_sequence__ = ( - 'w:pStyle', 'w:keepNext', 'w:keepLines', 'w:pageBreakBefore', - 'w:framePr', 'w:widowControl', 'w:numPr', 'w:suppressLineNumbers', - 'w:pBdr', 'w:shd', 'w:tabs', 'w:suppressAutoHyphens', 'w:kinsoku', - 'w:wordWrap', 'w:overflowPunct', 'w:topLinePunct', 'w:autoSpaceDE', - 'w:autoSpaceDN', 'w:bidi', 'w:adjustRightInd', 'w:snapToGrid', - 'w:spacing', 'w:ind', 'w:contextualSpacing', 'w:mirrorIndents', - 'w:suppressOverlap', 'w:jc', 'w:textDirection', 'w:textAlignment', - 'w:textboxTightWrap', 'w:outlineLvl', 'w:divId', 'w:cnfStyle', - 'w:rPr', 'w:sectPr', 'w:pPrChange' - ) - pStyle = ZeroOrOne('w:pStyle') - numPr = ZeroOrOne('w:numPr', successors=__child_sequence__[7:]) - jc = ZeroOrOne('w:jc', successors=__child_sequence__[27:]) - sectPr = ZeroOrOne('w:sectPr', successors=('w:pPrChange',)) - - def _insert_pStyle(self, pStyle): - self.insert(0, pStyle) - return pStyle - - @property - def alignment(self): - """ - The value of the ```` child element or |None| if not present. - """ - jc = self.jc - if jc is None: - return None - return jc.val - - @alignment.setter - def alignment(self, value): - if value is None: - self._remove_jc() - return - jc = self.get_or_add_jc() - jc.val = value - - @property - def style(self): - """ - String contained in child, or None if that element is not - present. - """ - pStyle = self.pStyle - if pStyle is None: - return None - return pStyle.val - - @style.setter - def style(self, style): - """ - Set val attribute of child element to *style*, adding a - new element if necessary. If *style* is |None|, remove the - element if present. - """ - if style is None: - self._remove_pStyle() - return - pStyle = self.get_or_add_pStyle() - pStyle.val = style - - -class CT_R(BaseOxmlElement): - """ - ```` element, containing the properties and text for a run. 
- """ - rPr = ZeroOrOne('w:rPr') - t = ZeroOrMore('w:t') - br = ZeroOrMore('w:br') - cr = ZeroOrMore('w:cr') - tab = ZeroOrMore('w:tab') - drawing = ZeroOrMore('w:drawing') - - def _insert_rPr(self, rPr): - self.insert(0, rPr) - return rPr - - def add_t(self, text): - """ - Return a newly added ```` element containing *text*. - """ - t = self._add_t(text=text) - if len(text.strip()) < len(text): - t.set(qn('xml:space'), 'preserve') - return t - - def add_drawing(self, inline_or_anchor): - """ - Return a newly appended ``CT_Drawing`` (````) child - element having *inline_or_anchor* as its child. - """ - drawing = self._add_drawing() - drawing.append(inline_or_anchor) - return drawing - - def clear_content(self): - """ - Remove all child elements except the ```` element if present. - """ - content_child_elms = self[1:] if self.rPr is not None else self[:] - for child in content_child_elms: - self.remove(child) - - @property - def style(self): - """ - String contained in w:val attribute of grandchild, or - |None| if that element is not present. - """ - rPr = self.rPr - if rPr is None: - return None - return rPr.style - - @style.setter - def style(self, style): - """ - Set the character style of this element to *style*. If *style* - is None, remove the style element. - """ - rPr = self.get_or_add_rPr() - rPr.style = style - - @property - def text(self): - """ - A string representing the textual content of this run, with content - child elements like ```` translated to their Python - equivalent. 
- """ - text = '' - for child in self: - if child.tag == qn('w:t'): - t_text = child.text - text += t_text if t_text is not None else '' - elif child.tag == qn('w:tab'): - text += '\t' - elif child.tag in (qn('w:br'), qn('w:cr')): - text += '\n' - return text - - @text.setter - def text(self, text): - self.clear_content() - _RunContentAppender.append_to_run_from_text(self, text) - - @property - def underline(self): - """ - String contained in w:val attribute of ./w:rPr/w:u grandchild, or - |None| if not present. - """ - rPr = self.rPr - if rPr is None: - return None - return rPr.underline - - @underline.setter - def underline(self, value): - rPr = self.get_or_add_rPr() - rPr.underline = value - - -class CT_RPr(BaseOxmlElement): - """ - ```` element, containing the properties for a run. - """ - rStyle = ZeroOrOne('w:rStyle', successors=('w:rPrChange',)) - b = ZeroOrOne('w:b', successors=('w:rPrChange',)) - bCs = ZeroOrOne('w:bCs', successors=('w:rPrChange',)) - caps = ZeroOrOne('w:caps', successors=('w:rPrChange',)) - cs = ZeroOrOne('w:cs', successors=('w:rPrChange',)) - dstrike = ZeroOrOne('w:dstrike', successors=('w:rPrChange',)) - emboss = ZeroOrOne('w:emboss', successors=('w:rPrChange',)) - i = ZeroOrOne('w:i', successors=('w:rPrChange',)) - iCs = ZeroOrOne('w:iCs', successors=('w:rPrChange',)) - imprint = ZeroOrOne('w:imprint', successors=('w:rPrChange',)) - noProof = ZeroOrOne('w:noProof', successors=('w:rPrChange',)) - oMath = ZeroOrOne('w:oMath', successors=('w:rPrChange',)) - outline = ZeroOrOne('w:outline', successors=('w:rPrChange',)) - rtl = ZeroOrOne('w:rtl', successors=('w:rPrChange',)) - shadow = ZeroOrOne('w:shadow', successors=('w:rPrChange',)) - smallCaps = ZeroOrOne('w:smallCaps', successors=('w:rPrChange',)) - snapToGrid = ZeroOrOne('w:snapToGrid', successors=('w:rPrChange',)) - specVanish = ZeroOrOne('w:specVanish', successors=('w:rPrChange',)) - strike = ZeroOrOne('w:strike', successors=('w:rPrChange',)) - u = ZeroOrOne('w:u', 
successors=('w:rPrChange',)) - vanish = ZeroOrOne('w:vanish', successors=('w:rPrChange',)) - webHidden = ZeroOrOne('w:webHidden', successors=('w:rPrChange',)) - - @property - def style(self): - """ - String contained in child, or None if that element is not - present. - """ - rStyle = self.rStyle - if rStyle is None: - return None - return rStyle.val - - @style.setter - def style(self, style): - """ - Set val attribute of child element to *style*, adding a - new element if necessary. If *style* is |None|, remove the - element if present. - """ - if style is None: - self._remove_rStyle() - elif self.rStyle is None: - self._add_rStyle(val=style) - else: - self.rStyle.val = style - - @property - def underline(self): - """ - Underline type specified in child, or None if that element is - not present. - """ - u = self.u - if u is None: - return None - return u.val - - @underline.setter - def underline(self, value): - self._remove_u() - if value is not None: - u = self._add_u() - u.val = value - - -class CT_Text(BaseOxmlElement): - """ - ```` element, containing a sequence of characters within a run. - """ - - -class CT_Underline(BaseOxmlElement): - """ - ```` element, specifying the underlining style for a run. - """ - @property - def val(self): - """ - The underline type corresponding to the ``w:val`` attribute value. - """ - val = self.get(qn('w:val')) - underline = WD_UNDERLINE.from_xml(val) - if underline == WD_UNDERLINE.SINGLE: - return True - if underline == WD_UNDERLINE.NONE: - return False - return underline - - @val.setter - def val(self, value): - # works fine without these two mappings, but only because True == 1 - # and False == 0, which happen to match the mapping for WD_UNDERLINE - # .SINGLE and .NONE respectively. 
- if value is True: - value = WD_UNDERLINE.SINGLE - elif value is False: - value = WD_UNDERLINE.NONE - - val = WD_UNDERLINE.to_xml(value) - self.set(qn('w:val'), val) - - -class _RunContentAppender(object): - """ - Service object that knows how to translate a Python string into run - content elements appended to a specified ```` element. Contiguous - sequences of regular characters are appended in a single ```` - element. Each tab character ('\t') causes a ```` element to be - appended. Likewise a newline or carriage return character ('\n', '\r') - causes a ```` element to be appended. - """ - def __init__(self, r): - self._r = r - self._bfr = [] - - @classmethod - def append_to_run_from_text(cls, r, text): - """ - Create a "one-shot" ``_RunContentAppender`` instance and use it to - append the run content elements corresponding to *text* to the - ```` element *r*. - """ - appender = cls(r) - appender.add_text(text) - - def add_text(self, text): - """ - Append the run content elements corresponding to *text* to the - ```` element of this instance. - """ - for char in text: - self.add_char(char) - self.flush() - - def add_char(self, char): - """ - Process the next character of input through the translation finite - state maching (FSM). There are two possible states, buffer pending - and not pending, but those are hidden behind the ``.flush()`` method - which must be called at the end of text to ensure any pending - ```` element is written. 
- """ - if char == '\t': - self.flush() - self._r.add_tab() - elif char in '\r\n': - self.flush() - self._r.add_br() - else: - self._bfr.append(char) - - def flush(self): - text = ''.join(self._bfr) - if text: - self._r.add_t(text) - del self._bfr[:] diff --git a/docx/oxml/xmlchemy.py b/docx/oxml/xmlchemy.py deleted file mode 100644 index 40df33494..000000000 --- a/docx/oxml/xmlchemy.py +++ /dev/null @@ -1,761 +0,0 @@ -# encoding: utf-8 - -""" -Provides a wrapper around lxml that enables declarative definition of custom -element classes. -""" - -from __future__ import absolute_import - -from lxml import etree - -import re - -from . import OxmlElement -from ..compat import Unicode -from .exceptions import InvalidXmlError -from .ns import NamespacePrefixedTag, nsmap, qn -from ..shared import lazyproperty - - -def serialize_for_reading(element): - """ - Serialize *element* to human-readable XML suitable for tests. No XML - declaration. - """ - xml = etree.tostring(element, encoding='unicode', pretty_print=True) - return XmlString(xml) - - -class XmlString(Unicode): - """ - Provides string comparison override suitable for serialized XML that is - useful for tests. - """ - - # ' text' - # | | || | - # +----------+------------------------------------------++-----------+ - # front attrs | text - # close - - _xml_elm_line_patt = re.compile( - '( *)([^<]*)?$' - ) - - def __eq__(self, other): - lines = self.splitlines() - lines_other = other.splitlines() - if len(lines) != len(lines_other): - return False - for line, line_other in zip(lines, lines_other): - if not self._eq_elm_strs(line, line_other): - return False - return True - - def __ne__(self, other): - return not self.__eq__(other) - - def _attr_seq(self, attrs): - """ - Return a sequence of attribute strings parsed from *attrs*. Each - attribute string is stripped of whitespace on both ends. 
- """ - attrs = attrs.strip() - attr_lst = attrs.split() - return sorted(attr_lst) - - def _eq_elm_strs(self, line, line_2): - """ - Return True if the element in *line_2* is XML equivalent to the - element in *line*. - """ - front, attrs, close, text = self._parse_line(line) - front_2, attrs_2, close_2, text_2 = self._parse_line(line_2) - if front != front_2: - return False - if self._attr_seq(attrs) != self._attr_seq(attrs_2): - return False - if close != close_2: - return False - if text != text_2: - return False - return True - - @classmethod - def _parse_line(cls, line): - """ - Return front, attrs, close, text 4-tuple result of parsing XML element - string *line*. - """ - match = cls._xml_elm_line_patt.match(line) - front, attrs, close, text = [match.group(n) for n in range(1, 5)] - return front, attrs, close, text - - -class MetaOxmlElement(type): - """ - Metaclass for BaseOxmlElement - """ - def __init__(cls, clsname, bases, clsdict): - dispatchable = ( - OneAndOnlyOne, OneOrMore, OptionalAttribute, RequiredAttribute, - ZeroOrMore, ZeroOrOne, ZeroOrOneChoice - ) - for key, value in clsdict.items(): - if isinstance(value, dispatchable): - value.populate_class_members(cls, key) - - -class BaseAttribute(object): - """ - Base class for OptionalAttribute and RequiredAttribute, providing common - methods. - """ - def __init__(self, attr_name, simple_type): - super(BaseAttribute, self).__init__() - self._attr_name = attr_name - self._simple_type = simple_type - - def populate_class_members(self, element_cls, prop_name): - """ - Add the appropriate methods to *element_cls*. - """ - self._element_cls = element_cls - self._prop_name = prop_name - - self._add_attr_property() - - def _add_attr_property(self): - """ - Add a read/write ``{prop_name}`` property to the element class that - returns the interpreted value of this attribute on access and changes - the attribute value to its ST_* counterpart on assignment. 
- """ - property_ = property(self._getter, self._setter, None) - # assign unconditionally to overwrite element name definition - setattr(self._element_cls, self._prop_name, property_) - - @property - def _clark_name(self): - if ':' in self._attr_name: - return qn(self._attr_name) - return self._attr_name - - -class OptionalAttribute(BaseAttribute): - """ - Defines an optional attribute on a custom element class. An optional - attribute returns a default value when not present for reading. When - assigned |None|, the attribute is removed. - """ - def __init__(self, attr_name, simple_type, default=None): - super(OptionalAttribute, self).__init__(attr_name, simple_type) - self._default = default - - @property - def _getter(self): - """ - Return a function object suitable for the "get" side of the attribute - property descriptor. - """ - def get_attr_value(obj): - attr_str_value = obj.get(self._clark_name) - if attr_str_value is None: - return self._default - return self._simple_type.from_xml(attr_str_value) - get_attr_value.__doc__ = self._docstring - return get_attr_value - - @property - def _docstring(self): - """ - Return the string to use as the ``__doc__`` attribute of the property - for this attribute. - """ - return ( - '%s type-converted value of ``%s`` attribute, or |None| (or spec' - 'ified default value) if not present. Assigning the default valu' - 'e causes the attribute to be removed from the element.' % - (self._simple_type.__name__, self._attr_name) - ) - - @property - def _setter(self): - """ - Return a function object suitable for the "set" side of the attribute - property descriptor. 
- """ - def set_attr_value(obj, value): - if value is None or value == self._default: - if self._clark_name in obj.attrib: - del obj.attrib[self._clark_name] - return - str_value = self._simple_type.to_xml(value) - obj.set(self._clark_name, str_value) - return set_attr_value - - -class RequiredAttribute(BaseAttribute): - """ - Defines a required attribute on a custom element class. A required - attribute is assumed to be present for reading, so does not have - a default value; its actual value is always used. If missing on read, - an |InvalidXmlError| is raised. It also does not remove the attribute if - |None| is assigned. Assigning |None| raises |TypeError| or |ValueError|, - depending on the simple type of the attribute. - """ - @property - def _getter(self): - """ - Return a function object suitable for the "get" side of the attribute - property descriptor. - """ - def get_attr_value(obj): - attr_str_value = obj.get(self._clark_name) - if attr_str_value is None: - raise InvalidXmlError( - "required '%s' attribute not present on element %s" % - (self._attr_name, obj.tag) - ) - return self._simple_type.from_xml(attr_str_value) - get_attr_value.__doc__ = self._docstring - return get_attr_value - - @property - def _docstring(self): - """ - Return the string to use as the ``__doc__`` attribute of the property - for this attribute. - """ - return ( - '%s type-converted value of ``%s`` attribute.' % - (self._simple_type.__name__, self._attr_name) - ) - - @property - def _setter(self): - """ - Return a function object suitable for the "set" side of the attribute - property descriptor. - """ - def set_attr_value(obj, value): - str_value = self._simple_type.to_xml(value) - obj.set(self._clark_name, str_value) - return set_attr_value - - -class _BaseChildElement(object): - """ - Base class for the child element classes corresponding to varying - cardinalities, such as ZeroOrOne and ZeroOrMore. 
- """ - def __init__(self, nsptagname, successors=()): - super(_BaseChildElement, self).__init__() - self._nsptagname = nsptagname - self._successors = successors - - def populate_class_members(self, element_cls, prop_name): - """ - Baseline behavior for adding the appropriate methods to - *element_cls*. - """ - self._element_cls = element_cls - self._prop_name = prop_name - - def _add_adder(self): - """ - Add an ``_add_x()`` method to the element class for this child - element. - """ - def _add_child(obj, **attrs): - new_method = getattr(obj, self._new_method_name) - child = new_method() - for key, value in attrs.items(): - setattr(child, key, value) - insert_method = getattr(obj, self._insert_method_name) - insert_method(child) - return child - - _add_child.__doc__ = ( - 'Add a new ``<%s>`` child element unconditionally, inserted in t' - 'he correct sequence.' % self._nsptagname - ) - self._add_to_class(self._add_method_name, _add_child) - - def _add_creator(self): - """ - Add a ``_new_{prop_name}()`` method to the element class that creates - a new, empty element of the correct type, having no attributes. - """ - creator = self._creator - creator.__doc__ = ( - 'Return a "loose", newly created ``<%s>`` element having no attri' - 'butes, text, or children.' % self._nsptagname - ) - self._add_to_class(self._new_method_name, creator) - - def _add_getter(self): - """ - Add a read-only ``{prop_name}`` property to the element class for - this child element. - """ - property_ = property(self._getter, None, None) - # assign unconditionally to overwrite element name definition - setattr(self._element_cls, self._prop_name, property_) - - def _add_inserter(self): - """ - Add an ``_insert_x()`` method to the element class for this child - element. 
- """ - def _insert_child(obj, child): - obj.insert_element_before(child, *self._successors) - return child - - _insert_child.__doc__ = ( - 'Return the passed ``<%s>`` element after inserting it as a chil' - 'd in the correct sequence.' % self._nsptagname - ) - self._add_to_class(self._insert_method_name, _insert_child) - - def _add_list_getter(self): - """ - Add a read-only ``{prop_name}_lst`` property to the element class to - retrieve a list of child elements matching this type. - """ - prop_name = '%s_lst' % self._prop_name - property_ = property(self._list_getter, None, None) - setattr(self._element_cls, prop_name, property_) - - @lazyproperty - def _add_method_name(self): - return '_add_%s' % self._prop_name - - def _add_public_adder(self): - """ - Add a public ``add_x()`` method to the parent element class. - """ - def add_child(obj): - private_add_method = getattr(obj, self._add_method_name) - child = private_add_method() - return child - - add_child.__doc__ = ( - 'Add a new ``<%s>`` child element unconditionally, inserted in t' - 'he correct sequence.' % self._nsptagname - ) - self._add_to_class(self._public_add_method_name, add_child) - - def _add_to_class(self, name, method): - """ - Add *method* to the target class as *name*, unless *name* is already - defined on the class. - """ - if hasattr(self._element_cls, name): - return - setattr(self._element_cls, name, method) - - @property - def _creator(self): - """ - Return a function object that creates a new, empty element of the - right type, having no attributes. - """ - def new_child_element(obj): - return OxmlElement(self._nsptagname) - return new_child_element - - @property - def _getter(self): - """ - Return a function object suitable for the "get" side of the property - descriptor. This default getter returns the child element with - matching tag name or |None| if not present. 
- """ - def get_child_element(obj): - return obj.find(qn(self._nsptagname)) - get_child_element.__doc__ = ( - '``<%s>`` child element or |None| if not present.' - % self._nsptagname - ) - return get_child_element - - @lazyproperty - def _insert_method_name(self): - return '_insert_%s' % self._prop_name - - @property - def _list_getter(self): - """ - Return a function object suitable for the "get" side of a list - property descriptor. - """ - def get_child_element_list(obj): - return obj.findall(qn(self._nsptagname)) - get_child_element_list.__doc__ = ( - 'A list containing each of the ``<%s>`` child elements, in the o' - 'rder they appear.' % self._nsptagname - ) - return get_child_element_list - - @lazyproperty - def _public_add_method_name(self): - """ - add_childElement() is public API for a repeating element, allowing - new elements to be added to the sequence. May be overridden to - provide a friendlier API to clients having domain appropriate - parameter names for required attributes. - """ - return 'add_%s' % self._prop_name - - @lazyproperty - def _remove_method_name(self): - return '_remove_%s' % self._prop_name - - @lazyproperty - def _new_method_name(self): - return '_new_%s' % self._prop_name - - -class Choice(_BaseChildElement): - """ - Defines a child element belonging to a group, only one of which may - appear as a child. - """ - @property - def nsptagname(self): - return self._nsptagname - - def populate_class_members( - self, element_cls, group_prop_name, successors): - """ - Add the appropriate methods to *element_cls*. - """ - self._element_cls = element_cls - self._group_prop_name = group_prop_name - self._successors = successors - - self._add_getter() - self._add_creator() - self._add_inserter() - self._add_adder() - self._add_get_or_change_to_method() - - def _add_get_or_change_to_method(self): - """ - Add a ``get_or_change_to_x()`` method to the element class for this - child element. 
- """ - def get_or_change_to_child(obj): - child = getattr(obj, self._prop_name) - if child is not None: - return child - remove_group_method = getattr( - obj, self._remove_group_method_name - ) - remove_group_method() - add_method = getattr(obj, self._add_method_name) - child = add_method() - return child - - get_or_change_to_child.__doc__ = ( - 'Return the ``<%s>`` child, replacing any other group element if' - ' found.' - ) % self._nsptagname - self._add_to_class( - self._get_or_change_to_method_name, get_or_change_to_child - ) - - @property - def _prop_name(self): - """ - Calculate property name from tag name, e.g. a:schemeClr -> schemeClr. - """ - if ':' in self._nsptagname: - start = self._nsptagname.index(':')+1 - else: - start = 0 - return self._nsptagname[start:] - - @lazyproperty - def _get_or_change_to_method_name(self): - return 'get_or_change_to_%s' % self._prop_name - - @lazyproperty - def _remove_group_method_name(self): - return '_remove_%s' % self._group_prop_name - - -class OneAndOnlyOne(_BaseChildElement): - """ - Defines a required child element for MetaOxmlElement. - """ - def __init__(self, nsptagname): - super(OneAndOnlyOne, self).__init__(nsptagname, None) - - def populate_class_members(self, element_cls, prop_name): - """ - Add the appropriate methods to *element_cls*. - """ - super(OneAndOnlyOne, self).populate_class_members( - element_cls, prop_name - ) - self._add_getter() - - @property - def _getter(self): - """ - Return a function object suitable for the "get" side of the property - descriptor. - """ - def get_child_element(obj): - child = obj.find(qn(self._nsptagname)) - if child is None: - raise InvalidXmlError( - "required ``<%s>`` child element not present" % - self._nsptagname - ) - return child - - get_child_element.__doc__ = ( - 'Required ``<%s>`` child element.' 
- % self._nsptagname - ) - return get_child_element - - -class OneOrMore(_BaseChildElement): - """ - Defines a repeating child element for MetaOxmlElement that must appear at - least once. - """ - def populate_class_members(self, element_cls, prop_name): - """ - Add the appropriate methods to *element_cls*. - """ - super(OneOrMore, self).populate_class_members( - element_cls, prop_name - ) - self._add_list_getter() - self._add_creator() - self._add_inserter() - self._add_adder() - self._add_public_adder() - delattr(element_cls, prop_name) - - -class ZeroOrMore(_BaseChildElement): - """ - Defines an optional repeating child element for MetaOxmlElement. - """ - def populate_class_members(self, element_cls, prop_name): - """ - Add the appropriate methods to *element_cls*. - """ - super(ZeroOrMore, self).populate_class_members( - element_cls, prop_name - ) - self._add_list_getter() - self._add_creator() - self._add_inserter() - self._add_adder() - self._add_public_adder() - delattr(element_cls, prop_name) - - -class ZeroOrOne(_BaseChildElement): - """ - Defines an optional child element for MetaOxmlElement. - """ - def populate_class_members(self, element_cls, prop_name): - """ - Add the appropriate methods to *element_cls*. - """ - super(ZeroOrOne, self).populate_class_members(element_cls, prop_name) - self._add_getter() - self._add_creator() - self._add_inserter() - self._add_adder() - self._add_get_or_adder() - self._add_remover() - - def _add_get_or_adder(self): - """ - Add a ``get_or_add_x()`` method to the element class for this - child element. - """ - def get_or_add_child(obj): - child = getattr(obj, self._prop_name) - if child is None: - add_method = getattr(obj, self._add_method_name) - child = add_method() - return child - get_or_add_child.__doc__ = ( - 'Return the ``<%s>`` child element, newly added if not present.' 
- ) % self._nsptagname - self._add_to_class(self._get_or_add_method_name, get_or_add_child) - - def _add_remover(self): - """ - Add a ``_remove_x()`` method to the element class for this child - element. - """ - def _remove_child(obj): - obj.remove_all(self._nsptagname) - _remove_child.__doc__ = ( - 'Remove all ``<%s>`` child elements.' - ) % self._nsptagname - self._add_to_class(self._remove_method_name, _remove_child) - - @lazyproperty - def _get_or_add_method_name(self): - return 'get_or_add_%s' % self._prop_name - - -class ZeroOrOneChoice(_BaseChildElement): - """ - Correspondes to an ``EG_*`` element group where at most one of its - members may appear as a child. - """ - def __init__(self, choices, successors=()): - self._choices = choices - self._successors = successors - - def populate_class_members(self, element_cls, prop_name): - """ - Add the appropriate methods to *element_cls*. - """ - super(ZeroOrOneChoice, self).populate_class_members( - element_cls, prop_name - ) - self._add_choice_getter() - for choice in self._choices: - choice.populate_class_members( - element_cls, self._prop_name, self._successors - ) - self._add_group_remover() - - def _add_choice_getter(self): - """ - Add a read-only ``{prop_name}`` property to the element class that - returns the present member of this group, or |None| if none are - present. - """ - property_ = property(self._choice_getter, None, None) - # assign unconditionally to overwrite element name definition - setattr(self._element_cls, self._prop_name, property_) - - def _add_group_remover(self): - """ - Add a ``_remove_eg_x()`` method to the element class for this choice - group. - """ - def _remove_choice_group(obj): - for tagname in self._member_nsptagnames: - obj.remove_all(tagname) - - _remove_choice_group.__doc__ = ( - 'Remove the current choice group child element if present.' 
- ) - self._add_to_class( - self._remove_choice_group_method_name, _remove_choice_group - ) - - @property - def _choice_getter(self): - """ - Return a function object suitable for the "get" side of the property - descriptor. - """ - def get_group_member_element(obj): - return obj.first_child_found_in(*self._member_nsptagnames) - get_group_member_element.__doc__ = ( - 'Return the child element belonging to this element group, or ' - '|None| if no member child is present.' - ) - return get_group_member_element - - @lazyproperty - def _member_nsptagnames(self): - """ - Sequence of namespace-prefixed tagnames, one for each of the member - elements of this choice group. - """ - return [choice.nsptagname for choice in self._choices] - - @lazyproperty - def _remove_choice_group_method_name(self): - return '_remove_%s' % self._prop_name - - -class _OxmlElementBase(etree.ElementBase): - """ - Effective base class for all custom element classes, to add standardized - behavior to all classes in one place. Actual inheritance is from - BaseOxmlElement below, needed to manage Python 2-3 metaclass declaration - compatibility. - """ - - __metaclass__ = MetaOxmlElement - - def __repr__(self): - return "<%s '<%s>' at 0x%0x>" % ( - self.__class__.__name__, self._nsptag, id(self) - ) - - def first_child_found_in(self, *tagnames): - """ - Return the first child found with tag in *tagnames*, or None if - not found. - """ - for tagname in tagnames: - child = self.find(qn(tagname)) - if child is not None: - return child - return None - - def insert_element_before(self, elm, *tagnames): - successor = self.first_child_found_in(*tagnames) - if successor is not None: - successor.addprevious(elm) - else: - self.append(elm) - return elm - - def remove_all(self, *tagnames): - """ - Remove all child elements whose tagname (e.g. 'a:p') appears in - *tagnames*. 
- """ - for tagname in tagnames: - matching = self.findall(qn(tagname)) - for child in matching: - self.remove(child) - - @property - def xml(self): - """ - Return XML string for this element, suitable for testing purposes. - Pretty printed for readability and without an XML declaration at the - top. - """ - return serialize_for_reading(self) - - def xpath(self, xpath_str): - """ - Override of ``lxml`` _Element.xpath() method to provide standard Open - XML namespace mapping (``nsmap``) in centralized location. - """ - return super(BaseOxmlElement, self).xpath( - xpath_str, namespaces=nsmap - ) - - @property - def _nsptag(self): - return NamespacePrefixedTag.from_clark_name(self.tag) - - -BaseOxmlElement = MetaOxmlElement( - 'BaseOxmlElement', (etree.ElementBase,), dict(_OxmlElementBase.__dict__) -) diff --git a/docx/package.py b/docx/package.py deleted file mode 100644 index 4c9a6f6a1..000000000 --- a/docx/package.py +++ /dev/null @@ -1,115 +0,0 @@ -# encoding: utf-8 - -""" -WordprocessingML Package class and related objects -""" - -from __future__ import absolute_import, print_function, unicode_literals - -from docx.image.image import Image -from docx.opc.constants import RELATIONSHIP_TYPE as RT -from docx.opc.package import OpcPackage -from docx.opc.packuri import PackURI -from docx.parts.image import ImagePart -from docx.shared import lazyproperty - - -class Package(OpcPackage): - """ - Customizations specific to a WordprocessingML package. - """ - def after_unmarshal(self): - """ - Called by loading code after all parts and relationships have been - loaded, to afford the opportunity for any required post-processing. - """ - self._gather_image_parts() - - @lazyproperty - def image_parts(self): - """ - Collection of all image parts in this package. - """ - return ImageParts() - - def _gather_image_parts(self): - """ - Load the image part collection with all the image parts in package. 
- """ - for rel in self.iter_rels(): - if rel.is_external: - continue - if rel.reltype != RT.IMAGE: - continue - if rel.target_part in self.image_parts: - continue - self.image_parts.append(rel.target_part) - - -class ImageParts(object): - """ - Collection of |ImagePart| instances corresponding to each image part in - the package. - """ - def __init__(self): - super(ImageParts, self).__init__() - self._image_parts = [] - - def __contains__(self, item): - return self._image_parts.__contains__(item) - - def __iter__(self): - return self._image_parts.__iter__() - - def __len__(self): - return self._image_parts.__len__() - - def append(self, item): - self._image_parts.append(item) - - def get_or_add_image_part(self, image_descriptor): - """ - Return an |ImagePart| instance containing the image identified by - *image_descriptor*, newly created if a matching one is not present in - the collection. - """ - image = Image.from_file(image_descriptor) - matching_image_part = self._get_by_sha1(image.sha1) - if matching_image_part is not None: - return matching_image_part - return self._add_image_part(image) - - def _add_image_part(self, image): - """ - Return an |ImagePart| instance newly created from image and appended - to the collection. - """ - partname = self._next_image_partname(image.ext) - image_part = ImagePart.from_image(image, partname) - self.append(image_part) - return image_part - - def _get_by_sha1(self, sha1): - """ - Return the image part in this collection having a SHA1 hash matching - *sha1*, or |None| if not found. - """ - for image_part in self._image_parts: - if image_part.sha1 == sha1: - return image_part - return None - - def _next_image_partname(self, ext): - """ - The next available image partname, starting from - ``/word/media/image1.{ext}`` where unused numbers are reused. The - partname is unique by number, without regard to the extension. *ext* - does not include the leading period. 
- """ - def image_partname(n): - return PackURI('/word/media/image%d.%s' % (n, ext)) - used_numbers = [image_part.partname.idx for image_part in self] - for n in range(1, len(self)+1): - if n not in used_numbers: - return image_partname(n) - return image_partname(len(self)+1) diff --git a/docx/parts/document.py b/docx/parts/document.py deleted file mode 100644 index e7ff08e8b..000000000 --- a/docx/parts/document.py +++ /dev/null @@ -1,201 +0,0 @@ -# encoding: utf-8 - -""" -|DocumentPart| and closely related objects -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from collections import Sequence - -from ..blkcntnr import BlockItemContainer -from ..enum.section import WD_SECTION -from ..opc.constants import RELATIONSHIP_TYPE as RT -from ..opc.package import XmlPart -from ..section import Section -from ..shape import InlineShape -from ..shared import lazyproperty, Parented - - -class DocumentPart(XmlPart): - """ - Main document part of a WordprocessingML (WML) package, aka a .docx file. - """ - def add_paragraph(self, text='', style=None): - """ - Return a paragraph newly added to the end of body content. - """ - return self.body.add_paragraph(text, style) - - def add_section(self, start_type=WD_SECTION.NEW_PAGE): - """ - Return a |Section| object representing a new section added at the end - of the document. - """ - new_sectPr = self._element.body.add_section_break() - new_sectPr.start_type = start_type - return Section(new_sectPr) - - def add_table(self, rows, cols): - """ - Return a table having *rows* rows and *cols* columns, newly appended - to the main document story. - """ - return self.body.add_table(rows, cols) - - @lazyproperty - def body(self): - """ - The |_Body| instance containing the content for this document. 
- """ - return _Body(self._element.body, self) - - def get_or_add_image_part(self, image_descriptor): - """ - Return an ``(image_part, rId)`` 2-tuple for the image identified by - *image_descriptor*. *image_part* is an |Image| instance corresponding - to the image, newly created if no matching image part is found. *rId* - is the key for the relationship between this document part and the - image part, reused if already present, newly created if not. - """ - image_parts = self._package.image_parts - image_part = image_parts.get_or_add_image_part(image_descriptor) - rId = self.relate_to(image_part, RT.IMAGE) - return (image_part, rId) - - @lazyproperty - def inline_shapes(self): - """ - The |InlineShapes| instance containing the inline shapes in the - document. - """ - return InlineShapes(self._element.body, self) - - @property - def next_id(self): - """ - The next available positive integer id value in this document. Gaps - in id sequence are filled. The id attribute value is unique in the - document, without regard to the element type it appears on. - """ - id_str_lst = self._element.xpath('//@id') - used_ids = [int(id_str) for id_str in id_str_lst if id_str.isdigit()] - for n in range(1, len(used_ids)+2): - if n not in used_ids: - return n - - @property - def paragraphs(self): - """ - A list of |Paragraph| instances corresponding to the paragraphs in - the document, in document order. Note that paragraphs within revision - marks such as inserted or deleted do not appear in this list. - """ - return self.body.paragraphs - - @lazyproperty - def sections(self): - """ - The |Sections| instance organizing the sections in this document. - """ - return Sections(self._element) - - @property - def tables(self): - """ - A list of |Table| instances corresponding to the tables in the - document, in document order. Note that tables within revision marks - such as ```` or ```` do not appear in this list. 
- """ - return self.body.tables - - -class _Body(BlockItemContainer): - """ - Proxy for ```` element in this document, having primarily a - container role. - """ - def __init__(self, body_elm, parent): - super(_Body, self).__init__(body_elm, parent) - self._body = body_elm - - def clear_content(self): - """ - Return this |_Body| instance after clearing it of all content. - Section properties for the main document story, if present, are - preserved. - """ - self._body.clear_content() - return self - - -class InlineShapes(Parented): - """ - Sequence of |InlineShape| instances, supporting len(), iteration, and - indexed access. - """ - def __init__(self, body_elm, parent): - super(InlineShapes, self).__init__(parent) - self._body = body_elm - - def __getitem__(self, idx): - """ - Provide indexed access, e.g. 'inline_shapes[idx]' - """ - try: - inline = self._inline_lst[idx] - except IndexError: - msg = "inline shape index [%d] out of range" % idx - raise IndexError(msg) - return InlineShape(inline) - - def __iter__(self): - return (InlineShape(inline) for inline in self._inline_lst) - - def __len__(self): - return len(self._inline_lst) - - def add_picture(self, image_descriptor, run): - """ - Return an |InlineShape| instance containing the picture identified by - *image_descriptor* and added to the end of *run*. The picture shape - has the native size of the image. *image_descriptor* can be a path (a - string) or a file-like object containing a binary image. - """ - image_part, rId = self.part.get_or_add_image_part(image_descriptor) - shape_id = self.part.next_id - r = run._r - picture = InlineShape.new_picture(r, image_part, rId, shape_id) - return picture - - @property - def _inline_lst(self): - body = self._body - xpath = '//w:p/w:r/w:drawing/wp:inline' - return body.xpath(xpath) - - -class Sections(Sequence): - """ - Sequence of |Section| objects corresponding to the sections in the - document. Supports ``len()``, iteration, and indexed access. 
- """ - def __init__(self, document_elm): - super(Sections, self).__init__() - self._document_elm = document_elm - - def __getitem__(self, key): - if isinstance(key, slice): - sectPr_lst = self._document_elm.sectPr_lst[key] - return [Section(sectPr) for sectPr in sectPr_lst] - sectPr = self._document_elm.sectPr_lst[key] - return Section(sectPr) - - def __iter__(self): - for sectPr in self._document_elm.sectPr_lst: - yield Section(sectPr) - - def __len__(self): - return len(self._document_elm.sectPr_lst) diff --git a/docx/parts/image.py b/docx/parts/image.py deleted file mode 100644 index 9cc698697..000000000 --- a/docx/parts/image.py +++ /dev/null @@ -1,89 +0,0 @@ -# encoding: utf-8 - -""" -The proxy class for an image part, and related objects. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -import hashlib - -from docx.image.image import Image -from docx.opc.package import Part -from docx.shared import Emu, Inches - - -class ImagePart(Part): - """ - An image part. Corresponds to the target part of a relationship with type - RELATIONSHIP_TYPE.IMAGE. - """ - def __init__(self, partname, content_type, blob, image=None): - super(ImagePart, self).__init__(partname, content_type, blob) - self._image = image - - @property - def default_cx(self): - """ - Native width of this image, calculated from its width in pixels and - horizontal dots per inch (dpi). - """ - px_width = self.image.px_width - horz_dpi = self.image.horz_dpi - width_in_inches = px_width / horz_dpi - return Inches(width_in_inches) - - @property - def default_cy(self): - """ - Native height of this image, calculated from its height in pixels and - vertical dots per inch (dpi). - """ - px_height = self.image.px_height - horz_dpi = self.image.horz_dpi - height_in_emu = 914400 * px_height / horz_dpi - return Emu(height_in_emu) - - @property - def filename(self): - """ - Filename from which this image part was originally created. A generic - name, e.g. 
'image.png', is substituted if no name is available, for - example when the image was loaded from an unnamed stream. In that - case a default extension is applied based on the detected MIME type - of the image. - """ - if self._image is not None: - return self._image.filename - return 'image.%s' % self.partname.ext - - @classmethod - def from_image(cls, image, partname): - """ - Return an |ImagePart| instance newly created from *image* and - assigned *partname*. - """ - return ImagePart(partname, image.content_type, image.blob, image) - - @property - def image(self): - if self._image is None: - self._image = Image.from_blob(self.blob) - return self._image - - @classmethod - def load(cls, partname, content_type, blob, package): - """ - Called by ``docx.opc.package.PartFactory`` to load an image part from - a package being opened by ``Document(...)`` call. - """ - return cls(partname, content_type, blob) - - @property - def sha1(self): - """ - SHA1 hash digest of the blob of this image part. - """ - return hashlib.sha1(self._blob).hexdigest() diff --git a/docx/parts/numbering.py b/docx/parts/numbering.py deleted file mode 100644 index e9c8f713d..000000000 --- a/docx/parts/numbering.py +++ /dev/null @@ -1,47 +0,0 @@ -# encoding: utf-8 - -""" -|NumberingPart| and closely related objects -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from ..opc.package import XmlPart -from ..shared import lazyproperty - - -class NumberingPart(XmlPart): - """ - Proxy for the numbering.xml part containing numbering definitions for - a document or glossary. - """ - @classmethod - def new(cls): - """ - Return newly created empty numbering part, containing only the root - ```` element. - """ - raise NotImplementedError - - @lazyproperty - def numbering_definitions(self): - """ - The |_NumberingDefinitions| instance containing the numbering - definitions ( element proxies) for this numbering part. 
- """ - return _NumberingDefinitions(self._element) - - -class _NumberingDefinitions(object): - """ - Collection of |_NumberingDefinition| instances corresponding to the - ```` elements in a numbering part. - """ - def __init__(self, numbering_elm): - super(_NumberingDefinitions, self).__init__() - self._numbering = numbering_elm - - def __len__(self): - return len(self._numbering.num_lst) diff --git a/docx/parts/styles.py b/docx/parts/styles.py deleted file mode 100644 index d9f4cfda9..000000000 --- a/docx/parts/styles.py +++ /dev/null @@ -1,47 +0,0 @@ -# encoding: utf-8 - -""" -Provides StylesPart and related objects -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from ..opc.package import XmlPart -from ..shared import lazyproperty - - -class StylesPart(XmlPart): - """ - Proxy for the styles.xml part containing style definitions for a document - or glossary. - """ - @classmethod - def new(cls): - """ - Return newly created empty styles part, containing only the root - ```` element. - """ - raise NotImplementedError - - @lazyproperty - def styles(self): - """ - The |_Styles| instance containing the styles ( element - proxies) for this styles part. - """ - return _Styles(self._element) - - -class _Styles(object): - """ - Collection of |_Style| instances corresponding to the ```` - elements in a styles part. - """ - def __init__(self, styles_elm): - super(_Styles, self).__init__() - self._styles_elm = styles_elm - - def __len__(self): - return len(self._styles_elm.style_lst) diff --git a/docx/section.py b/docx/section.py deleted file mode 100644 index 0bdcd17dd..000000000 --- a/docx/section.py +++ /dev/null @@ -1,159 +0,0 @@ -# encoding: utf-8 - -""" -The |Section| object and related proxy classes. -""" - -from __future__ import absolute_import, print_function, unicode_literals - - -class Section(object): - """ - Document section, providing access to section and page setup settings. 
- """ - def __init__(self, sectPr): - super(Section, self).__init__() - self._sectPr = sectPr - - @property - def bottom_margin(self): - """ - |Length| object representing the bottom margin for all pages in this - section in English Metric Units. - """ - return self._sectPr.bottom_margin - - @bottom_margin.setter - def bottom_margin(self, value): - self._sectPr.bottom_margin = value - - @property - def footer_distance(self): - """ - |Length| object representing the distance from the bottom edge of the - page to the bottom edge of the footer. |None| if no setting is present - in the XML. - """ - return self._sectPr.footer - - @footer_distance.setter - def footer_distance(self, value): - self._sectPr.footer = value - - @property - def gutter(self): - """ - |Length| object representing the page gutter size in English Metric - Units for all pages in this section. The page gutter is extra spacing - added to the *inner* margin to ensure even margins after page - binding. - """ - return self._sectPr.gutter - - @gutter.setter - def gutter(self, value): - self._sectPr.gutter = value - - @property - def header_distance(self): - """ - |Length| object representing the distance from the top edge of the - page to the top edge of the header. |None| if no setting is present - in the XML. - """ - return self._sectPr.header - - @header_distance.setter - def header_distance(self, value): - self._sectPr.header = value - - @property - def left_margin(self): - """ - |Length| object representing the left margin for all pages in this - section in English Metric Units. - """ - return self._sectPr.left_margin - - @left_margin.setter - def left_margin(self, value): - self._sectPr.left_margin = value - - @property - def orientation(self): - """ - Member of the :ref:`WdOrientation` enumeration specifying the page - orientation for this section, one of ``WD_ORIENT.PORTRAIT`` or - ``WD_ORIENT.LANDSCAPE``. 
- """ - return self._sectPr.orientation - - @orientation.setter - def orientation(self, value): - self._sectPr.orientation = value - - @property - def page_height(self): - """ - Total page height used for this section, inclusive of all edge spacing - values such as margins. Page orientation is taken into account, so - for example, its expected value would be ``Inches(8.5)`` for - letter-sized paper when orientation is landscape. - """ - return self._sectPr.page_height - - @page_height.setter - def page_height(self, value): - self._sectPr.page_height = value - - @property - def page_width(self): - """ - Total page width used for this section, inclusive of all edge spacing - values such as margins. Page orientation is taken into account, so - for example, its expected value would be ``Inches(11)`` for - letter-sized paper when orientation is landscape. - """ - return self._sectPr.page_width - - @page_width.setter - def page_width(self, value): - self._sectPr.page_width = value - - @property - def right_margin(self): - """ - |Length| object representing the right margin for all pages in this - section in English Metric Units. - """ - return self._sectPr.right_margin - - @right_margin.setter - def right_margin(self, value): - self._sectPr.right_margin = value - - @property - def start_type(self): - """ - The member of the :ref:`WdSectionStart` enumeration corresponding to - the initial break behavior of this section, e.g. - ``WD_SECTION.ODD_PAGE`` if the section should begin on the next odd - page. - """ - return self._sectPr.start_type - - @start_type.setter - def start_type(self, value): - self._sectPr.start_type = value - - @property - def top_margin(self): - """ - |Length| object representing the top margin for all pages in this - section in English Metric Units. 
- """ - return self._sectPr.top_margin - - @top_margin.setter - def top_margin(self, value): - self._sectPr.top_margin = value diff --git a/docx/shape.py b/docx/shape.py deleted file mode 100644 index c1fe9742a..000000000 --- a/docx/shape.py +++ /dev/null @@ -1,88 +0,0 @@ -# encoding: utf-8 - -""" -Objects related to shapes, visual objects that appear on the drawing layer of -a document. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from .enum.shape import WD_INLINE_SHAPE -from .oxml.shape import CT_Inline, CT_Picture -from .oxml.ns import nsmap - - -class InlineShape(object): - """ - Proxy for an ```` element, representing the container for an - inline graphical object. - """ - def __init__(self, inline): - super(InlineShape, self).__init__() - self._inline = inline - - @property - def height(self): - """ - Read/write. The display height of this inline shape as an |Emu| - instance. - """ - return self._inline.extent.cy - - @height.setter - def height(self, cy): - assert isinstance(cy, int) - assert 0 < cy - self._inline.extent.cy = cy - - @classmethod - def new_picture(cls, r, image_part, rId, shape_id): - """ - Return a new |InlineShape| instance containing an inline picture - placement of *image_part* appended to run *r* and uniquely identified - by *shape_id*. - """ - cx, cy, filename = ( - image_part.default_cx, image_part.default_cy, image_part.filename - ) - pic_id = 0 - pic = CT_Picture.new(pic_id, filename, rId, cx, cy) - inline = CT_Inline.new(cx, cy, shape_id, pic) - r.add_drawing(inline) - return cls(inline) - - @property - def type(self): - """ - The type of this inline shape as a member of - ``docx.enum.shape.WD_INLINE_SHAPE``, e.g. ``LINKED_PICTURE``. - Read-only. 
- """ - graphicData = self._inline.graphic.graphicData - uri = graphicData.uri - if uri == nsmap['pic']: - blip = graphicData.pic.blipFill.blip - if blip.link is not None: - return WD_INLINE_SHAPE.LINKED_PICTURE - return WD_INLINE_SHAPE.PICTURE - if uri == nsmap['c']: - return WD_INLINE_SHAPE.CHART - if uri == nsmap['dgm']: - return WD_INLINE_SHAPE.SMART_ART - return WD_INLINE_SHAPE.NOT_IMPLEMENTED - - @property - def width(self): - """ - Read/write. The display width of this inline shape as an |Emu| - instance. - """ - return self._inline.extent.cx - - @width.setter - def width(self, cx): - assert isinstance(cx, int) - assert 0 < cx - self._inline.extent.cx = cx diff --git a/docx/shared.py b/docx/shared.py deleted file mode 100644 index f7cd4e147..000000000 --- a/docx/shared.py +++ /dev/null @@ -1,183 +0,0 @@ -# encoding: utf-8 - -""" -Objects shared by docx modules. -""" - -from __future__ import absolute_import, print_function, unicode_literals - - -class Length(int): - """ - Base class for length constructor classes Inches, Cm, Mm, Px, and Emu. - Behaves as an int count of English Metric Units, 914,400 to the inch, - 36,000 to the mm. Provides convenience unit conversion methods in the form - of read-only properties. Immutable. - """ - _EMUS_PER_INCH = 914400 - _EMUS_PER_CM = 360000 - _EMUS_PER_MM = 36000 - _EMUS_PER_PX = 12700 - _EMUS_PER_TWIP = 635 - - def __new__(cls, emu): - return int.__new__(cls, emu) - - @property - def cm(self): - """ - The equivalent length expressed in centimeters (float). - """ - return self / float(self._EMUS_PER_CM) - - @property - def emu(self): - """ - The equivalent length expressed in English Metric Units (int). - """ - return self - - @property - def inches(self): - """ - The equivalent length expressed in inches (float). - """ - return self / float(self._EMUS_PER_INCH) - - @property - def mm(self): - """ - The equivalent length expressed in millimeters (float). 
- """ - return self / float(self._EMUS_PER_MM) - - @property - def px(self): - # round can somtimes return values like x.999999 which are truncated - # to x by int(); adding the 0.1 prevents this - return int(round(self / float(self._EMUS_PER_PX)) + 0.1) - - @property - def twips(self): - """ - The equivalent length expressed in twips (int). - """ - return int(round(self / float(self._EMUS_PER_TWIP))) - - -class Inches(Length): - """ - Convenience constructor for length in inches, e.g. - ``width = Inches(0.5)``. - """ - def __new__(cls, inches): - emu = int(inches * Length._EMUS_PER_INCH) - return Length.__new__(cls, emu) - - -class Cm(Length): - """ - Convenience constructor for length in centimeters, e.g. - ``height = Cm(12)``. - """ - def __new__(cls, cm): - emu = int(cm * Length._EMUS_PER_CM) - return Length.__new__(cls, emu) - - -class Emu(Length): - """ - Convenience constructor for length in English Metric Units, e.g. - ``width = Emu(457200)``. - """ - def __new__(cls, emu): - return Length.__new__(cls, int(emu)) - - -class Mm(Length): - """ - Convenience constructor for length in millimeters, e.g. - ``width = Mm(240.5)``. - """ - def __new__(cls, mm): - emu = int(mm * Length._EMUS_PER_MM) - return Length.__new__(cls, emu) - - -class Pt(int): - """ - Convenience class for setting font sizes in points - """ - _UNITS_PER_POINT = 100 - - def __new__(cls, pts): - units = int(pts * Pt._UNITS_PER_POINT) - return int.__new__(cls, units) - - -class Px(Length): - """ - Convenience constructor for length in pixels. - """ - def __new__(cls, px): - emu = int(px * Length._EMUS_PER_PX) - return Length.__new__(cls, emu) - - -class Twips(Length): - """ - Convenience constructor for length in twips, e.g. ``width = Twips(42)``. - A twip is a twentieth of a point, 635 EMU. - """ - def __new__(cls, twips): - emu = int(twips * Length._EMUS_PER_TWIP) - return Length.__new__(cls, emu) - - -def lazyproperty(f): - """ - @lazyprop decorator. 
Decorated method will be called only on first access - to calculate a cached property value. After that, the cached value is - returned. - """ - cache_attr_name = '_%s' % f.__name__ # like '_foobar' for prop 'foobar' - docstring = f.__doc__ - - def get_prop_value(obj): - try: - return getattr(obj, cache_attr_name) - except AttributeError: - value = f(obj) - setattr(obj, cache_attr_name, value) - return value - - return property(get_prop_value, doc=docstring) - - -def write_only_property(f): - """ - @write_only_property decorator. Creates a property (descriptor attribute) - that accepts assignment, but not getattr (use in an expression). - """ - docstring = f.__doc__ - - return property(fset=f, doc=docstring) - - -class Parented(object): - """ - Provides common services for document elements that occur below a part - but may occasionally require an ancestor object to provide a service, - such as add or drop a relationship. Provides ``self._parent`` attribute - to subclasses. - """ - def __init__(self, parent): - super(Parented, self).__init__() - self._parent = parent - - @property - def part(self): - """ - The package part containing this object - """ - return self._parent.part diff --git a/docx/table.py b/docx/table.py deleted file mode 100644 index 544553b1e..000000000 --- a/docx/table.py +++ /dev/null @@ -1,342 +0,0 @@ -# encoding: utf-8 - -""" -The |Table| object and related proxy classes. -""" - -from __future__ import absolute_import, print_function, unicode_literals - -from .blkcntnr import BlockItemContainer -from .shared import lazyproperty, Parented, write_only_property - - -class Table(Parented): - """ - Proxy class for a WordprocessingML ```` element. - """ - def __init__(self, tbl, parent): - super(Table, self).__init__(parent) - self._tbl = tbl - - def add_column(self): - """ - Return a |_Column| instance, newly added rightmost to the table. 
- """ - tblGrid = self._tbl.tblGrid - gridCol = tblGrid.add_gridCol() - for tr in self._tbl.tr_lst: - tr.add_tc() - return _Column(gridCol, self._tbl, self) - - def add_row(self): - """ - Return a |_Row| instance, newly added bottom-most to the table. - """ - tbl = self._tbl - tr = tbl.add_tr() - for gridCol in tbl.tblGrid.gridCol_lst: - tr.add_tc() - return _Row(tr, self) - - @property - def autofit(self): - """ - |True| if column widths can be automatically adjusted to improve the - fit of cell contents. |False| if table layout is fixed. Column widths - are adjusted in either case if total column width exceeds page width. - Read/write boolean. - """ - return self._tblPr.autofit - - @autofit.setter - def autofit(self, value): - self._tblPr.autofit = value - - def cell(self, row_idx, col_idx): - """ - Return |_Cell| instance correponding to table cell at *row_idx*, - *col_idx* intersection, where (0, 0) is the top, left-most cell. - """ - row = self.rows[row_idx] - return row.cells[col_idx] - - @lazyproperty - def columns(self): - """ - |_Columns| instance containing the sequence of rows in this table. - """ - return _Columns(self._tbl, self) - - @lazyproperty - def rows(self): - """ - |_Rows| instance containing the sequence of rows in this table. - """ - return _Rows(self._tbl, self) - - @property - def style(self): - """ - String name of style to be applied to this table, e.g. - 'LightShading-Accent1'. Name is derived by removing spaces from the - table style name displayed in the Word UI. - """ - return self._tblPr.style - - @style.setter - def style(self, value): - self._tblPr.style = value - - @property - def _tblPr(self): - return self._tbl.tblPr - - -class _Cell(BlockItemContainer): - """ - Table cell - """ - def __init__(self, tc, parent): - super(_Cell, self).__init__(tc, parent) - self._tc = tc - - def add_paragraph(self, text='', style=None): - """ - Return a paragraph newly added to the end of the content in this - cell. 
If present, *text* is added to the paragraph in a single run. - If specified, the paragraph style *style* is applied. If *style* is - not specified or is |None|, the result is as though the 'Normal' - style was applied. Note that the formatting of text in a cell can be - influenced by the table style. *text* can contain tab (``\\t``) - characters, which are converted to the appropriate XML form for - a tab. *text* can also include newline (``\\n``) or carriage return - (``\\r``) characters, each of which is converted to a line break. - """ - return super(_Cell, self).add_paragraph(text, style) - - def add_table(self, rows, cols): - """ - Return a table newly added to this cell after any existing cell - content, having *rows* rows and *cols* columns. An empty paragraph is - added after the table because Word requires a paragraph element as - the last element in every cell. - """ - new_table = super(_Cell, self).add_table(rows, cols) - self.add_paragraph() - return new_table - - @property - def paragraphs(self): - """ - List of paragraphs in the cell. A table cell is required to contain - at least one block-level element and end with a paragraph. By - default, a new cell contains a single paragraph. Read-only - """ - return super(_Cell, self).paragraphs - - @property - def tables(self): - """ - List of tables in the cell, in the order they appear. Read-only. - """ - return super(_Cell, self).tables - - @write_only_property - def text(self, text): - """ - Write-only. Set entire contents of cell to the string *text*. Any - existing content or revisions are replaced. - """ - tc = self._tc - tc.clear_content() - p = tc.add_p() - r = p.add_r() - r.text = text - - @property - def width(self): - """ - The width of this cell in EMU, or |None| if no explicit width is set. 
- """ - return self._tc.width - - @width.setter - def width(self, value): - self._tc.width = value - - -class _Column(Parented): - """ - Table column - """ - def __init__(self, gridCol, tbl, parent): - super(_Column, self).__init__(parent) - self._gridCol = gridCol - self._tbl = tbl - - @lazyproperty - def cells(self): - """ - Sequence of |_Cell| instances corresponding to cells in this column. - Supports ``len()``, iteration and indexed access. - """ - return _ColumnCells(self._tbl, self._gridCol, self) - - @property - def width(self): - """ - The width of this column in EMU, or |None| if no explicit width is - set. - """ - return self._gridCol.w - - @width.setter - def width(self, value): - self._gridCol.w = value - - -class _ColumnCells(Parented): - """ - Sequence of |_Cell| instances corresponding to the cells in a table - column. - """ - def __init__(self, tbl, gridCol, parent): - super(_ColumnCells, self).__init__(parent) - self._tbl = tbl - self._gridCol = gridCol - - def __getitem__(self, idx): - """ - Provide indexed access, (e.g. 'cells[0]') - """ - try: - tr = self._tr_lst[idx] - except IndexError: - msg = "cell index [%d] is out of range" % idx - raise IndexError(msg) - tc = tr.tc_lst[self._col_idx] - return _Cell(tc, self) - - def __iter__(self): - for tr in self._tr_lst: - tc = tr.tc_lst[self._col_idx] - yield _Cell(tc, self) - - def __len__(self): - return len(self._tr_lst) - - @property - def _col_idx(self): - gridCol_lst = self._tbl.tblGrid.gridCol_lst - return gridCol_lst.index(self._gridCol) - - @property - def _tr_lst(self): - return self._tbl.tr_lst - - -class _Columns(Parented): - """ - Sequence of |_Column| instances corresponding to the columns in a table. - Supports ``len()``, iteration and indexed access. - """ - def __init__(self, tbl, parent): - super(_Columns, self).__init__(parent) - self._tbl = tbl - - def __getitem__(self, idx): - """ - Provide indexed access, e.g. 
'columns[0]' - """ - try: - gridCol = self._gridCol_lst[idx] - except IndexError: - msg = "column index [%d] is out of range" % idx - raise IndexError(msg) - return _Column(gridCol, self._tbl, self) - - def __iter__(self): - for gridCol in self._gridCol_lst: - yield _Column(gridCol, self._tbl, self) - - def __len__(self): - return len(self._gridCol_lst) - - @property - def _gridCol_lst(self): - """ - Sequence containing ```` elements for this table, each - representing a table column. - """ - tblGrid = self._tbl.tblGrid - return tblGrid.gridCol_lst - - -class _Row(Parented): - """ - Table row - """ - def __init__(self, tr, parent): - super(_Row, self).__init__(parent) - self._tr = tr - - @lazyproperty - def cells(self): - """ - Sequence of |_Cell| instances corresponding to cells in this row. - Supports ``len()``, iteration and indexed access. - """ - return _RowCells(self._tr, self) - - -class _RowCells(Parented): - """ - Sequence of |_Cell| instances corresponding to the cells in a table row. - """ - def __init__(self, tr, parent): - super(_RowCells, self).__init__(parent) - self._tr = tr - - def __getitem__(self, idx): - """ - Provide indexed access, (e.g. 'cells[0]') - """ - try: - tc = self._tr.tc_lst[idx] - except IndexError: - msg = "cell index [%d] is out of range" % idx - raise IndexError(msg) - return _Cell(tc, self) - - def __iter__(self): - return (_Cell(tc, self) for tc in self._tr.tc_lst) - - def __len__(self): - return len(self._tr.tc_lst) - - -class _Rows(Parented): - """ - Sequence of |_Row| instances corresponding to the rows in a table. - Supports ``len()``, iteration and indexed access. - """ - def __init__(self, tbl, parent): - super(_Rows, self).__init__(parent) - self._tbl = tbl - - def __getitem__(self, idx): - """ - Provide indexed access, (e.g. 
'rows[0]') - """ - try: - tr = self._tbl.tr_lst[idx] - except IndexError: - msg = "row index [%d] out of range" % idx - raise IndexError(msg) - return _Row(tr, self) - - def __iter__(self): - return (_Row(tr, self) for tr in self._tbl.tr_lst) - - def __len__(self): - return len(self._tbl.tr_lst) diff --git a/docx/templates/default-src.docx b/docx/templates/default-src.docx deleted file mode 100644 index 31c8e20b4..000000000 Binary files a/docx/templates/default-src.docx and /dev/null differ diff --git a/docx/templates/default.docx b/docx/templates/default.docx deleted file mode 100644 index 62c580eb5..000000000 Binary files a/docx/templates/default.docx and /dev/null differ diff --git a/docx/text.py b/docx/text.py deleted file mode 100644 index 0c551beeb..000000000 --- a/docx/text.py +++ /dev/null @@ -1,489 +0,0 @@ -# encoding: utf-8 - -""" -Text-related proxy types for python-docx, such as Paragraph and Run. -""" - -from __future__ import absolute_import, print_function, unicode_literals - -from .enum.text import WD_BREAK -from .shared import Parented - - -def boolproperty(f): - """ - @boolproperty decorator. Decorated method must return the XML element - name of the boolean property element occuring under rPr. Causes - a read/write tri-state property to be added to the class having the name - of the decorated function. 
- """ - def _get_prop_value(parent, attr_name): - return getattr(parent, attr_name) - - def _remove_prop(parent, attr_name): - remove_method_name = '_remove_%s' % attr_name - remove_method = getattr(parent, remove_method_name) - remove_method() - - def _add_prop(parent, attr_name): - add_method_name = '_add_%s' % attr_name - add_method = getattr(parent, add_method_name) - return add_method() - - def getter(obj): - r, attr_name = obj._r, f(obj) - if r.rPr is None: - return None - prop_value = _get_prop_value(r.rPr, attr_name) - if prop_value is None: - return None - return prop_value.val - - def setter(obj, value): - if value not in (True, False, None): - raise ValueError( - "assigned value must be True, False, or None, got '%s'" - % value - ) - r, attr_name = obj._r, f(obj) - rPr = r.get_or_add_rPr() - _remove_prop(rPr, attr_name) - if value is not None: - elm = _add_prop(rPr, attr_name) - elm.val = value - - return property(getter, setter, doc=f.__doc__) - - -class Paragraph(Parented): - """ - Proxy object wrapping ```` element. - """ - def __init__(self, p, parent): - super(Paragraph, self).__init__(parent) - self._p = p - - def add_run(self, text=None, style=None): - """ - Append a run to this paragraph containing *text* and having character - style identified by style ID *style*. *text* can contain tab - (``\\t``) characters, which are converted to the appropriate XML form - for a tab. *text* can also include newline (``\\n``) or carriage - return (``\\r``) characters, each of which is converted to a line - break. - """ - r = self._p.add_r() - run = Run(r, self) - if text: - run.text = text - if style: - run.style = style - return run - - @property - def alignment(self): - """ - A member of the :ref:`WdParagraphAlignment` enumeration specifying - the justification setting for this paragraph. A value of |None| - indicates the paragraph has no directly-applied alignment value and - will inherit its alignment value from its style hierarchy. 
Assigning - |None| to this property removes any directly-applied alignment value. - """ - return self._p.alignment - - @alignment.setter - def alignment(self, value): - self._p.alignment = value - - def clear(self): - """ - Return this same paragraph after removing all its content. - Paragraph-level formatting, such as style, is preserved. - """ - self._p.clear_content() - return self - - def insert_paragraph_before(self, text=None, style=None): - """ - Return a newly created paragraph, inserted directly before this - paragraph. If *text* is supplied, the new paragraph contains that - text in a single run. If *style* is provided, that style is assigned - to the new paragraph. - """ - p = self._p.add_p_before() - paragraph = Paragraph(p, self._parent) - if text: - paragraph.add_run(text) - if style is not None: - paragraph.style = style - return paragraph - - @property - def runs(self): - """ - Sequence of |Run| instances corresponding to the elements in - this paragraph. - """ - return [Run(r, self) for r in self._p.r_lst] - - @property - def style(self): - """ - Paragraph style for this paragraph. Read/Write. - """ - style = self._p.style - return style if style is not None else 'Normal' - - @style.setter - def style(self, style): - self._p.style = None if style == 'Normal' else style - - @property - def text(self): - """ - String formed by concatenating the text of each run in the paragraph. - Tabs and line breaks in the XML are mapped to ``\\t`` and ``\\n`` - characters respectively. - - Assigning text to this property causes all existing paragraph content - to be replaced with a single run containing the assigned text. - A ``\\t`` character in the text is mapped to a ```` element - and each ``\\n`` or ``\\r`` character is mapped to a line break. - Paragraph-level formatting, such as style, is preserved. All - run-level formatting, such as bold or italic, is removed. 
- """ - text = '' - for run in self.runs: - text += run.text - return text - - @text.setter - def text(self, text): - self.clear() - self.add_run(text) - - -class Run(Parented): - """ - Proxy object wrapping ```` element. Several of the properties on Run - take a tri-state value, |True|, |False|, or |None|. |True| and |False| - correspond to on and off respectively. |None| indicates the property is - not specified directly on the run and its effective value is taken from - the style hierarchy. - """ - def __init__(self, r, parent): - super(Run, self).__init__(parent) - self._r = r - - def add_break(self, break_type=WD_BREAK.LINE): - """ - Add a break element of *break_type* to this run. *break_type* can - take the values `WD_BREAK.LINE`, `WD_BREAK.PAGE`, and - `WD_BREAK.COLUMN` where `WD_BREAK` is imported from `docx.enum.text`. - *break_type* defaults to `WD_BREAK.LINE`. - """ - type_, clear = { - WD_BREAK.LINE: (None, None), - WD_BREAK.PAGE: ('page', None), - WD_BREAK.COLUMN: ('column', None), - WD_BREAK.LINE_CLEAR_LEFT: ('textWrapping', 'left'), - WD_BREAK.LINE_CLEAR_RIGHT: ('textWrapping', 'right'), - WD_BREAK.LINE_CLEAR_ALL: ('textWrapping', 'all'), - }[break_type] - br = self._r.add_br() - if type_ is not None: - br.type = type_ - if clear is not None: - br.clear = clear - - def add_picture(self, image_path_or_stream, width=None, height=None): - """ - Return an |InlineShape| instance containing the image identified by - *image_path_or_stream*, added to the end of this run. - *image_path_or_stream* can be a path (a string) or a file-like object - containing a binary image. If neither width nor height is specified, - the picture appears at its native size. If only one is specified, it - is used to compute a scaling factor that is then applied to the - unspecified dimension, preserving the aspect ratio of the image. 
The - native size of the picture is calculated using the dots-per-inch - (dpi) value specified in the image file, defaulting to 72 dpi if no - value is specified, as is often the case. - """ - inline_shapes = self.part.inline_shapes - picture = inline_shapes.add_picture(image_path_or_stream, self) - - # scale picture dimensions if width and/or height provided - if width is not None or height is not None: - native_width, native_height = picture.width, picture.height - if width is None: - scaling_factor = float(height) / float(native_height) - width = int(round(native_width * scaling_factor)) - elif height is None: - scaling_factor = float(width) / float(native_width) - height = int(round(native_height * scaling_factor)) - # set picture to scaled dimensions - picture.width = width - picture.height = height - - return picture - - def add_tab(self): - """ - Add a ```` element at the end of the run, which Word - interprets as a tab character. - """ - self._r._add_tab() - - def add_text(self, text): - """ - Returns a newly appended |Text| object (corresponding to a new - ```` child element) to the run, containing *text*. Compare with - the possibly more friendly approach of assigning text to the - :attr:`Run.text` property. - """ - t = self._r.add_t(text) - return Text(t) - - @boolproperty - def all_caps(self): - """ - Read/write. Causes the text of the run to appear in capital letters. - """ - return 'caps' - - @boolproperty - def bold(self): - """ - Read/write. Causes the text of the run to appear in bold. - """ - return 'b' - - def clear(self): - """ - Return reference to this run after removing all its content. All run - formatting is preserved. - """ - self._r.clear_content() - return self - - @boolproperty - def complex_script(self): - """ - Read/write tri-state value. When |True|, causes the characters in the - run to be treated as complex script regardless of their Unicode - values. 
- """ - return 'cs' - - @boolproperty - def cs_bold(self): - """ - Read/write tri-state value. When |True|, causes the complex script - characters in the run to be displayed in bold typeface. - """ - return 'bCs' - - @boolproperty - def cs_italic(self): - """ - Read/write tri-state value. When |True|, causes the complex script - characters in the run to be displayed in italic typeface. - """ - return 'iCs' - - @boolproperty - def double_strike(self): - """ - Read/write tri-state value. When |True|, causes the text in the run - to appear with double strikethrough. - """ - return 'dstrike' - - @boolproperty - def emboss(self): - """ - Read/write tri-state value. When |True|, causes the text in the run - to appear as if raised off the page in relief. - """ - return 'emboss' - - @boolproperty - def hidden(self): - """ - Read/write tri-state value. When |True|, causes the text in the run - to be hidden from display, unless applications settings force hidden - text to be shown. - """ - return 'vanish' - - @boolproperty - def italic(self): - """ - Read/write tri-state value. When |True|, causes the text of the run - to appear in italics. - """ - return 'i' - - @boolproperty - def imprint(self): - """ - Read/write tri-state value. When |True|, causes the text in the run - to appear as if pressed into the page. - """ - return 'imprint' - - @boolproperty - def math(self): - """ - Read/write tri-state value. When |True|, specifies this run contains - WML that should be handled as though it was Office Open XML Math. - """ - return 'oMath' - - @boolproperty - def no_proof(self): - """ - Read/write tri-state value. When |True|, specifies that the contents - of this run should not report any errors when the document is scanned - for spelling and grammar. - """ - return 'noProof' - - @boolproperty - def outline(self): - """ - Read/write tri-state value. 
When |True| causes the characters in the - run to appear as if they have an outline, by drawing a one pixel wide - border around the inside and outside borders of each character glyph. - """ - return 'outline' - - @boolproperty - def rtl(self): - """ - Read/write tri-state value. When |True| causes the text in the run - to have right-to-left characteristics. - """ - return 'rtl' - - @boolproperty - def shadow(self): - """ - Read/write tri-state value. When |True| causes the text in the run - to appear as if each character has a shadow. - """ - return 'shadow' - - @boolproperty - def small_caps(self): - """ - Read/write tri-state value. When |True| causes the lowercase - characters in the run to appear as capital letters two points smaller - than the font size specified for the run. - """ - return 'smallCaps' - - @boolproperty - def snap_to_grid(self): - """ - Read/write tri-state value. When |True| causes the run to use the - document grid characters per line settings defined in the docGrid - element when laying out the characters in this run. - """ - return 'snapToGrid' - - @boolproperty - def spec_vanish(self): - """ - Read/write tri-state value. When |True|, specifies that the given run - shall always behave as if it is hidden, even when hidden text is - being displayed in the current document. The property has a very - narrow, specialized use related to the table of contents. Consult the - spec (§17.3.2.36) for more details. - """ - return 'specVanish' - - @boolproperty - def strike(self): - """ - Read/write tri-state value. When |True| causes the text in the run - to appear with a single horizontal line through the center of the - line. - """ - return 'strike' - - @property - def style(self): - """ - Read/write. The string style ID of the character style applied to - this run, or |None| if it has no directly-applied character style. 
- Setting this property to |None| causes any directly-applied character - style to be removed such that the run inherits character formatting - from its containing paragraph. - """ - return self._r.style - - @style.setter - def style(self, char_style): - self._r.style = char_style - - @property - def text(self): - """ - String formed by concatenating the text equivalent of each run - content child element into a Python string. Each ```` element - adds the text characters it contains. A ```` element adds - a ``\\t`` character. A ```` or ```` element each add - a ``\\n`` character. Note that a ```` element can indicate - a page break or column break as well as a line break. All ```` - elements translate to a single ``\\n`` character regardless of their - type. All other content child elements, such as ````, are - ignored. - - Assigning text to this property has the reverse effect, translating - each ``\\t`` character to a ```` element and each ``\\n`` or - ``\\r`` character to a ```` element. Any existing run content - is replaced. Run formatting is preserved. - """ - return self._r.text - - @text.setter - def text(self, text): - self._r.text = text - - @property - def underline(self): - """ - The underline style for this |Run|, one of |None|, |True|, |False|, - or a value from :ref:`WdUnderline`. A value of |None| indicates the - run has no directly-applied underline value and so will inherit the - underline value of its containing paragraph. Assigning |None| to this - property removes any directly-applied underline value. A value of - |False| indicates a directly-applied setting of no underline, - overriding any inherited value. A value of |True| indicates single - underline. The values from :ref:`WdUnderline` are used to specify - other outline styles such as double, wavy, and dotted. 
- """ - return self._r.underline - - @underline.setter - def underline(self, value): - self._r.underline = value - - @boolproperty - def web_hidden(self): - """ - Read/write tri-state value. When |True|, specifies that the contents - of this run shall be hidden when the document is displayed in web - page view. - """ - return 'webHidden' - - -class Text(object): - """ - Proxy object wrapping ```` element. - """ - def __init__(self, t_elm): - super(Text, self).__init__() - self._t = t_elm diff --git a/features/api-add-heading.feature b/features/api-add-heading.feature deleted file mode 100644 index 145f818f3..000000000 --- a/features/api-add-heading.feature +++ /dev/null @@ -1,28 +0,0 @@ -Feature: Add a section heading with text - In order add a section heading to a document - As a programmer using the basic python-docx API - I need a method to add a heading with its text in a single step - - Scenario: Add a heading specifying only its text - Given a document - When I add a heading specifying only its text - Then the style of the last paragraph is 'Heading1' - And the last paragraph contains the heading text - - Scenario Outline: Add a heading specifying level - Given a document - When I add a heading specifying level= - Then the style of the last paragraph is '' - - Examples: Heading level styles - | heading level | paragraph style | - | 0 | Title | - | 1 | Heading1 | - | 2 | Heading2 | - | 3 | Heading3 | - | 4 | Heading4 | - | 5 | Heading5 | - | 6 | Heading6 | - | 7 | Heading7 | - | 8 | Heading8 | - | 9 | Heading9 | diff --git a/features/api-add-table.feature b/features/api-add-table.feature deleted file mode 100644 index 555385502..000000000 --- a/features/api-add-table.feature +++ /dev/null @@ -1,16 +0,0 @@ -Feature: Add a table - In order to include tablular information in a document - As a programmer using the basic python-docx API - I need a method that adds a table at the end of the document - - Scenario: Add a table specifying only row and column count - 
Given a document - When I add a 2 x 2 table specifying only row and column count - Then the document contains a 2 x 2 table - And the table style is 'LightShading-Accent1' - - Scenario: Add a table specifying style - Given a document - When I add a 2 x 2 table specifying style 'foobar' - Then the document contains a 2 x 2 table - And the table style is 'foobar' diff --git a/features/api-open-document.feature b/features/api-open-document.feature new file mode 100644 index 000000000..9f9f67c70 --- /dev/null +++ b/features/api-open-document.feature @@ -0,0 +1,16 @@ +Feature: Open a document + In order work on a document + As a developer using python-docx + I need a way to open a document + + + Scenario: Open a specified document + Given I have python-docx installed + When I call docx.Document() with the path of a .docx file + Then document is a Document object + + + Scenario: Open the default document + Given I have python-docx installed + When I call docx.Document() with no arguments + Then document is a Document object diff --git a/features/blk-add-paragraph.feature b/features/blk-add-paragraph.feature index 73e42c4c2..f873b3775 100644 --- a/features/blk-add-paragraph.feature +++ b/features/blk-add-paragraph.feature @@ -1,6 +1,6 @@ Feature: Add a paragraph of text In order to populate the text of a document - As an python-docx developer + As a developer using python-docx I need the ability to add a paragraph Scenario: Add a paragraph using low-level text API diff --git a/features/blk-add-table.feature b/features/blk-add-table.feature index 3e3696a0f..e13143e56 100644 --- a/features/blk-add-table.feature +++ b/features/blk-add-table.feature @@ -1,6 +1,6 @@ Feature: Add a table In order to fulfill a requirement for a table in a document - As an python-docx developer + As a developer using python-docx I need the ability to add a table Scenario: Access a table diff --git a/features/blk-iter-inner-content.feature b/features/blk-iter-inner-content.feature new file mode 
100644 index 000000000..047efb9ee --- /dev/null +++ b/features/blk-iter-inner-content.feature @@ -0,0 +1,24 @@ +Feature: Iterate paragraphs and tables in document-order + In order to access paragraphs and tables in the same order they appear in the document + As a developer using python-docx + I need the ability to iterate the inner-content of a block-item-container + + + Scenario: Document.iter_inner_content() + Given a Document object with paragraphs and tables + Then document.iter_inner_content() produces the block-items in document order + + + Scenario: Header.iter_inner_content() + Given a Header object with paragraphs and tables + Then header.iter_inner_content() produces the block-items in document order + + + Scenario: Footer.iter_inner_content() + Given a Footer object with paragraphs and tables + Then footer.iter_inner_content() produces the block-items in document order + + + Scenario: _Cell.iter_inner_content() + Given a _Cell object with paragraphs and tables + Then cell.iter_inner_content() produces the block-items in document order diff --git a/features/cmt-mutations.feature b/features/cmt-mutations.feature new file mode 100644 index 000000000..1ef9ad2db --- /dev/null +++ b/features/cmt-mutations.feature @@ -0,0 +1,59 @@ +Feature: Comment mutations + In order to add and modify the content of a comment + As a developer using python-docx + I need mutation methods on Comment objects + + + Scenario: Comments.add_comment() + Given a Comments object with 0 comments + When I assign comment = comments.add_comment() + Then comment.comment_id == 0 + And len(comment.paragraphs) == 1 + And comment.paragraphs[0].style.name == "CommentText" + And len(comments) == 1 + And comments.get(0) == comment + + + Scenario: Comments.add_comment() specifying author and initials + Given a Comments object with 0 comments + When I assign comment = comments.add_comment(author="John Doe", initials="JD") + Then comment.author == "John Doe" + And comment.initials == "JD" + + + 
Scenario: Comment.add_paragraph() specifying text and style + Given a default Comment object + When I assign paragraph = comment.add_paragraph(text, style) + Then len(comment.paragraphs) == 2 + And paragraph.text == text + And paragraph.style == style + And comment.paragraphs[-1] == paragraph + + + Scenario: Comment.add_paragraph() not specifying text or style + Given a default Comment object + When I assign paragraph = comment.add_paragraph() + Then len(comment.paragraphs) == 2 + And paragraph.text == "" + And paragraph.style == "CommentText" + And comment.paragraphs[-1] == paragraph + + + Scenario: Add image to comment + Given a default Comment object + When I assign paragraph = comment.add_paragraph() + And I assign run = paragraph.add_run() + And I call run.add_picture() + Then run.iter_inner_content() yields a single Picture drawing + + + Scenario: update Comment.author + Given a Comment object + When I assign "Jane Smith" to comment.author + Then comment.author == "Jane Smith" + + + Scenario: update Comment.initials + Given a Comment object + When I assign "JS" to comment.initials + Then comment.initials == "JS" diff --git a/features/cmt-props.feature b/features/cmt-props.feature new file mode 100644 index 000000000..e4e620828 --- /dev/null +++ b/features/cmt-props.feature @@ -0,0 +1,35 @@ +Feature: Get comment properties + In order to characterize comments by their metadata + As a developer using python-docx + I need methods to access comment metadata properties + + + Scenario: Comment.id + Given a Comment object + Then comment.comment_id is the comment identifier + + + Scenario: Comment.author + Given a Comment object + Then comment.author is the author of the comment + + + Scenario: Comment.initials + Given a Comment object + Then comment.initials is the initials of the comment author + + + Scenario: Comment.timestamp + Given a Comment object + Then comment.timestamp is the date and time the comment was authored + + + Scenario: Comment.paragraphs[0].text + 
Given a Comment object + When I assign para_text = comment.paragraphs[0].text + Then para_text is the text of the first paragraph in the comment + + + Scenario: Retrieve embedded image from a comment + Given a Comment object containing an embedded image + Then I can extract the image from the comment diff --git a/features/doc-access-collections.feature b/features/doc-access-collections.feature new file mode 100644 index 000000000..0233d5989 --- /dev/null +++ b/features/doc-access-collections.feature @@ -0,0 +1,29 @@ +Feature: Access document collections + In order to operate on objects related to a document + As a developer using python-docx + I need a way to access each of the document's collections + + + Scenario: Access the inline shapes collection of a document + Given a document having inline shapes + Then document.inline_shapes is an InlineShapes object + + + Scenario: Access the paragraphs in the document body as a list + Given a document containing three paragraphs + Then document.paragraphs is a list containing three paragraphs + + + Scenario: Access the section collection of a document + Given a document having sections + Then document.sections is a Sections object + + + Scenario: Access the styles collection of a document + Given a document having styles + Then document.styles is a Styles object + + + Scenario: Access the tables collection of a document + Given a document having three tables + Then document.tables is a list containing three tables diff --git a/features/doc-access-sections.feature b/features/doc-access-sections.feature index 8cb836c42..ad2a58ad8 100644 --- a/features/doc-access-sections.feature +++ b/features/doc-access-sections.feature @@ -1,16 +1,11 @@ Feature: Access document sections - In order to discover and apply section-level settings + In order to operate on an individual section As a developer using python-docx - I need a way to access document sections - - - Scenario: Access section collection of a document - Given a document 
having three sections - Then I can access the section collection of the document - And the length of the section collection is 3 + I need access to each section in the section collection Scenario: Access section in section collection - Given a section collection - Then I can iterate over the sections + Given a section collection containing 3 sections + Then len(sections) is 3 + And I can iterate over the sections And I can access a section by index diff --git a/features/doc-add-comment.feature b/features/doc-add-comment.feature new file mode 100644 index 000000000..36f46244a --- /dev/null +++ b/features/doc-add-comment.feature @@ -0,0 +1,13 @@ +Feature: Add a comment to a document + In order to add a comment to a document + As a developer using python-docx + I need a way to add a comment specifying both its content and its reference + + + Scenario: Document.add_comment(runs, text, author, initials) + Given a document having a comments part + When I assign comment = document.add_comment(runs, "A comment", "John Doe", "JD") + Then comment is a Comment object + And comment.text == "A comment" + And comment.author == "John Doe" + And comment.initials == "JD" diff --git a/features/doc-add-heading.feature b/features/doc-add-heading.feature new file mode 100644 index 000000000..8c23137b7 --- /dev/null +++ b/features/doc-add-heading.feature @@ -0,0 +1,25 @@ +Feature: Add a heading paragraph + In order to add a heading to a document + As a developer using python-docx + I need a way to add a heading with its text and level in a single step + + + Scenario: Add a heading specifying only its text + Given a document having built-in styles + When I add a heading specifying only its text + Then the style of the last paragraph is 'Heading 1' + And the last paragraph contains the heading text + + + Scenario Outline: Add a heading specifying level + Given a document having built-in styles + When I add a heading specifying level= + Then the style of the last paragraph is '