diff --git a/.fdignore b/.fdignore new file mode 100644 index 000000000..41bdd3828 --- /dev/null +++ b/.fdignore @@ -0,0 +1,7 @@ +.tox +Session.vim +build/ +docs/.build +features/_scratch +__pycache__/ +src/*.egg-info diff --git a/.gitignore b/.gitignore index de25a6f76..5aabfd8cc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,10 @@ +/build/ .coverage /dist/ /docs/.build/ -/*.egg-info +/src/*.egg-info *.pyc +.pytest_cache/ _scratch/ Session.vim /.tox/ diff --git a/.projections.json b/.projections.json new file mode 100644 index 000000000..7d68dd4c5 --- /dev/null +++ b/.projections.json @@ -0,0 +1,14 @@ +{ + "src/docx/*.py" : { + "alternate" : [ + "tests/{dirname}/test_{basename}.py" + ], + "type" : "source" + }, + "tests/**/test_*.py" : { + "alternate" : [ + "src/docx/{dirname}/{basename}.py" + ], + "type" : "test" + } +} diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 000000000..125538586 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,19 @@ +version: 2 + +# -- set the OS, Python version and other tools you might need -- +build: + os: ubuntu-22.04 + tools: + python: "3.9" + +# -- build documentation in the "docs/" directory with Sphinx -- +sphinx: + configuration: docs/conf.py + # -- fail on all warnings to avoid broken references -- + # fail_on_warning: true + +# -- package versions required to build your documentation -- +# -- see https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html -- +python: + install: + - requirements: requirements-docs.txt diff --git a/.rgignore b/.rgignore new file mode 100644 index 000000000..12d71b5b4 --- /dev/null +++ b/.rgignore @@ -0,0 +1,9 @@ +.tox +Session.vim +build/ +docs/.build +features/_scratch +__pycache__/ +ref/ +src/*.egg-info +tests/test_files diff --git a/.travis.yml b/.travis.yml index 3345ff24f..6ce09e8e6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,8 @@ language: python python: - - "3.4" - - "3.3" + - "3.8" + - "3.6" - "2.7" - - "2.6" # command to install 
dependencies, e.g. pip install -r requirements.txt --use-mirrors install: pip install -r requirements.txt # command to run tests, e.g. python setup.py test diff --git a/HISTORY.rst b/HISTORY.rst index 5b947d561..69bba4161 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,101 @@ Release History --------------- +1.2.0 (2025-06-16) +++++++++++++++++++ + +- Add support for comments +- Drop support for Python 3.8, add testing for Python 3.13 + + +1.1.2 (2024-05-01) +++++++++++++++++++ + +- Fix #1383 Revert lxml<=4.9.2 pin that breaks Python 3.12 install +- Fix #1385 Support use of Part._rels by python-docx-template +- Add support and testing for Python 3.12 + + +1.1.1 (2024-04-29) +++++++++++++++++++ + +- Fix #531, #1146 Index error on table with misaligned borders +- Fix #1335 Tolerate invalid float value in bottom-margin +- Fix #1337 Do not require typing-extensions at runtime + + +1.1.0 (2023-11-03) +++++++++++++++++++ + +- Add BlockItemContainer.iter_inner_content() + + +1.0.1 (2023-10-12) +++++++++++++++++++ + +- Fix #1256: parse_xml() and OxmlElement moved. +- Add Hyperlink.fragment and .url + + +1.0.0 (2023-10-01) ++++++++++++++++++++ + +- Remove Python 2 support. Supported versions are 3.7+ +- Fix #85: Paragraph.text includes hyperlink text +- Add #1113: Hyperlink.address +- Add Hyperlink.contains_page_break +- Add Hyperlink.runs +- Add Hyperlink.text +- Add Paragraph.contains_page_break +- Add Paragraph.hyperlinks +- Add Paragraph.iter_inner_content() +- Add Paragraph.rendered_page_breaks +- Add RenderedPageBreak.following_paragraph_fragment +- Add RenderedPageBreak.preceding_paragraph_fragment +- Add Run.contains_page_break +- Add Run.iter_inner_content() +- Add Section.iter_inner_content() + + +0.8.11 (2021-05-15) ++++++++++++++++++++ + +- Small build changes and Python 3.8 version changes like collections.abc location. 
+ + +0.8.10 (2019-01-08) ++++++++++++++++++++ + +- Revert use of expanded package directory for default.docx to work around setup.py + problem with filenames containing square brackets. + + +0.8.9 (2019-01-08) +++++++++++++++++++ + +- Fix gap in MANIFEST.in that excluded default document template directory + + +0.8.8 (2019-01-07) +++++++++++++++++++ + +- Add support for headers and footers + + +0.8.7 (2018-08-18) +++++++++++++++++++ + +- Add _Row.height_rule +- Add _Row.height +- Add _Cell.vertical_alignment +- Fix #455: increment next_id, don't fill gaps +- Add #375: import docx failure on --OO optimization +- Add #254: remove default zoom percentage +- Add #266: miscellaneous documentation fixes +- Add #175: refine MANIFEST.ini +- Add #168: Unicode error on core-props in Python 2 + + 0.8.6 (2016-06-22) ++++++++++++++++++ diff --git a/MANIFEST.in b/MANIFEST.in index 6419bc8a0..b2d3fadcf 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,10 @@ include HISTORY.rst LICENSE README.rst tox.ini -recursive-include tests *.py -recursive-include features * -recursive-include docx/templates * -recursive-include tests/test_files * - +include requirements*.txt +graft src/docx/templates +graft features +graft tests +graft docs +prune docs/.build +global-exclude .DS_Store +global-exclude __pycache__ +global-exclude *.py[co] diff --git a/Makefile b/Makefile index f335818fe..2b2fb4121 100644 --- a/Makefile +++ b/Makefile @@ -1,46 +1,64 @@ BEHAVE = behave MAKE = make PYTHON = python -SETUP = $(PYTHON) ./setup.py +TWINE = $(PYTHON) -m twine -.PHONY: accept clean coverage docs readme register sdist test upload +.PHONY: accept build clean cleandocs coverage docs install opendocs sdist test +.PHONY: test-upload wheel help: @echo "Please use \`make ' where is one or more of" - @echo " accept run acceptance tests using behave" - @echo " clean delete intermediate work product and start fresh" - @echo " cleandocs delete intermediate documentation files" - @echo " coverage run nosetests 
with coverage" - @echo " docs generate documentation" - @echo " opendocs open browser to local version of documentation" - @echo " register update metadata (README.rst) on PyPI" - @echo " sdist generate a source distribution into dist/" - @echo " upload upload distribution tarball to PyPI" + @echo " accept run acceptance tests using behave" + @echo " build generate both sdist and wheel suitable for upload to PyPI" + @echo " clean delete intermediate work product and start fresh" + @echo " cleandocs delete intermediate documentation files" + @echo " coverage run pytest with coverage" + @echo " docs generate documentation" + @echo " opendocs open browser to local version of documentation" + @echo " register update metadata (README.rst) on PyPI" + @echo " sdist generate a source distribution into dist/" + @echo " test run unit tests using pytest" + @echo " test-upload upload distribution to TestPyPI" + @echo " upload upload distribution tarball to PyPI" + @echo " wheel generate a binary distribution into dist/" accept: - $(BEHAVE) --stop + uv run $(BEHAVE) --stop + +build: + uv build clean: - find . -type f -name \*.pyc -exec rm {} \; + # find . -type f -name \*.pyc -exec rm {} \; + fd -e pyc -I -x rm rm -rf dist *.egg-info .coverage .DS_Store cleandocs: $(MAKE) -C docs clean coverage: - py.test --cov-report term-missing --cov=docx tests/ + uv run pytest --cov-report term-missing --cov=docx tests/ docs: $(MAKE) -C docs html +install: + pip install -Ue . 
+ opendocs: open docs/.build/html/index.html -register: - $(SETUP) register - sdist: - $(SETUP) sdist + uv build --sdist + +test: + uv run pytest -x + +test-upload: sdist wheel + uv run $(TWINE) upload --repository testpypi dist/* + +upload: clean sdist wheel + uv run $(TWINE) upload dist/* -upload: - $(SETUP) sdist upload +wheel: + uv build --wheel diff --git a/README.md b/README.md new file mode 100644 index 000000000..c35cf0200 --- /dev/null +++ b/README.md @@ -0,0 +1,26 @@ +# python-docx + +*python-docx* is a Python library for reading, creating, and updating Microsoft Word 2007+ (.docx) files. + +## Installation + +``` +pip install python-docx +``` + +## Example + +```python +>>> from docx import Document + +>>> document = Document() +>>> document.add_paragraph("It was a dark and stormy night.") + +>>> document.save("dark-and-stormy.docx") + +>>> document = Document("dark-and-stormy.docx") +>>> document.paragraphs[0].text +'It was a dark and stormy night.' +``` + +More information is available in the [python-docx documentation](https://python-docx.readthedocs.org/en/latest/) diff --git a/README.rst b/README.rst deleted file mode 100644 index 82d1f0bd7..000000000 --- a/README.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. image:: https://travis-ci.org/python-openxml/python-docx.svg?branch=master - :target: https://travis-ci.org/python-openxml/python-docx - -*python-docx* is a Python library for creating and updating Microsoft Word -(.docx) files. - -More information is available in the `python-docx documentation`_. - -.. 
_`python-docx documentation`: - https://python-docx.readthedocs.org/en/latest/ diff --git a/docs/_static/img/comment-parts.png b/docs/_static/img/comment-parts.png new file mode 100644 index 000000000..c7db1be54 Binary files /dev/null and b/docs/_static/img/comment-parts.png differ diff --git a/docs/_static/img/hdrftr-01.png b/docs/_static/img/hdrftr-01.png new file mode 100644 index 000000000..5309ef2a3 Binary files /dev/null and b/docs/_static/img/hdrftr-01.png differ diff --git a/docs/_static/img/hdrftr-02.png b/docs/_static/img/hdrftr-02.png new file mode 100644 index 000000000..01af2981a Binary files /dev/null and b/docs/_static/img/hdrftr-02.png differ diff --git a/docs/api/comments.rst b/docs/api/comments.rst new file mode 100644 index 000000000..a54ecc9ce --- /dev/null +++ b/docs/api/comments.rst @@ -0,0 +1,27 @@ + +.. _comments_api: + +Comment-related objects +======================= + +.. currentmodule:: docx.comments + + +|Comments| objects +------------------ + +.. autoclass:: Comments() + :members: + :inherited-members: + :exclude-members: + part + + +|Comment| objects +------------------ + +.. autoclass:: Comment() + :members: + :inherited-members: + :exclude-members: + part diff --git a/docs/api/document.rst b/docs/api/document.rst index 8ab9ecfe4..42ec0211f 100644 --- a/docs/api/document.rst +++ b/docs/api/document.rst @@ -50,68 +50,68 @@ if that behavior is desired. .. attribute:: author - *string* -- An entity primarily responsible for making the content of the + `string` -- An entity primarily responsible for making the content of the resource. .. attribute:: category - *string* -- A categorization of the content of this package. Example + `string` -- A categorization of the content of this package. Example values might include: Resume, Letter, Financial Forecast, Proposal, or Technical Presentation. .. attribute:: comments - *string* -- An account of the content of the resource. + `string` -- An account of the content of the resource. .. 
attribute:: content_status - *string* -- completion status of the document, e.g. 'draft' + `string` -- completion status of the document, e.g. 'draft' .. attribute:: created - *datetime* -- time of intial creation of the document + `datetime` -- time of initial creation of the document .. attribute:: identifier - *string* -- An unambiguous reference to the resource within a given + `string` -- An unambiguous reference to the resource within a given context, e.g. ISBN. .. attribute:: keywords - *string* -- descriptive words or short phrases likely to be used as + `string` -- descriptive words or short phrases likely to be used as search terms for this document .. attribute:: language - *string* -- language the document is written in + `string` -- language the document is written in .. attribute:: last_modified_by - *string* -- name or other identifier (such as email address) of person + `string` -- name or other identifier (such as email address) of person who last modified the document .. attribute:: last_printed - *datetime* -- time the document was last printed + `datetime` -- time the document was last printed .. attribute:: modified - *datetime* -- time the document was last modified + `datetime` -- time the document was last modified .. attribute:: revision - *int* -- number of this revision, incremented by Word each time the + `int` -- number of this revision, incremented by Word each time the document is saved. Note however |docx| does not automatically increment the revision number when it saves a document. .. attribute:: subject - *string* -- The topic of the content of the resource. + `string` -- The topic of the content of the resource. .. attribute:: title - *string* -- The name given to the resource. + `string` -- The name given to the resource. .. 
attribute:: version - *string* -- free-form version string + `string` -- free-form version string diff --git a/docs/api/enum/WdCellVerticalAlignment.rst b/docs/api/enum/WdCellVerticalAlignment.rst new file mode 100644 index 000000000..8a2f76022 --- /dev/null +++ b/docs/api/enum/WdCellVerticalAlignment.rst @@ -0,0 +1,32 @@ +.. _WdCellVerticalAlignment: + +``WD_CELL_VERTICAL_ALIGNMENT`` +============================== + +alias: **WD_ALIGN_VERTICAL** + +Specifies the vertical alignment of text in one or more cells of a table. + +Example:: + + from docx.enum.table import WD_ALIGN_VERTICAL + + table = document.add_table(3, 3) + table.cell(0, 0).vertical_alignment = WD_ALIGN_VERTICAL.BOTTOM + +---- + +TOP + Text is aligned to the top border of the cell. + +CENTER + Text is aligned to the center of the cell. + +BOTTOM + Text is aligned to the bottom border of the cell. + +BOTH + This is an option in the OpenXml spec, but not in Word itself. It's not + clear what Word behavior this setting produces. If you find out please let + us know and we'll update this documentation. Otherwise, probably best to + avoid this option. diff --git a/docs/api/enum/WdLineSpacing.rst b/docs/api/enum/WdLineSpacing.rst index b03e7dd17..f28142e2d 100644 --- a/docs/api/enum/WdLineSpacing.rst +++ b/docs/api/enum/WdLineSpacing.rst @@ -10,7 +10,7 @@ Example:: from docx.enum.text import WD_LINE_SPACING paragraph = document.add_paragraph() - paragraph.line_spacing_rule = WD_LINE_SPACING.EXACTLY + paragraph.paragraph_format.line_spacing_rule = WD_LINE_SPACING.EXACTLY ---- diff --git a/docs/api/enum/WdRowHeightRule.rst b/docs/api/enum/WdRowHeightRule.rst new file mode 100644 index 000000000..a72d4bdae --- /dev/null +++ b/docs/api/enum/WdRowHeightRule.rst @@ -0,0 +1,26 @@ +.. 
_WdRowHeightRule: + +``WD_ROW_HEIGHT_RULE`` +====================== + +alias: **WD_ROW_HEIGHT** + +Specifies the rule for determining the height of a table row + +Example:: + + from docx.enum.table import WD_ROW_HEIGHT_RULE + + table = document.add_table(3, 3) + table.rows[0].height_rule = WD_ROW_HEIGHT_RULE.EXACTLY + +---- + +AUTO + The row height is adjusted to accommodate the tallest value in the row. + +AT_LEAST + The row height is at least a minimum specified value. + +EXACTLY + The row height is an exact value. diff --git a/docs/api/enum/index.rst b/docs/api/enum/index.rst index f6ba1e261..ce76e7f51 100644 --- a/docs/api/enum/index.rst +++ b/docs/api/enum/index.rst @@ -12,10 +12,12 @@ can be found here: MsoThemeColorIndex WdAlignParagraph WdBuiltinStyle + WdCellVerticalAlignment WdColorIndex WdLineSpacing WdOrientation WdRowAlignment + WdRowHeightRule WdSectionStart WdStyleType WdTabAlignment diff --git a/docs/api/section.rst b/docs/api/section.rst index 9aeb6ca1a..e2d547c75 100644 --- a/docs/api/section.rst +++ b/docs/api/section.rst @@ -22,4 +22,20 @@ Provides access to section properties such as margins and page orientation. .. autoclass:: Section - :members: + :members: + + +|_Header| and |_Footer| objects +------------------------------- + + +.. autoclass:: _Header() + :inherited-members: + :members: + :exclude-members: part + + +.. autoclass:: _Footer() + :inherited-members: + :members: + :exclude-members: part diff --git a/docs/api/shared.rst b/docs/api/shared.rst index 215e5338c..161b8bac4 100644 --- a/docs/api/shared.rst +++ b/docs/api/shared.rst @@ -52,7 +52,7 @@ allowing values to be expressed in the units most appropriate to the context. :members: :undoc-members: - *r*, *g*, and *b* are each an integer in the range 0-255 inclusive. Using + `r`, `g`, and `b` are each an integer in the range 0-255 inclusive. Using the hexidecimal integer notation, e.g. 
`0x42` may enhance readability where hex RGB values are in use:: diff --git a/docs/api/style.rst b/docs/api/style.rst index e1647caac..afee95c00 100644 --- a/docs/api/style.rst +++ b/docs/api/style.rst @@ -6,7 +6,7 @@ Style-related objects A style is used to collect a set of formatting properties under a single name and apply those properties to a content object all at once. This promotes -formatting consistency thoroughout a document and across related documents +formatting consistency throughout a document and across related documents and allows formatting changes to be made globally by changing the definition in the appropriate style. @@ -35,10 +35,10 @@ in the appropriate style. part, style_id -|_CharacterStyle| objects +|CharacterStyle| objects ------------------------- -.. autoclass:: _CharacterStyle() +.. autoclass:: CharacterStyle() :show-inheritance: :members: :inherited-members: @@ -46,10 +46,10 @@ in the appropriate style. element, part, style_id, type -|_ParagraphStyle| objects +|ParagraphStyle| objects ------------------------- -.. autoclass:: _ParagraphStyle() +.. autoclass:: ParagraphStyle() :show-inheritance: :members: :inherited-members: diff --git a/docs/api/table.rst b/docs/api/table.rst index 215bf807c..6f27670fa 100644 --- a/docs/api/table.rst +++ b/docs/api/table.rst @@ -22,7 +22,9 @@ Table objects are constructed using the ``add_table()`` method on |Document|. ------------------------ .. autoclass:: _Cell + :inherited-members: :members: + :exclude-members: part |_Row| objects diff --git a/docs/api/text.rst b/docs/api/text.rst index cc9b4892f..f76e3ba33 100644 --- a/docs/api/text.rst +++ b/docs/api/text.rst @@ -19,6 +19,13 @@ Text-related objects :members: +|Hyperlink| objects +------------------- + +.. autoclass:: docx.text.hyperlink.Hyperlink() + :members: + + |Run| objects ------------- @@ -33,6 +40,13 @@ Text-related objects :members: +|RenderedPageBreak| objects +--------------------------- + +.. 
autoclass:: docx.text.pagebreak.RenderedPageBreak() + :members: + + |TabStop| objects ----------------- diff --git a/docs/conf.py b/docs/conf.py index 5041e8fd7..883ecb81d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,9 +18,9 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, os.path.abspath('..')) +sys.path.insert(0, os.path.abspath("..")) -from docx import __version__ +from docx import __version__ # noqa # -- General configuration --------------------------------------------------- @@ -31,28 +31,28 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.intersphinx', - 'sphinx.ext.todo', - 'sphinx.ext.coverage', - 'sphinx.ext.viewcode' + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", + "sphinx.ext.todo", + "sphinx.ext.coverage", + "sphinx.ext.viewcode", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'python-docx' -copyright = u'2013, Steve Canny' +project = "python-docx" +copyright = "2013, Steve Canny" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -79,7 +79,9 @@ .. |_Cell| replace:: :class:`._Cell` -.. |_CharacterStyle| replace:: :class:`._CharacterStyle` +.. |_CharacterStyle| replace:: :class:`.CharacterStyle` + +.. 
|CharacterStyle| replace:: :class:`.CharacterStyle` .. |Cm| replace:: :class:`.Cm` @@ -89,6 +91,10 @@ .. |_Columns| replace:: :class:`._Columns` +.. |Comment| replace:: :class:`.Comment` + +.. |Comments| replace:: :class:`.Comments` + .. |CoreProperties| replace:: :class:`.CoreProperties` .. |datetime| replace:: :class:`.datetime.datetime` @@ -107,6 +113,18 @@ .. |Font| replace:: :class:`.Font` +.. |_Footer| replace:: :class:`._Footer` + +.. |FooterPart| replace:: :class:`.FooterPart` + +.. |_Header| replace:: :class:`._Header` + +.. |HeaderPart| replace:: :class:`.HeaderPart` + +.. |Hyperlink| replace:: :class:`.Hyperlink` + +.. |ImageParts| replace:: :class:`.ImageParts` + .. |Inches| replace:: :class:`.Inches` .. |InlineShape| replace:: :class:`.InlineShape` @@ -135,7 +153,9 @@ .. |ParagraphFormat| replace:: :class:`.ParagraphFormat` -.. |_ParagraphStyle| replace:: :class:`._ParagraphStyle` +.. |_ParagraphStyle| replace:: :class:`.ParagraphStyle` + +.. |ParagraphStyle| replace:: :class:`.ParagraphStyle` .. |Part| replace:: :class:`.Part` @@ -145,6 +165,8 @@ .. |Relationships| replace:: :class:`._Relationships` +.. |RenderedPageBreak| replace:: :class:`.RenderedPageBreak` + .. |RGBColor| replace:: :class:`.RGBColor` .. |_Row| replace:: :class:`._Row` @@ -183,7 +205,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['.build'] +exclude_patterns = [".build"] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -201,7 +223,7 @@ # show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] @@ -211,7 +233,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
-html_theme = 'armstrong' +html_theme = "armstrong" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -219,7 +241,7 @@ # html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -html_theme_path = ['_themes'] +html_theme_path = ["_themes"] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". @@ -240,7 +262,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. @@ -252,10 +274,7 @@ # Custom sidebar templates, maps document names to template names. # html_sidebars = {} -html_sidebars = { - '**': ['localtoc.html', 'relations.html', 'sidebarlinks.html', - 'searchbox.html'] -} +html_sidebars = {"**": ["localtoc.html", "relations.html", "sidebarlinks.html", "searchbox.html"]} # Additional templates that should be rendered to pages, maps page names to # template names. @@ -288,7 +307,7 @@ # html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'python-docxdoc' +htmlhelp_basename = "python-docxdoc" # -- Options for LaTeX output ----------------------------------------------- @@ -296,10 +315,8 @@ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # 'preamble': '', } @@ -311,8 +328,7 @@ # author, # documentclass [howto/manual]). 
latex_documents = [ - ('index', 'python-docx.tex', u'python-docx Documentation', - u'Steve Canny', 'manual'), + ("index", "python-docx.tex", "python-docx Documentation", "Steve Canny", "manual"), ] # The name of an image file (relative to this directory) to place at the top of @@ -340,10 +356,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'python-docx', u'python-docx Documentation', - [u'Steve Canny'], 1) -] +man_pages = [("index", "python-docx", "python-docx Documentation", ["Steve Canny"], 1)] # If true, show URL addresses after external links. # man_show_urls = False @@ -355,9 +368,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'python-docx', u'python-docx Documentation', - u'Steve Canny', 'python-docx', 'One line description of project.', - 'Miscellaneous'), + ( + "index", + "python-docx", + "python-docx Documentation", + "Steve Canny", + "python-docx", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. @@ -371,4 +390,4 @@ # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'http://docs.python.org/': None} +intersphinx_mapping = {"http://docs.python.org/3/": None} diff --git a/docs/dev/analysis/features/comments.rst b/docs/dev/analysis/features/comments.rst new file mode 100644 index 000000000..153079caf --- /dev/null +++ b/docs/dev/analysis/features/comments.rst @@ -0,0 +1,419 @@ + +Comments +======== + +Word allows *comments* to be added to a document. This is an aspect of the *reviewing* +feature-set and is typically used by a second party to provide feedback to the author +without changing the document itself. 
+ +The procedure is simple: + +- You select some range of text with the mouse or Shift+Arrow keys +- You press the *New Comment* button (Review toolbar) +- You type or paste in your comment + +.. image:: /_static/img/comment-parts.png + +**Comment Anatomy.** Each comment has two parts, the *comment-reference* and the +*comment-content*: + +The *comment-reference*, sometimes *comment-anchor*, is the text you selected before +pressing the *New Comment* button. It is a *range* in the document content delimited by +a start marker and an end marker, and containing the *id* of the comment that refers to +it. + +The *comment-content* is whatever content you typed or pasted in. The content for each +comment is stored in the separate *comments-part* (part-name ``word/comments.xml``) as a +distinct comment object. Each comment has a unique id, allowing a comment reference to +be associated with its content and vice versa. + +**Comment Reference.** The comment-reference is a *range*. A range must both start and +end at an even *run* boundary. Intuitively, a range corresponds to a *selection* of text +in the Word UI, one formed by dragging with the mouse or using the *Shift-Arrow* keys. + +In general a range can span "run containers", such as paragraphs, such that the range +begins in one paragraph and ends in a later paragraph. However, a range must enclose +*contiguous* runs, such that a range that contains only two vertically adjacent cells in +a multi-column table is not possible (even though such a selection with the mouse is +possible). + +**Comment Content.** Interestingly, although commonly used to contain a single line of +plain text, the comment-content can contain essentially any content that can appear in +the document body. This includes rich text with emphasis, runs with a different typeface +and size, both paragraph and character styles, hyperlinks, images, and tables. 
Note that +tables do not appear in the comment as displayed in the *comment-sidebar* although they +do appear in the *reviewing-pane*. + +**Comment Metadata.** Each comment can be assigned *author*, *initials*, and *date* +metadata. In Word, these fields are assigned automatically based on values in ``Settings +> User`` of the installed Word application. These may be configured automatically in an +enterprise installation, based on the user account, but by default they are empty. + +*author* metadata is required, although silently assigned the empty string by Word if +the user name is not configured. *initials* is optional, but always set by Word, to the +empty string if not configured. *date* is also optional, but always set by Word to the +date and time the comment was added (seconds resolution, UTC). + +**Additional Features.** Later versions of Word allow a comment to be *resolved*. A +comment in this state will appear grayed-out in the Word UI. Later versions of Word also +allow a comment to be *replied to*, forming a *comment thread*. Neither of these +features is supported by the initial implementation of comments in *python-docx*. + +The resolved-status and replies features are implemented as *extensions* and involve two +additional comment-related parts: + +- `commentsExtended.xml` - contains completion (resolved) status and parent-id for + threading comment responses; keys to `w15:paraId` of comment paragraph in + `comments.xml` +- `commentsIds.xml` - maps `w16cid:paraId` to `w16cid:durableId`, not sure what that is + exactly. + +**Applicability.** Note that comments cannot be added to a header or footer and cannot +be nested inside a comment itself. In general the *python-docx* API will not allow these +operations but if you outsmart it then the resulting comment will either be silently +removed or trigger a repair error when the document is loaded by Word. 
+ + +Word Behavior +------------- + +- A DOCX package does not contain a ``comments.xml`` part by default. It is added to the + package when the first comment is added to the document. + +- A newly-created comment contains a single paragraph + +- Word starts `w:id` at 0 and increments from there. It appears to use a + `max(comment_ids) + 1` algorithm rather than aggressively filling in id numbering + gaps. + +- Word-behavior: looks like Word doesn't allow a "zero-length" comment reference; if you + insert a comment when no text is selected, the word prior to the insertion-point is + selected. + +- Word allows a comment to be applied to a range that starts before any character and + ends after any later character. However, the XML range-markers can only be placed + between runs. Word accommodates this by breaking runs as necessary to start and stop + at the desired character positions. + + +MS API +------ + +.. highlight:: python + +**Document**:: + + Document.Comments + +**Comments** + +https://learn.microsoft.com/en-us/office/vba/api/word.comments:: + + Comments.Add(Range, Text) -> Comment + + # -- retrieve comment by array idx, not comment_id key -- + Comments.Item(idx: Long) -> Comment + + Comments.Count() -> Long + + # -- restrict visible comments to those by a particular reviewer -- + Comments.ShowBy = "Travis McGuillicuddy" + +**Comment** + +https://learn.microsoft.com/en-us/office/vba/api/word.comment:: + + # -- delete comment and all replies to it -- + Comment.DeleteRecursively() -> void + + # -- open OLE object embedded in comment for editing -- + Comment.Edit() -> void + + # -- get the "parent" comment when this comment is a reply -- + Comment.Ancestor() -> Comment | Nothing + + # -- author of this comment, with email and name fields -- + Comment.Contact -> CoAuthor + + Comment.Date -> Date + Comment.Done -> bool + Comment.IsInk -> bool + + # -- content of the comment, contrast with `Reference` below -- + Comment.Range -> Range + + # -- content within 
document this comment refers to -- + Comment.Reference -> Range + + Comment.Replies -> Comments + + # -- described in API docs like the same thing as `Reference` -- + Comment.Scope -> Range + + +Candidate Protocol +------------------ + +.. highlight:: python + +The critical required reference for adding a comment is the *range* referred to by the +comment; i.e. the "selection" of text that is being commented on. Because this range +must start and end at an even run boundary, it is enough to specify the first and last +run in the range, where a single run can be both the start and end run:: + + >>> paragraph = document.add_paragraph("Hello, world!") + >>> document.add_comment( + ... runs=paragraph.runs, + ... text="I have this to say about that", + ... author="Steve Canny", + ... initials="SC", + ... ) + + +A single run can be provided when that is more convenient:: + + >>> paragraph = document.add_paragraph("Summary: ") + >>> run = paragraph.add_run("{{place-summary-here}}") + >>> document.add_comment( + ... run, text="The AI model will replace this placeholder with a summary" + ... ) + + +Note that `author` and `initials` are optional parameters; both default to the empty +string. + +`text` is also an optional parameter and also defaults to the empty string. 
Omitting a +`text` argument (or passing `text=""`) produces a comment containing a single paragraph +you can immediately add runs to and add additional paragraphs after:: + + >>> paragraph = document.add_paragraph("Summary: ") + >>> run = paragraph.add_run("{{place-summary-here}}") + >>> comment = document.add_comment(run) + >>> paragraph = comment.paragraphs[0] + >>> paragraph.add_run("The ") + >>> paragraph.add_run("AI model").bold = True + >>> paragraph.add_run(" will replace this placeholder with a ") + >>> paragraph.add_run("summary").bold = True + + +A method directly on |Run| may also be convenient, since you will always have the first +run of the range in hand when adding a comment but may not have ready access to the +``document`` object:: + + >>> runs = find_sequence_of_one_or_more_runs_to_comment_on() + >>> runs[0].add_comment( + ... last_run=runs[-1], + ... text="The AI model will replace this placeholder with a summary", + ... ) + + +However, in this situation we would need to qualify the runs as being inside the +document part and not in a header or footer or comment, and perhaps other invalid +comment locations. I believe comments can be applied to footnotes and endnotes though. + + +Specimen XML +------------ + +.. highlight:: xml + +``comments.xml`` (namespace declarations may vary):: + + + + > + + + + + + + + + + I have this to say about that + + + + + + +Comment reference in document body:: + + + + + Hello, world! + + + + + + + + + + + +**Notes** + +- `w:comment` is a *block-item* container, and can contain any content that can appear + in a document body or table cell, including both paragraphs and tables (and whatever + can go inside those, like images, hyperlinks, etc.). + +- Word places the `w:annotationRef`-containing run as the first run in the first + paragraph of the comment. I haven't been able to detect any behavior change caused by + leaving this out or placing it elsewhere in the comment content.
+ +- Relationships referenced from within `w:comment` content are relationships *from the + comments part* to the image part, hyperlink, etc. + +- `w:commentRangeStart` and `w:commentRangeEnd` elements are *optional*. The + authoritative position of the comment is the required `w:commentReference` element. + This means the *ending* location of a comment anchor can be efficiently found using + XPath. + + +Schema Excerpt +-------------- + +**Notes:** + +- `commentRangeStart` and `commentRangeEnd` are both type `CT_MarkupRange` and both + belong to `EG_RunLevelElts` (peers of `w:r`) which gives them their positioning in the + document structure. + +- These two markers can occur at the *block* level, at the *run* level, or at the *table + row* or *cell* level. However Word only seems to use them as peers of `w:r`. These can + occur as a sibling to: + + - a *paragraph* (`w:p`) + - a *table* (`w:tbl`) + - a *run* (`w:r`) + - a *table row* (`w:tr`) + - a *table cell* (`w:tc`) + +.. code-block:: xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/header.rst b/docs/dev/analysis/features/header.rst index 5eac6f5de..1fe75f316 100644 --- a/docs/dev/analysis/features/header.rst +++ b/docs/dev/analysis/features/header.rst @@ -3,39 +3,54 @@ Header and Footer ================= -In a WordprocessingML document, a page header is text that is separated from -the main body of text and appears at the top of a printed page. The page -headers in a document are often the same from page to page, with only small -differences in content, such as a section title or page number. Such a header -is also known as a running head. 
- -In book-printed documents, where pages are intended to be bound on the long -edge and presented side-by-side, the header on a right-hand (recto) page is -often different than that on a left-hand (verso) page. Supporting this -difference gives rise to the option to have an even-page header that differs -from the default odd-page header in a document. - -A page footer is analogous in every way to a page header except that it -appears at the bottom of a page. It should not be confused with a footnote, -which is not uniform between pages. For brevity's sake, the term *header* is -often used below to refer to what may be either a header or footer object, -trusting the reader to understand its applicability to both object types. - -In WordprocessingML, a header or footer appears within the margin area of -a page. With a few exceptions, a header or footer may contain all the types -of content that can appear in the main body, including text and images. Each -header and footer has access to the styles defined in ``/word/styles.xml``. - -Each section has its own set of headers and footers, although a section can -be configured to "inherit" headers and footers from the prior section. Each -section can have three header definitions, the default header, even header, -and first page header. When different even/odd headers are not enabled, the -default header appears on both even and odd numbered pages. If even/odd -headers are enabled, the default header is used for odd pages. -A corresponding set of three footer definitions are also possible. All +In a WordprocessingML document, a page header is text that is separated from the main +body of text and appears at the top of a printed page. The page headers in a document +are often the same from page to page, with only small differences in content, such as +a section title or page number. Such a header is also known as a running head. 
+ +A page footer is analogous in every way to a page header except that it appears at the +bottom of a page. It should not be confused with a footnote, which is not uniform +between pages. For brevity's sake, the term `header` is often used here to refer to what +may be either a header or footer object, trusting the reader to understand its +applicability to both object types. + +In book-printed documents, where pages are printed on both sides, when opened, the front +or `recto` side of each page appears to the right of the bound edge and the back or +`verso` side of each page appears on the left. The first printed page receives the +page-number "1", and is always a recto page. Because pages are numbered consecutively, +each recto page receives an `odd` page number and each verso page receives an `even` +page number. + +The header appearing on a recto page often differs from that on a verso page. Supporting +this difference gives rise to the option to have an even-page header that differs from +the default odd-page header in a document. This "both odd-and-even headers" option is +applied at the document level and affects all sections of the document. + +The header appearing on the first page of a section (e.g. a chapter) may differ from +that appearing on subsequent pages. Supporting this difference gives rise to the option +to set a distinct first-page header. This "different first-page-header" option is +applied at the section level and may differ from section-to-section in the document. + +In WordprocessingML, a header or footer appears within the margin area of a page. With +a few exceptions, a header or footer may contain all the types of content that can +appear in the main body, including text and images. Each header and footer has access to +the styles defined in ``/word/styles.xml``. + +Each section has its own set of headers and footers, although a section can be +configured to "inherit" headers and footers from the prior section. 
Each section can +have three header definitions, the default header, even header, and first page header. +When different even/odd headers are not enabled, the default header appears on both even +and odd numbered pages. If even/odd headers are enabled, the default header is used for +odd pages. A corresponding set of three footer definitions are also possible. All header/footer definitions are optional. +Open Questions +-------------- + +* What about a continuous section break? What is the header/footer behavior there? + + Candidate Protocol ------------------ @@ -96,12 +111,12 @@ Conversely, an existing header is deleted from a section by assigning True to >>> header.is_linked_to_previous True -The document settings object has a read/write -`.odd_and_even_pages_header_footer` property that indicates verso and recto -pages will have a different header. An existing even page header definition is -preserved when `.odd_and_even_pages_header_footer` is False; it is simply not -rendered by Word. Assigning `True` to `.odd_and_even_pages_header_footer` -does not automatically create a new even header definition:: +The document settings object has a read/write `.odd_and_even_pages_header_footer` +property that indicates verso and recto pages will have a different header. Any existing +even page header definitions are preserved when `.odd_and_even_pages_header_footer` is +False; they are simply not rendered by Word. Assigning `True` to +`.odd_and_even_pages_header_footer` does not automatically create new even header +definitions:: >>> document.settings.odd_and_even_pages_header_footer False @@ -174,6 +189,19 @@ Distinct first, even, and odd page headers:: ... 
+A header part:: + + + + + + + + Header for section-1 + + + + Word Behavior ------------- @@ -250,7 +278,7 @@ Schema Excerpt - + diff --git a/docs/dev/analysis/features/sections.rst b/docs/dev/analysis/features/sections.rst index f57c0b4bf..7f9dce91f 100644 --- a/docs/dev/analysis/features/sections.rst +++ b/docs/dev/analysis/features/sections.rst @@ -2,7 +2,7 @@ Sections ======== -Word supports the notion of a *section*, having distinct page layout settings. +Word supports the notion of a `section`, having distinct page layout settings. This is how, for example, a document can contain some pages in portrait layout and others in landscape. Section breaks are implemented completely differently from line, page, and column breaks. The former adds a ```` diff --git a/docs/dev/analysis/features/settings.rst b/docs/dev/analysis/features/settings.rst index 0db461e17..46c816fba 100644 --- a/docs/dev/analysis/features/settings.rst +++ b/docs/dev/analysis/features/settings.rst @@ -25,6 +25,66 @@ Candidate Protocol +Specimen XML +------------ + +.. highlight:: xml + +Default `settings.xml` part for a new document in Word 2016:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Schema Excerpts --------------- diff --git a/docs/dev/analysis/features/shapes/index.rst b/docs/dev/analysis/features/shapes/index.rst index 37b4c49f4..19e42de0e 100644 --- a/docs/dev/analysis/features/shapes/index.rst +++ b/docs/dev/analysis/features/shapes/index.rst @@ -2,8 +2,8 @@ Shapes (in general) =================== -A graphical object that appears in a Word document is known as a *shape*. -A shape can be *inline* or *floating*. An inline shape appears on a text +A graphical object that appears in a Word document is known as a `shape`. +A shape can be `inline` or `floating`. An inline shape appears on a text baseline as though it were a character glyph and affects the line height. 
A floating shape appears at an arbitrary location on the document and text may wrap around it. Several types of shape can be placed, including a picture, a diff --git a/docs/dev/analysis/features/shapes/picture.rst b/docs/dev/analysis/features/shapes/picture.rst index a98c6e906..ca327512a 100644 --- a/docs/dev/analysis/features/shapes/picture.rst +++ b/docs/dev/analysis/features/shapes/picture.rst @@ -12,7 +12,7 @@ Candidate protocol :: >>> run = paragraph.add_run() - >>> inline_shape = run.add_inline_picture(file_like_image, MIME_type=None) + >>> inline_shape = run.add_picture(file_like_image, MIME_type=None) >>> inline_shape.width = width >>> inline_shape.height = height diff --git a/docs/dev/analysis/features/styles/character-style.rst b/docs/dev/analysis/features/styles/character-style.rst index d06046daa..1779872fa 100644 --- a/docs/dev/analysis/features/styles/character-style.rst +++ b/docs/dev/analysis/features/styles/character-style.rst @@ -62,7 +62,7 @@ A baseline regular run:: -Adding *Emphasis* character style:: +Adding `Emphasis` character style:: diff --git a/docs/dev/analysis/features/styles/index.rst b/docs/dev/analysis/features/styles/index.rst index 3cbfcbb27..ddcec1c1b 100644 --- a/docs/dev/analysis/features/styles/index.rst +++ b/docs/dev/analysis/features/styles/index.rst @@ -11,7 +11,7 @@ Styles character-style latent-styles -Word supports the definition of *styles* to allow a group of formatting +Word supports the definition of `styles` to allow a group of formatting properties to be easily and consistently applied to a paragraph, run, table, or numbering scheme, all at once. The mechanism is similar to how Cascading Style Sheets (CSS) works with HTML. 
diff --git a/docs/dev/analysis/features/styles/latent-styles.rst b/docs/dev/analysis/features/styles/latent-styles.rst index 1423e5303..497b0b9f9 100644 --- a/docs/dev/analysis/features/styles/latent-styles.rst +++ b/docs/dev/analysis/features/styles/latent-styles.rst @@ -132,7 +132,7 @@ The `w:latentStyles` element used in the default Word 2011 template:: Latent style behavior --------------------- -* A style has two categories of attribute, *behavioral* and *formatting*. +* A style has two categories of attribute, `behavioral` and `formatting`. Behavioral attributes specify where and when the style should appear in the user interface. Behavioral attributes can be specified for latent styles using the ```` element and its ```` child @@ -157,14 +157,14 @@ Latent style behavior value is 0 if not specified. * **semiHidden**. The `semiHidden` attribute causes the style to be excluded - from the recommended list. The notion of *semi* in this context is that + from the recommended list. The notion of `semi` in this context is that while the style is hidden from the recommended list, it still appears in the "All Styles" list. This attribute is removed on first application of the style if an `unhideWhenUsed` attribute set |True| is also present. * **unhideWhenUsed**. The `unhideWhenUsed` attribute causes any `semiHidden` attribute to be removed when the style is first applied to content. Word - does *not* remove the `semiHidden` attribute just because there exists an + does `not` remove the `semiHidden` attribute just because there exists an object in the document having that style. The `unhideWhenUsed` attribute is not removed along with the `semiHidden` attribute when the style is applied. 
diff --git a/docs/dev/analysis/features/styles/style.rst b/docs/dev/analysis/features/styles/style.rst index 5121b074b..a00ede05d 100644 --- a/docs/dev/analysis/features/styles/style.rst +++ b/docs/dev/analysis/features/styles/style.rst @@ -16,7 +16,7 @@ There are six behavior properties: hidden Style operates to assign formatting properties, but does not appear in - the UI under any circumstances. Used for *internal* styles assigned by an + the UI under any circumstances. Used for `internal` styles assigned by an application that should not be under the control of an end-user. priority @@ -98,10 +98,10 @@ semi-hidden ----------- The `w:semiHidden` element specifies visibility of the style in the so-called -*main* user interface. For Word, this means the style gallery and the +`main` user interface. For Word, this means the style gallery and the recommended, styles-in-use, and in-current-document lists. The all-styles list and current-style dropdown in the styles pane would then be considered -part of an *advanced* user interface. +part of an `advanced` user interface. Behavior ~~~~~~~~ @@ -182,6 +182,8 @@ contents does not cause the `w:semiHidden` element to be removed. Candidate protocol ~~~~~~~~~~~~~~~~~~ +.. highlight:: python + :: >>> style = document.styles['Foo'] @@ -243,6 +245,8 @@ will appear in the gallery in the order specified by `w:uiPriority`. Candidate protocol ~~~~~~~~~~~~~~~~~~ +.. highlight:: python + :: >>> style = document.styles['Foo'] @@ -300,6 +304,8 @@ Behavior Candidate protocol ~~~~~~~~~~~~~~~~~~ +.. highlight:: python + :: >>> style = document.styles['Foo'] @@ -344,6 +350,8 @@ used when writing XML:: Candidate protocols ------------------- +.. 
highlight:: python + Identification:: >>> style = document.styles['Body Text'] @@ -385,7 +393,7 @@ Example XML - + diff --git a/docs/dev/analysis/features/table/cell-merge.rst b/docs/dev/analysis/features/table/cell-merge.rst index 2b432dfbf..31451cd5b 100644 --- a/docs/dev/analysis/features/table/cell-merge.rst +++ b/docs/dev/analysis/features/table/cell-merge.rst @@ -60,8 +60,8 @@ Basic merge protocol A merge is specified using two diagonal cells:: >>> table = document.add_table(3, 3) - >>> a = table.cells(0, 0) - >>> b = table.cells(1, 1) + >>> a = table.cell(0, 0) + >>> b = table.cell(1, 1) >>> A = a.merge(b) :: diff --git a/docs/dev/analysis/features/table/index.rst b/docs/dev/analysis/features/table/index.rst index 82836a746..e1212b7be 100644 --- a/docs/dev/analysis/features/table/index.rst +++ b/docs/dev/analysis/features/table/index.rst @@ -15,6 +15,7 @@ feature analyses: :titlesonly: table-props + table-row table-cell cell-merge diff --git a/docs/dev/analysis/features/table/table-cell.rst b/docs/dev/analysis/features/table/table-cell.rst index 40be36b32..e7d177719 100644 --- a/docs/dev/analysis/features/table/table-cell.rst +++ b/docs/dev/analysis/features/table/table-cell.rst @@ -7,6 +7,22 @@ properties affecting its size, appearance, and how the content it contains is formatted. +Candidate protocol +------------------ + +Cell.vertical_alignment:: + + >>> from docx.enum.table import WD_CELL_ALIGN_VERTICAL + >>> cell = table.add_row().cells[0] + >>> cell + + >>> cell.vertical_alignment + None + >>> cell.vertical_alignment = WD_CELL_ALIGN_VERTICAL.CENTER + >>> print(cell.vertical_alignment) + CENTER (1) + + MS API - Partial Summary ------------------------ @@ -29,6 +45,25 @@ MS API - Partial Summary * WordWrap +WD_ALIGN_VERTICAL Enumeration +--------------------------------- + +wdAlignVerticalBoth (101) + This is an option in the OpenXml spec, but not in Word itself. It's not + clear what Word behavior this setting produces. 
If you find out please let + us know and we'll update the documentation. Otherwise, probably best to + avoid this option. + +wdAlignVerticalBottom (3) + Text is aligned to the bottom border of the cell. + +wdAlignVerticalCenter (1) + Text is aligned to the center of the cell. + +wdAlignVerticalTop (0) + Text is aligned to the top border of the cell. + + Specimen XML ------------ @@ -39,6 +74,7 @@ Specimen XML + @@ -127,22 +163,18 @@ Schema Definitions - - - + + + + + - - - - - - - - + + @@ -160,6 +192,25 @@ Schema Definitions + + + + + + + + + + + + + + + + + + + .. _`WdRowHeightRule`: http://msdn.microsoft.com/en-us/library/office/ff193620(v=office.15).aspx diff --git a/docs/dev/analysis/features/table/table-props.rst b/docs/dev/analysis/features/table/table-props.rst index 8485c7bc8..73e97449e 100644 --- a/docs/dev/analysis/features/table/table-props.rst +++ b/docs/dev/analysis/features/table/table-props.rst @@ -23,7 +23,7 @@ a table:: Autofit ------- -Word has two algorithms for laying out a table, *fixed-width* or *autofit*. +Word has two algorithms for laying out a table, *fixed-width* or `autofit`. The default is autofit. Word will adjust column widths in an autofit table based on cell contents. A fixed-width table retains its column widths regardless of the contents. Either algorithm will adjust column widths diff --git a/docs/dev/analysis/features/table/table-row.rst b/docs/dev/analysis/features/table/table-row.rst new file mode 100644 index 000000000..9593e6db3 --- /dev/null +++ b/docs/dev/analysis/features/table/table-row.rst @@ -0,0 +1,133 @@ + +Table Row +========= + +A table row has certain properties such as height. 
+ + +Row.height +---------- + +Candidate protocol:: + + >>> from docx.enum.table import WD_ROW_HEIGHT + >>> row = table.add_row() + >>> row + + >>> row.height_rule + None + >>> row.height_rule = WD_ROW_HEIGHT.EXACTLY + >>> row.height + None + >>> row.height = Pt(24) + + +MS API +------ + +https://msdn.microsoft.com/en-us/library/office/ff193915.aspx + +Methods +~~~~~~~ + +* Delete() +* SetHeight() +* SetLeftIndent() + +Properties +~~~~~~~~~~ + +* Alignment +* AllowBreakAcrossPages +* Borders +* Cells +* HeadingFormat +* Height +* HeightRule +* Index +* IsFirst +* IsLast +* LeftIndent +* NestingLevel +* Next +* Previous +* Shading +* SpaceBetweenColumns + + +WD_ROW_HEIGHT_RULE Enumeration +------------------------------ + +Alias: WD_ROW_HEIGHT + +* wdRowHeightAtLeast (1) The row height is at least a minimum specified value. +* wdRowHeightAuto (0) The row height is adjusted to accommodate the tallest + value in the row. +* wdRowHeightExactly (2) The row height is an exact value. + + +Schema Definitions +------------------ + +.. highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/text/font.rst b/docs/dev/analysis/features/text/font.rst index 1682b5c76..626065006 100644 --- a/docs/dev/analysis/features/text/font.rst +++ b/docs/dev/analysis/features/text/font.rst @@ -138,16 +138,16 @@ The semantics of the three values are as follows: +-------+---------------------------------------------------------------+ | value | meaning | +=======+===============================================================+ -| True | The effective value of the property is unconditionally *on*. | +| True | The effective value of the property is unconditionally `on`. | | | Contrary settings in the style hierarchy have no effect. 
| +-------+---------------------------------------------------------------+ -| False | The effective value of the property is unconditionally *off*. | +| False | The effective value of the property is unconditionally `off`. | | | Contrary settings in the style hierarchy have no effect. | +-------+---------------------------------------------------------------+ | None | The element is not present. The effective value is | | | inherited from the style hierarchy. If no value for this | | | property is present in the style hierarchy, the effective | -| | value is *off*. | +| | value is `off`. | +-------+---------------------------------------------------------------+ @@ -155,7 +155,7 @@ Toggle properties ----------------- Certain of the boolean run properties are *toggle properties*. A toggle -property is one that behaves like a *toggle* at certain places in the style +property is one that behaves like a `toggle` at certain places in the style hierarchy. Toggle here means that setting the property on has the effect of reversing the prior setting rather than unconditionally setting the property on. diff --git a/docs/dev/analysis/features/text/hyperlink.rst b/docs/dev/analysis/features/text/hyperlink.rst new file mode 100644 index 000000000..cfd451fe1 --- /dev/null +++ b/docs/dev/analysis/features/text/hyperlink.rst @@ -0,0 +1,383 @@ + +Hyperlink +========= + +Word allows a hyperlink to be placed in a document wherever a paragraph can appear. The +actual hyperlink element is a peer of |Run|. + +The link may be to an external resource such as a web site, or internal, to another +location in the document. The link may also be a `mailto:` URI or a reference to a file +on an accessible local or network filesystem. + +The visible text of a hyperlink is held in one or more runs. Technically a hyperlink can +have zero runs, but this occurs only in contrived cases (otherwise there would be +nothing to click on). 
As usual, each run can have its own distinct text formatting +(font), so for example one word in the hyperlink can be bold, etc. By default, Word +applies the built-in `Hyperlink` character style to a newly inserted hyperlink. Like +other text, the hyperlink text may often be broken into multiple runs as a result of +edits in different "revision-save" editing sessions (between "Save" commands). + +Note that rendered page-breaks can occur in the middle of a hyperlink. + +A |Hyperlink| is a child of |Paragraph|, a peer of |Run|. + + +TODO: What about URL-encoding/decoding (like %20) behaviors, if any? + + +Candidate protocol +------------------ + +An external hyperlink has an address and an optional anchor. An internal hyperlink has +only an anchor. An anchor is more precisely known as a *URI fragment* in a web URL and +follows a hash mark ("#"). The fragment-separator hash character is not stored in the +XML. + +Note that the anchor and address are stored in two distinct attributes, so you need to +concatenate `.address` and `.anchor` like `f"{address}#{anchor}"` if you want the whole +thing. + +Also note that Word does not rigorously separate a fragment in a web URI so it may +appear as part of the address or separately in the anchor attribute, depending on how +the hyperlink was authored. Hyperlinks inserted using the dialog-box seem to separate it +and addresses typed into the document directly don't, based on my limited experience. + +.. 
highlight:: python + +**Access hyperlinks in a paragraph**:: + + >>> hyperlinks = paragraph.hyperlinks + [] + +**Access hyperlinks in a paragraph in document order with runs**:: + + >>> list(paragraph.iter_inner_content()) + [ + + + + ] + +**Access hyperlink address**:: + + >>> hyperlink.address + 'https://google.com/' + +**Access hyperlink fragment**:: + + >>> hyperlink.fragment + 'introduction' + +**Access hyperlink history (visited or not, True means not visited yet)**:: + + >>> hyperlink.history + True + +**Access hyperlinks runs**:: + + >>> hyperlink.runs + [ + + + + ] + +**Access hyperlink URL**:: + + >>> hyperlink.url + 'https://us.com#introduction' + +**Determine whether a hyperlink contains a rendered page-break**:: + + >>> hyperlink.contains_page_break + False + +**Access visible text of a hyperlink**:: + + >>> hyperlink.text + 'an excellent Wikipedia article on ferrets' + +**Add an external hyperlink** (not yet implemented):: + + >>> hyperlink = paragraph.add_hyperlink( + ... 'About', address='http://us.com', fragment='about' + ... ) + >>> hyperlink + + >>> hyperlink.text + 'About' + >>> hyperlink.address + 'http://us.com' + >>> hyperlink.fragment + 'about' + >>> hyperlink.url + 'http://us.com#about' + +**Add an internal hyperlink (to a bookmark)**:: + + >>> hyperlink = paragraph.add_hyperlink('Section 1', fragment='Section_1') + >>> hyperlink.text + 'Section 1' + >>> hyperlink.fragment + 'Section_1' + >>> hyperlink.address + '' + +**Modify hyperlink properties**:: + + >>> hyperlink.text = 'Froogle' + >>> hyperlink.text + 'Froogle' + >>> hyperlink.address = 'mailto:info@froogle.com?subject=sup dawg?' 
+ >>> hyperlink.address + 'mailto:info@froogle.com?subject=sup%20dawg%3F' + >>> hyperlink.anchor = None + >>> hyperlink.anchor + None + +**Add additional runs to a hyperlink**:: + + >>> hyperlink.text = 'A ' + >>> # .insert_run inserts a new run at idx, defaults to idx=-1 + >>> hyperlink.insert_run(' link').bold = True + >>> hyperlink.insert_run('formatted', idx=1).bold = True + >>> hyperlink.text + 'A formatted link' + >>> [r for r in hyperlink.iter_runs()] + [, + , + ] + +**Iterate over the run-level items a paragraph contains**:: + + >>> paragraph = document.add_paragraph('A paragraph having a link to: ') + >>> paragraph.add_hyperlink(text='github', address='http://github.com') + >>> [item for item in paragraph.iter_run_level_items()] + [, ] + +**Paragraph.text now includes text contained in a hyperlink**:: + + >>> paragraph.text + 'A paragraph having a link to: github' + + +Word Behaviors +-------------- + +* What are the semantics of the w:history attribute on w:hyperlink? I'm + suspecting this indicates whether the link should show up blue (unvisited) + or purple (visited). I'm inclined to think we need that as a read/write + property on hyperlink. We should see what the MS API does on this count. + +* We probably need to enforce some character-set restrictions on w:anchor. + Word doesn't seem to like spaces or hyphens, for example. The simple type + ST_String doesn't look like it takes care of this. + +* We'll need to test URL escaping of special characters like spaces and + question marks in Hyperlink.address. + +* What does Word do when loading a document containing an internal hyperlink + having an anchor value that doesn't match an existing bookmark? We'll want + to know because we're sure to get support inquiries from folks who don't + match those up and wonder why they get a repair error or whatever. + + +Specimen XML +------------ + +..
highlight:: xml + + +External links +~~~~~~~~~~~~~~ + +The address (URL) of an external hyperlink is stored in the document.xml.rels +file, keyed by the w:hyperlink@r:id attribute:: + + + + This is an external link to + + + + + + + Google + + + + +... mapping to relationship in document.xml.rels:: + + + + + +A hyperlink can contain multiple runs of text (and a whole lot of other stuff, at least +as far as the schema indicates):: + + + + + + + + A hyperlink containing an + + + + + + + italicized + + + + + + word + + + + + +Internal links +~~~~~~~~~~~~~~ + +An internal link provides "jump to another document location" behavior in the +Word UI. An internal link is distinguished by the absence of an r:id +attribute. In this case, the w:anchor attribute is required. The value of the +anchor attribute is the name of a bookmark in the document. + +Example:: + + + + See + + + + + + + Section 4 + + + + for more details. + + + +... referring to this bookmark elsewhere in the document:: + + + + + Section 4 + + + + + +Schema excerpt +-------------- + +.. highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/text/index.rst b/docs/dev/analysis/features/text/index.rst index 2fff03924..b1e2fa7f8 100644 --- a/docs/dev/analysis/features/text/index.rst +++ b/docs/dev/analysis/features/text/index.rst @@ -5,6 +5,7 @@ Text .. toctree:: :titlesonly: + hyperlink tab-stops font-highlight-color paragraph-format diff --git a/docs/dev/analysis/features/text/paragraph-format.rst b/docs/dev/analysis/features/text/paragraph-format.rst index febc9300a..6e5398a13 100644 --- a/docs/dev/analysis/features/text/paragraph-format.rst +++ b/docs/dev/analysis/features/text/paragraph-format.rst @@ -10,7 +10,7 @@ spacing, space before and after, and widow/orphan control. 
Alignment (justification) ------------------------- -In Word, each paragraph has an *alignment* attribute that specifies how to +In Word, each paragraph has an `alignment` attribute that specifies how to justify the lines of the paragraph when the paragraph is laid out on the page. Common values are left, right, centered, and justified. @@ -45,7 +45,7 @@ Paragraph spacing Spacing between subsequent paragraphs is controlled by the paragraph spacing attributes. Spacing can be applied either before the paragraph, after it, or -both. The concept is similar to that of *padding* or *margin* in CSS. +both. The concept is similar to that of `padding` or `margin` in CSS. WordprocessingML supports paragraph spacing specified as either a length value or as a multiple of the line height; however only a length value is supported via the Word UI. Inter-paragraph spacing "overlaps", such that the diff --git a/docs/dev/analysis/index.rst b/docs/dev/analysis/index.rst index b32bf5cc1..25bf5fb4e 100644 --- a/docs/dev/analysis/index.rst +++ b/docs/dev/analysis/index.rst @@ -10,6 +10,7 @@ Feature Analysis .. 
toctree:: :titlesonly: + features/comments features/header features/settings features/text/index diff --git a/docs/index.rst b/docs/index.rst index a3432a514..aee0acfbf 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -31,27 +31,33 @@ Here's an example of what |docx| can do: p.add_run('italic.').italic = True document.add_heading('Heading, level 1', level=1) - document.add_paragraph('Intense quote', style='IntenseQuote') + document.add_paragraph('Intense quote', style='Intense Quote') document.add_paragraph( - 'first item in unordered list', style='ListBullet' + 'first item in unordered list', style='List Bullet' ) document.add_paragraph( - 'first item in ordered list', style='ListNumber' + 'first item in ordered list', style='List Number' ) document.add_picture('monty-truth.png', width=Inches(1.25)) + records = ( + (3, '101', 'Spam'), + (7, '422', 'Eggs'), + (4, '631', 'Spam, spam, eggs, and spam') + ) + table = document.add_table(rows=1, cols=3) hdr_cells = table.rows[0].cells hdr_cells[0].text = 'Qty' hdr_cells[1].text = 'Id' hdr_cells[2].text = 'Desc' - for item in recordset: + for qty, id, desc in records: row_cells = table.add_row().cells - row_cells[0].text = str(item.qty) - row_cells[1].text = str(item.id) - row_cells[2].text = item.desc + row_cells[0].text = str(qty) + row_cells[1].text = id + row_cells[2].text = desc document.add_page_break() @@ -68,11 +74,14 @@ User Guide user/install user/quickstart user/documents + user/tables user/text user/sections + user/hdrftr user/api-concepts user/styles-understanding user/styles-using + user/comments user/shapes @@ -88,6 +97,7 @@ API Documentation api/text api/table api/section + api/comments api/shape api/dml api/shared diff --git a/docs/user/comments.rst b/docs/user/comments.rst new file mode 100644 index 000000000..869d6f5f1 --- /dev/null +++ b/docs/user/comments.rst @@ -0,0 +1,168 @@ +.. _comments: + +Working with Comments +===================== + +Word allows *comments* to be added to a document. 
This is an aspect of the *reviewing* +feature-set and is typically used by a second party to provide feedback to the author +without changing the document itself. + +The procedure is simple: + +- You select some range of text with the mouse or Shift+Arrow keys +- You press the *New Comment* button (Review toolbar) +- You type or paste in your comment + +.. image:: /_static/img/comment-parts.png + +A comment can only be added to the main document. A comment cannot be added in a header, +a footer, or within a comment. A comment *can* be added to a footnote or endnote, but +those are not yet supported by *python-docx*. + +**Comment Anatomy.** Each comment has two parts, the *comment-reference* and the +*comment-content*: + +The **comment-reference**, sometimes *comment-anchor*, is the text in the main +document you selected before pressing the *New Comment* button. It is a so-called +*range* in the main document that starts at the first selected character and ends after +the last one. + +The **comment-content**, sometimes just *comment*, is whatever content you typed or +pasted in. The content for each comment is stored in a separate comment object, and +these comment objects are stored in a separate *comments-part* (part-name +``word/comments.xml``), not in the main document. Each comment is assigned a unique id +when it is created, allowing the comment reference to be associated with its content and +vice versa. + +**Comment Reference.** The comment-reference is a *range*. A range must both start and +end at an even *run* boundary. Intuitively, a range corresponds to a *selection* of text +in the Word UI, one formed by dragging with the mouse or using the *Shift-Arrow* keys. + +In the XML, this range is delimited by a start marker `<w:commentRangeStart>` and an +end marker `<w:commentRangeEnd>`, both of which contain the *id* of the comment they +delimit. 
The start marker appears before the run starting with the first character of +the range and the end marker appears immediately after the run ending with the last +character of the range. Adding a comment that references an arbitrary range of text in +an existing document may require splitting runs on the desired character boundaries. + +In general a range can span paragraphs, such that the range begins in one paragraph and +ends in a later paragraph. However, a range must enclose *contiguous* runs, such that a +range that contains only two vertically adjacent cells in a multi-column table is not +possible (even though Word allows such a selection with the mouse). + +**Comment Content.** Interestingly, although commonly used to contain a single line of +plain text, the comment-content can contain essentially any content that can appear in +the document body. This includes rich text with emphasis, runs with a different typeface +and size, both paragraph and character styles, hyperlinks, images, and tables. Note that +tables do not appear in the comment as displayed in the *comment-sidebar* although they +do appear in the *reviewing-pane*. + +**Comment Metadata.** Each comment can be assigned *author*, *initials*, and *date* +metadata. In Word, these fields are assigned automatically based on values in ``Settings +> User`` of the installed Word application. These might be configured automatically in +an enterprise installation, based on the user account, but by default they are empty. + +*author* metadata is required, although silently assigned the empty string by Word if +the user name is not configured. *initials* is optional, but always set by Word, to the +empty string if not configured. *date* is also optional, but always set by Word to the +UTC date and time the comment was added, with seconds resolution (no milliseconds or +microseconds). + +**Additional Features.** Later versions of Word allow a comment to be *resolved*. 
A +comment in this state will appear grayed-out in the Word UI. Later versions of Word also +allow a comment to be *replied to*, forming a *comment thread*. Neither of these +features is supported by the initial implementation of comments in *python-docx*. + +**Applicability.** Note that comments cannot be added to a header or footer and cannot +be nested inside a comment itself. In general the *python-docx* API will not allow these +operations but if you outsmart it then the resulting comment will either be silently +removed or trigger a repair error when the document is loaded by Word. + + +Adding a Comment +---------------- + +A simple example is adding a comment to a paragraph:: + + >>> from docx import Document + >>> document = Document() + >>> paragraph = document.add_paragraph("Hello, world!") + + >>> comment = document.add_comment( + ... runs=paragraph.runs, + ... text="I have this to say about that", + ... author="Steve Canny", + ... initials="SC", + ... ) + >>> comment + <docx.comments.Comment object at 0x...> + >>> comment.id + 0 + >>> comment.author + 'Steve Canny' + >>> comment.initials + 'SC' + >>> comment.date + datetime.datetime(2025, 6, 11, 20, 42, 30, tzinfo=datetime.timezone.utc) + >>> comment.text + 'I have this to say about that' + +The API documentation for :meth:`.Document.add_comment` provides further details. + + +Accessing and using the Comments collection +------------------------------------------- + +The comments collection is accessed via the :attr:`.Document.comments` property:: + + >>> comments = document.comments + >>> comments + <docx.comments.Comments object at 0x...> + >>> len(comments) + 1 + +The comments collection supports random access to a comment by its id:: + + >>> comment = comments.get(0) + >>> comment + <docx.comments.Comment object at 0x...> + + +Adding rich content to a comment +-------------------------------- + +A comment is a *block-item container*, just like the document body or a table cell, so +it can contain any content that can appear in those places. 
It does not contain +page-layout sections and cannot contain a comment reference, but it can contain multiple +paragraphs and/or tables, and runs within paragraphs can have emphasis such as bold or +italic, and have images or hyperlinks. + +A comment created with `text=""` will contain a single paragraph with a single empty run +containing the so-called *annotation reference* but no text. It's probably best to leave +this run as it is but you can freely add additional runs to the paragraph that contain +whatever content you like. + +The methods for adding this content are the same as those used for the document and +table cells:: + + >>> paragraph = document.add_paragraph("The rain in Spain.") + >>> comment = document.add_comment( + ... runs=paragraph.runs, + ... text="", + ... ) + >>> cmt_para = comment.paragraphs[0] + >>> cmt_para.add_run("Please finish this thought. I believe it should be ") + >>> cmt_para.add_run("falls mainly in the plain.").bold = True + + +Updating comment metadata +------------------------- + +The author and initials metadata can be updated as desired:: + + >>> comment.author = "John Smith" + >>> comment.initials = "JS" + >>> comment.author + 'John Smith' + >>> comment.initials + 'JS' diff --git a/docs/user/hdrftr.rst b/docs/user/hdrftr.rst new file mode 100644 index 000000000..ae378536b --- /dev/null +++ b/docs/user/hdrftr.rst @@ -0,0 +1,166 @@ +.. _hdrftr: + +Working with Headers and Footers +================================ + +Word supports *page headers* and *page footers*. A page header is text that appears in +the top margin area of each page, separated from the main body of text, and usually +conveying context information, such as the document title, author, creation date, or the +page number. The page headers in a document are the same from page to page, with only +small differences in content, such as a changing section title or page number. A page +header is also known as a *running head*. 
+ +A *page footer* is analogous in every way to a page header except that it appears at the +bottom of a page. It should not be confused with a footnote, which is not uniform +between pages. For brevity's sake, the term `header` is often used here to refer to what +may be either a header or footer object, trusting the reader to understand its +applicability to both object types. + + +Accessing the header for a section +---------------------------------- + +Headers and footers are linked to a `section`; this allows each section to have +a distinct header and/or footer. For example, a landscape section might have a wider +header than a portrait section. + +Each section object has a ``.header`` property providing access to a |_Header| object +for that section:: + + >>> document = Document() + >>> section = document.sections[0] + >>> header = section.header + >>> header + <docx.section._Header object at 0x...> + +A |_Header| object is `always` present on ``Section.header``, even when no header is +defined for that section. The presence of an actual header definition is indicated by +``_Header.is_linked_to_previous``:: + + >>> header.is_linked_to_previous + True + +A value of ``True`` indicates the |_Header| object contains no header definition and the +section will display the same header as the previous section. This "inheritance" +behavior is recursive, such that a "linked" header actually gets its definition from the +first prior section having a header definition. This "linked" state is indicated as +*"Same as previous"* in the Word UI. + +A new document does not have a header (on the single section it contains) and so +``.is_linked_to_previous`` is ``True`` in that case. Note this case may be a bit +counterintuitive in that there *is no previous section header* to link to. In +this "no previous header" case, no header is displayed. + + +Adding a header (simple case) +----------------------------- + +A header can be added to a new document simply by editing the content of the |_Header| +object. 
A |_Header| object is a "story" container and its content is edited just like +a |Document| object. Note that like a new document, a new header already contains +a single (empty) paragraph:: + + >>> paragraph = header.paragraphs[0] + >>> paragraph.text = "Title of my document" + +.. image:: /_static/img/hdrftr-01.png + :scale: 50% + +Note also that the act of adding content (or even just accessing ``header.paragraphs``) +added a header definition and changed the state of ``.is_linked_to_previous``:: + + >>> header.is_linked_to_previous + False + + +Adding "zoned" header content +----------------------------- + +A header with multiple "zones" is often accomplished using carefully placed tab stops. + +The required tab-stops for a center and right-aligned "zone" are part of the ``Header`` +and ``Footer`` styles in Word. If you're using a custom template rather than the +`python-docx` default, it probably makes sense to define that style in your template. + +Inserted tab characters (``"\t"``) are used to separate left, center, and right-aligned +header content:: + + >>> paragraph = header.paragraphs[0] + >>> paragraph.text = "Left Text\tCenter Text\tRight Text" + >>> paragraph.style = document.styles["Header"] + +.. image:: /_static/img/hdrftr-02.png + :scale: 75% + +The ``Header`` style is automatically applied to a new header, so the third line just +above (applying the ``Header`` style) is unnecessary in this case, but included here to +illustrate the general case. + + +Removing a header +----------------- + +An unwanted header can be removed by assigning ``True`` to its +``.is_linked_to_previous`` attribute:: + + >>> header.is_linked_to_previous = True + >>> header.is_linked_to_previous + True + +The content for a header is irreversibly deleted when ``True`` is assigned to +``.is_linked_to_previous``. 
+ + +Understanding headers in a multi-section document +------------------------------------------------- + +The "just start editing" approach works fine for the simple case, but to make sense of +header behaviors in a multi-section document, a few simple concepts will be helpful. +Here they are in a nutshell: + +1. Each section can have its own header definition (but doesn't have to). + +2. A section that lacks a header definition inherits the header of the section before + it. The ``_Header.is_linked_to_previous`` property simply reflects the presence of + a header definition, ``False`` when a definition is present and ``True`` when not. + +3. Lacking a header definition is the default state. A new document has no defined + header and neither does a newly-inserted section. ``.is_linked_to_previous`` reports + ``True`` in both those cases. + +4. The content of a ``_Header`` object is its own content if it has a header definition. + If not, its content is that of the first prior section that `does` have a header + definition. If no sections have a header definition, a new one is added on the first + section and all other sections inherit that one. This adding of a header definition + happens the first time header content is accessed, perhaps by referencing + ``header.paragraphs``. + + +Adding a header definition (general case) +----------------------------------------- + +An explicit header definition can be given to a section that lacks one by assigning +``False`` to its ``.is_linked_to_previous`` property:: + + >>> header.is_linked_to_previous + True + >>> header.is_linked_to_previous = False + >>> header.is_linked_to_previous + False + +The newly added header definition contains a single empty paragraph. Note that leaving +the header this way is occasionally useful as it effectively "turns-off" a header for +that section and those after it until the next section with a defined header. 
+ +Assigning ``False`` to ``.is_linked_to_previous`` on a header that already has a header +definition does nothing. + + +Inherited content is automatically located +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Editing the content of a header edits the content of the `source` header, taking into +account any "inheritance". So for example, if the section 2 header inherits from section +1 and you edit the section 2 header, you actually change the contents of the section +1 header. A new header definition is not added for section 2 unless you first explicitly +assign ``False`` to its ``.is_linked_to_previous`` property. diff --git a/docs/user/quickstart.rst b/docs/user/quickstart.rst index 1c6f419ab..0d6982ee0 100644 --- a/docs/user/quickstart.rst +++ b/docs/user/quickstart.rst @@ -115,9 +115,9 @@ supports indexed access, like a list:: row.cells[0].text = 'Foo bar to you.' row.cells[1].text = 'And a hearty foo bar to you too sir!' -The ``.rows`` and ``.columns`` collections on a table are iterable, so you can -use them directly in a ``for`` loop. Same with the ``.cells`` sequences on -a row or column:: +The ``.rows`` and ``.columns`` collections on a table are iterable, so you +can use them directly in a ``for`` loop. Same with the ``.cells`` sequences +on a row or column:: for row in table.rows: for cell in row.cells: @@ -132,12 +132,16 @@ the sequence:: You can also add rows to a table incrementally like so:: row = table.add_row() - + This can be very handy for the variable length table scenario we mentioned above:: # get table data ------------- - items = get_things_from_database_or_something() + items = ( + (7, '1024', 'Plush kittens'), + (3, '2042', 'Furbees'), + (1, '1288', 'French Poodle Collars, Deluxe'), + ) # add table ------------------ table = document.add_table(1, 3) @@ -185,7 +189,7 @@ or over a network and don't want to get the filesystem involved. Image size ~~~~~~~~~~ -By default, the added image appears at *native* size. 
This is often bigger than +By default, the added image appears at `native` size. This is often bigger than you want. Native size is calculated as ``pixels / dpi``. So a 300x300 pixel image having 300 dpi resolution appears in a one inch square. The problem is most images don't contain a dpi property and it defaults to 72 dpi. This would @@ -229,14 +233,11 @@ thing. You can also apply a style afterward. These two lines are equivalent to the one above:: paragraph = document.add_paragraph('Lorem ipsum dolor sit amet.') - paragraph.style = 'ListBullet' + paragraph.style = 'List Bullet' -The style is specified using its style ID, 'ListBullet' in this example. -Generally, the style ID is formed by removing the spaces in the style name as -it appears in the Word user interface (UI). So the style 'List Number 3' -would be specified as ``'ListNumber3'``. However, note that if you are using -a localized version of Word, the style ID may be derived from the English -style name and may not correspond so neatly to its style name in the Word UI. +The style is specified using its style name, 'List Bullet' in this example. +Generally, the style name is exactly as it appears in the Word user interface +(UI). Applying bold and italic @@ -249,7 +250,7 @@ a little about what goes on inside a paragraph. The short version is this: height, tabs, and so forth. #. Character-level formatting, such as bold and italic, are applied at the - *run* level. All content within a paragraph must be within a run, but there + `run` level. All content within a paragraph must be within a run, but there can be more than one. So a paragraph with a bold word in the middle would need three runs, a normal one, a bold one containing the word, and another normal one for the text after. 
@@ -282,14 +283,14 @@ Note that you can set bold or italic right on the result of ``.add_run()`` if you don't need it for anything else:: paragraph.add_run('dolor').bold = True - + # is equivalent to: run = paragraph.add_run('dolor') run.bold = True # except you don't have a reference to `run` afterward - + It's not necessary to provide text to the ``.add_paragraph()`` method. This can make your code simpler if you're building the paragraph up from runs anyway:: @@ -298,7 +299,7 @@ make your code simpler if you're building the paragraph up from runs anyway:: paragraph.add_run('Lorem ipsum ') paragraph.add_run('dolor').bold = True paragraph.add_run(' sit amet.') - + Applying a character style -------------------------- @@ -309,7 +310,7 @@ settings. In general you can think of a character style as specifying a font, including its typeface, size, color, bold, italic, etc. Like paragraph styles, a character style must already be defined in the -document you open with the ``Document()`` call (*see* +document you open with the ``Document()`` call (`see` :ref:`understanding_styles`). A character style can be specified when adding a new run:: @@ -324,8 +325,4 @@ the same result as the lines above:: run = paragraph.add_run('text with emphasis.') run.style = 'Emphasis' -As with a paragraph style, the style ID is formed by removing the spaces in -the name as it appears in the Word UI. So the style 'Subtle Emphasis' would -be specified as ``'SubtleEmphasis'``. Note that if you are using -a localized version of Word, the style ID may be derived from the English -style name and may not correspond to its style name in the Word UI. +As with a paragraph style, the style name is as it appears in the Word UI. 
diff --git a/docs/user/sections.rst b/docs/user/sections.rst index 5dc956172..895021874 100644 --- a/docs/user/sections.rst +++ b/docs/user/sections.rst @@ -3,15 +3,14 @@ Working with Sections ===================== -Word supports the notion of a *section*, a division of a document having the -same page layout settings, such as margins and page orientation. This is how, -for example, a document can contain some pages in portrait layout and others in -landscape. +Word supports the notion of a `section`, a division of a document having the same page +layout settings, such as margins and page orientation. This is how, for example, a +document can contain some pages in portrait layout and others in landscape. Each section +also defines the headers and footers that apply to the pages of that section. -Most Word documents have only the single section that comes by default and -further, most of those have no reason to change the default margins or other -page layout. But when you *do* need to change the page layout, you'll need -to understand sections to get it done. +Most Word documents have only the single section that comes by default and further, most +of those have no reason to change the default margins or other page layout. But when you +`do` need to change the page layout, you'll need to understand sections to get it done. 
Accessing sections @@ -98,7 +97,7 @@ from portrait to landscape:: >>> section.page_width = new_width >>> section.page_height = new_height >>> section.orientation, section.page_width, section.page_height - (LANDSCAPE (1), 10058400, 7772400) + (LANDSCAPE (1), 10058400, 7772400) Page margins diff --git a/docs/user/shapes.rst b/docs/user/shapes.rst index ec5d22797..5dcefbf61 100644 --- a/docs/user/shapes.rst +++ b/docs/user/shapes.rst @@ -2,11 +2,11 @@ Understanding pictures and other shapes ======================================= -Conceptually, Word documents have two *layers*, a *text layer* and a *drawing +Conceptually, Word documents have two `layers`, a *text layer* and a *drawing layer*. In the text layer, text objects are flowed from left to right and from top to bottom, starting a new page when the prior one is filled. In the drawing -layer, drawing objects, called *shapes*, are placed at arbitrary positions. -These are sometimes referred to as *floating* shapes. +layer, drawing objects, called `shapes`, are placed at arbitrary positions. +These are sometimes referred to as `floating` shapes. A picture is a shape that can appear in either the text or drawing layer. When it appears in the text layer it is called an *inline shape*, or more diff --git a/docs/user/styles-understanding.rst b/docs/user/styles-understanding.rst index e49fdea83..114b7ad6a 100644 --- a/docs/user/styles-understanding.rst +++ b/docs/user/styles-understanding.rst @@ -125,7 +125,7 @@ access purposes. A style's :attr:`style_id` is used internally to key a content object such as a paragraph to its style. However this value is generated automatically by Word and is not guaranteed to be stable across saves. In general, the style -id is formed simply by removing spaces from the *localized* style name, +id is formed simply by removing spaces from the `localized` style name, however there are exceptions. 
Users of |docx| should generally avoid using the style id unless they are confident with the internals involved. @@ -155,13 +155,13 @@ Style Behavior -------------- In addition to collecting a set of formatting properties, a style has five -properties that specify its *behavior*. This behavior is relatively simple, +properties that specify its `behavior`. This behavior is relatively simple, basically amounting to when and where the style appears in the Word or LibreOffice UI. The key notion to understanding style behavior is the recommended list. In the style pane in Word, the user can select which list of styles they want to -see. One of these is named *Recommended* and is known as the *recommended +see. One of these is named `Recommended` and is known as the *recommended list*. All five behavior properties affect some aspect of the style’s appearance in this list and in the style gallery. diff --git a/docs/user/tables.rst b/docs/user/tables.rst new file mode 100644 index 000000000..40ef20933 --- /dev/null +++ b/docs/user/tables.rst @@ -0,0 +1,202 @@ +.. _tables: + +Working with Tables +=================== + +Word provides sophisticated capabilities to create tables. As usual, this power comes with +additional conceptual complexity. + +This complexity becomes most apparent when *reading* tables, in particular from documents drawn from +the wild where there is limited or no prior knowledge as to what the tables might contain or how +they might be structured. + +These are some of the important concepts you'll need to understand. + + +Concept: Simple (uniform) tables +-------------------------------- + +:: + + +---+---+---+ + | a | b | c | + +---+---+---+ + | d | e | f | + +---+---+---+ + | g | h | i | + +---+---+---+ + +The basic concept of a table is intuitive enough. You have *rows* and *columns*, and at each (row, +column) position is a different *cell*. It can be described as a *grid* or a *matrix*. Let's call +this concept a *uniform table*. 
A relational database table and a Pandas dataframe are both examples +of a uniform table. + +The following invariants apply to uniform tables: + +* Each row has the same number of cells, one for each column. +* Each column has the same number of cells, one for each row. + + +Complication 1: Merged Cells +---------------------------- + +:: + + +---+---+---+ +---+---+---+ + | a | b | | | b | c | + +---+---+---+ + a +---+---+ + | c | d | e | | | d | e | + +---+---+---+ +---+---+---+ + | f | g | h | | f | g | h | + +---+---+---+ +---+---+---+ + +While very suitable for data processing, a uniform table lacks expressive power desirable for +tables intended for a human reader. + +Perhaps the most important characteristic a uniform table lacks is *merged cells*. It is very common +to want to group multiple cells into one, for example to form a column-group heading or provide the +same value for a sequence of cells rather than repeat it for each cell. These make a rendered table +more *readable* by reducing the cognitive load on the human reader and make certain relationships +explicit that might easily be missed otherwise. + +Unfortunately, accommodating merged cells breaks both the invariants of a uniform table: + +* Each row can have a different number of cells. +* Each column can have a different number of cells. + +This challenges reading table contents programmatically. One might naturally want to read the table +into a uniform matrix data structure like a 3 x 3 "2D array" (list of lists perhaps), but this is +not directly possible when the table is not known to be uniform. + + +Concept: The layout grid +------------------------ + +:: + + + - + - + - + + | | | | + + - + - + - + + | | | | + + - + - + - + + | | | | + + - + - + - + + +In Word, each table has a *layout grid*. + +- The layout grid is *uniform*. There is a layout position for every (layout-row, layout-column) + pair. +- The layout grid itself is not visible. 
However it is represented and referenced by certain + elements and attributes within the table XML +- Each table cell is located at a layout-grid position; i.e. the top-left corner of each cell is the + top-left corner of a layout-grid cell. +- Each table cell occupies one or more whole layout-grid cells. A merged cell will occupy multiple + layout-grid cells. No table cell can occupy a partial layout-grid cell. +- Another way of saying this is that every vertical boundary (left and right) of a cell aligns with + a layout-grid vertical boundary, likewise for horizontal boundaries. But not all layout-grid + boundaries need be occupied by a cell boundary of the table. + + +Complication 2: Omitted Cells +----------------------------- + +:: + + +---+---+ +---+---+---+ + | a | b | | a | b | c | + +---+---+---+ +---+---+---+ + | c | d | | d | + +---+---+ +---+---+---+ + | e | | e | f | g | + +---+ +---+---+---+ + +Word is unusual in that it allows cells to be omitted from the beginning or end (but not the middle) +of a row. A typical practical example is a table with both a row of column headings and a column of +row headings, but no top-left cell (position 0, 0), such as this XOR truth table. + +:: + + +---+---+ + | T | F | + +---+---+---+ + | T | F | T | + +---+---+---+ + | F | T | F | + +---+---+---+ + +In `python-docx`, omitted cells in a |_Row| object are represented by the ``.grid_cols_before`` and +``.grid_cols_after`` properties. In the example above, for the first row, ``.grid_cols_before`` +would equal ``1`` and ``.grid_cols_after`` would equal ``0``. + +Note that omitted cells are not just "empty" cells. They represent layout-grid positions that are +unoccupied by a cell and they cannot be represented by a |_Cell| object. This distinction becomes +important when trying to produce a uniform representation (e.g. a 2D array) for an arbitrary Word +table. 
+ + +Concept: `python-docx` approximates uniform tables by default +------------------------------------------------------------- + +To accurately represent an arbitrary table would require a complex graph data structure. Navigating +this data structure would be at least as complex as navigating the `python-docx` object graph for a +table. When extracting content from a collection of arbitrary Word files, such as for indexing the +document, it is common to choose a simpler data structure and *approximate* the table in that +structure. + +Reflecting on how a relational table or dataframe represents tabular information, a straightforward +approximation would simply repeat merged-cell values for each layout-grid cell occupied by the +merged cell:: + + + +---+---+---+ +---+---+---+ + | a | b | -> | a | a | b | + +---+---+---+ +---+---+---+ + | | d | e | -> | c | d | e | + + c +---+---+ +---+---+---+ + | | f | g | -> | c | f | g | + +---+---+---+ +---+---+---+ + +This is what ``_Row.cells`` does by default. Conceptually:: + + >>> [tuple(c.text for c in r.cells) for r in table.rows] + [ + (a, a, b), + (c, d, e), + (c, f, g), + ] + +Note this only produces a uniform "matrix" of cells when there are no omitted cells. Dealing with +omitted cells requires a more sophisticated approach when maintaining column integrity is required:: + + # +---+---+ + # | a | b | + # +---+---+---+ + # | c | d | + # +---+---+ + # | e | + # +---+ + + def iter_row_cell_texts(row: _Row) -> Iterator[str]: + for _ in range(row.grid_cols_before): + yield "" + for c in row.cells: + yield c.text + for _ in range(row.grid_cols_after): + yield "" + + >>> [tuple(iter_row_cell_texts(r)) for r in table.rows] + [ + ("", "a", "b"), + ("c", "d", ""), + ("", "e", ""), + ] + + +Complication 3: Tables are Recursive +------------------------------------ + +Further complicating table processing is their recursive nature. In Word, as in HTML, a table cell +can itself include one or more tables. 
+ +These can be detected using ``_Cell.tables`` or ``_Cell.iter_inner_content()``. The latter preserves +the document order of the table with respect to paragraphs also in the cell. diff --git a/docs/user/text.rst b/docs/user/text.rst index 1b28feaab..f2e54f3b4 100644 --- a/docs/user/text.rst +++ b/docs/user/text.rst @@ -22,7 +22,7 @@ A table is also a block-level object. An inline object is a portion of the content that occurs inside a block-level item. An example would be a word that appears in bold or a sentence in -all-caps. The most common inline object is a *run*. All content within +all-caps. The most common inline object is a `run`. All content within a block container is inside of an inline object. Typically, a paragraph contains one or more runs, each of which contain some part of the paragraph's text. @@ -55,7 +55,7 @@ The formatting properties of a paragraph are accessed using the Horizontal alignment (justification) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Also known as *justification*, the horizontal alignment of a paragraph can be +Also known as `justification`, the horizontal alignment of a paragraph can be set to left, centered, right, or fully justified (aligned on both the left and right sides) using values from the enumeration :ref:`WdParagraphAlignment`:: @@ -180,7 +180,7 @@ Paragraph spacing The :attr:`~.ParagraphFormat.space_before` and :attr:`~.ParagraphFormat.space_after` properties control the spacing between subsequent paragraphs, controlling the spacing before and after a paragraph, -respectively. Inter-paragraph spacing is *collapsed* during page layout, +respectively. Inter-paragraph spacing is `collapsed` during page layout, meaning the spacing between two paragraphs is the maximum of the `space_after` for the first paragraph and the `space_before` of the second paragraph. 
Paragraph spacing is specified as a |Length| value, often using diff --git a/docx/__init__.py b/docx/__init__.py deleted file mode 100644 index cfa48729d..000000000 --- a/docx/__init__.py +++ /dev/null @@ -1,37 +0,0 @@ -# encoding: utf-8 - -from docx.api import Document # noqa - -__version__ = '0.8.6' - - -# register custom Part classes with opc package reader - -from docx.opc.constants import CONTENT_TYPE as CT, RELATIONSHIP_TYPE as RT -from docx.opc.part import PartFactory -from docx.opc.parts.coreprops import CorePropertiesPart - -from docx.parts.document import DocumentPart -from docx.parts.image import ImagePart -from docx.parts.numbering import NumberingPart -from docx.parts.settings import SettingsPart -from docx.parts.styles import StylesPart - - -def part_class_selector(content_type, reltype): - if reltype == RT.IMAGE: - return ImagePart - return None - - -PartFactory.part_class_selector = part_class_selector -PartFactory.part_type_for[CT.OPC_CORE_PROPERTIES] = CorePropertiesPart -PartFactory.part_type_for[CT.WML_DOCUMENT_MAIN] = DocumentPart -PartFactory.part_type_for[CT.WML_NUMBERING] = NumberingPart -PartFactory.part_type_for[CT.WML_SETTINGS] = SettingsPart -PartFactory.part_type_for[CT.WML_STYLES] = StylesPart - -del ( - CT, CorePropertiesPart, DocumentPart, NumberingPart, PartFactory, - StylesPart, part_class_selector -) diff --git a/docx/api.py b/docx/api.py deleted file mode 100644 index 63e18c406..000000000 --- a/docx/api.py +++ /dev/null @@ -1,37 +0,0 @@ -# encoding: utf-8 - -""" -Directly exposed API functions and classes, :func:`Document` for now. -Provides a syntactically more convenient API for interacting with the -OpcPackage graph. 
-""" - -from __future__ import absolute_import, division, print_function - -import os - -from docx.opc.constants import CONTENT_TYPE as CT -from docx.package import Package - - -def Document(docx=None): - """ - Return a |Document| object loaded from *docx*, where *docx* can be - either a path to a ``.docx`` file (a string) or a file-like object. If - *docx* is missing or ``None``, the built-in default document "template" - is loaded. - """ - docx = _default_docx_path() if docx is None else docx - document_part = Package.open(docx).main_document_part - if document_part.content_type != CT.WML_DOCUMENT_MAIN: - tmpl = "file '%s' is not a Word file, content type is '%s'" - raise ValueError(tmpl % (docx, document_part.content_type)) - return document_part.document - - -def _default_docx_path(): - """ - Return the path to the built-in default .docx package. - """ - _thisdir = os.path.split(__file__)[0] - return os.path.join(_thisdir, 'templates', 'default.docx') diff --git a/docx/blkcntnr.py b/docx/blkcntnr.py deleted file mode 100644 index d57a0cd0f..000000000 --- a/docx/blkcntnr.py +++ /dev/null @@ -1,74 +0,0 @@ -# encoding: utf-8 - -""" -Block item container, used by body, cell, header, etc. Block level items are -things like paragraph and table, although there are a few other specialized -ones like structured document tags. -""" - -from __future__ import absolute_import, print_function - -from .oxml.table import CT_Tbl -from .shared import Parented -from .text.paragraph import Paragraph - - -class BlockItemContainer(Parented): - """ - Base class for proxy objects that can contain block items, such as _Body, - _Cell, header, footer, footnote, endnote, comment, and text box objects. - Provides the shared functionality to add a block item like a paragraph or - table. 
- """ - def __init__(self, element, parent): - super(BlockItemContainer, self).__init__(parent) - self._element = element - - def add_paragraph(self, text='', style=None): - """ - Return a paragraph newly added to the end of the content in this - container, having *text* in a single run if present, and having - paragraph style *style*. If *style* is |None|, no paragraph style is - applied, which has the same effect as applying the 'Normal' style. - """ - paragraph = self._add_paragraph() - if text: - paragraph.add_run(text) - if style is not None: - paragraph.style = style - return paragraph - - def add_table(self, rows, cols, width): - """ - Return a table of *width* having *rows* rows and *cols* columns, - newly appended to the content in this container. *width* is evenly - distributed between the table columns. - """ - from .table import Table - tbl = CT_Tbl.new_tbl(rows, cols, width) - self._element._insert_tbl(tbl) - return Table(tbl, self) - - @property - def paragraphs(self): - """ - A list containing the paragraphs in this container, in document - order. Read-only. - """ - return [Paragraph(p, self) for p in self._element.p_lst] - - @property - def tables(self): - """ - A list containing the tables in this container, in document order. - Read-only. - """ - from .table import Table - return [Table(tbl, self) for tbl in self._element.tbl_lst] - - def _add_paragraph(self): - """ - Return a paragraph newly added to the end of the content in this - container. 
- """ - return Paragraph(self._element.add_p(), self) diff --git a/docx/compat.py b/docx/compat.py deleted file mode 100644 index dc9e20e39..000000000 --- a/docx/compat.py +++ /dev/null @@ -1,43 +0,0 @@ -# encoding: utf-8 - -""" -Provides Python 2/3 compatibility objects -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -import sys - -# =========================================================================== -# Python 3 versions -# =========================================================================== - -if sys.version_info >= (3, 0): - - from io import BytesIO - - def is_string(obj): - """ - Return True if *obj* is a string, False otherwise. - """ - return isinstance(obj, str) - - Unicode = str - -# =========================================================================== -# Python 2 versions -# =========================================================================== - -else: - - from StringIO import StringIO as BytesIO # noqa - - def is_string(obj): - """ - Return True if *obj* is a string, False otherwise. - """ - return isinstance(obj, basestring) - - Unicode = unicode diff --git a/docx/dml/color.py b/docx/dml/color.py deleted file mode 100644 index 2f2f25cb2..000000000 --- a/docx/dml/color.py +++ /dev/null @@ -1,116 +0,0 @@ -# encoding: utf-8 - -""" -DrawingML objects related to color, ColorFormat being the most prominent. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from ..enum.dml import MSO_COLOR_TYPE -from ..oxml.simpletypes import ST_HexColorAuto -from ..shared import ElementProxy - - -class ColorFormat(ElementProxy): - """ - Provides access to color settings such as RGB color, theme color, and - luminance adjustments. - """ - - __slots__ = () - - def __init__(self, rPr_parent): - super(ColorFormat, self).__init__(rPr_parent) - - @property - def rgb(self): - """ - An |RGBColor| value or |None| if no RGB color is specified. 
- - When :attr:`type` is `MSO_COLOR_TYPE.RGB`, the value of this property - will always be an |RGBColor| value. It may also be an |RGBColor| - value if :attr:`type` is `MSO_COLOR_TYPE.THEME`, as Word writes the - current value of a theme color when one is assigned. In that case, - the RGB value should be interpreted as no more than a good guess - however, as the theme color takes precedence at rendering time. Its - value is |None| whenever :attr:`type` is either |None| or - `MSO_COLOR_TYPE.AUTO`. - - Assigning an |RGBColor| value causes :attr:`type` to become - `MSO_COLOR_TYPE.RGB` and any theme color is removed. Assigning |None| - causes any color to be removed such that the effective color is - inherited from the style hierarchy. - """ - color = self._color - if color is None: - return None - if color.val == ST_HexColorAuto.AUTO: - return None - return color.val - - @rgb.setter - def rgb(self, value): - if value is None and self._color is None: - return - rPr = self._element.get_or_add_rPr() - rPr._remove_color() - if value is not None: - rPr.get_or_add_color().val = value - - @property - def theme_color(self): - """ - A member of :ref:`MsoThemeColorIndex` or |None| if no theme color is - specified. When :attr:`type` is `MSO_COLOR_TYPE.THEME`, the value of - this property will always be a member of :ref:`MsoThemeColorIndex`. - When :attr:`type` has any other value, the value of this property is - |None|. - - Assigning a member of :ref:`MsoThemeColorIndex` causes :attr:`type` - to become `MSO_COLOR_TYPE.THEME`. Any existing RGB value is retained - but ignored by Word. Assigning |None| causes any color specification - to be removed such that the effective color is inherited from the - style hierarchy. 
- """ - color = self._color - if color is None or color.themeColor is None: - return None - return color.themeColor - - @theme_color.setter - def theme_color(self, value): - if value is None: - if self._color is not None: - self._element.rPr._remove_color() - return - self._element.get_or_add_rPr().get_or_add_color().themeColor = value - - @property - def type(self): - """ - Read-only. A member of :ref:`MsoColorType`, one of RGB, THEME, or - AUTO, corresponding to the way this color is defined. Its value is - |None| if no color is applied at this level, which causes the - effective color to be inherited from the style hierarchy. - """ - color = self._color - if color is None: - return None - if color.themeColor is not None: - return MSO_COLOR_TYPE.THEME - if color.val == ST_HexColorAuto.AUTO: - return MSO_COLOR_TYPE.AUTO - return MSO_COLOR_TYPE.RGB - - @property - def _color(self): - """ - Return `w:rPr/w:color` or |None| if not present. Helper to factor out - repetitive element access. - """ - rPr = self._element.rPr - if rPr is None: - return None - return rPr.color diff --git a/docx/document.py b/docx/document.py deleted file mode 100644 index ba94a7990..000000000 --- a/docx/document.py +++ /dev/null @@ -1,215 +0,0 @@ -# encoding: utf-8 - -""" -|Document| and closely related objects -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from .blkcntnr import BlockItemContainer -from .enum.section import WD_SECTION -from .enum.text import WD_BREAK -from .section import Section, Sections -from .shared import ElementProxy, Emu - - -class Document(ElementProxy): - """ - WordprocessingML (WML) document. Not intended to be constructed directly. - Use :func:`docx.Document` to open or create a document. 
- """ - - __slots__ = ('_part', '__body') - - def __init__(self, element, part): - super(Document, self).__init__(element) - self._part = part - self.__body = None - - def add_heading(self, text='', level=1): - """ - Return a heading paragraph newly added to the end of the document, - containing *text* and having its paragraph style determined by - *level*. If *level* is 0, the style is set to `Title`. If *level* is - 1 (or omitted), `Heading 1` is used. Otherwise the style is set to - `Heading {level}`. Raises |ValueError| if *level* is outside the - range 0-9. - """ - if not 0 <= level <= 9: - raise ValueError("level must be in range 0-9, got %d" % level) - style = 'Title' if level == 0 else 'Heading %d' % level - return self.add_paragraph(text, style) - - def add_page_break(self): - """ - Return a paragraph newly added to the end of the document and - containing only a page break. - """ - paragraph = self.add_paragraph() - paragraph.add_run().add_break(WD_BREAK.PAGE) - return paragraph - - def add_paragraph(self, text='', style=None): - """ - Return a paragraph newly added to the end of the document, populated - with *text* and having paragraph style *style*. *text* can contain - tab (``\\t``) characters, which are converted to the appropriate XML - form for a tab. *text* can also include newline (``\\n``) or carriage - return (``\\r``) characters, each of which is converted to a line - break. - """ - return self._body.add_paragraph(text, style) - - def add_picture(self, image_path_or_stream, width=None, height=None): - """ - Return a new picture shape added in its own paragraph at the end of - the document. The picture contains the image at - *image_path_or_stream*, scaled based on *width* and *height*. If - neither width nor height is specified, the picture appears at its - native size. If only one is specified, it is used to compute - a scaling factor that is then applied to the unspecified dimension, - preserving the aspect ratio of the image. 
The native size of the - picture is calculated using the dots-per-inch (dpi) value specified - in the image file, defaulting to 72 dpi if no value is specified, as - is often the case. - """ - run = self.add_paragraph().add_run() - return run.add_picture(image_path_or_stream, width, height) - - def add_section(self, start_type=WD_SECTION.NEW_PAGE): - """ - Return a |Section| object representing a new section added at the end - of the document. The optional *start_type* argument must be a member - of the :ref:`WdSectionStart` enumeration, and defaults to - ``WD_SECTION.NEW_PAGE`` if not provided. - """ - new_sectPr = self._element.body.add_section_break() - new_sectPr.start_type = start_type - return Section(new_sectPr) - - def add_table(self, rows, cols, style=None): - """ - Add a table having row and column counts of *rows* and *cols* - respectively and table style of *style*. *style* may be a paragraph - style object or a paragraph style name. If *style* is |None|, the - table inherits the default table style of the document. - """ - table = self._body.add_table(rows, cols, self._block_width) - table.style = style - return table - - @property - def core_properties(self): - """ - A |CoreProperties| object providing read/write access to the core - properties of this document. - """ - return self._part.core_properties - - @property - def inline_shapes(self): - """ - An |InlineShapes| object providing access to the inline shapes in - this document. An inline shape is a graphical object, such as - a picture, contained in a run of text and behaving like a character - glyph, being flowed like other text in a paragraph. - """ - return self._part.inline_shapes - - @property - def paragraphs(self): - """ - A list of |Paragraph| instances corresponding to the paragraphs in - the document, in document order. Note that paragraphs within revision - marks such as ```` or ```` do not appear in this list. 
- """ - return self._body.paragraphs - - @property - def part(self): - """ - The |DocumentPart| object of this document. - """ - return self._part - - def save(self, path_or_stream): - """ - Save this document to *path_or_stream*, which can be either a path to - a filesystem location (a string) or a file-like object. - """ - self._part.save(path_or_stream) - - @property - def sections(self): - """ - A |Sections| object providing access to each section in this - document. - """ - return Sections(self._element) - - @property - def settings(self): - """ - A |Settings| object providing access to the document-level settings - for this document. - """ - return self._part.settings - - @property - def styles(self): - """ - A |Styles| object providing access to the styles in this document. - """ - return self._part.styles - - @property - def tables(self): - """ - A list of |Table| instances corresponding to the tables in the - document, in document order. Note that only tables appearing at the - top level of the document appear in this list; a table nested inside - a table cell does not appear. A table within revision marks such as - ```` or ```` will also not appear in the list. - """ - return self._body.tables - - @property - def _block_width(self): - """ - Return a |Length| object specifying the width of available "writing" - space between the margins of the last section of this document. - """ - section = self.sections[-1] - return Emu( - section.page_width - section.left_margin - section.right_margin - ) - - @property - def _body(self): - """ - The |_Body| instance containing the content for this document. - """ - if self.__body is None: - self.__body = _Body(self._element.body, self) - return self.__body - - -class _Body(BlockItemContainer): - """ - Proxy for ```` element in this document, having primarily a - container role. 
- """ - def __init__(self, body_elm, parent): - super(_Body, self).__init__(body_elm, parent) - self._body = body_elm - - def clear_content(self): - """ - Return this |_Body| instance after clearing it of all content. - Section properties for the main document story, if present, are - preserved. - """ - self._body.clear_content() - return self diff --git a/docx/enum/__init__.py b/docx/enum/__init__.py deleted file mode 100644 index dd49faafd..000000000 --- a/docx/enum/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# encoding: utf-8 - -""" -Enumerations used in python-docx -""" - -from __future__ import absolute_import, print_function, unicode_literals - - -class Enumeration(object): - - @classmethod - def from_xml(cls, xml_val): - return cls._xml_to_idx[xml_val] - - @classmethod - def to_xml(cls, enum_val): - return cls._idx_to_xml[enum_val] diff --git a/docx/enum/base.py b/docx/enum/base.py deleted file mode 100644 index aad44e9c8..000000000 --- a/docx/enum/base.py +++ /dev/null @@ -1,360 +0,0 @@ -# encoding: utf-8 - -""" -Base classes and other objects used by enumerations -""" - -from __future__ import absolute_import, print_function - -import sys -import textwrap - -from ..exceptions import InvalidXmlError - - -def alias(*aliases): - """ - Decorating a class with @alias('FOO', 'BAR', ..) allows the class to - be referenced by each of the names provided as arguments. - """ - def decorator(cls): - # alias must be set in globals from caller's frame - caller = sys._getframe(1) - globals_dict = caller.f_globals - for alias in aliases: - globals_dict[alias] = cls - return cls - return decorator - - -class _DocsPageFormatter(object): - """ - Formats a RestructuredText documention page (string) for the enumeration - class parts passed to the constructor. An immutable one-shot service - object. 
- """ - def __init__(self, clsname, clsdict): - self._clsname = clsname - self._clsdict = clsdict - - @property - def page_str(self): - """ - The RestructuredText documentation page for the enumeration. This is - the only API member for the class. - """ - tmpl = '.. _%s:\n\n%s\n\n%s\n\n----\n\n%s' - components = ( - self._ms_name, self._page_title, self._intro_text, - self._member_defs - ) - return tmpl % components - - @property - def _intro_text(self): - """ - The docstring of the enumeration, formatted for use at the top of the - documentation page - """ - try: - cls_docstring = self._clsdict['__doc__'] - except KeyError: - cls_docstring = '' - return textwrap.dedent(cls_docstring).strip() - - def _member_def(self, member): - """ - Return an individual member definition formatted as an RST glossary - entry, wrapped to fit within 78 columns. - """ - member_docstring = textwrap.dedent(member.docstring).strip() - member_docstring = textwrap.fill( - member_docstring, width=78, initial_indent=' '*4, - subsequent_indent=' '*4 - ) - return '%s\n%s\n' % (member.name, member_docstring) - - @property - def _member_defs(self): - """ - A single string containing the aggregated member definitions section - of the documentation page - """ - members = self._clsdict['__members__'] - member_defs = [ - self._member_def(member) for member in members - if member.name is not None - ] - return '\n'.join(member_defs) - - @property - def _ms_name(self): - """ - The Microsoft API name for this enumeration - """ - return self._clsdict['__ms_name__'] - - @property - def _page_title(self): - """ - The title for the documentation page, formatted as code (surrounded - in double-backtics) and underlined with '=' characters - """ - title_underscore = '=' * (len(self._clsname)+4) - return '``%s``\n%s' % (self._clsname, title_underscore) - - -class MetaEnumeration(type): - """ - The metaclass for Enumeration and its subclasses. 
Adds a name for each - named member and compiles state needed by the enumeration class to - respond to other attribute gets - """ - def __new__(meta, clsname, bases, clsdict): - meta._add_enum_members(clsdict) - meta._collect_valid_settings(clsdict) - meta._generate_docs_page(clsname, clsdict) - return type.__new__(meta, clsname, bases, clsdict) - - @classmethod - def _add_enum_members(meta, clsdict): - """ - Dispatch ``.add_to_enum()`` call to each member so it can do its - thing to properly add itself to the enumeration class. This - delegation allows member sub-classes to add specialized behaviors. - """ - enum_members = clsdict['__members__'] - for member in enum_members: - member.add_to_enum(clsdict) - - @classmethod - def _collect_valid_settings(meta, clsdict): - """ - Return a sequence containing the enumeration values that are valid - assignment values. Return-only values are excluded. - """ - enum_members = clsdict['__members__'] - valid_settings = [] - for member in enum_members: - valid_settings.extend(member.valid_settings) - clsdict['_valid_settings'] = valid_settings - - @classmethod - def _generate_docs_page(meta, clsname, clsdict): - """ - Return the RST documentation page for the enumeration. - """ - clsdict['__docs_rst__'] = ( - _DocsPageFormatter(clsname, clsdict).page_str - ) - - -class EnumerationBase(object): - """ - Base class for all enumerations, used directly for enumerations requiring - only basic behavior. It's __dict__ is used below in the Python 2+3 - compatible metaclass definition. - """ - __members__ = () - __ms_name__ = '' - - @classmethod - def validate(cls, value): - """ - Raise |ValueError| if *value* is not an assignable value. 
- """ - if value not in cls._valid_settings: - raise ValueError( - "%s not a member of %s enumeration" % (value, cls.__name__) - ) - - -Enumeration = MetaEnumeration( - 'Enumeration', (object,), dict(EnumerationBase.__dict__) -) - - -class XmlEnumeration(Enumeration): - """ - Provides ``to_xml()`` and ``from_xml()`` methods in addition to base - enumeration features - """ - __members__ = () - __ms_name__ = '' - - @classmethod - def from_xml(cls, xml_val): - """ - Return the enumeration member corresponding to the XML value - *xml_val*. - """ - if xml_val not in cls._xml_to_member: - raise InvalidXmlError( - "attribute value '%s' not valid for this type" % xml_val - ) - return cls._xml_to_member[xml_val] - - @classmethod - def to_xml(cls, enum_val): - """ - Return the XML value of the enumeration value *enum_val*. - """ - if enum_val not in cls._member_to_xml: - raise ValueError( - "value '%s' not in enumeration %s" % (enum_val, cls.__name__) - ) - return cls._member_to_xml[enum_val] - - -class EnumMember(object): - """ - Used in the enumeration class definition to define a member value and its - mappings - """ - def __init__(self, name, value, docstring): - self._name = name - if isinstance(value, int): - value = EnumValue(name, value, docstring) - self._value = value - self._docstring = docstring - - def add_to_enum(self, clsdict): - """ - Add a name to *clsdict* for this member. - """ - self.register_name(clsdict) - - @property - def docstring(self): - """ - The description of this member - """ - return self._docstring - - @property - def name(self): - """ - The distinguishing name of this member within the enumeration class, - e.g. 'MIDDLE' for MSO_VERTICAL_ANCHOR.MIDDLE, if this is a named - member. Otherwise the primitive value such as |None|, |True| or - |False|. - """ - return self._name - - def register_name(self, clsdict): - """ - Add a member name to the class dict *clsdict* containing the value of - this member object. 
Where the name of this object is None, do - nothing; this allows out-of-band values to be defined without adding - a name to the class dict. - """ - if self.name is None: - return - clsdict[self.name] = self.value - - @property - def valid_settings(self): - """ - A sequence containing the values valid for assignment for this - member. May be zero, one, or more in number. - """ - return (self._value,) - - @property - def value(self): - """ - The enumeration value for this member, often an instance of - EnumValue, but may be a primitive value such as |None|. - """ - return self._value - - -class EnumValue(int): - """ - A named enumeration value, providing __str__ and __doc__ string values - for its symbolic name and description, respectively. Subclasses int, so - behaves as a regular int unless the strings are asked for. - """ - def __new__(cls, member_name, int_value, docstring): - return super(EnumValue, cls).__new__(cls, int_value) - - def __init__(self, member_name, int_value, docstring): - super(EnumValue, self).__init__() - self._member_name = member_name - self._docstring = docstring - - @property - def __doc__(self): - """ - The description of this enumeration member - """ - return self._docstring.strip() - - def __str__(self): - """ - The symbolic name and string value of this member, e.g. 'MIDDLE (3)' - """ - return "%s (%d)" % (self._member_name, int(self)) - - -class ReturnValueOnlyEnumMember(EnumMember): - """ - Used to define a member of an enumeration that is only valid as a query - result and is not valid as a setting, e.g. MSO_VERTICAL_ANCHOR.MIXED (-2) - """ - @property - def valid_settings(self): - """ - No settings are valid for a return-only value. - """ - return () - - -class XmlMappedEnumMember(EnumMember): - """ - Used to define a member whose value maps to an XML attribute value. 
- """ - def __init__(self, name, value, xml_value, docstring): - super(XmlMappedEnumMember, self).__init__(name, value, docstring) - self._xml_value = xml_value - - def add_to_enum(self, clsdict): - """ - Compile XML mappings in addition to base add behavior. - """ - super(XmlMappedEnumMember, self).add_to_enum(clsdict) - self.register_xml_mapping(clsdict) - - def register_xml_mapping(self, clsdict): - """ - Add XML mappings to the enumeration class state for this member. - """ - member_to_xml = self._get_or_add_member_to_xml(clsdict) - member_to_xml[self.value] = self.xml_value - xml_to_member = self._get_or_add_xml_to_member(clsdict) - xml_to_member[self.xml_value] = self.value - - @property - def xml_value(self): - """ - The XML attribute value that corresponds to this enumeration value - """ - return self._xml_value - - @staticmethod - def _get_or_add_member_to_xml(clsdict): - """ - Add the enum -> xml value mapping to the enumeration class state - """ - if '_member_to_xml' not in clsdict: - clsdict['_member_to_xml'] = dict() - return clsdict['_member_to_xml'] - - @staticmethod - def _get_or_add_xml_to_member(clsdict): - """ - Add the xml -> enum value mapping to the enumeration class state - """ - if '_xml_to_member' not in clsdict: - clsdict['_xml_to_member'] = dict() - return clsdict['_xml_to_member'] diff --git a/docx/enum/dml.py b/docx/enum/dml.py deleted file mode 100644 index 1ad0eaa87..000000000 --- a/docx/enum/dml.py +++ /dev/null @@ -1,124 +0,0 @@ -# encoding: utf-8 - -""" -Enumerations used by DrawingML objects -""" - -from __future__ import absolute_import - -from .base import ( - alias, Enumeration, EnumMember, XmlEnumeration, XmlMappedEnumMember -) - - -class MSO_COLOR_TYPE(Enumeration): - """ - Specifies the color specification scheme - - Example:: - - from docx.enum.dml import MSO_COLOR_TYPE - - assert font.color.type == MSO_COLOR_TYPE.SCHEME - """ - - __ms_name__ = 'MsoColorType' - - __url__ = ( - 
'http://msdn.microsoft.com/en-us/library/office/ff864912(v=office.15' - ').aspx' - ) - - __members__ = ( - EnumMember( - 'RGB', 1, 'Color is specified by an |RGBColor| value.' - ), - EnumMember( - 'THEME', 2, 'Color is one of the preset theme colors.' - ), - EnumMember( - 'AUTO', 101, 'Color is determined automatically by the ' - 'application.' - ), - ) - - -@alias('MSO_THEME_COLOR') -class MSO_THEME_COLOR_INDEX(XmlEnumeration): - """ - Indicates the Office theme color, one of those shown in the color gallery - on the formatting ribbon. - - Alias: ``MSO_THEME_COLOR`` - - Example:: - - from docx.enum.dml import MSO_THEME_COLOR - - font.color.theme_color = MSO_THEME_COLOR.ACCENT_1 - """ - - __ms_name__ = 'MsoThemeColorIndex' - - __url__ = ( - 'http://msdn.microsoft.com/en-us/library/office/ff860782(v=office.15' - ').aspx' - ) - - __members__ = ( - EnumMember( - 'NOT_THEME_COLOR', 0, 'Indicates the color is not a theme color.' - ), - XmlMappedEnumMember( - 'ACCENT_1', 5, 'accent1', 'Specifies the Accent 1 theme color.' - ), - XmlMappedEnumMember( - 'ACCENT_2', 6, 'accent2', 'Specifies the Accent 2 theme color.' - ), - XmlMappedEnumMember( - 'ACCENT_3', 7, 'accent3', 'Specifies the Accent 3 theme color.' - ), - XmlMappedEnumMember( - 'ACCENT_4', 8, 'accent4', 'Specifies the Accent 4 theme color.' - ), - XmlMappedEnumMember( - 'ACCENT_5', 9, 'accent5', 'Specifies the Accent 5 theme color.' - ), - XmlMappedEnumMember( - 'ACCENT_6', 10, 'accent6', 'Specifies the Accent 6 theme color.' - ), - XmlMappedEnumMember( - 'BACKGROUND_1', 14, 'background1', 'Specifies the Background 1 ' - 'theme color.' - ), - XmlMappedEnumMember( - 'BACKGROUND_2', 16, 'background2', 'Specifies the Background 2 ' - 'theme color.' - ), - XmlMappedEnumMember( - 'DARK_1', 1, 'dark1', 'Specifies the Dark 1 theme color.' - ), - XmlMappedEnumMember( - 'DARK_2', 3, 'dark2', 'Specifies the Dark 2 theme color.' 
- ), - XmlMappedEnumMember( - 'FOLLOWED_HYPERLINK', 12, 'followedHyperlink', 'Specifies the ' - 'theme color for a clicked hyperlink.' - ), - XmlMappedEnumMember( - 'HYPERLINK', 11, 'hyperlink', 'Specifies the theme color for a ' - 'hyperlink.' - ), - XmlMappedEnumMember( - 'LIGHT_1', 2, 'light1', 'Specifies the Light 1 theme color.' - ), - XmlMappedEnumMember( - 'LIGHT_2', 4, 'light2', 'Specifies the Light 2 theme color.' - ), - XmlMappedEnumMember( - 'TEXT_1', 13, 'text1', 'Specifies the Text 1 theme color.' - ), - XmlMappedEnumMember( - 'TEXT_2', 15, 'text2', 'Specifies the Text 2 theme color.' - ), - ) diff --git a/docx/enum/section.py b/docx/enum/section.py deleted file mode 100644 index b16ddbe72..000000000 --- a/docx/enum/section.py +++ /dev/null @@ -1,76 +0,0 @@ -# encoding: utf-8 - -""" -Enumerations related to the main document in WordprocessingML files -""" - -from __future__ import absolute_import, print_function, unicode_literals - -from .base import alias, XmlEnumeration, XmlMappedEnumMember - - -@alias('WD_ORIENT') -class WD_ORIENTATION(XmlEnumeration): - """ - alias: **WD_ORIENT** - - Specifies the page layout orientation. - - Example:: - - from docx.enum.section import WD_ORIENT - - section = document.sections[-1] - section.orientation = WD_ORIENT.LANDSCAPE - """ - - __ms_name__ = 'WdOrientation' - - __url__ = 'http://msdn.microsoft.com/en-us/library/office/ff837902.aspx' - - __members__ = ( - XmlMappedEnumMember( - 'PORTRAIT', 0, 'portrait', 'Portrait orientation.' - ), - XmlMappedEnumMember( - 'LANDSCAPE', 1, 'landscape', 'Landscape orientation.' - ), - ) - - -@alias('WD_SECTION') -class WD_SECTION_START(XmlEnumeration): - """ - alias: **WD_SECTION** - - Specifies the start type of a section break. 
- - Example:: - - from docx.enum.section import WD_SECTION - - section = document.sections[0] - section.start_type = WD_SECTION.NEW_PAGE - """ - - __ms_name__ = 'WdSectionStart' - - __url__ = 'http://msdn.microsoft.com/en-us/library/office/ff840975.aspx' - - __members__ = ( - XmlMappedEnumMember( - 'CONTINUOUS', 0, 'continuous', 'Continuous section break.' - ), - XmlMappedEnumMember( - 'NEW_COLUMN', 1, 'nextColumn', 'New column section break.' - ), - XmlMappedEnumMember( - 'NEW_PAGE', 2, 'nextPage', 'New page section break.' - ), - XmlMappedEnumMember( - 'EVEN_PAGE', 3, 'evenPage', 'Even pages section break.' - ), - XmlMappedEnumMember( - 'ODD_PAGE', 4, 'oddPage', 'Section begins on next odd page.' - ), - ) diff --git a/docx/enum/shape.py b/docx/enum/shape.py deleted file mode 100644 index f1d6ffd8c..000000000 --- a/docx/enum/shape.py +++ /dev/null @@ -1,21 +0,0 @@ -# encoding: utf-8 - -""" -Enumerations related to DrawingML shapes in WordprocessingML files -""" - -from __future__ import absolute_import, print_function, unicode_literals - - -class WD_INLINE_SHAPE_TYPE(object): - """ - Corresponds to WdInlineShapeType enumeration - http://msdn.microsoft.com/en-us/library/office/ff192587.aspx - """ - CHART = 12 - LINKED_PICTURE = 4 - PICTURE = 3 - SMART_ART = 15 - NOT_IMPLEMENTED = -6 - -WD_INLINE_SHAPE = WD_INLINE_SHAPE_TYPE diff --git a/docx/enum/style.py b/docx/enum/style.py deleted file mode 100644 index 515c594ce..000000000 --- a/docx/enum/style.py +++ /dev/null @@ -1,466 +0,0 @@ -# encoding: utf-8 - -""" -Enumerations related to styles -""" - -from __future__ import absolute_import, print_function, unicode_literals - -from .base import alias, EnumMember, XmlEnumeration, XmlMappedEnumMember - - -@alias('WD_STYLE') -class WD_BUILTIN_STYLE(XmlEnumeration): - """ - alias: **WD_STYLE** - - Specifies a built-in Microsoft Word style. 
- - Example:: - - from docx import Document - from docx.enum.style import WD_STYLE - - document = Document() - styles = document.styles - style = styles[WD_STYLE.BODY_TEXT] - """ - - __ms_name__ = 'WdBuiltinStyle' - - __url__ = 'http://msdn.microsoft.com/en-us/library/office/ff835210.aspx' - - __members__ = ( - EnumMember( - 'BLOCK_QUOTATION', -85, 'Block Text.' - ), - EnumMember( - 'BODY_TEXT', -67, 'Body Text.' - ), - EnumMember( - 'BODY_TEXT_2', -81, 'Body Text 2.' - ), - EnumMember( - 'BODY_TEXT_3', -82, 'Body Text 3.' - ), - EnumMember( - 'BODY_TEXT_FIRST_INDENT', -78, 'Body Text First Indent.' - ), - EnumMember( - 'BODY_TEXT_FIRST_INDENT_2', -79, 'Body Text First Indent 2.' - ), - EnumMember( - 'BODY_TEXT_INDENT', -68, 'Body Text Indent.' - ), - EnumMember( - 'BODY_TEXT_INDENT_2', -83, 'Body Text Indent 2.' - ), - EnumMember( - 'BODY_TEXT_INDENT_3', -84, 'Body Text Indent 3.' - ), - EnumMember( - 'BOOK_TITLE', -265, 'Book Title.' - ), - EnumMember( - 'CAPTION', -35, 'Caption.' - ), - EnumMember( - 'CLOSING', -64, 'Closing.' - ), - EnumMember( - 'COMMENT_REFERENCE', -40, 'Comment Reference.' - ), - EnumMember( - 'COMMENT_TEXT', -31, 'Comment Text.' - ), - EnumMember( - 'DATE', -77, 'Date.' - ), - EnumMember( - 'DEFAULT_PARAGRAPH_FONT', -66, 'Default Paragraph Font.' - ), - EnumMember( - 'EMPHASIS', -89, 'Emphasis.' - ), - EnumMember( - 'ENDNOTE_REFERENCE', -43, 'Endnote Reference.' - ), - EnumMember( - 'ENDNOTE_TEXT', -44, 'Endnote Text.' - ), - EnumMember( - 'ENVELOPE_ADDRESS', -37, 'Envelope Address.' - ), - EnumMember( - 'ENVELOPE_RETURN', -38, 'Envelope Return.' - ), - EnumMember( - 'FOOTER', -33, 'Footer.' - ), - EnumMember( - 'FOOTNOTE_REFERENCE', -39, 'Footnote Reference.' - ), - EnumMember( - 'FOOTNOTE_TEXT', -30, 'Footnote Text.' - ), - EnumMember( - 'HEADER', -32, 'Header.' - ), - EnumMember( - 'HEADING_1', -2, 'Heading 1.' - ), - EnumMember( - 'HEADING_2', -3, 'Heading 2.' - ), - EnumMember( - 'HEADING_3', -4, 'Heading 3.' 
- ), - EnumMember( - 'HEADING_4', -5, 'Heading 4.' - ), - EnumMember( - 'HEADING_5', -6, 'Heading 5.' - ), - EnumMember( - 'HEADING_6', -7, 'Heading 6.' - ), - EnumMember( - 'HEADING_7', -8, 'Heading 7.' - ), - EnumMember( - 'HEADING_8', -9, 'Heading 8.' - ), - EnumMember( - 'HEADING_9', -10, 'Heading 9.' - ), - EnumMember( - 'HTML_ACRONYM', -96, 'HTML Acronym.' - ), - EnumMember( - 'HTML_ADDRESS', -97, 'HTML Address.' - ), - EnumMember( - 'HTML_CITE', -98, 'HTML Cite.' - ), - EnumMember( - 'HTML_CODE', -99, 'HTML Code.' - ), - EnumMember( - 'HTML_DFN', -100, 'HTML Definition.' - ), - EnumMember( - 'HTML_KBD', -101, 'HTML Keyboard.' - ), - EnumMember( - 'HTML_NORMAL', -95, 'Normal (Web).' - ), - EnumMember( - 'HTML_PRE', -102, 'HTML Preformatted.' - ), - EnumMember( - 'HTML_SAMP', -103, 'HTML Sample.' - ), - EnumMember( - 'HTML_TT', -104, 'HTML Typewriter.' - ), - EnumMember( - 'HTML_VAR', -105, 'HTML Variable.' - ), - EnumMember( - 'HYPERLINK', -86, 'Hyperlink.' - ), - EnumMember( - 'HYPERLINK_FOLLOWED', -87, 'Followed Hyperlink.' - ), - EnumMember( - 'INDEX_1', -11, 'Index 1.' - ), - EnumMember( - 'INDEX_2', -12, 'Index 2.' - ), - EnumMember( - 'INDEX_3', -13, 'Index 3.' - ), - EnumMember( - 'INDEX_4', -14, 'Index 4.' - ), - EnumMember( - 'INDEX_5', -15, 'Index 5.' - ), - EnumMember( - 'INDEX_6', -16, 'Index 6.' - ), - EnumMember( - 'INDEX_7', -17, 'Index 7.' - ), - EnumMember( - 'INDEX_8', -18, 'Index 8.' - ), - EnumMember( - 'INDEX_9', -19, 'Index 9.' - ), - EnumMember( - 'INDEX_HEADING', -34, 'Index Heading' - ), - EnumMember( - 'INTENSE_EMPHASIS', -262, 'Intense Emphasis.' - ), - EnumMember( - 'INTENSE_QUOTE', -182, 'Intense Quote.' - ), - EnumMember( - 'INTENSE_REFERENCE', -264, 'Intense Reference.' - ), - EnumMember( - 'LINE_NUMBER', -41, 'Line Number.' - ), - EnumMember( - 'LIST', -48, 'List.' - ), - EnumMember( - 'LIST_2', -51, 'List 2.' - ), - EnumMember( - 'LIST_3', -52, 'List 3.' - ), - EnumMember( - 'LIST_4', -53, 'List 4.' 
- ), - EnumMember( - 'LIST_5', -54, 'List 5.' - ), - EnumMember( - 'LIST_BULLET', -49, 'List Bullet.' - ), - EnumMember( - 'LIST_BULLET_2', -55, 'List Bullet 2.' - ), - EnumMember( - 'LIST_BULLET_3', -56, 'List Bullet 3.' - ), - EnumMember( - 'LIST_BULLET_4', -57, 'List Bullet 4.' - ), - EnumMember( - 'LIST_BULLET_5', -58, 'List Bullet 5.' - ), - EnumMember( - 'LIST_CONTINUE', -69, 'List Continue.' - ), - EnumMember( - 'LIST_CONTINUE_2', -70, 'List Continue 2.' - ), - EnumMember( - 'LIST_CONTINUE_3', -71, 'List Continue 3.' - ), - EnumMember( - 'LIST_CONTINUE_4', -72, 'List Continue 4.' - ), - EnumMember( - 'LIST_CONTINUE_5', -73, 'List Continue 5.' - ), - EnumMember( - 'LIST_NUMBER', -50, 'List Number.' - ), - EnumMember( - 'LIST_NUMBER_2', -59, 'List Number 2.' - ), - EnumMember( - 'LIST_NUMBER_3', -60, 'List Number 3.' - ), - EnumMember( - 'LIST_NUMBER_4', -61, 'List Number 4.' - ), - EnumMember( - 'LIST_NUMBER_5', -62, 'List Number 5.' - ), - EnumMember( - 'LIST_PARAGRAPH', -180, 'List Paragraph.' - ), - EnumMember( - 'MACRO_TEXT', -46, 'Macro Text.' - ), - EnumMember( - 'MESSAGE_HEADER', -74, 'Message Header.' - ), - EnumMember( - 'NAV_PANE', -90, 'Document Map.' - ), - EnumMember( - 'NORMAL', -1, 'Normal.' - ), - EnumMember( - 'NORMAL_INDENT', -29, 'Normal Indent.' - ), - EnumMember( - 'NORMAL_OBJECT', -158, 'Normal (applied to an object).' - ), - EnumMember( - 'NORMAL_TABLE', -106, 'Normal (applied within a table).' - ), - EnumMember( - 'NOTE_HEADING', -80, 'Note Heading.' - ), - EnumMember( - 'PAGE_NUMBER', -42, 'Page Number.' - ), - EnumMember( - 'PLAIN_TEXT', -91, 'Plain Text.' - ), - EnumMember( - 'QUOTE', -181, 'Quote.' - ), - EnumMember( - 'SALUTATION', -76, 'Salutation.' - ), - EnumMember( - 'SIGNATURE', -65, 'Signature.' - ), - EnumMember( - 'STRONG', -88, 'Strong.' - ), - EnumMember( - 'SUBTITLE', -75, 'Subtitle.' - ), - EnumMember( - 'SUBTLE_EMPHASIS', -261, 'Subtle Emphasis.' - ), - EnumMember( - 'SUBTLE_REFERENCE', -263, 'Subtle Reference.' 
- ), - EnumMember( - 'TABLE_COLORFUL_GRID', -172, 'Colorful Grid.' - ), - EnumMember( - 'TABLE_COLORFUL_LIST', -171, 'Colorful List.' - ), - EnumMember( - 'TABLE_COLORFUL_SHADING', -170, 'Colorful Shading.' - ), - EnumMember( - 'TABLE_DARK_LIST', -169, 'Dark List.' - ), - EnumMember( - 'TABLE_LIGHT_GRID', -161, 'Light Grid.' - ), - EnumMember( - 'TABLE_LIGHT_GRID_ACCENT_1', -175, 'Light Grid Accent 1.' - ), - EnumMember( - 'TABLE_LIGHT_LIST', -160, 'Light List.' - ), - EnumMember( - 'TABLE_LIGHT_LIST_ACCENT_1', -174, 'Light List Accent 1.' - ), - EnumMember( - 'TABLE_LIGHT_SHADING', -159, 'Light Shading.' - ), - EnumMember( - 'TABLE_LIGHT_SHADING_ACCENT_1', -173, 'Light Shading Accent 1.' - ), - EnumMember( - 'TABLE_MEDIUM_GRID_1', -166, 'Medium Grid 1.' - ), - EnumMember( - 'TABLE_MEDIUM_GRID_2', -167, 'Medium Grid 2.' - ), - EnumMember( - 'TABLE_MEDIUM_GRID_3', -168, 'Medium Grid 3.' - ), - EnumMember( - 'TABLE_MEDIUM_LIST_1', -164, 'Medium List 1.' - ), - EnumMember( - 'TABLE_MEDIUM_LIST_1_ACCENT_1', -178, 'Medium List 1 Accent 1.' - ), - EnumMember( - 'TABLE_MEDIUM_LIST_2', -165, 'Medium List 2.' - ), - EnumMember( - 'TABLE_MEDIUM_SHADING_1', -162, 'Medium Shading 1.' - ), - EnumMember( - 'TABLE_MEDIUM_SHADING_1_ACCENT_1', -176, - 'Medium Shading 1 Accent 1.' - ), - EnumMember( - 'TABLE_MEDIUM_SHADING_2', -163, 'Medium Shading 2.' - ), - EnumMember( - 'TABLE_MEDIUM_SHADING_2_ACCENT_1', -177, - 'Medium Shading 2 Accent 1.' - ), - EnumMember( - 'TABLE_OF_AUTHORITIES', -45, 'Table of Authorities.' - ), - EnumMember( - 'TABLE_OF_FIGURES', -36, 'Table of Figures.' - ), - EnumMember( - 'TITLE', -63, 'Title.' - ), - EnumMember( - 'TOAHEADING', -47, 'TOA Heading.' - ), - EnumMember( - 'TOC_1', -20, 'TOC 1.' - ), - EnumMember( - 'TOC_2', -21, 'TOC 2.' - ), - EnumMember( - 'TOC_3', -22, 'TOC 3.' - ), - EnumMember( - 'TOC_4', -23, 'TOC 4.' - ), - EnumMember( - 'TOC_5', -24, 'TOC 5.' - ), - EnumMember( - 'TOC_6', -25, 'TOC 6.' - ), - EnumMember( - 'TOC_7', -26, 'TOC 7.' 
- ), - EnumMember( - 'TOC_8', -27, 'TOC 8.' - ), - EnumMember( - 'TOC_9', -28, 'TOC 9.' - ), - ) - - -class WD_STYLE_TYPE(XmlEnumeration): - """ - Specifies one of the four style types: paragraph, character, list, or - table. - - Example:: - - from docx import Document - from docx.enum.style import WD_STYLE_TYPE - - styles = Document().styles - assert styles[0].type == WD_STYLE_TYPE.PARAGRAPH - """ - - __ms_name__ = 'WdStyleType' - - __url__ = 'http://msdn.microsoft.com/en-us/library/office/ff196870.aspx' - - __members__ = ( - XmlMappedEnumMember( - 'CHARACTER', 2, 'character', 'Character style.' - ), - XmlMappedEnumMember( - 'LIST', 4, 'numbering', 'List style.' - ), - XmlMappedEnumMember( - 'PARAGRAPH', 1, 'paragraph', 'Paragraph style.' - ), - XmlMappedEnumMember( - 'TABLE', 3, 'table', 'Table style.' - ), - ) diff --git a/docx/enum/table.py b/docx/enum/table.py deleted file mode 100644 index bc201346c..000000000 --- a/docx/enum/table.py +++ /dev/null @@ -1,71 +0,0 @@ -# encoding: utf-8 - -""" -Enumerations related to tables in WordprocessingML files -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from .base import ( - Enumeration, EnumMember, XmlEnumeration, XmlMappedEnumMember -) - - -class WD_TABLE_ALIGNMENT(XmlEnumeration): - """ - Specifies table justification type. - - Example:: - - from docx.enum.table import WD_TABLE_ALIGNMENT - - table = document.add_table(3, 3) - table.alignment = WD_TABLE_ALIGNMENT.CENTER - """ - - __ms_name__ = 'WdRowAlignment' - - __url__ = ' http://office.microsoft.com/en-us/word-help/HV080607259.aspx' - - __members__ = ( - XmlMappedEnumMember( - 'LEFT', 0, 'left', 'Left-aligned' - ), - XmlMappedEnumMember( - 'CENTER', 1, 'center', 'Center-aligned.' - ), - XmlMappedEnumMember( - 'RIGHT', 2, 'right', 'Right-aligned.' - ), - ) - - -class WD_TABLE_DIRECTION(Enumeration): - """ - Specifies the direction in which an application orders cells in the - specified table or row. 
- - Example:: - - from docx.enum.table import WD_TABLE_DIRECTION - - table = document.add_table(3, 3) - table.direction = WD_TABLE_DIRECTION.RTL - """ - - __ms_name__ = 'WdTableDirection' - - __url__ = ' http://msdn.microsoft.com/en-us/library/ff835141.aspx' - - __members__ = ( - EnumMember( - 'LTR', 0, 'The table or row is arranged with the first column ' - 'in the leftmost position.' - ), - EnumMember( - 'RTL', 1, 'The table or row is arranged with the first column ' - 'in the rightmost position.' - ), - ) diff --git a/docx/enum/text.py b/docx/enum/text.py deleted file mode 100644 index f4111eb92..000000000 --- a/docx/enum/text.py +++ /dev/null @@ -1,351 +0,0 @@ -# encoding: utf-8 - -""" -Enumerations related to text in WordprocessingML files -""" - -from __future__ import absolute_import, print_function, unicode_literals - -from .base import alias, EnumMember, XmlEnumeration, XmlMappedEnumMember - - -@alias('WD_ALIGN_PARAGRAPH') -class WD_PARAGRAPH_ALIGNMENT(XmlEnumeration): - """ - alias: **WD_ALIGN_PARAGRAPH** - - Specifies paragraph justification type. - - Example:: - - from docx.enum.text import WD_ALIGN_PARAGRAPH - - paragraph = document.add_paragraph() - paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER - """ - - __ms_name__ = 'WdParagraphAlignment' - - __url__ = 'http://msdn.microsoft.com/en-us/library/office/ff835817.aspx' - - __members__ = ( - XmlMappedEnumMember( - 'LEFT', 0, 'left', 'Left-aligned' - ), - XmlMappedEnumMember( - 'CENTER', 1, 'center', 'Center-aligned.' - ), - XmlMappedEnumMember( - 'RIGHT', 2, 'right', 'Right-aligned.' - ), - XmlMappedEnumMember( - 'JUSTIFY', 3, 'both', 'Fully justified.' - ), - XmlMappedEnumMember( - 'DISTRIBUTE', 4, 'distribute', 'Paragraph characters are distrib' - 'uted to fill the entire width of the paragraph.' - ), - XmlMappedEnumMember( - 'JUSTIFY_MED', 5, 'mediumKashida', 'Justified with a medium char' - 'acter compression ratio.' 
- ), - XmlMappedEnumMember( - 'JUSTIFY_HI', 7, 'highKashida', 'Justified with a high character' - ' compression ratio.' - ), - XmlMappedEnumMember( - 'JUSTIFY_LOW', 8, 'lowKashida', 'Justified with a low character ' - 'compression ratio.' - ), - XmlMappedEnumMember( - 'THAI_JUSTIFY', 9, 'thaiDistribute', 'Justified according to Tha' - 'i formatting layout.' - ), - ) - - -class WD_BREAK_TYPE(object): - """ - Corresponds to WdBreakType enumeration - http://msdn.microsoft.com/en-us/library/office/ff195905.aspx - """ - COLUMN = 8 - LINE = 6 - LINE_CLEAR_LEFT = 9 - LINE_CLEAR_RIGHT = 10 - LINE_CLEAR_ALL = 11 # added for consistency, not in MS version - PAGE = 7 - SECTION_CONTINUOUS = 3 - SECTION_EVEN_PAGE = 4 - SECTION_NEXT_PAGE = 2 - SECTION_ODD_PAGE = 5 - TEXT_WRAPPING = 11 - -WD_BREAK = WD_BREAK_TYPE - - -@alias('WD_COLOR') -class WD_COLOR_INDEX(XmlEnumeration): - """ - Specifies a standard preset color to apply. Used for font highlighting and - perhaps other applications. - """ - - __ms_name__ = 'WdColorIndex' - - __url__ = 'https://msdn.microsoft.com/EN-US/library/office/ff195343.aspx' - - __members__ = ( - XmlMappedEnumMember( - None, None, None, 'Color is inherited from the style hierarchy.' - ), - XmlMappedEnumMember( - 'AUTO', 0, 'default', 'Automatic color. Default; usually black.' - ), - XmlMappedEnumMember( - 'BLACK', 1, 'black', 'Black color.' - ), - XmlMappedEnumMember( - 'BLUE', 2, 'blue', 'Blue color' - ), - XmlMappedEnumMember( - 'BRIGHT_GREEN', 4, 'green', 'Bright green color.' - ), - XmlMappedEnumMember( - 'DARK_BLUE', 9, 'darkBlue', 'Dark blue color.' - ), - XmlMappedEnumMember( - 'DARK_RED', 13, 'darkRed', 'Dark red color.' - ), - XmlMappedEnumMember( - 'DARK_YELLOW', 14, 'darkYellow', 'Dark yellow color.' - ), - XmlMappedEnumMember( - 'GRAY_25', 16, 'lightGray', '25% shade of gray color.' - ), - XmlMappedEnumMember( - 'GRAY_50', 15, 'darkGray', '50% shade of gray color.' - ), - XmlMappedEnumMember( - 'GREEN', 11, 'darkGreen', 'Green color.' 
- ), - XmlMappedEnumMember( - 'PINK', 5, 'magenta', 'Pink color.' - ), - XmlMappedEnumMember( - 'RED', 6, 'red', 'Red color.' - ), - XmlMappedEnumMember( - 'TEAL', 10, 'darkCyan', 'Teal color.' - ), - XmlMappedEnumMember( - 'TURQUOISE', 3, 'cyan', 'Turquoise color.' - ), - XmlMappedEnumMember( - 'VIOLET', 12, 'darkMagenta', 'Violet color.' - ), - XmlMappedEnumMember( - 'WHITE', 8, 'white', 'White color.' - ), - XmlMappedEnumMember( - 'YELLOW', 7, 'yellow', 'Yellow color.' - ), - ) - - -class WD_LINE_SPACING(XmlEnumeration): - """ - Specifies a line spacing format to be applied to a paragraph. - - Example:: - - from docx.enum.text import WD_LINE_SPACING - - paragraph = document.add_paragraph() - paragraph.line_spacing_rule = WD_LINE_SPACING.EXACTLY - """ - - __ms_name__ = 'WdLineSpacing' - - __url__ = 'http://msdn.microsoft.com/en-us/library/office/ff844910.aspx' - - __members__ = ( - EnumMember( - 'ONE_POINT_FIVE', 1, 'Space-and-a-half line spacing.' - ), - XmlMappedEnumMember( - 'AT_LEAST', 3, 'atLeast', 'Line spacing is always at least the s' - 'pecified amount. The amount is specified separately.' - ), - EnumMember( - 'DOUBLE', 2, 'Double spaced.' - ), - XmlMappedEnumMember( - 'EXACTLY', 4, 'exact', 'Line spacing is exactly the specified am' - 'ount. The amount is specified separately.' - ), - XmlMappedEnumMember( - 'MULTIPLE', 5, 'auto', 'Line spacing is specified as a multiple ' - 'of line heights. Changing the font size will change the line sp' - 'acing proportionately.' - ), - EnumMember( - 'SINGLE', 0, 'Single spaced (default).' - ), - ) - - -class WD_TAB_ALIGNMENT(XmlEnumeration): - """ - Specifies the tab stop alignment to apply. - """ - - __ms_name__ = 'WdTabAlignment' - - __url__ = 'https://msdn.microsoft.com/EN-US/library/office/ff195609.aspx' - - __members__ = ( - XmlMappedEnumMember( - 'LEFT', 0, 'left', 'Left-aligned.' - ), - XmlMappedEnumMember( - 'CENTER', 1, 'center', 'Center-aligned.' 
- ), - XmlMappedEnumMember( - 'RIGHT', 2, 'right', 'Right-aligned.' - ), - XmlMappedEnumMember( - 'DECIMAL', 3, 'decimal', 'Decimal-aligned.' - ), - XmlMappedEnumMember( - 'BAR', 4, 'bar', 'Bar-aligned.' - ), - XmlMappedEnumMember( - 'LIST', 6, 'list', 'List-aligned. (deprecated)' - ), - XmlMappedEnumMember( - 'CLEAR', 101, 'clear', 'Clear an inherited tab stop.' - ), - XmlMappedEnumMember( - 'END', 102, 'end', 'Right-aligned. (deprecated)' - ), - XmlMappedEnumMember( - 'NUM', 103, 'num', 'Left-aligned. (deprecated)' - ), - XmlMappedEnumMember( - 'START', 104, 'start', 'Left-aligned. (deprecated)' - ), - ) - - -class WD_TAB_LEADER(XmlEnumeration): - """ - Specifies the character to use as the leader with formatted tabs. - """ - - __ms_name__ = 'WdTabLeader' - - __url__ = 'https://msdn.microsoft.com/en-us/library/office/ff845050.aspx' - - __members__ = ( - XmlMappedEnumMember( - 'SPACES', 0, 'none', 'Spaces. Default.' - ), - XmlMappedEnumMember( - 'DOTS', 1, 'dot', 'Dots.' - ), - XmlMappedEnumMember( - 'DASHES', 2, 'hyphen', 'Dashes.' - ), - XmlMappedEnumMember( - 'LINES', 3, 'underscore', 'Double lines.' - ), - XmlMappedEnumMember( - 'HEAVY', 4, 'heavy', 'A heavy line.' - ), - XmlMappedEnumMember( - 'MIDDLE_DOT', 5, 'middleDot', 'A vertically-centered dot.' - ), - ) - - -class WD_UNDERLINE(XmlEnumeration): - """ - Specifies the style of underline applied to a run of characters. - """ - - __ms_name__ = 'WdUnderline' - - __url__ = 'http://msdn.microsoft.com/en-us/library/office/ff822388.aspx' - - __members__ = ( - XmlMappedEnumMember( - None, None, None, 'Inherit underline setting from containing par' - 'agraph.' - ), - XmlMappedEnumMember( - 'NONE', 0, 'none', 'No underline. This setting overrides any inh' - 'erited underline value, so can be used to remove underline from' - ' a run that inherits underlining from its containing paragraph.' - ' Note this is not the same as assigning |None| to Run.underline' - '. 
|None| is a valid assignment value, but causes the run to inh' - 'erit its underline value. Assigning ``WD_UNDERLINE.NONE`` cause' - 's underlining to be unconditionally turned off.' - ), - XmlMappedEnumMember( - 'SINGLE', 1, 'single', 'A single line. Note that this setting is' - 'write-only in the sense that |True| (rather than ``WD_UNDERLINE' - '.SINGLE``) is returned for a run having this setting.' - ), - XmlMappedEnumMember( - 'WORDS', 2, 'words', 'Underline individual words only.' - ), - XmlMappedEnumMember( - 'DOUBLE', 3, 'double', 'A double line.' - ), - XmlMappedEnumMember( - 'DOTTED', 4, 'dotted', 'Dots.' - ), - XmlMappedEnumMember( - 'THICK', 6, 'thick', 'A single thick line.' - ), - XmlMappedEnumMember( - 'DASH', 7, 'dash', 'Dashes.' - ), - XmlMappedEnumMember( - 'DOT_DASH', 9, 'dotDash', 'Alternating dots and dashes.' - ), - XmlMappedEnumMember( - 'DOT_DOT_DASH', 10, 'dotDotDash', 'An alternating dot-dot-dash p' - 'attern.' - ), - XmlMappedEnumMember( - 'WAVY', 11, 'wave', 'A single wavy line.' - ), - XmlMappedEnumMember( - 'DOTTED_HEAVY', 20, 'dottedHeavy', 'Heavy dots.' - ), - XmlMappedEnumMember( - 'DASH_HEAVY', 23, 'dashedHeavy', 'Heavy dashes.' - ), - XmlMappedEnumMember( - 'DOT_DASH_HEAVY', 25, 'dashDotHeavy', 'Alternating heavy dots an' - 'd heavy dashes.' - ), - XmlMappedEnumMember( - 'DOT_DOT_DASH_HEAVY', 26, 'dashDotDotHeavy', 'An alternating hea' - 'vy dot-dot-dash pattern.' - ), - XmlMappedEnumMember( - 'WAVY_HEAVY', 27, 'wavyHeavy', 'A heavy wavy line.' - ), - XmlMappedEnumMember( - 'DASH_LONG', 39, 'dashLong', 'Long dashes.' - ), - XmlMappedEnumMember( - 'WAVY_DOUBLE', 43, 'wavyDouble', 'A double wavy line.' - ), - XmlMappedEnumMember( - 'DASH_LONG_HEAVY', 55, 'dashLongHeavy', 'Long heavy dashes.' - ), - ) diff --git a/docx/exceptions.py b/docx/exceptions.py deleted file mode 100644 index 7a8b99c81..000000000 --- a/docx/exceptions.py +++ /dev/null @@ -1,27 +0,0 @@ -# encoding: utf-8 - -""" -Exceptions used with python-docx. 
- -The base exception class is PythonDocxError. -""" - - -class PythonDocxError(Exception): - """ - Generic error class. - """ - - -class InvalidSpanError(PythonDocxError): - """ - Raised when an invalid merge region is specified in a request to merge - table cells. - """ - - -class InvalidXmlError(PythonDocxError): - """ - Raised when invalid XML is encountered, such as on attempt to access a - missing required child element - """ diff --git a/docx/image/__init__.py b/docx/image/__init__.py deleted file mode 100644 index 8ab3ada68..000000000 --- a/docx/image/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -# encoding: utf-8 - -""" -Provides objects that can characterize image streams as to content type and -size, as a required step in including them in a document. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from docx.image.bmp import Bmp -from docx.image.gif import Gif -from docx.image.jpeg import Exif, Jfif -from docx.image.png import Png -from docx.image.tiff import Tiff - - -SIGNATURES = ( - # class, offset, signature_bytes - (Png, 0, b'\x89PNG\x0D\x0A\x1A\x0A'), - (Jfif, 6, b'JFIF'), - (Exif, 6, b'Exif'), - (Gif, 0, b'GIF87a'), - (Gif, 0, b'GIF89a'), - (Tiff, 0, b'MM\x00*'), # big-endian (Motorola) TIFF - (Tiff, 0, b'II*\x00'), # little-endian (Intel) TIFF - (Bmp, 0, b'BM'), -) diff --git a/docx/image/constants.py b/docx/image/constants.py deleted file mode 100644 index 90b469705..000000000 --- a/docx/image/constants.py +++ /dev/null @@ -1,169 +0,0 @@ -# encoding: utf-8 - -""" -Constants specific the the image sub-package -""" - - -class JPEG_MARKER_CODE(object): - """ - JPEG marker codes - """ - TEM = b'\x01' - DHT = b'\xC4' - DAC = b'\xCC' - JPG = b'\xC8' - - SOF0 = b'\xC0' - SOF1 = b'\xC1' - SOF2 = b'\xC2' - SOF3 = b'\xC3' - SOF5 = b'\xC5' - SOF6 = b'\xC6' - SOF7 = b'\xC7' - SOF9 = b'\xC9' - SOFA = b'\xCA' - SOFB = b'\xCB' - SOFD = b'\xCD' - SOFE = b'\xCE' - SOFF = b'\xCF' - - RST0 = b'\xD0' - RST1 = 
b'\xD1' - RST2 = b'\xD2' - RST3 = b'\xD3' - RST4 = b'\xD4' - RST5 = b'\xD5' - RST6 = b'\xD6' - RST7 = b'\xD7' - - SOI = b'\xD8' - EOI = b'\xD9' - SOS = b'\xDA' - DQT = b'\xDB' # Define Quantization Table(s) - DNL = b'\xDC' - DRI = b'\xDD' - DHP = b'\xDE' - EXP = b'\xDF' - - APP0 = b'\xE0' - APP1 = b'\xE1' - APP2 = b'\xE2' - APP3 = b'\xE3' - APP4 = b'\xE4' - APP5 = b'\xE5' - APP6 = b'\xE6' - APP7 = b'\xE7' - APP8 = b'\xE8' - APP9 = b'\xE9' - APPA = b'\xEA' - APPB = b'\xEB' - APPC = b'\xEC' - APPD = b'\xED' - APPE = b'\xEE' - APPF = b'\xEF' - - STANDALONE_MARKERS = ( - TEM, SOI, EOI, RST0, RST1, RST2, RST3, RST4, RST5, RST6, RST7 - ) - - SOF_MARKER_CODES = ( - SOF0, SOF1, SOF2, SOF3, SOF5, SOF6, SOF7, SOF9, SOFA, SOFB, SOFD, - SOFE, SOFF - ) - - marker_names = { - b'\x00': 'UNKNOWN', - b'\xC0': 'SOF0', - b'\xC2': 'SOF2', - b'\xC4': 'DHT', - b'\xDA': 'SOS', # start of scan - b'\xD8': 'SOI', # start of image - b'\xD9': 'EOI', # end of image - b'\xDB': 'DQT', - b'\xE0': 'APP0', - b'\xE1': 'APP1', - b'\xE2': 'APP2', - b'\xED': 'APP13', - b'\xEE': 'APP14', - } - - @classmethod - def is_standalone(cls, marker_code): - return marker_code in cls.STANDALONE_MARKERS - - -class MIME_TYPE(object): - """ - Image content types - """ - BMP = 'image/bmp' - GIF = 'image/gif' - JPEG = 'image/jpeg' - PNG = 'image/png' - TIFF = 'image/tiff' - - -class PNG_CHUNK_TYPE(object): - """ - PNG chunk type names - """ - IHDR = 'IHDR' - pHYs = 'pHYs' - IEND = 'IEND' - - -class TIFF_FLD_TYPE(object): - """ - Tag codes for TIFF Image File Directory (IFD) entries. - """ - BYTE = 1 - ASCII = 2 - SHORT = 3 - LONG = 4 - RATIONAL = 5 - - field_type_names = { - 1: 'BYTE', 2: 'ASCII char', 3: 'SHORT', 4: 'LONG', - 5: 'RATIONAL' - } - - -TIFF_FLD = TIFF_FLD_TYPE - - -class TIFF_TAG(object): - """ - Tag codes for TIFF Image File Directory (IFD) entries. 
- """ - IMAGE_WIDTH = 0x0100 - IMAGE_LENGTH = 0x0101 - X_RESOLUTION = 0x011A - Y_RESOLUTION = 0x011B - RESOLUTION_UNIT = 0x0128 - - tag_names = { - 0x00FE: 'NewSubfileType', - 0x0100: 'ImageWidth', - 0x0101: 'ImageLength', - 0x0102: 'BitsPerSample', - 0x0103: 'Compression', - 0x0106: 'PhotometricInterpretation', - 0x010E: 'ImageDescription', - 0x010F: 'Make', - 0x0110: 'Model', - 0x0111: 'StripOffsets', - 0x0112: 'Orientation', - 0x0115: 'SamplesPerPixel', - 0x0117: 'StripByteCounts', - 0x011A: 'XResolution', - 0x011B: 'YResolution', - 0x011C: 'PlanarConfiguration', - 0x0128: 'ResolutionUnit', - 0x0131: 'Software', - 0x0132: 'DateTime', - 0x0213: 'YCbCrPositioning', - 0x8769: 'ExifTag', - 0x8825: 'GPS IFD', - 0xC4A5: 'PrintImageMatching', - } diff --git a/docx/image/exceptions.py b/docx/image/exceptions.py deleted file mode 100644 index f233edc4e..000000000 --- a/docx/image/exceptions.py +++ /dev/null @@ -1,23 +0,0 @@ -# encoding: utf-8 - -""" -Exceptions specific the the image sub-package -""" - - -class InvalidImageStreamError(Exception): - """ - The recognized image stream appears to be corrupted - """ - - -class UnexpectedEndOfFileError(Exception): - """ - EOF was unexpectedly encountered while reading an image stream. - """ - - -class UnrecognizedImageError(Exception): - """ - The provided image stream could not be recognized. - """ diff --git a/docx/image/gif.py b/docx/image/gif.py deleted file mode 100644 index 57f037d80..000000000 --- a/docx/image/gif.py +++ /dev/null @@ -1,47 +0,0 @@ -# encoding: utf-8 - -from __future__ import absolute_import, division, print_function - -from struct import Struct - -from .constants import MIME_TYPE -from .image import BaseImageHeader - - -class Gif(BaseImageHeader): - """ - Image header parser for GIF images. Note that the GIF format does not - support resolution (DPI) information. Both horizontal and vertical DPI - default to 72. 
- """ - @classmethod - def from_stream(cls, stream): - """ - Return |Gif| instance having header properties parsed from GIF image - in *stream*. - """ - px_width, px_height = cls._dimensions_from_stream(stream) - return cls(px_width, px_height, 72, 72) - - @property - def content_type(self): - """ - MIME content type for this image, unconditionally `image/gif` for - GIF images. - """ - return MIME_TYPE.GIF - - @property - def default_ext(self): - """ - Default filename extension, always 'gif' for GIF images. - """ - return 'gif' - - @classmethod - def _dimensions_from_stream(cls, stream): - stream.seek(6) - bytes_ = stream.read(4) - struct = Struct('L' - return self._read_int(fmt, base, offset) - - def read_short(self, base, offset=0): - """ - Return the int value of the two bytes at the file position determined - by *base* and *offset*, similarly to ``read_long()`` above. - """ - fmt = b'H' - return self._read_int(fmt, base, offset) - - def read_str(self, char_count, base, offset=0): - """ - Return a string containing the *char_count* bytes at the file - position determined by self._base_offset + *base* + *offset*. 
- """ - def str_struct(char_count): - format_ = '%ds' % char_count - return Struct(format_) - struct = str_struct(char_count) - chars = self._unpack_item(struct, base, offset) - unicode_str = chars.decode('UTF-8') - return unicode_str - - def seek(self, base, offset=0): - location = self._base_offset + base + offset - self._stream.seek(location) - - def tell(self): - """ - Allow pass-through tell() call - """ - return self._stream.tell() - - def _read_bytes(self, byte_count, base, offset): - self.seek(base, offset) - bytes_ = self._stream.read(byte_count) - if len(bytes_) < byte_count: - raise UnexpectedEndOfFileError - return bytes_ - - def _read_int(self, fmt, base, offset): - struct = Struct(fmt) - return self._unpack_item(struct, base, offset) - - def _unpack_item(self, struct, base, offset): - bytes_ = self._read_bytes(struct.size, base, offset) - return struct.unpack(bytes_)[0] diff --git a/docx/image/image.py b/docx/image/image.py deleted file mode 100644 index ba2158e72..000000000 --- a/docx/image/image.py +++ /dev/null @@ -1,263 +0,0 @@ -# encoding: utf-8 - -""" -Provides objects that can characterize image streams as to content type and -size, as a required step in including them in a document. -""" - -from __future__ import absolute_import, division, print_function - -import hashlib -import os - -from ..compat import BytesIO, is_string -from .exceptions import UnrecognizedImageError -from ..shared import Emu, Inches, lazyproperty - - -class Image(object): - """ - Graphical image stream such as JPEG, PNG, or GIF with properties and - methods required by ImagePart. - """ - def __init__(self, blob, filename, image_header): - super(Image, self).__init__() - self._blob = blob - self._filename = filename - self._image_header = image_header - - @classmethod - def from_blob(cls, blob): - """ - Return a new |Image| subclass instance parsed from the image binary - contained in *blob*. 
- """ - stream = BytesIO(blob) - return cls._from_stream(stream, blob) - - @classmethod - def from_file(cls, image_descriptor): - """ - Return a new |Image| subclass instance loaded from the image file - identified by *image_descriptor*, a path or file-like object. - """ - if is_string(image_descriptor): - path = image_descriptor - with open(path, 'rb') as f: - blob = f.read() - stream = BytesIO(blob) - filename = os.path.basename(path) - else: - stream = image_descriptor - stream.seek(0) - blob = stream.read() - filename = None - return cls._from_stream(stream, blob, filename) - - @property - def blob(self): - """ - The bytes of the image 'file' - """ - return self._blob - - @property - def content_type(self): - """ - MIME content type for this image, e.g. ``'image/jpeg'`` for a JPEG - image - """ - return self._image_header.content_type - - @lazyproperty - def ext(self): - """ - The file extension for the image. If an actual one is available from - a load filename it is used. Otherwise a canonical extension is - assigned based on the content type. Does not contain the leading - period, e.g. 'jpg', not '.jpg'. - """ - return os.path.splitext(self._filename)[1][1:] - - @property - def filename(self): - """ - Original image file name, if loaded from disk, or a generic filename - if loaded from an anonymous stream. - """ - return self._filename - - @property - def px_width(self): - """ - The horizontal pixel dimension of the image - """ - return self._image_header.px_width - - @property - def px_height(self): - """ - The vertical pixel dimension of the image - """ - return self._image_header.px_height - - @property - def horz_dpi(self): - """ - Integer dots per inch for the width of this image. Defaults to 72 - when not present in the file, as is often the case. - """ - return self._image_header.horz_dpi - - @property - def vert_dpi(self): - """ - Integer dots per inch for the height of this image. Defaults to 72 - when not present in the file, as is often the case. 
- """ - return self._image_header.vert_dpi - - @property - def width(self): - """ - A |Length| value representing the native width of the image, - calculated from the values of `px_width` and `horz_dpi`. - """ - return Inches(self.px_width / self.horz_dpi) - - @property - def height(self): - """ - A |Length| value representing the native height of the image, - calculated from the values of `px_height` and `vert_dpi`. - """ - return Inches(self.px_height / self.vert_dpi) - - def scaled_dimensions(self, width=None, height=None): - """ - Return a (cx, cy) 2-tuple representing the native dimensions of this - image scaled by applying the following rules to *width* and *height*. - If both *width* and *height* are specified, the return value is - (*width*, *height*); no scaling is performed. If only one is - specified, it is used to compute a scaling factor that is then - applied to the unspecified dimension, preserving the aspect ratio of - the image. If both *width* and *height* are |None|, the native - dimensions are returned. The native dimensions are calculated using - the dots-per-inch (dpi) value embedded in the image, defaulting to 72 - dpi if no value is specified, as is often the case. The returned - values are both |Length| objects. - """ - if width is None and height is None: - return self.width, self.height - - if width is None: - scaling_factor = float(height) / float(self.height) - width = round(self.width * scaling_factor) - - if height is None: - scaling_factor = float(width) / float(self.width) - height = round(self.height * scaling_factor) - - return Emu(width), Emu(height) - - @lazyproperty - def sha1(self): - """ - SHA1 hash digest of the image blob - """ - return hashlib.sha1(self._blob).hexdigest() - - @classmethod - def _from_stream(cls, stream, blob, filename=None): - """ - Return an instance of the |Image| subclass corresponding to the - format of the image in *stream*. 
- """ - image_header = _ImageHeaderFactory(stream) - if filename is None: - filename = 'image.%s' % image_header.default_ext - return cls(blob, filename, image_header) - - -def _ImageHeaderFactory(stream): - """ - Return a |BaseImageHeader| subclass instance that knows how to parse the - headers of the image in *stream*. - """ - from docx.image import SIGNATURES - - def read_32(stream): - stream.seek(0) - return stream.read(32) - - header = read_32(stream) - for cls, offset, signature_bytes in SIGNATURES: - end = offset + len(signature_bytes) - found_bytes = header[offset:end] - if found_bytes == signature_bytes: - return cls.from_stream(stream) - raise UnrecognizedImageError - - -class BaseImageHeader(object): - """ - Base class for image header subclasses like |Jpeg| and |Tiff|. - """ - def __init__(self, px_width, px_height, horz_dpi, vert_dpi): - self._px_width = px_width - self._px_height = px_height - self._horz_dpi = horz_dpi - self._vert_dpi = vert_dpi - - @property - def content_type(self): - """ - Abstract property definition, must be implemented by all subclasses. - """ - msg = ( - 'content_type property must be implemented by all subclasses of ' - 'BaseImageHeader' - ) - raise NotImplementedError(msg) - - @property - def default_ext(self): - """ - Default filename extension for images of this type. An abstract - property definition, must be implemented by all subclasses. - """ - msg = ( - 'default_ext property must be implemented by all subclasses of ' - 'BaseImageHeader' - ) - raise NotImplementedError(msg) - - @property - def px_width(self): - """ - The horizontal pixel dimension of the image - """ - return self._px_width - - @property - def px_height(self): - """ - The vertical pixel dimension of the image - """ - return self._px_height - - @property - def horz_dpi(self): - """ - Integer dots per inch for the width of this image. Defaults to 72 - when not present in the file, as is often the case. 
- """ - return self._horz_dpi - - @property - def vert_dpi(self): - """ - Integer dots per inch for the height of this image. Defaults to 72 - when not present in the file, as is often the case. - """ - return self._vert_dpi diff --git a/docx/image/tiff.py b/docx/image/tiff.py deleted file mode 100644 index c38242360..000000000 --- a/docx/image/tiff.py +++ /dev/null @@ -1,345 +0,0 @@ -# encoding: utf-8 - -from __future__ import absolute_import, division, print_function - -from .constants import MIME_TYPE, TIFF_FLD, TIFF_TAG -from .helpers import BIG_ENDIAN, LITTLE_ENDIAN, StreamReader -from .image import BaseImageHeader - - -class Tiff(BaseImageHeader): - """ - Image header parser for TIFF images. Handles both big and little endian - byte ordering. - """ - @property - def content_type(self): - """ - Return the MIME type of this TIFF image, unconditionally the string - ``image/tiff``. - """ - return MIME_TYPE.TIFF - - @property - def default_ext(self): - """ - Default filename extension, always 'tiff' for TIFF images. - """ - return 'tiff' - - @classmethod - def from_stream(cls, stream): - """ - Return a |Tiff| instance containing the properties of the TIFF image - in *stream*. - """ - parser = _TiffParser.parse(stream) - - px_width = parser.px_width - px_height = parser.px_height - horz_dpi = parser.horz_dpi - vert_dpi = parser.vert_dpi - - return cls(px_width, px_height, horz_dpi, vert_dpi) - - -class _TiffParser(object): - """ - Parses a TIFF image stream to extract the image properties found in its - main image file directory (IFD) - """ - def __init__(self, ifd_entries): - super(_TiffParser, self).__init__() - self._ifd_entries = ifd_entries - - @classmethod - def parse(cls, stream): - """ - Return an instance of |_TiffParser| containing the properties parsed - from the TIFF image in *stream*. 
- """ - stream_rdr = cls._make_stream_reader(stream) - ifd0_offset = stream_rdr.read_long(4) - ifd_entries = _IfdEntries.from_stream(stream_rdr, ifd0_offset) - return cls(ifd_entries) - - @property - def horz_dpi(self): - """ - The horizontal dots per inch value calculated from the XResolution - and ResolutionUnit tags of the IFD; defaults to 72 if those tags are - not present. - """ - return self._dpi(TIFF_TAG.X_RESOLUTION) - - @property - def vert_dpi(self): - """ - The vertical dots per inch value calculated from the XResolution and - ResolutionUnit tags of the IFD; defaults to 72 if those tags are not - present. - """ - return self._dpi(TIFF_TAG.Y_RESOLUTION) - - @property - def px_height(self): - """ - The number of stacked rows of pixels in the image, |None| if the IFD - contains no ``ImageLength`` tag, the expected case when the TIFF is - embeded in an Exif image. - """ - return self._ifd_entries.get(TIFF_TAG.IMAGE_LENGTH) - - @property - def px_width(self): - """ - The number of pixels in each row in the image, |None| if the IFD - contains no ``ImageWidth`` tag, the expected case when the TIFF is - embeded in an Exif image. - """ - return self._ifd_entries.get(TIFF_TAG.IMAGE_WIDTH) - - @classmethod - def _detect_endian(cls, stream): - """ - Return either BIG_ENDIAN or LITTLE_ENDIAN depending on the endian - indicator found in the TIFF *stream* header, either 'MM' or 'II'. - """ - stream.seek(0) - endian_str = stream.read(2) - return BIG_ENDIAN if endian_str == b'MM' else LITTLE_ENDIAN - - def _dpi(self, resolution_tag): - """ - Return the dpi value calculated for *resolution_tag*, which can be - either TIFF_TAG.X_RESOLUTION or TIFF_TAG.Y_RESOLUTION. The - calculation is based on the values of both that tag and the - TIFF_TAG.RESOLUTION_UNIT tag in this parser's |_IfdEntries| instance. 
- """ - ifd_entries = self._ifd_entries - - if resolution_tag not in ifd_entries: - return 72 - - # resolution unit defaults to inches (2) - resolution_unit = ( - ifd_entries[TIFF_TAG.RESOLUTION_UNIT] - if TIFF_TAG.RESOLUTION_UNIT in ifd_entries else 2 - ) - - if resolution_unit == 1: # aspect ratio only - return 72 - # resolution_unit == 2 for inches, 3 for centimeters - units_per_inch = 1 if resolution_unit == 2 else 2.54 - dots_per_unit = ifd_entries[resolution_tag] - return int(round(dots_per_unit * units_per_inch)) - - @classmethod - def _make_stream_reader(cls, stream): - """ - Return a |StreamReader| instance with wrapping *stream* and having - "endian-ness" determined by the 'MM' or 'II' indicator in the TIFF - stream header. - """ - endian = cls._detect_endian(stream) - return StreamReader(stream, endian) - - -class _IfdEntries(object): - """ - Image File Directory for a TIFF image, having mapping (dict) semantics - allowing "tag" values to be retrieved by tag code. - """ - def __init__(self, entries): - super(_IfdEntries, self).__init__() - self._entries = entries - - def __contains__(self, key): - """ - Provides ``in`` operator, e.g. ``tag in ifd_entries`` - """ - return self._entries.__contains__(key) - - def __getitem__(self, key): - """ - Provides indexed access, e.g. ``tag_value = ifd_entries[tag_code]`` - """ - return self._entries.__getitem__(key) - - @classmethod - def from_stream(cls, stream, offset): - """ - Return a new |_IfdEntries| instance parsed from *stream* starting at - *offset*. - """ - ifd_parser = _IfdParser(stream, offset) - entries = dict((e.tag, e.value) for e in ifd_parser.iter_entries()) - return cls(entries) - - def get(self, tag_code, default=None): - """ - Return value of IFD entry having tag matching *tag_code*, or - *default* if no matching tag found. 
- """ - return self._entries.get(tag_code, default) - - -class _IfdParser(object): - """ - Service object that knows how to extract directory entries from an Image - File Directory (IFD) - """ - def __init__(self, stream_rdr, offset): - super(_IfdParser, self).__init__() - self._stream_rdr = stream_rdr - self._offset = offset - - def iter_entries(self): - """ - Generate an |_IfdEntry| instance corresponding to each entry in the - directory. - """ - for idx in range(self._entry_count): - dir_entry_offset = self._offset + 2 + (idx*12) - ifd_entry = _IfdEntryFactory(self._stream_rdr, dir_entry_offset) - yield ifd_entry - - @property - def _entry_count(self): - """ - The count of directory entries, read from the top of the IFD header - """ - return self._stream_rdr.read_short(self._offset) - - -def _IfdEntryFactory(stream_rdr, offset): - """ - Return an |_IfdEntry| subclass instance containing the value of the - directory entry at *offset* in *stream_rdr*. - """ - ifd_entry_classes = { - TIFF_FLD.ASCII: _AsciiIfdEntry, - TIFF_FLD.SHORT: _ShortIfdEntry, - TIFF_FLD.LONG: _LongIfdEntry, - TIFF_FLD.RATIONAL: _RationalIfdEntry, - } - field_type = stream_rdr.read_short(offset, 2) - if field_type in ifd_entry_classes: - entry_cls = ifd_entry_classes[field_type] - else: - entry_cls = _IfdEntry - return entry_cls.from_stream(stream_rdr, offset) - - -class _IfdEntry(object): - """ - Base class for IFD entry classes. Subclasses are differentiated by value - type, e.g. ASCII, long int, etc. - """ - def __init__(self, tag_code, value): - super(_IfdEntry, self).__init__() - self._tag_code = tag_code - self._value = value - - @classmethod - def from_stream(cls, stream_rdr, offset): - """ - Return an |_IfdEntry| subclass instance containing the tag and value - of the tag parsed from *stream_rdr* at *offset*. Note this method is - common to all subclasses. Override the ``_parse_value()`` method to - provide distinctive behavior based on field type. 
- """ - tag_code = stream_rdr.read_short(offset, 0) - value_count = stream_rdr.read_long(offset, 4) - value_offset = stream_rdr.read_long(offset, 8) - value = cls._parse_value( - stream_rdr, offset, value_count, value_offset - ) - return cls(tag_code, value) - - @classmethod - def _parse_value(cls, stream_rdr, offset, value_count, value_offset): - """ - Return the value of this field parsed from *stream_rdr* at *offset*. - Intended to be overridden by subclasses. - """ - return 'UNIMPLEMENTED FIELD TYPE' # pragma: no cover - - @property - def tag(self): - """ - Short int code that identifies this IFD entry - """ - return self._tag_code - - @property - def value(self): - """ - Value of this tag, its type being dependent on the tag. - """ - return self._value - - -class _AsciiIfdEntry(_IfdEntry): - """ - IFD entry having the form of a NULL-terminated ASCII string - """ - @classmethod - def _parse_value(cls, stream_rdr, offset, value_count, value_offset): - """ - Return the ASCII string parsed from *stream_rdr* at *value_offset*. - The length of the string, including a terminating '\x00' (NUL) - character, is in *value_count*. - """ - return stream_rdr.read_str(value_count-1, value_offset) - - -class _ShortIfdEntry(_IfdEntry): - """ - IFD entry expressed as a short (2-byte) integer - """ - @classmethod - def _parse_value(cls, stream_rdr, offset, value_count, value_offset): - """ - Return the short int value contained in the *value_offset* field of - this entry. Only supports single values at present. - """ - if value_count == 1: - return stream_rdr.read_short(offset, 8) - else: # pragma: no cover - return 'Multi-value short integer NOT IMPLEMENTED' - - -class _LongIfdEntry(_IfdEntry): - """ - IFD entry expressed as a long (4-byte) integer - """ - @classmethod - def _parse_value(cls, stream_rdr, offset, value_count, value_offset): - """ - Return the long int value contained in the *value_offset* field of - this entry. Only supports single values at present. 
- """ - if value_count == 1: - return stream_rdr.read_long(offset, 8) - else: # pragma: no cover - return 'Multi-value long integer NOT IMPLEMENTED' - - -class _RationalIfdEntry(_IfdEntry): - """ - IFD entry expressed as a numerator, denominator pair - """ - @classmethod - def _parse_value(cls, stream_rdr, offset, value_count, value_offset): - """ - Return the rational (numerator / denominator) value at *value_offset* - in *stream_rdr* as a floating-point number. Only supports single - values at present. - """ - if value_count == 1: - numerator = stream_rdr.read_long(value_offset) - denominator = stream_rdr.read_long(value_offset, 4) - return numerator / denominator - else: # pragma: no cover - return 'Multi-value Rational NOT IMPLEMENTED' diff --git a/docx/opc/compat.py b/docx/opc/compat.py deleted file mode 100644 index d944fe43b..000000000 --- a/docx/opc/compat.py +++ /dev/null @@ -1,50 +0,0 @@ -# encoding: utf-8 - -""" -Provides Python 2/3 compatibility objects -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -import sys - -# =========================================================================== -# Python 3 versions -# =========================================================================== - -if sys.version_info >= (3, 0): - - def cls_method_fn(cls, method_name): - """ - Return the function object associated with the method of *cls* having - *method_name*. - """ - return getattr(cls, method_name) - - def is_string(obj): - """ - Return True if *obj* is a string, False otherwise. - """ - return isinstance(obj, str) - -# =========================================================================== -# Python 2 versions -# =========================================================================== - -else: - - def cls_method_fn(cls, method_name): - """ - Return the function object associated with the method of *cls* having - *method_name*. 
- """ - unbound_method = getattr(cls, method_name) - return unbound_method.__func__ - - def is_string(obj): - """ - Return True if *obj* is a string, False otherwise. - """ - return isinstance(obj, basestring) diff --git a/docx/opc/constants.py b/docx/opc/constants.py deleted file mode 100644 index b90aa394a..000000000 --- a/docx/opc/constants.py +++ /dev/null @@ -1,658 +0,0 @@ -# encoding: utf-8 - -""" -Constant values related to the Open Packaging Convention, in particular, -content types and relationship types. -""" - - -class CONTENT_TYPE(object): - """ - Content type URIs (like MIME-types) that specify a part's format - """ - BMP = ( - 'image/bmp' - ) - DML_CHART = ( - 'application/vnd.openxmlformats-officedocument.drawingml.chart+xml' - ) - DML_CHARTSHAPES = ( - 'application/vnd.openxmlformats-officedocument.drawingml.chartshapes' - '+xml' - ) - DML_DIAGRAM_COLORS = ( - 'application/vnd.openxmlformats-officedocument.drawingml.diagramColo' - 'rs+xml' - ) - DML_DIAGRAM_DATA = ( - 'application/vnd.openxmlformats-officedocument.drawingml.diagramData' - '+xml' - ) - DML_DIAGRAM_LAYOUT = ( - 'application/vnd.openxmlformats-officedocument.drawingml.diagramLayo' - 'ut+xml' - ) - DML_DIAGRAM_STYLE = ( - 'application/vnd.openxmlformats-officedocument.drawingml.diagramStyl' - 'e+xml' - ) - GIF = ( - 'image/gif' - ) - JPEG = ( - 'image/jpeg' - ) - MS_PHOTO = ( - 'image/vnd.ms-photo' - ) - OFC_CUSTOM_PROPERTIES = ( - 'application/vnd.openxmlformats-officedocument.custom-properties+xml' - ) - OFC_CUSTOM_XML_PROPERTIES = ( - 'application/vnd.openxmlformats-officedocument.customXmlProperties+x' - 'ml' - ) - OFC_DRAWING = ( - 'application/vnd.openxmlformats-officedocument.drawing+xml' - ) - OFC_EXTENDED_PROPERTIES = ( - 'application/vnd.openxmlformats-officedocument.extended-properties+x' - 'ml' - ) - OFC_OLE_OBJECT = ( - 'application/vnd.openxmlformats-officedocument.oleObject' - ) - OFC_PACKAGE = ( - 'application/vnd.openxmlformats-officedocument.package' - ) - OFC_THEME = 
( - 'application/vnd.openxmlformats-officedocument.theme+xml' - ) - OFC_THEME_OVERRIDE = ( - 'application/vnd.openxmlformats-officedocument.themeOverride+xml' - ) - OFC_VML_DRAWING = ( - 'application/vnd.openxmlformats-officedocument.vmlDrawing' - ) - OPC_CORE_PROPERTIES = ( - 'application/vnd.openxmlformats-package.core-properties+xml' - ) - OPC_DIGITAL_SIGNATURE_CERTIFICATE = ( - 'application/vnd.openxmlformats-package.digital-signature-certificat' - 'e' - ) - OPC_DIGITAL_SIGNATURE_ORIGIN = ( - 'application/vnd.openxmlformats-package.digital-signature-origin' - ) - OPC_DIGITAL_SIGNATURE_XMLSIGNATURE = ( - 'application/vnd.openxmlformats-package.digital-signature-xmlsignatu' - 're+xml' - ) - OPC_RELATIONSHIPS = ( - 'application/vnd.openxmlformats-package.relationships+xml' - ) - PML_COMMENTS = ( - 'application/vnd.openxmlformats-officedocument.presentationml.commen' - 'ts+xml' - ) - PML_COMMENT_AUTHORS = ( - 'application/vnd.openxmlformats-officedocument.presentationml.commen' - 'tAuthors+xml' - ) - PML_HANDOUT_MASTER = ( - 'application/vnd.openxmlformats-officedocument.presentationml.handou' - 'tMaster+xml' - ) - PML_NOTES_MASTER = ( - 'application/vnd.openxmlformats-officedocument.presentationml.notesM' - 'aster+xml' - ) - PML_NOTES_SLIDE = ( - 'application/vnd.openxmlformats-officedocument.presentationml.notesS' - 'lide+xml' - ) - PML_PRESENTATION_MAIN = ( - 'application/vnd.openxmlformats-officedocument.presentationml.presen' - 'tation.main+xml' - ) - PML_PRES_PROPS = ( - 'application/vnd.openxmlformats-officedocument.presentationml.presPr' - 'ops+xml' - ) - PML_PRINTER_SETTINGS = ( - 'application/vnd.openxmlformats-officedocument.presentationml.printe' - 'rSettings' - ) - PML_SLIDE = ( - 'application/vnd.openxmlformats-officedocument.presentationml.slide+' - 'xml' - ) - PML_SLIDESHOW_MAIN = ( - 'application/vnd.openxmlformats-officedocument.presentationml.slides' - 'how.main+xml' - ) - PML_SLIDE_LAYOUT = ( - 
'application/vnd.openxmlformats-officedocument.presentationml.slideL' - 'ayout+xml' - ) - PML_SLIDE_MASTER = ( - 'application/vnd.openxmlformats-officedocument.presentationml.slideM' - 'aster+xml' - ) - PML_SLIDE_UPDATE_INFO = ( - 'application/vnd.openxmlformats-officedocument.presentationml.slideU' - 'pdateInfo+xml' - ) - PML_TABLE_STYLES = ( - 'application/vnd.openxmlformats-officedocument.presentationml.tableS' - 'tyles+xml' - ) - PML_TAGS = ( - 'application/vnd.openxmlformats-officedocument.presentationml.tags+x' - 'ml' - ) - PML_TEMPLATE_MAIN = ( - 'application/vnd.openxmlformats-officedocument.presentationml.templa' - 'te.main+xml' - ) - PML_VIEW_PROPS = ( - 'application/vnd.openxmlformats-officedocument.presentationml.viewPr' - 'ops+xml' - ) - PNG = ( - 'image/png' - ) - SML_CALC_CHAIN = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.calcCha' - 'in+xml' - ) - SML_CHARTSHEET = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.chartsh' - 'eet+xml' - ) - SML_COMMENTS = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.comment' - 's+xml' - ) - SML_CONNECTIONS = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.connect' - 'ions+xml' - ) - SML_CUSTOM_PROPERTY = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.customP' - 'roperty' - ) - SML_DIALOGSHEET = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.dialogs' - 'heet+xml' - ) - SML_EXTERNAL_LINK = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.externa' - 'lLink+xml' - ) - SML_PIVOT_CACHE_DEFINITION = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCa' - 'cheDefinition+xml' - ) - SML_PIVOT_CACHE_RECORDS = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCa' - 'cheRecords+xml' - ) - SML_PIVOT_TABLE = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.pivotTa' - 'ble+xml' - ) - SML_PRINTER_SETTINGS = ( - 
'application/vnd.openxmlformats-officedocument.spreadsheetml.printer' - 'Settings' - ) - SML_QUERY_TABLE = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.queryTa' - 'ble+xml' - ) - SML_REVISION_HEADERS = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.revisio' - 'nHeaders+xml' - ) - SML_REVISION_LOG = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.revisio' - 'nLog+xml' - ) - SML_SHARED_STRINGS = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sharedS' - 'trings+xml' - ) - SML_SHEET = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' - ) - SML_SHEET_MAIN = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.m' - 'ain+xml' - ) - SML_SHEET_METADATA = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheetMe' - 'tadata+xml' - ) - SML_STYLES = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.styles+' - 'xml' - ) - SML_TABLE = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.table+x' - 'ml' - ) - SML_TABLE_SINGLE_CELLS = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.tableSi' - 'ngleCells+xml' - ) - SML_TEMPLATE_MAIN = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.templat' - 'e.main+xml' - ) - SML_USER_NAMES = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.userNam' - 'es+xml' - ) - SML_VOLATILE_DEPENDENCIES = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.volatil' - 'eDependencies+xml' - ) - SML_WORKSHEET = ( - 'application/vnd.openxmlformats-officedocument.spreadsheetml.workshe' - 'et+xml' - ) - TIFF = ( - 'image/tiff' - ) - WML_COMMENTS = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.comm' - 'ents+xml' - ) - WML_DOCUMENT = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.docu' - 'ment' - ) - WML_DOCUMENT_GLOSSARY = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.docu' - 
'ment.glossary+xml' - ) - WML_DOCUMENT_MAIN = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.docu' - 'ment.main+xml' - ) - WML_ENDNOTES = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.endn' - 'otes+xml' - ) - WML_FONT_TABLE = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.font' - 'Table+xml' - ) - WML_FOOTER = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.foot' - 'er+xml' - ) - WML_FOOTNOTES = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.foot' - 'notes+xml' - ) - WML_HEADER = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.head' - 'er+xml' - ) - WML_NUMBERING = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.numb' - 'ering+xml' - ) - WML_PRINTER_SETTINGS = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.prin' - 'terSettings' - ) - WML_SETTINGS = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.sett' - 'ings+xml' - ) - WML_STYLES = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.styl' - 'es+xml' - ) - WML_WEB_SETTINGS = ( - 'application/vnd.openxmlformats-officedocument.wordprocessingml.webS' - 'ettings+xml' - ) - XML = ( - 'application/xml' - ) - X_EMF = ( - 'image/x-emf' - ) - X_FONTDATA = ( - 'application/x-fontdata' - ) - X_FONT_TTF = ( - 'application/x-font-ttf' - ) - X_WMF = ( - 'image/x-wmf' - ) - - -class NAMESPACE(object): - """Constant values for OPC XML namespaces""" - DML_WORDPROCESSING_DRAWING = ( - 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDraw' - 'ing' - ) - OFC_RELATIONSHIPS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - ) - OPC_RELATIONSHIPS = ( - 'http://schemas.openxmlformats.org/package/2006/relationships' - ) - OPC_CONTENT_TYPES = ( - 'http://schemas.openxmlformats.org/package/2006/content-types' - ) - WML_MAIN = ( - 'http://schemas.openxmlformats.org/wordprocessingml/2006/main' - ) - - 
-class RELATIONSHIP_TARGET_MODE(object): - """Open XML relationship target modes""" - EXTERNAL = 'External' - INTERNAL = 'Internal' - - -class RELATIONSHIP_TYPE(object): - AUDIO = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/audio' - ) - A_F_CHUNK = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/aFChunk' - ) - CALC_CHAIN = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/calcChain' - ) - CERTIFICATE = ( - 'http://schemas.openxmlformats.org/package/2006/relationships/digita' - 'l-signature/certificate' - ) - CHART = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/chart' - ) - CHARTSHEET = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/chartsheet' - ) - CHART_USER_SHAPES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/chartUserShapes' - ) - COMMENTS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/comments' - ) - COMMENT_AUTHORS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/commentAuthors' - ) - CONNECTIONS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/connections' - ) - CONTROL = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/control' - ) - CORE_PROPERTIES = ( - 'http://schemas.openxmlformats.org/package/2006/relationships/metada' - 'ta/core-properties' - ) - CUSTOM_PROPERTIES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/custom-properties' - ) - CUSTOM_PROPERTY = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/customProperty' - ) - CUSTOM_XML = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/customXml' - ) - CUSTOM_XML_PROPS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/customXmlProps' - ) - DIAGRAM_COLORS = ( - 
'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/diagramColors' - ) - DIAGRAM_DATA = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/diagramData' - ) - DIAGRAM_LAYOUT = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/diagramLayout' - ) - DIAGRAM_QUICK_STYLE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/diagramQuickStyle' - ) - DIALOGSHEET = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/dialogsheet' - ) - DRAWING = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/drawing' - ) - ENDNOTES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/endnotes' - ) - EXTENDED_PROPERTIES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/extended-properties' - ) - EXTERNAL_LINK = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/externalLink' - ) - FONT = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/font' - ) - FONT_TABLE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/fontTable' - ) - FOOTER = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/footer' - ) - FOOTNOTES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/footnotes' - ) - GLOSSARY_DOCUMENT = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/glossaryDocument' - ) - HANDOUT_MASTER = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/handoutMaster' - ) - HEADER = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/header' - ) - HYPERLINK = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/hyperlink' - ) - IMAGE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/image' - ) - NOTES_MASTER = ( - 
'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/notesMaster' - ) - NOTES_SLIDE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/notesSlide' - ) - NUMBERING = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/numbering' - ) - OFFICE_DOCUMENT = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/officeDocument' - ) - OLE_OBJECT = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/oleObject' - ) - ORIGIN = ( - 'http://schemas.openxmlformats.org/package/2006/relationships/digita' - 'l-signature/origin' - ) - PACKAGE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/package' - ) - PIVOT_CACHE_DEFINITION = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/pivotCacheDefinition' - ) - PIVOT_CACHE_RECORDS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/spreadsheetml/pivotCacheRecords' - ) - PIVOT_TABLE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/pivotTable' - ) - PRES_PROPS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/presProps' - ) - PRINTER_SETTINGS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/printerSettings' - ) - QUERY_TABLE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/queryTable' - ) - REVISION_HEADERS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/revisionHeaders' - ) - REVISION_LOG = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/revisionLog' - ) - SETTINGS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/settings' - ) - SHARED_STRINGS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/sharedStrings' - ) - SHEET_METADATA = ( - 
'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/sheetMetadata' - ) - SIGNATURE = ( - 'http://schemas.openxmlformats.org/package/2006/relationships/digita' - 'l-signature/signature' - ) - SLIDE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/slide' - ) - SLIDE_LAYOUT = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/slideLayout' - ) - SLIDE_MASTER = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/slideMaster' - ) - SLIDE_UPDATE_INFO = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/slideUpdateInfo' - ) - STYLES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/styles' - ) - TABLE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/table' - ) - TABLE_SINGLE_CELLS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/tableSingleCells' - ) - TABLE_STYLES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/tableStyles' - ) - TAGS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/tags' - ) - THEME = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/theme' - ) - THEME_OVERRIDE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/themeOverride' - ) - THUMBNAIL = ( - 'http://schemas.openxmlformats.org/package/2006/relationships/metada' - 'ta/thumbnail' - ) - USERNAMES = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/usernames' - ) - VIDEO = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/video' - ) - VIEW_PROPS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/viewProps' - ) - VML_DRAWING = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/vmlDrawing' - ) - VOLATILE_DEPENDENCIES = ( - 
'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/volatileDependencies' - ) - WEB_SETTINGS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/webSettings' - ) - WORKSHEET_SOURCE = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/worksheetSource' - ) - XML_MAPS = ( - 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' - '/xmlMaps' - ) diff --git a/docx/opc/exceptions.py b/docx/opc/exceptions.py deleted file mode 100644 index b8e6de43f..000000000 --- a/docx/opc/exceptions.py +++ /dev/null @@ -1,19 +0,0 @@ -# encoding: utf-8 - -""" -Exceptions specific to python-opc - -The base exception class is OpcError. -""" - - -class OpcError(Exception): - """ - Base error class for python-opc - """ - - -class PackageNotFoundError(OpcError): - """ - Raised when a package cannot be found at the specified path. - """ diff --git a/docx/opc/oxml.py b/docx/opc/oxml.py deleted file mode 100644 index 494b31dca..000000000 --- a/docx/opc/oxml.py +++ /dev/null @@ -1,292 +0,0 @@ -# encoding: utf-8 - -""" -Temporary stand-in for main oxml module that came across with the -PackageReader transplant. Probably much will get replaced with objects from -the pptx.oxml.core and then this module will either get deleted or only hold -the package related custom element classes. 
-""" - -from __future__ import absolute_import, print_function, unicode_literals - -from lxml import etree - -from .constants import NAMESPACE as NS, RELATIONSHIP_TARGET_MODE as RTM - - -# configure XML parser -element_class_lookup = etree.ElementNamespaceClassLookup() -oxml_parser = etree.XMLParser(remove_blank_text=True, resolve_entities=False) -oxml_parser.set_element_class_lookup(element_class_lookup) - -nsmap = { - 'ct': NS.OPC_CONTENT_TYPES, - 'pr': NS.OPC_RELATIONSHIPS, - 'r': NS.OFC_RELATIONSHIPS, -} - - -# =========================================================================== -# functions -# =========================================================================== - -def parse_xml(text): - """ - ``etree.fromstring()`` replacement that uses oxml parser - """ - return etree.fromstring(text, oxml_parser) - - -def qn(tag): - """ - Stands for "qualified name", a utility function to turn a namespace - prefixed tag name into a Clark-notation qualified tag name for lxml. For - example, ``qn('p:cSld')`` returns ``'{http://schemas.../main}cSld'``. - """ - prefix, tagroot = tag.split(':') - uri = nsmap[prefix] - return '{%s}%s' % (uri, tagroot) - - -def serialize_part_xml(part_elm): - """ - Serialize *part_elm* etree element to XML suitable for storage as an XML - part. That is to say, no insignificant whitespace added for readability, - and an appropriate XML declaration added with UTF-8 encoding specified. - """ - return etree.tostring(part_elm, encoding='UTF-8', standalone=True) - - -def serialize_for_reading(element): - """ - Serialize *element* to human-readable XML suitable for tests. No XML - declaration. 
- """ - return etree.tostring(element, encoding='unicode', pretty_print=True) - - -# =========================================================================== -# Custom element classes -# =========================================================================== - -class BaseOxmlElement(etree.ElementBase): - """ - Base class for all custom element classes, to add standardized behavior - to all classes in one place. - """ - @property - def xml(self): - """ - Return XML string for this element, suitable for testing purposes. - Pretty printed for readability and without an XML declaration at the - top. - """ - return serialize_for_reading(self) - - -class CT_Default(BaseOxmlElement): - """ - ```` element, specifying the default content type to be applied - to a part with the specified extension. - """ - @property - def content_type(self): - """ - String held in the ``ContentType`` attribute of this ```` - element. - """ - return self.get('ContentType') - - @property - def extension(self): - """ - String held in the ``Extension`` attribute of this ```` - element. - """ - return self.get('Extension') - - @staticmethod - def new(ext, content_type): - """ - Return a new ```` element with attributes set to parameter - values. - """ - xml = '' % nsmap['ct'] - default = parse_xml(xml) - default.set('Extension', ext) - default.set('ContentType', content_type) - return default - - -class CT_Override(BaseOxmlElement): - """ - ```` element, specifying the content type to be applied for a - part with the specified partname. - """ - @property - def content_type(self): - """ - String held in the ``ContentType`` attribute of this ```` - element. - """ - return self.get('ContentType') - - @staticmethod - def new(partname, content_type): - """ - Return a new ```` element with attributes set to parameter - values. 
- """ - xml = '' % nsmap['ct'] - override = parse_xml(xml) - override.set('PartName', partname) - override.set('ContentType', content_type) - return override - - @property - def partname(self): - """ - String held in the ``PartName`` attribute of this ```` - element. - """ - return self.get('PartName') - - -class CT_Relationship(BaseOxmlElement): - """ - ```` element, representing a single relationship from a - source to a target part. - """ - @staticmethod - def new(rId, reltype, target, target_mode=RTM.INTERNAL): - """ - Return a new ```` element. - """ - xml = '' % nsmap['pr'] - relationship = parse_xml(xml) - relationship.set('Id', rId) - relationship.set('Type', reltype) - relationship.set('Target', target) - if target_mode == RTM.EXTERNAL: - relationship.set('TargetMode', RTM.EXTERNAL) - return relationship - - @property - def rId(self): - """ - String held in the ``Id`` attribute of this ```` - element. - """ - return self.get('Id') - - @property - def reltype(self): - """ - String held in the ``Type`` attribute of this ```` - element. - """ - return self.get('Type') - - @property - def target_ref(self): - """ - String held in the ``Target`` attribute of this ```` - element. - """ - return self.get('Target') - - @property - def target_mode(self): - """ - String held in the ``TargetMode`` attribute of this - ```` element, either ``Internal`` or ``External``. - Defaults to ``Internal``. - """ - return self.get('TargetMode', RTM.INTERNAL) - - -class CT_Relationships(BaseOxmlElement): - """ - ```` element, the root element in a .rels file. - """ - def add_rel(self, rId, reltype, target, is_external=False): - """ - Add a child ```` element with attributes set according - to parameter values. - """ - target_mode = RTM.EXTERNAL if is_external else RTM.INTERNAL - relationship = CT_Relationship.new(rId, reltype, target, target_mode) - self.append(relationship) - - @staticmethod - def new(): - """ - Return a new ```` element. 
- """ - xml = '' % nsmap['pr'] - relationships = parse_xml(xml) - return relationships - - @property - def Relationship_lst(self): - """ - Return a list containing all the ```` child elements. - """ - return self.findall(qn('pr:Relationship')) - - @property - def xml(self): - """ - Return XML string for this element, suitable for saving in a .rels - stream, not pretty printed and with an XML declaration at the top. - """ - return serialize_part_xml(self) - - -class CT_Types(BaseOxmlElement): - """ - ```` element, the container element for Default and Override - elements in [Content_Types].xml. - """ - def add_default(self, ext, content_type): - """ - Add a child ```` element with attributes set to parameter - values. - """ - default = CT_Default.new(ext, content_type) - self.append(default) - - def add_override(self, partname, content_type): - """ - Add a child ```` element with attributes set to parameter - values. - """ - override = CT_Override.new(partname, content_type) - self.append(override) - - @property - def defaults(self): - return self.findall(qn('ct:Default')) - - @staticmethod - def new(): - """ - Return a new ```` element. - """ - xml = '' % nsmap['ct'] - types = parse_xml(xml) - return types - - @property - def overrides(self): - return self.findall(qn('ct:Override')) - - -ct_namespace = element_class_lookup.get_namespace(nsmap['ct']) -ct_namespace['Default'] = CT_Default -ct_namespace['Override'] = CT_Override -ct_namespace['Types'] = CT_Types - -pr_namespace = element_class_lookup.get_namespace(nsmap['pr']) -pr_namespace['Relationship'] = CT_Relationship -pr_namespace['Relationships'] = CT_Relationships diff --git a/docx/opc/package.py b/docx/opc/package.py deleted file mode 100644 index b0ea37ea5..000000000 --- a/docx/opc/package.py +++ /dev/null @@ -1,221 +0,0 @@ -# encoding: utf-8 - -""" -The :mod:`pptx.packaging` module coheres around the concerns of reading and -writing presentations to and from a .pptx file. 
-""" - -from __future__ import absolute_import, print_function, unicode_literals - -from .constants import RELATIONSHIP_TYPE as RT -from .packuri import PACKAGE_URI -from .part import PartFactory -from .parts.coreprops import CorePropertiesPart -from .pkgreader import PackageReader -from .pkgwriter import PackageWriter -from .rel import Relationships -from .shared import lazyproperty - - -class OpcPackage(object): - """ - Main API class for |python-opc|. A new instance is constructed by calling - the :meth:`open` class method with a path to a package file or file-like - object containing one. - """ - def __init__(self): - super(OpcPackage, self).__init__() - - def after_unmarshal(self): - """ - Entry point for any post-unmarshaling processing. May be overridden - by subclasses without forwarding call to super. - """ - # don't place any code here, just catch call if not overridden by - # subclass - pass - - @property - def core_properties(self): - """ - |CoreProperties| object providing read/write access to the Dublin - Core properties for this document. - """ - return self._core_properties_part.core_properties - - def iter_rels(self): - """ - Generate exactly one reference to each relationship in the package by - performing a depth-first traversal of the rels graph. - """ - def walk_rels(source, visited=None): - visited = [] if visited is None else visited - for rel in source.rels.values(): - yield rel - if rel.is_external: - continue - part = rel.target_part - if part in visited: - continue - visited.append(part) - new_source = part - for rel in walk_rels(new_source, visited): - yield rel - - for rel in walk_rels(self): - yield rel - - def iter_parts(self): - """ - Generate exactly one reference to each of the parts in the package by - performing a depth-first traversal of the rels graph. 
- """ - def walk_parts(source, visited=list()): - for rel in source.rels.values(): - if rel.is_external: - continue - part = rel.target_part - if part in visited: - continue - visited.append(part) - yield part - new_source = part - for part in walk_parts(new_source, visited): - yield part - - for part in walk_parts(self): - yield part - - def load_rel(self, reltype, target, rId, is_external=False): - """ - Return newly added |_Relationship| instance of *reltype* between this - part and *target* with key *rId*. Target mode is set to - ``RTM.EXTERNAL`` if *is_external* is |True|. Intended for use during - load from a serialized package, where the rId is well known. Other - methods exist for adding a new relationship to the package during - processing. - """ - return self.rels.add_relationship(reltype, target, rId, is_external) - - @property - def main_document_part(self): - """ - Return a reference to the main document part for this package. - Examples include a document part for a WordprocessingML package, a - presentation part for a PresentationML package, or a workbook part - for a SpreadsheetML package. - """ - return self.part_related_by(RT.OFFICE_DOCUMENT) - - @classmethod - def open(cls, pkg_file): - """ - Return an |OpcPackage| instance loaded with the contents of - *pkg_file*. - """ - pkg_reader = PackageReader.from_file(pkg_file) - package = cls() - Unmarshaller.unmarshal(pkg_reader, package, PartFactory) - return package - - def part_related_by(self, reltype): - """ - Return part to which this package has a relationship of *reltype*. - Raises |KeyError| if no such relationship is found and |ValueError| - if more than one such relationship is found. - """ - return self.rels.part_with_reltype(reltype) - - @property - def parts(self): - """ - Return a list containing a reference to each of the parts in this - package. 
- """ - return [part for part in self.iter_parts()] - - def relate_to(self, part, reltype): - """ - Return rId key of relationship to *part*, from the existing - relationship if there is one, otherwise a newly created one. - """ - rel = self.rels.get_or_add(reltype, part) - return rel.rId - - @lazyproperty - def rels(self): - """ - Return a reference to the |Relationships| instance holding the - collection of relationships for this package. - """ - return Relationships(PACKAGE_URI.baseURI) - - def save(self, pkg_file): - """ - Save this package to *pkg_file*, where *file* can be either a path to - a file (a string) or a file-like object. - """ - for part in self.parts: - part.before_marshal() - PackageWriter.write(pkg_file, self.rels, self.parts) - - @property - def _core_properties_part(self): - """ - |CorePropertiesPart| object related to this package. Creates - a default core properties part if one is not present (not common). - """ - try: - return self.part_related_by(RT.CORE_PROPERTIES) - except KeyError: - core_properties_part = CorePropertiesPart.default(self) - self.relate_to(core_properties_part, RT.CORE_PROPERTIES) - return core_properties_part - - -class Unmarshaller(object): - """ - Hosts static methods for unmarshalling a package from a |PackageReader| - instance. - """ - @staticmethod - def unmarshal(pkg_reader, package, part_factory): - """ - Construct graph of parts and realized relationships based on the - contents of *pkg_reader*, delegating construction of each part to - *part_factory*. Package relationships are added to *pkg*. - """ - parts = Unmarshaller._unmarshal_parts( - pkg_reader, package, part_factory - ) - Unmarshaller._unmarshal_relationships(pkg_reader, package, parts) - for part in parts.values(): - part.after_unmarshal() - package.after_unmarshal() - - @staticmethod - def _unmarshal_parts(pkg_reader, package, part_factory): - """ - Return a dictionary of |Part| instances unmarshalled from - *pkg_reader*, keyed by partname. 
Side-effect is that each part in - *pkg_reader* is constructed using *part_factory*. - """ - parts = {} - for partname, content_type, reltype, blob in pkg_reader.iter_sparts(): - parts[partname] = part_factory( - partname, content_type, reltype, blob, package - ) - return parts - - @staticmethod - def _unmarshal_relationships(pkg_reader, package, parts): - """ - Add a relationship to the source object corresponding to each of the - relationships in *pkg_reader* with its target_part set to the actual - target part in *parts*. - """ - for source_uri, srel in pkg_reader.iter_srels(): - source = package if source_uri == '/' else parts[source_uri] - target = (srel.target_ref if srel.is_external - else parts[srel.target_partname]) - source.load_rel(srel.reltype, target, srel.rId, srel.is_external) diff --git a/docx/opc/packuri.py b/docx/opc/packuri.py deleted file mode 100644 index 621ed92e5..000000000 --- a/docx/opc/packuri.py +++ /dev/null @@ -1,117 +0,0 @@ -# encoding: utf-8 - -""" -Provides the PackURI value type along with some useful known pack URI strings -such as PACKAGE_URI. -""" - -import posixpath -import re - - -class PackURI(str): - """ - Provides access to pack URI components such as the baseURI and the - filename slice. Behaves as |str| otherwise. - """ - _filename_re = re.compile('([a-zA-Z]+)([1-9][0-9]*)?') - - def __new__(cls, pack_uri_str): - if not pack_uri_str[0] == '/': - tmpl = "PackURI must begin with slash, got '%s'" - raise ValueError(tmpl % pack_uri_str) - return str.__new__(cls, pack_uri_str) - - @staticmethod - def from_rel_ref(baseURI, relative_ref): - """ - Return a |PackURI| instance containing the absolute pack URI formed by - translating *relative_ref* onto *baseURI*. - """ - joined_uri = posixpath.join(baseURI, relative_ref) - abs_uri = posixpath.abspath(joined_uri) - return PackURI(abs_uri) - - @property - def baseURI(self): - """ - The base URI of this pack URI, the directory portion, roughly - speaking. E.g. 
``'/ppt/slides'`` for ``'/ppt/slides/slide1.xml'``. - For the package pseudo-partname '/', baseURI is '/'. - """ - return posixpath.split(self)[0] - - @property - def ext(self): - """ - The extension portion of this pack URI, e.g. ``'xml'`` for - ``'/word/document.xml'``. Note the period is not included. - """ - # raw_ext is either empty string or starts with period, e.g. '.xml' - raw_ext = posixpath.splitext(self)[1] - return raw_ext[1:] if raw_ext.startswith('.') else raw_ext - - @property - def filename(self): - """ - The "filename" portion of this pack URI, e.g. ``'slide1.xml'`` for - ``'/ppt/slides/slide1.xml'``. For the package pseudo-partname '/', - filename is ''. - """ - return posixpath.split(self)[1] - - @property - def idx(self): - """ - Return partname index as integer for tuple partname or None for - singleton partname, e.g. ``21`` for ``'/ppt/slides/slide21.xml'`` and - |None| for ``'/ppt/presentation.xml'``. - """ - filename = self.filename - if not filename: - return None - name_part = posixpath.splitext(filename)[0] # filename w/ext removed - match = self._filename_re.match(name_part) - if match is None: - return None - if match.group(2): - return int(match.group(2)) - return None - - @property - def membername(self): - """ - The pack URI with the leading slash stripped off, the form used as - the Zip file membername for the package item. Returns '' for the - package pseudo-partname '/'. - """ - return self[1:] - - def relative_ref(self, baseURI): - """ - Return string containing relative reference to package item from - *baseURI*. E.g. PackURI('/ppt/slideLayouts/slideLayout1.xml') would - return '../slideLayouts/slideLayout1.xml' for baseURI '/ppt/slides'. 
- """ - # workaround for posixpath bug in 2.6, doesn't generate correct - # relative path when *start* (second) parameter is root ('/') - if baseURI == '/': - relpath = self[1:] - else: - relpath = posixpath.relpath(self, baseURI) - return relpath - - @property - def rels_uri(self): - """ - The pack URI of the .rels part corresponding to the current pack URI. - Only produces sensible output if the pack URI is a partname or the - package pseudo-partname '/'. - """ - rels_filename = '%s.rels' % self.filename - rels_uri_str = posixpath.join(self.baseURI, '_rels', rels_filename) - return PackURI(rels_uri_str) - - -PACKAGE_URI = PackURI('/') -CONTENT_TYPES_URI = PackURI('/[Content_Types].xml') diff --git a/docx/opc/part.py b/docx/opc/part.py deleted file mode 100644 index 928d3c183..000000000 --- a/docx/opc/part.py +++ /dev/null @@ -1,241 +0,0 @@ -# encoding: utf-8 - -""" -Open Packaging Convention (OPC) objects related to package parts. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from .compat import cls_method_fn -from .oxml import serialize_part_xml -from ..oxml import parse_xml -from .packuri import PackURI -from .rel import Relationships -from .shared import lazyproperty - - -class Part(object): - """ - Base class for package parts. Provides common properties and methods, but - intended to be subclassed in client code to implement specific part - behaviors. - """ - def __init__(self, partname, content_type, blob=None, package=None): - super(Part, self).__init__() - self._partname = partname - self._content_type = content_type - self._blob = blob - self._package = package - - def after_unmarshal(self): - """ - Entry point for post-unmarshaling processing, for example to parse - the part XML. May be overridden by subclasses without forwarding call - to super. 
- """ - # don't place any code here, just catch call if not overridden by - # subclass - pass - - def before_marshal(self): - """ - Entry point for pre-serialization processing, for example to finalize - part naming if necessary. May be overridden by subclasses without - forwarding call to super. - """ - # don't place any code here, just catch call if not overridden by - # subclass - pass - - @property - def blob(self): - """ - Contents of this package part as a sequence of bytes. May be text or - binary. Intended to be overridden by subclasses. Default behavior is - to return load blob. - """ - return self._blob - - @property - def content_type(self): - """ - Content type of this part. - """ - return self._content_type - - def drop_rel(self, rId): - """ - Remove the relationship identified by *rId* if its reference count - is less than 2. Relationships with a reference count of 0 are - implicit relationships. - """ - if self._rel_ref_count(rId) < 2: - del self.rels[rId] - - @classmethod - def load(cls, partname, content_type, blob, package): - return cls(partname, content_type, blob, package) - - def load_rel(self, reltype, target, rId, is_external=False): - """ - Return newly added |_Relationship| instance of *reltype* between this - part and *target* with key *rId*. Target mode is set to - ``RTM.EXTERNAL`` if *is_external* is |True|. Intended for use during - load from a serialized package, where the rId is well-known. Other - methods exist for adding a new relationship to a part when - manipulating a part. - """ - return self.rels.add_relationship(reltype, target, rId, is_external) - - @property - def package(self): - """ - |OpcPackage| instance this part belongs to. - """ - return self._package - - @property - def partname(self): - """ - |PackURI| instance holding partname of this part, e.g. 
- '/ppt/slides/slide1.xml' - """ - return self._partname - - @partname.setter - def partname(self, partname): - if not isinstance(partname, PackURI): - tmpl = "partname must be instance of PackURI, got '%s'" - raise TypeError(tmpl % type(partname).__name__) - self._partname = partname - - def part_related_by(self, reltype): - """ - Return part to which this part has a relationship of *reltype*. - Raises |KeyError| if no such relationship is found and |ValueError| - if more than one such relationship is found. Provides ability to - resolve implicitly related part, such as Slide -> SlideLayout. - """ - return self.rels.part_with_reltype(reltype) - - def relate_to(self, target, reltype, is_external=False): - """ - Return rId key of relationship of *reltype* to *target*, from an - existing relationship if there is one, otherwise a newly created one. - """ - if is_external: - return self.rels.get_or_add_ext_rel(reltype, target) - else: - rel = self.rels.get_or_add(reltype, target) - return rel.rId - - @property - def related_parts(self): - """ - Dictionary mapping related parts by rId, so child objects can resolve - explicit relationships present in the part XML, e.g. sldIdLst to a - specific |Slide| instance. - """ - return self.rels.related_parts - - @lazyproperty - def rels(self): - """ - |Relationships| instance holding the relationships for this part. - """ - return Relationships(self._partname.baseURI) - - def target_ref(self, rId): - """ - Return URL contained in target ref of relationship identified by - *rId*. - """ - rel = self.rels[rId] - return rel.target_ref - - def _rel_ref_count(self, rId): - """ - Return the count of references in this part's XML to the relationship - identified by *rId*. 
- """ - rIds = self._element.xpath('//@r:id') - return len([_rId for _rId in rIds if _rId == rId]) - - -class PartFactory(object): - """ - Provides a way for client code to specify a subclass of |Part| to be - constructed by |Unmarshaller| based on its content type and/or a custom - callable. Setting ``PartFactory.part_class_selector`` to a callable - object will cause that object to be called with the parameters - ``content_type, reltype``, once for each part in the package. If the - callable returns an object, it is used as the class for that part. If it - returns |None|, part class selection falls back to the content type map - defined in ``PartFactory.part_type_for``. If no class is returned from - either of these, the class contained in ``PartFactory.default_part_type`` - is used to construct the part, which is by default ``opc.package.Part``. - """ - part_class_selector = None - part_type_for = {} - default_part_type = Part - - def __new__(cls, partname, content_type, reltype, blob, package): - PartClass = None - if cls.part_class_selector is not None: - part_class_selector = cls_method_fn(cls, 'part_class_selector') - PartClass = part_class_selector(content_type, reltype) - if PartClass is None: - PartClass = cls._part_cls_for(content_type) - return PartClass.load(partname, content_type, blob, package) - - @classmethod - def _part_cls_for(cls, content_type): - """ - Return the custom part class registered for *content_type*, or the - default part class if no custom class is registered for - *content_type*. - """ - if content_type in cls.part_type_for: - return cls.part_type_for[content_type] - return cls.default_part_type - - -class XmlPart(Part): - """ - Base class for package parts containing an XML payload, which is most of - them. Provides additional methods to the |Part| base class that take care - of parsing and reserializing the XML payload and managing relationships - to other parts. 
- """ - def __init__(self, partname, content_type, element, package): - super(XmlPart, self).__init__( - partname, content_type, package=package - ) - self._element = element - - @property - def blob(self): - return serialize_part_xml(self._element) - - @property - def element(self): - """ - The root XML element of this XML part. - """ - return self._element - - @classmethod - def load(cls, partname, content_type, blob, package): - element = parse_xml(blob) - return cls(partname, content_type, element, package) - - @property - def part(self): - """ - Part of the parent protocol, "children" of the document will not know - the part that contains them so must ask their parent object. That - chain of delegation ends here for child objects. - """ - return self diff --git a/docx/opc/parts/coreprops.py b/docx/opc/parts/coreprops.py deleted file mode 100644 index 3c692fb99..000000000 --- a/docx/opc/parts/coreprops.py +++ /dev/null @@ -1,54 +0,0 @@ -# encoding: utf-8 - -""" -Core properties part, corresponds to ``/docProps/core.xml`` part in package. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from datetime import datetime - -from ..constants import CONTENT_TYPE as CT -from ..coreprops import CoreProperties -from ...oxml.coreprops import CT_CoreProperties -from ..packuri import PackURI -from ..part import XmlPart - - -class CorePropertiesPart(XmlPart): - """ - Corresponds to part named ``/docProps/core.xml``, containing the core - document properties for this document package. - """ - @classmethod - def default(cls, package): - """ - Return a new |CorePropertiesPart| object initialized with default - values for its base properties. 
- """ - core_properties_part = cls._new(package) - core_properties = core_properties_part.core_properties - core_properties.title = 'Word Document' - core_properties.last_modified_by = 'python-docx' - core_properties.revision = 1 - core_properties.modified = datetime.utcnow() - return core_properties_part - - @property - def core_properties(self): - """ - A |CoreProperties| object providing read/write access to the core - properties contained in this core properties part. - """ - return CoreProperties(self.element) - - @classmethod - def _new(cls, package): - partname = PackURI('/docProps/core.xml') - content_type = CT.OPC_CORE_PROPERTIES - coreProperties = CT_CoreProperties.new() - return CorePropertiesPart( - partname, content_type, coreProperties, package - ) diff --git a/docx/opc/phys_pkg.py b/docx/opc/phys_pkg.py deleted file mode 100644 index c86a51994..000000000 --- a/docx/opc/phys_pkg.py +++ /dev/null @@ -1,155 +0,0 @@ -# encoding: utf-8 - -""" -Provides a general interface to a *physical* OPC package, such as a zip file. -""" - -from __future__ import absolute_import - -import os - -from zipfile import ZipFile, is_zipfile, ZIP_DEFLATED - -from .compat import is_string -from .exceptions import PackageNotFoundError -from .packuri import CONTENT_TYPES_URI - - -class PhysPkgReader(object): - """ - Factory for physical package reader objects. - """ - def __new__(cls, pkg_file): - # if *pkg_file* is a string, treat it as a path - if is_string(pkg_file): - if os.path.isdir(pkg_file): - reader_cls = _DirPkgReader - elif is_zipfile(pkg_file): - reader_cls = _ZipPkgReader - else: - raise PackageNotFoundError( - "Package not found at '%s'" % pkg_file - ) - else: # assume it's a stream and pass it to Zip reader to sort out - reader_cls = _ZipPkgReader - - return super(PhysPkgReader, cls).__new__(reader_cls) - - -class PhysPkgWriter(object): - """ - Factory for physical package writer objects. 
- """ - def __new__(cls, pkg_file): - return super(PhysPkgWriter, cls).__new__(_ZipPkgWriter) - - -class _DirPkgReader(PhysPkgReader): - """ - Implements |PhysPkgReader| interface for an OPC package extracted into a - directory. - """ - def __init__(self, path): - """ - *path* is the path to a directory containing an expanded package. - """ - super(_DirPkgReader, self).__init__() - self._path = os.path.abspath(path) - - def blob_for(self, pack_uri): - """ - Return contents of file corresponding to *pack_uri* in package - directory. - """ - path = os.path.join(self._path, pack_uri.membername) - with open(path, 'rb') as f: - blob = f.read() - return blob - - def close(self): - """ - Provides interface consistency with |ZipFileSystem|, but does - nothing, a directory file system doesn't need closing. - """ - pass - - @property - def content_types_xml(self): - """ - Return the `[Content_Types].xml` blob from the package. - """ - return self.blob_for(CONTENT_TYPES_URI) - - def rels_xml_for(self, source_uri): - """ - Return rels item XML for source with *source_uri*, or None if the - item has no rels item. - """ - try: - rels_xml = self.blob_for(source_uri.rels_uri) - except IOError: - rels_xml = None - return rels_xml - - -class _ZipPkgReader(PhysPkgReader): - """ - Implements |PhysPkgReader| interface for a zip file OPC package. - """ - def __init__(self, pkg_file): - super(_ZipPkgReader, self).__init__() - self._zipf = ZipFile(pkg_file, 'r') - - def blob_for(self, pack_uri): - """ - Return blob corresponding to *pack_uri*. Raises |ValueError| if no - matching member is present in zip archive. - """ - return self._zipf.read(pack_uri.membername) - - def close(self): - """ - Close the zip archive, releasing any resources it is using. - """ - self._zipf.close() - - @property - def content_types_xml(self): - """ - Return the `[Content_Types].xml` blob from the zip package. 
- """ - return self.blob_for(CONTENT_TYPES_URI) - - def rels_xml_for(self, source_uri): - """ - Return rels item XML for source with *source_uri* or None if no rels - item is present. - """ - try: - rels_xml = self.blob_for(source_uri.rels_uri) - except KeyError: - rels_xml = None - return rels_xml - - -class _ZipPkgWriter(PhysPkgWriter): - """ - Implements |PhysPkgWriter| interface for a zip file OPC package. - """ - def __init__(self, pkg_file): - super(_ZipPkgWriter, self).__init__() - self._zipf = ZipFile(pkg_file, 'w', compression=ZIP_DEFLATED) - - def close(self): - """ - Close the zip archive, flushing any pending physical writes and - releasing any resources it's using. - """ - self._zipf.close() - - def write(self, pack_uri, blob): - """ - Write *blob* to this zip package with the membername corresponding to - *pack_uri*. - """ - self._zipf.writestr(pack_uri.membername, blob) diff --git a/docx/opc/pkgwriter.py b/docx/opc/pkgwriter.py deleted file mode 100644 index fccda6cd8..000000000 --- a/docx/opc/pkgwriter.py +++ /dev/null @@ -1,125 +0,0 @@ -# encoding: utf-8 - -""" -Provides a low-level, write-only API to a serialized Open Packaging -Convention (OPC) package, essentially an implementation of OpcPackage.save() -""" - -from __future__ import absolute_import - -from .constants import CONTENT_TYPE as CT -from .oxml import CT_Types, serialize_part_xml -from .packuri import CONTENT_TYPES_URI, PACKAGE_URI -from .phys_pkg import PhysPkgWriter -from .shared import CaseInsensitiveDict -from .spec import default_content_types - - -class PackageWriter(object): - """ - Writes a zip-format OPC package to *pkg_file*, where *pkg_file* can be - either a path to a zip file (a string) or a file-like object. Its single - API method, :meth:`write`, is static, so this class is not intended to - be instantiated. 
- """ - @staticmethod - def write(pkg_file, pkg_rels, parts): - """ - Write a physical package (.pptx file) to *pkg_file* containing - *pkg_rels* and *parts* and a content types stream based on the - content types of the parts. - """ - phys_writer = PhysPkgWriter(pkg_file) - PackageWriter._write_content_types_stream(phys_writer, parts) - PackageWriter._write_pkg_rels(phys_writer, pkg_rels) - PackageWriter._write_parts(phys_writer, parts) - phys_writer.close() - - @staticmethod - def _write_content_types_stream(phys_writer, parts): - """ - Write ``[Content_Types].xml`` part to the physical package with an - appropriate content type lookup target for each part in *parts*. - """ - cti = _ContentTypesItem.from_parts(parts) - phys_writer.write(CONTENT_TYPES_URI, cti.blob) - - @staticmethod - def _write_parts(phys_writer, parts): - """ - Write the blob of each part in *parts* to the package, along with a - rels item for its relationships if and only if it has any. - """ - for part in parts: - phys_writer.write(part.partname, part.blob) - if len(part._rels): - phys_writer.write(part.partname.rels_uri, part._rels.xml) - - @staticmethod - def _write_pkg_rels(phys_writer, pkg_rels): - """ - Write the XML rels item for *pkg_rels* ('/_rels/.rels') to the - package. - """ - phys_writer.write(PACKAGE_URI.rels_uri, pkg_rels.xml) - - -class _ContentTypesItem(object): - """ - Service class that composes a content types item ([Content_Types].xml) - based on a list of parts. Not meant to be instantiated directly, its - single interface method is xml_for(), e.g. - ``_ContentTypesItem.xml_for(parts)``. - """ - def __init__(self): - self._defaults = CaseInsensitiveDict() - self._overrides = dict() - - @property - def blob(self): - """ - Return XML form of this content types item, suitable for storage as - ``[Content_Types].xml`` in an OPC package. 
- """ - return serialize_part_xml(self._element) - - @classmethod - def from_parts(cls, parts): - """ - Return content types XML mapping each part in *parts* to the - appropriate content type and suitable for storage as - ``[Content_Types].xml`` in an OPC package. - """ - cti = cls() - cti._defaults['rels'] = CT.OPC_RELATIONSHIPS - cti._defaults['xml'] = CT.XML - for part in parts: - cti._add_content_type(part.partname, part.content_type) - return cti - - def _add_content_type(self, partname, content_type): - """ - Add a content type for the part with *partname* and *content_type*, - using a default or override as appropriate. - """ - ext = partname.ext - if (ext.lower(), content_type) in default_content_types: - self._defaults[ext] = content_type - else: - self._overrides[partname] = content_type - - @property - def _element(self): - """ - Return XML form of this content types item, suitable for storage as - ``[Content_Types].xml`` in an OPC package. Although the sequence of - elements is not strictly significant, as an aid to testing and - readability Default elements are sorted by extension and Override - elements are sorted by partname. - """ - _types_elm = CT_Types.new() - for ext in sorted(self._defaults.keys()): - _types_elm.add_default(ext, self._defaults[ext]) - for partname in sorted(self._overrides.keys()): - _types_elm.add_override(partname, self._overrides[partname]) - return _types_elm diff --git a/docx/opc/rel.py b/docx/opc/rel.py deleted file mode 100644 index 7dba2af8e..000000000 --- a/docx/opc/rel.py +++ /dev/null @@ -1,170 +0,0 @@ -# encoding: utf-8 - -""" -Relationship-related objects. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from .oxml import CT_Relationships - - -class Relationships(dict): - """ - Collection object for |_Relationship| instances, having list semantics. 
- """ - def __init__(self, baseURI): - super(Relationships, self).__init__() - self._baseURI = baseURI - self._target_parts_by_rId = {} - - def add_relationship(self, reltype, target, rId, is_external=False): - """ - Return a newly added |_Relationship| instance. - """ - rel = _Relationship(rId, reltype, target, self._baseURI, is_external) - self[rId] = rel - if not is_external: - self._target_parts_by_rId[rId] = target - return rel - - def get_or_add(self, reltype, target_part): - """ - Return relationship of *reltype* to *target_part*, newly added if not - already present in collection. - """ - rel = self._get_matching(reltype, target_part) - if rel is None: - rId = self._next_rId - rel = self.add_relationship(reltype, target_part, rId) - return rel - - def get_or_add_ext_rel(self, reltype, target_ref): - """ - Return rId of external relationship of *reltype* to *target_ref*, - newly added if not already present in collection. - """ - rel = self._get_matching(reltype, target_ref, is_external=True) - if rel is None: - rId = self._next_rId - rel = self.add_relationship( - reltype, target_ref, rId, is_external=True - ) - return rel.rId - - def part_with_reltype(self, reltype): - """ - Return target part of rel with matching *reltype*, raising |KeyError| - if not found and |ValueError| if more than one matching relationship - is found. - """ - rel = self._get_rel_of_type(reltype) - return rel.target_part - - @property - def related_parts(self): - """ - dict mapping rIds to target parts for all the internal relationships - in the collection. - """ - return self._target_parts_by_rId - - @property - def xml(self): - """ - Serialize this relationship collection into XML suitable for storage - as a .rels file in an OPC package. 
- """ - rels_elm = CT_Relationships.new() - for rel in self.values(): - rels_elm.add_rel( - rel.rId, rel.reltype, rel.target_ref, rel.is_external - ) - return rels_elm.xml - - def _get_matching(self, reltype, target, is_external=False): - """ - Return relationship of matching *reltype*, *target*, and - *is_external* from collection, or None if not found. - """ - def matches(rel, reltype, target, is_external): - if rel.reltype != reltype: - return False - if rel.is_external != is_external: - return False - rel_target = rel.target_ref if rel.is_external else rel.target_part - if rel_target != target: - return False - return True - - for rel in self.values(): - if matches(rel, reltype, target, is_external): - return rel - return None - - def _get_rel_of_type(self, reltype): - """ - Return single relationship of type *reltype* from the collection. - Raises |KeyError| if no matching relationship is found. Raises - |ValueError| if more than one matching relationship is found. - """ - matching = [rel for rel in self.values() if rel.reltype == reltype] - if len(matching) == 0: - tmpl = "no relationship of type '%s' in collection" - raise KeyError(tmpl % reltype) - if len(matching) > 1: - tmpl = "multiple relationships of type '%s' in collection" - raise ValueError(tmpl % reltype) - return matching[0] - - @property - def _next_rId(self): - """ - Next available rId in collection, starting from 'rId1' and making use - of any gaps in numbering, e.g. 'rId2' for rIds ['rId1', 'rId3']. - """ - for n in range(1, len(self)+2): - rId_candidate = 'rId%d' % n # like 'rId19' - if rId_candidate not in self: - return rId_candidate - - -class _Relationship(object): - """ - Value object for relationship to part. 
- """ - def __init__(self, rId, reltype, target, baseURI, external=False): - super(_Relationship, self).__init__() - self._rId = rId - self._reltype = reltype - self._target = target - self._baseURI = baseURI - self._is_external = bool(external) - - @property - def is_external(self): - return self._is_external - - @property - def reltype(self): - return self._reltype - - @property - def rId(self): - return self._rId - - @property - def target_part(self): - if self._is_external: - raise ValueError("target_part property on _Relationship is undef" - "ined when target mode is External") - return self._target - - @property - def target_ref(self): - if self._is_external: - return self._target - else: - return self._target.partname.relative_ref(self._baseURI) diff --git a/docx/opc/shared.py b/docx/opc/shared.py deleted file mode 100644 index 55344483d..000000000 --- a/docx/opc/shared.py +++ /dev/null @@ -1,47 +0,0 @@ -# encoding: utf-8 - -""" -Objects shared by opc modules. -""" - -from __future__ import absolute_import, print_function, unicode_literals - - -class CaseInsensitiveDict(dict): - """ - Mapping type that behaves like dict except that it matches without respect - to the case of the key. E.g. cid['A'] == cid['a']. Note this is not - general-purpose, just complete enough to satisfy opc package needs. It - assumes str keys, and that it is created empty; keys passed in constructor - are not accounted for - """ - def __contains__(self, key): - return super(CaseInsensitiveDict, self).__contains__(key.lower()) - - def __getitem__(self, key): - return super(CaseInsensitiveDict, self).__getitem__(key.lower()) - - def __setitem__(self, key, value): - return super(CaseInsensitiveDict, self).__setitem__( - key.lower(), value - ) - - -def lazyproperty(f): - """ - @lazyprop decorator. Decorated method will be called only on first access - to calculate a cached property value. After that, the cached value is - returned. 
- """ - cache_attr_name = '_%s' % f.__name__ # like '_foobar' for prop 'foobar' - docstring = f.__doc__ - - def get_prop_value(obj): - try: - return getattr(obj, cache_attr_name) - except AttributeError: - value = f(obj) - setattr(obj, cache_attr_name, value) - return value - - return property(get_prop_value, doc=docstring) diff --git a/docx/opc/spec.py b/docx/opc/spec.py deleted file mode 100644 index 60fc38564..000000000 --- a/docx/opc/spec.py +++ /dev/null @@ -1,29 +0,0 @@ -# encoding: utf-8 - -""" -Provides mappings that embody aspects of the Open XML spec ISO/IEC 29500. -""" - -from .constants import CONTENT_TYPE as CT - - -default_content_types = ( - ('bin', CT.PML_PRINTER_SETTINGS), - ('bin', CT.SML_PRINTER_SETTINGS), - ('bin', CT.WML_PRINTER_SETTINGS), - ('bmp', CT.BMP), - ('emf', CT.X_EMF), - ('fntdata', CT.X_FONTDATA), - ('gif', CT.GIF), - ('jpe', CT.JPEG), - ('jpeg', CT.JPEG), - ('jpg', CT.JPEG), - ('png', CT.PNG), - ('rels', CT.OPC_RELATIONSHIPS), - ('tif', CT.TIFF), - ('tiff', CT.TIFF), - ('wdp', CT.MS_PHOTO), - ('wmf', CT.X_WMF), - ('xlsx', CT.SML_SHEET), - ('xml', CT.XML), -) diff --git a/docx/oxml/__init__.py b/docx/oxml/__init__.py deleted file mode 100644 index 528b1eac7..000000000 --- a/docx/oxml/__init__.py +++ /dev/null @@ -1,202 +0,0 @@ -# encoding: utf-8 - -""" -Initializes oxml sub-package, including registering custom element classes -corresponding to Open XML elements. -""" - -from __future__ import absolute_import - -from lxml import etree - -from .ns import NamespacePrefixedTag, nsmap - - -# configure XML parser -element_class_lookup = etree.ElementNamespaceClassLookup() -oxml_parser = etree.XMLParser(remove_blank_text=True, resolve_entities=False) -oxml_parser.set_element_class_lookup(element_class_lookup) - - -def parse_xml(xml): - """ - Return root lxml element obtained by parsing XML character string in - *xml*, which can be either a Python 2.x string or unicode. 
The custom - parser is used, so custom element classes are produced for elements in - *xml* that have them. - """ - root_element = etree.fromstring(xml, oxml_parser) - return root_element - - -def register_element_cls(tag, cls): - """ - Register *cls* to be constructed when the oxml parser encounters an - element with matching *tag*. *tag* is a string of the form - ``nspfx:tagroot``, e.g. ``'w:document'``. - """ - nspfx, tagroot = tag.split(':') - namespace = element_class_lookup.get_namespace(nsmap[nspfx]) - namespace[tagroot] = cls - - -def OxmlElement(nsptag_str, attrs=None, nsdecls=None): - """ - Return a 'loose' lxml element having the tag specified by *nsptag_str*. - *nsptag_str* must contain the standard namespace prefix, e.g. 'a:tbl'. - The resulting element is an instance of the custom element class for this - tag name if one is defined. A dictionary of attribute values may be - provided as *attrs*; they are set if present. All namespaces defined in - the dict *nsdecls* are declared in the element using the key as the - prefix and the value as the namespace name. If *nsdecls* is not provided, - a single namespace declaration is added based on the prefix on - *nsptag_str*. 
- """ - nsptag = NamespacePrefixedTag(nsptag_str) - if nsdecls is None: - nsdecls = nsptag.nsmap - return oxml_parser.makeelement( - nsptag.clark_name, attrib=attrs, nsmap=nsdecls - ) - - -# =========================================================================== -# custom element class mappings -# =========================================================================== - -from .shared import CT_DecimalNumber, CT_OnOff, CT_String - - -from .coreprops import CT_CoreProperties -register_element_cls('cp:coreProperties', CT_CoreProperties) - -from .document import CT_Body, CT_Document -register_element_cls('w:body', CT_Body) -register_element_cls('w:document', CT_Document) - -from .numbering import ( - CT_Num, CT_Numbering, CT_NumLvl, CT_NumPr -) -register_element_cls('w:abstractNumId', CT_DecimalNumber) -register_element_cls('w:ilvl', CT_DecimalNumber) -register_element_cls('w:lvlOverride', CT_NumLvl) -register_element_cls('w:num', CT_Num) -register_element_cls('w:numId', CT_DecimalNumber) -register_element_cls('w:numPr', CT_NumPr) -register_element_cls('w:numbering', CT_Numbering) -register_element_cls('w:startOverride', CT_DecimalNumber) - -from .section import CT_PageMar, CT_PageSz, CT_SectPr, CT_SectType -register_element_cls('w:pgMar', CT_PageMar) -register_element_cls('w:pgSz', CT_PageSz) -register_element_cls('w:sectPr', CT_SectPr) -register_element_cls('w:type', CT_SectType) - -from .shape import ( - CT_Blip, CT_BlipFillProperties, CT_GraphicalObject, - CT_GraphicalObjectData, CT_Inline, CT_NonVisualDrawingProps, CT_Picture, - CT_PictureNonVisual, CT_Point2D, CT_PositiveSize2D, CT_ShapeProperties, - CT_Transform2D -) -register_element_cls('a:blip', CT_Blip) -register_element_cls('a:ext', CT_PositiveSize2D) -register_element_cls('a:graphic', CT_GraphicalObject) -register_element_cls('a:graphicData', CT_GraphicalObjectData) -register_element_cls('a:off', CT_Point2D) -register_element_cls('a:xfrm', CT_Transform2D) -register_element_cls('pic:blipFill', 
CT_BlipFillProperties) -register_element_cls('pic:cNvPr', CT_NonVisualDrawingProps) -register_element_cls('pic:nvPicPr', CT_PictureNonVisual) -register_element_cls('pic:pic', CT_Picture) -register_element_cls('pic:spPr', CT_ShapeProperties) -register_element_cls('wp:docPr', CT_NonVisualDrawingProps) -register_element_cls('wp:extent', CT_PositiveSize2D) -register_element_cls('wp:inline', CT_Inline) - -from .styles import CT_LatentStyles, CT_LsdException, CT_Style, CT_Styles -register_element_cls('w:basedOn', CT_String) -register_element_cls('w:latentStyles', CT_LatentStyles) -register_element_cls('w:locked', CT_OnOff) -register_element_cls('w:lsdException', CT_LsdException) -register_element_cls('w:name', CT_String) -register_element_cls('w:next', CT_String) -register_element_cls('w:qFormat', CT_OnOff) -register_element_cls('w:semiHidden', CT_OnOff) -register_element_cls('w:style', CT_Style) -register_element_cls('w:styles', CT_Styles) -register_element_cls('w:uiPriority', CT_DecimalNumber) -register_element_cls('w:unhideWhenUsed', CT_OnOff) - -from .table import ( - CT_Row, CT_Tbl, CT_TblGrid, CT_TblGridCol, CT_TblLayoutType, CT_TblPr, - CT_TblWidth, CT_Tc, CT_TcPr, CT_VMerge -) -register_element_cls('w:bidiVisual', CT_OnOff) -register_element_cls('w:gridCol', CT_TblGridCol) -register_element_cls('w:gridSpan', CT_DecimalNumber) -register_element_cls('w:tbl', CT_Tbl) -register_element_cls('w:tblGrid', CT_TblGrid) -register_element_cls('w:tblLayout', CT_TblLayoutType) -register_element_cls('w:tblPr', CT_TblPr) -register_element_cls('w:tblStyle', CT_String) -register_element_cls('w:tc', CT_Tc) -register_element_cls('w:tcPr', CT_TcPr) -register_element_cls('w:tcW', CT_TblWidth) -register_element_cls('w:tr', CT_Row) -register_element_cls('w:vMerge', CT_VMerge) - -from .text.font import ( - CT_Color, CT_Fonts, CT_Highlight, CT_HpsMeasure, CT_RPr, CT_Underline, - CT_VerticalAlignRun -) -register_element_cls('w:b', CT_OnOff) -register_element_cls('w:bCs', CT_OnOff) 
-register_element_cls('w:caps', CT_OnOff) -register_element_cls('w:color', CT_Color) -register_element_cls('w:cs', CT_OnOff) -register_element_cls('w:dstrike', CT_OnOff) -register_element_cls('w:emboss', CT_OnOff) -register_element_cls('w:highlight', CT_Highlight) -register_element_cls('w:i', CT_OnOff) -register_element_cls('w:iCs', CT_OnOff) -register_element_cls('w:imprint', CT_OnOff) -register_element_cls('w:noProof', CT_OnOff) -register_element_cls('w:oMath', CT_OnOff) -register_element_cls('w:outline', CT_OnOff) -register_element_cls('w:rFonts', CT_Fonts) -register_element_cls('w:rPr', CT_RPr) -register_element_cls('w:rStyle', CT_String) -register_element_cls('w:rtl', CT_OnOff) -register_element_cls('w:shadow', CT_OnOff) -register_element_cls('w:smallCaps', CT_OnOff) -register_element_cls('w:snapToGrid', CT_OnOff) -register_element_cls('w:specVanish', CT_OnOff) -register_element_cls('w:strike', CT_OnOff) -register_element_cls('w:sz', CT_HpsMeasure) -register_element_cls('w:u', CT_Underline) -register_element_cls('w:vanish', CT_OnOff) -register_element_cls('w:vertAlign', CT_VerticalAlignRun) -register_element_cls('w:webHidden', CT_OnOff) - -from .text.paragraph import CT_P -register_element_cls('w:p', CT_P) - -from .text.parfmt import ( - CT_Ind, CT_Jc, CT_PPr, CT_Spacing, CT_TabStop, CT_TabStops -) -register_element_cls('w:ind', CT_Ind) -register_element_cls('w:jc', CT_Jc) -register_element_cls('w:keepLines', CT_OnOff) -register_element_cls('w:keepNext', CT_OnOff) -register_element_cls('w:pageBreakBefore', CT_OnOff) -register_element_cls('w:pPr', CT_PPr) -register_element_cls('w:pStyle', CT_String) -register_element_cls('w:spacing', CT_Spacing) -register_element_cls('w:tab', CT_TabStop) -register_element_cls('w:tabs', CT_TabStops) -register_element_cls('w:widowControl', CT_OnOff) - -from .text.run import CT_Br, CT_R, CT_Text -register_element_cls('w:br', CT_Br) -register_element_cls('w:r', CT_R) -register_element_cls('w:t', CT_Text) diff --git 
a/docx/oxml/coreprops.py b/docx/oxml/coreprops.py deleted file mode 100644 index b53807443..000000000 --- a/docx/oxml/coreprops.py +++ /dev/null @@ -1,318 +0,0 @@ -# encoding: utf-8 - -""" -lxml custom element classes for core properties-related XML elements. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -import re - -from datetime import datetime, timedelta - -from . import parse_xml -from .ns import nsdecls, qn -from .xmlchemy import BaseOxmlElement, ZeroOrOne - - -class CT_CoreProperties(BaseOxmlElement): - """ - ```` element, the root element of the Core Properties - part stored as ``/docProps/core.xml``. Implements many of the Dublin Core - document metadata elements. String elements resolve to an empty string - ('') if the element is not present in the XML. String elements are - limited in length to 255 unicode characters. - """ - category = ZeroOrOne('cp:category', successors=()) - contentStatus = ZeroOrOne('cp:contentStatus', successors=()) - created = ZeroOrOne('dcterms:created', successors=()) - creator = ZeroOrOne('dc:creator', successors=()) - description = ZeroOrOne('dc:description', successors=()) - identifier = ZeroOrOne('dc:identifier', successors=()) - keywords = ZeroOrOne('cp:keywords', successors=()) - language = ZeroOrOne('dc:language', successors=()) - lastModifiedBy = ZeroOrOne('cp:lastModifiedBy', successors=()) - lastPrinted = ZeroOrOne('cp:lastPrinted', successors=()) - modified = ZeroOrOne('dcterms:modified', successors=()) - revision = ZeroOrOne('cp:revision', successors=()) - subject = ZeroOrOne('dc:subject', successors=()) - title = ZeroOrOne('dc:title', successors=()) - version = ZeroOrOne('cp:version', successors=()) - - _coreProperties_tmpl = ( - '\n' % nsdecls('cp', 'dc', 'dcterms') - ) - - @classmethod - def new(cls): - """ - Return a new ```` element - """ - xml = cls._coreProperties_tmpl - coreProperties = parse_xml(xml) - return coreProperties - - @property - def 
author_text(self): - """ - The text in the `dc:creator` child element. - """ - return self._text_of_element('creator') - - @author_text.setter - def author_text(self, value): - self._set_element_text('creator', value) - - @property - def category_text(self): - return self._text_of_element('category') - - @category_text.setter - def category_text(self, value): - self._set_element_text('category', value) - - @property - def comments_text(self): - return self._text_of_element('description') - - @comments_text.setter - def comments_text(self, value): - self._set_element_text('description', value) - - @property - def contentStatus_text(self): - return self._text_of_element('contentStatus') - - @contentStatus_text.setter - def contentStatus_text(self, value): - self._set_element_text('contentStatus', value) - - @property - def created_datetime(self): - return self._datetime_of_element('created') - - @created_datetime.setter - def created_datetime(self, value): - self._set_element_datetime('created', value) - - @property - def identifier_text(self): - return self._text_of_element('identifier') - - @identifier_text.setter - def identifier_text(self, value): - self._set_element_text('identifier', value) - - @property - def keywords_text(self): - return self._text_of_element('keywords') - - @keywords_text.setter - def keywords_text(self, value): - self._set_element_text('keywords', value) - - @property - def language_text(self): - return self._text_of_element('language') - - @language_text.setter - def language_text(self, value): - self._set_element_text('language', value) - - @property - def lastModifiedBy_text(self): - return self._text_of_element('lastModifiedBy') - - @lastModifiedBy_text.setter - def lastModifiedBy_text(self, value): - self._set_element_text('lastModifiedBy', value) - - @property - def lastPrinted_datetime(self): - return self._datetime_of_element('lastPrinted') - - @lastPrinted_datetime.setter - def lastPrinted_datetime(self, value): - 
self._set_element_datetime('lastPrinted', value) - - @property - def modified_datetime(self): - return self._datetime_of_element('modified') - - @modified_datetime.setter - def modified_datetime(self, value): - self._set_element_datetime('modified', value) - - @property - def revision_number(self): - """ - Integer value of revision property. - """ - revision = self.revision - if revision is None: - return 0 - revision_str = revision.text - try: - revision = int(revision_str) - except ValueError: - # non-integer revision strings also resolve to 0 - revision = 0 - # as do negative integers - if revision < 0: - revision = 0 - return revision - - @revision_number.setter - def revision_number(self, value): - """ - Set revision property to string value of integer *value*. - """ - if not isinstance(value, int) or value < 1: - tmpl = "revision property requires positive int, got '%s'" - raise ValueError(tmpl % value) - revision = self.get_or_add_revision() - revision.text = str(value) - - @property - def subject_text(self): - return self._text_of_element('subject') - - @subject_text.setter - def subject_text(self, value): - self._set_element_text('subject', value) - - @property - def title_text(self): - return self._text_of_element('title') - - @title_text.setter - def title_text(self, value): - self._set_element_text('title', value) - - @property - def version_text(self): - return self._text_of_element('version') - - @version_text.setter - def version_text(self, value): - self._set_element_text('version', value) - - def _datetime_of_element(self, property_name): - element = getattr(self, property_name) - if element is None: - return None - datetime_str = element.text - try: - return self._parse_W3CDTF_to_datetime(datetime_str) - except ValueError: - # invalid datetime strings are ignored - return None - - def _get_or_add(self, prop_name): - """ - Return element returned by 'get_or_add_' method for *prop_name*. 
- """ - get_or_add_method_name = 'get_or_add_%s' % prop_name - get_or_add_method = getattr(self, get_or_add_method_name) - element = get_or_add_method() - return element - - @classmethod - def _offset_dt(cls, dt, offset_str): - """ - Return a |datetime| instance that is offset from datetime *dt* by - the timezone offset specified in *offset_str*, a string like - ``'-07:00'``. - """ - match = cls._offset_pattern.match(offset_str) - if match is None: - raise ValueError( - "'%s' is not a valid offset string" % offset_str - ) - sign, hours_str, minutes_str = match.groups() - sign_factor = -1 if sign == '+' else 1 - hours = int(hours_str) * sign_factor - minutes = int(minutes_str) * sign_factor - td = timedelta(hours=hours, minutes=minutes) - return dt + td - - _offset_pattern = re.compile('([+-])(\d\d):(\d\d)') - - @classmethod - def _parse_W3CDTF_to_datetime(cls, w3cdtf_str): - # valid W3CDTF date cases: - # yyyy e.g. '2003' - # yyyy-mm e.g. '2003-12' - # yyyy-mm-dd e.g. '2003-12-31' - # UTC timezone e.g. '2003-12-31T10:14:55Z' - # numeric timezone e.g. '2003-12-31T10:14:55-08:00' - templates = ( - '%Y-%m-%dT%H:%M:%S', - '%Y-%m-%d', - '%Y-%m', - '%Y', - ) - # strptime isn't smart enough to parse literal timezone offsets like - # '-07:30', so we have to do it ourselves - parseable_part = w3cdtf_str[:19] - offset_str = w3cdtf_str[19:] - dt = None - for tmpl in templates: - try: - dt = datetime.strptime(parseable_part, tmpl) - except ValueError: - continue - if dt is None: - tmpl = "could not parse W3CDTF datetime string '%s'" - raise ValueError(tmpl % w3cdtf_str) - if len(offset_str) == 6: - return cls._offset_dt(dt, offset_str) - return dt - - def _set_element_datetime(self, prop_name, value): - """ - Set date/time value of child element having *prop_name* to *value*. 
- """ - if not isinstance(value, datetime): - tmpl = ( - "property requires object, got %s" - ) - raise ValueError(tmpl % type(value)) - element = self._get_or_add(prop_name) - dt_str = value.strftime('%Y-%m-%dT%H:%M:%SZ') - element.text = dt_str - if prop_name in ('created', 'modified'): - # These two require an explicit 'xsi:type="dcterms:W3CDTF"' - # attribute. The first and last line are a hack required to add - # the xsi namespace to the root element rather than each child - # element in which it is referenced - self.set(qn('xsi:foo'), 'bar') - element.set(qn('xsi:type'), 'dcterms:W3CDTF') - del self.attrib[qn('xsi:foo')] - - def _set_element_text(self, prop_name, value): - """ - Set string value of *name* property to *value*. - """ - value = str(value) - if len(value) > 255: - tmpl = ( - "exceeded 255 char limit for property, got:\n\n'%s'" - ) - raise ValueError(tmpl % value) - element = self._get_or_add(prop_name) - element.text = value - - def _text_of_element(self, property_name): - """ - Return the text in the element matching *property_name*, or an empty - string if the element is not present or contains no text. - """ - element = getattr(self, property_name) - if element is None: - return '' - if element.text is None: - return '' - return element.text diff --git a/docx/oxml/document.py b/docx/oxml/document.py deleted file mode 100644 index e1cb4ac55..000000000 --- a/docx/oxml/document.py +++ /dev/null @@ -1,59 +0,0 @@ -# encoding: utf-8 - -""" -Custom element classes that correspond to the document part, e.g. -. -""" - -from .xmlchemy import BaseOxmlElement, ZeroOrOne, ZeroOrMore - - -class CT_Document(BaseOxmlElement): - """ - ```` element, the root element of a document.xml file. - """ - body = ZeroOrOne('w:body') - - @property - def sectPr_lst(self): - """ - Return a list containing a reference to each ```` element - in the document, in the order encountered. 
- """ - return self.xpath('.//w:sectPr') - - -class CT_Body(BaseOxmlElement): - """ - ````, the container element for the main document story in - ``document.xml``. - """ - p = ZeroOrMore('w:p', successors=('w:sectPr',)) - tbl = ZeroOrMore('w:tbl', successors=('w:sectPr',)) - sectPr = ZeroOrOne('w:sectPr', successors=()) - - def add_section_break(self): - """ - Return the current ```` element after adding a clone of it - in a new ```` element appended to the block content elements. - Note that the "current" ```` will always be the sentinel - sectPr in this case since we're always working at the end of the - block content. - """ - sentinel_sectPr = self.get_or_add_sectPr() - cloned_sectPr = sentinel_sectPr.clone() - p = self.add_p() - p.set_sectPr(cloned_sectPr) - return sentinel_sectPr - - def clear_content(self): - """ - Remove all content child elements from this element. Leave - the element if it is present. - """ - if self.sectPr is not None: - content_elms = self[:-1] - else: - content_elms = self[:] - for content_elm in content_elms: - self.remove(content_elm) diff --git a/docx/oxml/exceptions.py b/docx/oxml/exceptions.py deleted file mode 100644 index 4696f1e93..000000000 --- a/docx/oxml/exceptions.py +++ /dev/null @@ -1,16 +0,0 @@ -# encoding: utf-8 - -""" -Exceptions for oxml sub-package -""" - - -class XmlchemyError(Exception): - """Generic error class.""" - - -class InvalidXmlError(XmlchemyError): - """ - Raised when invalid XML is encountered, such as on attempt to access a - missing required child element - """ diff --git a/docx/oxml/ns.py b/docx/oxml/ns.py deleted file mode 100644 index e6f6a4acc..000000000 --- a/docx/oxml/ns.py +++ /dev/null @@ -1,114 +0,0 @@ -# encoding: utf-8 - -""" -Namespace-related objects. 
-""" - -from __future__ import absolute_import, print_function, unicode_literals - - -nsmap = { - 'a': ('http://schemas.openxmlformats.org/drawingml/2006/main'), - 'c': ('http://schemas.openxmlformats.org/drawingml/2006/chart'), - 'cp': ('http://schemas.openxmlformats.org/package/2006/metadata/core-pr' - 'operties'), - 'dc': ('http://purl.org/dc/elements/1.1/'), - 'dcmitype': ('http://purl.org/dc/dcmitype/'), - 'dcterms': ('http://purl.org/dc/terms/'), - 'dgm': ('http://schemas.openxmlformats.org/drawingml/2006/diagram'), - 'pic': ('http://schemas.openxmlformats.org/drawingml/2006/picture'), - 'r': ('http://schemas.openxmlformats.org/officeDocument/2006/relations' - 'hips'), - 'w': ('http://schemas.openxmlformats.org/wordprocessingml/2006/main'), - 'wp': ('http://schemas.openxmlformats.org/drawingml/2006/wordprocessing' - 'Drawing'), - 'xml': ('http://www.w3.org/XML/1998/namespace'), - 'xsi': ('http://www.w3.org/2001/XMLSchema-instance'), -} - -pfxmap = dict((value, key) for key, value in nsmap.items()) - - -class NamespacePrefixedTag(str): - """ - Value object that knows the semantics of an XML tag having a namespace - prefix. - """ - def __new__(cls, nstag, *args): - return super(NamespacePrefixedTag, cls).__new__(cls, nstag) - - def __init__(self, nstag): - self._pfx, self._local_part = nstag.split(':') - self._ns_uri = nsmap[self._pfx] - - @property - def clark_name(self): - return '{%s}%s' % (self._ns_uri, self._local_part) - - @classmethod - def from_clark_name(cls, clark_name): - nsuri, local_name = clark_name[1:].split('}') - nstag = '%s:%s' % (pfxmap[nsuri], local_name) - return cls(nstag) - - @property - def local_part(self): - """ - Return the local part of the tag as a string. E.g. 'foobar' is - returned for tag 'f:foobar'. - """ - return self._local_part - - @property - def nsmap(self): - """ - Return a dict having a single member, mapping the namespace prefix of - this tag to it's namespace name (e.g. {'f': 'http://foo/bar'}). 
This - is handy for passing to xpath calls and other uses. - """ - return {self._pfx: self._ns_uri} - - @property - def nspfx(self): - """ - Return the string namespace prefix for the tag, e.g. 'f' is returned - for tag 'f:foobar'. - """ - return self._pfx - - @property - def nsuri(self): - """ - Return the namespace URI for the tag, e.g. 'http://foo/bar' would be - returned for tag 'f:foobar' if the 'f' prefix maps to - 'http://foo/bar' in nsmap. - """ - return self._ns_uri - - -def nsdecls(*prefixes): - """ - Return a string containing a namespace declaration for each of the - namespace prefix strings, e.g. 'p', 'ct', passed as *prefixes*. - """ - return ' '.join(['xmlns:%s="%s"' % (pfx, nsmap[pfx]) for pfx in prefixes]) - - -def nspfxmap(*nspfxs): - """ - Return a dict containing the subset namespace prefix mappings specified by - *nspfxs*. Any number of namespace prefixes can be supplied, e.g. - namespaces('a', 'r', 'p'). - """ - return dict((pfx, nsmap[pfx]) for pfx in nspfxs) - - -def qn(tag): - """ - Stands for "qualified name", a utility function to turn a namespace - prefixed tag name into a Clark-notation qualified tag name for lxml. For - example, ``qn('p:cSld')`` returns ``'{http://schemas.../main}cSld'``. - """ - prefix, tagroot = tag.split(':') - uri = nsmap[prefix] - return '{%s}%s' % (uri, tagroot) diff --git a/docx/oxml/numbering.py b/docx/oxml/numbering.py deleted file mode 100644 index aeedfa9a0..000000000 --- a/docx/oxml/numbering.py +++ /dev/null @@ -1,131 +0,0 @@ -# encoding: utf-8 - -""" -Custom element classes related to the numbering part -""" - -from . 
import OxmlElement -from .shared import CT_DecimalNumber -from .simpletypes import ST_DecimalNumber -from .xmlchemy import ( - BaseOxmlElement, OneAndOnlyOne, RequiredAttribute, ZeroOrMore, ZeroOrOne -) - - -class CT_Num(BaseOxmlElement): - """ - ```` element, which represents a concrete list definition - instance, having a required child that references an - abstract numbering definition that defines most of the formatting details. - """ - abstractNumId = OneAndOnlyOne('w:abstractNumId') - lvlOverride = ZeroOrMore('w:lvlOverride') - numId = RequiredAttribute('w:numId', ST_DecimalNumber) - - def add_lvlOverride(self, ilvl): - """ - Return a newly added CT_NumLvl () element having its - ``ilvl`` attribute set to *ilvl*. - """ - return self._add_lvlOverride(ilvl=ilvl) - - @classmethod - def new(cls, num_id, abstractNum_id): - """ - Return a new ```` element having numId of *num_id* and having - a ```` child with val attribute set to - *abstractNum_id*. - """ - num = OxmlElement('w:num') - num.numId = num_id - abstractNumId = CT_DecimalNumber.new( - 'w:abstractNumId', abstractNum_id - ) - num.append(abstractNumId) - return num - - -class CT_NumLvl(BaseOxmlElement): - """ - ```` element, which identifies a level in a list - definition to override with settings it contains. - """ - startOverride = ZeroOrOne('w:startOverride', successors=('w:lvl',)) - ilvl = RequiredAttribute('w:ilvl', ST_DecimalNumber) - - def add_startOverride(self, val): - """ - Return a newly added CT_DecimalNumber element having tagname - ``w:startOverride`` and ``val`` attribute set to *val*. - """ - return self._add_startOverride(val=val) - - -class CT_NumPr(BaseOxmlElement): - """ - A ```` element, a container for numbering properties applied to - a paragraph. 
- """ - ilvl = ZeroOrOne('w:ilvl', successors=( - 'w:numId', 'w:numberingChange', 'w:ins' - )) - numId = ZeroOrOne('w:numId', successors=('w:numberingChange', 'w:ins')) - - # @ilvl.setter - # def _set_ilvl(self, val): - # """ - # Get or add a child and set its ``w:val`` attribute to *val*. - # """ - # ilvl = self.get_or_add_ilvl() - # ilvl.val = val - - # @numId.setter - # def numId(self, val): - # """ - # Get or add a child and set its ``w:val`` attribute to - # *val*. - # """ - # numId = self.get_or_add_numId() - # numId.val = val - - -class CT_Numbering(BaseOxmlElement): - """ - ```` element, the root element of a numbering part, i.e. - numbering.xml - """ - num = ZeroOrMore('w:num', successors=('w:numIdMacAtCleanup',)) - - def add_num(self, abstractNum_id): - """ - Return a newly added CT_Num () element referencing the - abstract numbering definition identified by *abstractNum_id*. - """ - next_num_id = self._next_numId - num = CT_Num.new(next_num_id, abstractNum_id) - return self._insert_num(num) - - def num_having_numId(self, numId): - """ - Return the ```` child element having ``numId`` attribute - matching *numId*. - """ - xpath = './w:num[@w:numId="%d"]' % numId - try: - return self.xpath(xpath)[0] - except IndexError: - raise KeyError('no element with numId %d' % numId) - - @property - def _next_numId(self): - """ - The first ``numId`` unused by a ```` element, starting at - 1 and filling any gaps in numbering between existing ```` - elements. - """ - numId_strs = self.xpath('./w:num/@w:numId') - num_ids = [int(numId_str) for numId_str in numId_strs] - for num in range(1, len(num_ids)+2): - if num not in num_ids: - break - return num diff --git a/docx/oxml/section.py b/docx/oxml/section.py deleted file mode 100644 index cf76b67ed..000000000 --- a/docx/oxml/section.py +++ /dev/null @@ -1,264 +0,0 @@ -# encoding: utf-8 - -""" -Section-related custom element classes. 
-""" - -from __future__ import absolute_import, print_function - -from copy import deepcopy - -from ..enum.section import WD_ORIENTATION, WD_SECTION_START -from .simpletypes import ST_SignedTwipsMeasure, ST_TwipsMeasure -from .xmlchemy import BaseOxmlElement, OptionalAttribute, ZeroOrOne - - -class CT_PageMar(BaseOxmlElement): - """ - ```` element, defining page margins. - """ - top = OptionalAttribute('w:top', ST_SignedTwipsMeasure) - right = OptionalAttribute('w:right', ST_TwipsMeasure) - bottom = OptionalAttribute('w:bottom', ST_SignedTwipsMeasure) - left = OptionalAttribute('w:left', ST_TwipsMeasure) - header = OptionalAttribute('w:header', ST_TwipsMeasure) - footer = OptionalAttribute('w:footer', ST_TwipsMeasure) - gutter = OptionalAttribute('w:gutter', ST_TwipsMeasure) - - -class CT_PageSz(BaseOxmlElement): - """ - ```` element, defining page dimensions and orientation. - """ - w = OptionalAttribute('w:w', ST_TwipsMeasure) - h = OptionalAttribute('w:h', ST_TwipsMeasure) - orient = OptionalAttribute( - 'w:orient', WD_ORIENTATION, default=WD_ORIENTATION.PORTRAIT - ) - - -class CT_SectPr(BaseOxmlElement): - """ - ```` element, the container element for section properties. 
- """ - __child_sequence__ = ( - 'w:footnotePr', 'w:endnotePr', 'w:type', 'w:pgSz', 'w:pgMar', - 'w:paperSrc', 'w:pgBorders', 'w:lnNumType', 'w:pgNumType', 'w:cols', - 'w:formProt', 'w:vAlign', 'w:noEndnote', 'w:titlePg', - 'w:textDirection', 'w:bidi', 'w:rtlGutter', 'w:docGrid', - 'w:printerSettings', 'w:sectPrChange', - ) - type = ZeroOrOne('w:type', successors=( - __child_sequence__[__child_sequence__.index('w:type')+1:] - )) - pgSz = ZeroOrOne('w:pgSz', successors=( - __child_sequence__[__child_sequence__.index('w:pgSz')+1:] - )) - pgMar = ZeroOrOne('w:pgMar', successors=( - __child_sequence__[__child_sequence__.index('w:pgMar')+1:] - )) - - @property - def bottom_margin(self): - """ - The value of the ``w:bottom`` attribute in the ```` child - element, as a |Length| object, or |None| if either the element or the - attribute is not present. - """ - pgMar = self.pgMar - if pgMar is None: - return None - return pgMar.bottom - - @bottom_margin.setter - def bottom_margin(self, value): - pgMar = self.get_or_add_pgMar() - pgMar.bottom = value - - def clone(self): - """ - Return an exact duplicate of this ```` element tree - suitable for use in adding a section break. All rsid* attributes are - removed from the root ```` element. - """ - clone_sectPr = deepcopy(self) - clone_sectPr.attrib.clear() - return clone_sectPr - - @property - def footer(self): - """ - The value of the ``w:footer`` attribute in the ```` child - element, as a |Length| object, or |None| if either the element or the - attribute is not present. - """ - pgMar = self.pgMar - if pgMar is None: - return None - return pgMar.footer - - @footer.setter - def footer(self, value): - pgMar = self.get_or_add_pgMar() - pgMar.footer = value - - @property - def gutter(self): - """ - The value of the ``w:gutter`` attribute in the ```` child - element, as a |Length| object, or |None| if either the element or the - attribute is not present. 
- """ - pgMar = self.pgMar - if pgMar is None: - return None - return pgMar.gutter - - @gutter.setter - def gutter(self, value): - pgMar = self.get_or_add_pgMar() - pgMar.gutter = value - - @property - def header(self): - """ - The value of the ``w:header`` attribute in the ```` child - element, as a |Length| object, or |None| if either the element or the - attribute is not present. - """ - pgMar = self.pgMar - if pgMar is None: - return None - return pgMar.header - - @header.setter - def header(self, value): - pgMar = self.get_or_add_pgMar() - pgMar.header = value - - @property - def left_margin(self): - """ - The value of the ``w:left`` attribute in the ```` child - element, as a |Length| object, or |None| if either the element or the - attribute is not present. - """ - pgMar = self.pgMar - if pgMar is None: - return None - return pgMar.left - - @left_margin.setter - def left_margin(self, value): - pgMar = self.get_or_add_pgMar() - pgMar.left = value - - @property - def right_margin(self): - """ - The value of the ``w:right`` attribute in the ```` child - element, as a |Length| object, or |None| if either the element or the - attribute is not present. - """ - pgMar = self.pgMar - if pgMar is None: - return None - return pgMar.right - - @right_margin.setter - def right_margin(self, value): - pgMar = self.get_or_add_pgMar() - pgMar.right = value - - @property - def orientation(self): - """ - The member of the ``WD_ORIENTATION`` enumeration corresponding to the - value of the ``orient`` attribute of the ```` child element, - or ``WD_ORIENTATION.PORTRAIT`` if not present. - """ - pgSz = self.pgSz - if pgSz is None: - return WD_ORIENTATION.PORTRAIT - return pgSz.orient - - @orientation.setter - def orientation(self, value): - pgSz = self.get_or_add_pgSz() - pgSz.orient = value - - @property - def page_height(self): - """ - Value in EMU of the ``h`` attribute of the ```` child - element, or |None| if not present. 
- """ - pgSz = self.pgSz - if pgSz is None: - return None - return pgSz.h - - @page_height.setter - def page_height(self, value): - pgSz = self.get_or_add_pgSz() - pgSz.h = value - - @property - def page_width(self): - """ - Value in EMU of the ``w`` attribute of the ```` child - element, or |None| if not present. - """ - pgSz = self.pgSz - if pgSz is None: - return None - return pgSz.w - - @page_width.setter - def page_width(self, value): - pgSz = self.get_or_add_pgSz() - pgSz.w = value - - @property - def start_type(self): - """ - The member of the ``WD_SECTION_START`` enumeration corresponding to - the value of the ``val`` attribute of the ```` child element, - or ``WD_SECTION_START.NEW_PAGE`` if not present. - """ - type = self.type - if type is None or type.val is None: - return WD_SECTION_START.NEW_PAGE - return type.val - - @start_type.setter - def start_type(self, value): - if value is None or value is WD_SECTION_START.NEW_PAGE: - self._remove_type() - return - type = self.get_or_add_type() - type.val = value - - @property - def top_margin(self): - """ - The value of the ``w:top`` attribute in the ```` child - element, as a |Length| object, or |None| if either the element or the - attribute is not present. - """ - pgMar = self.pgMar - if pgMar is None: - return None - return pgMar.top - - @top_margin.setter - def top_margin(self, value): - pgMar = self.get_or_add_pgMar() - pgMar.top = value - - -class CT_SectType(BaseOxmlElement): - """ - ```` element, defining the section start type. - """ - val = OptionalAttribute('w:val', WD_SECTION_START) diff --git a/docx/oxml/shape.py b/docx/oxml/shape.py deleted file mode 100644 index 77ca7db8a..000000000 --- a/docx/oxml/shape.py +++ /dev/null @@ -1,284 +0,0 @@ -# encoding: utf-8 - -""" -Custom element classes for shape-related elements like ```` -""" - -from . 
import parse_xml -from .ns import nsdecls -from .simpletypes import ( - ST_Coordinate, ST_DrawingElementId, ST_PositiveCoordinate, - ST_RelationshipId, XsdString, XsdToken -) -from .xmlchemy import ( - BaseOxmlElement, OneAndOnlyOne, OptionalAttribute, RequiredAttribute, - ZeroOrOne -) - - -class CT_Blip(BaseOxmlElement): - """ - ```` element, specifies image source and adjustments such as - alpha and tint. - """ - embed = OptionalAttribute('r:embed', ST_RelationshipId) - link = OptionalAttribute('r:link', ST_RelationshipId) - - -class CT_BlipFillProperties(BaseOxmlElement): - """ - ```` element, specifies picture properties - """ - blip = ZeroOrOne('a:blip', successors=( - 'a:srcRect', 'a:tile', 'a:stretch' - )) - - -class CT_GraphicalObject(BaseOxmlElement): - """ - ```` element, container for a DrawingML object - """ - graphicData = OneAndOnlyOne('a:graphicData') - - -class CT_GraphicalObjectData(BaseOxmlElement): - """ - ```` element, container for the XML of a DrawingML object - """ - pic = ZeroOrOne('pic:pic') - uri = RequiredAttribute('uri', XsdToken) - - -class CT_Inline(BaseOxmlElement): - """ - ```` element, container for an inline shape. - """ - extent = OneAndOnlyOne('wp:extent') - docPr = OneAndOnlyOne('wp:docPr') - graphic = OneAndOnlyOne('a:graphic') - - @classmethod - def new(cls, cx, cy, shape_id, pic): - """ - Return a new ```` element populated with the values passed - as parameters. - """ - inline = parse_xml(cls._inline_xml()) - inline.extent.cx = cx - inline.extent.cy = cy - inline.docPr.id = shape_id - inline.docPr.name = 'Picture %d' % shape_id - inline.graphic.graphicData.uri = ( - 'http://schemas.openxmlformats.org/drawingml/2006/picture' - ) - inline.graphic.graphicData._insert_pic(pic) - return inline - - @classmethod - def new_pic_inline(cls, shape_id, rId, filename, cx, cy): - """ - Return a new `wp:inline` element containing the `pic:pic` element - specified by the argument values. 
- """ - pic_id = 0 # Word doesn't seem to use this, but does not omit it - pic = CT_Picture.new(pic_id, filename, rId, cx, cy) - inline = cls.new(cx, cy, shape_id, pic) - inline.graphic.graphicData._insert_pic(pic) - return inline - - @classmethod - def _inline_xml(cls): - return ( - '\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '' % nsdecls('wp', 'a', 'pic', 'r') - ) - - -class CT_NonVisualDrawingProps(BaseOxmlElement): - """ - Used for ```` element, and perhaps others. Specifies the id and - name of a DrawingML drawing. - """ - id = RequiredAttribute('id', ST_DrawingElementId) - name = RequiredAttribute('name', XsdString) - - -class CT_NonVisualPictureProperties(BaseOxmlElement): - """ - ```` element, specifies picture locking and resize - behaviors. - """ - - -class CT_Picture(BaseOxmlElement): - """ - ```` element, a DrawingML picture - """ - nvPicPr = OneAndOnlyOne('pic:nvPicPr') - blipFill = OneAndOnlyOne('pic:blipFill') - spPr = OneAndOnlyOne('pic:spPr') - - @classmethod - def new(cls, pic_id, filename, rId, cx, cy): - """ - Return a new ```` element populated with the minimal - contents required to define a viable picture element, based on the - values passed as parameters. - """ - pic = parse_xml(cls._pic_xml()) - pic.nvPicPr.cNvPr.id = pic_id - pic.nvPicPr.cNvPr.name = filename - pic.blipFill.blip.embed = rId - pic.spPr.cx = cx - pic.spPr.cy = cy - return pic - - @classmethod - def _pic_xml(cls): - return ( - '\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '' % nsdecls('pic', 'a', 'r') - ) - - -class CT_PictureNonVisual(BaseOxmlElement): - """ - ```` element, non-visual picture properties - """ - cNvPr = OneAndOnlyOne('pic:cNvPr') - - -class CT_Point2D(BaseOxmlElement): - """ - Used for ```` element, and perhaps others. Specifies an x, y - coordinate (point). 
- """ - x = RequiredAttribute('x', ST_Coordinate) - y = RequiredAttribute('y', ST_Coordinate) - - -class CT_PositiveSize2D(BaseOxmlElement): - """ - Used for ```` element, and perhaps others later. Specifies the - size of a DrawingML drawing. - """ - cx = RequiredAttribute('cx', ST_PositiveCoordinate) - cy = RequiredAttribute('cy', ST_PositiveCoordinate) - - -class CT_PresetGeometry2D(BaseOxmlElement): - """ - ```` element, specifies an preset autoshape geometry, such - as ``rect``. - """ - - -class CT_RelativeRect(BaseOxmlElement): - """ - ```` element, specifying picture should fill containing - rectangle shape. - """ - - -class CT_ShapeProperties(BaseOxmlElement): - """ - ```` element, specifies size and shape of picture container. - """ - xfrm = ZeroOrOne('a:xfrm', successors=( - 'a:custGeom', 'a:prstGeom', 'a:ln', 'a:effectLst', 'a:effectDag', - 'a:scene3d', 'a:sp3d', 'a:extLst' - )) - - @property - def cx(self): - """ - Shape width as an instance of Emu, or None if not present. - """ - xfrm = self.xfrm - if xfrm is None: - return None - return xfrm.cx - - @cx.setter - def cx(self, value): - xfrm = self.get_or_add_xfrm() - xfrm.cx = value - - @property - def cy(self): - """ - Shape height as an instance of Emu, or None if not present. - """ - xfrm = self.xfrm - if xfrm is None: - return None - return xfrm.cy - - @cy.setter - def cy(self, value): - xfrm = self.get_or_add_xfrm() - xfrm.cy = value - - -class CT_StretchInfoProperties(BaseOxmlElement): - """ - ```` element, specifies how picture should fill its containing - shape. - """ - - -class CT_Transform2D(BaseOxmlElement): - """ - ```` element, specifies size and shape of picture container. 
- """ - off = ZeroOrOne('a:off', successors=('a:ext',)) - ext = ZeroOrOne('a:ext', successors=()) - - @property - def cx(self): - ext = self.ext - if ext is None: - return None - return ext.cx - - @cx.setter - def cx(self, value): - ext = self.get_or_add_ext() - ext.cx = value - - @property - def cy(self): - ext = self.ext - if ext is None: - return None - return ext.cy - - @cy.setter - def cy(self, value): - ext = self.get_or_add_ext() - ext.cy = value diff --git a/docx/oxml/shared.py b/docx/oxml/shared.py deleted file mode 100644 index 1e21ba366..000000000 --- a/docx/oxml/shared.py +++ /dev/null @@ -1,55 +0,0 @@ -# encoding: utf-8 - -""" -Objects shared by modules in the docx.oxml subpackage. -""" - -from __future__ import absolute_import - -from . import OxmlElement -from .ns import qn -from .simpletypes import ST_DecimalNumber, ST_OnOff, ST_String -from .xmlchemy import BaseOxmlElement, OptionalAttribute, RequiredAttribute - - -class CT_DecimalNumber(BaseOxmlElement): - """ - Used for ````, ````, ```` and several - others, containing a text representation of a decimal number (e.g. 42) in - its ``val`` attribute. - """ - val = RequiredAttribute('w:val', ST_DecimalNumber) - - @classmethod - def new(cls, nsptagname, val): - """ - Return a new ``CT_DecimalNumber`` element having tagname *nsptagname* - and ``val`` attribute set to *val*. - """ - return OxmlElement(nsptagname, attrs={qn('w:val'): str(val)}) - - -class CT_OnOff(BaseOxmlElement): - """ - Used for ````, ```` elements and others, containing a bool-ish - string in its ``val`` attribute, xsd:boolean plus 'on' and 'off'. - """ - val = OptionalAttribute('w:val', ST_OnOff, default=True) - - -class CT_String(BaseOxmlElement): - """ - Used for ```` and ```` elements and others, - containing a style name in its ``val`` attribute. 
- """ - val = RequiredAttribute('w:val', ST_String) - - @classmethod - def new(cls, nsptagname, val): - """ - Return a new ``CT_String`` element with tagname *nsptagname* and - ``val`` attribute set to *val*. - """ - elm = OxmlElement(nsptagname) - elm.val = val - return elm diff --git a/docx/oxml/simpletypes.py b/docx/oxml/simpletypes.py deleted file mode 100644 index 400a23700..000000000 --- a/docx/oxml/simpletypes.py +++ /dev/null @@ -1,409 +0,0 @@ -# encoding: utf-8 - -""" -Simple type classes, providing validation and format translation for values -stored in XML element attributes. Naming generally corresponds to the simple -type in the associated XML schema. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from ..exceptions import InvalidXmlError -from ..shared import Emu, Pt, RGBColor, Twips - - -class BaseSimpleType(object): - - @classmethod - def from_xml(cls, str_value): - return cls.convert_from_xml(str_value) - - @classmethod - def to_xml(cls, value): - cls.validate(value) - str_value = cls.convert_to_xml(value) - return str_value - - @classmethod - def validate_int(cls, value): - if not isinstance(value, int): - raise TypeError( - "value must be , got %s" % type(value) - ) - - @classmethod - def validate_int_in_range(cls, value, min_inclusive, max_inclusive): - cls.validate_int(value) - if value < min_inclusive or value > max_inclusive: - raise ValueError( - "value must be in range %d to %d inclusive, got %d" % - (min_inclusive, max_inclusive, value) - ) - - @classmethod - def validate_string(cls, value): - if isinstance(value, str): - return value - try: - if isinstance(value, basestring): - return value - except NameError: # means we're on Python 3 - pass - raise TypeError( - "value must be a string, got %s" % type(value) - ) - - -class BaseIntType(BaseSimpleType): - - @classmethod - def convert_from_xml(cls, str_value): - return int(str_value) - - @classmethod - def convert_to_xml(cls, value): - 
return str(value) - - @classmethod - def validate(cls, value): - cls.validate_int(value) - - -class BaseStringType(BaseSimpleType): - - @classmethod - def convert_from_xml(cls, str_value): - return str_value - - @classmethod - def convert_to_xml(cls, value): - return value - - @classmethod - def validate(cls, value): - cls.validate_string(value) - - -class BaseStringEnumerationType(BaseStringType): - - @classmethod - def validate(cls, value): - cls.validate_string(value) - if value not in cls._members: - raise ValueError( - "must be one of %s, got '%s'" % (cls._members, value) - ) - - -class XsdAnyUri(BaseStringType): - """ - There's a regular expression this is supposed to meet but so far thinking - spending cycles on validating wouldn't be worth it for the number of - programming errors it would catch. - """ - - -class XsdBoolean(BaseSimpleType): - - @classmethod - def convert_from_xml(cls, str_value): - if str_value not in ('1', '0', 'true', 'false'): - raise InvalidXmlError( - "value must be one of '1', '0', 'true' or 'false', got '%s'" - % str_value - ) - return str_value in ('1', 'true') - - @classmethod - def convert_to_xml(cls, value): - return {True: '1', False: '0'}[value] - - @classmethod - def validate(cls, value): - if value not in (True, False): - raise TypeError( - "only True or False (and possibly None) may be assigned, got" - " '%s'" % value - ) - - -class XsdId(BaseStringType): - """ - String that must begin with a letter or underscore and cannot contain any - colons. Not fully validated because not used in external API. 
- """ - pass - - -class XsdInt(BaseIntType): - - @classmethod - def validate(cls, value): - cls.validate_int_in_range(value, -2147483648, 2147483647) - - -class XsdLong(BaseIntType): - - @classmethod - def validate(cls, value): - cls.validate_int_in_range( - value, -9223372036854775808, 9223372036854775807 - ) - - -class XsdString(BaseStringType): - pass - - -class XsdStringEnumeration(BaseStringEnumerationType): - """ - Set of enumerated xsd:string values. - """ - - -class XsdToken(BaseStringType): - """ - xsd:string with whitespace collapsing, e.g. multiple spaces reduced to - one, leading and trailing space stripped. - """ - pass - - -class XsdUnsignedInt(BaseIntType): - - @classmethod - def validate(cls, value): - cls.validate_int_in_range(value, 0, 4294967295) - - -class XsdUnsignedLong(BaseIntType): - - @classmethod - def validate(cls, value): - cls.validate_int_in_range(value, 0, 18446744073709551615) - - -class ST_BrClear(XsdString): - - @classmethod - def validate(cls, value): - cls.validate_string(value) - valid_values = ('none', 'left', 'right', 'all') - if value not in valid_values: - raise ValueError( - "must be one of %s, got '%s'" % (valid_values, value) - ) - - -class ST_BrType(XsdString): - - @classmethod - def validate(cls, value): - cls.validate_string(value) - valid_values = ('page', 'column', 'textWrapping') - if value not in valid_values: - raise ValueError( - "must be one of %s, got '%s'" % (valid_values, value) - ) - - -class ST_Coordinate(BaseIntType): - - @classmethod - def convert_from_xml(cls, str_value): - if 'i' in str_value or 'm' in str_value or 'p' in str_value: - return ST_UniversalMeasure.convert_from_xml(str_value) - return Emu(int(str_value)) - - @classmethod - def validate(cls, value): - ST_CoordinateUnqualified.validate(value) - - -class ST_CoordinateUnqualified(XsdLong): - - @classmethod - def validate(cls, value): - cls.validate_int_in_range(value, -27273042329600, 27273042316900) - - -class ST_DecimalNumber(XsdInt): - pass 
- - -class ST_DrawingElementId(XsdUnsignedInt): - pass - - -class ST_HexColor(BaseStringType): - - @classmethod - def convert_from_xml(cls, str_value): - if str_value == 'auto': - return ST_HexColorAuto.AUTO - return RGBColor.from_string(str_value) - - @classmethod - def convert_to_xml(cls, value): - """ - Keep alpha hex numerals all uppercase just for consistency. - """ - # expecting 3-tuple of ints in range 0-255 - return '%02X%02X%02X' % value - - @classmethod - def validate(cls, value): - # must be an RGBColor object --- - if not isinstance(value, RGBColor): - raise ValueError( - "rgb color value must be RGBColor object, got %s %s" - % (type(value), value) - ) - - -class ST_HexColorAuto(XsdStringEnumeration): - """ - Value for `w:color/[@val="auto"] attribute setting - """ - AUTO = 'auto' - - _members = (AUTO,) - - -class ST_HpsMeasure(XsdUnsignedLong): - """ - Half-point measure, e.g. 24.0 represents 12.0 points. - """ - @classmethod - def convert_from_xml(cls, str_value): - if 'm' in str_value or 'n' in str_value or 'p' in str_value: - return ST_UniversalMeasure.convert_from_xml(str_value) - return Pt(int(str_value)/2.0) - - @classmethod - def convert_to_xml(cls, value): - emu = Emu(value) - half_points = int(emu.pt * 2) - return str(half_points) - - -class ST_Merge(XsdStringEnumeration): - """ - Valid values for attribute - """ - CONTINUE = 'continue' - RESTART = 'restart' - - _members = (CONTINUE, RESTART) - - -class ST_OnOff(XsdBoolean): - - @classmethod - def convert_from_xml(cls, str_value): - if str_value not in ('1', '0', 'true', 'false', 'on', 'off'): - raise InvalidXmlError( - "value must be one of '1', '0', 'true', 'false', 'on', or 'o" - "ff', got '%s'" % str_value - ) - return str_value in ('1', 'true', 'on') - - -class ST_PositiveCoordinate(XsdLong): - - @classmethod - def convert_from_xml(cls, str_value): - return Emu(int(str_value)) - - @classmethod - def validate(cls, value): - cls.validate_int_in_range(value, 0, 27273042316900) - - -class 
ST_RelationshipId(XsdString): - pass - - -class ST_SignedTwipsMeasure(XsdInt): - - @classmethod - def convert_from_xml(cls, str_value): - if 'i' in str_value or 'm' in str_value or 'p' in str_value: - return ST_UniversalMeasure.convert_from_xml(str_value) - return Twips(int(str_value)) - - @classmethod - def convert_to_xml(cls, value): - emu = Emu(value) - twips = emu.twips - return str(twips) - - -class ST_String(XsdString): - pass - - -class ST_TblLayoutType(XsdString): - - @classmethod - def validate(cls, value): - cls.validate_string(value) - valid_values = ('fixed', 'autofit') - if value not in valid_values: - raise ValueError( - "must be one of %s, got '%s'" % (valid_values, value) - ) - - -class ST_TblWidth(XsdString): - - @classmethod - def validate(cls, value): - cls.validate_string(value) - valid_values = ('auto', 'dxa', 'nil', 'pct') - if value not in valid_values: - raise ValueError( - "must be one of %s, got '%s'" % (valid_values, value) - ) - - -class ST_TwipsMeasure(XsdUnsignedLong): - - @classmethod - def convert_from_xml(cls, str_value): - if 'i' in str_value or 'm' in str_value or 'p' in str_value: - return ST_UniversalMeasure.convert_from_xml(str_value) - return Twips(int(str_value)) - - @classmethod - def convert_to_xml(cls, value): - emu = Emu(value) - twips = emu.twips - return str(twips) - - -class ST_UniversalMeasure(BaseSimpleType): - - @classmethod - def convert_from_xml(cls, str_value): - float_part, units_part = str_value[:-2], str_value[-2:] - quantity = float(float_part) - multiplier = { - 'mm': 36000, 'cm': 360000, 'in': 914400, 'pt': 12700, - 'pc': 152400, 'pi': 152400 - }[units_part] - emu_value = Emu(int(round(quantity * multiplier))) - return emu_value - - -class ST_VerticalAlignRun(XsdStringEnumeration): - """ - Valid values for `w:vertAlign/@val`. 
- """ - BASELINE = 'baseline' - SUPERSCRIPT = 'superscript' - SUBSCRIPT = 'subscript' - - _members = (BASELINE, SUPERSCRIPT, SUBSCRIPT) diff --git a/docx/oxml/styles.py b/docx/oxml/styles.py deleted file mode 100644 index 6f27e45eb..000000000 --- a/docx/oxml/styles.py +++ /dev/null @@ -1,351 +0,0 @@ -# encoding: utf-8 - -""" -Custom element classes related to the styles part -""" - -from ..enum.style import WD_STYLE_TYPE -from .simpletypes import ST_DecimalNumber, ST_OnOff, ST_String -from .xmlchemy import ( - BaseOxmlElement, OptionalAttribute, RequiredAttribute, ZeroOrMore, - ZeroOrOne -) - - -def styleId_from_name(name): - """ - Return the style id corresponding to *name*, taking into account - special-case names such as 'Heading 1'. - """ - return { - 'caption': 'Caption', - 'heading 1': 'Heading1', - 'heading 2': 'Heading2', - 'heading 3': 'Heading3', - 'heading 4': 'Heading4', - 'heading 5': 'Heading5', - 'heading 6': 'Heading6', - 'heading 7': 'Heading7', - 'heading 8': 'Heading8', - 'heading 9': 'Heading9', - }.get(name, name.replace(' ', '')) - - -class CT_LatentStyles(BaseOxmlElement): - """ - `w:latentStyles` element, defining behavior defaults for latent styles - and containing `w:lsdException` child elements that each override those - defaults for a named latent style. - """ - lsdException = ZeroOrMore('w:lsdException', successors=()) - - count = OptionalAttribute('w:count', ST_DecimalNumber) - defLockedState = OptionalAttribute('w:defLockedState', ST_OnOff) - defQFormat = OptionalAttribute('w:defQFormat', ST_OnOff) - defSemiHidden = OptionalAttribute('w:defSemiHidden', ST_OnOff) - defUIPriority = OptionalAttribute('w:defUIPriority', ST_DecimalNumber) - defUnhideWhenUsed = OptionalAttribute('w:defUnhideWhenUsed', ST_OnOff) - - def bool_prop(self, attr_name): - """ - Return the boolean value of the attribute having *attr_name*, or - |False| if not present. 
- """ - value = getattr(self, attr_name) - if value is None: - return False - return value - - def get_by_name(self, name): - """ - Return the `w:lsdException` child having *name*, or |None| if not - found. - """ - found = self.xpath('w:lsdException[@w:name="%s"]' % name) - if not found: - return None - return found[0] - - def set_bool_prop(self, attr_name, value): - """ - Set the on/off attribute having *attr_name* to *value*. - """ - setattr(self, attr_name, bool(value)) - - -class CT_LsdException(BaseOxmlElement): - """ - ```` element, defining override visibility behaviors for - a named latent style. - """ - locked = OptionalAttribute('w:locked', ST_OnOff) - name = RequiredAttribute('w:name', ST_String) - qFormat = OptionalAttribute('w:qFormat', ST_OnOff) - semiHidden = OptionalAttribute('w:semiHidden', ST_OnOff) - uiPriority = OptionalAttribute('w:uiPriority', ST_DecimalNumber) - unhideWhenUsed = OptionalAttribute('w:unhideWhenUsed', ST_OnOff) - - def delete(self): - """ - Remove this `w:lsdException` element from the XML document. - """ - self.getparent().remove(self) - - def on_off_prop(self, attr_name): - """ - Return the boolean value of the attribute having *attr_name*, or - |None| if not present. - """ - return getattr(self, attr_name) - - def set_on_off_prop(self, attr_name, value): - """ - Set the on/off attribute having *attr_name* to *value*. 
- """ - setattr(self, attr_name, value) - - -class CT_Style(BaseOxmlElement): - """ - A ```` element, representing a style definition - """ - _tag_seq = ( - 'w:name', 'w:aliases', 'w:basedOn', 'w:next', 'w:link', - 'w:autoRedefine', 'w:hidden', 'w:uiPriority', 'w:semiHidden', - 'w:unhideWhenUsed', 'w:qFormat', 'w:locked', 'w:personal', - 'w:personalCompose', 'w:personalReply', 'w:rsid', 'w:pPr', 'w:rPr', - 'w:tblPr', 'w:trPr', 'w:tcPr', 'w:tblStylePr' - ) - name = ZeroOrOne('w:name', successors=_tag_seq[1:]) - basedOn = ZeroOrOne('w:basedOn', successors=_tag_seq[3:]) - next = ZeroOrOne('w:next', successors=_tag_seq[4:]) - uiPriority = ZeroOrOne('w:uiPriority', successors=_tag_seq[8:]) - semiHidden = ZeroOrOne('w:semiHidden', successors=_tag_seq[9:]) - unhideWhenUsed = ZeroOrOne('w:unhideWhenUsed', successors=_tag_seq[10:]) - qFormat = ZeroOrOne('w:qFormat', successors=_tag_seq[11:]) - locked = ZeroOrOne('w:locked', successors=_tag_seq[12:]) - pPr = ZeroOrOne('w:pPr', successors=_tag_seq[17:]) - rPr = ZeroOrOne('w:rPr', successors=_tag_seq[18:]) - del _tag_seq - - type = OptionalAttribute('w:type', WD_STYLE_TYPE) - styleId = OptionalAttribute('w:styleId', ST_String) - default = OptionalAttribute('w:default', ST_OnOff) - customStyle = OptionalAttribute('w:customStyle', ST_OnOff) - - @property - def basedOn_val(self): - """ - Value of `w:basedOn/@w:val` or |None| if not present. - """ - basedOn = self.basedOn - if basedOn is None: - return None - return basedOn.val - - @basedOn_val.setter - def basedOn_val(self, value): - if value is None: - self._remove_basedOn() - else: - self.get_or_add_basedOn().val = value - - @property - def base_style(self): - """ - Sibling CT_Style element this style is based on or |None| if no base - style or base style not found. 
- """ - basedOn = self.basedOn - if basedOn is None: - return None - styles = self.getparent() - base_style = styles.get_by_id(basedOn.val) - if base_style is None: - return None - return base_style - - def delete(self): - """ - Remove this `w:style` element from its parent `w:styles` element. - """ - self.getparent().remove(self) - - @property - def locked_val(self): - """ - Value of `w:locked/@w:val` or |False| if not present. - """ - locked = self.locked - if locked is None: - return False - return locked.val - - @locked_val.setter - def locked_val(self, value): - self._remove_locked() - if bool(value) is True: - locked = self._add_locked() - locked.val = value - - @property - def name_val(self): - """ - Value of ```` child or |None| if not present. - """ - name = self.name - if name is None: - return None - return name.val - - @name_val.setter - def name_val(self, value): - self._remove_name() - if value is not None: - name = self._add_name() - name.val = value - - @property - def next_style(self): - """ - Sibling CT_Style element identified by the value of `w:name/@w:val` - or |None| if no value is present or no style with that style id - is found. - """ - next = self.next - if next is None: - return None - styles = self.getparent() - return styles.get_by_id(next.val) # None if not found - - @property - def qFormat_val(self): - """ - Value of `w:qFormat/@w:val` or |False| if not present. - """ - qFormat = self.qFormat - if qFormat is None: - return False - return qFormat.val - - @qFormat_val.setter - def qFormat_val(self, value): - self._remove_qFormat() - if bool(value): - self._add_qFormat() - - @property - def semiHidden_val(self): - """ - Value of ```` child or |False| if not present. 
- """ - semiHidden = self.semiHidden - if semiHidden is None: - return False - return semiHidden.val - - @semiHidden_val.setter - def semiHidden_val(self, value): - self._remove_semiHidden() - if bool(value) is True: - semiHidden = self._add_semiHidden() - semiHidden.val = value - - @property - def uiPriority_val(self): - """ - Value of ```` child or |None| if not present. - """ - uiPriority = self.uiPriority - if uiPriority is None: - return None - return uiPriority.val - - @uiPriority_val.setter - def uiPriority_val(self, value): - self._remove_uiPriority() - if value is not None: - uiPriority = self._add_uiPriority() - uiPriority.val = value - - @property - def unhideWhenUsed_val(self): - """ - Value of `w:unhideWhenUsed/@w:val` or |False| if not present. - """ - unhideWhenUsed = self.unhideWhenUsed - if unhideWhenUsed is None: - return False - return unhideWhenUsed.val - - @unhideWhenUsed_val.setter - def unhideWhenUsed_val(self, value): - self._remove_unhideWhenUsed() - if bool(value) is True: - unhideWhenUsed = self._add_unhideWhenUsed() - unhideWhenUsed.val = value - - -class CT_Styles(BaseOxmlElement): - """ - ```` element, the root element of a styles part, i.e. - styles.xml - """ - _tag_seq = ('w:docDefaults', 'w:latentStyles', 'w:style') - latentStyles = ZeroOrOne('w:latentStyles', successors=_tag_seq[2:]) - style = ZeroOrMore('w:style', successors=()) - del _tag_seq - - def add_style_of_type(self, name, style_type, builtin): - """ - Return a newly added `w:style` element having *name* and - *style_type*. `w:style/@customStyle` is set based on the value of - *builtin*. - """ - style = self.add_style() - style.type = style_type - style.customStyle = None if builtin else True - style.styleId = styleId_from_name(name) - style.name_val = name - return style - - def default_for(self, style_type): - """ - Return `w:style[@w:type="*{style_type}*][-1]` or |None| if not found. 
- """ - default_styles_for_type = [ - s for s in self._iter_styles() - if s.type == style_type and s.default - ] - if not default_styles_for_type: - return None - # spec calls for last default in document order - return default_styles_for_type[-1] - - def get_by_id(self, styleId): - """ - Return the ```` child element having ``styleId`` attribute - matching *styleId*, or |None| if not found. - """ - xpath = 'w:style[@w:styleId="%s"]' % styleId - try: - return self.xpath(xpath)[0] - except IndexError: - return None - - def get_by_name(self, name): - """ - Return the ```` child element having ```` child - element with value *name*, or |None| if not found. - """ - xpath = 'w:style[w:name/@w:val="%s"]' % name - try: - return self.xpath(xpath)[0] - except IndexError: - return None - - def _iter_styles(self): - """ - Generate each of the `w:style` child elements in document order. - """ - return (style for style in self.xpath('w:style')) diff --git a/docx/oxml/table.py b/docx/oxml/table.py deleted file mode 100644 index 24d91690e..000000000 --- a/docx/oxml/table.py +++ /dev/null @@ -1,783 +0,0 @@ -# encoding: utf-8 - -""" -Custom element classes for tables -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from . import parse_xml -from ..exceptions import InvalidSpanError -from .ns import nsdecls, qn -from ..shared import Emu, Twips -from .simpletypes import ( - ST_Merge, ST_TblLayoutType, ST_TblWidth, ST_TwipsMeasure, XsdInt -) -from .xmlchemy import ( - BaseOxmlElement, OneAndOnlyOne, OneOrMore, OptionalAttribute, - RequiredAttribute, ZeroOrOne, ZeroOrMore -) - - -class CT_Row(BaseOxmlElement): - """ - ```` element - """ - tblPrEx = ZeroOrOne('w:tblPrEx') # custom inserter below - trPr = ZeroOrOne('w:trPr') # custom inserter below - tc = ZeroOrMore('w:tc') - - def tc_at_grid_col(self, idx): - """ - The ```` element appearing at grid column *idx*. Raises - |ValueError| if no ``w:tc`` element begins at that grid column. 
- """ - grid_col = 0 - for tc in self.tc_lst: - if grid_col == idx: - return tc - grid_col += tc.grid_span - if grid_col > idx: - raise ValueError('no cell on grid column %d' % idx) - raise ValueError('index out of bounds') - - @property - def tr_idx(self): - """ - The index of this ```` element within its parent ```` - element. - """ - return self.getparent().tr_lst.index(self) - - def _insert_tblPrEx(self, tblPrEx): - self.insert(0, tblPrEx) - - def _insert_trPr(self, trPr): - tblPrEx = self.tblPrEx - if tblPrEx is not None: - tblPrEx.addnext(trPr) - else: - self.insert(0, trPr) - - def _new_tc(self): - return CT_Tc.new() - - -class CT_Tbl(BaseOxmlElement): - """ - ```` element - """ - tblPr = OneAndOnlyOne('w:tblPr') - tblGrid = OneAndOnlyOne('w:tblGrid') - tr = ZeroOrMore('w:tr') - - @property - def bidiVisual_val(self): - """ - Value of `w:tblPr/w:bidiVisual/@w:val` or |None| if not present. - Controls whether table cells are displayed right-to-left or - left-to-right. - """ - bidiVisual = self.tblPr.bidiVisual - if bidiVisual is None: - return None - return bidiVisual.val - - @bidiVisual_val.setter - def bidiVisual_val(self, value): - tblPr = self.tblPr - if value is None: - tblPr._remove_bidiVisual() - else: - tblPr.get_or_add_bidiVisual().val = value - - @property - def col_count(self): - """ - The number of grid columns in this table. - """ - return len(self.tblGrid.gridCol_lst) - - def iter_tcs(self): - """ - Generate each of the `w:tc` elements in this table, left to right and - top to bottom. Each cell in the first row is generated, followed by - each cell in the second row, etc. - """ - for tr in self.tr_lst: - for tc in tr.tc_lst: - yield tc - - @classmethod - def new_tbl(cls, rows, cols, width): - """ - Return a new `w:tbl` element having *rows* rows and *cols* columns - with *width* distributed evenly between the columns. 
- """ - return parse_xml(cls._tbl_xml(rows, cols, width)) - - @property - def tblStyle_val(self): - """ - Value of `w:tblPr/w:tblStyle/@w:val` (a table style id) or |None| if - not present. - """ - tblStyle = self.tblPr.tblStyle - if tblStyle is None: - return None - return tblStyle.val - - @tblStyle_val.setter - def tblStyle_val(self, styleId): - """ - Set the value of `w:tblPr/w:tblStyle/@w:val` (a table style id) to - *styleId*. If *styleId* is None, remove the `w:tblStyle` element. - """ - tblPr = self.tblPr - tblPr._remove_tblStyle() - if styleId is None: - return - tblPr._add_tblStyle().val = styleId - - @classmethod - def _tbl_xml(cls, rows, cols, width): - col_width = Emu(width/cols) if cols > 0 else Emu(0) - return ( - '\n' - ' \n' - ' \n' - ' \n' - ' \n' - '%s' # tblGrid - '%s' # trs - '\n' - ) % ( - nsdecls('w'), - cls._tblGrid_xml(cols, col_width), - cls._trs_xml(rows, cols, col_width) - ) - - @classmethod - def _tblGrid_xml(cls, col_count, col_width): - xml = ' \n' - for i in range(col_count): - xml += ' \n' % col_width.twips - xml += ' \n' - return xml - - @classmethod - def _trs_xml(cls, row_count, col_count, col_width): - xml = '' - for i in range(row_count): - xml += ( - ' \n' - '%s' - ' \n' - ) % cls._tcs_xml(col_count, col_width) - return xml - - @classmethod - def _tcs_xml(cls, col_count, col_width): - xml = '' - for i in range(col_count): - xml += ( - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ) % col_width.twips - return xml - - -class CT_TblGrid(BaseOxmlElement): - """ - ```` element, child of ````, holds ```` - elements that define column count, width, etc. - """ - gridCol = ZeroOrMore('w:gridCol', successors=('w:tblGridChange',)) - - -class CT_TblGridCol(BaseOxmlElement): - """ - ```` element, child of ````, defines a table - column. - """ - w = OptionalAttribute('w:w', ST_TwipsMeasure) - - @property - def gridCol_idx(self): - """ - The index of this ```` element within its parent - ```` element. 
- """ - return self.getparent().gridCol_lst.index(self) - - -class CT_TblLayoutType(BaseOxmlElement): - """ - ```` element, specifying whether column widths are fixed or - can be automatically adjusted based on content. - """ - type = OptionalAttribute('w:type', ST_TblLayoutType) - - -class CT_TblPr(BaseOxmlElement): - """ - ```` element, child of ````, holds child elements that - define table properties such as style and borders. - """ - _tag_seq = ( - 'w:tblStyle', 'w:tblpPr', 'w:tblOverlap', 'w:bidiVisual', - 'w:tblStyleRowBandSize', 'w:tblStyleColBandSize', 'w:tblW', 'w:jc', - 'w:tblCellSpacing', 'w:tblInd', 'w:tblBorders', 'w:shd', - 'w:tblLayout', 'w:tblCellMar', 'w:tblLook', 'w:tblCaption', - 'w:tblDescription', 'w:tblPrChange' - ) - tblStyle = ZeroOrOne('w:tblStyle', successors=_tag_seq[1:]) - bidiVisual = ZeroOrOne('w:bidiVisual', successors=_tag_seq[4:]) - jc = ZeroOrOne('w:jc', successors=_tag_seq[8:]) - tblLayout = ZeroOrOne('w:tblLayout', successors=_tag_seq[13:]) - del _tag_seq - - @property - def alignment(self): - """ - Member of :ref:`WdRowAlignment` enumeration or |None|, based on the - contents of the `w:val` attribute of `./w:jc`. |None| if no `w:jc` - element is present. - """ - jc = self.jc - if jc is None: - return None - return jc.val - - @alignment.setter - def alignment(self, value): - self._remove_jc() - if value is None: - return - jc = self.get_or_add_jc() - jc.val = value - - @property - def autofit(self): - """ - Return |False| if there is a ```` child with ``w:type`` - attribute set to ``'fixed'``. Otherwise return |True|. - """ - tblLayout = self.tblLayout - if tblLayout is None: - return True - return False if tblLayout.type == 'fixed' else True - - @autofit.setter - def autofit(self, value): - tblLayout = self.get_or_add_tblLayout() - tblLayout.type = 'autofit' if value else 'fixed' - - @property - def style(self): - """ - Return the value of the ``val`` attribute of the ```` - child or |None| if not present. 
- """ - tblStyle = self.tblStyle - if tblStyle is None: - return None - return tblStyle.val - - @style.setter - def style(self, value): - self._remove_tblStyle() - if value is None: - return - self._add_tblStyle(val=value) - - -class CT_TblWidth(BaseOxmlElement): - """ - Used for ```` and ```` elements and many others, to - specify a table-related width. - """ - # the type for `w` attr is actually ST_MeasurementOrPercent, but using - # XsdInt for now because only dxa (twips) values are being used. It's not - # entirely clear what the semantics are for other values like -01.4mm - w = RequiredAttribute('w:w', XsdInt) - type = RequiredAttribute('w:type', ST_TblWidth) - - @property - def width(self): - """ - Return the EMU length value represented by the combined ``w:w`` and - ``w:type`` attributes. - """ - if self.type != 'dxa': - return None - return Twips(self.w) - - @width.setter - def width(self, value): - self.type = 'dxa' - self.w = Emu(value).twips - - -class CT_Tc(BaseOxmlElement): - """ - ```` table cell element - """ - tcPr = ZeroOrOne('w:tcPr') # bunches of successors, overriding insert - p = OneOrMore('w:p') - tbl = OneOrMore('w:tbl') - - @property - def bottom(self): - """ - The row index that marks the bottom extent of the vertical span of - this cell. This is one greater than the index of the bottom-most row - of the span, similar to how a slice of the cell's rows would be - specified. - """ - if self.vMerge is not None: - tc_below = self._tc_below - if tc_below is not None and tc_below.vMerge == ST_Merge.CONTINUE: - return tc_below.bottom - return self._tr_idx + 1 - - def clear_content(self): - """ - Remove all content child elements, preserving the ```` - element if present. Note that this leaves the ```` element in - an invalid state because it doesn't contain at least one block-level - element. It's up to the caller to add a ````child element as the - last content element. 
- """ - new_children = [] - tcPr = self.tcPr - if tcPr is not None: - new_children.append(tcPr) - self[:] = new_children - - @property - def grid_span(self): - """ - The integer number of columns this cell spans. Determined by - ./w:tcPr/w:gridSpan/@val, it defaults to 1. - """ - tcPr = self.tcPr - if tcPr is None: - return 1 - return tcPr.grid_span - - @grid_span.setter - def grid_span(self, value): - tcPr = self.get_or_add_tcPr() - tcPr.grid_span = value - - def iter_block_items(self): - """ - Generate a reference to each of the block-level content elements in - this cell, in the order they appear. - """ - block_item_tags = (qn('w:p'), qn('w:tbl'), qn('w:sdt')) - for child in self: - if child.tag in block_item_tags: - yield child - - @property - def left(self): - """ - The grid column index at which this ```` element appears. - """ - return self._grid_col - - def merge(self, other_tc): - """ - Return the top-left ```` element of a new span formed by - merging the rectangular region defined by using this tc element and - *other_tc* as diagonal corners. - """ - top, left, height, width = self._span_dimensions(other_tc) - top_tc = self._tbl.tr_lst[top].tc_at_grid_col(left) - top_tc._grow_to(width, height) - return top_tc - - @classmethod - def new(cls): - """ - Return a new ```` element, containing an empty paragraph as the - required EG_BlockLevelElt. - """ - return parse_xml( - '\n' - ' \n' - '' % nsdecls('w') - ) - - @property - def right(self): - """ - The grid column index that marks the right-side extent of the - horizontal span of this cell. This is one greater than the index of - the right-most column of the span, similar to how a slice of the - cell's columns would be specified. - """ - return self._grid_col + self.grid_span - - @property - def top(self): - """ - The top-most row index in the vertical span of this cell. 
- """ - if self.vMerge is None or self.vMerge == ST_Merge.RESTART: - return self._tr_idx - return self._tc_above.top - - @property - def vMerge(self): - """ - The value of the ./w:tcPr/w:vMerge/@val attribute, or |None| if the - w:vMerge element is not present. - """ - tcPr = self.tcPr - if tcPr is None: - return None - return tcPr.vMerge_val - - @vMerge.setter - def vMerge(self, value): - tcPr = self.get_or_add_tcPr() - tcPr.vMerge_val = value - - @property - def width(self): - """ - Return the EMU length value represented in the ``./w:tcPr/w:tcW`` - child element or |None| if not present. - """ - tcPr = self.tcPr - if tcPr is None: - return None - return tcPr.width - - @width.setter - def width(self, value): - tcPr = self.get_or_add_tcPr() - tcPr.width = value - - def _add_width_of(self, other_tc): - """ - Add the width of *other_tc* to this cell. Does nothing if either this - tc or *other_tc* does not have a specified width. - """ - if self.width and other_tc.width: - self.width += other_tc.width - - @property - def _grid_col(self): - """ - The grid column at which this cell begins. - """ - tr = self._tr - idx = tr.tc_lst.index(self) - preceding_tcs = tr.tc_lst[:idx] - return sum(tc.grid_span for tc in preceding_tcs) - - def _grow_to(self, width, height, top_tc=None): - """ - Grow this cell to *width* grid columns and *height* rows by expanding - horizontal spans and creating continuation cells to form vertical - spans. - """ - def vMerge_val(top_tc): - if top_tc is not self: - return ST_Merge.CONTINUE - if height == 1: - return None - return ST_Merge.RESTART - - top_tc = self if top_tc is None else top_tc - self._span_to_width(width, top_tc, vMerge_val(top_tc)) - if height > 1: - self._tc_below._grow_to(width, height-1, top_tc) - - def _insert_tcPr(self, tcPr): - """ - ``tcPr`` has a bunch of successors, but it comes first if it appears, - so just overriding and using insert(0, ...) rather than spelling out - successors. 
- """ - self.insert(0, tcPr) - return tcPr - - @property - def _is_empty(self): - """ - True if this cell contains only a single empty ```` element. - """ - block_items = list(self.iter_block_items()) - if len(block_items) > 1: - return False - p = block_items[0] # cell must include at least one element - if len(p.r_lst) == 0: - return True - return False - - def _move_content_to(self, other_tc): - """ - Append the content of this cell to *other_tc*, leaving this cell with - a single empty ```` element. - """ - if other_tc is self: - return - if self._is_empty: - return - other_tc._remove_trailing_empty_p() - # appending moves each element from self to other_tc - for block_element in self.iter_block_items(): - other_tc.append(block_element) - # add back the required minimum single empty element - self.append(self._new_p()) - - def _new_tbl(self): - return CT_Tbl.new() - - @property - def _next_tc(self): - """ - The `w:tc` element immediately following this one in this row, or - |None| if this is the last `w:tc` element in the row. - """ - following_tcs = self.xpath('./following-sibling::w:tc') - return following_tcs[0] if following_tcs else None - - def _remove(self): - """ - Remove this `w:tc` element from the XML tree. - """ - self.getparent().remove(self) - - def _remove_trailing_empty_p(self): - """ - Remove the last content element from this cell if it is an empty - ```` element. - """ - block_items = list(self.iter_block_items()) - last_content_elm = block_items[-1] - if last_content_elm.tag != qn('w:p'): - return - p = last_content_elm - if len(p.r_lst) > 0: - return - self.remove(p) - - def _span_dimensions(self, other_tc): - """ - Return a (top, left, height, width) 4-tuple specifying the extents of - the merged cell formed by using this tc and *other_tc* as opposite - corner extents. 
- """ - def raise_on_inverted_L(a, b): - if a.top == b.top and a.bottom != b.bottom: - raise InvalidSpanError('requested span not rectangular') - if a.left == b.left and a.right != b.right: - raise InvalidSpanError('requested span not rectangular') - - def raise_on_tee_shaped(a, b): - top_most, other = (a, b) if a.top < b.top else (b, a) - if top_most.top < other.top and top_most.bottom > other.bottom: - raise InvalidSpanError('requested span not rectangular') - - left_most, other = (a, b) if a.left < b.left else (b, a) - if left_most.left < other.left and left_most.right > other.right: - raise InvalidSpanError('requested span not rectangular') - - raise_on_inverted_L(self, other_tc) - raise_on_tee_shaped(self, other_tc) - - top = min(self.top, other_tc.top) - left = min(self.left, other_tc.left) - bottom = max(self.bottom, other_tc.bottom) - right = max(self.right, other_tc.right) - - return top, left, bottom - top, right - left - - def _span_to_width(self, grid_width, top_tc, vMerge): - """ - Incorporate and then remove `w:tc` elements to the right of this one - until this cell spans *grid_width*. Raises |ValueError| if - *grid_width* cannot be exactly achieved, such as when a merged cell - would drive the span width greater than *grid_width* or if not enough - grid columns are available to make this cell that wide. All content - from incorporated cells is appended to *top_tc*. The val attribute of - the vMerge element on the single remaining cell is set to *vMerge*. - If *vMerge* is |None|, the vMerge element is removed if present. - """ - self._move_content_to(top_tc) - while self.grid_span < grid_width: - self._swallow_next_tc(grid_width, top_tc) - self.vMerge = vMerge - - def _swallow_next_tc(self, grid_width, top_tc): - """ - Extend the horizontal span of this `w:tc` element to incorporate the - following `w:tc` element in the row and then delete that following - `w:tc` element. 
Any content in the following `w:tc` element is - appended to the content of *top_tc*. The width of the following - `w:tc` element is added to this one, if present. Raises - |InvalidSpanError| if the width of the resulting cell is greater than - *grid_width* or if there is no next `` element in the row. - """ - def raise_on_invalid_swallow(next_tc): - if next_tc is None: - raise InvalidSpanError('not enough grid columns') - if self.grid_span + next_tc.grid_span > grid_width: - raise InvalidSpanError('span is not rectangular') - - next_tc = self._next_tc - raise_on_invalid_swallow(next_tc) - next_tc._move_content_to(top_tc) - self._add_width_of(next_tc) - self.grid_span += next_tc.grid_span - next_tc._remove() - - @property - def _tbl(self): - """ - The tbl element this tc element appears in. - """ - return self.xpath('./ancestor::w:tbl[position()=1]')[0] - - @property - def _tc_above(self): - """ - The `w:tc` element immediately above this one in its grid column. - """ - return self._tr_above.tc_at_grid_col(self._grid_col) - - @property - def _tc_below(self): - """ - The tc element immediately below this one in its grid column. - """ - tr_below = self._tr_below - if tr_below is None: - return None - return tr_below.tc_at_grid_col(self._grid_col) - - @property - def _tr(self): - """ - The tr element this tc element appears in. - """ - return self.xpath('./ancestor::w:tr[position()=1]')[0] - - @property - def _tr_above(self): - """ - The tr element prior in sequence to the tr this cell appears in. - Raises |ValueError| if called on a cell in the top-most row. - """ - tr_lst = self._tbl.tr_lst - tr_idx = tr_lst.index(self._tr) - if tr_idx == 0: - raise ValueError('no tr above topmost tr') - return tr_lst[tr_idx-1] - - @property - def _tr_below(self): - """ - The tr element next in sequence after the tr this cell appears in, or - |None| if this cell appears in the last row. 
- """ - tr_lst = self._tbl.tr_lst - tr_idx = tr_lst.index(self._tr) - try: - return tr_lst[tr_idx+1] - except IndexError: - return None - - @property - def _tr_idx(self): - """ - The row index of the tr element this tc element appears in. - """ - return self._tbl.tr_lst.index(self._tr) - - -class CT_TcPr(BaseOxmlElement): - """ - ```` element, defining table cell properties - """ - _tag_seq = ( - 'w:cnfStyle', 'w:tcW', 'w:gridSpan', 'w:hMerge', 'w:vMerge', - 'w:tcBorders', 'w:shd', 'w:noWrap', 'w:tcMar', 'w:textDirection', - 'w:tcFitText', 'w:vAlign', 'w:hideMark', 'w:headers', 'w:cellIns', - 'w:cellDel', 'w:cellMerge', 'w:tcPrChange' - ) - tcW = ZeroOrOne('w:tcW', successors=_tag_seq[2:]) - gridSpan = ZeroOrOne('w:gridSpan', successors=_tag_seq[3:]) - vMerge = ZeroOrOne('w:vMerge', successors=_tag_seq[5:]) - del _tag_seq - - @property - def grid_span(self): - """ - The integer number of columns this cell spans. Determined by - ./w:gridSpan/@val, it defaults to 1. - """ - gridSpan = self.gridSpan - if gridSpan is None: - return 1 - return gridSpan.val - - @grid_span.setter - def grid_span(self, value): - self._remove_gridSpan() - if value > 1: - self.get_or_add_gridSpan().val = value - - @property - def vMerge_val(self): - """ - The value of the ./w:vMerge/@val attribute, or |None| if the - w:vMerge element is not present. - """ - vMerge = self.vMerge - if vMerge is None: - return None - return vMerge.val - - @vMerge_val.setter - def vMerge_val(self, value): - self._remove_vMerge() - if value is not None: - self._add_vMerge().val = value - - @property - def width(self): - """ - Return the EMU length value represented in the ```` child - element or |None| if not present or its type is not 'dxa'. 
- """ - tcW = self.tcW - if tcW is None: - return None - return tcW.width - - @width.setter - def width(self, value): - tcW = self.get_or_add_tcW() - tcW.width = value - - -class CT_VMerge(BaseOxmlElement): - """ - ```` element, specifying vertical merging behavior of a cell. - """ - val = OptionalAttribute('w:val', ST_Merge, default=ST_Merge.CONTINUE) diff --git a/docx/oxml/text/font.py b/docx/oxml/text/font.py deleted file mode 100644 index 810ec2b30..000000000 --- a/docx/oxml/text/font.py +++ /dev/null @@ -1,320 +0,0 @@ -# encoding: utf-8 - -""" -Custom element classes related to run properties (font). -""" - -from .. import parse_xml -from ...enum.dml import MSO_THEME_COLOR -from ...enum.text import WD_COLOR, WD_UNDERLINE -from ..ns import nsdecls, qn -from ..simpletypes import ( - ST_HexColor, ST_HpsMeasure, ST_String, ST_VerticalAlignRun -) -from ..xmlchemy import ( - BaseOxmlElement, OptionalAttribute, RequiredAttribute, ZeroOrOne -) - - -class CT_Color(BaseOxmlElement): - """ - `w:color` element, specifying the color of a font and perhaps other - objects. - """ - val = RequiredAttribute('w:val', ST_HexColor) - themeColor = OptionalAttribute('w:themeColor', MSO_THEME_COLOR) - - -class CT_Fonts(BaseOxmlElement): - """ - ```` element, specifying typeface name for the various language - types. - """ - ascii = OptionalAttribute('w:ascii', ST_String) - hAnsi = OptionalAttribute('w:hAnsi', ST_String) - - -class CT_Highlight(BaseOxmlElement): - """ - `w:highlight` element, specifying font highlighting/background color. - """ - val = RequiredAttribute('w:val', WD_COLOR) - - -class CT_HpsMeasure(BaseOxmlElement): - """ - Used for ```` element and others, specifying font size in - half-points. - """ - val = RequiredAttribute('w:val', ST_HpsMeasure) - - -class CT_RPr(BaseOxmlElement): - """ - ```` element, containing the properties for a run. 
- """ - _tag_seq = ( - 'w:rStyle', 'w:rFonts', 'w:b', 'w:bCs', 'w:i', 'w:iCs', 'w:caps', - 'w:smallCaps', 'w:strike', 'w:dstrike', 'w:outline', 'w:shadow', - 'w:emboss', 'w:imprint', 'w:noProof', 'w:snapToGrid', 'w:vanish', - 'w:webHidden', 'w:color', 'w:spacing', 'w:w', 'w:kern', 'w:position', - 'w:sz', 'w:szCs', 'w:highlight', 'w:u', 'w:effect', 'w:bdr', 'w:shd', - 'w:fitText', 'w:vertAlign', 'w:rtl', 'w:cs', 'w:em', 'w:lang', - 'w:eastAsianLayout', 'w:specVanish', 'w:oMath' - ) - rStyle = ZeroOrOne('w:rStyle', successors=_tag_seq[1:]) - rFonts = ZeroOrOne('w:rFonts', successors=_tag_seq[2:]) - b = ZeroOrOne('w:b', successors=_tag_seq[3:]) - bCs = ZeroOrOne('w:bCs', successors=_tag_seq[4:]) - i = ZeroOrOne('w:i', successors=_tag_seq[5:]) - iCs = ZeroOrOne('w:iCs', successors=_tag_seq[6:]) - caps = ZeroOrOne('w:caps', successors=_tag_seq[7:]) - smallCaps = ZeroOrOne('w:smallCaps', successors=_tag_seq[8:]) - strike = ZeroOrOne('w:strike', successors=_tag_seq[9:]) - dstrike = ZeroOrOne('w:dstrike', successors=_tag_seq[10:]) - outline = ZeroOrOne('w:outline', successors=_tag_seq[11:]) - shadow = ZeroOrOne('w:shadow', successors=_tag_seq[12:]) - emboss = ZeroOrOne('w:emboss', successors=_tag_seq[13:]) - imprint = ZeroOrOne('w:imprint', successors=_tag_seq[14:]) - noProof = ZeroOrOne('w:noProof', successors=_tag_seq[15:]) - snapToGrid = ZeroOrOne('w:snapToGrid', successors=_tag_seq[16:]) - vanish = ZeroOrOne('w:vanish', successors=_tag_seq[17:]) - webHidden = ZeroOrOne('w:webHidden', successors=_tag_seq[18:]) - color = ZeroOrOne('w:color', successors=_tag_seq[19:]) - sz = ZeroOrOne('w:sz', successors=_tag_seq[24:]) - highlight = ZeroOrOne('w:highlight', successors=_tag_seq[26:]) - u = ZeroOrOne('w:u', successors=_tag_seq[27:]) - vertAlign = ZeroOrOne('w:vertAlign', successors=_tag_seq[32:]) - rtl = ZeroOrOne('w:rtl', successors=_tag_seq[33:]) - cs = ZeroOrOne('w:cs', successors=_tag_seq[34:]) - specVanish = ZeroOrOne('w:specVanish', successors=_tag_seq[38:]) - oMath = 
ZeroOrOne('w:oMath', successors=_tag_seq[39:]) - del _tag_seq - - def _new_color(self): - """ - Override metaclass method to set `w:color/@val` to RGB black on - create. - """ - return parse_xml('' % nsdecls('w')) - - @property - def highlight_val(self): - """ - Value of `w:highlight/@val` attribute, specifying a font's highlight - color, or `None` if the text is not highlighted. - """ - highlight = self.highlight - if highlight is None: - return None - return highlight.val - - @highlight_val.setter - def highlight_val(self, value): - if value is None: - self._remove_highlight() - return - highlight = self.get_or_add_highlight() - highlight.val = value - - @property - def rFonts_ascii(self): - """ - The value of `w:rFonts/@w:ascii` or |None| if not present. Represents - the assigned typeface name. The rFonts element also specifies other - special-case typeface names; this method handles the case where just - the common name is required. - """ - rFonts = self.rFonts - if rFonts is None: - return None - return rFonts.ascii - - @rFonts_ascii.setter - def rFonts_ascii(self, value): - if value is None: - self._remove_rFonts() - return - rFonts = self.get_or_add_rFonts() - rFonts.ascii = value - - @property - def rFonts_hAnsi(self): - """ - The value of `w:rFonts/@w:hAnsi` or |None| if not present. - """ - rFonts = self.rFonts - if rFonts is None: - return None - return rFonts.hAnsi - - @rFonts_hAnsi.setter - def rFonts_hAnsi(self, value): - if value is None and self.rFonts is None: - return - rFonts = self.get_or_add_rFonts() - rFonts.hAnsi = value - - @property - def style(self): - """ - String contained in child, or None if that element is not - present. - """ - rStyle = self.rStyle - if rStyle is None: - return None - return rStyle.val - - @style.setter - def style(self, style): - """ - Set val attribute of child element to *style*, adding a - new element if necessary. If *style* is |None|, remove the - element if present. 
- """ - if style is None: - self._remove_rStyle() - elif self.rStyle is None: - self._add_rStyle(val=style) - else: - self.rStyle.val = style - - @property - def subscript(self): - """ - |True| if `w:vertAlign/@w:val` is 'subscript'. |False| if - `w:vertAlign/@w:val` contains any other value. |None| if - `w:vertAlign` is not present. - """ - vertAlign = self.vertAlign - if vertAlign is None: - return None - if vertAlign.val == ST_VerticalAlignRun.SUBSCRIPT: - return True - return False - - @subscript.setter - def subscript(self, value): - if value is None: - self._remove_vertAlign() - elif bool(value) is True: - self.get_or_add_vertAlign().val = ST_VerticalAlignRun.SUBSCRIPT - elif self.vertAlign is None: - return - elif self.vertAlign.val == ST_VerticalAlignRun.SUBSCRIPT: - self._remove_vertAlign() - - @property - def superscript(self): - """ - |True| if `w:vertAlign/@w:val` is 'superscript'. |False| if - `w:vertAlign/@w:val` contains any other value. |None| if - `w:vertAlign` is not present. - """ - vertAlign = self.vertAlign - if vertAlign is None: - return None - if vertAlign.val == ST_VerticalAlignRun.SUPERSCRIPT: - return True - return False - - @superscript.setter - def superscript(self, value): - if value is None: - self._remove_vertAlign() - elif bool(value) is True: - self.get_or_add_vertAlign().val = ST_VerticalAlignRun.SUPERSCRIPT - elif self.vertAlign is None: - return - elif self.vertAlign.val == ST_VerticalAlignRun.SUPERSCRIPT: - self._remove_vertAlign() - - @property - def sz_val(self): - """ - The value of `w:sz/@w:val` or |None| if not present. - """ - sz = self.sz - if sz is None: - return None - return sz.val - - @sz_val.setter - def sz_val(self, value): - if value is None: - self._remove_sz() - return - sz = self.get_or_add_sz() - sz.val = value - - @property - def u_val(self): - """ - Value of `w:u/@val`, or None if not present. 
- """ - u = self.u - if u is None: - return None - return u.val - - @u_val.setter - def u_val(self, value): - self._remove_u() - if value is not None: - self._add_u().val = value - - def _get_bool_val(self, name): - """ - Return the value of the boolean child element having *name*, e.g. - 'b', 'i', and 'smallCaps'. - """ - element = getattr(self, name) - if element is None: - return None - return element.val - - def _set_bool_val(self, name, value): - if value is None: - getattr(self, '_remove_%s' % name)() - return - element = getattr(self, 'get_or_add_%s' % name)() - element.val = value - - -class CT_Underline(BaseOxmlElement): - """ - ```` element, specifying the underlining style for a run. - """ - @property - def val(self): - """ - The underline type corresponding to the ``w:val`` attribute value. - """ - val = self.get(qn('w:val')) - underline = WD_UNDERLINE.from_xml(val) - if underline == WD_UNDERLINE.SINGLE: - return True - if underline == WD_UNDERLINE.NONE: - return False - return underline - - @val.setter - def val(self, value): - # works fine without these two mappings, but only because True == 1 - # and False == 0, which happen to match the mapping for WD_UNDERLINE - # .SINGLE and .NONE respectively. - if value is True: - value = WD_UNDERLINE.SINGLE - elif value is False: - value = WD_UNDERLINE.NONE - - val = WD_UNDERLINE.to_xml(value) - self.set(qn('w:val'), val) - - -class CT_VerticalAlignRun(BaseOxmlElement): - """ - ```` element, specifying subscript or superscript. - """ - val = RequiredAttribute('w:val', ST_VerticalAlignRun) diff --git a/docx/oxml/text/paragraph.py b/docx/oxml/text/paragraph.py deleted file mode 100644 index 5e4213776..000000000 --- a/docx/oxml/text/paragraph.py +++ /dev/null @@ -1,78 +0,0 @@ -# encoding: utf-8 - -""" -Custom element classes related to paragraphs (CT_P). 
-""" - -from ..ns import qn -from ..xmlchemy import BaseOxmlElement, OxmlElement, ZeroOrMore, ZeroOrOne - - -class CT_P(BaseOxmlElement): - """ - ```` element, containing the properties and text for a paragraph. - """ - pPr = ZeroOrOne('w:pPr') - r = ZeroOrMore('w:r') - - def _insert_pPr(self, pPr): - self.insert(0, pPr) - return pPr - - def add_p_before(self): - """ - Return a new ```` element inserted directly prior to this one. - """ - new_p = OxmlElement('w:p') - self.addprevious(new_p) - return new_p - - @property - def alignment(self): - """ - The value of the ```` grandchild element or |None| if not - present. - """ - pPr = self.pPr - if pPr is None: - return None - return pPr.jc_val - - @alignment.setter - def alignment(self, value): - pPr = self.get_or_add_pPr() - pPr.jc_val = value - - def clear_content(self): - """ - Remove all child elements, except the ```` element if present. - """ - for child in self[:]: - if child.tag == qn('w:pPr'): - continue - self.remove(child) - - def set_sectPr(self, sectPr): - """ - Unconditionally replace or add *sectPr* as a grandchild in the - correct sequence. - """ - pPr = self.get_or_add_pPr() - pPr._remove_sectPr() - pPr._insert_sectPr(sectPr) - - @property - def style(self): - """ - String contained in w:val attribute of ./w:pPr/w:pStyle grandchild, - or |None| if not present. - """ - pPr = self.pPr - if pPr is None: - return None - return pPr.style - - @style.setter - def style(self, style): - pPr = self.get_or_add_pPr() - pPr.style = style diff --git a/docx/oxml/text/parfmt.py b/docx/oxml/text/parfmt.py deleted file mode 100644 index 466b11b1b..000000000 --- a/docx/oxml/text/parfmt.py +++ /dev/null @@ -1,348 +0,0 @@ -# encoding: utf-8 - -""" -Custom element classes related to paragraph properties (CT_PPr). 
-""" - -from ...enum.text import ( - WD_ALIGN_PARAGRAPH, WD_LINE_SPACING, WD_TAB_ALIGNMENT, WD_TAB_LEADER -) -from ...shared import Length -from ..simpletypes import ST_SignedTwipsMeasure, ST_TwipsMeasure -from ..xmlchemy import ( - BaseOxmlElement, OneOrMore, OptionalAttribute, RequiredAttribute, - ZeroOrOne -) - - -class CT_Ind(BaseOxmlElement): - """ - ```` element, specifying paragraph indentation. - """ - left = OptionalAttribute('w:left', ST_SignedTwipsMeasure) - right = OptionalAttribute('w:right', ST_SignedTwipsMeasure) - firstLine = OptionalAttribute('w:firstLine', ST_TwipsMeasure) - hanging = OptionalAttribute('w:hanging', ST_TwipsMeasure) - - -class CT_Jc(BaseOxmlElement): - """ - ```` element, specifying paragraph justification. - """ - val = RequiredAttribute('w:val', WD_ALIGN_PARAGRAPH) - - -class CT_PPr(BaseOxmlElement): - """ - ```` element, containing the properties for a paragraph. - """ - _tag_seq = ( - 'w:pStyle', 'w:keepNext', 'w:keepLines', 'w:pageBreakBefore', - 'w:framePr', 'w:widowControl', 'w:numPr', 'w:suppressLineNumbers', - 'w:pBdr', 'w:shd', 'w:tabs', 'w:suppressAutoHyphens', 'w:kinsoku', - 'w:wordWrap', 'w:overflowPunct', 'w:topLinePunct', 'w:autoSpaceDE', - 'w:autoSpaceDN', 'w:bidi', 'w:adjustRightInd', 'w:snapToGrid', - 'w:spacing', 'w:ind', 'w:contextualSpacing', 'w:mirrorIndents', - 'w:suppressOverlap', 'w:jc', 'w:textDirection', 'w:textAlignment', - 'w:textboxTightWrap', 'w:outlineLvl', 'w:divId', 'w:cnfStyle', - 'w:rPr', 'w:sectPr', 'w:pPrChange' - ) - pStyle = ZeroOrOne('w:pStyle', successors=_tag_seq[1:]) - keepNext = ZeroOrOne('w:keepNext', successors=_tag_seq[2:]) - keepLines = ZeroOrOne('w:keepLines', successors=_tag_seq[3:]) - pageBreakBefore = ZeroOrOne('w:pageBreakBefore', successors=_tag_seq[4:]) - widowControl = ZeroOrOne('w:widowControl', successors=_tag_seq[6:]) - numPr = ZeroOrOne('w:numPr', successors=_tag_seq[7:]) - tabs = ZeroOrOne('w:tabs', successors=_tag_seq[11:]) - spacing = ZeroOrOne('w:spacing', 
successors=_tag_seq[22:]) - ind = ZeroOrOne('w:ind', successors=_tag_seq[23:]) - jc = ZeroOrOne('w:jc', successors=_tag_seq[27:]) - sectPr = ZeroOrOne('w:sectPr', successors=_tag_seq[35:]) - del _tag_seq - - @property - def first_line_indent(self): - """ - A |Length| value calculated from the values of `w:ind/@w:firstLine` - and `w:ind/@w:hanging`. Returns |None| if the `w:ind` child is not - present. - """ - ind = self.ind - if ind is None: - return None - hanging = ind.hanging - if hanging is not None: - return Length(-hanging) - firstLine = ind.firstLine - if firstLine is None: - return None - return firstLine - - @first_line_indent.setter - def first_line_indent(self, value): - if self.ind is None and value is None: - return - ind = self.get_or_add_ind() - ind.firstLine = ind.hanging = None - if value is None: - return - elif value < 0: - ind.hanging = -value - else: - ind.firstLine = value - - @property - def ind_left(self): - """ - The value of `w:ind/@w:left` or |None| if not present. - """ - ind = self.ind - if ind is None: - return None - return ind.left - - @ind_left.setter - def ind_left(self, value): - if value is None and self.ind is None: - return - ind = self.get_or_add_ind() - ind.left = value - - @property - def ind_right(self): - """ - The value of `w:ind/@w:right` or |None| if not present. - """ - ind = self.ind - if ind is None: - return None - return ind.right - - @ind_right.setter - def ind_right(self, value): - if value is None and self.ind is None: - return - ind = self.get_or_add_ind() - ind.right = value - - @property - def jc_val(self): - """ - The value of the ```` child element or |None| if not present. - """ - jc = self.jc - if jc is None: - return None - return jc.val - - @jc_val.setter - def jc_val(self, value): - if value is None: - self._remove_jc() - return - self.get_or_add_jc().val = value - - @property - def keepLines_val(self): - """ - The value of `keepLines/@val` or |None| if not present. 
- """ - keepLines = self.keepLines - if keepLines is None: - return None - return keepLines.val - - @keepLines_val.setter - def keepLines_val(self, value): - if value is None: - self._remove_keepLines() - else: - self.get_or_add_keepLines().val = value - - @property - def keepNext_val(self): - """ - The value of `keepNext/@val` or |None| if not present. - """ - keepNext = self.keepNext - if keepNext is None: - return None - return keepNext.val - - @keepNext_val.setter - def keepNext_val(self, value): - if value is None: - self._remove_keepNext() - else: - self.get_or_add_keepNext().val = value - - @property - def pageBreakBefore_val(self): - """ - The value of `pageBreakBefore/@val` or |None| if not present. - """ - pageBreakBefore = self.pageBreakBefore - if pageBreakBefore is None: - return None - return pageBreakBefore.val - - @pageBreakBefore_val.setter - def pageBreakBefore_val(self, value): - if value is None: - self._remove_pageBreakBefore() - else: - self.get_or_add_pageBreakBefore().val = value - - @property - def spacing_after(self): - """ - The value of `w:spacing/@w:after` or |None| if not present. - """ - spacing = self.spacing - if spacing is None: - return None - return spacing.after - - @spacing_after.setter - def spacing_after(self, value): - if value is None and self.spacing is None: - return - self.get_or_add_spacing().after = value - - @property - def spacing_before(self): - """ - The value of `w:spacing/@w:before` or |None| if not present. - """ - spacing = self.spacing - if spacing is None: - return None - return spacing.before - - @spacing_before.setter - def spacing_before(self, value): - if value is None and self.spacing is None: - return - self.get_or_add_spacing().before = value - - @property - def spacing_line(self): - """ - The value of `w:spacing/@w:line` or |None| if not present. 
- """ - spacing = self.spacing - if spacing is None: - return None - return spacing.line - - @spacing_line.setter - def spacing_line(self, value): - if value is None and self.spacing is None: - return - self.get_or_add_spacing().line = value - - @property - def spacing_lineRule(self): - """ - The value of `w:spacing/@w:lineRule` as a member of the - :ref:`WdLineSpacing` enumeration. Only the `MULTIPLE`, `EXACTLY`, and - `AT_LEAST` members are used. It is the responsibility of the client - to calculate the use of `SINGLE`, `DOUBLE`, and `MULTIPLE` based on - the value of `w:spacing/@w:line` if that behavior is desired. - """ - spacing = self.spacing - if spacing is None: - return None - lineRule = spacing.lineRule - if lineRule is None and spacing.line is not None: - return WD_LINE_SPACING.MULTIPLE - return lineRule - - @spacing_lineRule.setter - def spacing_lineRule(self, value): - if value is None and self.spacing is None: - return - self.get_or_add_spacing().lineRule = value - - @property - def style(self): - """ - String contained in child, or None if that element is not - present. - """ - pStyle = self.pStyle - if pStyle is None: - return None - return pStyle.val - - @style.setter - def style(self, style): - """ - Set val attribute of child element to *style*, adding a - new element if necessary. If *style* is |None|, remove the - element if present. - """ - if style is None: - self._remove_pStyle() - return - pStyle = self.get_or_add_pStyle() - pStyle.val = style - - @property - def widowControl_val(self): - """ - The value of `widowControl/@val` or |None| if not present. 
- """ - widowControl = self.widowControl - if widowControl is None: - return None - return widowControl.val - - @widowControl_val.setter - def widowControl_val(self, value): - if value is None: - self._remove_widowControl() - else: - self.get_or_add_widowControl().val = value - - -class CT_Spacing(BaseOxmlElement): - """ - ```` element, specifying paragraph spacing attributes such as - space before and line spacing. - """ - after = OptionalAttribute('w:after', ST_TwipsMeasure) - before = OptionalAttribute('w:before', ST_TwipsMeasure) - line = OptionalAttribute('w:line', ST_SignedTwipsMeasure) - lineRule = OptionalAttribute('w:lineRule', WD_LINE_SPACING) - - -class CT_TabStop(BaseOxmlElement): - """ - ```` element, representing an individual tab stop. - """ - val = RequiredAttribute('w:val', WD_TAB_ALIGNMENT) - leader = OptionalAttribute( - 'w:leader', WD_TAB_LEADER, default=WD_TAB_LEADER.SPACES - ) - pos = RequiredAttribute('w:pos', ST_SignedTwipsMeasure) - - -class CT_TabStops(BaseOxmlElement): - """ - ```` element, container for a sorted sequence of tab stops. - """ - tab = OneOrMore('w:tab', successors=()) - - def insert_tab_in_order(self, pos, align, leader): - """ - Insert a newly created `w:tab` child element in *pos* order. - """ - new_tab = self._new_tab() - new_tab.pos, new_tab.val, new_tab.leader = pos, align, leader - for tab in self.tab_lst: - if new_tab.pos < tab.pos: - tab.addprevious(new_tab) - return new_tab - self.append(new_tab) - return new_tab diff --git a/docx/oxml/text/run.py b/docx/oxml/text/run.py deleted file mode 100644 index 8f0a62e82..000000000 --- a/docx/oxml/text/run.py +++ /dev/null @@ -1,166 +0,0 @@ -# encoding: utf-8 - -""" -Custom element classes related to text runs (CT_R). 
-""" - -from ..ns import qn -from ..simpletypes import ST_BrClear, ST_BrType -from ..xmlchemy import ( - BaseOxmlElement, OptionalAttribute, ZeroOrMore, ZeroOrOne -) - - -class CT_Br(BaseOxmlElement): - """ - ```` element, indicating a line, page, or column break in a run. - """ - type = OptionalAttribute('w:type', ST_BrType) - clear = OptionalAttribute('w:clear', ST_BrClear) - - -class CT_R(BaseOxmlElement): - """ - ```` element, containing the properties and text for a run. - """ - rPr = ZeroOrOne('w:rPr') - t = ZeroOrMore('w:t') - br = ZeroOrMore('w:br') - cr = ZeroOrMore('w:cr') - tab = ZeroOrMore('w:tab') - drawing = ZeroOrMore('w:drawing') - - def _insert_rPr(self, rPr): - self.insert(0, rPr) - return rPr - - def add_t(self, text): - """ - Return a newly added ```` element containing *text*. - """ - t = self._add_t(text=text) - if len(text.strip()) < len(text): - t.set(qn('xml:space'), 'preserve') - return t - - def add_drawing(self, inline_or_anchor): - """ - Return a newly appended ``CT_Drawing`` (````) child - element having *inline_or_anchor* as its child. - """ - drawing = self._add_drawing() - drawing.append(inline_or_anchor) - return drawing - - def clear_content(self): - """ - Remove all child elements except the ```` element if present. - """ - content_child_elms = self[1:] if self.rPr is not None else self[:] - for child in content_child_elms: - self.remove(child) - - @property - def style(self): - """ - String contained in w:val attribute of grandchild, or - |None| if that element is not present. - """ - rPr = self.rPr - if rPr is None: - return None - return rPr.style - - @style.setter - def style(self, style): - """ - Set the character style of this element to *style*. If *style* - is None, remove the style element. - """ - rPr = self.get_or_add_rPr() - rPr.style = style - - @property - def text(self): - """ - A string representing the textual content of this run, with content - child elements like ```` translated to their Python - equivalent. 
- """ - text = '' - for child in self: - if child.tag == qn('w:t'): - t_text = child.text - text += t_text if t_text is not None else '' - elif child.tag == qn('w:tab'): - text += '\t' - elif child.tag in (qn('w:br'), qn('w:cr')): - text += '\n' - return text - - @text.setter - def text(self, text): - self.clear_content() - _RunContentAppender.append_to_run_from_text(self, text) - - -class CT_Text(BaseOxmlElement): - """ - ```` element, containing a sequence of characters within a run. - """ - - -class _RunContentAppender(object): - """ - Service object that knows how to translate a Python string into run - content elements appended to a specified ```` element. Contiguous - sequences of regular characters are appended in a single ```` - element. Each tab character ('\t') causes a ```` element to be - appended. Likewise a newline or carriage return character ('\n', '\r') - causes a ```` element to be appended. - """ - def __init__(self, r): - self._r = r - self._bfr = [] - - @classmethod - def append_to_run_from_text(cls, r, text): - """ - Create a "one-shot" ``_RunContentAppender`` instance and use it to - append the run content elements corresponding to *text* to the - ```` element *r*. - """ - appender = cls(r) - appender.add_text(text) - - def add_text(self, text): - """ - Append the run content elements corresponding to *text* to the - ```` element of this instance. - """ - for char in text: - self.add_char(char) - self.flush() - - def add_char(self, char): - """ - Process the next character of input through the translation finite - state maching (FSM). There are two possible states, buffer pending - and not pending, but those are hidden behind the ``.flush()`` method - which must be called at the end of text to ensure any pending - ```` element is written. 
- """ - if char == '\t': - self.flush() - self._r.add_tab() - elif char in '\r\n': - self.flush() - self._r.add_br() - else: - self._bfr.append(char) - - def flush(self): - text = ''.join(self._bfr) - if text: - self._r.add_t(text) - del self._bfr[:] diff --git a/docx/oxml/xmlchemy.py b/docx/oxml/xmlchemy.py deleted file mode 100644 index 40df33494..000000000 --- a/docx/oxml/xmlchemy.py +++ /dev/null @@ -1,761 +0,0 @@ -# encoding: utf-8 - -""" -Provides a wrapper around lxml that enables declarative definition of custom -element classes. -""" - -from __future__ import absolute_import - -from lxml import etree - -import re - -from . import OxmlElement -from ..compat import Unicode -from .exceptions import InvalidXmlError -from .ns import NamespacePrefixedTag, nsmap, qn -from ..shared import lazyproperty - - -def serialize_for_reading(element): - """ - Serialize *element* to human-readable XML suitable for tests. No XML - declaration. - """ - xml = etree.tostring(element, encoding='unicode', pretty_print=True) - return XmlString(xml) - - -class XmlString(Unicode): - """ - Provides string comparison override suitable for serialized XML that is - useful for tests. - """ - - # ' text' - # | | || | - # +----------+------------------------------------------++-----------+ - # front attrs | text - # close - - _xml_elm_line_patt = re.compile( - '( *)([^<]*)?$' - ) - - def __eq__(self, other): - lines = self.splitlines() - lines_other = other.splitlines() - if len(lines) != len(lines_other): - return False - for line, line_other in zip(lines, lines_other): - if not self._eq_elm_strs(line, line_other): - return False - return True - - def __ne__(self, other): - return not self.__eq__(other) - - def _attr_seq(self, attrs): - """ - Return a sequence of attribute strings parsed from *attrs*. Each - attribute string is stripped of whitespace on both ends. 
- """ - attrs = attrs.strip() - attr_lst = attrs.split() - return sorted(attr_lst) - - def _eq_elm_strs(self, line, line_2): - """ - Return True if the element in *line_2* is XML equivalent to the - element in *line*. - """ - front, attrs, close, text = self._parse_line(line) - front_2, attrs_2, close_2, text_2 = self._parse_line(line_2) - if front != front_2: - return False - if self._attr_seq(attrs) != self._attr_seq(attrs_2): - return False - if close != close_2: - return False - if text != text_2: - return False - return True - - @classmethod - def _parse_line(cls, line): - """ - Return front, attrs, close, text 4-tuple result of parsing XML element - string *line*. - """ - match = cls._xml_elm_line_patt.match(line) - front, attrs, close, text = [match.group(n) for n in range(1, 5)] - return front, attrs, close, text - - -class MetaOxmlElement(type): - """ - Metaclass for BaseOxmlElement - """ - def __init__(cls, clsname, bases, clsdict): - dispatchable = ( - OneAndOnlyOne, OneOrMore, OptionalAttribute, RequiredAttribute, - ZeroOrMore, ZeroOrOne, ZeroOrOneChoice - ) - for key, value in clsdict.items(): - if isinstance(value, dispatchable): - value.populate_class_members(cls, key) - - -class BaseAttribute(object): - """ - Base class for OptionalAttribute and RequiredAttribute, providing common - methods. - """ - def __init__(self, attr_name, simple_type): - super(BaseAttribute, self).__init__() - self._attr_name = attr_name - self._simple_type = simple_type - - def populate_class_members(self, element_cls, prop_name): - """ - Add the appropriate methods to *element_cls*. - """ - self._element_cls = element_cls - self._prop_name = prop_name - - self._add_attr_property() - - def _add_attr_property(self): - """ - Add a read/write ``{prop_name}`` property to the element class that - returns the interpreted value of this attribute on access and changes - the attribute value to its ST_* counterpart on assignment. 
- """ - property_ = property(self._getter, self._setter, None) - # assign unconditionally to overwrite element name definition - setattr(self._element_cls, self._prop_name, property_) - - @property - def _clark_name(self): - if ':' in self._attr_name: - return qn(self._attr_name) - return self._attr_name - - -class OptionalAttribute(BaseAttribute): - """ - Defines an optional attribute on a custom element class. An optional - attribute returns a default value when not present for reading. When - assigned |None|, the attribute is removed. - """ - def __init__(self, attr_name, simple_type, default=None): - super(OptionalAttribute, self).__init__(attr_name, simple_type) - self._default = default - - @property - def _getter(self): - """ - Return a function object suitable for the "get" side of the attribute - property descriptor. - """ - def get_attr_value(obj): - attr_str_value = obj.get(self._clark_name) - if attr_str_value is None: - return self._default - return self._simple_type.from_xml(attr_str_value) - get_attr_value.__doc__ = self._docstring - return get_attr_value - - @property - def _docstring(self): - """ - Return the string to use as the ``__doc__`` attribute of the property - for this attribute. - """ - return ( - '%s type-converted value of ``%s`` attribute, or |None| (or spec' - 'ified default value) if not present. Assigning the default valu' - 'e causes the attribute to be removed from the element.' % - (self._simple_type.__name__, self._attr_name) - ) - - @property - def _setter(self): - """ - Return a function object suitable for the "set" side of the attribute - property descriptor. 
- """ - def set_attr_value(obj, value): - if value is None or value == self._default: - if self._clark_name in obj.attrib: - del obj.attrib[self._clark_name] - return - str_value = self._simple_type.to_xml(value) - obj.set(self._clark_name, str_value) - return set_attr_value - - -class RequiredAttribute(BaseAttribute): - """ - Defines a required attribute on a custom element class. A required - attribute is assumed to be present for reading, so does not have - a default value; its actual value is always used. If missing on read, - an |InvalidXmlError| is raised. It also does not remove the attribute if - |None| is assigned. Assigning |None| raises |TypeError| or |ValueError|, - depending on the simple type of the attribute. - """ - @property - def _getter(self): - """ - Return a function object suitable for the "get" side of the attribute - property descriptor. - """ - def get_attr_value(obj): - attr_str_value = obj.get(self._clark_name) - if attr_str_value is None: - raise InvalidXmlError( - "required '%s' attribute not present on element %s" % - (self._attr_name, obj.tag) - ) - return self._simple_type.from_xml(attr_str_value) - get_attr_value.__doc__ = self._docstring - return get_attr_value - - @property - def _docstring(self): - """ - Return the string to use as the ``__doc__`` attribute of the property - for this attribute. - """ - return ( - '%s type-converted value of ``%s`` attribute.' % - (self._simple_type.__name__, self._attr_name) - ) - - @property - def _setter(self): - """ - Return a function object suitable for the "set" side of the attribute - property descriptor. - """ - def set_attr_value(obj, value): - str_value = self._simple_type.to_xml(value) - obj.set(self._clark_name, str_value) - return set_attr_value - - -class _BaseChildElement(object): - """ - Base class for the child element classes corresponding to varying - cardinalities, such as ZeroOrOne and ZeroOrMore. 
- """ - def __init__(self, nsptagname, successors=()): - super(_BaseChildElement, self).__init__() - self._nsptagname = nsptagname - self._successors = successors - - def populate_class_members(self, element_cls, prop_name): - """ - Baseline behavior for adding the appropriate methods to - *element_cls*. - """ - self._element_cls = element_cls - self._prop_name = prop_name - - def _add_adder(self): - """ - Add an ``_add_x()`` method to the element class for this child - element. - """ - def _add_child(obj, **attrs): - new_method = getattr(obj, self._new_method_name) - child = new_method() - for key, value in attrs.items(): - setattr(child, key, value) - insert_method = getattr(obj, self._insert_method_name) - insert_method(child) - return child - - _add_child.__doc__ = ( - 'Add a new ``<%s>`` child element unconditionally, inserted in t' - 'he correct sequence.' % self._nsptagname - ) - self._add_to_class(self._add_method_name, _add_child) - - def _add_creator(self): - """ - Add a ``_new_{prop_name}()`` method to the element class that creates - a new, empty element of the correct type, having no attributes. - """ - creator = self._creator - creator.__doc__ = ( - 'Return a "loose", newly created ``<%s>`` element having no attri' - 'butes, text, or children.' % self._nsptagname - ) - self._add_to_class(self._new_method_name, creator) - - def _add_getter(self): - """ - Add a read-only ``{prop_name}`` property to the element class for - this child element. - """ - property_ = property(self._getter, None, None) - # assign unconditionally to overwrite element name definition - setattr(self._element_cls, self._prop_name, property_) - - def _add_inserter(self): - """ - Add an ``_insert_x()`` method to the element class for this child - element. 
- """ - def _insert_child(obj, child): - obj.insert_element_before(child, *self._successors) - return child - - _insert_child.__doc__ = ( - 'Return the passed ``<%s>`` element after inserting it as a chil' - 'd in the correct sequence.' % self._nsptagname - ) - self._add_to_class(self._insert_method_name, _insert_child) - - def _add_list_getter(self): - """ - Add a read-only ``{prop_name}_lst`` property to the element class to - retrieve a list of child elements matching this type. - """ - prop_name = '%s_lst' % self._prop_name - property_ = property(self._list_getter, None, None) - setattr(self._element_cls, prop_name, property_) - - @lazyproperty - def _add_method_name(self): - return '_add_%s' % self._prop_name - - def _add_public_adder(self): - """ - Add a public ``add_x()`` method to the parent element class. - """ - def add_child(obj): - private_add_method = getattr(obj, self._add_method_name) - child = private_add_method() - return child - - add_child.__doc__ = ( - 'Add a new ``<%s>`` child element unconditionally, inserted in t' - 'he correct sequence.' % self._nsptagname - ) - self._add_to_class(self._public_add_method_name, add_child) - - def _add_to_class(self, name, method): - """ - Add *method* to the target class as *name*, unless *name* is already - defined on the class. - """ - if hasattr(self._element_cls, name): - return - setattr(self._element_cls, name, method) - - @property - def _creator(self): - """ - Return a function object that creates a new, empty element of the - right type, having no attributes. - """ - def new_child_element(obj): - return OxmlElement(self._nsptagname) - return new_child_element - - @property - def _getter(self): - """ - Return a function object suitable for the "get" side of the property - descriptor. This default getter returns the child element with - matching tag name or |None| if not present. 
- """ - def get_child_element(obj): - return obj.find(qn(self._nsptagname)) - get_child_element.__doc__ = ( - '``<%s>`` child element or |None| if not present.' - % self._nsptagname - ) - return get_child_element - - @lazyproperty - def _insert_method_name(self): - return '_insert_%s' % self._prop_name - - @property - def _list_getter(self): - """ - Return a function object suitable for the "get" side of a list - property descriptor. - """ - def get_child_element_list(obj): - return obj.findall(qn(self._nsptagname)) - get_child_element_list.__doc__ = ( - 'A list containing each of the ``<%s>`` child elements, in the o' - 'rder they appear.' % self._nsptagname - ) - return get_child_element_list - - @lazyproperty - def _public_add_method_name(self): - """ - add_childElement() is public API for a repeating element, allowing - new elements to be added to the sequence. May be overridden to - provide a friendlier API to clients having domain appropriate - parameter names for required attributes. - """ - return 'add_%s' % self._prop_name - - @lazyproperty - def _remove_method_name(self): - return '_remove_%s' % self._prop_name - - @lazyproperty - def _new_method_name(self): - return '_new_%s' % self._prop_name - - -class Choice(_BaseChildElement): - """ - Defines a child element belonging to a group, only one of which may - appear as a child. - """ - @property - def nsptagname(self): - return self._nsptagname - - def populate_class_members( - self, element_cls, group_prop_name, successors): - """ - Add the appropriate methods to *element_cls*. - """ - self._element_cls = element_cls - self._group_prop_name = group_prop_name - self._successors = successors - - self._add_getter() - self._add_creator() - self._add_inserter() - self._add_adder() - self._add_get_or_change_to_method() - - def _add_get_or_change_to_method(self): - """ - Add a ``get_or_change_to_x()`` method to the element class for this - child element. 
- """ - def get_or_change_to_child(obj): - child = getattr(obj, self._prop_name) - if child is not None: - return child - remove_group_method = getattr( - obj, self._remove_group_method_name - ) - remove_group_method() - add_method = getattr(obj, self._add_method_name) - child = add_method() - return child - - get_or_change_to_child.__doc__ = ( - 'Return the ``<%s>`` child, replacing any other group element if' - ' found.' - ) % self._nsptagname - self._add_to_class( - self._get_or_change_to_method_name, get_or_change_to_child - ) - - @property - def _prop_name(self): - """ - Calculate property name from tag name, e.g. a:schemeClr -> schemeClr. - """ - if ':' in self._nsptagname: - start = self._nsptagname.index(':')+1 - else: - start = 0 - return self._nsptagname[start:] - - @lazyproperty - def _get_or_change_to_method_name(self): - return 'get_or_change_to_%s' % self._prop_name - - @lazyproperty - def _remove_group_method_name(self): - return '_remove_%s' % self._group_prop_name - - -class OneAndOnlyOne(_BaseChildElement): - """ - Defines a required child element for MetaOxmlElement. - """ - def __init__(self, nsptagname): - super(OneAndOnlyOne, self).__init__(nsptagname, None) - - def populate_class_members(self, element_cls, prop_name): - """ - Add the appropriate methods to *element_cls*. - """ - super(OneAndOnlyOne, self).populate_class_members( - element_cls, prop_name - ) - self._add_getter() - - @property - def _getter(self): - """ - Return a function object suitable for the "get" side of the property - descriptor. - """ - def get_child_element(obj): - child = obj.find(qn(self._nsptagname)) - if child is None: - raise InvalidXmlError( - "required ``<%s>`` child element not present" % - self._nsptagname - ) - return child - - get_child_element.__doc__ = ( - 'Required ``<%s>`` child element.' 
- % self._nsptagname - ) - return get_child_element - - -class OneOrMore(_BaseChildElement): - """ - Defines a repeating child element for MetaOxmlElement that must appear at - least once. - """ - def populate_class_members(self, element_cls, prop_name): - """ - Add the appropriate methods to *element_cls*. - """ - super(OneOrMore, self).populate_class_members( - element_cls, prop_name - ) - self._add_list_getter() - self._add_creator() - self._add_inserter() - self._add_adder() - self._add_public_adder() - delattr(element_cls, prop_name) - - -class ZeroOrMore(_BaseChildElement): - """ - Defines an optional repeating child element for MetaOxmlElement. - """ - def populate_class_members(self, element_cls, prop_name): - """ - Add the appropriate methods to *element_cls*. - """ - super(ZeroOrMore, self).populate_class_members( - element_cls, prop_name - ) - self._add_list_getter() - self._add_creator() - self._add_inserter() - self._add_adder() - self._add_public_adder() - delattr(element_cls, prop_name) - - -class ZeroOrOne(_BaseChildElement): - """ - Defines an optional child element for MetaOxmlElement. - """ - def populate_class_members(self, element_cls, prop_name): - """ - Add the appropriate methods to *element_cls*. - """ - super(ZeroOrOne, self).populate_class_members(element_cls, prop_name) - self._add_getter() - self._add_creator() - self._add_inserter() - self._add_adder() - self._add_get_or_adder() - self._add_remover() - - def _add_get_or_adder(self): - """ - Add a ``get_or_add_x()`` method to the element class for this - child element. - """ - def get_or_add_child(obj): - child = getattr(obj, self._prop_name) - if child is None: - add_method = getattr(obj, self._add_method_name) - child = add_method() - return child - get_or_add_child.__doc__ = ( - 'Return the ``<%s>`` child element, newly added if not present.' 
- ) % self._nsptagname - self._add_to_class(self._get_or_add_method_name, get_or_add_child) - - def _add_remover(self): - """ - Add a ``_remove_x()`` method to the element class for this child - element. - """ - def _remove_child(obj): - obj.remove_all(self._nsptagname) - _remove_child.__doc__ = ( - 'Remove all ``<%s>`` child elements.' - ) % self._nsptagname - self._add_to_class(self._remove_method_name, _remove_child) - - @lazyproperty - def _get_or_add_method_name(self): - return 'get_or_add_%s' % self._prop_name - - -class ZeroOrOneChoice(_BaseChildElement): - """ - Correspondes to an ``EG_*`` element group where at most one of its - members may appear as a child. - """ - def __init__(self, choices, successors=()): - self._choices = choices - self._successors = successors - - def populate_class_members(self, element_cls, prop_name): - """ - Add the appropriate methods to *element_cls*. - """ - super(ZeroOrOneChoice, self).populate_class_members( - element_cls, prop_name - ) - self._add_choice_getter() - for choice in self._choices: - choice.populate_class_members( - element_cls, self._prop_name, self._successors - ) - self._add_group_remover() - - def _add_choice_getter(self): - """ - Add a read-only ``{prop_name}`` property to the element class that - returns the present member of this group, or |None| if none are - present. - """ - property_ = property(self._choice_getter, None, None) - # assign unconditionally to overwrite element name definition - setattr(self._element_cls, self._prop_name, property_) - - def _add_group_remover(self): - """ - Add a ``_remove_eg_x()`` method to the element class for this choice - group. - """ - def _remove_choice_group(obj): - for tagname in self._member_nsptagnames: - obj.remove_all(tagname) - - _remove_choice_group.__doc__ = ( - 'Remove the current choice group child element if present.' 
- ) - self._add_to_class( - self._remove_choice_group_method_name, _remove_choice_group - ) - - @property - def _choice_getter(self): - """ - Return a function object suitable for the "get" side of the property - descriptor. - """ - def get_group_member_element(obj): - return obj.first_child_found_in(*self._member_nsptagnames) - get_group_member_element.__doc__ = ( - 'Return the child element belonging to this element group, or ' - '|None| if no member child is present.' - ) - return get_group_member_element - - @lazyproperty - def _member_nsptagnames(self): - """ - Sequence of namespace-prefixed tagnames, one for each of the member - elements of this choice group. - """ - return [choice.nsptagname for choice in self._choices] - - @lazyproperty - def _remove_choice_group_method_name(self): - return '_remove_%s' % self._prop_name - - -class _OxmlElementBase(etree.ElementBase): - """ - Effective base class for all custom element classes, to add standardized - behavior to all classes in one place. Actual inheritance is from - BaseOxmlElement below, needed to manage Python 2-3 metaclass declaration - compatibility. - """ - - __metaclass__ = MetaOxmlElement - - def __repr__(self): - return "<%s '<%s>' at 0x%0x>" % ( - self.__class__.__name__, self._nsptag, id(self) - ) - - def first_child_found_in(self, *tagnames): - """ - Return the first child found with tag in *tagnames*, or None if - not found. - """ - for tagname in tagnames: - child = self.find(qn(tagname)) - if child is not None: - return child - return None - - def insert_element_before(self, elm, *tagnames): - successor = self.first_child_found_in(*tagnames) - if successor is not None: - successor.addprevious(elm) - else: - self.append(elm) - return elm - - def remove_all(self, *tagnames): - """ - Remove all child elements whose tagname (e.g. 'a:p') appears in - *tagnames*. 
- """ - for tagname in tagnames: - matching = self.findall(qn(tagname)) - for child in matching: - self.remove(child) - - @property - def xml(self): - """ - Return XML string for this element, suitable for testing purposes. - Pretty printed for readability and without an XML declaration at the - top. - """ - return serialize_for_reading(self) - - def xpath(self, xpath_str): - """ - Override of ``lxml`` _Element.xpath() method to provide standard Open - XML namespace mapping (``nsmap``) in centralized location. - """ - return super(BaseOxmlElement, self).xpath( - xpath_str, namespaces=nsmap - ) - - @property - def _nsptag(self): - return NamespacePrefixedTag.from_clark_name(self.tag) - - -BaseOxmlElement = MetaOxmlElement( - 'BaseOxmlElement', (etree.ElementBase,), dict(_OxmlElementBase.__dict__) -) diff --git a/docx/package.py b/docx/package.py deleted file mode 100644 index 4c9a6f6a1..000000000 --- a/docx/package.py +++ /dev/null @@ -1,115 +0,0 @@ -# encoding: utf-8 - -""" -WordprocessingML Package class and related objects -""" - -from __future__ import absolute_import, print_function, unicode_literals - -from docx.image.image import Image -from docx.opc.constants import RELATIONSHIP_TYPE as RT -from docx.opc.package import OpcPackage -from docx.opc.packuri import PackURI -from docx.parts.image import ImagePart -from docx.shared import lazyproperty - - -class Package(OpcPackage): - """ - Customizations specific to a WordprocessingML package. - """ - def after_unmarshal(self): - """ - Called by loading code after all parts and relationships have been - loaded, to afford the opportunity for any required post-processing. - """ - self._gather_image_parts() - - @lazyproperty - def image_parts(self): - """ - Collection of all image parts in this package. - """ - return ImageParts() - - def _gather_image_parts(self): - """ - Load the image part collection with all the image parts in package. 
- """ - for rel in self.iter_rels(): - if rel.is_external: - continue - if rel.reltype != RT.IMAGE: - continue - if rel.target_part in self.image_parts: - continue - self.image_parts.append(rel.target_part) - - -class ImageParts(object): - """ - Collection of |ImagePart| instances corresponding to each image part in - the package. - """ - def __init__(self): - super(ImageParts, self).__init__() - self._image_parts = [] - - def __contains__(self, item): - return self._image_parts.__contains__(item) - - def __iter__(self): - return self._image_parts.__iter__() - - def __len__(self): - return self._image_parts.__len__() - - def append(self, item): - self._image_parts.append(item) - - def get_or_add_image_part(self, image_descriptor): - """ - Return an |ImagePart| instance containing the image identified by - *image_descriptor*, newly created if a matching one is not present in - the collection. - """ - image = Image.from_file(image_descriptor) - matching_image_part = self._get_by_sha1(image.sha1) - if matching_image_part is not None: - return matching_image_part - return self._add_image_part(image) - - def _add_image_part(self, image): - """ - Return an |ImagePart| instance newly created from image and appended - to the collection. - """ - partname = self._next_image_partname(image.ext) - image_part = ImagePart.from_image(image, partname) - self.append(image_part) - return image_part - - def _get_by_sha1(self, sha1): - """ - Return the image part in this collection having a SHA1 hash matching - *sha1*, or |None| if not found. - """ - for image_part in self._image_parts: - if image_part.sha1 == sha1: - return image_part - return None - - def _next_image_partname(self, ext): - """ - The next available image partname, starting from - ``/word/media/image1.{ext}`` where unused numbers are reused. The - partname is unique by number, without regard to the extension. *ext* - does not include the leading period. 
- """ - def image_partname(n): - return PackURI('/word/media/image%d.%s' % (n, ext)) - used_numbers = [image_part.partname.idx for image_part in self] - for n in range(1, len(self)+1): - if n not in used_numbers: - return image_partname(n) - return image_partname(len(self)+1) diff --git a/docx/parts/document.py b/docx/parts/document.py deleted file mode 100644 index 7a23e9a5e..000000000 --- a/docx/parts/document.py +++ /dev/null @@ -1,172 +0,0 @@ -# encoding: utf-8 - -""" -|DocumentPart| and closely related objects -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from ..document import Document -from .numbering import NumberingPart -from ..opc.constants import RELATIONSHIP_TYPE as RT -from ..opc.part import XmlPart -from ..oxml.shape import CT_Inline -from ..shape import InlineShapes -from ..shared import lazyproperty -from .settings import SettingsPart -from .styles import StylesPart - - -class DocumentPart(XmlPart): - """ - Main document part of a WordprocessingML (WML) package, aka a .docx file. - Acts as broker to other parts such as image, core properties, and style - parts. It also acts as a convenient delegate when a mid-document object - needs a service involving a remote ancestor. The `Parented.part` property - inherited by many content objects provides access to this part object for - that purpose. - """ - @property - def core_properties(self): - """ - A |CoreProperties| object providing read/write access to the core - properties of this document. - """ - return self.package.core_properties - - @property - def document(self): - """ - A |Document| object providing access to the content of this document. - """ - return Document(self._element, self) - - def get_or_add_image(self, image_descriptor): - """ - Return an (rId, image) 2-tuple for the image identified by - *image_descriptor*. *image* is an |Image| instance providing access - to the properties of the image, such as dimensions and image type. 
- *rId* is the key for the relationship between this document part and - the image part, reused if already present, newly created if not. - """ - image_part = self._package.image_parts.get_or_add_image_part( - image_descriptor - ) - rId = self.relate_to(image_part, RT.IMAGE) - return rId, image_part.image - - def get_style(self, style_id, style_type): - """ - Return the style in this document matching *style_id*. Returns the - default style for *style_type* if *style_id* is |None| or does not - match a defined style of *style_type*. - """ - return self.styles.get_by_id(style_id, style_type) - - def get_style_id(self, style_or_name, style_type): - """ - Return the style_id (|str|) of the style of *style_type* matching - *style_or_name*. Returns |None| if the style resolves to the default - style for *style_type* or if *style_or_name* is itself |None|. Raises - if *style_or_name* is a style of the wrong type or names a style not - present in the document. - """ - return self.styles.get_style_id(style_or_name, style_type) - - @lazyproperty - def inline_shapes(self): - """ - The |InlineShapes| instance containing the inline shapes in the - document. - """ - return InlineShapes(self._element.body, self) - - def new_pic_inline(self, image_descriptor, width, height): - """ - Return a newly-created `w:inline` element containing the image - specified by *image_descriptor* and scaled based on the values of - *width* and *height*. - """ - rId, image = self.get_or_add_image(image_descriptor) - cx, cy = image.scaled_dimensions(width, height) - shape_id, filename = self.next_id, image.filename - return CT_Inline.new_pic_inline(shape_id, rId, filename, cx, cy) - - @property - def next_id(self): - """ - The next available positive integer id value in this document. Gaps - in id sequence are filled. The id attribute value is unique in the - document, without regard to the element type it appears on. 
- """ - id_str_lst = self._element.xpath('//@id') - used_ids = [int(id_str) for id_str in id_str_lst if id_str.isdigit()] - for n in range(1, len(used_ids)+2): - if n not in used_ids: - return n - - @lazyproperty - def numbering_part(self): - """ - A |NumberingPart| object providing access to the numbering - definitions for this document. Creates an empty numbering part if one - is not present. - """ - try: - return self.part_related_by(RT.NUMBERING) - except KeyError: - numbering_part = NumberingPart.new() - self.relate_to(numbering_part, RT.NUMBERING) - return numbering_part - - def save(self, path_or_stream): - """ - Save this document to *path_or_stream*, which can be either a path to - a filesystem location (a string) or a file-like object. - """ - self.package.save(path_or_stream) - - @property - def settings(self): - """ - A |Settings| object providing access to the settings in the settings - part of this document. - """ - return self._settings_part.settings - - @property - def styles(self): - """ - A |Styles| object providing access to the styles in the styles part - of this document. - """ - return self._styles_part.styles - - @property - def _settings_part(self): - """ - A |SettingsPart| object providing access to the document-level - settings for this document. Creates a default settings part if one is - not present. - """ - try: - return self.part_related_by(RT.SETTINGS) - except KeyError: - settings_part = SettingsPart.default(self.package) - self.relate_to(settings_part, RT.SETTINGS) - return settings_part - - @property - def _styles_part(self): - """ - Instance of |StylesPart| for this document. Creates an empty styles - part if one is not present. 
- """ - try: - return self.part_related_by(RT.STYLES) - except KeyError: - styles_part = StylesPart.default(self.package) - self.relate_to(styles_part, RT.STYLES) - return styles_part diff --git a/docx/parts/image.py b/docx/parts/image.py deleted file mode 100644 index 6ece20d80..000000000 --- a/docx/parts/image.py +++ /dev/null @@ -1,89 +0,0 @@ -# encoding: utf-8 - -""" -The proxy class for an image part, and related objects. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -import hashlib - -from docx.image.image import Image -from docx.opc.part import Part -from docx.shared import Emu, Inches - - -class ImagePart(Part): - """ - An image part. Corresponds to the target part of a relationship with type - RELATIONSHIP_TYPE.IMAGE. - """ - def __init__(self, partname, content_type, blob, image=None): - super(ImagePart, self).__init__(partname, content_type, blob) - self._image = image - - @property - def default_cx(self): - """ - Native width of this image, calculated from its width in pixels and - horizontal dots per inch (dpi). - """ - px_width = self.image.px_width - horz_dpi = self.image.horz_dpi - width_in_inches = px_width / horz_dpi - return Inches(width_in_inches) - - @property - def default_cy(self): - """ - Native height of this image, calculated from its height in pixels and - vertical dots per inch (dpi). - """ - px_height = self.image.px_height - horz_dpi = self.image.horz_dpi - height_in_emu = 914400 * px_height / horz_dpi - return Emu(height_in_emu) - - @property - def filename(self): - """ - Filename from which this image part was originally created. A generic - name, e.g. 'image.png', is substituted if no name is available, for - example when the image was loaded from an unnamed stream. In that - case a default extension is applied based on the detected MIME type - of the image. 
- """ - if self._image is not None: - return self._image.filename - return 'image.%s' % self.partname.ext - - @classmethod - def from_image(cls, image, partname): - """ - Return an |ImagePart| instance newly created from *image* and - assigned *partname*. - """ - return ImagePart(partname, image.content_type, image.blob, image) - - @property - def image(self): - if self._image is None: - self._image = Image.from_blob(self.blob) - return self._image - - @classmethod - def load(cls, partname, content_type, blob, package): - """ - Called by ``docx.opc.package.PartFactory`` to load an image part from - a package being opened by ``Document(...)`` call. - """ - return cls(partname, content_type, blob) - - @property - def sha1(self): - """ - SHA1 hash digest of the blob of this image part. - """ - return hashlib.sha1(self._blob).hexdigest() diff --git a/docx/parts/numbering.py b/docx/parts/numbering.py deleted file mode 100644 index e324c5aac..000000000 --- a/docx/parts/numbering.py +++ /dev/null @@ -1,47 +0,0 @@ -# encoding: utf-8 - -""" -|NumberingPart| and closely related objects -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from ..opc.part import XmlPart -from ..shared import lazyproperty - - -class NumberingPart(XmlPart): - """ - Proxy for the numbering.xml part containing numbering definitions for - a document or glossary. - """ - @classmethod - def new(cls): - """ - Return newly created empty numbering part, containing only the root - ```` element. - """ - raise NotImplementedError - - @lazyproperty - def numbering_definitions(self): - """ - The |_NumberingDefinitions| instance containing the numbering - definitions ( element proxies) for this numbering part. - """ - return _NumberingDefinitions(self._element) - - -class _NumberingDefinitions(object): - """ - Collection of |_NumberingDefinition| instances corresponding to the - ```` elements in a numbering part. 
- """ - def __init__(self, numbering_elm): - super(_NumberingDefinitions, self).__init__() - self._numbering = numbering_elm - - def __len__(self): - return len(self._numbering.num_lst) diff --git a/docx/parts/settings.py b/docx/parts/settings.py deleted file mode 100644 index a701b1726..000000000 --- a/docx/parts/settings.py +++ /dev/null @@ -1,54 +0,0 @@ -# encoding: utf-8 - -""" -|SettingsPart| and closely related objects -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -import os - -from ..opc.constants import CONTENT_TYPE as CT -from ..opc.packuri import PackURI -from ..opc.part import XmlPart -from ..oxml import parse_xml -from ..settings import Settings - - -class SettingsPart(XmlPart): - """ - Document-level settings part of a WordprocessingML (WML) package. - """ - @classmethod - def default(cls, package): - """ - Return a newly created settings part, containing a default - `w:settings` element tree. - """ - partname = PackURI('/word/settings.xml') - content_type = CT.WML_SETTINGS - element = parse_xml(cls._default_settings_xml()) - return cls(partname, content_type, element, package) - - @property - def settings(self): - """ - A |Settings| proxy object for the `w:settings` element in this part, - containing the document-level settings for this document. - """ - return Settings(self.element) - - @classmethod - def _default_settings_xml(cls): - """ - Return a bytestream containing XML for a default settings part. 
- """ - path = os.path.join( - os.path.split(__file__)[0], '..', 'templates', - 'default-settings.xml' - ) - with open(path, 'rb') as f: - xml_bytes = f.read() - return xml_bytes diff --git a/docx/parts/styles.py b/docx/parts/styles.py deleted file mode 100644 index 00c7cb3c3..000000000 --- a/docx/parts/styles.py +++ /dev/null @@ -1,55 +0,0 @@ -# encoding: utf-8 - -""" -Provides StylesPart and related objects -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -import os - -from ..opc.constants import CONTENT_TYPE as CT -from ..opc.packuri import PackURI -from ..opc.part import XmlPart -from ..oxml import parse_xml -from ..styles.styles import Styles - - -class StylesPart(XmlPart): - """ - Proxy for the styles.xml part containing style definitions for a document - or glossary. - """ - @classmethod - def default(cls, package): - """ - Return a newly created styles part, containing a default set of - elements. - """ - partname = PackURI('/word/styles.xml') - content_type = CT.WML_STYLES - element = parse_xml(cls._default_styles_xml()) - return cls(partname, content_type, element, package) - - @property - def styles(self): - """ - The |_Styles| instance containing the styles ( element - proxies) for this styles part. - """ - return Styles(self.element) - - @classmethod - def _default_styles_xml(cls): - """ - Return a bytestream containing XML for a default styles part. - """ - path = os.path.join( - os.path.split(__file__)[0], '..', 'templates', - 'default-styles.xml' - ) - with open(path, 'rb') as f: - xml_bytes = f.read() - return xml_bytes diff --git a/docx/section.py b/docx/section.py deleted file mode 100644 index 16221243b..000000000 --- a/docx/section.py +++ /dev/null @@ -1,185 +0,0 @@ -# encoding: utf-8 - -""" -The |Section| object and related proxy classes. 
-""" - -from __future__ import absolute_import, print_function, unicode_literals - -from collections import Sequence - - -class Sections(Sequence): - """ - Sequence of |Section| objects corresponding to the sections in the - document. Supports ``len()``, iteration, and indexed access. - """ - def __init__(self, document_elm): - super(Sections, self).__init__() - self._document_elm = document_elm - - def __getitem__(self, key): - if isinstance(key, slice): - sectPr_lst = self._document_elm.sectPr_lst[key] - return [Section(sectPr) for sectPr in sectPr_lst] - sectPr = self._document_elm.sectPr_lst[key] - return Section(sectPr) - - def __iter__(self): - for sectPr in self._document_elm.sectPr_lst: - yield Section(sectPr) - - def __len__(self): - return len(self._document_elm.sectPr_lst) - - -class Section(object): - """ - Document section, providing access to section and page setup settings. - """ - def __init__(self, sectPr): - super(Section, self).__init__() - self._sectPr = sectPr - - @property - def bottom_margin(self): - """ - |Length| object representing the bottom margin for all pages in this - section in English Metric Units. - """ - return self._sectPr.bottom_margin - - @bottom_margin.setter - def bottom_margin(self, value): - self._sectPr.bottom_margin = value - - @property - def footer_distance(self): - """ - |Length| object representing the distance from the bottom edge of the - page to the bottom edge of the footer. |None| if no setting is present - in the XML. - """ - return self._sectPr.footer - - @footer_distance.setter - def footer_distance(self, value): - self._sectPr.footer = value - - @property - def gutter(self): - """ - |Length| object representing the page gutter size in English Metric - Units for all pages in this section. The page gutter is extra spacing - added to the *inner* margin to ensure even margins after page - binding. 
- """ - return self._sectPr.gutter - - @gutter.setter - def gutter(self, value): - self._sectPr.gutter = value - - @property - def header_distance(self): - """ - |Length| object representing the distance from the top edge of the - page to the top edge of the header. |None| if no setting is present - in the XML. - """ - return self._sectPr.header - - @header_distance.setter - def header_distance(self, value): - self._sectPr.header = value - - @property - def left_margin(self): - """ - |Length| object representing the left margin for all pages in this - section in English Metric Units. - """ - return self._sectPr.left_margin - - @left_margin.setter - def left_margin(self, value): - self._sectPr.left_margin = value - - @property - def orientation(self): - """ - Member of the :ref:`WdOrientation` enumeration specifying the page - orientation for this section, one of ``WD_ORIENT.PORTRAIT`` or - ``WD_ORIENT.LANDSCAPE``. - """ - return self._sectPr.orientation - - @orientation.setter - def orientation(self, value): - self._sectPr.orientation = value - - @property - def page_height(self): - """ - Total page height used for this section, inclusive of all edge spacing - values such as margins. Page orientation is taken into account, so - for example, its expected value would be ``Inches(8.5)`` for - letter-sized paper when orientation is landscape. - """ - return self._sectPr.page_height - - @page_height.setter - def page_height(self, value): - self._sectPr.page_height = value - - @property - def page_width(self): - """ - Total page width used for this section, inclusive of all edge spacing - values such as margins. Page orientation is taken into account, so - for example, its expected value would be ``Inches(11)`` for - letter-sized paper when orientation is landscape. 
- """ - return self._sectPr.page_width - - @page_width.setter - def page_width(self, value): - self._sectPr.page_width = value - - @property - def right_margin(self): - """ - |Length| object representing the right margin for all pages in this - section in English Metric Units. - """ - return self._sectPr.right_margin - - @right_margin.setter - def right_margin(self, value): - self._sectPr.right_margin = value - - @property - def start_type(self): - """ - The member of the :ref:`WdSectionStart` enumeration corresponding to - the initial break behavior of this section, e.g. - ``WD_SECTION.ODD_PAGE`` if the section should begin on the next odd - page. - """ - return self._sectPr.start_type - - @start_type.setter - def start_type(self, value): - self._sectPr.start_type = value - - @property - def top_margin(self): - """ - |Length| object representing the top margin for all pages in this - section in English Metric Units. - """ - return self._sectPr.top_margin - - @top_margin.setter - def top_margin(self, value): - self._sectPr.top_margin = value diff --git a/docx/settings.py b/docx/settings.py deleted file mode 100644 index 737146697..000000000 --- a/docx/settings.py +++ /dev/null @@ -1,20 +0,0 @@ -# encoding: utf-8 - -""" -Settings object, providing access to document-level settings. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from .shared import ElementProxy - - -class Settings(ElementProxy): - """ - Provides access to document-level settings for a document. Accessed using - the :attr:`.Document.settings` property. - """ - - __slots__ = () diff --git a/docx/shared.py b/docx/shared.py deleted file mode 100644 index 919964325..000000000 --- a/docx/shared.py +++ /dev/null @@ -1,250 +0,0 @@ -# encoding: utf-8 - -""" -Objects shared by docx modules. 
-""" - -from __future__ import absolute_import, print_function, unicode_literals - - -class Length(int): - """ - Base class for length constructor classes Inches, Cm, Mm, Px, and Emu. - Behaves as an int count of English Metric Units, 914,400 to the inch, - 36,000 to the mm. Provides convenience unit conversion methods in the form - of read-only properties. Immutable. - """ - _EMUS_PER_INCH = 914400 - _EMUS_PER_CM = 360000 - _EMUS_PER_MM = 36000 - _EMUS_PER_PT = 12700 - _EMUS_PER_TWIP = 635 - - def __new__(cls, emu): - return int.__new__(cls, emu) - - @property - def cm(self): - """ - The equivalent length expressed in centimeters (float). - """ - return self / float(self._EMUS_PER_CM) - - @property - def emu(self): - """ - The equivalent length expressed in English Metric Units (int). - """ - return self - - @property - def inches(self): - """ - The equivalent length expressed in inches (float). - """ - return self / float(self._EMUS_PER_INCH) - - @property - def mm(self): - """ - The equivalent length expressed in millimeters (float). - """ - return self / float(self._EMUS_PER_MM) - - @property - def pt(self): - """ - Floating point length in points - """ - return self / float(self._EMUS_PER_PT) - - @property - def twips(self): - """ - The equivalent length expressed in twips (int). - """ - return int(round(self / float(self._EMUS_PER_TWIP))) - - -class Inches(Length): - """ - Convenience constructor for length in inches, e.g. - ``width = Inches(0.5)``. - """ - def __new__(cls, inches): - emu = int(inches * Length._EMUS_PER_INCH) - return Length.__new__(cls, emu) - - -class Cm(Length): - """ - Convenience constructor for length in centimeters, e.g. - ``height = Cm(12)``. - """ - def __new__(cls, cm): - emu = int(cm * Length._EMUS_PER_CM) - return Length.__new__(cls, emu) - - -class Emu(Length): - """ - Convenience constructor for length in English Metric Units, e.g. - ``width = Emu(457200)``. 
- """ - def __new__(cls, emu): - return Length.__new__(cls, int(emu)) - - -class Mm(Length): - """ - Convenience constructor for length in millimeters, e.g. - ``width = Mm(240.5)``. - """ - def __new__(cls, mm): - emu = int(mm * Length._EMUS_PER_MM) - return Length.__new__(cls, emu) - - -class Pt(Length): - """ - Convenience value class for specifying a length in points - """ - def __new__(cls, points): - emu = int(points * Length._EMUS_PER_PT) - return Length.__new__(cls, emu) - - -class Twips(Length): - """ - Convenience constructor for length in twips, e.g. ``width = Twips(42)``. - A twip is a twentieth of a point, 635 EMU. - """ - def __new__(cls, twips): - emu = int(twips * Length._EMUS_PER_TWIP) - return Length.__new__(cls, emu) - - -class RGBColor(tuple): - """ - Immutable value object defining a particular RGB color. - """ - def __new__(cls, r, g, b): - msg = 'RGBColor() takes three integer values 0-255' - for val in (r, g, b): - if not isinstance(val, int) or val < 0 or val > 255: - raise ValueError(msg) - return super(RGBColor, cls).__new__(cls, (r, g, b)) - - def __repr__(self): - return 'RGBColor(0x%02x, 0x%02x, 0x%02x)' % self - - def __str__(self): - """ - Return a hex string rgb value, like '3C2F80' - """ - return '%02X%02X%02X' % self - - @classmethod - def from_string(cls, rgb_hex_str): - """ - Return a new instance from an RGB color hex string like ``'3C2F80'``. - """ - r = int(rgb_hex_str[:2], 16) - g = int(rgb_hex_str[2:4], 16) - b = int(rgb_hex_str[4:], 16) - return cls(r, g, b) - - -def lazyproperty(f): - """ - @lazyprop decorator. Decorated method will be called only on first access - to calculate a cached property value. After that, the cached value is - returned. 
- """ - cache_attr_name = '_%s' % f.__name__ # like '_foobar' for prop 'foobar' - docstring = f.__doc__ - - def get_prop_value(obj): - try: - return getattr(obj, cache_attr_name) - except AttributeError: - value = f(obj) - setattr(obj, cache_attr_name, value) - return value - - return property(get_prop_value, doc=docstring) - - -def write_only_property(f): - """ - @write_only_property decorator. Creates a property (descriptor attribute) - that accepts assignment, but not getattr (use in an expression). - """ - docstring = f.__doc__ - - return property(fset=f, doc=docstring) - - -class ElementProxy(object): - """ - Base class for lxml element proxy classes. An element proxy class is one - whose primary responsibilities are fulfilled by manipulating the - attributes and child elements of an XML element. They are the most common - type of class in python-docx other than custom element (oxml) classes. - """ - - __slots__ = ('_element', '_parent') - - def __init__(self, element, parent=None): - self._element = element - self._parent = parent - - def __eq__(self, other): - """ - Return |True| if this proxy object refers to the same oxml element as - does *other*. ElementProxy objects are value objects and should - maintain no mutable local state. Equality for proxy objects is - defined as referring to the same XML element, whether or not they are - the same proxy object instance. - """ - if not isinstance(other, ElementProxy): - return False - return self._element is other._element - - def __ne__(self, other): - if not isinstance(other, ElementProxy): - return True - return self._element is not other._element - - @property - def element(self): - """ - The lxml element proxied by this object. 
- """ - return self._element - - @property - def part(self): - """ - The package part containing this object - """ - return self._parent.part - - -class Parented(object): - """ - Provides common services for document elements that occur below a part - but may occasionally require an ancestor object to provide a service, - such as add or drop a relationship. Provides ``self._parent`` attribute - to subclasses. - """ - def __init__(self, parent): - super(Parented, self).__init__() - self._parent = parent - - @property - def part(self): - """ - The package part containing this object - """ - return self._parent.part diff --git a/docx/styles/__init__.py b/docx/styles/__init__.py deleted file mode 100644 index 3eff43e55..000000000 --- a/docx/styles/__init__.py +++ /dev/null @@ -1,50 +0,0 @@ -# encoding: utf-8 - -""" -Sub-package module for docx.styles sub-package. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - - -class BabelFish(object): - """ - Translates special-case style names from UI name (e.g. Heading 1) to - internal/styles.xml name (e.g. heading 1) and back. - """ - - style_aliases = ( - ('Caption', 'caption'), - ('Heading 1', 'heading 1'), - ('Heading 2', 'heading 2'), - ('Heading 3', 'heading 3'), - ('Heading 4', 'heading 4'), - ('Heading 5', 'heading 5'), - ('Heading 6', 'heading 6'), - ('Heading 7', 'heading 7'), - ('Heading 8', 'heading 8'), - ('Heading 9', 'heading 9'), - ) - - internal_style_names = dict(style_aliases) - ui_style_names = dict((item[1], item[0]) for item in style_aliases) - - @classmethod - def ui2internal(cls, ui_style_name): - """ - Return the internal style name corresponding to *ui_style_name*, such - as 'heading 1' for 'Heading 1'. 
- """ - return cls.internal_style_names.get(ui_style_name, ui_style_name) - - @classmethod - def internal2ui(cls, internal_style_name): - """ - Return the user interface style name corresponding to - *internal_style_name*, such as 'Heading 1' for 'heading 1'. - """ - return cls.ui_style_names.get( - internal_style_name, internal_style_name - ) diff --git a/docx/styles/latent.py b/docx/styles/latent.py deleted file mode 100644 index 99b1514ff..000000000 --- a/docx/styles/latent.py +++ /dev/null @@ -1,224 +0,0 @@ -# encoding: utf-8 - -""" -Latent style-related objects. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from . import BabelFish -from ..shared import ElementProxy - - -class LatentStyles(ElementProxy): - """ - Provides access to the default behaviors for latent styles in this - document and to the collection of |_LatentStyle| objects that define - overrides of those defaults for a particular named latent style. - """ - - __slots__ = () - - def __getitem__(self, key): - """ - Enables dictionary-style access to a latent style by name. - """ - style_name = BabelFish.ui2internal(key) - lsdException = self._element.get_by_name(style_name) - if lsdException is None: - raise KeyError("no latent style with name '%s'" % key) - return _LatentStyle(lsdException) - - def __iter__(self): - return (_LatentStyle(ls) for ls in self._element.lsdException_lst) - - def __len__(self): - return len(self._element.lsdException_lst) - - def add_latent_style(self, name): - """ - Return a newly added |_LatentStyle| object to override the inherited - defaults defined in this latent styles object for the built-in style - having *name*. 
- """ - lsdException = self._element.add_lsdException() - lsdException.name = BabelFish.ui2internal(name) - return _LatentStyle(lsdException) - - @property - def default_priority(self): - """ - Integer between 0 and 99 inclusive specifying the default sort order - for latent styles in style lists and the style gallery. |None| if no - value is assigned, which causes Word to use the default value 99. - """ - return self._element.defUIPriority - - @default_priority.setter - def default_priority(self, value): - self._element.defUIPriority = value - - @property - def default_to_hidden(self): - """ - Boolean specifying whether the default behavior for latent styles is - to be hidden. A hidden style does not appear in the recommended list - or in the style gallery. - """ - return self._element.bool_prop('defSemiHidden') - - @default_to_hidden.setter - def default_to_hidden(self, value): - self._element.set_bool_prop('defSemiHidden', value) - - @property - def default_to_locked(self): - """ - Boolean specifying whether the default behavior for latent styles is - to be locked. A locked style does not appear in the styles panel or - the style gallery and cannot be applied to document content. This - behavior is only active when formatting protection is turned on for - the document (via the Developer menu). - """ - return self._element.bool_prop('defLockedState') - - @default_to_locked.setter - def default_to_locked(self, value): - self._element.set_bool_prop('defLockedState', value) - - @property - def default_to_quick_style(self): - """ - Boolean specifying whether the default behavior for latent styles is - to appear in the style gallery when not hidden. 
- """ - return self._element.bool_prop('defQFormat') - - @default_to_quick_style.setter - def default_to_quick_style(self, value): - self._element.set_bool_prop('defQFormat', value) - - @property - def default_to_unhide_when_used(self): - """ - Boolean specifying whether the default behavior for latent styles is - to be unhidden when first applied to content. - """ - return self._element.bool_prop('defUnhideWhenUsed') - - @default_to_unhide_when_used.setter - def default_to_unhide_when_used(self, value): - self._element.set_bool_prop('defUnhideWhenUsed', value) - - @property - def load_count(self): - """ - Integer specifying the number of built-in styles to initialize to the - defaults specified in this |LatentStyles| object. |None| if there is - no setting in the XML (very uncommon). The default Word 2011 template - sets this value to 276, accounting for the built-in styles in Word - 2010. - """ - return self._element.count - - @load_count.setter - def load_count(self, value): - self._element.count = value - - -class _LatentStyle(ElementProxy): - """ - Proxy for an `w:lsdException` element, which specifies display behaviors - for a built-in style when no definition for that style is stored yet in - the `styles.xml` part. The values in this element override the defaults - specified in the parent `w:latentStyles` element. - """ - - __slots__ = () - - def delete(self): - """ - Remove this latent style definition such that the defaults defined in - the containing |LatentStyles| object provide the effective value for - each of its attributes. Attempting to access any attributes on this - object after calling this method will raise |AttributeError|. - """ - self._element.delete() - self._element = None - - @property - def hidden(self): - """ - Tri-state value specifying whether this latent style should appear in - the recommended list. |None| indicates the effective value is - inherited from the parent ```` element. 
- """ - return self._element.on_off_prop('semiHidden') - - @hidden.setter - def hidden(self, value): - self._element.set_on_off_prop('semiHidden', value) - - @property - def locked(self): - """ - Tri-state value specifying whether this latent styles is locked. - A locked style does not appear in the styles panel or the style - gallery and cannot be applied to document content. This behavior is - only active when formatting protection is turned on for the document - (via the Developer menu). - """ - return self._element.on_off_prop('locked') - - @locked.setter - def locked(self, value): - self._element.set_on_off_prop('locked', value) - - @property - def name(self): - """ - The name of the built-in style this exception applies to. - """ - return BabelFish.internal2ui(self._element.name) - - @property - def priority(self): - """ - The integer sort key for this latent style in the Word UI. - """ - return self._element.uiPriority - - @priority.setter - def priority(self, value): - self._element.uiPriority = value - - @property - def quick_style(self): - """ - Tri-state value specifying whether this latent style should appear in - the Word styles gallery when not hidden. |None| indicates the - effective value should be inherited from the default values in its - parent |LatentStyles| object. - """ - return self._element.on_off_prop('qFormat') - - @quick_style.setter - def quick_style(self, value): - self._element.set_on_off_prop('qFormat', value) - - @property - def unhide_when_used(self): - """ - Tri-state value specifying whether this style should have its - :attr:`hidden` attribute set |False| the next time the style is - applied to content. |None| indicates the effective value should be - inherited from the default specified by its parent |LatentStyles| - object. 
- """ - return self._element.on_off_prop('unhideWhenUsed') - - @unhide_when_used.setter - def unhide_when_used(self, value): - self._element.set_on_off_prop('unhideWhenUsed', value) diff --git a/docx/styles/style.py b/docx/styles/style.py deleted file mode 100644 index 24371b231..000000000 --- a/docx/styles/style.py +++ /dev/null @@ -1,265 +0,0 @@ -# encoding: utf-8 - -""" -Style object hierarchy. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from . import BabelFish -from ..enum.style import WD_STYLE_TYPE -from ..shared import ElementProxy -from ..text.font import Font -from ..text.parfmt import ParagraphFormat - - -def StyleFactory(style_elm): - """ - Return a style object of the appropriate |BaseStyle| subclass, according - to the type of *style_elm*. - """ - style_cls = { - WD_STYLE_TYPE.PARAGRAPH: _ParagraphStyle, - WD_STYLE_TYPE.CHARACTER: _CharacterStyle, - WD_STYLE_TYPE.TABLE: _TableStyle, - WD_STYLE_TYPE.LIST: _NumberingStyle - }[style_elm.type] - - return style_cls(style_elm) - - -class BaseStyle(ElementProxy): - """ - Base class for the various types of style object, paragraph, character, - table, and numbering. These properties and methods are inherited by all - style objects. - """ - - __slots__ = () - - @property - def builtin(self): - """ - Read-only. |True| if this style is a built-in style. |False| - indicates it is a custom (user-defined) style. Note this value is - based on the presence of a `customStyle` attribute in the XML, not on - specific knowledge of which styles are built into Word. - """ - return not self._element.customStyle - - def delete(self): - """ - Remove this style definition from the document. Note that calling - this method does not remove or change the style applied to any - document content. Content items having the deleted style will be - rendered using the default style, as is any content with a style not - defined in the document. 
- """ - self._element.delete() - self._element = None - - @property - def hidden(self): - """ - |True| if display of this style in the style gallery and list of - recommended styles is suppressed. |False| otherwise. In order to be - shown in the style gallery, this value must be |False| and - :attr:`.quick_style` must be |True|. - """ - return self._element.semiHidden_val - - @hidden.setter - def hidden(self, value): - self._element.semiHidden_val = value - - @property - def locked(self): - """ - Read/write Boolean. |True| if this style is locked. A locked style - does not appear in the styles panel or the style gallery and cannot - be applied to document content. This behavior is only active when - formatting protection is turned on for the document (via the - Developer menu). - """ - return self._element.locked_val - - @locked.setter - def locked(self, value): - self._element.locked_val = value - - @property - def name(self): - """ - The UI name of this style. - """ - name = self._element.name_val - if name is None: - return None - return BabelFish.internal2ui(name) - - @name.setter - def name(self, value): - self._element.name_val = value - - @property - def priority(self): - """ - The integer sort key governing display sequence of this style in the - Word UI. |None| indicates no setting is defined, causing Word to use - the default value of 0. Style name is used as a secondary sort key to - resolve ordering of styles having the same priority value. - """ - return self._element.uiPriority_val - - @priority.setter - def priority(self, value): - self._element.uiPriority_val = value - - @property - def quick_style(self): - """ - |True| if this style should be displayed in the style gallery when - :attr:`.hidden` is |False|. Read/write Boolean. - """ - return self._element.qFormat_val - - @quick_style.setter - def quick_style(self, value): - self._element.qFormat_val = value - - @property - def style_id(self): - """ - The unique key name (string) for this style. 
This value is subject to - rewriting by Word and should generally not be changed unless you are - familiar with the internals involved. - """ - return self._element.styleId - - @style_id.setter - def style_id(self, value): - self._element.styleId = value - - @property - def type(self): - """ - Member of :ref:`WdStyleType` corresponding to the type of this style, - e.g. ``WD_STYLE_TYPE.PARAGRAPH``. - """ - type = self._element.type - if type is None: - return WD_STYLE_TYPE.PARAGRAPH - return type - - @property - def unhide_when_used(self): - """ - |True| if an application should make this style visible the next time - it is applied to content. False otherwise. Note that |docx| does not - automatically unhide a style having |True| for this attribute when it - is applied to content. - """ - return self._element.unhideWhenUsed_val - - @unhide_when_used.setter - def unhide_when_used(self, value): - self._element.unhideWhenUsed_val = value - - -class _CharacterStyle(BaseStyle): - """ - A character style. A character style is applied to a |Run| object and - primarily provides character-level formatting via the |Font| object in - its :attr:`.font` property. - """ - - __slots__ = () - - @property - def base_style(self): - """ - Style object this style inherits from or |None| if this style is - not based on another style. - """ - base_style = self._element.base_style - if base_style is None: - return None - return StyleFactory(base_style) - - @base_style.setter - def base_style(self, style): - style_id = style.style_id if style is not None else None - self._element.basedOn_val = style_id - - @property - def font(self): - """ - The |Font| object providing access to the character formatting - properties for this style, such as font name and size. - """ - return Font(self._element) - - -class _ParagraphStyle(_CharacterStyle): - """ - A paragraph style. A paragraph style provides both character formatting - and paragraph formatting such as indentation and line-spacing. 
- """ - - __slots__ = () - - def __repr__(self): - return '_ParagraphStyle(\'%s\') id: %s' % (self.name, id(self)) - - @property - def next_paragraph_style(self): - """ - |_ParagraphStyle| object representing the style to be applied - automatically to a new paragraph inserted after a paragraph of this - style. Returns self if no next paragraph style is defined. Assigning - |None| or *self* removes the setting such that new paragraphs are - created using this same style. - """ - next_style_elm = self._element.next_style - if next_style_elm is None: - return self - if next_style_elm.type != WD_STYLE_TYPE.PARAGRAPH: - return self - return StyleFactory(next_style_elm) - - @next_paragraph_style.setter - def next_paragraph_style(self, style): - if style is None or style.style_id == self.style_id: - self._element._remove_next() - else: - self._element.get_or_add_next().val = style.style_id - - @property - def paragraph_format(self): - """ - The |ParagraphFormat| object providing access to the paragraph - formatting properties for this style such as indentation. - """ - return ParagraphFormat(self._element) - - -class _TableStyle(_ParagraphStyle): - """ - A table style. A table style provides character and paragraph formatting - for its contents as well as special table formatting properties. - """ - - __slots__ = () - - def __repr__(self): - return '_TableStyle(\'%s\') id: %s' % (self.name, id(self)) - - -class _NumberingStyle(BaseStyle): - """ - A numbering style. Not yet implemented. - """ - - __slots__ = () diff --git a/docx/styles/styles.py b/docx/styles/styles.py deleted file mode 100644 index eabe53b20..000000000 --- a/docx/styles/styles.py +++ /dev/null @@ -1,157 +0,0 @@ -# encoding: utf-8 - -""" -Styles object, container for all objects in the styles part. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from warnings import warn - -from . 
import BabelFish -from .latent import LatentStyles -from ..shared import ElementProxy -from .style import BaseStyle, StyleFactory - - -class Styles(ElementProxy): - """ - A collection providing access to the styles defined in a document. - Accessed using the :attr:`.Document.styles` property. Supports ``len()``, - iteration, and dictionary-style access by style name. - """ - - __slots__ = () - - def __contains__(self, name): - """ - Enables `in` operator on style name. - """ - internal_name = BabelFish.ui2internal(name) - for style in self._element.style_lst: - if style.name_val == internal_name: - return True - return False - - def __getitem__(self, key): - """ - Enables dictionary-style access by UI name. Lookup by style id is - deprecated, triggers a warning, and will be removed in a near-future - release. - """ - style_elm = self._element.get_by_name(BabelFish.ui2internal(key)) - if style_elm is not None: - return StyleFactory(style_elm) - - style_elm = self._element.get_by_id(key) - if style_elm is not None: - msg = ( - 'style lookup by style_id is deprecated. Use style name as ' - 'key instead.' - ) - warn(msg, UserWarning) - return StyleFactory(style_elm) - - raise KeyError("no style with name '%s'" % key) - - def __iter__(self): - return (StyleFactory(style) for style in self._element.style_lst) - - def __len__(self): - return len(self._element.style_lst) - - def add_style(self, name, style_type, builtin=False): - """ - Return a newly added style object of *style_type* and identified - by *name*. A builtin style can be defined by passing True for the - optional *builtin* argument. 
- """ - style_name = BabelFish.ui2internal(name) - if style_name in self: - raise ValueError("document already contains style '%s'" % name) - style = self._element.add_style_of_type( - style_name, style_type, builtin - ) - return StyleFactory(style) - - def default(self, style_type): - """ - Return the default style for *style_type* or |None| if no default is - defined for that type (not common). - """ - style = self._element.default_for(style_type) - if style is None: - return None - return StyleFactory(style) - - def get_by_id(self, style_id, style_type): - """ - Return the style of *style_type* matching *style_id*. Returns the - default for *style_type* if *style_id* is not found or is |None|, or - if the style having *style_id* is not of *style_type*. - """ - if style_id is None: - return self.default(style_type) - return self._get_by_id(style_id, style_type) - - def get_style_id(self, style_or_name, style_type): - """ - Return the id of the style corresponding to *style_or_name*, or - |None| if *style_or_name* is |None|. If *style_or_name* is not - a style object, the style is looked up using *style_or_name* as - a style name, raising |ValueError| if no style with that name is - defined. Raises |ValueError| if the target style is not of - *style_type*. - """ - if style_or_name is None: - return None - elif isinstance(style_or_name, BaseStyle): - return self._get_style_id_from_style(style_or_name, style_type) - else: - return self._get_style_id_from_name(style_or_name, style_type) - - @property - def latent_styles(self): - """ - A |LatentStyles| object providing access to the default behaviors for - latent styles and the collection of |_LatentStyle| objects that - define overrides of those defaults for a particular named latent - style. - """ - return LatentStyles(self._element.get_or_add_latentStyles()) - - def _get_by_id(self, style_id, style_type): - """ - Return the style of *style_type* matching *style_id*. 
Returns the - default for *style_type* if *style_id* is not found or if the style - having *style_id* is not of *style_type*. - """ - style = self._element.get_by_id(style_id) - if style is None or style.type != style_type: - return self.default(style_type) - return StyleFactory(style) - - def _get_style_id_from_name(self, style_name, style_type): - """ - Return the id of the style of *style_type* corresponding to - *style_name*. Returns |None| if that style is the default style for - *style_type*. Raises |ValueError| if the named style is not found in - the document or does not match *style_type*. - """ - return self._get_style_id_from_style(self[style_name], style_type) - - def _get_style_id_from_style(self, style, style_type): - """ - Return the id of *style*, or |None| if it is the default style of - *style_type*. Raises |ValueError| if style is not of *style_type*. - """ - if style.type != style_type: - raise ValueError( - "assigned style is type %s, need type %s" % - (style.type, style_type) - ) - if style == self.default(style_type): - return None - return style.style_id diff --git a/docx/table.py b/docx/table.py deleted file mode 100644 index d0b472fc8..000000000 --- a/docx/table.py +++ /dev/null @@ -1,427 +0,0 @@ -# encoding: utf-8 - -""" -The |Table| object and related proxy classes. -""" - -from __future__ import absolute_import, print_function, unicode_literals - -from .blkcntnr import BlockItemContainer -from .enum.style import WD_STYLE_TYPE -from .oxml.simpletypes import ST_Merge -from .shared import Inches, lazyproperty, Parented - - -class Table(Parented): - """ - Proxy class for a WordprocessingML ```` element. - """ - def __init__(self, tbl, parent): - super(Table, self).__init__(parent) - self._element = self._tbl = tbl - - def add_column(self, width): - """ - Return a |_Column| object of *width*, newly added rightmost to the - table. 
- """ - tblGrid = self._tbl.tblGrid - gridCol = tblGrid.add_gridCol() - gridCol.w = width - for tr in self._tbl.tr_lst: - tc = tr.add_tc() - tc.width = width - return _Column(gridCol, self) - - def add_row(self): - """ - Return a |_Row| instance, newly added bottom-most to the table. - """ - tbl = self._tbl - tr = tbl.add_tr() - for gridCol in tbl.tblGrid.gridCol_lst: - tc = tr.add_tc() - tc.width = gridCol.w - return _Row(tr, self) - - @property - def alignment(self): - """ - Read/write. A member of :ref:`WdRowAlignment` or None, specifying the - positioning of this table between the page margins. |None| if no - setting is specified, causing the effective value to be inherited - from the style hierarchy. - """ - return self._tblPr.alignment - - @alignment.setter - def alignment(self, value): - self._tblPr.alignment = value - - @property - def autofit(self): - """ - |True| if column widths can be automatically adjusted to improve the - fit of cell contents. |False| if table layout is fixed. Column widths - are adjusted in either case if total column width exceeds page width. - Read/write boolean. - """ - return self._tblPr.autofit - - @autofit.setter - def autofit(self, value): - self._tblPr.autofit = value - - def cell(self, row_idx, col_idx): - """ - Return |_Cell| instance correponding to table cell at *row_idx*, - *col_idx* intersection, where (0, 0) is the top, left-most cell. - """ - cell_idx = col_idx + (row_idx * self._column_count) - return self._cells[cell_idx] - - def column_cells(self, column_idx): - """ - Sequence of cells in the column at *column_idx* in this table. - """ - cells = self._cells - idxs = range(column_idx, len(cells), self._column_count) - return [cells[idx] for idx in idxs] - - @lazyproperty - def columns(self): - """ - |_Columns| instance representing the sequence of columns in this - table. - """ - return _Columns(self._tbl, self) - - def row_cells(self, row_idx): - """ - Sequence of cells in the row at *row_idx* in this table. 
- """ - column_count = self._column_count - start = row_idx * column_count - end = start + column_count - return self._cells[start:end] - - @lazyproperty - def rows(self): - """ - |_Rows| instance containing the sequence of rows in this table. - """ - return _Rows(self._tbl, self) - - @property - def style(self): - """ - Read/write. A |_TableStyle| object representing the style applied to - this table. The default table style for the document (often `Normal - Table`) is returned if the table has no directly-applied style. - Assigning |None| to this property removes any directly-applied table - style causing it to inherit the default table style of the document. - Note that the style name of a table style differs slightly from that - displayed in the user interface; a hyphen, if it appears, must be - removed. For example, `Light Shading - Accent 1` becomes `Light - Shading Accent 1`. - """ - style_id = self._tbl.tblStyle_val - return self.part.get_style(style_id, WD_STYLE_TYPE.TABLE) - - @style.setter - def style(self, style_or_name): - style_id = self.part.get_style_id( - style_or_name, WD_STYLE_TYPE.TABLE - ) - self._tbl.tblStyle_val = style_id - - @property - def table(self): - """ - Provide child objects with reference to the |Table| object they - belong to, without them having to know their direct parent is - a |Table| object. This is the terminus of a series of `parent._table` - calls from an arbitrary child through its ancestors. - """ - return self - - @property - def table_direction(self): - """ - A member of :ref:`WdTableDirection` indicating the direction in which - the table cells are ordered, e.g. `WD_TABLE_DIRECTION.LTR`. |None| - indicates the value is inherited from the style hierarchy. - """ - return self._element.bidiVisual_val - - @table_direction.setter - def table_direction(self, value): - self._element.bidiVisual_val = value - - @property - def _cells(self): - """ - A sequence of |_Cell| objects, one for each cell of the layout grid. 
- If the table contains a span, one or more |_Cell| object references - are repeated. - """ - col_count = self._column_count - cells = [] - for tc in self._tbl.iter_tcs(): - for grid_span_idx in range(tc.grid_span): - if tc.vMerge == ST_Merge.CONTINUE: - cells.append(cells[-col_count]) - elif grid_span_idx > 0: - cells.append(cells[-1]) - else: - cells.append(_Cell(tc, self)) - return cells - - @property - def _column_count(self): - """ - The number of grid columns in this table. - """ - return self._tbl.col_count - - @property - def _tblPr(self): - return self._tbl.tblPr - - -class _Cell(BlockItemContainer): - """ - Table cell - """ - def __init__(self, tc, parent): - super(_Cell, self).__init__(tc, parent) - self._tc = tc - - def add_paragraph(self, text='', style=None): - """ - Return a paragraph newly added to the end of the content in this - cell. If present, *text* is added to the paragraph in a single run. - If specified, the paragraph style *style* is applied. If *style* is - not specified or is |None|, the result is as though the 'Normal' - style was applied. Note that the formatting of text in a cell can be - influenced by the table style. *text* can contain tab (``\\t``) - characters, which are converted to the appropriate XML form for - a tab. *text* can also include newline (``\\n``) or carriage return - (``\\r``) characters, each of which is converted to a line break. - """ - return super(_Cell, self).add_paragraph(text, style) - - def add_table(self, rows, cols): - """ - Return a table newly added to this cell after any existing cell - content, having *rows* rows and *cols* columns. An empty paragraph is - added after the table because Word requires a paragraph element as - the last element in every cell. 
- """ - width = self.width if self.width is not None else Inches(1) - table = super(_Cell, self).add_table(rows, cols, width) - self.add_paragraph() - return table - - def merge(self, other_cell): - """ - Return a merged cell created by spanning the rectangular region - having this cell and *other_cell* as diagonal corners. Raises - |InvalidSpanError| if the cells do not define a rectangular region. - """ - tc, tc_2 = self._tc, other_cell._tc - merged_tc = tc.merge(tc_2) - return _Cell(merged_tc, self._parent) - - @property - def paragraphs(self): - """ - List of paragraphs in the cell. A table cell is required to contain - at least one block-level element and end with a paragraph. By - default, a new cell contains a single paragraph. Read-only - """ - return super(_Cell, self).paragraphs - - @property - def tables(self): - """ - List of tables in the cell, in the order they appear. Read-only. - """ - return super(_Cell, self).tables - - @property - def text(self): - """ - The entire contents of this cell as a string of text. Assigning - a string to this property replaces all existing content with a single - paragraph containing the assigned text in a single run. - """ - return '\n'.join(p.text for p in self.paragraphs) - - @text.setter - def text(self, text): - """ - Write-only. Set entire contents of cell to the string *text*. Any - existing content or revisions are replaced. - """ - tc = self._tc - tc.clear_content() - p = tc.add_p() - r = p.add_r() - r.text = text - - @property - def width(self): - """ - The width of this cell in EMU, or |None| if no explicit width is set. - """ - return self._tc.width - - @width.setter - def width(self, value): - self._tc.width = value - - -class _Column(Parented): - """ - Table column - """ - def __init__(self, gridCol, parent): - super(_Column, self).__init__(parent) - self._gridCol = gridCol - - @property - def cells(self): - """ - Sequence of |_Cell| instances corresponding to cells in this column. 
- """ - return tuple(self.table.column_cells(self._index)) - - @property - def table(self): - """ - Reference to the |Table| object this column belongs to. - """ - return self._parent.table - - @property - def width(self): - """ - The width of this column in EMU, or |None| if no explicit width is - set. - """ - return self._gridCol.w - - @width.setter - def width(self, value): - self._gridCol.w = value - - @property - def _index(self): - """ - Index of this column in its table, starting from zero. - """ - return self._gridCol.gridCol_idx - - -class _Columns(Parented): - """ - Sequence of |_Column| instances corresponding to the columns in a table. - Supports ``len()``, iteration and indexed access. - """ - def __init__(self, tbl, parent): - super(_Columns, self).__init__(parent) - self._tbl = tbl - - def __getitem__(self, idx): - """ - Provide indexed access, e.g. 'columns[0]' - """ - try: - gridCol = self._gridCol_lst[idx] - except IndexError: - msg = "column index [%d] is out of range" % idx - raise IndexError(msg) - return _Column(gridCol, self) - - def __iter__(self): - for gridCol in self._gridCol_lst: - yield _Column(gridCol, self) - - def __len__(self): - return len(self._gridCol_lst) - - @property - def table(self): - """ - Reference to the |Table| object this column collection belongs to. - """ - return self._parent.table - - @property - def _gridCol_lst(self): - """ - Sequence containing ```` elements for this table, each - representing a table column. - """ - tblGrid = self._tbl.tblGrid - return tblGrid.gridCol_lst - - -class _Row(Parented): - """ - Table row - """ - def __init__(self, tr, parent): - super(_Row, self).__init__(parent) - self._tr = tr - - @property - def cells(self): - """ - Sequence of |_Cell| instances corresponding to cells in this row. - """ - return tuple(self.table.row_cells(self._index)) - - @property - def table(self): - """ - Reference to the |Table| object this row belongs to. 
- """ - return self._parent.table - - @property - def _index(self): - """ - Index of this row in its table, starting from zero. - """ - return self._tr.tr_idx - - -class _Rows(Parented): - """ - Sequence of |_Row| objects corresponding to the rows in a table. - Supports ``len()``, iteration, indexed access, and slicing. - """ - def __init__(self, tbl, parent): - super(_Rows, self).__init__(parent) - self._tbl = tbl - - def __getitem__(self, idx): - """ - Provide indexed access, (e.g. 'rows[0]') - """ - return list(self)[idx] - - def __iter__(self): - return (_Row(tr, self) for tr in self._tbl.tr_lst) - - def __len__(self): - return len(self._tbl.tr_lst) - - @property - def table(self): - """ - Reference to the |Table| object this row collection belongs to. - """ - return self._parent.table diff --git a/docx/templates/default-src.docx b/docx/templates/default-src.docx deleted file mode 100644 index 31c8e20b4..000000000 Binary files a/docx/templates/default-src.docx and /dev/null differ diff --git a/docx/text/font.py b/docx/text/font.py deleted file mode 100644 index 162832101..000000000 --- a/docx/text/font.py +++ /dev/null @@ -1,411 +0,0 @@ -# encoding: utf-8 - -""" -Font-related proxy objects. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from ..dml.color import ColorFormat -from ..shared import ElementProxy - - -class Font(ElementProxy): - """ - Proxy object wrapping the parent of a ```` element and providing - access to character properties such as font name, font size, bold, and - subscript. - """ - - __slots__ = () - - @property - def all_caps(self): - """ - Read/write. Causes text in this font to appear in capital letters. - """ - return self._get_bool_prop('caps') - - @all_caps.setter - def all_caps(self, value): - self._set_bool_prop('caps', value) - - @property - def bold(self): - """ - Read/write. Causes text in this font to appear in bold. 
- """ - return self._get_bool_prop('b') - - @bold.setter - def bold(self, value): - self._set_bool_prop('b', value) - - @property - def color(self): - """ - A |ColorFormat| object providing a way to get and set the text color - for this font. - """ - return ColorFormat(self._element) - - @property - def complex_script(self): - """ - Read/write tri-state value. When |True|, causes the characters in the - run to be treated as complex script regardless of their Unicode - values. - """ - return self._get_bool_prop('cs') - - @complex_script.setter - def complex_script(self, value): - self._set_bool_prop('cs', value) - - @property - def cs_bold(self): - """ - Read/write tri-state value. When |True|, causes the complex script - characters in the run to be displayed in bold typeface. - """ - return self._get_bool_prop('bCs') - - @cs_bold.setter - def cs_bold(self, value): - self._set_bool_prop('bCs', value) - - @property - def cs_italic(self): - """ - Read/write tri-state value. When |True|, causes the complex script - characters in the run to be displayed in italic typeface. - """ - return self._get_bool_prop('iCs') - - @cs_italic.setter - def cs_italic(self, value): - self._set_bool_prop('iCs', value) - - @property - def double_strike(self): - """ - Read/write tri-state value. When |True|, causes the text in the run - to appear with double strikethrough. - """ - return self._get_bool_prop('dstrike') - - @double_strike.setter - def double_strike(self, value): - self._set_bool_prop('dstrike', value) - - @property - def emboss(self): - """ - Read/write tri-state value. When |True|, causes the text in the run - to appear as if raised off the page in relief. - """ - return self._get_bool_prop('emboss') - - @emboss.setter - def emboss(self, value): - self._set_bool_prop('emboss', value) - - @property - def hidden(self): - """ - Read/write tri-state value. 
When |True|, causes the text in the run - to be hidden from display, unless applications settings force hidden - text to be shown. - """ - return self._get_bool_prop('vanish') - - @hidden.setter - def hidden(self, value): - self._set_bool_prop('vanish', value) - - @property - def highlight_color(self): - """ - A member of :ref:`WdColorIndex` indicating the color of highlighting - applied, or `None` if no highlighting is applied. - """ - rPr = self._element.rPr - if rPr is None: - return None - return rPr.highlight_val - - @highlight_color.setter - def highlight_color(self, value): - rPr = self._element.get_or_add_rPr() - rPr.highlight_val = value - - @property - def italic(self): - """ - Read/write tri-state value. When |True|, causes the text of the run - to appear in italics. |None| indicates the effective value is - inherited from the style hierarchy. - """ - return self._get_bool_prop('i') - - @italic.setter - def italic(self, value): - self._set_bool_prop('i', value) - - @property - def imprint(self): - """ - Read/write tri-state value. When |True|, causes the text in the run - to appear as if pressed into the page. - """ - return self._get_bool_prop('imprint') - - @imprint.setter - def imprint(self, value): - self._set_bool_prop('imprint', value) - - @property - def math(self): - """ - Read/write tri-state value. When |True|, specifies this run contains - WML that should be handled as though it was Office Open XML Math. - """ - return self._get_bool_prop('oMath') - - @math.setter - def math(self, value): - self._set_bool_prop('oMath', value) - - @property - def name(self): - """ - Get or set the typeface name for this |Font| instance, causing the - text it controls to appear in the named font, if a matching font is - found. |None| indicates the typeface is inherited from the style - hierarchy. 
- """ - rPr = self._element.rPr - if rPr is None: - return None - return rPr.rFonts_ascii - - @name.setter - def name(self, value): - rPr = self._element.get_or_add_rPr() - rPr.rFonts_ascii = value - rPr.rFonts_hAnsi = value - - @property - def no_proof(self): - """ - Read/write tri-state value. When |True|, specifies that the contents - of this run should not report any errors when the document is scanned - for spelling and grammar. - """ - return self._get_bool_prop('noProof') - - @no_proof.setter - def no_proof(self, value): - self._set_bool_prop('noProof', value) - - @property - def outline(self): - """ - Read/write tri-state value. When |True| causes the characters in the - run to appear as if they have an outline, by drawing a one pixel wide - border around the inside and outside borders of each character glyph. - """ - return self._get_bool_prop('outline') - - @outline.setter - def outline(self, value): - self._set_bool_prop('outline', value) - - @property - def rtl(self): - """ - Read/write tri-state value. When |True| causes the text in the run - to have right-to-left characteristics. - """ - return self._get_bool_prop('rtl') - - @rtl.setter - def rtl(self, value): - self._set_bool_prop('rtl', value) - - @property - def shadow(self): - """ - Read/write tri-state value. When |True| causes the text in the run - to appear as if each character has a shadow. - """ - return self._get_bool_prop('shadow') - - @shadow.setter - def shadow(self, value): - self._set_bool_prop('shadow', value) - - @property - def size(self): - """ - Read/write |Length| value or |None|, indicating the font height in - English Metric Units (EMU). |None| indicates the font size should be - inherited from the style hierarchy. |Length| is a subclass of |int| - having properties for convenient conversion into points or other - length units. 
The :class:`docx.shared.Pt` class allows convenient - specification of point values:: - - >> font.size = Pt(24) - >> font.size - 304800 - >> font.size.pt - 24.0 - """ - rPr = self._element.rPr - if rPr is None: - return None - return rPr.sz_val - - @size.setter - def size(self, emu): - rPr = self._element.get_or_add_rPr() - rPr.sz_val = emu - - @property - def small_caps(self): - """ - Read/write tri-state value. When |True| causes the lowercase - characters in the run to appear as capital letters two points smaller - than the font size specified for the run. - """ - return self._get_bool_prop('smallCaps') - - @small_caps.setter - def small_caps(self, value): - self._set_bool_prop('smallCaps', value) - - @property - def snap_to_grid(self): - """ - Read/write tri-state value. When |True| causes the run to use the - document grid characters per line settings defined in the docGrid - element when laying out the characters in this run. - """ - return self._get_bool_prop('snapToGrid') - - @snap_to_grid.setter - def snap_to_grid(self, value): - self._set_bool_prop('snapToGrid', value) - - @property - def spec_vanish(self): - """ - Read/write tri-state value. When |True|, specifies that the given run - shall always behave as if it is hidden, even when hidden text is - being displayed in the current document. The property has a very - narrow, specialized use related to the table of contents. Consult the - spec (§17.3.2.36) for more details. - """ - return self._get_bool_prop('specVanish') - - @spec_vanish.setter - def spec_vanish(self, value): - self._set_bool_prop('specVanish', value) - - @property - def strike(self): - """ - Read/write tri-state value. When |True| causes the text in the run - to appear with a single horizontal line through the center of the - line. 
- """ - return self._get_bool_prop('strike') - - @strike.setter - def strike(self, value): - self._set_bool_prop('strike', value) - - @property - def subscript(self): - """ - Boolean indicating whether the characters in this |Font| appear as - subscript. |None| indicates the subscript/subscript value is - inherited from the style hierarchy. - """ - rPr = self._element.rPr - if rPr is None: - return None - return rPr.subscript - - @subscript.setter - def subscript(self, value): - rPr = self._element.get_or_add_rPr() - rPr.subscript = value - - @property - def superscript(self): - """ - Boolean indicating whether the characters in this |Font| appear as - superscript. |None| indicates the subscript/superscript value is - inherited from the style hierarchy. - """ - rPr = self._element.rPr - if rPr is None: - return None - return rPr.superscript - - @superscript.setter - def superscript(self, value): - rPr = self._element.get_or_add_rPr() - rPr.superscript = value - - @property - def underline(self): - """ - The underline style for this |Font|, one of |None|, |True|, |False|, - or a value from :ref:`WdUnderline`. |None| indicates the font - inherits its underline value from the style hierarchy. |False| - indicates no underline. |True| indicates single underline. The values - from :ref:`WdUnderline` are used to specify other outline styles such - as double, wavy, and dotted. - """ - rPr = self._element.rPr - if rPr is None: - return None - return rPr.u_val - - @underline.setter - def underline(self, value): - rPr = self._element.get_or_add_rPr() - rPr.u_val = value - - @property - def web_hidden(self): - """ - Read/write tri-state value. When |True|, specifies that the contents - of this run shall be hidden when the document is displayed in web - page view. 
- """ - return self._get_bool_prop('webHidden') - - @web_hidden.setter - def web_hidden(self, value): - self._set_bool_prop('webHidden', value) - - def _get_bool_prop(self, name): - """ - Return the value of boolean child of `w:rPr` having *name*. - """ - rPr = self._element.rPr - if rPr is None: - return None - return rPr._get_bool_val(name) - - def _set_bool_prop(self, name, value): - """ - Assign *value* to the boolean child *name* of `w:rPr`. - """ - rPr = self._element.get_or_add_rPr() - rPr._set_bool_val(name, value) diff --git a/docx/text/paragraph.py b/docx/text/paragraph.py deleted file mode 100644 index 4fb583b94..000000000 --- a/docx/text/paragraph.py +++ /dev/null @@ -1,145 +0,0 @@ -# encoding: utf-8 - -""" -Paragraph-related proxy types. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from ..enum.style import WD_STYLE_TYPE -from .parfmt import ParagraphFormat -from .run import Run -from ..shared import Parented - - -class Paragraph(Parented): - """ - Proxy object wrapping ```` element. - """ - def __init__(self, p, parent): - super(Paragraph, self).__init__(parent) - self._p = self._element = p - - def add_run(self, text=None, style=None): - """ - Append a run to this paragraph containing *text* and having character - style identified by style ID *style*. *text* can contain tab - (``\\t``) characters, which are converted to the appropriate XML form - for a tab. *text* can also include newline (``\\n``) or carriage - return (``\\r``) characters, each of which is converted to a line - break. - """ - r = self._p.add_r() - run = Run(r, self) - if text: - run.text = text - if style: - run.style = style - return run - - @property - def alignment(self): - """ - A member of the :ref:`WdParagraphAlignment` enumeration specifying - the justification setting for this paragraph. 
A value of |None| - indicates the paragraph has no directly-applied alignment value and - will inherit its alignment value from its style hierarchy. Assigning - |None| to this property removes any directly-applied alignment value. - """ - return self._p.alignment - - @alignment.setter - def alignment(self, value): - self._p.alignment = value - - def clear(self): - """ - Return this same paragraph after removing all its content. - Paragraph-level formatting, such as style, is preserved. - """ - self._p.clear_content() - return self - - def insert_paragraph_before(self, text=None, style=None): - """ - Return a newly created paragraph, inserted directly before this - paragraph. If *text* is supplied, the new paragraph contains that - text in a single run. If *style* is provided, that style is assigned - to the new paragraph. - """ - paragraph = self._insert_paragraph_before() - if text: - paragraph.add_run(text) - if style is not None: - paragraph.style = style - return paragraph - - @property - def paragraph_format(self): - """ - The |ParagraphFormat| object providing access to the formatting - properties for this paragraph, such as line spacing and indentation. - """ - return ParagraphFormat(self._element) - - @property - def runs(self): - """ - Sequence of |Run| instances corresponding to the elements in - this paragraph. - """ - return [Run(r, self) for r in self._p.r_lst] - - @property - def style(self): - """ - Read/Write. |_ParagraphStyle| object representing the style assigned - to this paragraph. If no explicit style is assigned to this - paragraph, its value is the default paragraph style for the document. - A paragraph style name can be assigned in lieu of a paragraph style - object. Assigning |None| removes any applied style, making its - effective value the default paragraph style for the document. 
- """ - style_id = self._p.style - return self.part.get_style(style_id, WD_STYLE_TYPE.PARAGRAPH) - - @style.setter - def style(self, style_or_name): - style_id = self.part.get_style_id( - style_or_name, WD_STYLE_TYPE.PARAGRAPH - ) - self._p.style = style_id - - @property - def text(self): - """ - String formed by concatenating the text of each run in the paragraph. - Tabs and line breaks in the XML are mapped to ``\\t`` and ``\\n`` - characters respectively. - - Assigning text to this property causes all existing paragraph content - to be replaced with a single run containing the assigned text. - A ``\\t`` character in the text is mapped to a ```` element - and each ``\\n`` or ``\\r`` character is mapped to a line break. - Paragraph-level formatting, such as style, is preserved. All - run-level formatting, such as bold or italic, is removed. - """ - text = '' - for run in self.runs: - text += run.text - return text - - @text.setter - def text(self, text): - self.clear() - self.add_run(text) - - def _insert_paragraph_before(self): - """ - Return a newly created paragraph, inserted directly before this - paragraph. - """ - p = self._p.add_p_before() - return Paragraph(p, self._parent) diff --git a/docx/text/run.py b/docx/text/run.py deleted file mode 100644 index 97d6da7db..000000000 --- a/docx/text/run.py +++ /dev/null @@ -1,191 +0,0 @@ -# encoding: utf-8 - -""" -Run-related proxy objects for python-docx, Run in particular. -""" - -from __future__ import absolute_import, print_function, unicode_literals - -from ..enum.style import WD_STYLE_TYPE -from ..enum.text import WD_BREAK -from .font import Font -from ..shape import InlineShape -from ..shared import Parented - - -class Run(Parented): - """ - Proxy object wrapping ```` element. Several of the properties on Run - take a tri-state value, |True|, |False|, or |None|. |True| and |False| - correspond to on and off respectively. 
|None| indicates the property is - not specified directly on the run and its effective value is taken from - the style hierarchy. - """ - def __init__(self, r, parent): - super(Run, self).__init__(parent) - self._r = self._element = self.element = r - - def add_break(self, break_type=WD_BREAK.LINE): - """ - Add a break element of *break_type* to this run. *break_type* can - take the values `WD_BREAK.LINE`, `WD_BREAK.PAGE`, and - `WD_BREAK.COLUMN` where `WD_BREAK` is imported from `docx.enum.text`. - *break_type* defaults to `WD_BREAK.LINE`. - """ - type_, clear = { - WD_BREAK.LINE: (None, None), - WD_BREAK.PAGE: ('page', None), - WD_BREAK.COLUMN: ('column', None), - WD_BREAK.LINE_CLEAR_LEFT: ('textWrapping', 'left'), - WD_BREAK.LINE_CLEAR_RIGHT: ('textWrapping', 'right'), - WD_BREAK.LINE_CLEAR_ALL: ('textWrapping', 'all'), - }[break_type] - br = self._r.add_br() - if type_ is not None: - br.type = type_ - if clear is not None: - br.clear = clear - - def add_picture(self, image_path_or_stream, width=None, height=None): - """ - Return an |InlineShape| instance containing the image identified by - *image_path_or_stream*, added to the end of this run. - *image_path_or_stream* can be a path (a string) or a file-like object - containing a binary image. If neither width nor height is specified, - the picture appears at its native size. If only one is specified, it - is used to compute a scaling factor that is then applied to the - unspecified dimension, preserving the aspect ratio of the image. The - native size of the picture is calculated using the dots-per-inch - (dpi) value specified in the image file, defaulting to 72 dpi if no - value is specified, as is often the case. - """ - inline = self.part.new_pic_inline(image_path_or_stream, width, height) - self._r.add_drawing(inline) - return InlineShape(inline) - - def add_tab(self): - """ - Add a ```` element at the end of the run, which Word - interprets as a tab character. 
- """ - self._r._add_tab() - - def add_text(self, text): - """ - Returns a newly appended |_Text| object (corresponding to a new - ```` child element) to the run, containing *text*. Compare with - the possibly more friendly approach of assigning text to the - :attr:`Run.text` property. - """ - t = self._r.add_t(text) - return _Text(t) - - @property - def bold(self): - """ - Read/write. Causes the text of the run to appear in bold. - """ - return self.font.bold - - @bold.setter - def bold(self, value): - self.font.bold = value - - def clear(self): - """ - Return reference to this run after removing all its content. All run - formatting is preserved. - """ - self._r.clear_content() - return self - - @property - def font(self): - """ - The |Font| object providing access to the character formatting - properties for this run, such as font name and size. - """ - return Font(self._element) - - @property - def italic(self): - """ - Read/write tri-state value. When |True|, causes the text of the run - to appear in italics. - """ - return self.font.italic - - @italic.setter - def italic(self, value): - self.font.italic = value - - @property - def style(self): - """ - Read/write. A |_CharacterStyle| object representing the character - style applied to this run. The default character style for the - document (often `Default Character Font`) is returned if the run has - no directly-applied character style. Setting this property to |None| - removes any directly-applied character style. - """ - style_id = self._r.style - return self.part.get_style(style_id, WD_STYLE_TYPE.CHARACTER) - - @style.setter - def style(self, style_or_name): - style_id = self.part.get_style_id( - style_or_name, WD_STYLE_TYPE.CHARACTER - ) - self._r.style = style_id - - @property - def text(self): - """ - String formed by concatenating the text equivalent of each run - content child element into a Python string. Each ```` element - adds the text characters it contains. 
A ```` element adds - a ``\\t`` character. A ```` or ```` element each add - a ``\\n`` character. Note that a ```` element can indicate - a page break or column break as well as a line break. All ```` - elements translate to a single ``\\n`` character regardless of their - type. All other content child elements, such as ````, are - ignored. - - Assigning text to this property has the reverse effect, translating - each ``\\t`` character to a ```` element and each ``\\n`` or - ``\\r`` character to a ```` element. Any existing run content - is replaced. Run formatting is preserved. - """ - return self._r.text - - @text.setter - def text(self, text): - self._r.text = text - - @property - def underline(self): - """ - The underline style for this |Run|, one of |None|, |True|, |False|, - or a value from :ref:`WdUnderline`. A value of |None| indicates the - run has no directly-applied underline value and so will inherit the - underline value of its containing paragraph. Assigning |None| to this - property removes any directly-applied underline value. A value of - |False| indicates a directly-applied setting of no underline, - overriding any inherited value. A value of |True| indicates single - underline. The values from :ref:`WdUnderline` are used to specify - other outline styles such as double, wavy, and dotted. - """ - return self.font.underline - - @underline.setter - def underline(self, value): - self.font.underline = value - - -class _Text(object): - """ - Proxy object wrapping ```` element. - """ - def __init__(self, t_elm): - super(_Text, self).__init__() - self._t = t_elm diff --git a/docx/text/tabstops.py b/docx/text/tabstops.py deleted file mode 100644 index c22b9bc91..000000000 --- a/docx/text/tabstops.py +++ /dev/null @@ -1,143 +0,0 @@ -# encoding: utf-8 - -""" -Tabstop-related proxy types. 
-""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from ..shared import ElementProxy -from docx.enum.text import WD_TAB_ALIGNMENT, WD_TAB_LEADER - - -class TabStops(ElementProxy): - """ - A sequence of |TabStop| objects providing access to the tab stops of - a paragraph or paragraph style. Supports iteration, indexed access, del, - and len(). It is accesed using the :attr:`~.ParagraphFormat.tab_stops` - property of ParagraphFormat; it is not intended to be constructed - directly. - """ - - __slots__ = ('_pPr') - - def __init__(self, element): - super(TabStops, self).__init__(element, None) - self._pPr = element - - def __delitem__(self, idx): - """ - Remove the tab at offset *idx* in this sequence. - """ - tabs = self._pPr.tabs - try: - tabs.remove(tabs[idx]) - except (AttributeError, IndexError): - raise IndexError('tab index out of range') - - if len(tabs) == 0: - self._pPr.remove(tabs) - - def __getitem__(self, idx): - """ - Enables list-style access by index. - """ - tabs = self._pPr.tabs - if tabs is None: - raise IndexError('TabStops object is empty') - tab = tabs.tab_lst[idx] - return TabStop(tab) - - def __iter__(self): - """ - Generate a TabStop object for each of the w:tab elements, in XML - document order. - """ - tabs = self._pPr.tabs - if tabs is not None: - for tab in tabs.tab_lst: - yield TabStop(tab) - - def __len__(self): - tabs = self._pPr.tabs - if tabs is None: - return 0 - return len(tabs.tab_lst) - - def add_tab_stop(self, position, alignment=WD_TAB_ALIGNMENT.LEFT, - leader=WD_TAB_LEADER.SPACES): - """ - Add a new tab stop at *position*, a |Length| object specifying the - location of the tab stop relative to the paragraph edge. A negative - *position* value is valid and appears in hanging indentation. Tab - alignment defaults to left, but may be specified by passing a member - of the :ref:`WdTabAlignment` enumeration as *alignment*. 
An optional - leader character can be specified by passing a member of the - :ref:`WdTabLeader` enumeration as *leader*. - """ - tabs = self._pPr.get_or_add_tabs() - tab = tabs.insert_tab_in_order(position, alignment, leader) - return TabStop(tab) - - def clear_all(self): - """ - Remove all custom tab stops. - """ - self._pPr._remove_tabs() - - -class TabStop(ElementProxy): - """ - An individual tab stop applying to a paragraph or style. Accessed using - list semantics on its containing |TabStops| object. - """ - - __slots__ = ('_tab') - - def __init__(self, element): - super(TabStop, self).__init__(element, None) - self._tab = element - - @property - def alignment(self): - """ - A member of :ref:`WdTabAlignment` specifying the alignment setting - for this tab stop. Read/write. - """ - return self._tab.val - - @alignment.setter - def alignment(self, value): - self._tab.val = value - - @property - def leader(self): - """ - A member of :ref:`WdTabLeader` specifying a repeating character used - as a "leader", filling in the space spanned by this tab. Assigning - |None| produces the same result as assigning `WD_TAB_LEADER.SPACES`. - Read/write. - """ - return self._tab.leader - - @leader.setter - def leader(self, value): - self._tab.leader = value - - @property - def position(self): - """ - A |Length| object representing the distance of this tab stop from the - inside edge of the paragraph. May be positive or negative. - Read/write. 
- """ - return self._tab.pos - - @position.setter - def position(self, value): - tab = self._tab - tabs = tab.getparent() - self._tab = tabs.insert_tab_in_order(value, tab.val, tab.leader) - tabs.remove(tab) diff --git a/features/blk-iter-inner-content.feature b/features/blk-iter-inner-content.feature new file mode 100644 index 000000000..047efb9ee --- /dev/null +++ b/features/blk-iter-inner-content.feature @@ -0,0 +1,24 @@ +Feature: Iterate paragraphs and tables in document-order + In order to access paragraphs and tables in the same order they appear in the document + As a developer using python-docx + I need the ability to iterate the inner-content of a block-item-container + + + Scenario: Document.iter_inner_content() + Given a Document object with paragraphs and tables + Then document.iter_inner_content() produces the block-items in document order + + + Scenario: Header.iter_inner_content() + Given a Header object with paragraphs and tables + Then header.iter_inner_content() produces the block-items in document order + + + Scenario: Footer.iter_inner_content() + Given a Footer object with paragraphs and tables + Then footer.iter_inner_content() produces the block-items in document order + + + Scenario: _Cell.iter_inner_content() + Given a _Cell object with paragraphs and tables + Then cell.iter_inner_content() produces the block-items in document order diff --git a/features/cmt-mutations.feature b/features/cmt-mutations.feature new file mode 100644 index 000000000..1ef9ad2db --- /dev/null +++ b/features/cmt-mutations.feature @@ -0,0 +1,59 @@ +Feature: Comment mutations + In order to add and modify the content of a comment + As a developer using python-docx + I need mutation methods on Comment objects + + + Scenario: Comments.add_comment() + Given a Comments object with 0 comments + When I assign comment = comments.add_comment() + Then comment.comment_id == 0 + And len(comment.paragraphs) == 1 + And comment.paragraphs[0].style.name == "CommentText" + And 
len(comments) == 1 + And comments.get(0) == comment + + + Scenario: Comments.add_comment() specifying author and initials + Given a Comments object with 0 comments + When I assign comment = comments.add_comment(author="John Doe", initials="JD") + Then comment.author == "John Doe" + And comment.initials == "JD" + + + Scenario: Comment.add_paragraph() specifying text and style + Given a default Comment object + When I assign paragraph = comment.add_paragraph(text, style) + Then len(comment.paragraphs) == 2 + And paragraph.text == text + And paragraph.style == style + And comment.paragraphs[-1] == paragraph + + + Scenario: Comment.add_paragraph() not specifying text or style + Given a default Comment object + When I assign paragraph = comment.add_paragraph() + Then len(comment.paragraphs) == 2 + And paragraph.text == "" + And paragraph.style == "CommentText" + And comment.paragraphs[-1] == paragraph + + + Scenario: Add image to comment + Given a default Comment object + When I assign paragraph = comment.add_paragraph() + And I assign run = paragraph.add_run() + And I call run.add_picture() + Then run.iter_inner_content() yields a single Picture drawing + + + Scenario: update Comment.author + Given a Comment object + When I assign "Jane Smith" to comment.author + Then comment.author == "Jane Smith" + + + Scenario: update Comment.initials + Given a Comment object + When I assign "JS" to comment.initials + Then comment.initials == "JS" diff --git a/features/cmt-props.feature b/features/cmt-props.feature new file mode 100644 index 000000000..e4e620828 --- /dev/null +++ b/features/cmt-props.feature @@ -0,0 +1,35 @@ +Feature: Get comment properties + In order to characterize comments by their metadata + As a developer using python-docx + I need methods to access comment metadata properties + + + Scenario: Comment.id + Given a Comment object + Then comment.comment_id is the comment identifier + + + Scenario: Comment.author + Given a Comment object + Then comment.author is 
the author of the comment + + + Scenario: Comment.initials + Given a Comment object + Then comment.initials is the initials of the comment author + + + Scenario: Comment.timestamp + Given a Comment object + Then comment.timestamp is the date and time the comment was authored + + + Scenario: Comment.paragraphs[0].text + Given a Comment object + When I assign para_text = comment.paragraphs[0].text + Then para_text is the text of the first paragraph in the comment + + + Scenario: Retrieve embedded image from a comment + Given a Comment object containing an embedded image + Then I can extract the image from the comment diff --git a/features/doc-add-comment.feature b/features/doc-add-comment.feature new file mode 100644 index 000000000..36f46244a --- /dev/null +++ b/features/doc-add-comment.feature @@ -0,0 +1,13 @@ +Feature: Add a comment to a document + In order add a comment to a document + As a developer using python-docx + I need a way to add a comment specifying both its content and its reference + + + Scenario: Document.add_comment(runs, text, author, initials) + Given a document having a comments part + When I assign comment = document.add_comment(runs, "A comment", "John Doe", "JD") + Then comment is a Comment object + And comment.text == "A comment" + And comment.author == "John Doe" + And comment.initials == "JD" diff --git a/features/doc-add-section.feature b/features/doc-add-section.feature index 9b1c6e30c..4f9a8e860 100644 --- a/features/doc-add-section.feature +++ b/features/doc-add-section.feature @@ -11,3 +11,14 @@ Feature: Add a document section Then the document has two sections And the first section is portrait And the second section is landscape + + + Scenario: Document.add_section() adds a section that inherits headers and footers + Given a single-section Document object with headers and footers as document + When I execute section = document.add_section() + Then section.header.is_linked_to_previous is True + And 
section.even_page_header.is_linked_to_previous is True + And section.first_page_header.is_linked_to_previous is True + And section.footer.is_linked_to_previous is True + And section.even_page_footer.is_linked_to_previous is True + And section.first_page_footer.is_linked_to_previous is True diff --git a/features/doc-comments.feature b/features/doc-comments.feature new file mode 100644 index 000000000..944146e5e --- /dev/null +++ b/features/doc-comments.feature @@ -0,0 +1,36 @@ +Feature: Document.comments + In order to operate on comments added to a document + As a developer using python-docx + I need access to the comments collection for the document + And I need methods allowing access to the comments in the collection + + + Scenario Outline: Access document comments + Given a document having comments part + Then document.comments is a Comments object + + Examples: having a comments part or not + | a-or-no | + | a | + | no | + + + Scenario Outline: Comments.__len__() + Given a Comments object with comments + Then len(comments) == + + Examples: len(comments) values + | count | + | 0 | + | 4 | + + + Scenario: Comments.__iter__() + Given a Comments object with 4 comments + Then iterating comments yields 4 Comment objects + + + Scenario: Comments.get() + Given a Comments object with 4 comments + When I call comments.get(2) + Then the result is a Comment object with id 2 diff --git a/features/doc-settings.feature b/features/doc-settings.feature index 612dfcc19..89d812f83 100644 --- a/features/doc-settings.feature +++ b/features/doc-settings.feature @@ -1,7 +1,8 @@ -Feature: Access to document settings +Feature: Document.settings In order to operate on document-level settings As a developer using python-docx - I access to settings stored in the settings part + I need access to settings to the Settings object for the document + And I need properties and methods on Settings Scenario Outline: Access document settings @@ -12,3 +13,26 @@ Feature: Access to document settings | 
a-or-no | | a | | no | + + + Scenario Outline: Settings.odd_and_even_pages_header_footer getter + Given a Settings object odd and even page headers as settings + Then settings.odd_and_even_pages_header_footer is + + Examples: Settings.odd_and_even_pages_header_footer states + | with-or-without | value | + | with | True | + | without | False | + + + Scenario Outline: Settings.odd_and_even_pages_header_footer setter + Given a Settings object odd and even page headers as settings + When I assign to settings.odd_and_even_pages_header_footer + Then settings.odd_and_even_pages_header_footer is + + Examples: Settings.odd_and_even_pages_header_footer assignment cases + | with-or-without | value | + | with | True | + | with | False | + | without | True | + | without | False | diff --git a/features/environment.py b/features/environment.py index e144106cf..dfd2028a3 100644 --- a/features/environment.py +++ b/features/environment.py @@ -1,15 +1,8 @@ -# encoding: utf-8 - -""" -Used by behave to set testing environment before and after running acceptance -tests. 
-""" +"""Set testing environment before and after behave acceptance test runs.""" import os -scratch_dir = os.path.abspath( - os.path.join(os.path.split(__file__)[0], '_scratch') -) +scratch_dir = os.path.abspath(os.path.join(os.path.split(__file__)[0], "_scratch")) def before_all(context): diff --git a/features/hdr-header-footer.feature b/features/hdr-header-footer.feature new file mode 100644 index 000000000..eb2bb00d6 --- /dev/null +++ b/features/hdr-header-footer.feature @@ -0,0 +1,88 @@ +Feature: Header and footer behaviors + In order to control the appearance of page headers and footers + As a developer using python-docx + I need properties and methods on _Header and _Footer objects + + + Scenario Outline: _Header.is_linked_to_previous getter + Given a _Header object header definition as header + Then header.is_linked_to_previous is + + Examples: _Header.is_linked_to_previous states + | with-or-no | value | + | with a | False | + | with no | True | + + + Scenario Outline: _Header.is_linked_to_previous setter + Given a _Header object header definition as header + When I assign to header.is_linked_to_previous + Then header.is_linked_to_previous is + + Examples: _Header.is_linked_to_previous state changes + | with-or-no | value | + | with a | True | + | with no | False | + | with a | False | + | with no | True | + + + Scenario: _Header inherits content + Given a _Header object with a header definition as header + And the next _Header object with no header definition as header_2 + Then header_2.paragraphs[0].text == header.paragraphs[0].text + And header_2.is_linked_to_previous is True + + + Scenario: _Header text accepts style assignment + Given a _Header object with a header definition as header + When I assign "Normal" to header.paragraphs[0].style + Then header.paragraphs[0].style.name == "Normal" + + + Scenario: _Header allows image insertion + Given a _Run object from a header as run + When I call run.add_picture() + Then I can't detect the image but no 
exception is raised + + + Scenario Outline: _Footer.is_linked_to_previous getter + Given a _Footer object footer definition as footer + Then footer.is_linked_to_previous is + + Examples: _Footer.is_linked_to_previous states + | with-or-no | value | + | with a | False | + | with no | True | + + + Scenario Outline: _Footer.is_linked_to_previous setter + Given a _Footer object footer definition as footer + When I assign to footer.is_linked_to_previous + Then footer.is_linked_to_previous is + + Examples: _Footer.is_linked_to_previous state changes + | with-or-no | value | + | with a | True | + | with no | False | + | with a | False | + | with no | True | + + + Scenario: _Footer inherits content + Given a _Footer object with a footer definition as footer + And the next _Footer object with no footer definition as footer_2 + Then footer_2.paragraphs[0].text == footer.paragraphs[0].text + And footer_2.is_linked_to_previous is True + + + Scenario: _Footer text accepts style assignment + Given a _Footer object with a footer definition as footer + When I assign "Normal" to footer.paragraphs[0].style + Then footer.paragraphs[0].style.name == "Normal" + + + Scenario: _Footer allows image insertion + Given a _Run object from a footer as run + When I call run.add_picture() + Then I can't detect the image but no exception is raised diff --git a/features/hlk-props.feature b/features/hlk-props.feature new file mode 100644 index 000000000..a315318e0 --- /dev/null +++ b/features/hlk-props.feature @@ -0,0 +1,54 @@ +Feature: Access hyperlink properties + In order to access the URL and other details for a hyperlink + As a developer using python-docx + I need properties on Hyperlink + + + Scenario: Hyperlink.address has the URL of the hyperlink + Given a hyperlink + Then hyperlink.address is the URL of the hyperlink + + + Scenario Outline: Hyperlink.contains_page_break reports presence of page-break + Given a hyperlink having rendered page breaks + Then hyperlink.contains_page_break is + 
+ Examples: Hyperlink.contains_page_break cases + | zero-or-more | value | + | no | False | + | one | True | + + + Scenario: Hyperlink.fragment has the URI fragment of the hyperlink + Given a hyperlink having a URI fragment + Then hyperlink.fragment is the URI fragment of the hyperlink + + + Scenario Outline: Hyperlink.runs contains Run for each run in hyperlink + Given a hyperlink having runs + Then hyperlink.runs has length + And hyperlink.runs contains only Run instances + + Examples: Hyperlink.runs cases + | zero-or-more | value | + | one | 1 | + | two | 2 | + + + Scenario: Hyperlink.text has the visible text of the hyperlink + Given a hyperlink + Then hyperlink.text is the visible text of the hyperlink + + + Scenario Outline: Hyperlink.url is the full URL of an internet hyperlink + Given a hyperlink having address
and fragment + Then hyperlink.url is + + Examples: Hyperlink.url cases + | address | fragment | url | + | '' | linkedBookmark | '' | + | https://foo.com | '' | https://foo.com | + | https://foo.com?q=bar | '' | https://foo.com?q=bar | + | http://foo.com/ | intro | http://foo.com/#intro | + | https://foo.com?q=bar#baz | '' | https://foo.com?q=bar#baz | + | court-exif.jpg | '' | court-exif.jpg | diff --git a/features/par-access-inner-content.feature b/features/par-access-inner-content.feature new file mode 100644 index 000000000..047168fcf --- /dev/null +++ b/features/par-access-inner-content.feature @@ -0,0 +1,49 @@ +Feature: Access paragraph inner-content including hyperlinks + In order to extract paragraph content with high-fidelity + As a developer using python-docx + I need to access differentiated paragraph content in document order + + + Scenario Outline: Paragraph.contains_page_break reports presence of page-break + Given a paragraph having rendered page breaks + Then paragraph.contains_page_break is + + Examples: Paragraph.contains_page_break cases + | zero-or-more | value | + | no | False | + | one | True | + | two | True | + + + Scenario Outline: Paragraph.hyperlinks contains Hyperlink for each link in paragraph + Given a paragraph having hyperlinks + Then paragraph.hyperlinks has length + And paragraph.hyperlinks contains only Hyperlink instances + + Examples: Paragraph.hyperlinks cases + | zero-or-more | value | + | no | 0 | + | one | 1 | + | three | 3 | + + + Scenario: Paragraph.iter_inner_content() generates the paragraph's runs and hyperlinks + Given a paragraph having three hyperlinks + Then paragraph.iter_inner_content() generates the paragraph runs and hyperlinks + + + Scenario Outline: Paragraph.rendered_page_breaks contains paragraph RenderedPageBreaks + Given a paragraph having rendered page breaks + Then paragraph.rendered_page_breaks has length + And paragraph.rendered_page_breaks contains only RenderedPageBreak instances + + Examples: 
Paragraph.rendered_page_breaks cases + | zero-or-more | value | + | no | 0 | + | one | 1 | + | two | 2 | + + + Scenario: Paragraph.text contains both run-text and hyperlink-text + Given a paragraph having three hyperlinks + Then paragraph.text contains the text of both the runs and the hyperlinks diff --git a/features/pbk-split-para.feature b/features/pbk-split-para.feature new file mode 100644 index 000000000..8ce048a40 --- /dev/null +++ b/features/pbk-split-para.feature @@ -0,0 +1,24 @@ +Feature: Split paragraph on rendered page-breaks + In order to extract document content with high page-attribution fidelity + As a developer using python-docx + I need to a way to split a paragraph on its first rendered page break + + + Scenario: RenderedPageBreak.preceding_paragraph_fragment is the content before break + Given a rendered_page_break in a paragraph + Then rendered_page_break.preceding_paragraph_fragment is the content before break + + + Scenario: RenderedPageBreak.preceding_paragraph_fragment includes the hyperlink + Given a rendered_page_break in a hyperlink + Then rendered_page_break.preceding_paragraph_fragment includes the hyperlink + + + Scenario: RenderedPageBreak.following_paragraph_fragment is the content after break + Given a rendered_page_break in a paragraph + Then rendered_page_break.following_paragraph_fragment is the content after break + + + Scenario: RenderedPageBreak.following_paragraph_fragment excludes the hyperlink + Given a rendered_page_break in a hyperlink + Then rendered_page_break.following_paragraph_fragment excludes the hyperlink diff --git a/features/run-access-content.feature b/features/run-access-content.feature deleted file mode 100644 index ad30f6feb..000000000 --- a/features/run-access-content.feature +++ /dev/null @@ -1,9 +0,0 @@ -Feature: Access run content - In order to discover or locate existing inline content - As a developer using python-docx - I need ways to access the run content - - - Scenario: Get run content as Python 
text - Given a run having mixed text content - Then the text of the run represents the textual run content diff --git a/features/run-access-inner-content.feature b/features/run-access-inner-content.feature new file mode 100644 index 000000000..a9bbb170c --- /dev/null +++ b/features/run-access-inner-content.feature @@ -0,0 +1,25 @@ +Feature: Access run inner-content including rendered page-breaks + In order to extract run content with high-fidelity + As a developer using python-docx + I need to access differentiated run content in document order + + + Scenario Outline: Run.contains_page_break reports presence of page-break + Given a run having rendered page breaks + Then run.contains_page_break is + + Examples: Run.contains_page_break cases + | zero-or-more | value | + | no | False | + | one | True | + | two | True | + + + Scenario: Run.iter_inner_content() generates the run's text and rendered page-breaks + Given a run having two rendered page breaks + Then run.iter_inner_content() generates the run text and rendered page-breaks + + + Scenario: Run.text contains the text content of the run + Given a run having mixed text content + Then run.text contains the text content of the run diff --git a/features/run-add-content.feature b/features/run-add-content.feature index d4257925c..078dccd33 100644 --- a/features/run-add-content.feature +++ b/features/run-add-content.feature @@ -11,4 +11,4 @@ Feature: Add content to a run Scenario: Assign mixed text to text property Given a run When I assign mixed text to the text property - Then the text of the run represents the textual run content + Then run.text contains the text content of the run diff --git a/features/sct-section-props.feature b/features/sct-section.feature similarity index 64% rename from features/sct-section-props.feature rename to features/sct-section.feature index 412d93f02..7017d77aa 100644 --- a/features/sct-section-props.feature +++ b/features/sct-section.feature @@ -4,6 +4,64 @@ Feature: Access and change 
section properties I need a way to get and set the properties of a section + Scenario Outline: Section.different_first_page_header_footer getter + Given a Section object a distinct first-page header as section + Then section.different_first_page_header_footer is + + Examples: Section.different_first_page_header_footer states + | with-or-without | value | + | with | True | + | without | False | + + + Scenario Outline: Section.different_first_page_header_footer setter + Given a Section object a distinct first-page header as section + When I assign to section.different_first_page_header_footer + Then section.different_first_page_header_footer is + + Examples: Section.different_first_page_header_footer assignment cases + | with-or-without | value | + | with | True | + | with | False | + | without | True | + | without | False | + + + Scenario: Section.even_page_footer + Given a Section object as section + Then section.even_page_footer is a _Footer object + + + Scenario: Section.even_page_header + Given a Section object as section + Then section.even_page_header is a _Header object + + + Scenario: Section.first_page_footer + Given a Section object as section + Then section.first_page_footer is a _Footer object + + + Scenario: Section.first_page_header + Given a Section object as section + Then section.first_page_header is a _Header object + + + Scenario: Section.footer + Given a Section object as section + Then section.footer is a _Footer object + + + Scenario: Section.header + Given a Section object as section + Then section.header is a _Header object + + + Scenario: Section.iter_inner_content() + Given a Section object of a multi-section document as section + Then section.iter_inner_content() produces the paragraphs and tables in section + + Scenario Outline: Get section start type Given a section having start type Then the reported section start type is diff --git a/features/steps/api.py b/features/steps/api.py index a3325567b..16038ffe7 100644 --- 
a/features/steps/api.py +++ b/features/steps/api.py @@ -1,46 +1,43 @@ -# encoding: utf-8 - -""" -Step implementations for basic API features -""" +"""Step implementations for basic API features.""" from behave import given, then, when import docx - from docx import Document from helpers import test_docx - # given ==================================================== -@given('I have python-docx installed') + +@given("I have python-docx installed") def given_I_have_python_docx_installed(context): pass # when ===================================================== -@when('I call docx.Document() with no arguments') + +@when("I call docx.Document() with no arguments") def when_I_call_docx_Document_with_no_arguments(context): context.document = Document() -@when('I call docx.Document() with the path of a .docx file') +@when("I call docx.Document() with the path of a .docx file") def when_I_call_docx_Document_with_the_path_of_a_docx_file(context): - context.document = Document(test_docx('doc-default')) + context.document = Document(test_docx("doc-default")) # then ===================================================== -@then('document is a Document object') + +@then("document is a Document object") def then_document_is_a_Document_object(context): document = context.document assert isinstance(document, docx.document.Document) -@then('the last paragraph contains the text I specified') +@then("the last paragraph contains the text I specified") def then_last_p_contains_specified_text(context): document = context.document text = context.paragraph_text @@ -48,15 +45,15 @@ def then_last_p_contains_specified_text(context): assert p.text == text -@then('the last paragraph has the style I specified') +@then("the last paragraph has the style I specified") def then_the_last_paragraph_has_the_style_I_specified(context): document, expected_style = context.document, context.style paragraph = document.paragraphs[-1] assert paragraph.style == expected_style -@then('the last paragraph is the 
empty paragraph I added') +@then("the last paragraph is the empty paragraph I added") def then_last_p_is_empty_paragraph_added(context): document = context.document p = document.paragraphs[-1] - assert p.text == '' + assert p.text == "" diff --git a/features/steps/block.py b/features/steps/block.py index 1eee70cd2..e3d5c6154 100644 --- a/features/steps/block.py +++ b/features/steps/block.py @@ -1,53 +1,100 @@ -# encoding: utf-8 - -""" -Step implementations for block content containers -""" +"""Step implementations for block content containers.""" from behave import given, then, when +from behave.runner import Context from docx import Document from docx.table import Table from helpers import test_docx - # given =================================================== -@given('a document containing a table') -def given_a_document_containing_a_table(context): - context.document = Document(test_docx('blk-containing-table')) + +@given("a _Cell object with paragraphs and tables") +def given_a_cell_with_paragraphs_and_tables(context: Context): + context.cell = Document(test_docx("blk-paras-and-tables")).tables[1].rows[0].cells[0] + + +@given("a Document object with paragraphs and tables") +def given_a_document_with_paragraphs_and_tables(context: Context): + context.document = Document(test_docx("blk-paras-and-tables")) + + +@given("a document containing a table") +def given_a_document_containing_a_table(context: Context): + context.document = Document(test_docx("blk-containing-table")) + + +@given("a Footer object with paragraphs and tables") +def given_a_footer_with_paragraphs_and_tables(context: Context): + context.footer = Document(test_docx("blk-paras-and-tables")).sections[0].footer + + +@given("a Header object with paragraphs and tables") +def given_a_header_with_paragraphs_and_tables(context: Context): + context.header = Document(test_docx("blk-paras-and-tables")).sections[0].header -@given('a paragraph') -def given_a_paragraph(context): +@given("a paragraph") +def 
given_a_paragraph(context: Context): context.document = Document() context.paragraph = context.document.add_paragraph() # when ==================================================== -@when('I add a paragraph') -def when_add_paragraph(context): + +@when("I add a paragraph") +def when_add_paragraph(context: Context): document = context.document context.p = document.add_paragraph() -@when('I add a table') -def when_add_table(context): +@when("I add a table") +def when_add_table(context: Context): rows, cols = 2, 2 context.document.add_table(rows, cols) # then ===================================================== -@then('I can access the table') -def then_can_access_table(context): + +@then("cell.iter_inner_content() produces the block-items in document order") +def then_cell_iter_inner_content_produces_the_block_items(context: Context): + actual = [type(item).__name__ for item in context.cell.iter_inner_content()] + expected = ["Paragraph", "Table", "Paragraph"] + assert actual == expected, f"expected: {expected}, got: {actual}" + + +@then("document.iter_inner_content() produces the block-items in document order") +def then_document_iter_inner_content_produces_the_block_items(context: Context): + actual = [type(item).__name__ for item in context.document.iter_inner_content()] + expected = ["Table", "Paragraph", "Table", "Paragraph", "Table", "Paragraph"] + assert actual == expected, f"expected: {expected}, got: {actual}" + + +@then("footer.iter_inner_content() produces the block-items in document order") +def then_footer_iter_inner_content_produces_the_block_items(context: Context): + actual = [type(item).__name__ for item in context.footer.iter_inner_content()] + expected = ["Paragraph", "Table", "Paragraph"] + assert actual == expected, f"expected: {expected}, got: {actual}" + + +@then("header.iter_inner_content() produces the block-items in document order") +def then_header_iter_inner_content_produces_the_block_items(context: Context): + actual = [type(item).__name__ 
for item in context.header.iter_inner_content()] + expected = ["Table", "Paragraph"] + assert actual == expected, f"expected: {expected}, got: {actual}" + + +@then("I can access the table") +def then_can_access_table(context: Context): table = context.document.tables[-1] assert isinstance(table, Table) -@then('the new table appears in the document') -def then_new_table_appears_in_document(context): +@then("the new table appears in the document") +def then_new_table_appears_in_document(context: Context): table = context.document.tables[-1] assert isinstance(table, Table) diff --git a/features/steps/cell.py b/features/steps/cell.py deleted file mode 100644 index d1385c921..000000000 --- a/features/steps/cell.py +++ /dev/null @@ -1,54 +0,0 @@ -# encoding: utf-8 - -""" -Step implementations for table cell-related features -""" - -from __future__ import absolute_import, print_function, unicode_literals - -from behave import given, then, when - -from docx import Document - -from helpers import test_docx - - -# given =================================================== - -@given('a table cell') -def given_a_table_cell(context): - table = Document(test_docx('tbl-2x2-table')).tables[0] - context.cell = table.cell(0, 0) - - -# when ===================================================== - -@when('I add a 2 x 2 table into the first cell') -def when_I_add_a_2x2_table_into_the_first_cell(context): - context.table_ = context.cell.add_table(2, 2) - - -@when('I assign a string to the cell text attribute') -def when_assign_string_to_cell_text_attribute(context): - cell = context.cell - text = 'foobar' - cell.text = text - context.expected_text = text - - -# then ===================================================== - -@then('cell.tables[0] is a 2 x 2 table') -def then_cell_tables_0_is_a_2x2_table(context): - cell = context.cell - table = cell.tables[0] - assert len(table.rows) == 2 - assert len(table.columns) == 2 - - -@then('the cell contains the string I assigned') -def 
then_cell_contains_string_assigned(context): - cell, expected_text = context.cell, context.expected_text - text = cell.paragraphs[0].runs[0].text - msg = "expected '%s', got '%s'" % (expected_text, text) - assert text == expected_text, msg diff --git a/features/steps/comments.py b/features/steps/comments.py new file mode 100644 index 000000000..39680f257 --- /dev/null +++ b/features/steps/comments.py @@ -0,0 +1,284 @@ +"""Step implementations for document comments-related features.""" + +import datetime as dt + +from behave import given, then, when +from behave.runner import Context + +from docx import Document +from docx.comments import Comment, Comments +from docx.drawing import Drawing + +from helpers import test_docx + +# given ==================================================== + + +@given("a Comment object") +def given_a_comment_object(context: Context): + context.comment = Document(test_docx("comments-rich-para")).comments.get(0) + + +@given("a Comment object containing an embedded image") +def given_a_comment_object_containing_an_embedded_image(context: Context): + context.comment = Document(test_docx("comments-rich-para")).comments.get(1) + + +@given("a Comments object with {count} comments") +def given_a_comments_object_with_count_comments(context: Context, count: str): + testfile_name = {"0": "doc-default", "4": "comments-rich-para"}[count] + context.comments = Document(test_docx(testfile_name)).comments + + +@given("a default Comment object") +def given_a_default_comment_object(context: Context): + context.comment = Document(test_docx("comments-rich-para")).comments.add_comment() + + +@given("a document having a comments part") +def given_a_document_having_a_comments_part(context: Context): + context.document = Document(test_docx("comments-rich-para")) + + +@given("a document having no comments part") +def given_a_document_having_no_comments_part(context: Context): + context.document = Document(test_docx("doc-default")) + + +# when 
===================================================== + + +@when('I assign "{author}" to comment.author') +def when_I_assign_author_to_comment_author(context: Context, author: str): + context.comment.author = author + + +@when("I assign comment = comments.add_comment()") +def when_I_assign_comment_eq_add_comment(context: Context): + context.comment = context.comments.add_comment() + + +@when('I assign comment = comments.add_comment(author="John Doe", initials="JD")') +def when_I_assign_comment_eq_comments_add_comment_with_author_and_initials(context: Context): + context.comment = context.comments.add_comment(author="John Doe", initials="JD") + + +@when('I assign comment = document.add_comment(runs, "A comment", "John Doe", "JD")') +def when_I_assign_comment_eq_document_add_comment(context: Context): + runs = list(context.document.paragraphs[0].runs) + context.comment = context.document.add_comment( + runs=runs, + text="A comment", + author="John Doe", + initials="JD", + ) + + +@when('I assign "{initials}" to comment.initials') +def when_I_assign_initials(context: Context, initials: str): + context.comment.initials = initials + + +@when("I assign para_text = comment.paragraphs[0].text") +def when_I_assign_para_text(context: Context): + context.para_text = context.comment.paragraphs[0].text + + +@when("I assign paragraph = comment.add_paragraph()") +def when_I_assign_default_add_paragraph(context: Context): + context.paragraph = context.comment.add_paragraph() + + +@when("I assign paragraph = comment.add_paragraph(text, style)") +def when_I_assign_add_paragraph_with_text_and_style(context: Context): + context.para_text = text = "Comment text" + context.para_style = style = "Normal" + context.paragraph = context.comment.add_paragraph(text, style) + + +@when("I assign run = paragraph.add_run()") +def when_I_assign_paragraph_add_run(context: Context): + context.run = context.paragraph.add_run() + + +@when("I call comments.get(2)") +def 
when_I_call_comments_get_2(context: Context): + context.comment = context.comments.get(2) + + +# then ===================================================== + + +@then("comment is a Comment object") +def then_comment_is_a_Comment_object(context: Context): + assert type(context.comment) is Comment + + +@then('comment.author == "{author}"') +def then_comment_author_eq_author(context: Context, author: str): + actual = context.comment.author + assert actual == author, f"expected author '{author}', got '{actual}'" + + +@then("comment.author is the author of the comment") +def then_comment_author_is_the_author_of_the_comment(context: Context): + actual = context.comment.author + assert actual == "Steve Canny", f"expected author 'Steve Canny', got '{actual}'" + + +@then("comment.comment_id == 0") +def then_comment_id_is_0(context: Context): + assert context.comment.comment_id == 0 + + +@then("comment.comment_id is the comment identifier") +def then_comment_comment_id_is_the_comment_identifier(context: Context): + assert context.comment.comment_id == 0 + + +@then("comment.initials is the initials of the comment author") +def then_comment_initials_is_the_initials_of_the_comment_author(context: Context): + initials = context.comment.initials + assert initials == "SJC", f"expected initials 'SJC', got '{initials}'" + + +@then('comment.initials == "{initials}"') +def then_comment_initials_eq_initials(context: Context, initials: str): + actual = context.comment.initials + assert actual == initials, f"expected initials '{initials}', got '{actual}'" + + +@then("comment.paragraphs[{idx}] == paragraph") +def then_comment_paragraphs_idx_eq_paragraph(context: Context, idx: str): + actual = context.comment.paragraphs[int(idx)]._p + expected = context.paragraph._p + assert actual == expected, "paragraphs do not compare equal" + + +@then('comment.paragraphs[{idx}].style.name == "{style}"') +def then_comment_paragraphs_idx_style_name_eq_style(context: Context, idx: str, style: str): + 
actual = context.comment.paragraphs[int(idx)]._p.style + expected = style + assert actual == expected, f"expected style name '{expected}', got '{actual}'" + + +@then('comment.text == "{text}"') +def then_comment_text_eq_text(context: Context, text: str): + actual = context.comment.text + expected = text + assert actual == expected, f"expected text '{expected}', got '{actual}'" + + +@then("comment.timestamp is the date and time the comment was authored") +def then_comment_timestamp_is_the_date_and_time_the_comment_was_authored(context: Context): + assert context.comment.timestamp == dt.datetime(2025, 6, 7, 11, 20, 0, tzinfo=dt.timezone.utc) + + +@then("comments.get({id}) == comment") +def then_comments_get_comment_id_eq_comment(context: Context, id: str): + comment_id = int(id) + comment = context.comments.get(comment_id) + + assert type(comment) is Comment, f"expected a Comment object, got {type(comment)}" + assert comment.comment_id == comment_id, ( + f"expected comment_id '{comment_id}', got '{comment.comment_id}'" + ) + + +@then("document.comments is a Comments object") +def then_document_comments_is_a_Comments_object(context: Context): + document = context.document + assert type(document.comments) is Comments + + +@then("I can extract the image from the comment") +def then_I_can_extract_the_image_from_the_comment(context: Context): + paragraph = context.comment.paragraphs[0] + run = paragraph.runs[2] + drawing = next(d for d in run.iter_inner_content() if isinstance(d, Drawing)) + assert drawing.has_picture + + image = drawing.image + + assert image.content_type == "image/jpeg", f"got {image.content_type}" + assert image.filename == "image.jpg", f"got {image.filename}" + assert image.sha1 == "1be010ea47803b00e140b852765cdf84f491da47", f"got {image.sha1}" + + +@then("iterating comments yields {count} Comment objects") +def then_iterating_comments_yields_count_comments(context: Context, count: str): + comment_iter = iter(context.comments) + + comment = 
next(comment_iter) + assert type(comment) is Comment, f"expected a Comment object, got {type(comment)}" + + remaining = list(comment_iter) + assert len(remaining) == int(count) - 1, "iterating comments did not yield the expected count" + + +@then("len(comment.paragraphs) == {count}") +def then_len_comment_paragraphs_eq_count(context: Context, count: str): + actual = len(context.comment.paragraphs) + expected = int(count) + assert actual == expected, f"expected len(comment.paragraphs) of {expected}, got {actual}" + + +@then("len(comments) == {count}") +def then_len_comments_eq_count(context: Context, count: str): + actual = len(context.comments) + expected = int(count) + assert actual == expected, f"expected len(comments) of {expected}, got {actual}" + + +@then("para_text is the text of the first paragraph in the comment") +def then_para_text_is_the_text_of_the_first_paragraph_in_the_comment(context: Context): + actual = context.para_text + expected = "Text with hyperlink https://google.com embedded." 
+ assert actual == expected, f"expected para_text '{expected}', got '{actual}'" + + +@then("paragraph.style == style") +def then_paragraph_style_eq_known_style(context: Context): + actual = context.paragraph.style.name + expected = context.para_style + assert actual == expected, f"expected paragraph.style '{expected}', got '{actual}'" + + +@then('paragraph.style == "{style}"') +def then_paragraph_style_eq_style(context: Context, style: str): + actual = context.paragraph._p.style + expected = style + assert actual == expected, f"expected paragraph.style '{expected}', got '{actual}'" + + +@then("paragraph.text == text") +def then_paragraph_text_eq_known_text(context: Context): + actual = context.paragraph.text + expected = context.para_text + assert actual == expected, f"expected paragraph.text '{expected}', got '{actual}'" + + +@then('paragraph.text == ""') +def then_paragraph_text_eq_text(context: Context): + actual = context.paragraph.text + expected = "" + assert actual == expected, f"expected paragraph.text '{expected}', got '{actual}'" + + +@then("run.iter_inner_content() yields a single Picture drawing") +def then_run_iter_inner_content_yields_a_single_picture_drawing(context: Context): + inner_content = list(context.run.iter_inner_content()) + + assert len(inner_content) == 1, ( + f"expected a single inner content element, got {len(inner_content)}" + ) + inner_content_item = inner_content[0] + assert isinstance(inner_content_item, Drawing) + assert inner_content_item.has_picture + + +@then("the result is a Comment object with id 2") +def then_the_result_is_a_comment_object_with_id_2(context: Context): + comment = context.comment + assert type(comment) is Comment, f"expected a Comment object, got {type(comment)}" + assert comment.comment_id == 2, f"expected comment_id `2`, got '{comment.comment_id}'" diff --git a/features/steps/coreprops.py b/features/steps/coreprops.py index dc6be2e6c..90467fb67 100644 --- a/features/steps/coreprops.py +++ 
b/features/steps/coreprops.py @@ -1,60 +1,54 @@ -# encoding: utf-8 +"""Gherkin step implementations for core properties-related features.""" -""" -Gherkin step implementations for core properties-related features. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) - -from datetime import datetime, timedelta +import datetime as dt from behave import given, then, when +from behave.runner import Context from docx import Document from docx.opc.coreprops import CoreProperties from helpers import test_docx - # given =================================================== -@given('a document having known core properties') -def given_a_document_having_known_core_properties(context): - context.document = Document(test_docx('doc-coreprops')) +@given("a document having known core properties") +def given_a_document_having_known_core_properties(context: Context): + context.document = Document(test_docx("doc-coreprops")) -@given('a document having no core properties part') -def given_a_document_having_no_core_properties_part(context): - context.document = Document(test_docx('doc-no-coreprops')) + +@given("a document having no core properties part") +def given_a_document_having_no_core_properties_part(context: Context): + context.document = Document(test_docx("doc-no-coreprops")) # when ==================================================== -@when('I access the core properties object') -def when_I_access_the_core_properties_object(context): + +@when("I access the core properties object") +def when_I_access_the_core_properties_object(context: Context): context.document.core_properties @when("I assign new values to the properties") -def when_I_assign_new_values_to_the_properties(context): +def when_I_assign_new_values_to_the_properties(context: Context): context.propvals = ( - ('author', 'Creator'), - ('category', 'Category'), - ('comments', 'Description'), - ('content_status', 'Content Status'), - ('created', datetime(2013, 6, 15, 12, 34, 
56)), - ('identifier', 'Identifier'), - ('keywords', 'key; word; keyword'), - ('language', 'Language'), - ('last_modified_by', 'Last Modified By'), - ('last_printed', datetime(2013, 6, 15, 12, 34, 56)), - ('modified', datetime(2013, 6, 15, 12, 34, 56)), - ('revision', 9), - ('subject', 'Subject'), - ('title', 'Title'), - ('version', 'Version'), + ("author", "Creator"), + ("category", "Category"), + ("comments", "Description"), + ("content_status", "Content Status"), + ("created", dt.datetime(2013, 6, 15, 12, 34, 56, tzinfo=dt.timezone.utc)), + ("identifier", "Identifier"), + ("keywords", "key; word; keyword"), + ("language", "Language"), + ("last_modified_by", "Last Modified By"), + ("last_printed", dt.datetime(2013, 6, 15, 12, 34, 56, tzinfo=dt.timezone.utc)), + ("modified", dt.datetime(2013, 6, 15, 12, 34, 56, tzinfo=dt.timezone.utc)), + ("revision", 9), + ("subject", "Subject"), + ("title", "Title"), + ("version", "Version"), ) core_properties = context.document.core_properties for name, value in context.propvals: @@ -63,58 +57,61 @@ def when_I_assign_new_values_to_the_properties(context): # then ==================================================== -@then('a core properties part with default values is added') -def then_a_core_properties_part_with_default_values_is_added(context): + +@then("a core properties part with default values is added") +def then_a_core_properties_part_with_default_values_is_added(context: Context): core_properties = context.document.core_properties - assert core_properties.title == 'Word Document' - assert core_properties.last_modified_by == 'python-docx' + assert core_properties.title == "Word Document" + assert core_properties.last_modified_by == "python-docx" assert core_properties.revision == 1 # core_properties.modified only stores time with seconds resolution, so # comparison needs to be a little loose (within two seconds) - modified_timedelta = datetime.utcnow() - core_properties.modified - max_expected_timedelta = 
timedelta(seconds=2) + modified_timedelta = dt.datetime.now(dt.timezone.utc) - core_properties.modified + max_expected_timedelta = dt.timedelta(seconds=2) assert modified_timedelta < max_expected_timedelta -@then('I can access the core properties object') -def then_I_can_access_the_core_properties_object(context): +@then("I can access the core properties object") +def then_I_can_access_the_core_properties_object(context: Context): document = context.document core_properties = document.core_properties assert isinstance(core_properties, CoreProperties) -@then('the core property values match the known values') -def then_the_core_property_values_match_the_known_values(context): +@then("the core property values match the known values") +def then_the_core_property_values_match_the_known_values(context: Context): known_propvals = ( - ('author', 'Steve Canny'), - ('category', 'Category'), - ('comments', 'Description'), - ('content_status', 'Content Status'), - ('created', datetime(2014, 12, 13, 22, 2, 0)), - ('identifier', 'Identifier'), - ('keywords', 'key; word; keyword'), - ('language', 'Language'), - ('last_modified_by', 'Steve Canny'), - ('last_printed', datetime(2014, 12, 13, 22, 2, 42)), - ('modified', datetime(2014, 12, 13, 22, 6, 0)), - ('revision', 2), - ('subject', 'Subject'), - ('title', 'Title'), - ('version', '0.7.1a3'), + ("author", "Steve Canny"), + ("category", "Category"), + ("comments", "Description"), + ("content_status", "Content Status"), + ("created", dt.datetime(2014, 12, 13, 22, 2, 0, tzinfo=dt.timezone.utc)), + ("identifier", "Identifier"), + ("keywords", "key; word; keyword"), + ("language", "Language"), + ("last_modified_by", "Steve Canny"), + ("last_printed", dt.datetime(2014, 12, 13, 22, 2, 42, tzinfo=dt.timezone.utc)), + ("modified", dt.datetime(2014, 12, 13, 22, 6, 0, tzinfo=dt.timezone.utc)), + ("revision", 2), + ("subject", "Subject"), + ("title", "Title"), + ("version", "0.7.1a3"), ) core_properties = context.document.core_properties for 
name, expected_value in known_propvals: value = getattr(core_properties, name) - assert value == expected_value, ( - "got '%s' for core property '%s'" % (value, name) + assert value == expected_value, "got '%s' for core property '%s'" % ( + value, + name, ) -@then('the core property values match the new values') -def then_the_core_property_values_match_the_new_values(context): +@then("the core property values match the new values") +def then_the_core_property_values_match_the_new_values(context: Context): core_properties = context.document.core_properties for name, expected_value in context.propvals: value = getattr(core_properties, name) - assert value == expected_value, ( - "got '%s' for core property '%s'" % (value, name) + assert value == expected_value, "got '%s' for core property '%s'" % ( + value, + name, ) diff --git a/features/steps/document.py b/features/steps/document.py index 2827d8582..1c12ac106 100644 --- a/features/steps/document.py +++ b/features/steps/document.py @@ -1,115 +1,115 @@ -# encoding: utf-8 - -""" -Step implementations for document-related features -""" - -from __future__ import absolute_import, print_function, unicode_literals +"""Step implementations for document-related features.""" from behave import given, then, when from docx import Document from docx.enum.section import WD_ORIENT, WD_SECTION +from docx.section import Sections from docx.shape import InlineShapes from docx.shared import Inches -from docx.section import Sections from docx.styles.styles import Styles from docx.table import Table from docx.text.paragraph import Paragraph from helpers import test_docx, test_file - # given =================================================== -@given('a blank document') + +@given("a blank document") def given_a_blank_document(context): - context.document = Document(test_docx('doc-word-default-blank')) + context.document = Document(test_docx("doc-word-default-blank")) -@given('a document having built-in styles') +@given("a document having 
built-in styles") def given_a_document_having_builtin_styles(context): context.document = Document() -@given('a document having inline shapes') +@given("a document having inline shapes") def given_a_document_having_inline_shapes(context): - context.document = Document(test_docx('shp-inline-shape-access')) + context.document = Document(test_docx("shp-inline-shape-access")) -@given('a document having sections') +@given("a document having sections") def given_a_document_having_sections(context): - context.document = Document(test_docx('doc-access-sections')) + context.document = Document(test_docx("doc-access-sections")) -@given('a document having styles') +@given("a document having styles") def given_a_document_having_styles(context): - context.document = Document(test_docx('sty-having-styles-part')) + context.document = Document(test_docx("sty-having-styles-part")) -@given('a document having three tables') +@given("a document having three tables") def given_a_document_having_three_tables(context): - context.document = Document(test_docx('tbl-having-tables')) + context.document = Document(test_docx("tbl-having-tables")) -@given('a single-section document having portrait layout') +@given("a single-section document having portrait layout") def given_a_single_section_document_having_portrait_layout(context): - context.document = Document(test_docx('doc-add-section')) + context.document = Document(test_docx("doc-add-section")) section = context.document.sections[-1] context.original_dimensions = (section.page_width, section.page_height) +@given("a single-section Document object with headers and footers as document") +def given_a_single_section_Document_object_with_headers_and_footers(context): + context.document = Document(test_docx("doc-add-section")) + + # when ==================================================== -@when('I add a 2 x 2 table specifying only row and column count') + +@when("I add a 2 x 2 table specifying only row and column count") def 
when_add_2x2_table_specifying_only_row_and_col_count(context): document = context.document document.add_table(rows=2, cols=2) -@when('I add a 2 x 2 table specifying style \'{style_name}\'') +@when("I add a 2 x 2 table specifying style '{style_name}'") def when_add_2x2_table_specifying_style_name(context, style_name): document = context.document document.add_table(rows=2, cols=2, style=style_name) -@when('I add a heading specifying level={level}') +@when("I add a heading specifying level={level}") def when_add_heading_specifying_level(context, level): context.document.add_heading(level=int(level)) -@when('I add a heading specifying only its text') +@when("I add a heading specifying only its text") def when_add_heading_specifying_only_its_text(context): document = context.document - context.heading_text = text = 'Spam vs. Eggs' + context.heading_text = text = "Spam vs. Eggs" document.add_heading(text) -@when('I add a page break to the document') +@when("I add a page break to the document") def when_add_page_break_to_document(context): document = context.document document.add_page_break() -@when('I add a paragraph specifying its style as a {kind}') +@when("I add a paragraph specifying its style as a {kind}") def when_I_add_a_paragraph_specifying_its_style_as_a(context, kind): document = context.document - style = context.style = document.styles['Heading 1'] + style = context.style = document.styles["Heading 1"] style_spec = { - 'style object': style, - 'style name': 'Heading 1', + "style object": style, + "style name": "Heading 1", }[kind] document.add_paragraph(style=style_spec) -@when('I add a paragraph specifying its text') +@when("I add a paragraph specifying its text") def when_add_paragraph_specifying_text(context): document = context.document - context.paragraph_text = 'foobar' + context.paragraph_text = "foobar" document.add_paragraph(context.paragraph_text) -@when('I add a paragraph without specifying text or style') +@when("I add a paragraph without 
specifying text or style") def when_add_paragraph_without_specifying_text_or_style(context): document = context.document document.add_paragraph() @@ -119,39 +119,34 @@ def when_add_paragraph_without_specifying_text_or_style(context): def when_add_picture_specifying_width_and_height(context): document = context.document context.picture = document.add_picture( - test_file('monty-truth.png'), - width=Inches(1.75), height=Inches(2.5) + test_file("monty-truth.png"), width=Inches(1.75), height=Inches(2.5) ) -@when('I add a picture specifying a height of 1.5 inches') +@when("I add a picture specifying a height of 1.5 inches") def when_add_picture_specifying_height(context): document = context.document - context.picture = document.add_picture( - test_file('monty-truth.png'), height=Inches(1.5) - ) + context.picture = document.add_picture(test_file("monty-truth.png"), height=Inches(1.5)) -@when('I add a picture specifying a width of 1.5 inches') +@when("I add a picture specifying a width of 1.5 inches") def when_add_picture_specifying_width(context): document = context.document - context.picture = document.add_picture( - test_file('monty-truth.png'), width=Inches(1.5) - ) + context.picture = document.add_picture(test_file("monty-truth.png"), width=Inches(1.5)) -@when('I add a picture specifying only the image file') +@when("I add a picture specifying only the image file") def when_add_picture_specifying_only_image_file(context): document = context.document - context.picture = document.add_picture(test_file('monty-truth.png')) + context.picture = document.add_picture(test_file("monty-truth.png")) -@when('I add an even-page section to the document') +@when("I add an even-page section to the document") def when_I_add_an_even_page_section_to_the_document(context): context.section = context.document.add_section(WD_SECTION.EVEN_PAGE) -@when('I change the new section layout to landscape') +@when("I change the new section layout to landscape") def 
when_I_change_the_new_section_layout_to_landscape(context): new_height, new_width = context.original_dimensions section = context.section @@ -160,16 +155,22 @@ def when_I_change_the_new_section_layout_to_landscape(context): section.page_height = new_height +@when("I execute section = document.add_section()") +def when_I_execute_section_eq_document_add_section(context): + context.section = context.document.add_section() + + # then ==================================================== -@then('document.inline_shapes is an InlineShapes object') + +@then("document.inline_shapes is an InlineShapes object") def then_document_inline_shapes_is_an_InlineShapes_object(context): document = context.document inline_shapes = document.inline_shapes assert isinstance(inline_shapes, InlineShapes) -@then('document.paragraphs is a list containing three paragraphs') +@then("document.paragraphs is a list containing three paragraphs") def then_document_paragraphs_is_a_list_containing_three_paragraphs(context): document = context.document paragraphs = document.paragraphs @@ -179,20 +180,20 @@ def then_document_paragraphs_is_a_list_containing_three_paragraphs(context): assert isinstance(paragraph, Paragraph) -@then('document.sections is a Sections object') +@then("document.sections is a Sections object") def then_document_sections_is_a_Sections_object(context): sections = context.document.sections - msg = 'document.sections not instance of Sections' + msg = "document.sections not instance of Sections" assert isinstance(sections, Sections), msg -@then('document.styles is a Styles object') +@then("document.styles is a Styles object") def then_document_styles_is_a_Styles_object(context): styles = context.document.styles assert isinstance(styles, Styles) -@then('document.tables is a list containing three tables') +@then("document.tables is a list containing three tables") def then_document_tables_is_a_list_containing_three_tables(context): document = context.document tables = document.tables @@ 
-202,7 +203,7 @@ def then_document_tables_is_a_list_containing_three_tables(context): assert isinstance(table, Table) -@then('the document contains a 2 x 2 table') +@then("the document contains a 2 x 2 table") def then_the_document_contains_a_2x2_table(context): table = context.document.tables[-1] assert isinstance(table, Table) @@ -211,12 +212,12 @@ def then_the_document_contains_a_2x2_table(context): context.table_ = table -@then('the document has two sections') +@then("the document has two sections") def then_the_document_has_two_sections(context): assert len(context.document.sections) == 2 -@then('the first section is portrait') +@then("the first section is portrait") def then_the_first_section_is_portrait(context): first_section = context.document.sections[0] expected_width, expected_height = context.original_dimensions @@ -225,16 +226,16 @@ def then_the_first_section_is_portrait(context): assert first_section.page_height == expected_height -@then('the last paragraph contains only a page break') +@then("the last paragraph contains only a page break") def then_last_paragraph_contains_only_a_page_break(context): document = context.document paragraph = document.paragraphs[-1] assert len(paragraph.runs) == 1 assert len(paragraph.runs[0]._r) == 1 - assert paragraph.runs[0]._r[0].type == 'page' + assert paragraph.runs[0]._r[0].type == "page" -@then('the last paragraph contains the heading text') +@then("the last paragraph contains the heading text") def then_last_p_contains_heading_text(context): document = context.document text = context.heading_text @@ -242,7 +243,7 @@ def then_last_p_contains_heading_text(context): assert paragraph.text == text -@then('the second section is landscape') +@then("the second section is landscape") def then_the_second_section_is_landscape(context): new_section = context.document.sections[-1] expected_height, expected_width = context.original_dimensions @@ -251,10 +252,8 @@ def then_the_second_section_is_landscape(context): assert 
new_section.page_height == expected_height -@then('the style of the last paragraph is \'{style_name}\'') +@then("the style of the last paragraph is '{style_name}'") def then_the_style_of_the_last_paragraph_is_style(context, style_name): document = context.document paragraph = document.paragraphs[-1] - assert paragraph.style.name == style_name, ( - 'got %s' % paragraph.style.name - ) + assert paragraph.style.name == style_name, "got %s" % paragraph.style.name diff --git a/features/steps/font.py b/features/steps/font.py index 60f308d86..63ca6b48e 100644 --- a/features/steps/font.py +++ b/features/steps/font.py @@ -1,12 +1,4 @@ -# encoding: utf-8 - -""" -Step implementations for font-related features. -""" - -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) +"""Step implementations for font-related features.""" from behave import given, then, when @@ -18,140 +10,139 @@ from helpers import test_docx - # given =================================================== -@given('a font') + +@given("a font") def given_a_font(context): - document = Document(test_docx('txt-font-props')) + document = Document(test_docx("txt-font-props")) context.font = document.paragraphs[0].runs[0].font -@given('a font having {color} highlighting') +@given("a font having {color} highlighting") def given_a_font_having_color_highlighting(context, color): paragraph_index = { - 'no': 0, - 'yellow': 1, - 'bright green': 2, + "no": 0, + "yellow": 1, + "bright green": 2, }[color] - document = Document(test_docx('txt-font-highlight-color')) + document = Document(test_docx("txt-font-highlight-color")) context.font = document.paragraphs[paragraph_index].runs[0].font -@given('a font having {type} color') +@given("a font having {type} color") def given_a_font_having_type_color(context, type): - run_idx = ['no', 'auto', 'an RGB', 'a theme'].index(type) - document = Document(test_docx('fnt-color')) + run_idx = ["no", "auto", "an RGB", "a theme"].index(type) + document = 
Document(test_docx("fnt-color")) context.font = document.paragraphs[0].runs[run_idx].font -@given('a font having typeface name {name}') +@given("a font having typeface name {name}") def given_a_font_having_typeface_name(context, name): - document = Document(test_docx('txt-font-props')) + document = Document(test_docx("txt-font-props")) style_name = { - 'not specified': 'Normal', - 'Avenir Black': 'Having Typeface', + "not specified": "Normal", + "Avenir Black": "Having Typeface", }[name] context.font = document.styles[style_name].font -@given('a font having {underline_type} underline') +@given("a font having {underline_type} underline") def given_a_font_having_type_underline(context, underline_type): style_name = { - 'inherited': 'Normal', - 'no': 'None Underlined', - 'single': 'Underlined', - 'double': 'Double Underlined', + "inherited": "Normal", + "no": "None Underlined", + "single": "Underlined", + "double": "Double Underlined", }[underline_type] - document = Document(test_docx('txt-font-props')) + document = Document(test_docx("txt-font-props")) context.font = document.styles[style_name].font -@given('a font having {vertAlign_state} vertical alignment') +@given("a font having {vertAlign_state} vertical alignment") def given_a_font_having_vertAlign_state(context, vertAlign_state): style_name = { - 'inherited': 'Normal', - 'subscript': 'Subscript', - 'superscript': 'Superscript', + "inherited": "Normal", + "subscript": "Subscript", + "superscript": "Superscript", }[vertAlign_state] - document = Document(test_docx('txt-font-props')) + document = Document(test_docx("txt-font-props")) context.font = document.styles[style_name].font -@given('a font of size {size}') +@given("a font of size {size}") def given_a_font_of_size(context, size): - document = Document(test_docx('txt-font-props')) + document = Document(test_docx("txt-font-props")) style_name = { - 'unspecified': 'Normal', - '14 pt': 'Having Typeface', - '18 pt': 'Large Size', + "unspecified": "Normal", + "14 
pt": "Having Typeface", + "18 pt": "Large Size", }[size] context.font = document.styles[style_name].font # when ==================================================== -@when('I assign {value} to font.color.rgb') + +@when("I assign {value} to font.color.rgb") def when_I_assign_value_to_font_color_rgb(context, value): font = context.font - new_value = None if value == 'None' else RGBColor.from_string(value) + new_value = None if value == "None" else RGBColor.from_string(value) font.color.rgb = new_value -@when('I assign {value} to font.color.theme_color') +@when("I assign {value} to font.color.theme_color") def when_I_assign_value_to_font_color_theme_color(context, value): font = context.font - new_value = None if value == 'None' else getattr(MSO_THEME_COLOR, value) + new_value = None if value == "None" else getattr(MSO_THEME_COLOR, value) font.color.theme_color = new_value -@when('I assign {value} to font.highlight_color') +@when("I assign {value} to font.highlight_color") def when_I_assign_value_to_font_highlight_color(context, value): font = context.font - expected_value = ( - None if value == 'None' else getattr(WD_COLOR_INDEX, value) - ) + expected_value = None if value == "None" else getattr(WD_COLOR_INDEX, value) font.highlight_color = expected_value -@when('I assign {value} to font.name') +@when("I assign {value} to font.name") def when_I_assign_value_to_font_name(context, value): font = context.font - value = None if value == 'None' else value + value = None if value == "None" else value font.name = value -@when('I assign {value} to font.size') +@when("I assign {value} to font.size") def when_I_assign_value_str_to_font_size(context, value): - value = None if value == 'None' else int(value) + value = None if value == "None" else int(value) font = context.font font.size = value -@when('I assign {value} to font.underline') +@when("I assign {value} to font.underline") def when_I_assign_value_to_font_underline(context, value): new_value = { - 'True': True, - 
'False': False, - 'None': None, - 'WD_UNDERLINE.SINGLE': WD_UNDERLINE.SINGLE, - 'WD_UNDERLINE.DOUBLE': WD_UNDERLINE.DOUBLE, + "True": True, + "False": False, + "None": None, + "WD_UNDERLINE.SINGLE": WD_UNDERLINE.SINGLE, + "WD_UNDERLINE.DOUBLE": WD_UNDERLINE.DOUBLE, }[value] font = context.font font.underline = new_value -@when('I assign {value} to font.{sub_super}script') +@when("I assign {value} to font.{sub_super}script") def when_I_assign_value_to_font_sub_super(context, value, sub_super): font = context.font name = { - 'sub': 'subscript', - 'super': 'superscript', + "sub": "subscript", + "super": "superscript", }[sub_super] new_value = { - 'None': None, - 'True': True, - 'False': False, + "None": None, + "True": True, + "False": False, }[value] setattr(font, name, new_value) @@ -159,82 +150,77 @@ def when_I_assign_value_to_font_sub_super(context, value, sub_super): # then ===================================================== -@then('font.color is a ColorFormat object') + +@then("font.color is a ColorFormat object") def then_font_color_is_a_ColorFormat_object(context): font = context.font assert isinstance(font.color, ColorFormat) -@then('font.color.rgb is {value}') +@then("font.color.rgb is {value}") def then_font_color_rgb_is_value(context, value): font = context.font - expected_value = None if value == 'None' else RGBColor.from_string(value) + expected_value = None if value == "None" else RGBColor.from_string(value) assert font.color.rgb == expected_value -@then('font.color.theme_color is {value}') +@then("font.color.theme_color is {value}") def then_font_color_theme_color_is_value(context, value): font = context.font - expected_value = ( - None if value == 'None' else getattr(MSO_THEME_COLOR, value) - ) + expected_value = None if value == "None" else getattr(MSO_THEME_COLOR, value) assert font.color.theme_color == expected_value -@then('font.color.type is {value}') +@then("font.color.type is {value}") def then_font_color_type_is_value(context, value): font = 
context.font - expected_value = ( - None if value == 'None' else getattr(MSO_COLOR_TYPE, value) - ) + expected_value = None if value == "None" else getattr(MSO_COLOR_TYPE, value) assert font.color.type == expected_value -@then('font.highlight_color is {value}') +@then("font.highlight_color is {value}") def then_font_highlight_color_is_value(context, value): font = context.font - expected_value = ( - None if value == 'None' else getattr(WD_COLOR_INDEX, value) - ) + expected_value = None if value == "None" else getattr(WD_COLOR_INDEX, value) assert font.highlight_color == expected_value -@then('font.name is {value}') +@then("font.name is {value}") def then_font_name_is_value(context, value): font = context.font - value = None if value == 'None' else value + value = None if value == "None" else value assert font.name == value -@then('font.size is {value}') +@then("font.size is {value}") def then_font_size_is_value(context, value): - value = None if value == 'None' else int(value) + value = None if value == "None" else int(value) font = context.font assert font.size == value -@then('font.underline is {value}') +@then("font.underline is {value}") def then_font_underline_is_value(context, value): expected_value = { - 'None': None, - 'True': True, - 'False': False, - 'WD_UNDERLINE.DOUBLE': WD_UNDERLINE.DOUBLE, + "None": None, + "True": True, + "False": False, + "WD_UNDERLINE.DOUBLE": WD_UNDERLINE.DOUBLE, }[value] font = context.font assert font.underline == expected_value -@then('font.{sub_super}script is {value}') +@then("font.{sub_super}script is {value}") def then_font_sub_super_is_value(context, sub_super, value): name = { - 'sub': 'subscript', - 'super': 'superscript', + "sub": "subscript", + "super": "superscript", }[sub_super] expected_value = { - 'None': None, - 'True': True, - 'False': False, + "None": None, + "True": True, + "False": False, }[value] font = context.font actual_value = getattr(font, name) diff --git a/features/steps/hdrftr.py 
b/features/steps/hdrftr.py new file mode 100644 index 000000000..5949f961c --- /dev/null +++ b/features/steps/hdrftr.py @@ -0,0 +1,137 @@ +"""Step implementations for header and footer-related features.""" + +from behave import given, then, when + +from docx import Document + +from helpers import test_docx, test_file + +# given ==================================================== + + +@given("a _Footer object {with_or_no} footer definition as footer") +def given_a_Footer_object_with_or_no_footer_definition(context, with_or_no): + section_idx = {"with a": 0, "with no": 1}[with_or_no] + context.sections = Document(test_docx("hdr-header-footer")).sections + context.footer = context.sections[section_idx].footer + + +@given("a _Header object {with_or_no} header definition as header") +def given_a_Header_object_with_or_no_header_definition(context, with_or_no): + section_idx = {"with a": 0, "with no": 1}[with_or_no] + context.sections = Document(test_docx("hdr-header-footer")).sections + context.header = context.sections[section_idx].header + + +@given("a _Run object from a footer as run") +def given_a_Run_object_from_a_footer_as_run(context): + footer = Document(test_docx("hdr-header-footer")).sections[0].footer + context.run = footer.paragraphs[0].add_run() + + +@given("a _Run object from a header as run") +def given_a_Run_object_from_a_header_as_run(context): + header = Document(test_docx("hdr-header-footer")).sections[0].header + context.run = header.paragraphs[0].add_run() + + +@given("the next _Footer object with no footer definition as footer_2") +def given_the_next_Footer_object_with_no_footer_definition(context): + context.footer_2 = context.sections[1].footer + + +@given("the next _Header object with no header definition as header_2") +def given_the_next_Header_object_with_no_header_definition(context): + context.header_2 = context.sections[1].header + + +# when ===================================================== + + +@when('I assign "Normal" to 
footer.paragraphs[0].style') +def when_I_assign_Body_Text_to_footer_style(context): + context.footer.paragraphs[0].style = "Normal" + + +@when('I assign "Normal" to header.paragraphs[0].style') +def when_I_assign_Body_Text_to_header_style(context): + context.header.paragraphs[0].style = "Normal" + + +@when("I assign {value} to header.is_linked_to_previous") +def when_I_assign_value_to_header_is_linked_to_previous(context, value): + context.header.is_linked_to_previous = eval(value) + + +@when("I assign {value} to footer.is_linked_to_previous") +def when_I_assign_value_to_footer_is_linked_to_previous(context, value): + context.footer.is_linked_to_previous = eval(value) + + +@when("I call run.add_picture()") +def when_I_call_run_add_picture(context): + context.run.add_picture(test_file("test.png")) + + +# then ===================================================== + + +@then("footer.is_linked_to_previous is {value}") +def then_footer_is_linked_to_previous_is_value(context, value): + actual = context.footer.is_linked_to_previous + expected = eval(value) + assert actual == expected, "footer.is_linked_to_previous is %s" % actual + + +@then('footer.paragraphs[0].style.name == "Normal"') +def then_footer_paragraphs_0_style_name_eq_Normal(context): + actual = context.footer.paragraphs[0].style.name + expected = "Normal" + assert actual == expected, "footer.paragraphs[0].style.name is %s" % actual + + +@then("footer_2.is_linked_to_previous is {value}") +def then_footer_2_is_linked_to_previous_is_value(context, value): + actual = context.footer_2.is_linked_to_previous + expected = eval(value) + assert actual == expected, "footer_2.is_linked_to_previous is %s" % actual + + +@then("footer_2.paragraphs[0].text == footer.paragraphs[0].text") +def then_footer_2_text_eq_footer_text(context): + actual = context.footer_2.paragraphs[0].text + expected = context.footer.paragraphs[0].text + assert actual == expected, "footer_2.paragraphs[0].text == %s" % actual + + 
+@then("header.is_linked_to_previous is {value}") +def then_header_is_linked_to_previous_is_value(context, value): + actual = context.header.is_linked_to_previous + expected = eval(value) + assert actual == expected, "header.is_linked_to_previous is %s" % actual + + +@then('header.paragraphs[0].style.name == "Normal"') +def then_header_paragraphs_0_style_name_eq_Normal(context): + actual = context.header.paragraphs[0].style.name + expected = "Normal" + assert actual == expected, "header.paragraphs[0].style.name is %s" % actual + + +@then("header_2.is_linked_to_previous is {value}") +def then_header_2_is_linked_to_previous_is_value(context, value): + actual = context.header_2.is_linked_to_previous + expected = eval(value) + assert actual == expected, "header_2.is_linked_to_previous is %s" % actual + + +@then("header_2.paragraphs[0].text == header.paragraphs[0].text") +def then_header_2_text_eq_header_text(context): + actual = context.header_2.paragraphs[0].text + expected = context.header.paragraphs[0].text + assert actual == expected, "header_2.paragraphs[0].text == %s" % actual + + +@then("I can't detect the image but no exception is raised") +def then_I_cant_detect_the_image_but_no_exception_is_raised(context): + pass diff --git a/features/steps/helpers.py b/features/steps/helpers.py index 24ce059fe..fc40697b2 100644 --- a/features/steps/helpers.py +++ b/features/steps/helpers.py @@ -1,44 +1,34 @@ -# encoding: utf-8 - -""" -Helper methods and variables for acceptance tests. 
-""" +"""Helper methods and variables for acceptance tests.""" import os -def absjoin(*paths): +def absjoin(*paths: str) -> str: return os.path.abspath(os.path.join(*paths)) -thisdir = os.path.split(__file__)[0] -scratch_dir = absjoin(thisdir, '../_scratch') + +thisdir: str = os.path.split(__file__)[0] +scratch_dir: str = absjoin(thisdir, "../_scratch") # scratch output docx file ------------- -saved_docx_path = absjoin(scratch_dir, 'test_out.docx') +saved_docx_path: str = absjoin(scratch_dir, "test_out.docx") -bool_vals = { - 'True': True, - 'False': False -} +bool_vals = {"True": True, "False": False} -test_text = 'python-docx was here!' +test_text = "python-docx was here!" tri_state_vals = { - 'True': True, - 'False': False, - 'None': None, + "True": True, + "False": False, + "None": None, } -def test_docx(name): - """ - Return the absolute path to test .docx file with root name *name*. - """ - return absjoin(thisdir, 'test_files', '%s.docx' % name) +def test_docx(name: str): + """Return the absolute path to test .docx file with root name `name`.""" + return absjoin(thisdir, "test_files", "%s.docx" % name) -def test_file(name): - """ - Return the absolute path to file with *name* in test_files directory - """ - return absjoin(thisdir, 'test_files', '%s' % name) +def test_file(name: str): + """Return the absolute path to file with `name` in test_files directory""" + return absjoin(thisdir, "test_files", "%s" % name) diff --git a/features/steps/hyperlink.py b/features/steps/hyperlink.py new file mode 100644 index 000000000..14fa9f7be --- /dev/null +++ b/features/steps/hyperlink.py @@ -0,0 +1,116 @@ +"""Step implementations for hyperlink-related features.""" + +from __future__ import annotations + +from typing import Dict, Tuple + +from behave import given, then +from behave.runner import Context + +from docx import Document + +from helpers import test_docx + +# given =================================================== + + +@given("a hyperlink") +def 
given_a_hyperlink(context: Context): + document = Document(test_docx("par-hyperlinks")) + context.hyperlink = document.paragraphs[1].hyperlinks[0] + + +@given("a hyperlink having a URI fragment") +def given_a_hyperlink_having_a_uri_fragment(context: Context): + document = Document(test_docx("par-hlink-frags")) + context.hyperlink = document.paragraphs[1].hyperlinks[0] + + +@given("a hyperlink having address {address} and fragment {fragment}") +def given_a_hyperlink_having_address_and_fragment(context: Context, address: str, fragment: str): + paragraph_idxs: Dict[Tuple[str, str], int] = { + ("''", "linkedBookmark"): 1, + ("https://foo.com", "''"): 2, + ("https://foo.com?q=bar", "''"): 3, + ("http://foo.com/", "intro"): 4, + ("https://foo.com?q=bar#baz", "''"): 5, + ("court-exif.jpg", "''"): 7, + } + paragraph_idx = paragraph_idxs[(address, fragment)] + document = Document(test_docx("par-hlink-frags")) + paragraph = document.paragraphs[paragraph_idx] + context.hyperlink = paragraph.hyperlinks[0] + + +@given("a hyperlink having {zero_or_more} rendered page breaks") +def given_a_hyperlink_having_rendered_page_breaks(context: Context, zero_or_more: str): + paragraph_idx = { + "no": 1, + "one": 2, + }[zero_or_more] + document = Document(test_docx("par-hyperlinks")) + paragraph = document.paragraphs[paragraph_idx] + context.hyperlink = paragraph.hyperlinks[0] + + +@given("a hyperlink having {one_or_more} runs") +def given_a_hyperlink_having_one_or_more_runs(context: Context, one_or_more: str): + paragraph_idx, hyperlink_idx = { + "one": (1, 0), + "two": (2, 1), + }[one_or_more] + document = Document(test_docx("par-hyperlinks")) + paragraph = document.paragraphs[paragraph_idx] + context.hyperlink = paragraph.hyperlinks[hyperlink_idx] + + +# then ===================================================== + + +@then("hyperlink.address is the URL of the hyperlink") +def then_hyperlink_address_is_the_URL_of_the_hyperlink(context: Context): + actual_value = context.hyperlink.address 
+ expected_value = "http://yahoo.com/" + assert actual_value == expected_value, f"expected: {expected_value}, got: {actual_value}" + + +@then("hyperlink.contains_page_break is {value}") +def then_hyperlink_contains_page_break_is_value(context: Context, value: str): + actual_value = context.hyperlink.contains_page_break + expected_value = {"True": True, "False": False}[value] + assert actual_value == expected_value, f"expected: {expected_value}, got: {actual_value}" + + +@then("hyperlink.fragment is the URI fragment of the hyperlink") +def then_hyperlink_fragment_is_the_URI_fragment_of_the_hyperlink(context: Context): + actual_value = context.hyperlink.fragment + expected_value = "linkedBookmark" + assert actual_value == expected_value, f"expected: {expected_value}, got: {actual_value}" + + +@then("hyperlink.runs contains only Run instances") +def then_hyperlink_runs_contains_only_Run_instances(context: Context): + actual_value = [type(item).__name__ for item in context.hyperlink.runs] + expected_value = ["Run" for _ in context.hyperlink.runs] + assert actual_value == expected_value, f"expected: {expected_value}, got: {actual_value}" + + +@then("hyperlink.runs has length {value}") +def then_hyperlink_runs_has_length(context: Context, value: str): + actual_value = len(context.hyperlink.runs) + expected_value = int(value) + assert actual_value == expected_value, f"expected: {expected_value}, got: {actual_value}" + + +@then("hyperlink.text is the visible text of the hyperlink") +def then_hyperlink_text_is_the_visible_text_of_the_hyperlink(context: Context): + actual_value = context.hyperlink.text + expected_value = "awesome hyperlink" + assert actual_value == expected_value, f"expected: {expected_value}, got: {actual_value}" + + +@then("hyperlink.url is {value}") +def then_hyperlink_url_is_value(context: Context, value: str): + actual_value = context.hyperlink.url + expected_value = "" if value == "''" else value + assert actual_value == expected_value, f"expected: 
{expected_value}, got: {actual_value}" diff --git a/features/steps/image.py b/features/steps/image.py index ee2a35c17..5ac54169b 100644 --- a/features/steps/image.py +++ b/features/steps/image.py @@ -1,10 +1,4 @@ -# encoding: utf-8 - -""" -Step implementations for image characterization features -""" - -from __future__ import absolute_import, print_function, unicode_literals +"""Step implementations for image characterization features.""" from behave import given, then, when @@ -12,62 +6,69 @@ from helpers import test_file - # given =================================================== -@given('the image file \'{filename}\'') + +@given("the image file '{filename}'") def given_image_filename(context, filename): context.image_path = test_file(filename) # when ==================================================== -@when('I construct an image using the image path') + +@when("I construct an image using the image path") def when_construct_image_using_path(context): context.image = Image.from_file(context.image_path) # then ==================================================== -@then('the image has content type \'{mime_type}\'') + +@then("the image has content type '{mime_type}'") def then_image_has_content_type(context, mime_type): content_type = context.image.content_type - assert content_type == mime_type, ( - "expected MIME type '%s', got '%s'" % (mime_type, content_type) + assert content_type == mime_type, "expected MIME type '%s', got '%s'" % ( + mime_type, + content_type, ) -@then('the image has {horz_dpi_str} horizontal dpi') +@then("the image has {horz_dpi_str} horizontal dpi") def then_image_has_horizontal_dpi(context, horz_dpi_str): expected_horz_dpi = int(horz_dpi_str) horz_dpi = context.image.horz_dpi - assert horz_dpi == expected_horz_dpi, ( - "expected horizontal dpi %d, got %d" % (expected_horz_dpi, horz_dpi) + assert horz_dpi == expected_horz_dpi, "expected horizontal dpi %d, got %d" % ( + expected_horz_dpi, + horz_dpi, ) -@then('the image has {vert_dpi_str} 
vertical dpi') +@then("the image has {vert_dpi_str} vertical dpi") def then_image_has_vertical_dpi(context, vert_dpi_str): expected_vert_dpi = int(vert_dpi_str) vert_dpi = context.image.vert_dpi - assert vert_dpi == expected_vert_dpi, ( - "expected vertical dpi %d, got %d" % (expected_vert_dpi, vert_dpi) + assert vert_dpi == expected_vert_dpi, "expected vertical dpi %d, got %d" % ( + expected_vert_dpi, + vert_dpi, ) -@then('the image is {px_height_str} pixels high') +@then("the image is {px_height_str} pixels high") def then_image_is_cx_pixels_high(context, px_height_str): expected_px_height = int(px_height_str) px_height = context.image.px_height - assert px_height == expected_px_height, ( - "expected pixel height %d, got %d" % (expected_px_height, px_height) + assert px_height == expected_px_height, "expected pixel height %d, got %d" % ( + expected_px_height, + px_height, ) -@then('the image is {px_width_str} pixels wide') +@then("the image is {px_width_str} pixels wide") def then_image_is_cx_pixels_wide(context, px_width_str): expected_px_width = int(px_width_str) px_width = context.image.px_width - assert px_width == expected_px_width, ( - "expected pixel width %d, got %d" % (expected_px_width, px_width) + assert px_width == expected_px_width, "expected pixel width %d, got %d" % ( + expected_px_width, + px_width, ) diff --git a/features/steps/numbering.py b/features/steps/numbering.py index ea41cdeb5..be88ceee7 100644 --- a/features/steps/numbering.py +++ b/features/steps/numbering.py @@ -1,8 +1,4 @@ -# encoding: utf-8 - -""" -Step implementations for numbering-related features -""" +"""Step implementations for numbering-related features.""" from behave import given, then, when @@ -10,17 +6,18 @@ from helpers import test_docx - # given =================================================== -@given('a document having a numbering part') + +@given("a document having a numbering part") def given_a_document_having_a_numbering_part(context): - context.document = 
Document(test_docx('num-having-numbering-part')) + context.document = Document(test_docx("num-having-numbering-part")) # when ==================================================== -@when('I get the numbering part from the document') + +@when("I get the numbering part from the document") def when_get_numbering_part_from_document(context): document = context.document context.numbering_part = document.part.numbering_part @@ -28,7 +25,8 @@ def when_get_numbering_part_from_document(context): # then ===================================================== -@then('the numbering part has the expected numbering definitions') + +@then("the numbering part has the expected numbering definitions") def then_numbering_part_has_expected_numbering_definitions(context): numbering_part = context.numbering_part assert len(numbering_part.numbering_definitions) == 10 diff --git a/features/steps/pagebreak.py b/features/steps/pagebreak.py new file mode 100644 index 000000000..870428127 --- /dev/null +++ b/features/steps/pagebreak.py @@ -0,0 +1,135 @@ +"""Step implementations for rendered page-break related features.""" + +from __future__ import annotations + +from behave import given, then +from behave.runner import Context + +from docx import Document +from docx.enum.text import WD_PARAGRAPH_ALIGNMENT + +from helpers import test_docx + +# given =================================================== + + +@given("a rendered_page_break in a hyperlink") +def given_a_rendered_page_break_in_a_hyperlink(context: Context): + document = Document(test_docx("par-rendered-page-breaks")) + paragraph = document.paragraphs[2] + context.rendered_page_break = paragraph.rendered_page_breaks[0] + + +@given("a rendered_page_break in a paragraph") +def given_a_rendered_page_break_in_a_paragraph(context: Context): + document = Document(test_docx("par-rendered-page-breaks")) + paragraph = document.paragraphs[1] + context.rendered_page_break = paragraph.rendered_page_breaks[0] + + +# then 
===================================================== + + +@then("rendered_page_break.preceding_paragraph_fragment includes the hyperlink") +def then_rendered_page_break_preceding_paragraph_fragment_includes_the_hyperlink( + context: Context, +): + para_frag = context.rendered_page_break.preceding_paragraph_fragment + + actual_value = type(para_frag).__name__ + expected_value = "Paragraph" + assert actual_value == expected_value, f"expected: '{expected_value}', got: '{actual_value}'" + + actual_value = para_frag.text + expected_value = "Page break in>><