diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..2f09165 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,44 @@ +name: ci + +on: [pull_request, push] + +jobs: + build-with-required-deps: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ['3.7', '3.8', '3.9', '3.10'] + steps: + - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install + run: python setup.py install + - name: Tests + run: | + python -m unittest + + build-with-optional-deps: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ['3.7', '3.8', '3.9', '3.10'] + steps: + - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install with optional dependencies + run: | + pip install regex + python setup.py install + - name: Tests + run: | + python -m unittest diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml new file mode 100644 index 0000000..eeae922 --- /dev/null +++ b/.github/workflows/coverage.yml @@ -0,0 +1,24 @@ +name: CodeCov +on: [push, pull_request] +jobs: + codecov-build-with-regex: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: '2' + + - name: Setup Python + uses: actions/setup-python@master + with: + python-version: '3.x' + - name: Install Dependencies + run: | + pip install regex + python setup.py install + - name: Generate Coverage Report + run: | + pip install coverage + coverage run -m unittest + - name: Upload Coverage to Codecov + uses: codecov/codecov-action@v2 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 396fbf8..0000000 --- a/.travis.yml +++ /dev/null @@ -1,13 +0,0 @@ -sudo: false -language: python - -python: - - "2.7" - - "3.4" - - "3.5" - - "3.6" - - "3.7" - -install: pip install tox-travis regex coveralls nose -script: tox - diff --git a/AUTHORS.rst b/AUTHORS.rst new file mode 100644 index 0000000..876f603 --- /dev/null +++ b/AUTHORS.rst @@ -0,0 +1,8 @@ +************ +Contributors +************ + +* John Gruber +* Stuart Colville +* Pat Pannuto +* Sam Brockie diff --git a/LICENSE.md b/LICENSE.txt similarity index 96% rename from LICENSE.md rename to LICENSE.txt index 766a0a5..f6e7117 100644 --- a/LICENSE.md +++ b/LICENSE.txt @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) {{{year}}} {{{fullname}}} +Copyright (c) 2021 Patrick William Pannuto Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.rst b/README.rst index 2fc99c3..75ad4d0 100644 --- a/README.rst +++ b/README.rst @@ -1,10 +1,8 @@ Titlecase ========= -.. image:: https://travis-ci.org/ppannuto/python-titlecase.svg?branch=master - :target: https://travis-ci.org/ppannuto/python-titlecase -.. image:: https://coveralls.io/repos/github/ppannuto/python-titlecase/badge.svg?branch=master - :target: https://coveralls.io/github/ppannuto/python-titlecase?branch=master +.. image:: https://codecov.io/gh/ppannuto/python-titlecase/branch/main/graph/badge.svg?token=J1Li8uhB8q + :target: https://codecov.io/gh/ppannuto/python-titlecase This filter changes a given text to Title Caps, and attempts to be clever about SMALL words like a/an/the in the input. @@ -24,7 +22,7 @@ The filter employs some heuristics to guess abbreviations that don't need conver +------------------+----------------+ More examples and expected behavior for corner cases are available in the -`package test suite `__. +`package test suite `__. This library is a resurrection of `Stuart Colville's titlecase.py `__, @@ -77,7 +75,7 @@ Command Line Usage Titlecase also provides a command line utility ``titlecase``: -.. code-block:: python +:: $ titlecase make me a title Make Me a Title @@ -86,6 +84,17 @@ Titlecase also provides a command line utility ``titlecase``: # Or read/write files: $ titlecase -f infile -o outfile +In addition, commonly used acronyms can be kept in a local file +at `~/.titlecase.txt`. This file contains one acronym per line. +The acronym will be maintained in the title as it is provided. +Once there is e.g. one line saying `TCP`, then it will be automatically +used when used from the command line. + +:: + + $ titlecase I LOVE TCP + I Love TCP + Limitations ----------- @@ -100,4 +109,4 @@ there is basic support for Unicode characters, such that something like not be handled correctly. If anyone has concrete solutions to improve these or other shortcomings of the -libraries, pull requests are very welcome! +library, pull requests are very welcome! diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4334f6d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,12 @@ +[build-system] +# The assumed default build requirements from pip are: "setuptools>=40.8.0", +# "wheel" +# See: https://pip.pypa.io/en/stable/reference/pip/#pep-517-and-518-support +# These are taken from the PyScaffold example +# See: https://github.com/pyscaffold/pyscaffold-demo +requires = ["setuptools>=46.1.0", "setuptools_scm[toml]>=5", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.setuptools_scm] +# See configuration details in https://github.com/pypa/setuptools_scm +version_scheme = "no-guess-dev" diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..6b0f706 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,54 @@ +[metadata] +name = titlecase +author = Stuart Colville +maintainer = Pat Pannuto +maintainer_email = pat.pannuto+titlecase@gmail.com +description = Python Port of John Gruber's titlecase.pl +long_description = file: README.rst +long_description_content_type = text/x-rst +url = https://github.com/ppannuto/python-titlecase +project_urls = + PyPI = https://pypi.org/project/titlecase/ + conda-forge = https://anaconda.org/conda-forge/titlecase + Source Code = https://github.com/ppannuto/python-titlecase + Bug Tracker = https://github.com/ppannuto/python-titlecase/issues + +classifiers = + Development Status :: 5 - Production/Stable + Intended Audience :: Developers + Operating System :: OS Independent + Programming Language :: Python + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 + Programming Language :: Python :: Implementation :: CPython + License :: OSI Approved :: MIT License + Natural Language :: English + Topic :: Text Processing :: Filters +license = MIT +license_files = [LICENSE.txt] +keyword = + string formatting + +[options] +zip_safe = False +include_package_data = True +packages = find: +python_requires = >=3.7 + +[options.extras_require] +regex = + regex >=2020.4.4 + +[options.entry_points] +console_scripts = + titlecase = titlecase.__init__:cmd + +[bdist_wheel] +universal = 1 + +[devpi:upload] +no_vcs = 1 +formats = bdist_wheel diff --git a/setup.py b/setup.py index a54b682..78d7646 100644 --- a/setup.py +++ b/setup.py @@ -1,50 +1,21 @@ -import os -import sys +"""Setup file for Titlecase. -from setuptools import setup, find_packages +This is based on the example from PyScaffold (https://pyscaffold.org/). +`setup.cfg` is used to configure the project. -def readme(): - with open('README.rst') as f: - return f.read() +""" -from titlecase import __version__ - -setup(name='titlecase', - version=__version__, - description="Python Port of John Gruber's titlecase.pl", - long_description=readme(), - classifiers=[ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "Operating System :: OS Independent", - "Programming Language :: Python", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: Implementation :: CPython", - "License :: OSI Approved :: MIT License", - "Natural Language :: English", - "Topic :: Text Processing :: Filters", - ], - keywords='string formatting', - author="Pat Pannuto, Stuart Colville, John Gruber", - author_email="pat.pannuto+titlecase@gmail.com", - url="https://github.com/ppannuto/python-titlecase", - license="MIT", - packages=find_packages(), - include_package_data=True, - zip_safe=False, - tests_require=['nose', 'regex'], - setup_requires=['nose>=1.0', 'regex>=2020.4.4'], - test_suite="titlecase.tests", - entry_points = { - 'console_scripts': [ - 'titlecase = titlecase.__init__:cmd', - ], - }, -) +from setuptools import setup +if __name__ == "__main__": + try: + setup(use_scm_version={"version_scheme": "no-guess-dev"}) + except Exception: + msg = ( + "\n\nAn error occurred while building the project, " + "please ensure you have the most updated version of setuptools, " + "setuptools_scm and wheel with:\n" + " pip install -U setuptools setuptools_scm wheel\n\n" + ) + print(msg) + raise diff --git a/titlecase/__init__.py b/titlecase/__init__.py index 4369127..fd24431 100755 --- a/titlecase/__init__.py +++ b/titlecase/__init__.py @@ -7,42 +7,55 @@ License: http://www.opensource.org/licenses/mit-license.php """ -from __future__ import unicode_literals - import argparse +import logging +logger = logging.getLogger(__name__) +import os +import string import sys -import regex +try: + import regex +except ImportError: + import re as regex + REGEX_AVAILABLE = False +else: + REGEX_AVAILABLE = True __all__ = ['titlecase'] -__version__ = '0.12.0' +__version__ = '2.4.1' SMALL = r'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\.?|via|vs\.?' PUNCT = r"""!"“#$%&'‘()*+,\-–‒—―./:;?@[\\\]_`{|}~""" SMALL_WORDS = regex.compile(r'^(%s)$' % SMALL, regex.I) -INLINE_PERIOD = regex.compile(r'[\p{Letter}][.][\p{Letter}]', regex.I) -UC_ELSEWHERE = regex.compile(r'[%s]*?[\p{Letter}]+[\p{Uppercase_Letter}]+?' % PUNCT) -CAPFIRST = regex.compile(r"^[%s]*?([\p{Letter}])" % PUNCT) + SMALL_FIRST = regex.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), regex.I) SMALL_LAST = regex.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), regex.I) SUBPHRASE = regex.compile(r'([:.;?!\-–‒—―][ ])(%s)' % SMALL) -APOS_SECOND = regex.compile(r"^[dol]{1}['‘]{1}[\p{Letter}]+(?:['s]{2})?$", regex.I) -UC_INITIALS = regex.compile(r"^(?:[\p{Uppercase_Letter}]{1}\.{1}|[\p{Uppercase_Letter}]{1}\.{1}[\p{Uppercase_Letter}]{1})+$") MAC_MC = regex.compile(r"^([Mm]c|MC)(\w.+)") +MR_MRS_MS_DR = regex.compile(r"^((m((rs?)|s))|Dr)$", regex.I) + +if REGEX_AVAILABLE: + INLINE_PERIOD = regex.compile(r'[\p{Letter}][.][\p{Letter}]', regex.I) + UC_ELSEWHERE = regex.compile(r'[%s]*?[\p{Letter}]+[\p{Uppercase_Letter}]+?' % PUNCT) + CAPFIRST = regex.compile(r"^[%s]*?([\p{Letter}])" % PUNCT) + APOS_SECOND = regex.compile(r"^[dol]{1}['‘]{1}[\p{Letter}]+(?:['s]{2})?$", regex.I) + UC_INITIALS = regex.compile(r"^(?:[\p{Uppercase_Letter}]{1}\.{1}|[\p{Uppercase_Letter}]{1}\.{1}[\p{Uppercase_Letter}]{1})+$") +else: + INLINE_PERIOD = regex.compile(r'[\w][.][\w]', regex.I) + UC_ELSEWHERE = regex.compile(r'[%s]*?[a-zA-Z]+[A-Z]+?' % PUNCT) + CAPFIRST = regex.compile(r"^[%s]*?([\w])" % PUNCT) + APOS_SECOND = regex.compile(r"^[dol]['‘][\w]+(?:['s]{2})?$", regex.I) + UC_INITIALS = regex.compile(r"^(?:[A-Z]\.|[A-Z]\.[A-Z])+$") class Immutable(object): pass - -text_type = unicode if sys.version_info < (3,) else str - - -class ImmutableString(text_type, Immutable): +class ImmutableString(str, Immutable): pass - class ImmutableBytes(bytes, Immutable): pass @@ -64,9 +77,18 @@ def set_small_word_list(small=SMALL): SUBPHRASE = regex.compile(r'([:.;?!][ ])(%s)' % small) -def titlecase(text, callback=None, small_first_last=True): +def titlecase(text, callback=None, small_first_last=True, preserve_blank_lines=False, normalise_space_characters=False): """ - Titlecases input text + :param text: Titlecases input text + :param callback: Callback function that returns the titlecase version of a specific word + :param small_first_last: Capitalize small words (e.g. 'A') at the beginning; disabled when recursing + :param preserve_blank_lines: Preserve blank lines in the output + :param normalise_space_characters: Convert all original spaces to normal space characters + :type text: str + :type callback: function + :type small_first_last: bool + :type preserve_blank_lines: bool + :type normalise_space_characters: bool This filter changes all words to Title Caps, and attempts to be clever about *un*capitalizing SMALL words like a/an/the in the input. @@ -75,12 +97,16 @@ def titlecase(text, callback=None, small_first_last=True): the New York Times Manual of Style, plus 'vs' and 'v'. """ - - lines = regex.split('[\r\n]+', text) + if preserve_blank_lines: + lines = regex.split('[\r\n]', text) + else: + lines = regex.split('[\r\n]+', text) processed = [] for line in lines: all_caps = line.upper() == line - words = regex.split('[\t ]', line) + split_line = regex.split(r'(\s)', line) + words = split_line[::2] + spaces = split_line[1::2] tc_line = [] for word in words: if callback: @@ -107,7 +133,13 @@ def titlecase(text, callback=None, small_first_last=True): match = MAC_MC.match(word) if match: tc_line.append("%s%s" % (match.group(1).capitalize(), - titlecase(match.group(2),callback,small_first_last))) + titlecase(match.group(2), callback, True))) + continue + + match = MR_MRS_MS_DR.match(word) + if match: + word = word[0].upper() + word[1:] + tc_line.append(word) continue if INLINE_PERIOD.search(word) or (not all_caps and UC_ELSEWHERE.match(word)): @@ -127,7 +159,7 @@ def titlecase(text, callback=None, small_first_last=True): if '-' in word: hyphenated = map( - lambda t: titlecase(t,callback,small_first_last), + lambda t: titlecase(t, callback, False), word.split('-') ) tc_line.append("-".join(hyphenated)) @@ -136,6 +168,17 @@ def titlecase(text, callback=None, small_first_last=True): if all_caps: word = word.lower() + + # A term with all consonants should be considered an acronym. But if it's + # too short (like "St", don't apply this) + CONSONANTS = ''.join(set(string.ascii_lowercase) + - {'a', 'e', 'i', 'o', 'u', 'y'}) + is_all_consonants = regex.search(r'\A[' + CONSONANTS + r']+\Z', word, + flags=regex.IGNORECASE) + if is_all_consonants and len(word) > 2: + tc_line.append(word.upper()) + continue + # Just a normal word that needs to be capitalized tc_line.append(CAPFIRST.sub(lambda m: m.group(0).upper(), word)) @@ -151,7 +194,13 @@ def titlecase(text, callback=None, small_first_last=True): lambda m: m.group(0).capitalize(), tc_line[-1] ) - result = " ".join(tc_line) + if normalise_space_characters: + result = " ".join(tc_line) + else: + line_to_be_joined = tc_line + spaces + line_to_be_joined[::2] = tc_line + line_to_be_joined[1::2] = spaces + result = "".join(line_to_be_joined) result = SUBPHRASE.sub(lambda m: '%s%s' % ( m.group(1), @@ -160,7 +209,33 @@ def titlecase(text, callback=None, small_first_last=True): processed.append(result) - return "\n".join(processed) + result = "\n".join(processed) + logger.debug(result) + return result + + +def create_wordlist_filter_from_file(file_path): + ''' + Load a list of abbreviations from the file with the provided path, + reading one abbreviation from each line, and return a callback to + be passed to the `titlecase` function for preserving their given + canonical capitalization during title-casing. + ''' + if file_path is None: + logger.debug('No abbreviations file path given') + return lambda word, **kwargs: None + file_path_str = str(file_path) + if not os.path.isfile(file_path_str): + logger.debug('No abbreviations file found at ' + file_path_str) + return lambda word, **kwargs: None + with open(file_path_str) as f: + logger.debug('Reading abbreviations from file ' + file_path_str) + abbrevs_gen = (line.strip() for line in f.read().splitlines() if line) + abbrevs = {abbr.upper(): abbr for abbr in abbrevs_gen} + if logger.isEnabledFor(logging.DEBUG): + for abbr in abbrevs.values(): + logger.debug('Registered abbreviation: ' + abbr) + return lambda word, **kwargs: abbrevs.get(word.upper()) def cmd(): @@ -170,7 +245,7 @@ def cmd(): # Consume '-f' and '-o' as input/output, allow '-' for stdin/stdout # and treat any subsequent arguments as a space separated string to # be titlecased (so it still works if people forget quotes) - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(allow_abbrev=False) in_group = parser.add_mutually_exclusive_group() in_group.add_argument('string', nargs='*', default=[], help='String to titlecase') @@ -178,6 +253,10 @@ def cmd(): help='File to read from to titlecase') parser.add_argument('-o', '--output-file', help='File to write titlecased output to') + parser.add_argument('-w', '--wordlist', + help='Wordlist for acronyms') + parser.add_argument('--preserve-blank-lines', action='store_true', + help='Do not skip blank lines in input') args = parser.parse_args() @@ -203,5 +282,12 @@ def cmd(): with ifile: in_string = ifile.read() + if args.wordlist is not None: + wordlist_file = args.wordlist + else: + wordlist_file = os.path.join(os.path.expanduser('~'), '.titlecase.txt') + wordlist_filter = create_wordlist_filter_from_file(wordlist_file) + with ofile: - ofile.write(titlecase(in_string)) + ofile.write(titlecase(in_string, callback=wordlist_filter, + preserve_blank_lines=args.preserve_blank_lines)) diff --git a/titlecase/tests.py b/titlecase/tests.py index c35d353..9265ec0 100644 --- a/titlecase/tests.py +++ b/titlecase/tests.py @@ -3,14 +3,16 @@ """Tests for titlecase""" -from __future__ import print_function, unicode_literals - import os import sys +import tempfile +import unittest + sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../')) +from titlecase import titlecase, set_small_word_list, create_wordlist_filter_from_file -from titlecase import titlecase, set_small_word_list +# (executed by `test_specific_string` below) TEST_DATA = ( ( "", @@ -28,6 +30,10 @@ "dance with me/let’s face the music and dance", "Dance With Me/Let’s Face the Music and Dance" ), + ( + "a-b end-to-end two-not-three/three-by-four/five-and", + "A-B End-to-End Two-Not-Three/Three-by-Four/Five-And" + ), ( "34th 3rd 2nd", "34th 3rd 2nd" @@ -44,6 +50,10 @@ "Apple deal with AT&T falls through", "Apple Deal With AT&T Falls Through" ), + ( + "Words with all consonants like cnn are acronyms", + "Words With All Consonants Like CNN Are Acronyms" + ), ( "this v that", "This v That" @@ -240,6 +250,14 @@ "o'melveny/o'doyle o'Melveny/o'doyle O'melveny/o'doyle o'melveny/o'Doyle o'melveny/O'doyle", "O'Melveny/O'Doyle O'Melveny/O'Doyle O'Melveny/O'Doyle O'Melveny/O'Doyle O'Melveny/O'Doyle", ), + # These 'Mc' cases aim to ensure more consistent/predictable behavior. + # The examples here are somewhat contrived, and are subject to change + # if there is a compelling argument for updating their behavior. + # See https://github.com/ppannuto/python-titlecase/issues/64 + ( + "mccay-mcbut-mcdo mcdonalds/mcby", + "McCay-McBut-McDo McDonalds/McBy" + ), ( "oblon, spivak, mcclelland, maier & neustadt", "Oblon, Spivak, McClelland, Maier & Neustadt", @@ -284,67 +302,147 @@ "ýæ ñø", "Ýæ Ñø", ), + # https://github.com/ppannuto/python-titlecase/pull/67 + ( + "Mr mr Mrs Ms Mss Dr dr , Mr. and Mrs. Person", + "Mr Mr Mrs Ms MSS Dr Dr , Mr. And Mrs. Person", + ), + ( + "a mix of\tdifferent\u200aspace\u2006characters", + "A Mix of\tDifferent\u200aSpace\u2006Characters", + ), ) -def test_initials_regex(): - """Test - uppercase initials regex with A.B""" - from titlecase import UC_INITIALS - assert bool(UC_INITIALS.match('A.B')) is True +class TestStringSuite(unittest.TestCase): + """Generated tests from strings""" + def test_specific_string(self): + for data in TEST_DATA: + with self.subTest(): + self.assertEqual(titlecase(data[0]), data[1]) -def test_initials_regex_2(): - """Test - uppercase initials regex with A.B.""" - from titlecase import UC_INITIALS - assert bool(UC_INITIALS.match('A.B.')) is True +class TestInitialsRegex(unittest.TestCase): + def test_initials_regex(self): + """Test - uppercase initials regex with A.B""" + from titlecase import UC_INITIALS + #assert bool(UC_INITIALS.match('A.B')) is True + self.assertRegex('A.B', UC_INITIALS) -def test_initials_regex_3(): - """Test - uppercase initials regex with ABCD""" - from titlecase import UC_INITIALS - assert bool(UC_INITIALS.match('ABCD')) is False + def test_initials_regex_2(self): + """Test - uppercase initials regex with A.B.""" + from titlecase import UC_INITIALS + #assert bool(UC_INITIALS.match('A.B.')) is True + self.assertRegex('A.B.', UC_INITIALS) + def test_initials_regex_3(self): + """Test - uppercase initials regex with ABCD""" + from titlecase import UC_INITIALS + #assert bool(UC_INITIALS.match('ABCD')) is False + self.assertNotRegex('ABCD', UC_INITIALS) -def check_input_matches_expected_output(in_, out): - """Function yielded by test generator""" - try: - assert titlecase(in_) == out - except AssertionError: - print("{0} != {1}".format(titlecase(in_), out)) - raise - -def test_at_and_t(): +class TestSymbols(unittest.TestCase): + @staticmethod def at_n_t(word, **kwargs): if word.upper() == "AT&T": return word.upper() - print(titlecase("at&t", callback=at_n_t)) - assert titlecase("at&t", callback=at_n_t) == "AT&T" - -def test_input_output(): - """Generated tests""" - for data in TEST_DATA: - yield check_input_matches_expected_output, data[0], data[1] + def test_at_n_t(self): + self.assertEqual(titlecase("at&t", callback=TestSymbols.at_n_t), "AT&T") -def test_callback(): +class TestCallback(unittest.TestCase): + @staticmethod def abbreviation(word, **kwargs): if word.upper() in ('TCP', 'UDP'): return word.upper() - s = 'a simple tcp and udp wrapper' - assert titlecase(s) == 'A Simple Tcp and Udp Wrapper' - assert titlecase(s, callback=abbreviation) == 'A Simple TCP and UDP Wrapper' - assert titlecase(s.upper(), callback=abbreviation) == 'A Simple TCP and UDP Wrapper' - assert titlecase(u'crème brûlée', callback=lambda x, **kw: x.upper()) == u'CRÈME BRÛLÉE' + def test_callback(self): + s = 'a simple tcp and udp wrapper' + # Note: this library is able to guess that all-consonant words are acronyms, so TCP + # works naturally, but others will require the custom list + self.assertEqual(titlecase(s), + 'A Simple TCP and Udp Wrapper') + self.assertEqual(titlecase(s, callback=TestCallback.abbreviation), + 'A Simple TCP and UDP Wrapper') + self.assertEqual(titlecase(s.upper(), callback=TestCallback.abbreviation), + 'A Simple TCP and UDP Wrapper') + self.assertEqual(titlecase(u'crème brûlée', callback=lambda x, **kw: x.upper()), + u'CRÈME BRÛLÉE') + + +# It looks like set_small_word_list uses different regexs that the original +# setup code path :/. It really should be the case that one could call +# titlecase.set_small_word_list() and reset to the original behavior (it +# _really_ should be the case that there aren't all these ugly globals around). +# +# It seems that `nose` ran every test in isolation, or just in a different +# order, so the global state bug wasn't caught before. This should be fixed, +# but one thingg at a time. +@unittest.skip("FIXME: Converting to unittest exposed a bug") +class TestSmallWordList(unittest.TestCase): + def test_set_small_word_list(self): + self.assertEqual(titlecase('playing the game "words with friends"'), + 'Playing the Game "Words With Friends"') + set_small_word_list('a|an|the|with') + self.assertEqual(titlecase('playing the game "words with friends"'), + 'Playing the Game "Words with Friends"') + + +class TestCustomAbbreviations(unittest.TestCase): + def setUp(self): + # Do not delete on close, instead do manually for Windows (see #86). + self.f = tempfile.NamedTemporaryFile(mode='w', delete=False) + self.f.write('UDP\nPPPoE\n') + self.f.flush() + + def tearDown(self): + self.f.close() # manually close + os.unlink(self.f.name) # manually delete + + def test_technical_acronyms(self): + # This works without a wordlist, because it begins mixed case + self.assertEqual(titlecase('sending UDP packets over PPPoE works great'), + 'Sending UDP Packets Over PPPoE Works Great') + # Without a wordlist, this will do the "wrong" thing for the context + self.assertEqual(titlecase('SENDING UDP PACKETS OVER PPPOE WORKS GREAT'), + 'Sending Udp Packets Over Pppoe Works Great') + # A wordlist can provide custom acronyms + self.assertEqual(titlecase( + 'sending UDP packets over PPPoE works great', + callback=create_wordlist_filter_from_file(self.f.name)), + 'Sending UDP Packets Over PPPoE Works Great') + + +class TestBlankLines(unittest.TestCase): + # Really, it's a bit odd that the default behavior is to delete blank lines, + # but that's what it was from day one, so we're kind of stuck with that. + # This ensures folks can opt-out of that behavior if they want. + + def test_one_blank(self): + s = 'Line number one\n\nand Line three\n' + self.assertEqual(titlecase(s), 'Line Number One\nAnd Line Three\n') + self.assertEqual(titlecase(s, preserve_blank_lines=True), 'Line Number One\n\nAnd Line Three\n') + + def test_complex_blanks(self): + s = '\n\nLeading blank\n\n\nMulti-blank\n\n\n\n\nTrailing Blank\n\n' + self.assertEqual(titlecase(s), + '\nLeading Blank\nMulti-Blank\nTrailing Blank\n') + self.assertEqual(titlecase(s, preserve_blank_lines=True), + '\n\nLeading Blank\n\n\nMulti-Blank\n\n\n\n\nTrailing Blank\n\n') -def test_set_small_word_list(): - assert titlecase('playing the game "words with friends"') == 'Playing the Game "Words With Friends"' - set_small_word_list('a|an|the|with') - assert titlecase('playing the game "words with friends"') == 'Playing the Game "Words with Friends"' +class TestNormaliseSpaceCharacters(unittest.TestCase): + def test_tabs(self): + s = 'text\twith\ttabs' + self.assertEqual(titlecase(s), 'Text\tWith\tTabs') + self.assertEqual(titlecase(s, normalise_space_characters=True), 'Text With Tabs') + def test_nbsps(self): + s = 'text with nonbreaking spaces' + self.assertEqual(titlecase(s), 'Text With Nonbreaking Spaces') + self.assertEqual(titlecase(s, normalise_space_characters=True), 'Text With Nonbreaking Spaces') -if __name__ == "__main__": - import nose - nose.main() +if __name__ == '__main__': + unittest.main() diff --git a/tox.ini b/tox.ini index 183a05a..335d207 100644 --- a/tox.ini +++ b/tox.ini @@ -4,16 +4,20 @@ # and then run "tox" from this directory. [tox] -envlist = py26, py27, py33, py34, py35 -# Doesn't seem to work on jython currently; some unicode issue -# pypy breaks on Travis, something from a pulled dep: https://travis-ci.org/ppannuto/python-titlecase/jobs/308106681 +envlist = py36, py37, py38, py39, py310 -[testenv] -passenv = TRAVIS TRAVIS_JOB_ID TRAVIS_BRANCH +[base] deps = - nose - regex - coveralls + coveralls >=1.1 commands = - coverage run --source=titlecase setup.py nosetests + coverage run -m unittest coveralls + +[testenv:re] +deps = + {[base]deps} + +[testenv:regex] +deps = + regex >=2020.4.4 + {[base]deps}