diff --git a/.flake8 b/.flake8
new file mode 100644
index 00000000..b33811f1
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,2 @@
+[flake8]
+ignore = E501, W503 
\ No newline at end of file
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..80224f9e
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+tests/samples/* linguist-vendored
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
new file mode 100644
index 00000000..23f16106
--- /dev/null
+++ b/.github/workflows/python-package.yml
@@ -0,0 +1,40 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Python package
+
+on:
+  push:
+    branches: [ "master" ]
+  pull_request:
+    branches: [ "master" ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v3
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install flake8 pytest
+        python -m pip install .
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Test with pytest
+      run: |
+        pytest
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
new file mode 100644
index 00000000..bdaab28a
--- /dev/null
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,39 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [published]
+
+permissions:
+  contents: read
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python
+      uses: actions/setup-python@v3
+      with:
+        python-version: '3.x'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install build
+    - name: Build package
+      run: python -m build
+    - name: Publish package
+      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+      with:
+        user: __token__
+        password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/.gitignore b/.gitignore
index 16a2c86e..b532e65e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,8 @@
 *.pyc
+__pycache__
 *.egg-info
-build
-dist
+/build
+/dist
 /bin
 /include
 /lib
@@ -12,3 +13,6 @@ nosetests.xml
 .tox
 .idea
 .cache
+/.noseids
+/.venv
+/poetry.lock
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
index 21e1ce11..ea56f519 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,47 +4,16 @@ cache: pip
 
 matrix:
   include:
-    - name: "Python 2.7 on Linux"
-      python: 2.7
-      env: PIP=pip
-    - name: "Python 3.5 on Linux"
-      python: 3.5
-    - name: "Python 3.6 on Linux"
-      python: 3.6
-    - name: "Python 3.7 on Linux"
-      python: 3.7
     - name: "Python 3.8 on Linux"
       dist: xenial
       python: 3.8
     - name: "Python 3.9 Nightly on Linux"
       dist: bionic
       python: nightly
-    - name: "Pypy on Linux"
-      python: pypy
-      env: PIP=pip
     - name: "Pypy 3 on Linux"
       python: pypy3
-    - name: "Python 3.7 on older macOS"
-      os: osx
-      osx_image: xcode9.4
-      language: shell
-      env: TOXENV=py37
-      before_install:
-        - sw_vers
-        - python3 --version
-        - pip3 --version
-    - name: "Python 3.7 on macOS"
-      os: osx
-      osx_image: xcode11
-      language: shell
-      env: TOXENV=py37
-      before_install:
-        - sw_vers
-        - python3 --version
-        - pip3 --version
   allow_failures:
     - python: nightly
-    - python: pypy
     - python: pypy3
     - os: osx
 
diff --git a/Makefile b/Makefile
index 81a14523..9caf08a5 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,7 @@ PY := .venv/bin/python
 PIP := .venv/bin/pip
 PEP8 := .venv/bin/pep8
 NOSE := .venv/bin/nosetests
+TWINE := .venv/bin/twine
 
 # ###########
 # Tests rule!
@@ -12,22 +13,24 @@ NOSE := .venv/bin/nosetests
 test: venv develop $(NOSE)
 	$(NOSE) --with-id -s tests
 
-$(NOSE):
-	$(PIP) install nose pep8 coverage
+$(NOSE): setup
 
 # #######
 # INSTALL
 # #######
 .PHONY: all
-all: venv develop
+all: setup develop
 
 venv: .venv/bin/python
 
+setup: venv
+	$(PIP) install -q -r requirements-dev.txt
+
 .venv/bin/python:
-	virtualenv .venv
+	which python3 && python3 -m venv .venv || virtualenv .venv
 
-.PHONY: clean_venv
-clean_venv:
+.PHONY: clean clean_venv
+clean clean_venv:
 	rm -rf .venv
 
 develop: .venv/lib/python*/site-packages/readability-lxml.egg-link
@@ -42,18 +45,29 @@ develop: .venv/lib/python*/site-packages/readability-lxml.egg-link
 .PHONY: clean_all
 clean_all: clean_venv
 
+.PHONY: build
+build:
+	poetry build
 
 # ###########
 # Deploy
 # ###########
 .PHONY: dist
 dist:
-	$(PY) setup.py sdist
+	$(PY) -m pip install wheel
+	$(PY) setup.py sdist bdist_wheel
+	$(TWINE) check dist/*
 
 .PHONY: upload
 upload:
-	$(PY) setup.py sdist upload
+	$(TWINE) upload dist/*
 
-.PHONY: version_update
-version_update:
-	$(EDITOR) setup.py
+.PHONY: bump
+bump:
+	$(EDITOR) readability/__init__.py
+	# Read the new version in the shell after the editor exits (make expands the whole recipe up front), then fix the first version in pyproject.toml
+	VERSION=$$(grep "__version__" readability/__init__.py | cut -d'"' -f2) && test -n "$$VERSION" && \
+	sed -i '0,/version = ".*"/s//version = "'"$$VERSION"'"/' pyproject.toml && \
+	git commit -m "Bump version to $$VERSION" pyproject.toml readability/__init__.py && \
+	git tag "$$VERSION" && \
+	git push --tags
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..e09a515a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,67 @@
+[![PyPI version](https://img.shields.io/pypi/v/readability-lxml.svg)](https://pypi.python.org/pypi/readability-lxml)
+
+# python-readability
+
+Given an HTML document, extract and clean up the main body text and title.
+
+This is a Python port of a Ruby port of [arc90's Readability project](https://web.archive.org/web/20130519040221/http://www.readability.com/).
+
+## Installation
+
+It's easy using `pip`, just run:
+
+```bash
+$ pip install readability-lxml
+```
+
+As an alternative, you may also use conda to install, just run:
+
+```bash
+$ conda install -c conda-forge readability-lxml
+```
+
+## Usage
+
+```python
+>>> import requests
+>>> from readability import Document
+
+>>> response = requests.get('http://example.com')
+>>> doc = Document(response.content)
+>>> doc.title()
+'Example Domain'
+
+>>> doc.summary()
+"""<html><body><div><body id="readabilityBody">\n<div>\n    <h1>Example Domain</h1>\n
+<p>This domain is established to be used for illustrative examples in documents. You may
+use this\n    domain in examples without prior coordination or asking for permission.</p>
+\n    <p><a href="http://www.iana.org/domains/example">More information...</a></p>\n</div>
+\n</body>\n</div></body></html>"""
+```
+
+## Change Log
+- 0.8.4 Better CJK support, thanks @cdhigh
+- 0.8.3.1 Support for python 3.8 - 3.13
+- 0.8.3 We can now save all images via keep_all_images=True (default is to save 1 main image), thanks @botlabsDev
+- 0.8.2 Added article author(s) (thanks @mattblaha)
+- 0.8.1 Fixed processing of non-ascii HTMLs via regexps.
+- 0.8 Replaced XHTML output with HTML5 output in summary() call.
+- 0.7.1 Support for Python 3.7. Fixed a slowdown when processing documents with lots of spaces.
+- 0.7 Improved HTML5 tags handling. Fixed stripping unwanted HTML nodes (only first matching node was removed before).
+- 0.6 Finally a release which supports Python versions 2.6, 2.7, 3.3 - 3.6
+- 0.5 Preparing a release to support Python versions 2.6, 2.7, 3.3 and 3.4
+- 0.4 Added Videos loading and allowed more images per paragraph
+- 0.3 Added Document.encoding, positive\_keywords and negative\_keywords
+
+## Licensing
+
+This code is under [the Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
+
+## Thanks to
+
+- Latest [readability.js](https://github.com/MHordecki/readability-redux/blob/master/readability/readability.js)
+- Ruby port by starrhorne and iterationlabs
+- [Python port](https://github.com/gfxmonk/python-readability) by gfxmonk
+- [Decruft effort](https://web.archive.org/web/20110214150709/https://www.minvolai.com/blog/decruft-arc90s-readability-in-python/) to move to lxml
+- "BR to P" fix from readability.js which improves quality for smaller texts
+- GitHub users' contributions.
diff --git a/README.rst b/README.rst
deleted file mode 100644
index d4150aca..00000000
--- a/README.rst
+++ /dev/null
@@ -1,68 +0,0 @@
-.. image:: https://travis-ci.org/buriy/python-readability.svg?branch=master
-    :target: https://travis-ci.org/buriy/python-readability
-
-
-python-readability
-==================
-
-Given a html document, it pulls out the main body text and cleans it up.
-
-This is a python port of a ruby port of `arc90's readability
-project <http://lab.arc90.com/experiments/readability/>`__.
-
-Installation
-------------
-
-It's easy using ``pip``, just run:
-
-.. code-block:: bash
-
-    $ pip install readability-lxml
-
-Usage
------
-
-.. code-block:: python
-
-    >>> import requests
-    >>> from readability import Document
-
-    >>> response = requests.get('http://example.com')
-    >>> doc = Document(response.text)
-    >>> doc.title()
-    'Example Domain'
-
-    >>> doc.summary()
-    """<html><body><div><body id="readabilityBody">\n<div>\n    <h1>Example Domain</h1>\n
-    <p>This domain is established to be used for illustrative examples in documents. You may
-    use this\n    domain in examples without prior coordination or asking for permission.</p>
-    \n    <p><a href="http://www.iana.org/domains/example">More information...</a></p>\n</div>
-    \n</body>\n</div></body></html>"""
-
-Change Log
-----------
-
--  0.8.1 Fixed processing of non-ascii HTMLs via regexps.
--  0.8 Replaced XHTML output with HTML5 output in summary() call.
--  0.7.1 Support for Python 3.7 . Fixed a slowdown when processing documents with lots of spaces.
--  0.7 Improved HTML5 tags handling. Fixed stripping unwanted HTML nodes (only first matching node was removed before).
--  0.6 Finally a release which supports Python versions 2.6, 2.7, 3.3 - 3.6
--  0.5 Preparing a release to support Python versions 2.6, 2.7, 3.3 and 3.4
--  0.4 Added Videos loading and allowed more images per paragraph
--  0.3 Added Document.encoding, positive\_keywords and negative\_keywords
-
-Licensing
---------
-
-This code is under `the Apache License
-2.0 <http://www.apache.org/licenses/LICENSE-2.0>`__ license.
-
-Thanks to
----------
-
--  Latest `readability.js <https://github.com/MHordecki/readability-redux/blob/master/readability/readability.js>`__
--  Ruby port by starrhorne and iterationlabs
--  `Python port <https://github.com/gfxmonk/python-readability>`__ by gfxmonk
--  `Decruft effort <http://www.minvolai.com/blog/decruft-arc90s-readability-in-python/>` to move to lxml
--  "BR to P" fix from readability.js which improves quality for smaller texts
--  Github users contributions.
diff --git a/doc/source/conf.py b/doc/source/conf.py
index bb261349..e70cf9b3 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 #
 # readability documentation build configuration file, created by
 # sphinx-quickstart on Thu Mar 23 16:29:38 2017.
@@ -38,7 +37,7 @@
     "sphinx.ext.doctest",
     "sphinx.ext.intersphinx",
     "sphinx.ext.todo",
-    "recommonmark",
+    "myst_parser",
 ]
 
 # Add any paths that contain templates here, relative to this directory.
@@ -72,7 +71,7 @@
 #
 # This is also used if you do content translation via gettext catalogs.
 # Usually you set "language" from the command line for these cases.
-language = None
+language = "en"
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..44992853
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,25 @@
+[tool.poetry]
+name = "readability-lxml"
+version = "0.8.4.1"
+description = "fast html to text parser (article readability tool) with python 3 support"
+authors = ["Yuri Baburov <burchik@gmail.com>"]
+license = "Apache-2.0"
+readme = "README.md"
+packages = [
+    { include = "readability" },
+]
+
+[tool.poetry.dependencies]
+python = ">=3.8.2,<3.14"
+chardet = "^5.2.0"
+cssselect = [
+    { version = "~1.2", markers = "python_version < '3.9'" },
+    { version = "~1.3", markers = "python_version >= '3.9'" }
+]
+lxml = {extras = ["html-clean"], version = "^5.4.0"}
+lxml-html-clean = {markers = "python_version < \"3.11\"", version = "^0.4.2"}
+
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
diff --git a/readability/__init__.py b/readability/__init__.py
index 32e28e3b..b36f021d 100644
--- a/readability/__init__.py
+++ b/readability/__init__.py
@@ -1,3 +1,3 @@
-__version__ = "0.8.1"
+__version__ = "0.8.4.1"
 
 from .readability import Document
diff --git a/readability/cleaners.py b/readability/cleaners.py
index 69825c6b..e0b07260 100644
--- a/readability/cleaners.py
+++ b/readability/cleaners.py
@@ -1,6 +1,9 @@
 # strip out a set of nuisance html attributes that can mess up rendering in RSS feeds
 import re
-from lxml.html.clean import Cleaner
+try:
+    from lxml.html.clean import Cleaner
+except ImportError:
+    from lxml_html_clean import Cleaner
 
 bad_attrs = ["width", "height", "style", "[-a-z]*color", "background[-a-z]*", "on*"]
 single_quoted = "'[^']+'"
diff --git a/readability/compat/__init__.py b/readability/compat/__init__.py
deleted file mode 100644
index c648633a..00000000
--- a/readability/compat/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-"""
-This module contains compatibility helpers for Python 2/3 interoperability.
-
-It mainly exists because their are certain incompatibilities in the Python
-syntax that can only be solved by conditionally importing different functions.
-"""
-import sys
-from lxml.etree import tostring
-
-if sys.version_info[0] == 2:
-    bytes_ = str
-    str_ = unicode
-    def tostring_(s):
-        return tostring(s, encoding='utf-8').decode('utf-8')
-
-elif sys.version_info[0] == 3:
-    bytes_ = bytes
-    str_ = str
-    def tostring_(s):
-        return tostring(s, encoding='utf-8')
diff --git a/readability/compat/three.py b/readability/compat/three.py
deleted file mode 100644
index 26351575..00000000
--- a/readability/compat/three.py
+++ /dev/null
@@ -1,6 +0,0 @@
-def raise_with_traceback(exc_type, traceback, *args, **kwargs):
-    """
-    Raise a new exception of type `exc_type` with an existing `traceback`. All
-    additional (keyword-)arguments are forwarded to `exc_type`
-    """
-    raise exc_type(*args, **kwargs).with_traceback(traceback)
diff --git a/readability/compat/two.py b/readability/compat/two.py
deleted file mode 100644
index 642ecb75..00000000
--- a/readability/compat/two.py
+++ /dev/null
@@ -1,6 +0,0 @@
-def raise_with_traceback(exc_type, traceback, *args, **kwargs):
-    """
-    Raise a new exception of type `exc_type` with an existing `traceback`. All
-    additional (keyword-)arguments are forwarded to `exc_type`
-    """
-    raise exc_type(*args, **kwargs), None, traceback
diff --git a/readability/encoding.py b/readability/encoding.py
index e915866a..08332df0 100644
--- a/readability/encoding.py
+++ b/readability/encoding.py
@@ -1,11 +1,13 @@
 import re
-import chardet
-import sys
+try:
+    import cchardet as chardet
+except ImportError:
+    import chardet
 
 
-RE_CHARSET = re.compile(br'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
-RE_PRAGMA = re.compile(br'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
-RE_XML = re.compile(br'^<\?xml.*?encoding=["\']*(.+?)["\'>]')
+RE_CHARSET = re.compile(br'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
+RE_PRAGMA = re.compile(br'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
+RE_XML = re.compile(br'^<\?xml.*?encoding=["\']*(.+?)["\'>]')
 
 CHARSETS = {
     "big5": "big5hkscs",
@@ -35,25 +37,24 @@ def get_encoding(page):
     # Try any declared encodings
     for declared_encoding in declared_encodings:
         try:
-            if sys.version_info[0] == 3:
-                # declared_encoding will actually be bytes but .decode() only
-                # accepts `str` type. Decode blindly with ascii because no one should
-                # ever use non-ascii characters in the name of an encoding.
-                declared_encoding = declared_encoding.decode("ascii", "replace")
+            # Python3 only
+            # declared_encoding will actually be bytes but .decode() only
+            # accepts `str` type. Decode blindly with ascii because no one should
+            # ever use non-ascii characters in the name of an encoding.
+            declared_encoding = declared_encoding.decode("ascii", "replace")
 
             encoding = fix_charset(declared_encoding)
-
             # Now let's decode the page
             page.decode(encoding)
             # It worked!
             return encoding
-        except (UnicodeDecodeError, LookupError):
+        except (UnicodeDecodeError, LookupError):
             pass
 
     # Fallback to chardet if declared encodings fail
     # Remove all HTML tags, and leave only text for chardet
-    text = re.sub(br"(\s*</?[^>]*>)+\s*", b" ", page).strip()
-    enc = "utf-8"
+    text = re.sub(br'(\s*</?[^>]*>)+\s*', b' ', page).strip()
+    enc = 'utf-8'
     if len(text) < 10:
         return enc  # can't guess
     res = chardet.detect(text)
diff --git a/readability/htmls.py b/readability/htmls.py
index 17a75c7d..d99a9f53 100644
--- a/readability/htmls.py
+++ b/readability/htmls.py
@@ -4,13 +4,12 @@
 
 from .cleaners import normalize_spaces, clean_attributes
 from .encoding import get_encoding
-from .compat import str_
 
 utf8_parser = lxml.html.HTMLParser(encoding="utf-8")
 
 
 def build_doc(page):
-    if isinstance(page, str_):
+    if isinstance(page, str):
         encoding = None
         decoded_page = page
     else:
@@ -30,14 +29,14 @@ def js_re(src, pattern, flags, repl):
 
 def normalize_entities(cur_title):
     entities = {
-        u"\u2014": "-",
-        u"\u2013": "-",
-        u"&mdash;": "-",
-        u"&ndash;": "-",
-        u"\u00A0": " ",
-        u"\u00AB": '"',
-        u"\u00BB": '"',
-        u"&quot;": '"',
+        "\u2014": "-",
+        "\u2013": "-",
+        "&mdash;": "-",
+        "&ndash;": "-",
+        "\u00A0": " ",
+        "\u00AB": '"',
+        "\u00BB": '"',
+        "&quot;": '"',
     }
     for c, r in entities.items():
         if c in cur_title:
@@ -58,6 +57,15 @@ def get_title(doc):
     return norm_title(title.text)
 
 
+def get_author(doc):
+    """Return the `content` of the first <meta name="author"> under `doc`, or "[no-author]" if missing or empty."""
+    meta = doc.find(".//meta[@name='author']")
+    content = meta.get('content') if meta is not None else None
+    if not content:
+        return "[no-author]"
+    return content
+
+
 def add_match(collection, text, orig):
     text = norm_title(text)
     if len(text.split()) >= 2 and len(text) >= 15:
@@ -102,27 +110,34 @@ def shorten_title(doc):
             if e.text_content():
                 add_match(candidates, e.text_content(), orig)
 
+    cjk = re.compile('[\u4e00-\u9fff]+')
+
     if candidates:
         title = sorted(candidates, key=len)[-1]
     else:
         for delimiter in [" | ", " - ", " :: ", " / "]:
             if delimiter in title:
                 parts = orig.split(delimiter)
-                if len(parts[0].split()) >= 4:
-                    title = parts[0]
+                p0 = parts[0]
+                pl = parts[-1]
+                if (len(p0.split()) >= 4) or (len(p0) >= 4 and cjk.search(p0)):
+                    title = p0
                     break
-                elif len(parts[-1].split()) >= 4:
-                    title = parts[-1]
+                elif (len(pl.split()) >= 4) or (len(pl) >= 4 and cjk.search(pl)):
+                    title = pl
                     break
         else:
             if ": " in title:
-                parts = orig.split(": ")
-                if len(parts[-1].split()) >= 4:
-                    title = parts[-1]
+                p1 = orig.split(": ")[-1]
+                if (len(p1.split()) >= 4) or (len(p1) >= 4 and cjk.search(p1)):
+                    title = p1
                 else:
                     title = orig.split(": ", 1)[1]
 
-    if not 15 < len(title) < 150:
+    if cjk.search(title):
+        if not (4 <= len(title) < 100):  # Allow length >= 4, cap at 100
+            return orig
+    elif not 15 < len(title) < 150:
         return orig
 
     return title
@@ -134,7 +149,9 @@ def get_body(doc):
         elem.drop_tree()
     # tostring() always return utf-8 encoded string
     # FIXME: isn't better to use tounicode?
-    raw_html = str_(tostring(doc.body or doc))
+    raw_html = tostring(doc.body or doc)
+    if isinstance(raw_html, bytes):
+        raw_html = raw_html.decode()
     cleaned = clean_attributes(raw_html)
     try:
         # BeautifulSoup(cleaned) #FIXME do we really need to try loading it?
diff --git a/readability/readability.py b/readability/readability.py
index 7a7d9b3d..c5739056 100755
--- a/readability/readability.py
+++ b/readability/readability.py
@@ -1,20 +1,25 @@
 #!/usr/bin/env python
-from __future__ import print_function
 import logging
 import re
 import sys
+import urllib.request
+import urllib.parse
+import urllib.error
 
+from lxml.etree import tostring
 from lxml.etree import tounicode
+from lxml.etree import _ElementTree
 from lxml.html import document_fromstring
 from lxml.html import fragment_fromstring
+from lxml.html import HtmlElement
 
 from .cleaners import clean_attributes
 from .cleaners import html_cleaner
 from .htmls import build_doc
 from .htmls import get_body
 from .htmls import get_title
+from .htmls import get_author
 from .htmls import shorten_title
-from .compat import str_, bytes_, tostring_
 from .debug import describe, text_content
 
 
@@ -37,11 +42,11 @@
     "divToPElementsRe": re.compile(
         r"<(a|blockquote|dl|div|img|ol|p|pre|table|ul)", re.I
     ),
-    #'replaceBrsRe': re.compile(r'(<br[^>]*>[ \n\r\t]*){2,}',re.I),
-    #'replaceFontsRe': re.compile(r'<(\/?)font[^>]*>',re.I),
-    #'trimRe': re.compile(r'^\s+|\s+$/'),
-    #'normalizeRe': re.compile(r'\s{2,}/'),
-    #'killBreaksRe': re.compile(r'(<br\s*\/?>(\s|&nbsp;?)*){1,}/'),
+    # 'replaceBrsRe': re.compile(r'(<br[^>]*>[ \n\r\t]*){2,}',re.I),
+    # 'replaceFontsRe': re.compile(r'<(\/?)font[^>]*>',re.I),
+    # 'trimRe': re.compile(r'^\s+|\s+$/'),
+    # 'normalizeRe': re.compile(r'\s{2,}/'),
+    # 'killBreaksRe': re.compile(r'(<br\s*\/?>(\s|&nbsp;?)*){1,}/'),
     "videoRe": re.compile(r"https?:\/\/(www\.)?(youtube|vimeo)\.com", re.I),
     # skipFootnoteLink:      /^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i,
 }
@@ -77,16 +82,16 @@ def text_length(i):
 def compile_pattern(elements):
     if not elements:
         return None
-    elif isinstance(elements, re._pattern_type):
+    elif isinstance(elements, re.Pattern):
         return elements
-    elif isinstance(elements, (str_, bytes_)):
-        if isinstance(elements, bytes_):
-            elements = str_(elements, "utf-8")
-        elements = elements.split(u",")
+    elif isinstance(elements, (str, bytes)):
+        if isinstance(elements, bytes):
+            elements = str(elements, "utf-8")
+        elements = elements.split(",")
     if isinstance(elements, (list, tuple)):
-        return re.compile(u"|".join([re.escape(x.strip()) for x in elements]), re.U)
+        return re.compile("|".join([re.escape(x.strip()) for x in elements]), re.U)
     else:
-        raise Exception("Unknown type for the pattern: {}".format(type(elements)))
+        raise Exception(f"Unknown type for the pattern: {type(elements)}")
         # assume string or string like object
 
 
@@ -154,7 +159,11 @@ def _html(self, force=False):
         return self.html
 
     def _parse(self, input):
-        doc, self.encoding = build_doc(input)
+        if isinstance(input, (_ElementTree, HtmlElement)):
+            doc = input
+            self.encoding = 'utf-8'
+        else:
+            doc, self.encoding = build_doc(input)
         doc = html_cleaner.clean_html(doc)
         base_href = self.url
         if base_href:
@@ -186,6 +195,10 @@ def title(self):
         """Returns document title"""
         return get_title(self._html(True))
 
+    def author(self):
+        """Returns document author"""
+        return get_author(self._html(True))
+
     def short_title(self):
         """Returns cleaned up document title"""
         return shorten_title(self._html(True))
@@ -197,12 +210,13 @@ def get_clean_html(self):
         """
         return clean_attributes(tounicode(self.html, method="html"))
 
-    def summary(self, html_partial=False):
+    def summary(self, html_partial=False, keep_all_images=False):
         """
         Given a HTML file, extracts the text of the article.
 
         :param html_partial: return only the div of the document, don't wrap
                              in html and body tags.
+        :param keep_all_images: Keep all images in summary.
 
         Warning: It mutates internal DOM representation of the HTML document,
         so it is better to call other API methods before this one.
@@ -231,24 +245,20 @@ def summary(self, html_partial=False):
                         log.info("ruthless removal did not work. ")
                         ruthless = False
                         log.debug(
-                            (
                                 "ended up stripping too much - "
                                 "going for a safer _parse"
-                            )
                         )
                         # try again
                         continue
                     else:
                         log.debug(
-                            (
                                 "Ruthless and lenient parsing did not work. "
                                 "Returning raw html"
-                            )
                         )
                         article = self.html.find("body")
                         if article is None:
                             article = self.html
-                cleaned_article = self.sanitize(article, candidates)
+                cleaned_article = self.sanitize(article, candidates, keep_all_images)
 
                 article_length = len(cleaned_article or "")
                 retry_length = self.retry_length
@@ -261,11 +271,7 @@ def summary(self, html_partial=False):
                     return cleaned_article
         except Exception as e:
             log.exception("error getting summary: ")
-            if sys.version_info[0] == 2:
-                from .compat.two import raise_with_traceback
-            else:
-                from .compat.three import raise_with_traceback
-            raise_with_traceback(Unparseable, sys.exc_info()[2], str_(e))
+            raise Unparseable(str(e)).with_traceback(sys.exc_info()[2])
 
     def get_article(self, candidates, best_candidate, html_partial=False):
         # Now that we have the top candidate, look through its siblings for
@@ -327,7 +333,7 @@ def select_best_candidate(self, candidates):
         )
         for candidate in sorted_candidates[:5]:
             elem = candidate["elem"]
-            log.debug("Top 5 : %6.3f %s" % (candidate["content_score"], describe(elem)))
+            log.debug("Top 5 : {:6.3f} {}".format(candidate["content_score"], describe(elem)))
 
         best_candidate = sorted_candidates[0]
         return best_candidate
@@ -443,7 +449,7 @@ def score_node(self, elem):
 
     def remove_unlikely_candidates(self):
         for elem in self.html.findall(".//*"):
-            s = "%s %s" % (elem.get("class", ""), elem.get("id", ""))
+            s = "{} {}".format(elem.get("class", ""), elem.get("id", ""))
             if len(s) < 2:
                 continue
             if (
@@ -463,7 +469,8 @@ def transform_misused_divs_into_paragraphs(self):
             # This results in incorrect results in case there is an <img>
             # buried within an <a> for example
             if not REGEXES["divToPElementsRe"].search(
-                str_(b"".join(map(tostring_, list(elem))))
+                # Serialize the child elements to text so the regex can look for block-level tags.
+                b"".join(tostring(s, encoding='utf-8') for s in elem).decode('utf-8')
             ):
                 # log.debug("Altering %s to p" % (describe(elem)))
                 elem.tag = "p"
@@ -490,15 +497,13 @@ def transform_misused_divs_into_paragraphs(self):
 
     def tags(self, node, *tag_names):
         for tag_name in tag_names:
-            for e in node.findall(".//%s" % tag_name):
-                yield e
+            yield from node.findall(".//%s" % tag_name)
 
     def reverse_tags(self, node, *tag_names):
         for tag_name in tag_names:
-            for e in reversed(node.findall(".//%s" % tag_name)):
-                yield e
+            yield from reversed(node.findall(".//%s" % tag_name))
 
-    def sanitize(self, node, candidates):
+    def sanitize(self, node, candidates, keep_all_images=False):
         MIN_LEN = self.min_text_length
         for header in self.tags(node, "h1", "h2", "h3", "h4", "h5", "h6"):
             if self.class_weight(header) < 0 or self.get_link_density(header) > 0.33:
@@ -559,8 +564,8 @@ def sanitize(self, node, candidates):
                 to_remove = False
                 reason = ""
 
-                # if el.tag == 'div' and counts["img"] >= 1:
-                #    continue
+                if keep_all_images and el.tag == 'div' and counts["img"] >= 1:
+                    continue
                 if counts["p"] and counts["img"] > 1 + counts["p"] * 1.3:
                     reason = "too many images (%s)" % counts["img"]
                     to_remove = True
@@ -583,13 +588,13 @@ def sanitize(self, node, candidates):
                     )
                     to_remove = True
                 elif weight < 25 and link_density > 0.2:
-                    reason = "too many links %.3f for its weight %s" % (
+                    reason = "too many links {:.3f} for its weight {}".format(
                         link_density,
                         weight,
                     )
                     to_remove = True
                 elif weight >= 25 and link_density > 0.5:
-                    reason = "too many links %.3f for its weight %s" % (
+                    reason = "too many links {:.3f} for its weight {}".format(
                         link_density,
                         weight,
                     )
@@ -715,18 +720,10 @@ def main():
     file = None
     if options.url:
         headers = {"User-Agent": "Mozilla/5.0"}
-        if sys.version_info[0] == 3:
-            import urllib.request, urllib.parse, urllib.error
-
-            request = urllib.request.Request(options.url, None, headers)
-            file = urllib.request.urlopen(request)
-        else:
-            import urllib2
-
-            request = urllib2.Request(options.url, None, headers)
-            file = urllib2.urlopen(request)
+        request = urllib.request.Request(options.url, None, headers)
+        file = urllib.request.urlopen(request)
     else:
-        file = open(args[0], "rt")
+        file = open(args[0])
     try:
         doc = Document(
             file.read(),
@@ -740,14 +737,8 @@ def main():
             result = "<h2>" + doc.short_title() + "</h2><br/>" + doc.summary()
             open_in_browser(result)
         else:
-            enc = (
-                sys.__stdout__.encoding or "utf-8"
-            )  # XXX: this hack could not always work, better to set PYTHONIOENCODING
             result = "Title:" + doc.short_title() + "\n" + doc.summary()
-            if sys.version_info[0] == 3:
-                print(result)
-            else:
-                print(result.encode(enc, "replace"))
+            print(result)
     finally:
         file.close()
 
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 00000000..996bbfc0
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,3 @@
+nose
+twine
+flake8
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index d6e1198b..00000000
--- a/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
--e .
diff --git a/setup.py b/setup.py
index adb6a239..a88e8185 100755
--- a/setup.py
+++ b/setup.py
@@ -1,29 +1,16 @@
 #!/usr/bin/env python
 
-from __future__ import print_function
 import codecs
 import os
 import re
 from setuptools import setup
-import sys
 
-lxml_requirement = "lxml"
-if sys.platform == "darwin":
-    import platform
-
-    mac_ver = platform.mac_ver()[0]
-    mac_ver_no = int(mac_ver.split(".")[1])
-    if mac_ver_no < 9:
-        print("Using lxml<2.4")
-        lxml_requirement = "lxml<2.4"
-
-test_deps = [
-    # Test timeouts
-    "timeout_decorator",
+speed_deps = [
+     "cchardet",
 ]
 
 extras = {
-    "test": test_deps,
+    'speed': speed_deps,
 }
 
 # Adapted from https://github.com/pypa/pip/blob/master/setup.py
@@ -48,14 +35,19 @@ def find_version(*file_paths):
     version=find_version("readability", "__init__.py"),
     author="Yuri Baburov",
     author_email="burchik@gmail.com",
-    description="fast html to text parser (article readability tool) with python3 support",
+    description="fast html to text parser (article readability tool) with python 3 support",
     test_suite="tests.test_article_only",
-    long_description=open("README.rst").read(),
+    long_description=open("README.md").read(),
+    long_description_content_type="text/markdown",
     license="Apache License 2.0",
     url="http://github.com/buriy/python-readability",
-    packages=["readability", "readability.compat"],
-    install_requires=["chardet", lxml_requirement, "cssselect"],
-    tests_require=test_deps,
+    packages=["readability"],
+    install_requires=[
+        "chardet",
+        "lxml[html_clean]",
+        "lxml-html-clean; python_version < '3.11'",
+        "cssselect"
+    ],
     extras_require=extras,
     classifiers=[
         "Environment :: Web Environment",
@@ -66,12 +58,13 @@ def find_version(*file_paths):
         "Topic :: Internet",
         "Topic :: Software Development :: Libraries :: Python Modules",
         "Programming Language :: Python",
-        "Programming Language :: Python :: 2",
-        "Programming Language :: Python :: 2.7",
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.5",
-        "Programming Language :: Python :: 3.6",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
+        "Programming Language :: Python :: 3.13",
+        "Programming Language :: Python :: Implementation :: PyPy",
     ],
 )
diff --git a/tests/samples/summary-keep-all-images.sample.html b/tests/samples/summary-keep-all-images.sample.html
new file mode 100644
index 00000000..127683fc
--- /dev/null
+++ b/tests/samples/summary-keep-all-images.sample.html
@@ -0,0 +1,29 @@
+<!DOCTYPE html>
+<html lang="en">
+<head></head>
+<body>
+<h2>
+    <span>
+        H2 Headline H2 Headline H2 Headline H2 Headline H2 Headline H2 Headline H2 Headline H2 Headline H2 Headline H2 Headline
+    </span>
+</h2>
+<p>
+    <spa>
+        Text Text Text Text Text Text Text Text Text Text
+    </spa>
+</p>
+<div>
+    <span>
+        <a>
+            <img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAIAAAACUFjqAAABhGlDQ1BJQ0MgcHJvZmlsZQAAKJF9kT1Iw1AUhU9TpSLVDnYQcchQnSyIFXHUKhShQqgVWnUweekfNDEkKS6OgmvBwZ/FqoOLs64OroIg+APiLjgpukiJ9yWFFjFeeLyP8+45vHcfIDSqTLO6xgFNt81MKinm8iti6BUBhBFBPxIys4xZSUrDt77uqZvqLs6z/Pv+rD61YDEgIBLPMMO0ideJpzZtg/M+cZSVZZX4nHjMpAsSP3Jd8fiNc8llgWdGzWxmjjhKLJY6WOlgVjY14knimKrplC/kPFY5b3HWqjXWuid/YbigLy9xndYwUljAIiSIUFBDBVXYiNOuk2IhQ+dJH/+Q65fIpZCrAkaOeWxAg+z6wf/g92ytYmLCSwonge4Xx/kYAUK7QLPuON/HjtM8AYLPwJXe9m80gOlP0uttLXYERLaBi+u2puwBlzvA4JMhm7IrBWkJxSLwfkbflAcGboHeVW9urXOcPgBZmlX6Bjg4BEZLlL3m8+6ezrn929Oa3w9e03KfJqsuOAAAAAlwSFlzAAAuIwAALiMBeKU/dgAAAAd0SU1FB+kBDA8PKt1W5MYAAAAZdEVYdENvbW1lbnQAQ3JlYXRlZCB3aXRoIEdJTVBXgQ4XAAAAFUlEQVQY02P8x+rFgBswMeAFI1UaAJ65AWFYB2G5AAAAAElFTkSuQmCC"
+            />
+         </a>
+    </span>
+</div>
+<p>
+    <spa>
+        Text Text Text Text Text Text Text Text Text Text
+    </spa>
+</p>
+</body>
+</html>
\ No newline at end of file
diff --git a/tests/test_article_only.py b/tests/test_article_only.py
index 51dfaea1..fe322121 100644
--- a/tests/test_article_only.py
+++ b/tests/test_article_only.py
@@ -1,8 +1,33 @@
 import os
+import time
 import unittest
 
 from readability import Document
-import timeout_decorator
+from functools import wraps
+
+
+class TimeoutException(Exception):
+    """Exception raised when a function exceeds its time limit."""
+    pass
+
+
+def timeout(seconds):
+    """Decorator that raises TimeoutException after the call returns if it ran longer than `seconds`; a hung call is not interrupted."""
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            start_time = time.perf_counter()
+            result = func(*args, **kwargs)
+            end_time = time.perf_counter()
+            elapsed_time = end_time - start_time
+            if elapsed_time > seconds:
+                raise TimeoutException(
+                    f"Function '{func.__name__}' exceeded time limit of {seconds} seconds "
+                    f"with an execution time of {elapsed_time:.4f} seconds"
+                )
+            return result
+        return wrapper
+    return decorator
 
 
 SAMPLES = os.path.join(os.path.dirname(__file__), "samples")
@@ -101,7 +126,7 @@ def test_correct_cleanup(self):
         assert not "aside" in s
 
     # Many spaces make some regexes run forever
-    @timeout_decorator.timeout(seconds=3, use_signals=False)
+    @timeout(3)
     def test_many_repeated_spaces(self):
         long_space = " " * 1000000
         sample = "<html><body><p>foo" + long_space + "</p></body></html>"
@@ -124,3 +149,85 @@ def test_utf8_kanji(self):
         sample = load_sample("utf-8-kanji.sample.html")
         doc = Document(sample)
         res = doc.summary()
+        assert 0 < len(res) < 10000
+
+    def test_author_present(self):
+        sample = load_sample("the-hurricane-rubin-carter-denzel-washington.html")
+        doc = Document(sample)
+        assert 'Alex von Tunzelmann' == doc.author()
+
+    def test_author_absent(self):
+        sample = load_sample("si-game.sample.html")
+        doc = Document(sample)
+        assert '[no-author]' == doc.author()
+
+    def test_keep_images_present(self):
+        sample = load_sample("summary-keep-all-images.sample.html")
+
+        doc = Document(sample)
+
+        assert "<img" in doc.summary(keep_all_images=True)
+
+    def test_keep_images_absent(self):
+        sample = load_sample("summary-keep-all-images.sample.html")
+
+        doc = Document(sample)
+
+        assert "<img" not in doc.summary(keep_all_images=False)
+
+    def test_keep_images_absent_by_default(self):
+        sample = load_sample("summary-keep-all-images.sample.html")
+
+        doc = Document(sample)
+
+        assert "<img" not in doc.summary()
+
+    def test_cjk_summary(self):
+        """Check we can extract CJK text correctly."""
+        html = """
+        <html>
+            <head>
+                <title>这是标题</title>
+            </head>
+            <body>
+                <div>一些无关紧要的内容</div>
+                <div class="article-content">
+                    <h1>主要文章标题</h1>
+                    <p>这是主要内容的第一段。</p>
+                    <p>これはコンテンツの第2段落です。</p>
+                    <p>이것은 콘텐츠의 세 번째 단락입니다.</p>
+                    <p>This is the fourth paragraph.</p>
+                </div>
+                <div>More irrelevant stuff</div>
+            </body>
+        </html>
+        """
+        doc = Document(html)
+        summary = doc.summary()
+        # Check that the main CJK content is present in the summary
+        self.assertTrue("这是主要内容的第一段" in summary)
+        self.assertTrue("これはコンテンツの第2段落です" in summary)
+        self.assertTrue("이것은 콘텐츠의 세 번째 단락입니다" in summary)
+        # Check that the text of the unrelated leading <div> was dropped from the summary
+        self.assertFalse("一些无关紧要的内容" in summary)
+
+    def test_shorten_title_delimiter_bug(self):
+        """Test that shorten_title handles delimiters correctly when the last part is valid.
+
+        This specifically targets a potential bug where 'p1' might be used instead of 'pl'.
+        """
+        html = """
+        <html>
+            <head>
+                <title>Short Part | これは長いです</title>
+            </head>
+            <body>
+                <div>Content</div>
+            </body>
+        </html>
+        """
+        doc = Document(html)
+        # With the bug, this call might raise NameError: name 'p1' is not defined
+        # With the fix, it should correctly return the last part.
+        short_title = doc.short_title()
+        self.assertEqual(short_title, "これは長いです")
diff --git a/tox.ini b/tox.ini
index a9ec295d..926fda50 100644
--- a/tox.ini
+++ b/tox.ini
@@ -5,7 +5,7 @@
 
 [tox]
 envlist =
-    py{27,35,36,37,38,py,py3}, doc
+    py{38,39,310,311,312,313,py3}, doc
 skip_missing_interpreters =
     True
 
@@ -14,7 +14,7 @@ deps =
     pytest
     doc: sphinx
     doc: sphinx_rtd_theme
-    doc: recommonmark
+    doc: myst-parser
 
 # This creates the virtual envs with --site-packages so already packages
 # that are already installed will be reused. This is especially useful on
@@ -30,4 +30,4 @@ commands =
 
 [testenv:doc]
 commands =
-    python setup.py build_sphinx
+    sphinx-build -b html doc/source/ build/