diff --git a/.flake8 b/.flake8
new file mode 100644
index 00000000..9f8ccc38
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,6 @@
+[flake8]
+ignore =
+    # line too long
+    E501
+exclude =
+    build/
diff --git a/.github/workflows/lint_and_test.yml b/.github/workflows/lint_and_test.yml
new file mode 100644
index 00000000..d6e13da3
--- /dev/null
+++ b/.github/workflows/lint_and_test.yml
@@ -0,0 +1,33 @@
+---
+name: python-textile  # workflow name
+
+on: [push]  # triggered by pushes only; no pull_request trigger is configured
+
+jobs:
+  lint_and_test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:  # one job per (python-version, image_size) combination
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "pypy3.10"]
+        image_size: ['true', 'false']  # 'true' installs the optional [imagesize] extra below
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Python flake8 Lint  # lint runs before the package is installed
+        uses: py-actions/flake8@v2.3.0
+      - name: Install dependencies
+        run: |  # installs the test tooling, then textile itself in editable mode
+          imagesize=''
+          pip install -U pytest pytest-cov coverage codecov
+          if [[ ${{ matrix.image_size }} == true ]] ; then imagesize='[imagesize]' ; fi
+          pip install -e ".${imagesize}"
+      - name: run tests
+        run: |  # NOTE(review): no options are passed here; presumably coverage flags come from the project's pytest configuration -- confirm
+          pytest
+      - name: Codecov
+        uses: codecov/codecov-action@v4
+        env:
+          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}  # taken from the repository secret of the same name
diff --git a/.gitignore b/.gitignore
index 2ea03521..7f97eb68 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,4 @@ develop-eggs
 .DS_Store
 *.swp
 .tox
+README.txt
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 3f7d77cf..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-dist: xenial   # required for Python >= 3.7
-language: python
-env:
-  - IMAGESIZE=true
-  - IMAGESIZE=false
-python:
-  - "3.5"
-  - "3.6"
-  - "3.7"
-  - "3.8"
-  - "3.9"
-  # PyPy versions
-  - "pypy3"
-# command to install dependencies
-install:
-  - imagesize=''
-  - pip install -U pytest pytest-cov coverage codecov
-  - if [[ $IMAGESIZE == true ]] ; then imagesize='[imagesize]' ; fi
-  - pip install -e ".${imagesize}"
-# command to run tests
-script: py.test
-after_success:
-  - codecov
diff --git a/CHANGELOG.textile b/CHANGELOG.textile
index 43beedca..a3ff7424 100644
--- a/CHANGELOG.textile
+++ b/CHANGELOG.textile
@@ -1,5 +1,14 @@
 h1. Textile Changelog
 
+h2. Version 4.0.3
+* Update supported Python versions to 3.8 - 3.12 ("#83":https://github.com/textile/python-textile/issues/83)
+* Replace html5lib with nh3 for html sanitization
+* General code cleanup
+* Bugfixes:
+** Wrong HTML output when "bc.." is the very last block in the document ("#81":https://github.com/textile/python-textile/issues/81)
+* Other:
+** Use GitHub Actions instead of Travis CI for automated testing
+
 h2. Version 4.0.2
 * Bugfixes:
 ** Support non-http schemas in url refs ("#75":https://github.com/textile/python-textile/pull/75)
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
index 5624ae6a..90d949aa 100644
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@@ -7,4 +7,6 @@ Alex Shiels
 Jason Samsa
 Kurt Raschke
 Dave Brondsema
-Dmitry Shachnev
\ No newline at end of file
+Dmitry Shachnev
+Kirill Mavreshko
+Brad Schoening
\ No newline at end of file
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 5ca56e8d..00000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,2 +0,0 @@
-include MANIFEST.in
-include tests/fixtures/README.txt
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..7570eac3
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,38 @@
+# None of these targets produce a file of the same name. Declare them phony so
+# make always runs them -- notably `build`, which shares its name with the
+# ./build directory that `clean` removes.
+.PHONY: clean generate_pypi_README build upload_to_test upload_to_prod
+
+# Converter used to render the README. Prefer the active virtualenv's copy and
+# fall back to whatever `pytextile` is on PATH when no virtualenv is active
+# (an unset VIRTUAL_ENV would otherwise expand to /bin/pytextile).
+PYTEXTILE ?= $(if $(VIRTUAL_ENV),$(VIRTUAL_ENV)/bin/pytextile,pytextile)
+
+# Remove the generated README and all build artifacts.
+clean:
+	$(RM) README.txt README.txt.tmp
+	$(RM) -r ./dist ./build
+
+# Render README.textile to the README.txt that pyproject.toml names as the
+# package readme, stripping one leading tab from each output line.
+# The conversion goes through a temporary file rather than a pipe so that a
+# failing pytextile stops make instead of silently leaving an empty README.txt.
+generate_pypi_README:
+	$(PYTEXTILE) README.textile > README.txt.tmp
+	sed -e 's/^\t//' README.txt.tmp > README.txt
+	$(RM) README.txt.tmp
+
+# Build the distributions into ./dist after regenerating the README.
+build: generate_pypi_README
+	python -m build
+
+# Validate the distributions and upload them to the test_textile repository.
+upload_to_test: build
+	twine check ./dist/*
+	twine upload --repository test_textile ./dist/*
+
+# Validate the distributions. For now this does not actually upload to prod
+# PyPI; it only prints the command to do so.
+upload_to_prod: build
+	twine check ./dist/*
+	@echo "twine upload --repository textile ./dist/*"
diff --git a/README.textile b/README.textile
index 98f4fbde..958ea63c 100644
--- a/README.textile
+++ b/README.textile
@@ -1,15 +1,15 @@
-!https://travis-ci.org/textile/python-textile.svg!:https://travis-ci.org/textile/python-textile !https://codecov.io/github/textile/python-textile/coverage.svg!:https://codecov.io/github/textile/python-textile !https://img.shields.io/pypi/pyversions/textile! !https://img.shields.io/pypi/wheel/textile!
+!https://github.com/textile/python-textile/actions/workflows/lint_and_test.yml/badge.svg(python-textile)!:https://github.com/textile/python-textile/actions/workflows/lint_and_test.yml !https://codecov.io/github/textile/python-textile/coverage.svg!:https://codecov.io/github/textile/python-textile !https://img.shields.io/pypi/pyversions/textile! !https://img.shields.io/pypi/wheel/textile!
 
 h1. python-textile
 
-python-textile is a Python port of "Textile":http://txstyle.org/, Dean Allen's humane web text generator.
+python-textile is a Python port of "Textile":https://textile-lang.com/, Dean Allen's humane web text generator.
 
 h2. Installation
 
 @pip install textile@
 
 Dependencies:
-* "html5lib":https://pypi.org/project/html5lib/
+* "nh3":https://pypi.org/project/nh3/
 * "regex":https://pypi.org/project/regex/ (The regex package causes problems with PyPy, and is not installed as a dependency in such environments. If you are upgrading a textile install on PyPy which had regex previously included, you may need to uninstall it.)
 
 Optional dependencies include:
@@ -42,7 +42,7 @@ bc.. import textile
 
 h3. Notes:
 
-* Active development supports Python 3.5 or later.
+* Active development supports Python 3.8 or later.
 
 h3. Running Tests
 
@@ -50,8 +50,8 @@ To run the test suite, use pytest. `pytest-cov` is required as well.
 
 When textile is installed locally:
 
-bc.. pytest
+bc. pytest
 
 When textile is not installed locally:
 
-bc.. PYTHONPATH=. pytest
+bc. PYTHONPATH=. pytest
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..caa03da1
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,52 @@
+[build-system]
+requires = ["setuptools", "setuptools-scm", "nh3"]  # NOTE(review): nh3 is presumably listed because resolving the dynamic version imports the textile package -- confirm
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "textile"
+authors = [
+    { name = "Dennis Burke", email = "ikirudennis@gmail.com"}
+]
+description = 'Textile processing for python.'
+classifiers = [
+    'Development Status :: 5 - Production/Stable',
+    'Environment :: Web Environment',
+    'Intended Audience :: Developers',
+    'License :: OSI Approved :: BSD License',
+    'Operating System :: OS Independent',
+    'Programming Language :: Python',
+    'Programming Language :: Python :: 3',
+    'Programming Language :: Python :: 3 :: Only',
+    'Programming Language :: Python :: 3.8',
+    'Programming Language :: Python :: 3.9',
+    'Programming Language :: Python :: 3.10',
+    'Programming Language :: Python :: 3.11',
+    'Programming Language :: Python :: 3.12',
+    'Topic :: Software Development :: Libraries :: Python Modules',
+]
+dynamic = ["version",]  # resolved from textile.__version__; see [tool.setuptools.dynamic] below
+dependencies = [
+    'nh3',
+    'regex>1.0; implementation_name != "pypy"',  # the marker skips regex on PyPy
+]
+requires-python = '>=3.8'
+keywords = ['textile', 'text', 'html markup']
+# Use the following command to generate a README.txt which is compatible with
+# pypi's readme rendering:
+#   pytextile README.textile | sed -e 's/^\t//' > README.txt
+readme = {file = 'README.txt', content-type = 'text/markdown'}  # NOTE(review): the generated file is pytextile's HTML output but is declared as markdown -- confirm PyPI renders it as intended
+
+[project.optional-dependencies]
+develop = ['pytest', 'pytest-cov']
+imagesize = ['Pillow>=3.0.0',]  # installed via the [imagesize] extra
+
+[project.urls]
+Homepage = "https://github.com/textile/python-textile"
+Repository = "https://github.com/textile/python-textile.git"
+Issues = "https://github.com/textile/python-textile/issues"
+
+[project.scripts]
+pytextile = "textile.__main__:main"  # console command `pytextile` calls main() in textile/__main__.py
+
+[tool.setuptools.dynamic]
+version = {attr = "textile.__version__"}
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 118c2fb0..00000000
--- a/setup.py
+++ /dev/null
@@ -1,53 +0,0 @@
-from setuptools import setup, find_packages
-from setuptools.command.test import test as TestCommand
-import os
-import sys
-
-
-def get_version():
-    basedir = os.path.dirname(__file__)
-    with open(os.path.join(basedir, 'textile/version.py')) as f:
-        variables = {}
-        exec(f.read(), variables)
-        return variables.get('VERSION')
-    raise RuntimeError('No version info found.')
-
-setup(
-    name='textile',
-    version=get_version(),
-    author='Dennis Burke',
-    author_email='ikirudennis@gmail.com',
-    description='Textile processing for python.',
-    url='http://github.com/textile/python-textile',
-    packages=find_packages(),
-    classifiers=[
-        'Development Status :: 5 - Production/Stable',
-        'Environment :: Web Environment',
-        'Intended Audience :: Developers',
-        'License :: OSI Approved :: BSD License',
-        'Operating System :: OS Independent',
-        'Programming Language :: Python',
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3 :: Only',
-        'Programming Language :: Python :: 3.5',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: 3.7',
-        'Programming Language :: Python :: 3.8',
-        'Programming Language :: Python :: 3.9',
-        'Topic :: Software Development :: Libraries :: Python Modules',
-    ],
-    keywords='textile,text,html markup',
-    install_requires=[
-        'html5lib>=1.0.1',
-        'regex>1.0; implementation_name != "pypy"',
-        ],
-    extras_require={
-        'develop': ['pytest', 'pytest-cov'],
-        'imagesize': ['Pillow>=3.0.0'],
-    },
-    entry_points={'console_scripts': ['pytextile=textile.__main__:main']},
-    tests_require=['pytest', 'pytest-cov'],
-    include_package_data=True,
-    zip_safe=False,
-    python_requires='>=3.5',
-)
diff --git a/textile/tools/__init__.py b/tests/__init__.py
similarity index 100%
rename from textile/tools/__init__.py
rename to tests/__init__.py
diff --git a/tests/fixtures/README.txt b/tests/fixtures/README.txt
index 61dc0f01..515cf860 100644
--- a/tests/fixtures/README.txt
+++ b/tests/fixtures/README.txt
@@ -1,8 +1,8 @@
-	<p><a href="https://travis-ci.org/textile/python-textile"><img alt="" src="https://travis-ci.org/textile/python-textile.svg" /></a> <a href="https://codecov.io/github/textile/python-textile"><img alt="" src="https://codecov.io/github/textile/python-textile/coverage.svg" /></a> <img alt="" src="https://img.shields.io/pypi/pyversions/textile" /> <img alt="" src="https://img.shields.io/pypi/wheel/textile" /></p>
+	<p><a href="https://github.com/textile/python-textile/actions/workflows/lint_and_test.yml"><img alt="python-textile" src="https://github.com/textile/python-textile/actions/workflows/lint_and_test.yml/badge.svg" title="python-textile" /></a> <a href="https://codecov.io/github/textile/python-textile"><img alt="" src="https://codecov.io/github/textile/python-textile/coverage.svg" /></a> <img alt="" src="https://img.shields.io/pypi/pyversions/textile" /> <img alt="" src="https://img.shields.io/pypi/wheel/textile" /></p>
 
 	<h1>python-textile</h1>
 
-	<p>python-textile is a Python port of <a href="http://txstyle.org/">Textile</a>, Dean Allen&#8217;s humane web text generator.</p>
+	<p>python-textile is a Python port of <a href="https://textile-lang.com/">Textile</a>, Dean Allen&#8217;s humane web text generator.</p>
 
 	<h2>Installation</h2>
 
@@ -10,7 +10,7 @@
 
 	<p>Dependencies:
 	<ul>
-		<li><a href="https://pypi.org/project/html5lib/">html5lib</a></li>
+		<li><a href="https://pypi.org/project/nh3/">nh3</a></li>
 		<li><a href="https://pypi.org/project/regex/">regex</a> (The regex package causes problems with PyPy, and is not installed as a dependency in such environments. If you are upgrading a textile install on PyPy which had regex previously included, you may need to uninstall it.)</li>
 	</ul></p>
 
@@ -47,7 +47,7 @@
 	<h3>Notes:</h3>
 
 	<ul>
-		<li>Active development supports Python 3.5 or later.</li>
+		<li>Active development supports Python 3.8 or later.</li>
 	</ul>
 
 	<h3>Running Tests</h3>
@@ -56,8 +56,8 @@
 
 	<p>When textile is installed locally:</p>
 
-<pre><code>pytest
+<pre><code>pytest</code></pre>
 
-When textile is not installed locally:</code></pre>
+	<p>When textile is not installed locally:</p>
 
-<pre>PYTHONPATH=. pytest</pre>
\ No newline at end of file
+<pre><code>PYTHONPATH=. pytest</code></pre>
\ No newline at end of file
diff --git a/tests/test_attributes.py b/tests/test_attributes.py
index 70da8422..fed235de 100644
--- a/tests/test_attributes.py
+++ b/tests/test_attributes.py
@@ -1,5 +1,6 @@
+from collections import OrderedDict
 from textile.utils import parse_attributes
-import re
+
 
 def test_parse_attributes():
     assert parse_attributes('\\1', element='td') == {'colspan': '1'}
@@ -13,3 +14,11 @@ def test_parse_attributes():
     assert parse_attributes('<') == {'style': 'text-align:left;'}
     assert parse_attributes('(c#i)') == {'class': 'c', 'id': 'i'}
     assert parse_attributes('\\2 100', element='col') == {'span': '2', 'width': '100'}
+
+
+def test_parse_attributes_edge_cases():
+    # '(:c#i)' is expected to yield only the id attribute.
+    only_id = OrderedDict({'id': 'i'})
+    assert parse_attributes('(:c#i)') == only_id
+    # '(<)' is expected to yield no attributes at all.
+    assert parse_attributes('(<)') == OrderedDict()
diff --git a/tests/test_block.py b/tests/test_block.py
index 44f3ea23..eed5441c 100644
--- a/tests/test_block.py
+++ b/tests/test_block.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import textile
 from textile.objects import Block
 
@@ -8,6 +6,7 @@
 except ImportError:
     from ordereddict import OrderedDict
 
+
 def test_block():
     t = textile.Textile()
     result = t.block('h1. foobar baby')
@@ -16,15 +15,14 @@ def test_block():
 
     b = Block(t, "bq", "", None, "", "Hello BlockQuote")
     expect = ('blockquote', OrderedDict(), 'p', OrderedDict(),
-            'Hello BlockQuote')
+              'Hello BlockQuote')
     result = (b.outer_tag, b.outer_atts, b.inner_tag, b.inner_atts, b.content)
     assert result == expect
 
     b = Block(t, "bq", "", None, "http://google.com", "Hello BlockQuote")
-    citation = '{0}1:url'.format(t.uid)
     expect = ('blockquote', OrderedDict([('cite',
-        '{0.uid}{0.refIndex}:url'.format(t))]), 'p', OrderedDict(),
-        'Hello BlockQuote')
+              '{0.uid}{0.refIndex}:url'.format(t))]), 'p', OrderedDict(),
+              'Hello BlockQuote')
     result = (b.outer_tag, b.outer_atts, b.inner_tag, b.inner_atts, b.content)
     assert result == expect
 
@@ -40,6 +38,7 @@ def test_block():
     result = (b.outer_tag, b.outer_atts, b.inner_tag, b.inner_atts, b.content)
     assert result == expect
 
+
 def test_block_tags_false():
     t = textile.Textile(block_tags=False)
     assert t.block_tags is False
@@ -48,6 +47,7 @@ def test_block_tags_false():
     expect = 'test'
     assert result == expect
 
+
 def test_blockcode_extended():
     input = 'bc.. text\nmoretext\n\nevenmoretext\n\nmoremoretext\n\np. test'
     expect = '<pre><code>text\nmoretext\n\nevenmoretext\n\nmoremoretext</code></pre>\n\n\t<p>test</p>'
@@ -55,6 +55,7 @@ def test_blockcode_extended():
     result = t.parse(input)
     assert result == expect
 
+
 def test_blockcode_in_README():
     with open('README.textile') as f:
         readme = ''.join(f.readlines())
@@ -63,6 +64,7 @@ def test_blockcode_in_README():
         expect = ''.join(f.readlines())
     assert result == expect
 
+
 def test_blockcode_comment():
     input = '###.. block comment\nanother line\n\np. New line'
     expect = '\t<p>New line</p>'
@@ -70,6 +72,7 @@ def test_blockcode_comment():
     result = t.parse(input)
     assert result == expect
 
+
 def test_extended_pre_block_with_many_newlines():
     """Extra newlines in an extended pre block should not get cut down to only
     two."""
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 5f6e501f..5e6ab794 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -3,28 +3,30 @@
 
 import textile
 
+
 def test_console_script():
     command = [sys.executable, '-m', 'textile', 'README.textile']
     try:
         result = subprocess.check_output(command)
     except AttributeError:
         command[2] = 'textile.__main__'
-        result = subprocess.Popen(command,
-                stdout=subprocess.PIPE).communicate()[0]
+        result = subprocess.Popen(
+            command, stdout=subprocess.PIPE).communicate()[0]
     with open('tests/fixtures/README.txt') as f:
         expect = ''.join(f.readlines())
-    if type(result) == bytes:
+    if isinstance(result, bytes):
         result = result.decode('utf-8')
     assert result == expect
 
+
 def test_version_string():
     command = [sys.executable, '-m', 'textile', '-v']
     try:
         result = subprocess.check_output(command)
     except AttributeError:
         command[2] = 'textile.__main__'
-        result = subprocess.Popen(command,
-                stdout=subprocess.PIPE).communicate()[0]
-    if type(result) == bytes:
+        result = subprocess.Popen(
+            command, stdout=subprocess.PIPE).communicate()[0]
+    if isinstance(result, bytes):
         result = result.decode('utf-8')
     assert result.strip() == textile.__version__
diff --git a/tests/test_footnoteRef.py b/tests/test_footnoteRef.py
index b773ad2f..5ac2ea4b 100644
--- a/tests/test_footnoteRef.py
+++ b/tests/test_footnoteRef.py
@@ -1,5 +1,5 @@
 from textile import Textile
-import re
+
 
 def test_footnoteRef():
     t = Textile()
diff --git a/tests/test_getRefs.py b/tests/test_getRefs.py
index d3cfcd72..8a22d4fb 100644
--- a/tests/test_getRefs.py
+++ b/tests/test_getRefs.py
@@ -1,5 +1,6 @@
 from textile import Textile
 
+
 def test_getRefs():
     t = Textile()
     result = t.getRefs("some text [Google]http://www.google.com")
diff --git a/tests/test_getimagesize.py b/tests/test_getimagesize.py
index 43f85e3a..4cafc9dc 100644
--- a/tests/test_getimagesize.py
+++ b/tests/test_getimagesize.py
@@ -1,8 +1,9 @@
-from textile.tools.imagesize import getimagesize
+from textile.utils import getimagesize
 import pytest
 
 PIL = pytest.importorskip('PIL')
 
+
 def test_imagesize():
     assert getimagesize("http://www.google.com/intl/en_ALL/images/logo.gif") == (276, 110)
     assert getimagesize("http://bad.domain/") == ''
diff --git a/tests/test_github_issues.py b/tests/test_github_issues.py
index 2507e5f4..6808054a 100644
--- a/tests/test_github_issues.py
+++ b/tests/test_github_issues.py
@@ -1,53 +1,60 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
 import textile
 
+
 def test_github_issue_16():
     result = textile.textile('"$":http://google.com "$":https://google.com "$":mailto:blackhole@sun.comet')
     expect = '\t<p><a href="http://google.com">google.com</a> <a href="https://google.com">google.com</a> <a href="mailto:blackhole%40sun.comet">blackhole@sun.comet</a></p>'
     assert result == expect
 
+
 def test_github_issue_17():
     result = textile.textile('!http://www.ox.ac.uk/favicon.ico!')
     expect = '\t<p><img alt="" src="http://www.ox.ac.uk/favicon.ico" /></p>'
     assert result == expect
 
+
 def test_github_issue_20():
     text = 'This is a link to a ["Wikipedia article about Textile":http://en.wikipedia.org/wiki/Textile_(markup_language)].'
     result = textile.textile(text)
     expect = '\t<p>This is a link to a <a href="http://en.wikipedia.org/wiki/Textile_%28markup_language%29">Wikipedia article about Textile</a>.</p>'
     assert result == expect
 
+
 def test_github_issue_21():
-    text = '''h1. xml example
+    text = ('''h1. xml example
 
-bc. 
+bc. '''
+            '''
 <foo>
   bar
-</foo>'''
+</foo>''')
     result = textile.textile(text)
     expect = '\t<h1>xml example</h1>\n\n<pre><code>\n&lt;foo&gt;\n  bar\n&lt;/foo&gt;</code></pre>'
     assert result == expect
 
+
 def test_github_issue_22():
     text = '''_(artist-name)Ty Segall_’s'''
     result = textile.textile(text)
     expect = '\t<p><em class="artist-name">Ty Segall</em>’s</p>'
     assert result == expect
 
+
 def test_github_issue_26():
     text = ''
     result = textile.textile(text)
     expect = ''
     assert result == expect
 
+
 def test_github_issue_27():
     test = """* Folders with ":" in their names are displayed with a forward slash "/" instead. (Filed as "#4581709":/test/link, which was considered "normal behaviour" - quote: "Please note that Finder presents the 'Carbon filesystem' view, regardless of the underlying filesystem.")"""
     result = textile.textile(test)
     expect = """\t<ul>\n\t\t<li>Folders with &#8220;:&#8221; in their names are displayed with a forward slash &#8220;/&#8221; instead. (Filed as <a href="/test/link">#4581709</a>, which was considered &#8220;normal behaviour&#8221; &#8211; quote: &#8220;Please note that Finder presents the &#8216;Carbon filesystem&#8217; view, regardless of the underlying filesystem.&#8221;)</li>\n\t</ul>"""
     assert result == expect
 
+
 def test_github_issue_28():
     test = """So here I am porting my ancient "newspipe":newspipe "front-end":blog/2006/09/30/0950 to "Snakelets":Snakelets and "Python":Python, and I've just trimmed down over 20 lines of "PHP":PHP down to essentially one line of "BeautifulSoup":BeautifulSoup retrieval:
 
@@ -80,23 +87,26 @@ def parseWapProfile(self, url):
 \t<p>Of course there&#8217;s a lot more error handling to do (and useful data to glean off the <a href="XML"><span class="caps">XML</span></a>), but being able to cut through all the usual parsing crap is immensely gratifying.</p>""")
     assert result == expect
 
+
 def test_github_issue_30():
-    text ='"Tëxtíle (Tëxtíle)":http://lala.com'
+    text = '"Tëxtíle (Tëxtíle)":http://lala.com'
     result = textile.textile(text)
     expect = '\t<p><a href="http://lala.com" title="Tëxtíle">Tëxtíle</a></p>'
     assert result == expect
 
-    text ='!http://lala.com/lol.gif(♡ imáges)!'
+    text = '!http://lala.com/lol.gif(♡ imáges)!'
     result = textile.textile(text)
     expect = '\t<p><img alt="♡ imáges" src="http://lala.com/lol.gif" title="♡ imáges" /></p>'
     assert result == expect
 
+
 def test_github_issue_36():
     text = '"Chögyam Trungpa":https://www.google.com/search?q=Chögyam+Trungpa'
     result = textile.textile(text)
     expect = '\t<p><a href="https://www.google.com/search?q=Chögyam+Trungpa">Chögyam Trungpa</a></p>'
     assert result == expect
 
+
 def test_github_issue_37():
     text = '# xxx\n# yyy\n*blah*'
     result = textile.textile(text)
@@ -118,24 +128,28 @@ def test_github_issue_37():
 \t</ul>'''
     assert result == expect
 
+
 def test_github_issue_40():
     text = '\r\n'
     result = textile.textile(text)
     expect = '\r\n'
     assert result == expect
 
+
 def test_github_issue_42():
     text = '!./image.png!'
     result = textile.textile(text)
     expect = '\t<p><img alt="" src="./image.png" /></p>'
     assert result == expect
 
+
 def test_github_issue_43():
     text = 'pre. smart ‘quotes’ are not smart!'
     result = textile.textile(text)
     expect = '<pre>smart ‘quotes’ are not smart!</pre>'
     assert result == expect
 
+
 def test_github_issue_45():
     """Incorrect transform unicode url"""
     text = '"test":https://myabstractwiki.ru/index.php/%D0%97%D0%B0%D0%B3%D0%BB%D0%B0%D0%B2%D0%BD%D0%B0%D1%8F_%D1%81%D1%82%D1%80%D0%B0%D0%BD%D0%B8%D1%86%D0%B0'
@@ -143,6 +157,7 @@ def test_github_issue_45():
     expect = '\t<p><a href="https://myabstractwiki.ru/index.php/%D0%97%D0%B0%D0%B3%D0%BB%D0%B0%D0%B2%D0%BD%D0%B0%D1%8F_%D1%81%D1%82%D1%80%D0%B0%D0%BD%D0%B8%D1%86%D0%B0">test</a></p>'
     assert result == expect
 
+
 def test_github_issue_46():
     """Key error on mal-formed numbered lists. CAUTION: both the input and the
     ouput are ugly."""
@@ -153,6 +168,7 @@ def test_github_issue_46():
     result = textile.textile(text)
     assert result == expect
 
+
 def test_github_issue_47():
     """Incorrect wrap pre-formatted value"""
     text = '''pre.. word
@@ -172,6 +188,7 @@ def test_github_issue_47():
 yet anothe word</pre>'''
     assert result == expect
 
+
 def test_github_issue_49():
     """Key error on russian hash-route link"""
     s = '"link":https://ru.vuejs.org/v2/guide/components.html#Входные-параметры'
@@ -179,6 +196,7 @@ def test_github_issue_49():
     expect = '\t<p><a href="https://ru.vuejs.org/v2/guide/components.html#Входные-параметры">link</a></p>'
     assert result == expect
 
+
 def test_github_issue_50():
     """Incorrect wrap code with Java generics in pre"""
     test = ('pre.. public class Tynopet<T extends Framework> {}\n\nfinal '
@@ -189,6 +207,7 @@ def test_github_issue_50():
               'ArrayList&lt;&gt;();</pre>')
     assert result == expect
 
+
 def test_github_issue_51():
     """Link build with $ sign without "http" prefix broken."""
     test = '"$":www.google.com.br'
@@ -196,6 +215,7 @@ def test_github_issue_51():
     expect = '\t<p><a href="www.google.com.br">www.google.com.br</a></p>'
     assert result == expect
 
+
 def test_github_issue_52():
     """Table build without space after aligment raise a AttributeError."""
     test = '|=.First Header |=. Second Header |'
@@ -205,6 +225,7 @@ def test_github_issue_52():
               '\n\t\t</tr>\n\t</table>')
     assert result == expect
 
+
 def test_github_issue_55():
     """Incorrect handling of quote entities in extended pre block"""
     test = ('pre.. this is the first line\n\nbut "quotes" in an extended pre '
@@ -258,15 +279,17 @@ def test_github_issue_55():
               'return configs;\n}\n}</pre>')
     assert result == expect
 
+
 def test_github_issue_56():
     """Empty description lists throw error"""
     result = textile.textile("- :=\n-")
     expect = '<dl>\n</dl>'
     assert result == expect
 
+
 def test_github_pull_61():
     """Fixed code block multiline encoding on quotes/span"""
-    test = '''bc.. This is some TEXT inside a "Code BLOCK"
+    test = ('''bc.. This is some TEXT inside a "Code BLOCK"
 
 {
   if (JSON) {
@@ -275,11 +298,12 @@ def test_github_pull_61():
   }
 }
 
-Back to 10-4 CAPS 
+Back to 10-4 CAPS '''
+            '''
 
 p.. Some multiline Paragragh
 
-Here is some output!!! "Some" CAPS'''
+Here is some output!!! "Some" CAPS''')
 
     expect = '''<pre><code>This is some TEXT inside a &quot;Code BLOCK&quot;
 
@@ -299,6 +323,7 @@ def test_github_pull_61():
     result = t.parse(test)
     assert result == expect
 
+
 def test_github_pull_62():
     """Fix for paragraph multiline, only last paragraph is rendered
     correctly"""
@@ -341,6 +366,7 @@ def test_github_pull_62():
     result = t.parse(test)
     assert result == expect
 
+
 def test_github_pull_63():
     """Forgot to set multiline_para to False"""
     test = '''p.. First one 'is'
diff --git a/tests/test_glyphs.py b/tests/test_glyphs.py
index 56b0d272..ed50ad53 100644
--- a/tests/test_glyphs.py
+++ b/tests/test_glyphs.py
@@ -1,5 +1,6 @@
 from textile import Textile
 
+
 def test_glyphs():
     t = Textile()
 
diff --git a/tests/test_image.py b/tests/test_image.py
index aad39e29..b7462924 100644
--- a/tests/test_image.py
+++ b/tests/test_image.py
@@ -1,5 +1,6 @@
 from textile import Textile
 
+
 def test_image():
     t = Textile()
     result = t.image('!/imgs/myphoto.jpg!:http://jsamsa.com')
@@ -17,5 +18,5 @@ def test_image():
     t = Textile(rel='nofollow')
     result = t.image('!/imgs/myphoto.jpg!:http://jsamsa.com')
     expect = ('<a href="{0}1:url" rel="nofollow"><img alt="" src="{0}2:url" '
-            '/></a>'.format(t.uid))
+              '/></a>'.format(t.uid))
     assert result == expect
diff --git a/tests/test_imagesize.py b/tests/test_imagesize.py
index 112989e1..cb3ad68a 100644
--- a/tests/test_imagesize.py
+++ b/tests/test_imagesize.py
@@ -1,10 +1,11 @@
 import textile
 
+
 def test_imagesize():
     imgurl = 'http://www.google.com/intl/en_ALL/images/srpr/logo1w.png'
-    result = textile.tools.imagesize.getimagesize(imgurl)
+    result = textile.utils.getimagesize(imgurl)
     try:
-        import PIL
+        import PIL  # noqa: F401
 
         expect = (275, 95)
         assert result == expect
diff --git a/tests/test_lists.py b/tests/test_lists.py
index 4e85f4c8..06d13c33 100644
--- a/tests/test_lists.py
+++ b/tests/test_lists.py
@@ -1,5 +1,6 @@
 from textile import Textile
 
+
 def test_lists():
     t = Textile()
     result = t.textileLists("* one\n* two\n* three")
diff --git a/tests/test_retrieve.py b/tests/test_retrieve.py
index 10bd1733..a4165240 100644
--- a/tests/test_retrieve.py
+++ b/tests/test_retrieve.py
@@ -1,5 +1,6 @@
 from textile import Textile
 
+
 def test_retrieve():
     t = Textile()
     id = t.shelve("foobar")
diff --git a/tests/test_span.py b/tests/test_span.py
index d83530dd..7ae5b4b1 100644
--- a/tests/test_span.py
+++ b/tests/test_span.py
@@ -1,19 +1,20 @@
 from textile import Textile
 
+
 def test_span():
     t = Textile()
-    result = t.span("hello %(bob)span *strong* and **bold**% goodbye")
+    result = t.retrieveTags(t.span("hello %(bob)span *strong* and **bold**% goodbye"))
     expect = ('hello <span class="bob">span <strong>strong</strong> and '
-            '<b>bold</b></span> goodbye')
+              '<b>bold</b></span> goodbye')
     assert result == expect
 
-    result = t.span('%:http://domain.tld test%')
+    result = t.retrieveTags(t.span('%:http://domain.tld test%'))
     expect = '<span cite="http://domain.tld">test</span>'
     assert result == expect
 
     t = Textile()
     # cover the partial branch where we exceed the max_span_depth.
     t.max_span_depth = 2
-    result = t.span('_-*test*-_')
+    result = t.retrieveTags(t.span('_-*test*-_'))
     expect = '<em><del>*test*</del></em>'
     assert result == expect
diff --git a/tests/test_subclassing.py b/tests/test_subclassing.py
index 9235e032..a7db99a3 100644
--- a/tests/test_subclassing.py
+++ b/tests/test_subclassing.py
@@ -1,10 +1,10 @@
 import textile
 
+
 def test_change_glyphs():
     class TextilePL(textile.Textile):
         glyph_definitions = dict(textile.Textile.glyph_definitions,
-            quote_double_open = '&#8222;'
-        )
+                                 quote_double_open='&#8222;')
 
     test = 'Test "quotes".'
     expect = '\t<p>Test &#8222;quotes&#8221;.</p>'
diff --git a/tests/test_table.py b/tests/test_table.py
index 0a3cb0d6..1ea34e94 100644
--- a/tests/test_table.py
+++ b/tests/test_table.py
@@ -1,5 +1,6 @@
 from textile import Textile
 
+
 def test_table():
     t = Textile()
     result = t.table('(rowclass). |one|two|three|\n|a|b|c|')
diff --git a/tests/test_textile.py b/tests/test_textile.py
index 0c37690d..84e9ddf8 100644
--- a/tests/test_textile.py
+++ b/tests/test_textile.py
@@ -1,13 +1,14 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
 import pytest
 import re
 import textile
 
+
 def test_FootnoteReference():
     html = textile.textile('YACC[1]')
     assert re.search(r'^\t<p><span class="caps">YACC</span><sup class="footnote" id="fnrev([a-f0-9]{32})-1"><a href="#fn\1-1">1</a></sup></p>', html) is not None
 
+
 def test_Footnote():
     html = textile.textile('This is covered elsewhere[1].\n\nfn1. Down here, in fact.\n\nfn2. Here is another footnote.')
     assert re.search(r'^\t<p>This is covered elsewhere<sup class="footnote" id="fnrev([a-f0-9]{32})-1"><a href="#fn\1-1">1</a></sup>.</p>\n\n\t<p class="footnote" id="fn\1-1"><sup>1</sup> Down here, in fact.</p>\n\n\t<p class="footnote" id="fn\1-2"><sup>2</sup> Here is another footnote.</p>$', html) is not None
@@ -24,17 +25,19 @@ def test_Footnote():
     html = textile.textile('''See[4!] for details.\n\nfn4^. Here are the details.''')
     assert re.search(r'^\t<p>See<sup class="footnote" id="fnrev([a-f0-9]{32})-1">4</sup> for details.</p>\n\n\t<p class="footnote" id="fn\1-1"><sup><a href="#fnrev\1-1">4</a></sup> Here are the details.</p>$', html) is not None
 
+
 def test_issue_35():
     result = textile.textile('"z"')
-    expect = '\t<p>&#8220;z&#8221; </p>'
+    expect = '\t<p>&#8220;z&#8221;</p>'  # no space between the closing quote glyph and </p>
     assert result == expect
 
     result = textile.textile('" z"')
-    expect = '\t<p>&#8220; z&#8221; </p>'
+    expect = '\t<p>&#8220; z&#8221;</p>'  # leading space inside the quotes is kept; still no trailing space
     assert result == expect
 
+
 def test_restricted():
-    #Note that the HTML is escaped, thus rendering the <script> tag harmless.
+    # Note that the HTML is escaped, thus rendering the <script> tag harmless.
     test = "Here is some text.\n<script>alert('hello world')</script>"
     result = textile.textile_restricted(test)
     expect = "\t<p>Here is some text.<br />\n&lt;script&gt;alert(&#8216;hello world&#8217;)&lt;/script&gt;</p>"
@@ -72,7 +75,6 @@ def test_restricted():
     expect = '''\
 \t<table>
 \t<caption>Your caption goes here</caption>
-\t
 \t<tfoot>
 \t\t<tr>
 \t\t\t<td>A footer </td>
@@ -93,10 +95,12 @@ def test_restricted():
 
     assert result == expect
 
+
 def test_unicode_footnote():
     html = textile.textile('текст[1]')
     assert re.compile(r'^\t<p>текст<sup class="footnote" id="fnrev([a-f0-9]{32})-1"><a href="#fn\1-1">1</a></sup></p>$', re.U).search(html) is not None
 
+
 def test_autolinking():
     test = """some text "test":http://www.google.com http://www.google.com "$":http://www.google.com"""
     result = """\t<p>some text <a href="http://www.google.com">test</a> http://www.google.com <a href="http://www.google.com">www.google.com</a></p>"""
@@ -104,6 +108,7 @@ def test_autolinking():
 
     assert result == expect
 
+
 def test_sanitize():
     test = "a paragraph of benign text"
     result = "\t<p>a paragraph of benign text</p>"
@@ -111,7 +116,7 @@ def test_sanitize():
     assert result == expect
 
     test = """<p style="width: expression(alert('evil'));">a paragraph of evil text</p>"""
-    result = '<p style="">a paragraph of evil text</p>'
+    result = '<p>a paragraph of evil text</p>'
     expect = textile.Textile().parse(test, sanitize=True)
     assert result == expect
 
@@ -120,14 +125,16 @@ def test_sanitize():
     expect = textile.Textile(html_type='html5').parse(test, sanitize=True)
     assert result == expect
 
+
 def test_imagesize():
-    PIL = pytest.importorskip('PIL')
+    pytest.importorskip('PIL')  # skip this test when PIL cannot be imported; the module object itself is not needed
 
     test = "!http://www.google.com/intl/en_ALL/images/srpr/logo1w.png!"
     result = '\t<p><img alt="" height="95" src="http://www.google.com/intl/en_ALL/images/srpr/logo1w.png" width="275" /></p>'
     expect = textile.Textile(get_sizes=True).parse(test)
     assert result == expect
 
+
 def test_endnotes_simple():
     test = """Scientists say the moon is slowly shrinking[#my_first_label].\n\nnotelist!.\n\nnote#my_first_label Over the past billion years, about a quarter of the moon's 4.5 billion-year lifespan, it has shrunk about 200 meters (700 feet) in diameter."""
     html = textile.textile(test)
@@ -135,6 +142,7 @@ def test_endnotes_simple():
     result_re = re.compile(result_pattern)
     assert result_re.search(html) is not None
 
+
 def test_endnotes_complex():
     test = """Tim Berners-Lee is one of the pioneer voices in favour of Net Neutrality[#netneutral] and has expressed the view that ISPs should supply "connectivity with no strings attached"[#netneutral!] [#tbl_quote]\n\nBerners-Lee admitted that the forward slashes ("//") in a web address were actually unnecessary.  He told the newspaper that he could easily have designed URLs not to have the forward slashes.  "... it seemed like a good idea at the time,"[#slashes]\n\nnote#netneutral. "Web creator rejects net tracking":http://news.bbc.co.uk/2/hi/technology/7613201.stm. BBC. 15 September 2008\n\nnote#tbl_quote. "Web inventor's warning on spy software":http://www.telegraph.co.uk/news/uknews/1581938/Web-inventor%27s-warning-on-spy-software.html. The Daily Telegraph (London). 25 May 2008\n\nnote#slashes. "Berners-Lee 'sorry' for slashes":http://news.bbc.co.uk/1/hi/technology/8306631.stm. BBC. 14 October 2009\n\nnotelist."""
     html = textile.textile(test)
@@ -142,6 +150,7 @@ def test_endnotes_complex():
     result_re = re.compile(result_pattern)
     assert result_re.search(html) is not None
 
+
 def test_endnotes_unreferenced_note():
     test = """Scientists say[#lavader] the moon is quite small. But I, for one, don't believe them. Others claim it to be made of cheese[#aardman]. If this proves true I suspect we are in for troubled times[#apollo13] as people argue over their "share" of the moon's cheese. In the end, its limited size[#lavader] may prove problematic.\n\nnote#lavader(noteclass). "Proof of the small moon hypothesis":http://antwrp.gsfc.nasa.gov/apod/ap080801.html. Copyright(c) Laurent Laveder\n\nnote#aardman(#noteid). "Proof of a cheese moon":http://www.imdb.com/title/tt0104361\n\nnote#apollo13. After all, things do go "wrong":http://en.wikipedia.org/wiki/Apollo_13#The_oxygen_tank_incident.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}:§^.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}:‡"""
     html = textile.textile(test)
@@ -149,6 +158,7 @@ def test_endnotes_unreferenced_note():
     result_re = re.compile(result_pattern, re.U)
     assert result_re.search(html) is not None
 
+
 def test_endnotes_malformed():
     test = """Scientists say[#lavader] the moon is quite small. But I, for one, don't believe them. Others claim it to be made of cheese[#aardman]. If this proves true I suspect we are in for troubled times[#apollo13!] as people argue over their "share" of the moon's cheese. In the end, its limited size[#lavader] may prove problematic.\n\nnote#unused An unreferenced note.\n\nnote#lavader^ "Proof of the small moon hypothesis":http://antwrp.gsfc.nasa.gov/apod/ap080801.html. Copyright(c) Laurent Laveder\n\nnote#aardman^ "Proof of a cheese moon":http://www.imdb.com/title/tt0104361\n\nnote#apollo13^ After all, things do go "wrong":http://en.wikipedia.org/wiki/Apollo_13#The_oxygen_tank_incident.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}:α!+"""
     html = textile.textile(test)
@@ -156,13 +166,15 @@ def test_endnotes_malformed():
     result_re = re.compile(result_pattern, re.U)
     assert result_re.search(html) is not None
 
+
 def test_endnotes_undefined_note():
     test = """Scientists say the moon is slowly shrinking[#my_first_label].\n\nnotelist!."""
     html = textile.textile(test)
-    result_pattern = r"""\t<p>Scientists say the moon is slowly shrinking<sup><a href="#note([a-f0-9]{32})-2"><span id="noteref\1-1">1</span></a></sup>.</p>\n\n\t<ol>\n\t\t<li> Undefined Note \[#my_first_label\].<li>\n\t</ol>$"""
+    result_pattern = r"""\t<p>Scientists say the moon is slowly shrinking<sup><a href="#note([a-f0-9]{32})-2"><span id="noteref\1-1">1</span></a></sup>.</p>\n\n\t<ol>\n\t\t<li> Undefined Note \[#my_first_label\].</li>\n\t</ol>$"""
     result_re = re.compile(result_pattern)
     assert result_re.search(html) is not None
 
+
 def test_encode_url():
     # I tried adding these as doctests, but the unicode tests weren't
     # returning the correct results.
@@ -198,21 +210,25 @@ def test_encode_url():
     eurl = t.encode_url(url)
     assert eurl == result
 
+
 def test_footnote_crosslink():
     html = textile.textile('''See[2] for details, and later, reference it again[2].\n\nfn2^(footy#otherid)[en]. Here are the details.''')
     searchstring = r'\t<p>See<sup class="footnote" id="fnrev([a-f0-9]{32})-1"><a href="#fn\1-1">2</a></sup> for details, and later, reference it again<sup class="footnote"><a href="#fn\1-1">2</a></sup>.</p>\n\n\t<p class="footy" id="otherid" lang="en"><sup id="fn\1-1"><a href="#fnrev\1-1">2</a></sup> Here are the details.</p>$'
     assert re.compile(searchstring).search(html) is not None
 
+
 def test_footnote_without_reflink():
     html = textile.textile('''See[3!] for details.\n\nfn3. Here are the details.''')
     searchstring = r'^\t<p>See<sup class="footnote" id="fnrev([a-f0-9]{32})-1">3</sup> for details.</p>\n\n\t<p class="footnote" id="fn\1-1"><sup>3</sup> Here are the details.</p>$'
     assert re.compile(searchstring).search(html) is not None
 
+
 def testSquareBrackets():
     html = textile.textile("""1[^st^], 2[^nd^], 3[^rd^]. 2 log[~n~]\n\nA close[!http://textpattern.com/favicon.ico!]image.\nA tight["text":http://textpattern.com/]link.\nA ["footnoted link":http://textpattern.com/][182].""")
     searchstring = r'^\t<p>1<sup>st</sup>, 2<sup>nd</sup>, 3<sup>rd</sup>. 2 log<sub>n</sub></p>\n\n\t<p>A close<img alt="" src="http://textpattern.com/favicon.ico" />image.<br />\nA tight<a href="http://textpattern.com/">text</a>link.<br />\nA <a href="http://textpattern.com/">footnoted link</a><sup class="footnote" id="fnrev([a-f0-9]{32})-1"><a href="#fn\1-1">182</a></sup>.</p>'
     assert re.compile(searchstring).search(html) is not None
 
+
 def test_html5():
     """docstring for testHTML5"""
 
@@ -221,6 +237,7 @@ def test_html5():
     expect = textile.textile(test, html_type="html5")
     assert result == expect
 
+
 def test_relURL():
     t = textile.Textile()
     t.restricted = True
diff --git a/tests/test_textilefactory.py b/tests/test_textilefactory.py
index 846b9275..e9fc027f 100644
--- a/tests/test_textilefactory.py
+++ b/tests/test_textilefactory.py
@@ -1,6 +1,7 @@
 from textile import textilefactory
 import pytest
 
+
 def test_TextileFactory():
     f = textilefactory.TextileFactory()
     result = f.process("some text here")
diff --git a/tests/test_urls.py b/tests/test_urls.py
index 7a9798eb..1cd09f92 100644
--- a/tests/test_urls.py
+++ b/tests/test_urls.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 from textile import Textile
-import re
+
 
 def test_urls():
     t = Textile()
@@ -54,12 +54,14 @@ def test_urls():
     expect = '\t<p>A link that <a href="/test/">contains a\nnewline</a> raises an exception.</p>'
     assert result == expect
 
+
 def test_rel_attribute():
     t = Textile(rel='nofollow')
     result = t.parse('"$":http://domain.tld')
     expect = '\t<p><a href="http://domain.tld" rel="nofollow">domain.tld</a></p>'
     assert result == expect
 
+
 def test_quotes_in_link_text():
     """quotes in link text are tricky."""
     test = '""this is a quote in link text"":url'
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 7f386a9b..952c7b4c 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,23 +1,25 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
 from textile import utils
 
+
 def test_encode_html():
     result = utils.encode_html('''this is a "test" of text that's safe to '''
-            'put in an <html> attribute.')
+                               'put in an <html> attribute.')
     expect = ('this is a &quot;test&quot; of text that&#39;s safe to put in '
-            'an &lt;html&gt; attribute.')
+              'an &lt;html&gt; attribute.')
     assert result == expect
 
+
 def test_has_raw_text():
     assert utils.has_raw_text('<p>foo bar biz baz</p>') is False
     assert utils.has_raw_text(' why yes, yes it does') is True
 
+
 def test_is_rel_url():
     assert utils.is_rel_url("http://www.google.com/") is False
     assert utils.is_rel_url("/foo") is True
 
+
 def test_generate_tag():
     result = utils.generate_tag('span', 'inner text', {'class': 'test'})
     expect = '<span class="test">inner text</span>'
@@ -28,3 +30,8 @@ def test_generate_tag():
     expect = '<a href="http://de.wikipedia.org/wiki/%C3%C9bermensch">Übermensch</a>'
     result = utils.generate_tag('a', text, attributes)
     assert result == expect
+
+
+def test_human_readable_url_edge_case():  # inputs that are not ordinary scheme://host URLs
+    assert utils.human_readable_url('google.com') == 'google.com'  # no scheme: expected back unchanged
+    assert utils.human_readable_url('tel:1-800-555-1212') == '1-800-555-1212'  # 'tel:' prefix expected to be dropped
diff --git a/tests/test_values.py b/tests/test_values.py
index 063ed3e9..7c19e116 100644
--- a/tests/test_values.py
+++ b/tests/test_values.py
@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
 import textile
 import pytest
 
@@ -14,7 +13,7 @@
 
     ('I spoke.\nAnd none replied.', '\t<p>I spoke.<br />\nAnd none replied.</p>'),
 
-    ('"Observe!"', '\t<p>&#8220;Observe!&#8221; </p>'),
+    ('"Observe!"', '\t<p>&#8220;Observe!&#8221;</p>'),
 
     ('Observe -- very nice!', '\t<p>Observe &#8212; very nice!</p>'),
 
@@ -35,7 +34,7 @@
     ('h3. Header 3', '\t<h3>Header 3</h3>'),
 
     ('An old text\n\nbq. A block quotation.\n\nAny old text''',
-    '\t<p>An old text</p>\n\n\t<blockquote>\n\t\t<p>A block quotation.</p>\n\t</blockquote>\n\n\t<p>Any old text</p>'),
+     '\t<p>An old text</p>\n\n\t<blockquote>\n\t\t<p>A block quotation.</p>\n\t</blockquote>\n\n\t<p>Any old text</p>'),
 
     ('I _believe_ every word.', '\t<p>I <em>believe</em> every word.</p>'),
 
@@ -70,8 +69,8 @@
     ('p[fr]. rouge', '\t<p lang="fr">rouge</p>'),
 
     ('I seriously *{color:red}blushed*\nwhen I _(big)sprouted_ that\ncorn stalk from my\n%[es]cabeza%.',
-    '\t<p>I seriously <strong style="color:red;">blushed</strong><br />\nwhen I <em class="big">sprouted</em>'
-    ' that<br />\ncorn stalk from my<br />\n<span lang="es">cabeza</span>.</p>'),
+     '\t<p>I seriously <strong style="color:red;">blushed</strong><br />\nwhen I <em class="big">sprouted</em>'
+     ' that<br />\ncorn stalk from my<br />\n<span lang="es">cabeza</span>.</p>'),
 
     ('p<. align left', '\t<p style="text-align:left;">align left</p>'),
 
@@ -219,14 +218,14 @@
      '\t<p style="font-size:0.8em;"><strong>TxStyle</strong> is a documentation project of Textile 2.4 for <a href="http://texpattern.com">Textpattern <span class="caps">CMS</span></a>.</p>'),
     (""""Übermensch":http://de.wikipedia.org/wiki/Übermensch""", """\t<p><a href="http://de.wikipedia.org/wiki/%C3%9Cbermensch">Übermensch</a></p>"""),
     ("""Here is some text with a <!-- Commented out[1] --> block.\n\n<!-- Here is a single <span>line</span> comment block -->\n\n<!-- Here is a whole\nmultiline\n<span>HTML</span>\nComment\n-->\n\nbc. <!-- Here is a comment block in a code block. -->""",
-     """\t<p>Here is some text with a <!-- Commented out[1] --> block.</p>\n\n\t<p><!-- Here is a single <span>line</span> comment block --></p>\n\n\t<p><!-- Here is a whole\nmultiline\n<span>HTML</span>\nComment\n--></p>\n\n<pre><code>&lt;!-- Here is a comment block in a code block. --&gt;</code></pre>"""),
+     """\t<p>Here is some text with a <!-- Commented out[1] --> block.</p>\n\n<!-- Here is a single <span>line</span> comment block -->\n\n<!-- Here is a whole\nmultiline\n<span>HTML</span>\nComment\n-->\n\n<pre><code>&lt;!-- Here is a comment block in a code block. --&gt;</code></pre>"""),
     (""""Textile(c)" is a registered(r) 'trademark' of Textpattern(tm) -- or TXP(That's textpattern!) -- at least it was - back in '88 when 2x4 was (+/-)5(o)C ... QED!\n\np{font-size: 200%;}. 2(1/4) 3(1/2) 4(3/4)""",
      """\t<p>&#8220;Textile&#169;&#8221; is a registered&#174; &#8216;trademark&#8217; of Textpattern&#8482; &#8212; or <acronym title="That&#8217;s textpattern!"><span class="caps">TXP</span></acronym> &#8212; at least it was &#8211; back in &#8217;88 when 2&#215;4 was &#177;5&#176;C &#8230; <span class="caps">QED</span>!</p>\n\n\t<p style="font-size: 200%;">2&#188; 3&#189; 4&#190;</p>"""),
     ("""|=. Testing colgroup and col syntax\n|:\\5. 80\n|a|b|c|d|e|\n\n|=. Testing colgroup and col syntax|\n|:\\5. 80|\n|a|b|c|d|e|""", """\t<table>\n\t<caption>Testing colgroup and col syntax</caption>\n\t<colgroup span="5" width="80">\n\t</colgroup>\n\t\t<tr>\n\t\t\t<td>a</td>\n\t\t\t<td>b</td>\n\t\t\t<td>c</td>\n\t\t\t<td>d</td>\n\t\t\t<td>e</td>\n\t\t</tr>\n\t</table>\n\n\t<table>\n\t<caption>Testing colgroup and col syntax</caption>\n\t<colgroup span="5" width="80">\n\t</colgroup>\n\t\t<tr>\n\t\t\t<td>a</td>\n\t\t\t<td>b</td>\n\t\t\t<td>c</td>\n\t\t\t<td>d</td>\n\t\t\t<td>e</td>\n\t\t</tr>\n\t</table>"""),
     ("""table(#dvds){border-collapse:collapse}. Great films on DVD employing Textile summary, caption, thead, tfoot, two tbody elements and colgroups\n|={font-size:140%;margin-bottom:15px}. DVDs with two Textiled tbody elements\n|:\\3. 100 |{background:#ddd}|250||50|300|\n|^(header).\n|_. Title |_. Starring |_. Director |_. Writer |_. Notes |\n|~(footer).\n|\\5=. This is the tfoot, centred |\n|-(toplist){background:#c5f7f6}.\n| _The Usual Suspects_ | Benicio Del Toro, Gabriel Byrne, Stephen Baldwin, Kevin Spacey | Bryan Singer | Chris McQaurrie | One of the finest films ever made |\n| _Se7en_ | Morgan Freeman, Brad Pitt, Kevin Spacey | David Fincher | Andrew Kevin Walker | Great psychological thriller |\n| _Primer_ | David Sullivan, Shane Carruth | Shane Carruth | Shane Carruth | Amazing insight into trust and human psychology <br />rather than science fiction. Terrific! |\n| _District 9_ | Sharlto Copley, Jason Cope | Neill Blomkamp | Neill Blomkamp, Terri Tatchell | Social commentary layered on thick,\nbut boy is it done well |\n|-(medlist){background:#e7e895;}.\n| _Arlington Road_ | Tim Robbins, Jeff Bridges | Mark Pellington | Ehren Kruger | Awesome study in neighbourly relations |\n| _Phone Booth_ | Colin Farrell, Kiefer Sutherland, Forest Whitaker | Joel Schumacher | Larry Cohen | Edge-of-the-seat stuff in this\nshort but brilliantly executed thriller |""",
      """\t<table id="dvds" style="border-collapse:collapse;" summary="Great films on DVD employing Textile summary, caption, thead, tfoot, two tbody elements and colgroups">\n\t<caption style="font-size:140%; margin-bottom:15px;"><span class="caps">DVD</span>s with two Textiled tbody elements</caption>\n\t<colgroup span="3" width="100">\n\t<col style="background:#ddd;" />\n\t<col width="250" />\n\t<col />\n\t<col width="50" />\n\t<col width="300" />\n\t</colgroup>\n\t<thead class="header">\n\t\t<tr>\n\t\t\t<th>Title </th>\n\t\t\t<th>Starring </th>\n\t\t\t<th>Director </th>\n\t\t\t<th>Writer </th>\n\t\t\t<th>Notes </th>\n\t\t</tr>\n\t</thead>\n\t<tfoot class="footer">\n\t\t<tr>\n\t\t\t<td colspan="5" style="text-align:center;">This is the tfoot, centred </td>\n\t\t</tr>\n\t</tfoot>\n\t<tbody class="toplist" style="background:#c5f7f6;">\n\t\t<tr>\n\t\t\t<td> <em>The Usual Suspects</em> </td>\n\t\t\t<td> Benicio Del Toro, Gabriel Byrne, Stephen Baldwin, Kevin Spacey </td>\n\t\t\t<td> Bryan Singer </td>\n\t\t\t<td> Chris McQaurrie </td>\n\t\t\t<td> One of the finest films ever made </td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td> <em>Se7en</em> </td>\n\t\t\t<td> Morgan Freeman, Brad Pitt, Kevin Spacey </td>\n\t\t\t<td> David Fincher </td>\n\t\t\t<td> Andrew Kevin Walker </td>\n\t\t\t<td> Great psychological thriller </td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td> <em>Primer</em> </td>\n\t\t\t<td> David Sullivan, Shane Carruth </td>\n\t\t\t<td> Shane Carruth </td>\n\t\t\t<td> Shane Carruth </td>\n\t\t\t<td> Amazing insight into trust and human psychology <br />\nrather than science fiction. Terrific! 
</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td> <em>District 9</em> </td>\n\t\t\t<td> Sharlto Copley, Jason Cope </td>\n\t\t\t<td> Neill Blomkamp </td>\n\t\t\t<td> Neill Blomkamp, Terri Tatchell </td>\n\t\t\t<td> Social commentary layered on thick,<br />\nbut boy is it done well </td>\n\t\t</tr>\n\t</tbody>\n\t<tbody class="medlist" style="background:#e7e895;">\n\t\t<tr>\n\t\t\t<td> <em>Arlington Road</em> </td>\n\t\t\t<td> Tim Robbins, Jeff Bridges </td>\n\t\t\t<td> Mark Pellington </td>\n\t\t\t<td> Ehren Kruger </td>\n\t\t\t<td> Awesome study in neighbourly relations </td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td> <em>Phone Booth</em> </td>\n\t\t\t<td> Colin Farrell, Kiefer Sutherland, Forest Whitaker </td>\n\t\t\t<td> Joel Schumacher </td>\n\t\t\t<td> Larry Cohen </td>\n\t\t\t<td> Edge-of-the-seat stuff in this<br />\nshort but brilliantly executed thriller </td>\n\t\t</tr>\n\t</tbody>\n\t</table>"""),
     ("""-(hot) *coffee* := Hot _and_ black\n-(hot#tea) tea := Also hot, but a little less black\n-(cold) milk := Nourishing beverage for baby cows.\nCold drink that goes great with cookies. =:\n\n-(hot) coffee := Hot and black\n-(hot#tea) tea := Also hot, but a little less black\n-(cold) milk :=\nNourishing beverage for baby cows.\nCold drink that goes great with cookies. =:""",
-    """<dl>\n\t<dt class="hot"><strong>coffee</strong></dt>\n\t<dd>Hot <em>and</em> black</dd>\n\t<dt class="hot" id="tea">tea</dt>\n\t<dd>Also hot, but a little less black</dd>\n\t<dt class="cold">milk</dt>\n\t<dd>Nourishing beverage for baby cows.<br />\nCold drink that goes great with cookies.</dd>\n</dl>\n\n<dl>\n\t<dt class="hot">coffee</dt>\n\t<dd>Hot and black</dd>\n\t<dt class="hot" id="tea">tea</dt>\n\t<dd>Also hot, but a little less black</dd>\n\t<dt class="cold">milk</dt>\n\t<dd><p>Nourishing beverage for baby cows.<br />\nCold drink that goes great with cookies.</p></dd>\n</dl>"""),
+     """<dl>\n\t<dt class="hot"><strong>coffee</strong></dt>\n\t<dd>Hot <em>and</em> black</dd>\n\t<dt class="hot" id="tea">tea</dt>\n\t<dd>Also hot, but a little less black</dd>\n\t<dt class="cold">milk</dt>\n\t<dd>Nourishing beverage for baby cows.<br />\nCold drink that goes great with cookies.</dd>\n</dl>\n\n<dl>\n\t<dt class="hot">coffee</dt>\n\t<dd>Hot and black</dd>\n\t<dt class="hot" id="tea">tea</dt>\n\t<dd>Also hot, but a little less black</dd>\n\t<dt class="cold">milk</dt>\n\t<dd><p>Nourishing beverage for baby cows.<br />\nCold drink that goes great with cookies.</p></dd>\n</dl>"""),
     (""";(class#id) Term 1\n: Def 1\n: Def 2\n: Def 3""",
      """\t<dl class="class" id="id">\n\t\t<dt>Term 1</dt>\n\t\t<dd>Def 1</dd>\n\t\t<dd>Def 2</dd>\n\t\t<dd>Def 3</dd>\n\t</dl>"""),
     ("""*Here is a comment*\n\nHere is *(class)a comment*\n\n*(class)Here is a class* that is a little extended and is\n*followed* by a strong word!\n\nbc. ; Content-type: text/javascript\n; Cache-Control: no-store, no-cache, must-revalidate, pre-check=0, post-check=0, max-age=0\n; Expires: Sat, 24 Jul 2003 05:00:00 GMT\n; Last-Modified: Wed, 1 Jan 2025 05:00:00 GMT\n; Pragma: no-cache\n\n*123 test*\n\n*test 123*\n\n**123 test**\n\n**test 123**""",
@@ -236,7 +235,7 @@
     ("""# one\n##3 one.three\n## one.four\n## one.five\n# two\n\ntest\n\n#_(continuation#section2).\n# three\n# four\n##_ four.six\n## four.seven\n# five\n\ntest\n\n#21 twenty-one\n# twenty-two""",
      """\t<ol>\n\t\t<li>one\n\t\t<ol start="3">\n\t\t\t<li>one.three</li>\n\t\t\t<li>one.four</li>\n\t\t\t<li>one.five</li>\n\t\t</ol></li>\n\t\t<li>two</li>\n\t</ol>\n\n\t<p>test</p>\n\n\t<ol class="continuation" id="section2" start="3">\n\t\t<li>three</li>\n\t\t<li>four\n\t\t<ol start="6">\n\t\t\t<li>four.six</li>\n\t\t\t<li>four.seven</li>\n\t\t</ol></li>\n\t\t<li>five</li>\n\t</ol>\n\n\t<p>test</p>\n\n\t<ol start="21">\n\t\t<li>twenty-one</li>\n\t\t<li>twenty-two</li>\n\t</ol>"""),
     ("""|* Foo[^2^]\n* _bar_\n* ~baz~ |\n|#4 *Four*\n# __Five__ |\n|-(hot) coffee := Hot and black\n-(hot#tea) tea := Also hot, but a little less black\n-(cold) milk :=\nNourishing beverage for baby cows.\nCold drink that goes great with cookies. =:\n|""",
-     """\t<table>\n\t\t<tr>\n\t\t\t<td>\t<ul>\n\t\t<li>Foo<sup>2</sup></li>\n\t\t<li><em>bar</em></li>\n\t\t<li><sub>baz</sub></li>\n\t</ul></td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td>\t<ol start="4">\n\t\t<li><strong>Four</strong></li>\n\t\t<li><i>Five</i></li>\n\t</ol></td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td><dl>\n\t<dt class="hot">coffee</dt>\n\t<dd>Hot and black</dd>\n\t<dt class="hot" id="tea">tea</dt>\n\t<dd>Also hot, but a little less black</dd>\n\t<dt class="cold">milk</dt>\n\t<dd><p>Nourishing beverage for baby cows.<br />\nCold drink that goes great with cookies.</p></dd><br />\n</dl></td>\n\t\t</tr>\n\t</table>"""),
+     """\t<table>\n\t\t<tr>\n\t\t\t<td>\t<ul>\n\t\t<li>Foo<sup>2</sup></li>\n\t\t<li><em>bar</em></li>\n\t\t<li><sub>baz</sub></li>\n\t</ul></td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td>\t<ol start="4">\n\t\t<li><strong>Four</strong></li>\n\t\t<li><i>Five</i></li>\n\t</ol></td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td><dl>\n\t<dt class="hot">coffee</dt>\n\t<dd>Hot and black</dd>\n\t<dt class="hot" id="tea">tea</dt>\n\t<dd>Also hot, but a little less black</dd>\n\t<dt class="cold">milk</dt>\n\t<dd><p>Nourishing beverage for baby cows.<br />\nCold drink that goes great with cookies.</p></dd>\n</dl></td>\n\t\t</tr>\n\t</table>"""),
     ("""h4. A more complicated table\n\ntable(tableclass#tableid){color:blue}.\n|_. table |_. more |_. badass |\n|\\3. Horizontal span of 3|\n(firstrow). |first|HAL(open the pod bay doors)|1|\n|some|{color:green}. styled|content|\n|/2. spans 2 rows|this is|quite a|\n| deep test | don't you think?|\n(lastrow). |fifth|I'm a lumberjack|5|\n|sixth| _*bold italics*_ |6|""",
      """\t<h4>A more complicated table</h4>\n\n\t<table class="tableclass" id="tableid" style="color:blue;">\n\t\t<tr>\n\t\t\t<th>table </th>\n\t\t\t<th>more </th>\n\t\t\t<th>badass </th>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td colspan="3">Horizontal span of 3</td>\n\t\t</tr>\n\t\t<tr class="firstrow">\n\t\t\t<td>first</td>\n\t\t\t<td><acronym title="open the pod bay doors"><span class="caps">HAL</span></acronym></td>\n\t\t\t<td>1</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td>some</td>\n\t\t\t<td style="color:green;">styled</td>\n\t\t\t<td>content</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td rowspan="2">spans 2 rows</td>\n\t\t\t<td>this is</td>\n\t\t\t<td>quite a</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td> deep test </td>\n\t\t\t<td> don&#8217;t you think?</td>\n\t\t</tr>\n\t\t<tr class="lastrow">\n\t\t\t<td>fifth</td>\n\t\t\t<td>I&#8217;m a lumberjack</td>\n\t\t\t<td>5</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td>sixth</td>\n\t\t\t<td> <em><strong>bold italics</strong></em> </td>\n\t\t\t<td>6</td>\n\t\t</tr>\n\t</table>"""),
     ("""| *strong* |\n\n| _em_ |\n\n| Inter-word -dashes- | ZIP-codes are 5- or 9-digit codes |""",
@@ -246,7 +245,7 @@
     ("""h2. A definition list\n\n;(class#id) Term 1\n: Def 1\n: Def 2\n: Def 3\n;; Center\n;; NATO(Why Em Cee Ayy)\n:: Subdef 1\n:: Subdef 2\n;;; SubSub Term\n::: SubSub Def 1\n::: SubSub Def 2\n::: Subsub Def 3\nWith newline\n::: Subsub Def 4\n:: Subdef 3\n: DEF 4\n; Term 2\n: Another def\n: And another\n: One more\n:: A def without a term\n:: More defness\n; Third term for good measure\n: My definition of a boombastic jazz""",
      """\t<h2>A definition list</h2>\n\n\t<dl class="class" id="id">\n\t\t<dt>Term 1</dt>\n\t\t<dd>Def 1</dd>\n\t\t<dd>Def 2</dd>\n\t\t<dd>Def 3\n\t\t<dl>\n\t\t\t<dt>Center</dt>\n\t\t\t<dt><acronym title="Why Em Cee Ayy"><span class="caps">NATO</span></acronym></dt>\n\t\t\t<dd>Subdef 1</dd>\n\t\t\t<dd>Subdef 2\n\t\t\t<dl>\n\t\t\t\t<dt>SubSub Term</dt>\n\t\t\t\t<dd>SubSub Def 1</dd>\n\t\t\t\t<dd>SubSub Def 2</dd>\n\t\t\t\t<dd>Subsub Def 3<br />\nWith newline</dd>\n\t\t\t\t<dd>Subsub Def 4</dd>\n\t\t\t</dl></dd>\n\t\t\t<dd>Subdef 3</dd>\n\t\t</dl></dd>\n\t\t<dd><span class="caps">DEF</span> 4</dd>\n\t\t<dt>Term 2</dt>\n\t\t<dd>Another def</dd>\n\t\t<dd>And another</dd>\n\t\t<dd>One more\n\t\t<dl>\n\t\t\t<dd>A def without a term</dd>\n\t\t\t<dd>More defness</dd>\n\t\t</dl></dd>\n\t\t<dt>Third term for good measure</dt>\n\t\t<dd>My definition of a boombastic jazz</dd>\n\t</dl>"""),
     ("""###. Here's a comment.\n\nh3. Hello\n\n###. And\nanother\none.\n\nGoodbye.""", """\t<h3>Hello</h3>\n\n\t<p>Goodbye.</p>"""),
-    ("""h2. A Definition list which covers the instance where a new definition list is created with a term without a definition\n\n- term :=\n- term2 := def""", """\t<h2>A Definition list which covers the instance where a new definition list is created with a term without a definition</h2>\n\n<dl>\n\t<dt>term2</dt>\n\t<dd>def</dd>\n</dl>"""),
+    ("""h2. A Definition list which covers the instance where a new definition list is created with a term without a definition\n\n- term :=\n- term2 := def""", """\t<h2>A Definition list which covers the instance where a new definition list is created with a term without a definition</h2>\n\n<dl>\n\t<dt>term</dt>\n\t<dt>term2</dt>\n\t<dd>def</dd>\n</dl>"""),
     ('!{height:20px;width:20px;}https://1.gravatar.com/avatar/!',
      '\t<p><img alt="" src="https://1.gravatar.com/avatar/" style="height:20px; width:20px;" /></p>'),
     ('& test', '\t<p>&amp; test</p>'),
@@ -254,12 +253,20 @@
 
 # A few extra cases for HTML4
 html_known_values = (
+    ("pre.. The beginning\n\nbc.. This code\n\nis the last\n\nblock in the document\n",
+     "<pre>The beginning</pre>\n\n<pre><code>This code\n\nis the last\n\nblock in the document</code></pre>"),
+    ("bc.. This code\n\nis not\n\nsurrounded by anything\n",
+     "<pre><code>This code\n\nis not\n\nsurrounded by anything</code></pre>"),
+    ("bc.. Paragraph 1\n\nParagraph 2\n\nParagraph 3\n\np.. post-code paragraph",
+     "<pre><code>Paragraph 1\n\nParagraph 2\n\nParagraph 3</code></pre>\n\n<p>post-code paragraph</p>"),
+    ("bc.. Paragraph 1\n\nParagraph 2\n\nParagraph 3\n\npre.. post-code non-p block",
+     "<pre><code>Paragraph 1\n\nParagraph 2\n\nParagraph 3</code></pre>\n\n<pre>post-code non-p block</pre>"),
     ('I spoke.\nAnd none replied.', '\t<p>I spoke.<br />\nAnd none replied.</p>'),
     ('I __know__.\nI **really** __know__.', '\t<p>I <i>know</i>.<br />\nI <b>really</b> <i>know</i>.</p>'),
     ("I'm %{color:red}unaware%\nof most soft drinks.", '\t<p>I&#8217;m <span style="color:red;">unaware</span><br />\nof most soft drinks.</p>'),
     ('I seriously *{color:red}blushed*\nwhen I _(big)sprouted_ that\ncorn stalk from my\n%[es]cabeza%.',
-    '\t<p>I seriously <strong style="color:red;">blushed</strong><br />\nwhen I <em class="big">sprouted</em>'
-    ' that<br />\ncorn stalk from my<br />\n<span lang="es">cabeza</span>.</p>'),
+     '\t<p>I seriously <strong style="color:red;">blushed</strong><br />\nwhen I <em class="big">sprouted</em>'
+     ' that<br />\ncorn stalk from my<br />\n<span lang="es">cabeza</span>.</p>'),
     ('<pre>\n<code>\na.gsub!( /</, "" )\n</code>\n</pre>',
      '<pre>\n<code>\na.gsub!( /&lt;/, "" )\n</code>\n</pre>'),
     ('<div style="float:right;">\n\nh3. Sidebar\n\n"Hobix":http://hobix.com/\n"Ruby":http://ruby-lang.org/\n\n</div>\n\n'
@@ -307,14 +314,211 @@
     # cite attribute
     ('bq.:http://textism.com/ Text...', '\t<blockquote cite="http://textism.com/">\n\t\t<p>Text&#8230;</p>\n\t</blockquote>'),
     ('Hello ["(Mum) & dad"]', '\t<p>Hello [&#8220;(Mum) &amp; dad&#8221;]</p>'),
+    # Dimensions
+    (
+        ('[1/2] x [1/4] and (1/2)" x [1/4]" and (1/2)\' x (1/4)\'\n\n'
+         '(2 x 10) X (3 / 4) x (200 + 64)\n\n'
+         '1 x 1 = 1\n\n'
+         '1 x1 = 1\n\n'
+         '1x 1 = 1\n\n'
+         '1x1 = 1\n\n'
+         '1 X 1 = 1\n\n'
+         '1 X1 = 1\n\n'
+         '1X 1 = 1\n\n'
+         '1X1 = 1\n\n'
+         'What is 1 x 1?\n\n'
+         'What is 1x1?\n\n'
+         'What is 1 X 1?\n\n'
+         'What is 1X1?\n\n'
+         '1 x 2 x 3 = 6\n\n'
+         '1x2x3=6\n\n'
+         '1x2 x 1x3 = 6\n\n'
+         '2\' x 2\' = 4 sqft.\n\n'
+         '2\'x 2\' = 4 sqft.\n\n'
+         '2\' x2\' = 4 sqft.\n\n'
+         '2\'x2\' = 4 sqft.\n\n'
+         '2\' X 2\' = 4 sqft.\n\n'
+         '2\'X 2\' = 4 sqft.\n\n'
+         '2\' X2\' = 4 sqft.\n\n'
+         '2\'X2\' = 4 sqft.\n\n'
+         '2" x 2" = 4 sqin.\n\n'
+         '2"x 2" = 4 sqin.\n\n'
+         '2" x2" = 4 sqin.\n\n'
+         '2"x2" = 4 sqin.\n\n'
+         '2" X 2" = 4 sqin.\n\n'
+         '2"X 2" = 4 sqin.\n\n'
+         '2" X2" = 4 sqin.\n\n'
+         '2"X2" = 4in[^2^].\n\n'
+         'What is 1.2 x 3.5?\n\n'
+         'What is .2 x .5?\n\n'
+         'What is 1.2x3.5?\n\n'
+         'What is .2x.5?\n\n'
+         'What is 1.2\' x3.5\'?\n\n'
+         'What is .2"x .5"?\n\n'
+         '1 x $10.00 x -£ 1.23 x ¥20,000 x -¤120.00 x ฿1,000,000 x -€110,00\n\n'),
+
+        ('\t<p>&#189; &#215; &#188; and &#189;&#8221; &#215; &#188;&#8221; and &#189;&#8217; &#215; &#188;&#8217;</p>\n\n'
+         '\t<p>(2 &#215; 10) &#215; (3 / 4) &#215; (200 + 64)</p>\n\n'
+         '\t<p>1 &#215; 1 = 1</p>\n\n'
+         '\t<p>1 &#215;1 = 1</p>\n\n'
+         '\t<p>1&#215; 1 = 1</p>\n\n'
+         '\t<p>1&#215;1 = 1</p>\n\n'
+         '\t<p>1 &#215; 1 = 1</p>\n\n'
+         '\t<p>1 &#215;1 = 1</p>\n\n'
+         '\t<p>1&#215; 1 = 1</p>\n\n'
+         '\t<p>1&#215;1 = 1</p>\n\n'
+         '\t<p>What is 1 &#215; 1?</p>\n\n'
+         '\t<p>What is 1&#215;1?</p>\n\n'
+         '\t<p>What is 1 &#215; 1?</p>\n\n'
+         '\t<p>What is 1&#215;1?</p>\n\n'
+         '\t<p>1 &#215; 2 &#215; 3 = 6</p>\n\n'
+         '\t<p>1&#215;2&#215;3=6</p>\n\n'
+         '\t<p>1&#215;2 &#215; 1&#215;3 = 6</p>\n\n'
+         '\t<p>2&#8217; &#215; 2&#8217; = 4 sqft.</p>\n\n'
+         '\t<p>2&#8217;&#215; 2&#8217; = 4 sqft.</p>\n\n'
+         '\t<p>2&#8217; &#215;2&#8217; = 4 sqft.</p>\n\n'
+         '\t<p>2&#8217;&#215;2&#8217; = 4 sqft.</p>\n\n'
+         '\t<p>2&#8217; &#215; 2&#8217; = 4 sqft.</p>\n\n'
+         '\t<p>2&#8217;&#215; 2&#8217; = 4 sqft.</p>\n\n'
+         '\t<p>2&#8217; &#215;2&#8217; = 4 sqft.</p>\n\n'
+         '\t<p>2&#8217;&#215;2&#8217; = 4 sqft.</p>\n\n'
+         '\t<p>2&#8221; &#215; 2&#8221; = 4 sqin.</p>\n\n'
+         '\t<p>2&#8221;&#215; 2&#8221; = 4 sqin.</p>\n\n'
+         '\t<p>2&#8221; &#215;2&#8221; = 4 sqin.</p>\n\n'
+         '\t<p>2&#8221;&#215;2&#8221; = 4 sqin.</p>\n\n'
+         '\t<p>2&#8221; &#215; 2&#8221; = 4 sqin.</p>\n\n'
+         '\t<p>2&#8221;&#215; 2&#8221; = 4 sqin.</p>\n\n'
+         '\t<p>2&#8221; &#215;2&#8221; = 4 sqin.</p>\n\n'
+         '\t<p>2&#8221;&#215;2&#8221; = 4in<sup>2</sup>.</p>\n\n'
+         '\t<p>What is 1.2 &#215; 3.5?</p>\n\n'
+         '\t<p>What is .2 &#215; .5?</p>\n\n'
+         '\t<p>What is 1.2&#215;3.5?</p>\n\n'
+         '\t<p>What is .2&#215;.5?</p>\n\n'
+         '\t<p>What is 1.2&#8217; &#215;3.5&#8217;?</p>\n\n'
+         '\t<p>What is .2&#8221;&#215; .5&#8221;?</p>\n\n'
+         '\t<p>1 &#215; $10.00 &#215; -£ 1.23 &#215; ¥20,000 &#215; -¤120.00 &#215; ฿1,000,000 &#215; -€110,00</p>')
+    ),
+    # Empty note lists
+    ('There should be nothing below.\n\nnotelist.', '\t<p>There should be nothing below.</p>\n\n\t'),
+    #  Empty things
+    (('\'\'\n\n""\n\n%%\n\n^^\n\n&&\n\n**\n\n__\n\n--\n\n++\n\n~~\n\n{}\n\n'
+      '[]\n\n()\n\n<>\n\n\\\\\n\n//\n\n??\n\n==\n\n@@\n\n##\n\n$$\n\n!!\n\n'
+      '::\n\n;;\n\n..\n\n,,\n\n||\n\n` `\n\n\' \'\n\n" "\n\n% %\n\n^ ^\n\n'
+      '& &\n\n* *\n\n_ _\n\n- -\n\n+ +\n\n~ ~\n\n{ }\n\n[ ]\n\n( )\n\n< >\n\n'
+      '\\ \\\n\n/ /\n\n? ?\n\n= =\n\n@ @\n\n# #\n\n$ $\n\n! !\n\n: :\n\n; ;\n\n'
+      '. .\n\n, ,'),
+     ("\t<p>&#8216;&#8217;</p>\n\n\t<p>&#8220;&#8221;</p>\n\n\t<p>%%</p>\n\n\t<p>^^</p>\n\n\t"
+      "<p>&amp;&amp;</p>\n\n\t<p>**</p>\n\n\t<p>__</p>\n\n\t<p>&#8212;</p>\n\n\t<p>++</p>\n\n\t"
+      "<p>~~</p>\n\n\t<p>{}</p>\n\n\t<p>[]</p>\n\n\t<p>()</p>\n\n\t<p>&lt;&gt;</p>\n\n\t<p>\\\\</p>\n\n\t"
+      "<p>//</p>\n\n\t<p>??</p>\n\n\t<p>==</p>\n\n\t<p><code></code></p>\n\n\t<p>##</p>\n\n\t<p>$$</p>\n\n\t"
+      "<p>!!</p>\n\n\t<p>::</p>\n\n\t<p>;;</p>\n\n\t<p>..</p>\n\n\t<p>,,</p>\n\n\t"
+      "<table>\n\t\t<tr>\n\t\t\t<td></td>\n\t\t</tr>\n\t</table>\n\n\t<p>` `</p>\n\n\t<p>&#8216; &#8216;</p>\n\n\t"
+      "<p>&#8220; &#8220;</p>\n\n\t<p>% %</p>\n\n\t<p>^ ^</p>\n\n\t<p>&amp; &amp;</p>\n\n\t"
+      "<ul>\n\t\t<li>*</li>\n\t</ul>\n\n\t<p>_ _</p>\n\n\t<p>- -</p>\n\n\t<p>+ +</p>\n\n\t<p>~ ~</p>\n\n\t"
+      "<p>{ }</p>\n\n\t<p>[ ]</p>\n\n\t<p>( )</p>\n\n\t<p>&lt; &gt;</p>\n\n\t<p>\\ \\</p>\n\n\t"
+      "<p>/ /</p>\n\n\t<p>? ?</p>\n\n\t<p>= =</p>\n\n\t<p><code> </code></p>\n\n\t<ol>\n\t\t<li>#</li>\n\t</ol>\n\n\t"
+      "<p>$ $</p>\n\n\t<p>! !</p>\n\n\t<dl>\n\t\t<dd>:</dd>\n\t</dl>\n\n\t<dl>\n\t\t<dt>;</dt>\n\t</dl>\n\n\t"
+      "<p>. .</p>\n\n\t<p>, ,</p>")),
+    # A standalone comment must be preserved as is,
+    # without wrapping it into a paragraph
+    (('An ordinary block.\n\n'
+      '<!-- A comment block -->\n'),
+     '\t<p>An ordinary block.</p>\n\n<!-- A comment block -->'),
+    # Headers must be "breakable", just like paragraphs.
+    ('h1. Two line with *strong*\nheading\n',
+     '\t<h1>Two line with <strong>strong</strong><br />\nheading</h1>'),
+    # Non-standalone ampersands should not be escaped
+    (("&#8220;<span lang=\"en\">test</span>&#8221;\n\n"
+      "&#x201c;<span lang=\"en\">test</span>&#x201d;\n\n"
+      "&nbsp;<span lang=\"en\">test</span>&nbsp;\n"),
+     ("\t<p>&#8220;<span lang=\"en\">test</span>&#8221;</p>\n\n"
+      "\t<p>&#x201c;<span lang=\"en\">test</span>&#x201d;</p>\n\n"
+      "\t<p>&nbsp;<span lang=\"en\">test</span>&nbsp;</p>")),
+    # Nested and mixed multi-level ordered and unordered lists
+    (("* bullet\n"
+      "*# number\n"
+      "*# number\n"
+      "*#* bullet\n"
+      "*# number\n"
+      "*# number with\n"
+      "a break\n"
+      "* bullet\n"
+      "** okay"),
+     ("\t<ul>\n"
+      "\t\t<li>bullet\n"
+      "\t\t<ol>\n"
+      "\t\t\t<li>number</li>\n"
+      "\t\t\t<li>number\n"
+      "\t\t\t<ul>\n"
+      "\t\t\t\t<li>bullet</li>\n"
+      "\t\t\t</ul></li>\n"
+      "\t\t\t<li>number</li>\n"
+      "\t\t\t<li>number with<br />\n"
+      "a break</li>\n"
+      "\t\t</ol></li>\n"
+      "\t\t<li>bullet\n"
+      "\t\t<ul>\n"
+      "\t\t\t<li>okay</li>\n"
+      "\t\t</ul></li>\n"
+      "\t\t</ul>")),
+    # Checks proper insertion of <br /> within table cells
+    (("|-(cold) milk :=\n"
+      "Nourishing beverage for baby cows. =:\n"
+      "|"),
+     ("\t<table>\n"
+      "\t\t<tr>\n"
+      "\t\t\t<td><dl>\n"
+      "\t<dt class=\"cold\">milk</dt>\n"
+      "\t<dd><p>Nourishing beverage for baby cows.</p></dd>\n"
+      "</dl></td>\n"
+      "\t\t</tr>\n\t</table>")),
+    # Long non-textile blocks
+    ("notextile.. *a very*\n\n*long*\n\n*block*\n", "*a very*\n\n*long*\n\n*block*"),
+    # Correct use of &lsquo; and &rsquo;
+    ("Here is a %(example)'spanned'% word.",
+     '\t<p>Here is a <span class="example">&#8216;spanned&#8217;</span> word.</p>'),
+    # Using $-links with link aliases
+    ("\"$\":test\n[test]https://textpattern.com/start\n",
+     "\t<p><a href=\"https://textpattern.com/start\">textpattern.com/start</a></p>"),
+    ('Please check on "$":test for any updates.\n[test]https://de.wikipedia.org/wiki/Übermensch',
+     '\t<p>Please check on <a href="https://de.wikipedia.org/wiki/Übermensch">de.wikipedia.org/wiki/Übermensch</a> for any updates.</p>'),
+    # Make sure smileys don't get recognised as a definition list.
+    (":(\n\n:)\n\n:( \n:( \n:( \n:) \n\nPinocchio!\n:^)\n\nBaboon!\n:=)\n\nWink!\n;)\n\n:[ \n:]\n\n;(\nsomething\ndark side\n:) \n\n;(c)[de] Item",
+     '\t<p>:(</p>\n\n\t<p>:)</p>\n\n\t<p>:( <br />\n:( <br />\n:( <br />\n:) </p>\n\n\t<p>Pinocchio!<br />\n:^)</p>\n\n\t<p>Baboon!<br />\n:=)</p>\n\n\t<p>Wink!<br />\n;)</p>\n\n\t<p>:[ <br />\n:]</p>\n\n\t<p>;(<br />\nsomething<br />\ndark side<br />\n:) </p>\n\n\t<dl class="c" lang="de">\n\t\t<dt>Item</dt>\n\t</dl>'),
+    # Checking proper parsing of classes and IDs
+    ("_(class1 class2#id1)text1_ -(foobarbaz#boom bang)text2-\n",
+     '\t<p><em class="class1 class2" id="id1">text1</em> <del class="foobarbaz">text2</del></p>'),
+    # Tables with nested textile elements
+    ("|!http://tester.local/logo.png!| !http://tester.local/logo.png! |",
+     '\t<table>\n\t\t<tr>\n\t\t\t<td><img alt="" src="http://tester.local/logo.png" /></td>\n\t\t\t<td> <img alt="" src="http://tester.local/logo.png" /> </td>\n\t\t</tr>\n\t</table>'),
+    # Tables with colgroups
+    (("|=. Testing colgroup and col syntax | \n"
+      "|:\\5. 80 |\x20\n"
+      "|a|b|c|d|e|\x20\n"),
+     ('\t<table>\n\t<caption>Testing colgroup and col syntax</caption>\n'
+      '\t<colgroup span="5" width="80">\n\t</colgroup>\n'
+      '\t\t<tr>\n\t\t\t<td>a</td>\n\t\t\t<td>b</td>\n\t\t\t<td>c</td>\n\t\t\t<td>d</td>\n\t\t\t<td>e</td>\n\t\t</tr>\n'
+      '\t</table>')),
+    # Table column with an emphasis should not be confused with a heading
+    ('|_touch_ this!| _touch_ this! |',
+     '\t<table>\n\t\t<tr>\n\t\t\t<td><em>touch</em> this!</td>\n\t\t\t<td> <em>touch</em> this! </td>\n\t\t</tr>\n\t</table>'),
+    # Table with colgroup but no caption
+    (("|:\\5. 80 |\x20\n"
+      "|a|b|c|d|e|\x20\n"),
+     ('\t<table>\n'
+      '\t<colgroup span="5" width="80">\n\t</colgroup>\n'
+      '\t\t<tr>\n\t\t\t<td>a</td>\n\t\t\t<td>b</td>\n\t\t\t<td>c</td>\n\t\t\t<td>d</td>\n\t\t\t<td>e</td>\n\t\t</tr>\n'
+      '\t</table>')),
 )
 
+
 @pytest.mark.parametrize("input, expected_output", xhtml_known_values)
 def test_KnownValuesXHTML(input, expected_output):
     # XHTML
     output = textile.textile(input, html_type='xhtml')
     assert output == expected_output
 
+
 @pytest.mark.parametrize("input, expected_output", html_known_values)
 def test_KnownValuesHTML(input, expected_output):
     # HTML5
diff --git a/textile/__init__.py b/textile/__init__.py
index bb7829f7..16418739 100644
--- a/textile/__init__.py
+++ b/textile/__init__.py
@@ -1,9 +1,4 @@
-from __future__ import unicode_literals
-
-import sys
-import warnings
-
-from .core import textile, textile_restricted, Textile
+from .core import textile, textile_restricted, Textile  # noqa: F401
 from .version import VERSION
 
 __all__ = ['textile', 'textile_restricted']
diff --git a/textile/__main__.py b/textile/__main__.py
index 18459610..210c147d 100644
--- a/textile/__main__.py
+++ b/textile/__main__.py
@@ -33,5 +33,5 @@ def main():
         outfile.write(output)
 
 
-if __name__ == '__main__': #pragma: no cover
+if __name__ == '__main__':  # pragma: no cover
     main()
diff --git a/textile/core.py b/textile/core.py
index 7b66af02..4a2594f8 100644
--- a/textile/core.py
+++ b/textile/core.py
@@ -1,6 +1,4 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
 __copyright__ = """
 Copyright (c) 2009, Jason Samsa, http://jsamsa.com/
 Copyright (c) 2010, Kurt Raschke <kurt@kurtraschke.com>
@@ -20,13 +18,14 @@
 import uuid
 from urllib.parse import urlparse, urlsplit, urlunsplit, quote, unquote
 from collections import OrderedDict
+from nh3 import clean
 
-from textile.tools import sanitizer, imagesize
 from textile.regex_strings import (align_re_s, cls_re_s, pnct_re_s,
-        regex_snippets, syms_re_s, table_span_re_s)
+                                   regex_snippets, syms_re_s, table_span_re_s)
 from textile.utils import (decode_high, encode_high, encode_html, generate_tag,
-        has_raw_text, is_rel_url, is_valid_url, list_type, normalize_newlines,
-        parse_attributes, pba)
+                           getimagesize, has_raw_text, human_readable_url,
+                           is_rel_url, is_valid_url, list_type,
+                           normalize_newlines, parse_attributes, pba)
 from textile.objects import Block, Table
 
 try:
@@ -35,43 +34,149 @@
     import re
 
 
+def make_glyph_replacers(html_type, uid, glyph_defs):
+    """
+    Generates a list of "replacers" (each is a pair consisting of
+    a regular expression and a replacing pattern) that,
+    when applied sequentially, replace some characters of the original
+    text with their HTML codes to produce valid HTML.
+    """
+    cur = (
+        r'(?:[{0}]{1}*)?'.format(regex_snippets['cur'], regex_snippets['space'])
+        if regex_snippets['cur']
+        else r'')
+    pre_result = [
+        # dimension sign (before apostrophes/quotes are replaced)
+        (re.compile(
+            r'([0-9]+[\])]?[\'"]? ?)[x]( ?[\[(]?)'
+            r'(?=[+-]?{0}[0-9]*\.?[0-9]+)'.format(cur),
+            flags=re.I | re.U),
+         r'\1{dimension}\2'),
+        # apostrophe's
+        (re.compile(
+            r"({0}|\))'({0})"
+            .format(regex_snippets['wrd']),
+            flags=re.U),
+         r'\1{apostrophe}\2'),
+        # back in '88
+        (re.compile(
+            r"({0})'(\d+{1}?)\b(?![.]?[{1}]*?')".format(
+                regex_snippets['space'], regex_snippets['wrd']),
+            flags=re.U),
+         r'\1{apostrophe}\2'),
+        # single opening following an open bracket.
+        (re.compile(r"([([{])'(?=\S)", flags=re.U),
+         r'\1{quote_single_open}'),
+        # single closing
+        (re.compile(
+            r"(\S)'(?={0}|{1}|<|$)".format(regex_snippets['space'], pnct_re_s),
+            flags=re.U),
+         r'\1{quote_single_close}'),
+        # single opening
+        (re.compile(r"'", re.U), r'{quote_single_open}'),
+        # double opening following an open bracket. Allows things like
+        # Hello ["(Mum) & dad"]
+        (re.compile(r'([([{])"(?=\S)', flags=re.U),
+         r'\1{quote_double_open}'),
+        # double closing
+        (re.compile(
+            r'(\S)"(?={0}|{1}|<|$)'.format(regex_snippets['space'], pnct_re_s),
+            flags=re.U),
+         r'\1{quote_double_close}'),
+        # double opening
+        (re.compile(r'"'), r'{quote_double_open}'),
+        # ellipsis
+        (re.compile(r'([^.]?)\.{3}'), r'\1{ellipsis}'),
+        # em dash
+        (re.compile(r'(\s?)--(\s?)'), r'\1{emdash}\2'),
+        # en dash
+        (re.compile(r' - '), r' {endash} '),
+        # trademark
+        (re.compile(
+            r'(\b ?|{0}|^)[([]TM[])]'.format(regex_snippets['space']),
+            flags=re.I | re.U),
+         r'\1{trademark}'),
+        # registered
+        (re.compile(
+            r'(\b ?|{0}|^)[([]R[])]'.format(regex_snippets['space']),
+            flags=re.I | re.U),
+            r'\1{registered}'),
+        # copyright
+        (re.compile(
+            r'(\b ?|{0}|^)[([]C[])]'.format(regex_snippets['space']),
+            flags=re.I | re.U),
+         r'\1{copyright}'),
+        # 1/2
+        (re.compile(r'[([]1\/2[])]'), r'{half}'),
+        # 1/4
+        (re.compile(r'[([]1\/4[])]'), r'{quarter}'),
+        # 3/4
+        (re.compile(r'[([]3\/4[])]'), r'{threequarters}'),
+        # degrees
+        (re.compile(r'[([]o[])]'), r'{degrees}'),
+        # plus/minus
+        (re.compile(r'[([]\+\/-[])]'), r'{plusminus}'),
+        # 3+ uppercase acronym
+        (re.compile(
+            r'\b([{0}][{1}]{{2,}})\b(?:[(]([^)]*)[)])'
+            .format(regex_snippets['abr'], regex_snippets['acr']),
+            flags=re.U),
+         (r'<abbr title="\2">\1</abbr>' if html_type == 'html5'
+          else r'<acronym title="\2">\1</acronym>')),
+        # 3+ uppercase
+        (re.compile(
+            r'({space}|^|[>(;-])([{abr}]{{3,}})([{nab}]*)'
+            '(?={space}|{pnct}|<|$)(?=[^">]*?(<|$))'
+            .format(space=regex_snippets['space'],
+                    abr=regex_snippets['abr'],
+                    nab=regex_snippets['nab'],
+                    pnct=pnct_re_s),
+            re.U),
+         r'\1<span class="caps">{0}:glyph:\2</span>\3'.format(uid)),
+    ]
+    return [(regex_obj, replacement.format(**glyph_defs))
+            for (regex_obj, replacement) in pre_result]
+
+
 class Textile(object):
     restricted_url_schemes = ('http', 'https', 'ftp', 'mailto')
-    unrestricted_url_schemes = restricted_url_schemes + ('file', 'tel',
-            'callto', 'sftp', 'data')
+    unrestricted_url_schemes = restricted_url_schemes + (
+        'file', 'tel', 'callto', 'sftp', 'data')
 
     btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', r'fn\d+', 'p', '###')
     btag_lite = ('bq', 'bc', 'p')
 
     note_index = 1
 
-    doctype_whitelist = ['xhtml', 'html5']
-
     glyph_definitions = {
-        'quote_single_open':  '&#8216;',
-        'quote_single_close': '&#8217;',
-        'quote_double_open':  '&#8220;',
-        'quote_double_close': '&#8221;',
-        'apostrophe':         '&#8217;',
-        'prime':              '&#8242;',
-        'prime_double':       '&#8243;',
-        'ellipsis':           '&#8230;',
-        'ampersand':          '&amp;',
-        'emdash':             '&#8212;',
-        'endash':             '&#8211;',
-        'dimension':          '&#215;',
-        'trademark':          '&#8482;',
-        'registered':         '&#174;',
-        'copyright':          '&#169;',
-        'half':               '&#189;',
-        'quarter':            '&#188;',
-        'threequarters':      '&#190;',
-        'degrees':            '&#176;',
-        'plusminus':          '&#177;',
+        'quote_single_open':  '&#8216;',  # noqa: E241
+        'quote_single_close': '&#8217;',  # noqa: E241
+        'quote_double_open':  '&#8220;',  # noqa: E241
+        'quote_double_close': '&#8221;',  # noqa: E241
+        'apostrophe':         '&#8217;',  # noqa: E241
+        'prime':              '&#8242;',  # noqa: E241
+        'prime_double':       '&#8243;',  # noqa: E241
+        'ellipsis':           '&#8230;',  # noqa: E241
+        'ampersand':          '&amp;',    # noqa: E241
+        'emdash':             '&#8212;',  # noqa: E241
+        'endash':             '&#8211;',  # noqa: E241
+        'dimension':          '&#215;',   # noqa: E241
+        'trademark':          '&#8482;',  # noqa: E241
+        'registered':         '&#174;',   # noqa: E241
+        'copyright':          '&#169;',   # noqa: E241
+        'half':               '&#189;',   # noqa: E241
+        'quarter':            '&#188;',   # noqa: E241
+        'threequarters':      '&#190;',   # noqa: E241
+        'degrees':            '&#176;',   # noqa: E241
+        'plusminus':          '&#177;',   # noqa: E241
     }
 
+    spanWrappers = (
+        ('[', ']'),
+    )
+
     def __init__(self, restricted=False, lite=False, noimage=False,
-            get_sizes=False, html_type='xhtml', rel='', block_tags=True):
+                 get_sizes=False, html_type='xhtml', rel='', block_tags=True):
         """Textile properties that are common to regular textile and
         textile_restricted"""
         self.restricted = restricted
@@ -93,119 +198,8 @@ def __init__(self, restricted=False, lite=False, noimage=False,
         self.refIndex = 0
         self.block_tags = block_tags
 
-        cur = r''
-        if regex_snippets['cur']: # pragma: no branch
-            cur = r'(?:[{0}]{1}*)?'.format(regex_snippets['cur'],
-                    regex_snippets['space'])
-
-        # We'll be searching for characters that need to be HTML-encoded to
-        # produce properly valid html.  These are the defaults that work in
-        # most cases.  Below, we'll copy this and modify the necessary pieces
-        # to make it work for characters at the beginning of the string.
-        self.glyph_search = [
-            # apostrophe's
-            re.compile(r"(^|{0}|\))'({0})".format(regex_snippets['wrd']),
-                flags=re.U),
-            # back in '88
-            re.compile(r"({0})'(\d+{1}?)\b(?![.]?[{1}]*?')".format(
-                regex_snippets['space'], regex_snippets['wrd']),
-                flags=re.U),
-            # single opening following an open bracket.
-            re.compile(r"([([{])'(?=\S)", flags=re.U),
-            # single closing
-            re.compile(r"(^|\S)'(?={0}|{1}|<|$)".format(
-                regex_snippets['space'], pnct_re_s), flags=re.U),
-            # single opening
-            re.compile(r"'", re.U),
-            # double opening following an open bracket. Allows things like
-            # Hello ["(Mum) & dad"]
-            re.compile(r'([([{])"(?=\S)', flags=re.U),
-            # double closing
-            re.compile(r'(^|\S)"(?={0}|{1}|<|$)'.format(
-                regex_snippets['space'], pnct_re_s), re.U),
-            # double opening
-            re.compile(r'"'),
-            # ellipsis
-            re.compile(r'([^.]?)\.{3}'),
-            # ampersand
-            re.compile(r'(\s?)&(\s)', re.U),
-            # em dash
-            re.compile(r'(\s?)--(\s?)'),
-            # en dash
-            re.compile(r' - '),
-            # dimension sign
-            re.compile(r'([0-9]+[\])]?[\'"]? ?)[x]( ?[\[(]?)'
-                r'(?=[+-]?{0}[0-9]*\.?[0-9]+)'.format(cur), flags=re.I | re.U),
-            # trademark
-            re.compile(r'(\b ?|{0}|^)[([]TM[])]'.format(regex_snippets['space']
-                ), flags=re.I | re.U),
-            # registered
-            re.compile(r'(\b ?|{0}|^)[([]R[])]'.format(regex_snippets['space']
-                ), flags=re.I | re.U),
-            # copyright
-            re.compile(r'(\b ?|{0}|^)[([]C[])]'.format(regex_snippets['space']
-                ), flags=re.I | re.U),
-            # 1/2
-            re.compile(r'[([]1\/2[])]'),
-            # 1/4
-            re.compile(r'[([]1\/4[])]'),
-            # 3/4
-            re.compile(r'[([]3\/4[])]'),
-            # degrees
-            re.compile(r'[([]o[])]'),
-            # plus/minus
-            re.compile(r'[([]\+\/-[])]'),
-            # 3+ uppercase acronym
-            re.compile(r'\b([{0}][{1}]{{2,}})\b(?:[(]([^)]*)[)])'.format(
-                regex_snippets['abr'], regex_snippets['acr']), flags=re.U),
-            # 3+ uppercase
-            re.compile(r'({space}|^|[>(;-])([{abr}]{{3,}})([{nab}]*)'
-                '(?={space}|{pnct}|<|$)(?=[^">]*?(<|$))'.format(**{ 'space':
-                    regex_snippets['space'], 'abr': regex_snippets['abr'],
-                    'nab': regex_snippets['nab'], 'pnct': pnct_re_s}), re.U),
-        ]
-        # These are the changes that need to be made for characters that occur
-        # at the beginning of the string.
-        self.glyph_search_initial = list(self.glyph_search)
-        # apostrophe's
-        self.glyph_search_initial[0] = re.compile(r"({0}|\))'({0})".format(
-            regex_snippets['wrd']), flags=re.U)
-        # single closing
-        self.glyph_search_initial[3] = re.compile(r"(\S)'(?={0}|{1}|$)".format(
-                regex_snippets['space'], pnct_re_s), re.U)
-        # double closing
-        self.glyph_search_initial[6] = re.compile(r'(\S)"(?={0}|{1}|<|$)'.format(
-                regex_snippets['space'], pnct_re_s), re.U)
-
-        self.glyph_replace = [x.format(**self.glyph_definitions) for x in (
-            r'\1{apostrophe}\2',                  # apostrophe's
-            r'\1{apostrophe}\2',                  # back in '88
-            r'\1{quote_single_open}',             # single opening after bracket
-            r'\1{quote_single_close}',            # single closing
-            r'{quote_single_open}',               # single opening
-            r'\1{quote_double_open}',             # double opening after bracket
-            r'\1{quote_double_close}',            # double closing
-            r'{quote_double_open}',               # double opening
-            r'\1{ellipsis}',                      # ellipsis
-            r'\1{ampersand}\2',                   # ampersand
-            r'\1{emdash}\2',                      # em dash
-            r' {endash} ',                        # en dash
-            r'\1{dimension}\2',                   # dimension sign
-            r'\1{trademark}',                     # trademark
-            r'\1{registered}',                    # registered
-            r'\1{copyright}',                     # copyright
-            r'{half}',                            # 1/2
-            r'{quarter}',                         # 1/4
-            r'{threequarters}',                   # 3/4
-            r'{degrees}',                         # degrees
-            r'{plusminus}',                       # plus/minus
-            r'<acronym title="\2">\1</acronym>',  # 3+ uppercase acronym
-            r'\1<span class="caps">{0}:glyph:\2'  # 3+ uppercase
-              r'</span>\3'.format(self.uid),
-        )]
-
-        if self.html_type == 'html5':
-            self.glyph_replace[21] = r'<abbr title="\2">\1</abbr>'
+        self.glyph_replacers = make_glyph_replacers(
+            html_type, self.uid, self.glyph_definitions)
 
         if self.restricted is True:
             self.url_schemes = self.restricted_url_schemes
@@ -238,12 +232,12 @@ def parse(self, text, rel=None, sanitize=False):
 
         if self.block_tags:
             if self.lite:
-                self.blocktag_whitelist = ['bq', 'p']
+                self.blocktag_allowlist = set(['bq', 'p', 'br'])
                 text = self.block(text)
             else:
-                self.blocktag_whitelist = [ 'bq', 'p', 'bc', 'notextile',
-                        'pre', 'h[1-6]',
-                        'fn{0}+'.format(regex_snippets['digit']), '###']
+                self.blocktag_allowlist = set(['bq', 'p', 'br', 'bc', 'notextile',
+                                               'pre', 'h[1-6]',
+                                               f"fn{regex_snippets['digit']}+", '###'])
                 text = self.block(text)
                 text = self.placeNoteLists(text)
         else:
@@ -265,8 +259,9 @@ def parse(self, text, rel=None, sanitize=False):
         text = text.replace('{0}:glyph:'.format(self.uid), '')
 
         if sanitize:
-            text = sanitizer.sanitize(text)
+            text = clean(text, tags=self.blocktag_allowlist)
 
+        text = self.retrieveTags(text)
         text = self.retrieveURLs(text)
 
         # if the text contains a break tag (<br> or <br />) not followed by
@@ -280,9 +275,10 @@ def parse(self, text, rel=None, sanitize=False):
     def table(self, text):
         text = "{0}\n\n".format(text)
         pattern = re.compile(r'^(?:table(?P<tatts>_?{s}{a}{c})\.'
-                r'(?P<summary>.*?)\n)?^(?P<rows>{a}{c}\.? ?\|.*\|)'
-                r'[\s]*\n\n'.format(**{'s': table_span_re_s, 'a': align_re_s,
-                    'c': cls_re_s}), flags=re.S | re.M | re.U)
+                             r'(?P<summary>.*?)\n)?^(?P<rows>{a}{c}\.? ?\|.*\|)'
+                             r'[\s]*\n\n'.format(
+                                 **{'s': table_span_re_s, 'a': align_re_s,
+                                    'c': cls_re_s}), flags=re.S | re.M | re.U)
         match = pattern.search(text)
         if match:
             table = Table(self, **match.groupdict())
@@ -291,7 +287,7 @@ def table(self, text):
 
     def textileLists(self, text):
         pattern = re.compile(r'^((?:[*;:]+|[*;:#]*#(?:_|\d+)?){0}[ .].*)$'
-                r'(?![^#*;:])'.format(cls_re_s), re.U | re.M | re.S)
+                             r'(?![^#*;:])'.format(cls_re_s), re.U | re.M | re.S)
         return pattern.sub(self.fTextileList, text)
 
     def fTextileList(self, match):
@@ -306,7 +302,7 @@ def fTextileList(self, match):
                 nextline = ''
 
             m = re.search(r"^(?P<tl>[#*;:]+)(?P<st>_|\d+)?(?P<atts>{0})[ .]"
-                    "(?P<content>.*)$".format(cls_re_s), line, re.S)
+                          "(?P<content>.*)$".format(cls_re_s), line, re.S)
             if m:
                 tl, start, atts, content = m.groups()
                 content = content.strip()
@@ -354,7 +350,7 @@ def fTextileList(self, match):
                         self.olstarts[tl] = 1
 
             nm = re.match(r"^(?P<nextlistitem>[#\*;:]+)(_|[\d]+)?{0}"
-                    r"[ .].*".format(cls_re_s), nextline)
+                          r"[ .].*".format(cls_re_s), nextline)
             if nm:
                 nl = nm.group('nextlistitem')
 
@@ -374,7 +370,7 @@ def fTextileList(self, match):
             if tl not in ls:
                 ls[tl] = 1
                 itemtag = ("\n{0}\t<{1}>{2}".format(tabs, litem, content) if
-                            showitem else '')
+                           showitem else '')
                 line = "<{0}l{1}{2}>{3}".format(ltype, atts, start, itemtag)
             else:
                 line = ("\t<{0}{1}>{2}".format(litem, atts, content) if
@@ -387,18 +383,13 @@ def fTextileList(self, match):
             for k, v in reversed(list(ls.items())):
                 if len(k) > len(nl):
                     if v != 2:
-                        line = "{0}\n{1}</{2}l>".format(line, tabs,
-                                list_type(k))
+                        line = "{0}\n{1}</{2}l>".format(
+                            line, tabs, list_type(k))
                     if len(k) > 1 and v != 2:
                         line = "{0}</{1}>".format(line, litem)
                     del ls[k]
             # Remember the current Textile tag:
             pt = tl
-            # This else exists in the original php version.  I'm not sure how
-            # to come up with a case where the line would not match.  I think
-            # it may have been necessary due to the way php returns matches.
-            # else:
-                #line = "{0}\n".format(line)
             result.append(line)
         return self.doTagBr(litem, "\n".join(result))
 
@@ -407,14 +398,28 @@ def doTagBr(self, tag, input):
                           re.S).sub(self.doBr, input)
 
     def doPBr(self, in_):
-        return re.compile(r'<(p)([^>]*?)>(.*)(</\1>)', re.S).sub(self.doBr,
-                                                                 in_)
+        return (re
+                .compile(r'<(p|h[1-6])([^>]*?)>(.*)(</\1>)', re.S)
+                .sub(self.fPBr, in_))
+
+    def fPBr(self, m):
+        content = m.group(3)
+        content = (
+            re.compile(r"<br[ ]*/?>{0}*\n(?![{0}|])".format(regex_snippets['space']),
+                       re.I)
+            .sub("\n", content))
+        content = re.compile(r"\n(?![\s|])").sub('<br />', content)
+        return '<{0}{1}>{2}{3}'.format(m.group(1), m.group(2), content, m.group(4))
 
     def doBr(self, match):
-        content = re.sub(r'(.+)(?:(?<!<br>)|(?<!<br />))\n(?![#*;:\s|])',
-                         r'\1<br />', match.group(3))
+        content = (
+            re.compile(
+                r'(.+)(?!(?<=</dd>|</dt>|</li>|<br/>)'
+                r'|(?<=<br>)|(?<=<br />))\n(?![#*;:\s|])',
+                re.I)
+            .sub(r'\1<br />', match.group(3)))
         return '<{0}{1}>{2}{3}'.format(match.group(1), match.group(2), content,
-                match.group(4))
+                                       match.group(4))
 
     def block(self, text):
         if not self.lite:
@@ -450,8 +455,8 @@ def block(self, text):
             eat_whitespace = False
 
             pattern = (r'^(?P<tag>{0})(?P<atts>{1}{2})\.(?P<ext>\.?)'
-                    r'(?::(?P<cite>\S+))? (?P<content>.*)$'.format(tre,
-                        align_re_s, cls_re_s))
+                       r'(?::(?P<cite>\S+))? (?P<content>.*)$'.format(
+                           tre, align_re_s, cls_re_s))
             match = re.search(pattern, line, flags=re.S | re.U)
             # tag specified on this line.
             if match:
@@ -467,15 +472,17 @@ def block(self, text):
                         content = out[-2]
 
                     if not multiline_para:
-                        content = generate_tag(block.inner_tag, content,
-                                block.inner_atts)
-                        content = generate_tag(block.outer_tag, content,
-                            block.outer_atts)
+                        # block will have been defined in a previous run of the
+                        # loop
+                        content = generate_tag(block.inner_tag, content,  # noqa: F821
+                                               block.inner_atts)  # noqa: F821
+                        content = generate_tag(block.outer_tag, content,  # noqa: F821
+                                               block.outer_atts)  # noqa: F821
                     out[-2] = content
                 tag, atts, ext, cite, content = match.groups()
                 block = Block(self, **match.groupdict())
                 inner_block = generate_tag(block.inner_tag, block.content,
-                        block.inner_atts)
+                                           block.inner_atts)
                 # code tags and raw text won't be indented inside outer_tag.
                 if block.inner_tag != 'code' and not has_raw_text(inner_block):
                     inner_block = "\n\t\t{0}\n\t".format(inner_block)
@@ -483,7 +490,7 @@ def block(self, text):
                     line = block.content
                 else:
                     line = generate_tag(block.outer_tag, inner_block,
-                            block.outer_atts)
+                                        block.outer_atts)
                     # pre tags and raw text won't be indented.
                     if block.outer_tag != 'pre' and not has_raw_text(line):
                         line = "\t{0}".format(line)
@@ -515,10 +522,10 @@ def block(self, text):
                         line = block.content
                     else:
                         line = generate_tag(block.outer_tag, block.content,
-                                block.outer_atts)
+                                            block.outer_atts)
                         line = "\t{0}".format(line)
                 else:
-                    if block.tag == 'pre' or block.inner_tag == 'code':
+                    if block.tag in ('pre', 'notextile') or block.inner_tag == 'code':
                         line = self.shelve(encode_html(line, quotes=True))
                     else:
                         line = self.graf(line)
@@ -559,24 +566,24 @@ def block(self, text):
         # at this point, we've gone through all the lines. if there's still an
         # extension in effect, we close it here
         if ext and out and not block.tag == 'p':
-            block.content = out.pop()
-            block.process()
-            final = generate_tag(block.outer_tag, block.content,
-                                 block.outer_atts)
-            out.append(final)
+            content = out.pop()
+            content = generate_tag(block.inner_tag, content, block.inner_atts)
+            content = generate_tag(block.outer_tag, content, block.outer_atts)
+            out.append(content)
         return ''.join(out)
 
     def footnoteRef(self, text):
         # somehow php-textile gets away with not capturing the space.
         return re.compile(r'(?<=\S)\[(?P<id>{0}+)(?P<nolink>!?)\]'
-                r'(?P<space>{1}?)'.format(regex_snippets['digit'],
-                    regex_snippets['space']), re.U).sub(self.footnoteID, text)
+                          r'(?P<space>{1}?)'.format(
+                              regex_snippets['digit'], regex_snippets['space']),
+                          re.U).sub(self.footnoteID, text)
 
     def footnoteID(self, m):
         fn_att = OrderedDict({'class': 'footnote'})
         if m.group('id') not in self.fn:
-            self.fn[m.group('id')] = '{0}{1}'.format(self.linkPrefix,
-                    self._increment_link_index())
+            self.fn[m.group('id')] = '{0}{1}'.format(
+                self.linkPrefix, self._increment_link_index())
             fnid = self.fn[m.group('id')]
             fn_att['id'] = 'fnrev{0}'.format(fnid)
         fnid = self.fn[m.group('id')]
@@ -602,21 +609,31 @@ def glyphs(self, text):
         single quote.  If it's the first character of one of those splits, it's
         an apostrophe or closed single quote, but the regex will bear that out.
         A similar situation occurs for double quotes as well.
-        So, for the first pass, we use the glyph_search_initial set of
-        regexes.  For all remaining passes, we use glyph_search
+        So, for the first pass, we use a set of regexes from
+        the initial_glyph_replacers. For all remaining passes,
+        we use glyph_replacers
         """
         text = text.rstrip('\n')
         result = []
-        searchlist = self.glyph_search_initial
+        standalone_amp_re = re.compile(
+            r"&(?!#[0-9]+;|#x[a-f0-9]+;|[a-z][a-z0-9]*;)",
+            flags=re.I)
+        html_amp_symbol = self.glyph_definitions['ampersand']
         # split the text by any angle-bracketed tags
-        for i, line in enumerate(re.compile(r'(<[\w\/!?].*?>)', re.U).split(
-            text)):
-            if not i % 2:
-                for s, r in zip(searchlist, self.glyph_replace):
+        lines = re.compile(r'(<[\w/!?].*?>)', re.U | re.S).split(text)
+        for i, line in enumerate(lines):
+            if i % 2 == 0:
+                if not self.restricted:
+                    # Raw < > & chars have already been encoded
+                    # when in restricted mode
+                    line = (
+                        standalone_amp_re
+                        .sub(html_amp_symbol, line)
+                        .replace('<', '&lt;')
+                        .replace('>', '&gt;'))
+                for s, r in self.glyph_replacers:
                     line = s.sub(r, line)
             result.append(line)
-            if i == 0:
-                searchlist = self.glyph_search
         return ''.join(result)
 
     def getRefs(self, text):
@@ -719,7 +736,7 @@ def markStartOfLinks(self, text):
                 linkparts = []
                 i = 0
 
-                while balanced != 0 or i == 0: # pragma: no branch
+                while balanced != 0 or i == 0:  # pragma: no branch
                     # Starting at the end, pop off the previous part of the
                     # slice's fragments.
 
@@ -728,9 +745,9 @@ def markStartOfLinks(self, text):
 
                     if len(possibility) > 0:
                         # did this part inc or dec the balanced count?
-                        if re.search(r'^\S|=$', possibility, flags=re.U): # pragma: no branch
+                        if re.search(r'^\S|=$', possibility, flags=re.U):  # pragma: no branch
                             balanced = balanced - 1
-                        if re.search(r'\S$', possibility, flags=re.U): # pragma: no branch
+                        if re.search(r'\S$', possibility, flags=re.U):  # pragma: no branch
                             balanced = balanced + 1
                         try:
                             possibility = possible_start_quotes.pop()
@@ -750,7 +767,7 @@ def markStartOfLinks(self, text):
 
                         try:
                             possibility = possible_start_quotes.pop()
-                        except IndexError: # pragma: no cover
+                        except IndexError:  # pragma: no cover
                             # If out of possible starting segments we back the
                             # last one from the linkparts array
                             linkparts.pop()
@@ -759,7 +776,7 @@ def markStartOfLinks(self, text):
                         # we have a closing ".
                         if (possibility == '' or possibility.endswith(' ')):
                             # force search exit
-                            balanced = 0;
+                            balanced = 0
 
                     if balanced <= 0:
                         possible_start_quotes.append(possibility)
@@ -775,7 +792,7 @@ def markStartOfLinks(self, text):
                 # Re-assemble the link starts with a specific marker for the
                 # next regex.
                 o = '{0}{1}linkStartMarker:"{2}'.format(pre_link, self.uid,
-                        link_content)
+                                                        link_content)
                 output.append(o)
 
             # Add the last part back
@@ -817,14 +834,14 @@ def fLink(self, m):
             )                            # end of $text
             (?:\((?P<title>[^)]+?)\))?   # $title (if any)
             $'''.format(cls_re_s, regex_snippets['space']), inner,
-                flags=re.X | re.U)
+                      flags=re.X | re.U)
 
         atts = (m and m.group('atts')) or ''
         text = (m and m.group('text')) or inner
         title = (m and m.group('title')) or ''
 
         pop, tight = '', ''
-        counts = { '[': None, ']': url.count(']'), '(': None, ')': None }
+        counts = {'[': None, ']': url.count(']'), '(': None, ')': None}
 
         # Look for footnotes or other square-bracket delimited stuff at the end
         # of the url...
@@ -891,13 +908,13 @@ def _closingsquarebracket(c, pop, popped, url_chars, counts, pre):
                 # it
                 popped = True
                 url_chars.pop()
-                counts[']'] = counts[']'] - 1;
-                if first: # pragma: no branch
+                counts[']'] = counts[']'] - 1
+                if first:  # pragma: no branch
                     pre = ''
             return pop, popped, url_chars, counts, pre
 
         def _closingparenthesis(c, pop, popped, url_chars, counts, pre):
-            if counts[')'] is None: # pragma: no branch
+            if counts[')'] is None:  # pragma: no branch
                 counts['('] = url.count('(')
                 counts[')'] = url.count(')')
 
@@ -912,20 +929,20 @@ def _casesdefault(c, pop, popped, url_chars, counts, pre):
             return pop, popped, url_chars, counts, pre
 
         cases = {
-                '!': _endchar,
-                '?': _endchar,
-                ':': _endchar,
-                ';': _endchar,
-                '.': _endchar,
-                ',': _endchar,
-                '>': _rightanglebracket,
-                ']': _closingsquarebracket,
-                ')': _closingparenthesis,
-                }
-        for c in url_chars[-1::-1]: # pragma: no branch
+            '!': _endchar,
+            '?': _endchar,
+            ':': _endchar,
+            ';': _endchar,
+            '.': _endchar,
+            ',': _endchar,
+            '>': _rightanglebracket,
+            ']': _closingsquarebracket,
+            ')': _closingparenthesis,
+        }
+        for c in url_chars[-1::-1]:  # pragma: no branch
             popped = False
-            pop, popped, url_chars, counts, pre = cases.get(c,
-                    _casesdefault)(c, pop, popped, url_chars, counts, pre)
+            pop, popped, url_chars, counts, pre = cases.get(
+                c, _casesdefault)(c, pop, popped, url_chars, counts, pre)
             first = False
             if popped is False:
                 break
@@ -939,16 +956,19 @@ def _casesdefault(c, pop, popped, url_chars, counts, pre):
             return in_.replace('{0}linkStartMarker:'.format(self.uid), '')
 
         if text == '$':
-            text = url
-            if "://" in text:
-                text = text.split("://")[1]
-            elif ":" in text:
-                text = text.split(":")[1]
+            if valid_scheme:
+                text = human_readable_url(url)
+            else:
+                ref_url = self.urlrefs.get(url)
+                if ref_url is not None:
+                    text = human_readable_url(ref_url)
+                else:
+                    text = url
 
         text = text.strip()
         title = encode_html(title)
 
-        if not self.noimage: # pragma: no branch
+        if not self.noimage:  # pragma: no branch
             text = self.image(text)
         text = self.span(text)
         text = self.glyphs(text)
@@ -989,14 +1009,14 @@ def encode_url(self, url):
             """, re.X | re.U)
             netloc_parsed = netloc_pattern.match(parsed.netloc).groupdict()
         else:
-            netloc_parsed = {'user': '', 'password': '', 'host': '', 'port':
-                    ''}
+            netloc_parsed = {'user': '', 'password': '', 'host': '', 'port': ''}
 
         # encode each component
         scheme = parsed.scheme
         user = netloc_parsed['user'] and quote(netloc_parsed['user'])
-        password = (netloc_parsed['password'] and
-                    quote(netloc_parsed['password']))
+        password = (
+            netloc_parsed['password'] and quote(netloc_parsed['password'])
+        )
         host = netloc_parsed['host']
         port = netloc_parsed['port'] and netloc_parsed['port']
         # the below splits the path portion of the url by slashes, translates
@@ -1006,7 +1026,7 @@ def encode_url(self, url):
         # because the quote and unquote functions expects different input
         # types: unicode strings for PY2 and str for PY3.
         path_parts = (quote(unquote(pce), b'') for pce in
-                parsed.path.split('/'))
+                      parsed.path.split('/'))
         path = '/'.join(path_parts)
 
         # put it back together
@@ -1039,26 +1059,36 @@ def span(self, text):
                     (?P<end>[{pnct}]*)
                     {tag}
                     (?P<tail>$|[\[\]}}<]|(?=[{pnct}]{{1,2}}[^0-9]|\s|\)))
-                """.format(**{'tag': tag, 'cls': cls_re_s, 'pnct': pnct,
-                    'space': regex_snippets['space']}), flags=re.X | re.U)
+                """.format(
+                    **{'tag': tag, 'cls': cls_re_s, 'pnct': pnct, 'space':
+                        regex_snippets['space']}
+                ), flags=re.X | re.U)
                 text = pattern.sub(self.fSpan, text)
         self.span_depth = self.span_depth - 1
         return text
 
+    def getSpecialOptions(self, pre, tail):
+        for before, after in self.spanWrappers:
+            if pre == before and tail == after:
+                pre = tail = ''
+                break
+        return (pre, tail)
+
     def fSpan(self, match):
         pre, tag, atts, cite, content, end, tail = match.groups()
+        pre, tail = self.getSpecialOptions(pre, tail)
 
         qtags = {
-            '*':  'strong',
-            '**': 'b',
-            '??': 'cite',
-            '_':  'em',
-            '__': 'i',
-            '-':  'del',
-            '%':  'span',
-            '+':  'ins',
-            '~':  'sub',
-            '^':  'sup'
+            '*':  'strong',  # noqa: E241
+            '**': 'b',       # noqa: E241
+            '??': 'cite',    # noqa: E241
+            '_':  'em',      # noqa: E241
+            '__': 'i',       # noqa: E241
+            '-':  'del',     # noqa: E241
+            '%':  'span',    # noqa: E241
+            '+':  'ins',     # noqa: E241
+            '~':  'sub',     # noqa: E241
+            '^':  'sup'      # noqa: E241
         }
 
         tag = qtags[tag]
@@ -1067,25 +1097,45 @@ def fSpan(self, match):
             atts = '{0} cite="{1}"'.format(atts, cite.rstrip())
 
         content = self.span(content)
+        opentag = '<{0}{1}>'.format(tag, atts)
+        closetag = '</{0}>'.format(tag)
+        tags = self.storeTags(opentag, closetag)
+        return pre + tags['open'] + content + end + tags['close'] + tail
+
+    def storeTags(self, opentag, closetag=''):
+        tags = {}
+        self.refIndex += 1
+        self.refCache[self.refIndex] = opentag
+        tags['open'] = self.uid + str(self.refIndex) + ':ospan '
+
+        self.refIndex += 1
+        self.refCache[self.refIndex] = closetag
+        tags['close'] = ' ' + self.uid + str(self.refIndex) + ':cspan'
+        return tags
+
+    def retrieveTags(self, text):
+        text = (re.compile('{0}(?P<token>[0-9]+):ospan '.format(self.uid), re.U)
+                .sub(self.fRetrieveTags, text))
+        text = (re.compile(' {0}(?P<token>[0-9]+):cspan'.format(self.uid), re.U)
+                .sub(self.fRetrieveTags, text))
+        return text
 
-        out = "<{0}{1}>{2}{3}</{4}>".format(tag, atts, content, end, tag)
-        if pre and not tail or tail and not pre:
-            out = '{0}{1}{2}'.format(pre, out, tail)
-        return out
+    def fRetrieveTags(self, match):
+        return self.refCache[int(match.group('token'))]
 
     def image(self, text):
         pattern = re.compile(r"""
-            (?:[\[{{])?         # pre
-            \!                  # opening !
-            (\<|\=|\>)?         # optional alignment atts
-            ({0})               # optional style,class atts
-            (?:\.\s)?           # optional dot-space
-            ([^\s(!]+)          # presume this is the src
-            \s?                 # optional space
-            (?:\(([^\)]+)\))?   # optional title
-            \!                  # closing
-            (?::(\S+))?         # optional href
-            (?:[\]}}]|(?=\s|$)) # lookahead: space or end of string
+            (?:[\[{{])?                # pre
+            \!                         # opening !
+            (\<|\=|\>)?                # optional alignment atts
+            ({0})                      # optional style,class atts
+            (?:\.\s)?                  # optional dot-space
+            ([^\s(!]+)                 # presume this is the src
+            \s?                        # optional space
+            (?:\(([^\)]+)\))?          # optional title
+            \!                         # closing
+            (?::(\S+)(?<![\]).,]))?    # optional href sans final punct
+            (?:[\]}}]|(?=[.,\s)|]|$))  # lookahead: space or end of string
         """.format(cls_re_s), re.U | re.X)
         return pattern.sub(self.fImage, text)
 
@@ -1101,7 +1151,7 @@ def fImage(self, match):
             title = ''
 
         if not is_rel_url(url) and self.get_sizes:
-            size = imagesize.getimagesize(url)
+            size = getimagesize(url)
 
         if href:
             href = self.shelveURL(href)
@@ -1137,6 +1187,7 @@ def code(self, text):
     def fCode(self, match):
         before, text, after = match.groups()
         after = after or ''
+        before, after = self.getSpecialOptions(before, after)
         # text needs to be escaped
         text = encode_html(text, quotes=False)
         return ''.join([before, self.shelve('<code>{0}</code>'.format(text)), after])
@@ -1145,6 +1196,7 @@ def fPre(self, match):
         before, text, after = match.groups()
         if after is None:
             after = ''
+        before, after = self.getSpecialOptions(before, after)
         # text needs to be escaped
         text = encode_html(text)
         return ''.join([before, '<pre>', self.shelve(text), '</pre>', after])
@@ -1161,8 +1213,9 @@ def noTextile(self, text):
 
     def fTextile(self, match):
         before, notextile, after = match.groups()
-        if after is None: # pragma: no branch
+        if after is None:  # pragma: no branch
             after = ''
+        before, after = self.getSpecialOptions(before, after)
         return ''.join([before, self.shelve(notextile), after])
 
     def getHTMLComments(self, text):
@@ -1187,7 +1240,7 @@ def redcloth_list(self, text):
         """Parse the text for definition lists and send them to be
         formatted."""
         pattern = re.compile(r"^([-]+{0}[ .].*:=.*)$(?![^-])".format(cls_re_s),
-                re.M | re.U | re.S)
+                             re.M | re.U | re.S)
         return pattern.sub(self.fRCList, text)
 
     def fRCList(self, match):
@@ -1197,7 +1250,7 @@ def fRCList(self, match):
         for line in text:
             # parse the attributes and content
             m = re.match(r'^[-]+({0})[ .](.*)$'.format(cls_re_s), line,
-                    flags=re.M | re.S)
+                         flags=re.M | re.S)
             if not m:
                 continue
 
@@ -1207,9 +1260,12 @@ def fRCList(self, match):
             atts = pba(atts, restricted=self.restricted)
 
             # split the content into the term and definition
-            xm = re.match(r'^(.*?)[\s]*:=(.*?)[\s]*(=:|:=)?[\s]*$', content,
-                          re.S)
-            term, definition, ending = xm.groups()
+            xm = re.match(
+                r'^(.*?){0}*:=(.*?){0}*(=:|:=)?{0}*$'
+                .format(regex_snippets['space']),
+                content,
+                re.S)
+            term, definition, _ = xm.groups()
             # cleanup
             term = term.strip()
             definition = definition.strip(' ')
@@ -1222,16 +1278,23 @@ def fRCList(self, match):
                     dltag = "<dl>"
                 out.append(dltag)
 
-            if definition != '' and term != '':
-                if definition.startswith('\n'):
-                    definition = '<p>{0}</p>'.format(definition.lstrip())
-                definition = definition.replace('\n', '<br />').strip()
+            if term != '':
+                is_newline_started_def = definition.startswith('\n')
+                definition = (
+                    definition
+                    .strip()
+                    .replace('\n', '<br />'))
+
+                if is_newline_started_def:
+                    definition = '<p>{0}</p>'.format(definition)
+                term = term.replace('\n', '<br />')
 
                 term = self.graf(term)
                 definition = self.graf(definition)
 
-                out.extend(['\t<dt{0}>{1}</dt>'.format(atts, term),
-                    '\t<dd>{0}</dd>'.format(definition)])
+                out.append('\t<dt{0}>{1}</dt>'.format(atts, term))
+                if definition:
+                    out.append('\t<dd>{0}</dd>'.format(definition))
 
         out.append('</dl>')
         out = '\n'.join(out)
@@ -1249,12 +1312,12 @@ def placeNoteLists(self, text):
                 else:
                     self.unreferencedNotes[label] = info
 
-            if o: # pragma: no branch
+            if o:  # pragma: no branch
                 # sort o by key
                 o = OrderedDict(sorted(o.items(), key=lambda t: t[0]))
             self.notes = o
         text_re = re.compile(r'<p>notelist({0})(?:\:([\w|{1}]))?([\^!]?)(\+?)'
-                r'\.?[\s]*</p>'.format(cls_re_s, syms_re_s), re.U)
+                             r'\.?[\s]*</p>'.format(cls_re_s, syms_re_s), re.U)
         text = text_re.sub(self.fNoteLists, text)
         return text
 
@@ -1265,9 +1328,9 @@ def fNoteLists(self, match):
         index = '{0}{1}{2}'.format(g_links, extras, start_char)
         result = ''
 
-        if index not in self.notelist_cache: # pragma: no branch
+        if index not in self.notelist_cache:  # pragma: no branch
             o = []
-            if self.notes: # pragma: no branch
+            if self.notes:  # pragma: no branch
                 for seq, info in self.notes.items():
                     links = self.makeBackrefLink(info, g_links, start_char)
                     atts = ''
@@ -1276,11 +1339,11 @@ def fNoteLists(self, match):
                         atts = info['def']['atts']
                         content = info['def']['content']
                         li = ('\t\t<li{0}>{1}<span id="note{2}"> '
-                                '</span>{3}</li>').format(atts, links, infoid,
-                                        content)
+                              '</span>{3}</li>').format(atts, links, infoid,
+                                                        content)
                     else:
-                        li = ('\t\t<li{0}>{1} Undefined Note [#{2}].<li>'
-                                ).format(atts, links, info['seq'])
+                        li = ('\t\t<li{0}>{1} Undefined Note [#{2}].</li>'
+                              ).format(atts, links, info['seq'])
                     o.append(li)
             if '+' == extras and self.unreferencedNotes:
                 for seq, info in self.unreferencedNotes.items():
@@ -1290,13 +1353,14 @@ def fNoteLists(self, match):
                     o.append(li)
             self.notelist_cache[index] = "\n".join(o)
             result = self.notelist_cache[index]
-        list_atts = pba(att, restricted=self.restricted)
-        result = '<ol{0}>\n{1}\n\t</ol>'.format(list_atts, result)
+        if result:
+            list_atts = pba(att, restricted=self.restricted)
+            result = '<ol{0}>\n{1}\n\t</ol>'.format(list_atts, result)
         return result
 
     def makeBackrefLink(self, info, g_links, i):
         """Given the pieces of a back reference link, create an <a> tag."""
-        atts, content, infoid, link = '', '', '', ''
+        link = ''
         if 'def' in info:
             link = info['def']['link']
         backlink_type = link or g_links
@@ -1314,7 +1378,7 @@ def makeBackrefLink(self, info, g_links, i):
             for refid in info['refids']:
                 i_entity = decode_high(i_)
                 sup = """<sup><a href="#noteref{0}">{1}</a></sup>""".format(
-                        refid, i_entity)
+                    refid, i_entity)
                 if allow_inc:
                     i_ = i_ + 1
                 result.append(sup)
@@ -1330,13 +1394,14 @@ def fParseNoteDefs(self, m):
 
         # Assign an id if the note reference parse hasn't found the label yet.
         if label not in self.notes:
-            self.notes[label] = {'id': '{0}{1}'.format(self.linkPrefix,
-                self._increment_link_index())}
+            self.notes[label] = {'id': '{0}{1}'.format(
+                self.linkPrefix, self._increment_link_index())}
 
         # Ignores subsequent defs using the same label
-        if 'def' not in self.notes[label]: # pragma: no branch
-            self.notes[label]['def'] = {'atts': pba(att, restricted=self.restricted), 'content':
-                    self.graf(content), 'link': link}
+        if 'def' not in self.notes[label]:  # pragma: no branch
+            self.notes[label]['def'] = {
+                'atts': pba(att, restricted=self.restricted), 'content':
+                self.graf(content), 'link': link}
         return ''
 
     def noteRef(self, text):
@@ -1378,8 +1443,8 @@ def fParseNoteRefs(self, match):
         # If we are referencing a note that hasn't had the definition parsed
         # yet, then assign it an ID...
         if not self.notes[label]['id']:
-            self.notes[label]['id'] = '{0}{1}'.format(self.linkPrefix,
-                    self._increment_link_index())
+            self.notes[label]['id'] = '{0}{1}'.format(
+                self.linkPrefix, self._increment_link_index())
         labelid = self.notes[label]['id']
 
         # Build the link (if any)...
@@ -1445,5 +1510,4 @@ def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'):
 
     """
     return Textile(restricted=True, lite=lite, noimage=noimage,
-            html_type=html_type, rel='nofollow').parse(
-                    text)
+                   html_type=html_type, rel='nofollow').parse(text)
diff --git a/textile/objects/block.py b/textile/objects/block.py
index de993e87..6d611ed1 100644
--- a/textile/objects/block.py
+++ b/textile/objects/block.py
@@ -1,6 +1,4 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
 from collections import OrderedDict
 try:
     import regex as re
@@ -40,7 +38,7 @@ def process(self):
             [{space}]+                            # whitespace ends def marker
             (?P<content>.*)$                      # content""".format(
                 space=regex_snippets['space'], cls=cls_re_s),
-            flags=re.X | re.U)
+                flags=re.X | re.U)
             notedef = notedef_re.sub(self.textile.fParseNoteDefs, self.content)
 
             # It will be empty if the regex matched and ate it.
@@ -49,13 +47,13 @@ def process(self):
                 self.eat = True
 
         fns = re.search(r'fn(?P<fnid>{0}+)'.format(regex_snippets['digit']),
-                self.tag, flags=re.U)
+                        self.tag, flags=re.U)
         if fns:
             self.tag = 'p'
             fnid = self.textile.fn.get(fns.group('fnid'), None)
             if fnid is None:
                 fnid = '{0}{1}'.format(self.textile.linkPrefix,
-                        self.textile._increment_link_index())
+                                       self.textile._increment_link_index())
 
             # If there is an author-specified ID goes on the wrapper & the
             # auto-id gets pushed to the <sup>
@@ -71,12 +69,11 @@ def process(self):
             else:
                 supp_id = parse_attributes('(#fn{0})'.format(fnid), restricted=self.textile.restricted)
 
-
             if '^' not in self.atts:
                 sup = generate_tag('sup', fns.group('fnid'), supp_id)
             else:
                 fnrev = generate_tag('a', fns.group('fnid'), {'href':
-                    '#fnrev{0}'.format(fnid)})
+                                     '#fnrev{0}'.format(fnid)})
                 sup = generate_tag('sup', fnrev, supp_id)
 
             self.content = '{0} {1}'.format(sup, self.content)
diff --git a/textile/objects/table.py b/textile/objects/table.py
index 60b68040..72781ad1 100644
--- a/textile/objects/table.py
+++ b/textile/objects/table.py
@@ -1,11 +1,9 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
 from xml.etree import ElementTree
 
 from textile.regex_strings import (align_re_s, cls_re_s, regex_snippets,
-        table_span_re_s, valign_re_s)
-from textile.utils import encode_html, generate_tag, parse_attributes
+                                   table_span_re_s, valign_re_s, pnct_re_s)
+from textile.utils import generate_tag, parse_attributes
 
 try:
     import regex as re
@@ -14,6 +12,18 @@
 
 
 class Table(object):
+    caption_re = re.compile(
+        (r'^\|\=(?P<capts>{s}{a}{c})\. '
+         r'(?P<cap>[^\n]*)(?P<row>.*)'
+         .format(**{'s': table_span_re_s, 'a': align_re_s, 'c': cls_re_s})),
+        re.S)
+    colgroup_re = re.compile(
+        r'^\|:(?P<cols>{s}{a}{c}\. .*)'
+        .format(**{'s': table_span_re_s, 'a': align_re_s, 'c': cls_re_s}),
+        re.M)
+    heading_re = re.compile(
+        r'^_(?={0}|{1})'.format(regex_snippets['space'], pnct_re_s))
+
     def __init__(self, textile, tatts, rows, summary):
         self.textile = textile
         self.attributes = parse_attributes(tatts, 'table', restricted=self.textile.restricted)
@@ -27,27 +37,21 @@ def __init__(self, textile, tatts, rows, summary):
     def process(self):
         rgrp = None
         groups = []
-        if self.input[-1] == '|': # pragma: no branch
-            self.input = '{0}\n'.format(self.input)
-        split = self.input.split('|\n')
+        split = (
+            re.compile(r'\|{0}*?$'.format(regex_snippets['space']), re.M)
+            .split(self.input))
         for i, row in enumerate([x for x in split if x]):
             row = row.lstrip()
 
             # Caption -- only occurs on row 1, otherwise treat '|=. foo |...'
             # as a normal center-aligned cell.
-            if i == 0 and row[:2] == '|=':
-                captionpattern = (r"^\|\=(?P<capts>{s}{a}{c})\. "
-                                  r"(?P<cap>[^\n]*)(?P<row>.*)".format(**{
-                                      's': table_span_re_s, 'a': align_re_s,
-                                      'c': cls_re_s}))
-                caption_re = re.compile(captionpattern, re.S)
-                cmtch = caption_re.match(row)
-                if cmtch:
-                    caption = Caption(restricted=self.textile.restricted, **cmtch.groupdict())
-                    self.caption = '\n{0}'.format(caption.caption)
-                    row = cmtch.group('row').lstrip()
-                    if row == '':
-                        continue
+            cmtch = self.caption_re.match(row)
+            if i == 0 and cmtch:
+                caption = Caption(restricted=self.textile.restricted, **cmtch.groupdict())
+                self.caption = '\n{0}'.format(caption.caption)
+                row = cmtch.group('row').lstrip()
+                if row == '':
+                    continue
 
             # Colgroup -- A colgroup row will not necessarily end with a |.
             # Hence it may include the next row of actual table data.
@@ -66,8 +70,9 @@ def process(self):
 
             # search the row for a table group - thead, tfoot, or tbody
             grpmatchpattern = (r"(:?^\|(?P<part>{v})(?P<rgrpatts>{s}{a}{c})"
-                    r"\.\s*$\n)?^(?P<row>.*)").format(**{'v': valign_re_s, 's':
-                        table_span_re_s, 'a': align_re_s, 'c': cls_re_s})
+                               r"\.\s*$\n)?^(?P<row>.*)").format(
+                                   **{'v': valign_re_s, 's': table_span_re_s,
+                                      'a': align_re_s, 'c': cls_re_s})
             grpmatch_re = re.compile(grpmatchpattern, re.S | re.M)
             grpmatch = grpmatch_re.match(row.lstrip())
 
@@ -93,12 +98,13 @@ def process(self):
             r = Row(row_atts, row)
             for cellctr, cell in enumerate(row.split('|')[1:]):
                 ctag = 'td'
-                if cell.startswith('_'):
+                if self.heading_re.match(cell):
                     ctag = 'th'
 
                 cmtch = re.search(r'^(?P<catts>_?{0}{1}{2}\. )'
-                        '(?P<cell>.*)'.format(table_span_re_s, align_re_s,
-                            cls_re_s), cell, flags=re.S)
+                                  '(?P<cell>.*)'.format(
+                                      table_span_re_s, align_re_s, cls_re_s),
+                                  cell, flags=re.S)
                 if cmtch:
                     catts = cmtch.group('catts')
                     cell_atts = parse_attributes(catts, 'td', restricted=self.textile.restricted)
@@ -108,7 +114,7 @@ def process(self):
 
                 if not self.textile.lite:
                     a_pattern = r'(?P<space>{0}*)(?P<cell>.*)'.format(
-                            regex_snippets['space'])
+                        regex_snippets['space'])
                     a = re.search(a_pattern, cell, flags=re.S)
                     cell = self.textile.redcloth_list(a.group('cell'))
                     cell = self.textile.textileLists(cell)
@@ -131,8 +137,8 @@ def process(self):
         if rgrp:
             groups.append('\n\t{0}'.format(rgrp.process()))
 
-        content = '{0}{1}{2}{3}\n\t'.format(self.caption, self.colgroup,
-                ''.join(groups), ''.join(self.content))
+        content = '{0}{1}{2}{3}\n\t'.format(
+            self.caption, self.colgroup, ''.join(groups), ''.join(self.content))
         tbl = generate_tag('table', content, self.attributes)
         return '\t{0}\n\n'.format(tbl)
 
@@ -143,8 +149,8 @@ def __init__(self, capts, cap, row, restricted):
         self.caption = self.process(cap)
 
     def process(self, cap):
-        tag = generate_tag('caption', cap, self.attributes)
-        return '\t{0}\n\t'.format(tag)
+        tag = generate_tag('caption', cap.strip(), self.attributes)
+        return '\t{0}'.format(tag)
 
 
 class Colgroup(object):
@@ -161,7 +167,6 @@ def process(self):
         colgroup = ElementTree.Element('colgroup', attrib=group_atts)
         colgroup.text = '\n\t'
         if self.cols is not None:
-            has_newline = "\n" in self.cols
             match_cols = self.cols.replace('.', '').split('|')
             # colgroup is the first item in match_cols, the remaining items are
             # cols.
@@ -174,7 +179,8 @@ def process(self):
         # tab between cols and a newline at the end
         xml_declaration = "<?xml version='1.0' encoding='UTF-8'?>\n"
         colgrp = colgrp.replace(xml_declaration, '')
-        return colgrp.replace('><', '>\n\t<')
+        colgrp = colgrp.replace('><', '>\n\t<')
+        return f"\n\t{colgrp}"
 
 
 class Row(object):
diff --git a/textile/regex_strings.py b/textile/regex_strings.py
index 470203cb..c3691bb5 100644
--- a/textile/regex_strings.py
+++ b/textile/regex_strings.py
@@ -1,10 +1,8 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
 try:
     # Use regex module for matching uppercase characters if installed,
     # otherwise fall back to finding all the uppercase chars in a loop.
-    import regex as re
+    import regex as re  # noqa: F401
     upper_re_s = r'\p{Lu}'
     regex_snippets = {
         'acr': r'\p{Lu}\p{Nd}',
@@ -15,22 +13,25 @@
         'digit': r'\p{N}',
         'space': r'(?:\p{Zs}|\v)',
         'char': r'(?:[^\p{Zs}\v])',
-        }
+    }
 except ImportError:
     from sys import maxunicode
     upper_re_s = "".join(
-                [chr(c) for c in range(maxunicode) if chr(c).isupper()]
-            )
+        [chr(c) for c in range(maxunicode) if chr(c).isupper()]
+    )
     regex_snippets = {
         'acr': r'{0}0-9'.format(upper_re_s),
         'abr': r'{0}'.format(upper_re_s),
         'nab': r'a-z',
         'wrd': r'\w',
-        'cur': r'',
+        # All codepoints identified as currency symbols
+        # by the [mrab-regex library](https://pypi.org/project/regex/)
+        # and the Unicode standard.
+        'cur': r'$¢-¥֏؋৲৳৻૱௹฿៛\u20a0-\u20cf\ua838﷼﹩＄￠￡￥￦',
         'digit': r'\d',
         'space': r'(?:\s|\v)',
         'char': r'\S',
-        }
+    }
 
 halign_re_s = r'(?:\<(?!>)|(?<!<)\>|\<\>|\=|[()]+(?! ))'
 valign_re_s = r'[\-^~]'
@@ -43,10 +44,10 @@
 table_span_re_s = r'(?:{0}|{1})*'.format(colspan_re_s, rowspan_re_s)
 # regex string to match class, style and language attributes
 cls_re_s = (r'(?:'
-               r'{c}(?:{l}(?:{s})?|{s}(?:{l})?)?|'
-               r'{l}(?:{c}(?:{s})?|{s}(?:{c})?)?|'
-               r'{s}(?:{c}(?:{l})?|{l}(?:{c})?)?'
+            r'{c}(?:{l}(?:{s})?|{s}(?:{l})?)?|'
+            r'{l}(?:{c}(?:{s})?|{s}(?:{c})?)?|'
+            r'{s}(?:{c}(?:{l})?|{l}(?:{c})?)?'
             r')?'
-           ).format(c=class_re_s, s=style_re_s, l=language_re_s)
+            ).format(c=class_re_s, s=style_re_s, l=language_re_s)
 pnct_re_s = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]'
 syms_re_s = '¤§µ¶†‡•∗∴◊♠♣♥♦'
diff --git a/textile/textilefactory.py b/textile/textilefactory.py
index e5e2458e..402bf868 100644
--- a/textile/textilefactory.py
+++ b/textile/textilefactory.py
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals
 from .core import Textile
 
 
@@ -21,10 +20,7 @@ def __init__(self, restricted=False, lite=False, sanitize=False,
             self.method_parms['rel'] = 'nofollow'
 
         if noimage is None:
-            if restricted:
-                noimage = True
-            else:
-                noimage = False
+            noimage = bool(restricted)
 
         self.class_parms['noimage'] = noimage
         self.method_parms['sanitize'] = sanitize
diff --git a/textile/tools/imagesize.py b/textile/tools/imagesize.py
deleted file mode 100644
index 6fba73eb..00000000
--- a/textile/tools/imagesize.py
+++ /dev/null
@@ -1,27 +0,0 @@
-def getimagesize(url):
-    """
-    Attempts to determine an image's width and height, and returns a tuple,
-    (width, height), in pixels or an empty string in case of failure.
-    Requires that PIL is installed.
-
-    """
-
-    try:
-        from PIL import ImageFile
-    except ImportError:
-        return ''
-
-    from urllib.request import urlopen
-
-    try:
-        p = ImageFile.Parser()
-        f = urlopen(url)
-        while True:
-            s = f.read(1024)
-            if not s:
-                break
-            p.feed(s)
-            if p.image:
-                return p.image.size
-    except (IOError, ValueError):
-        return ''
diff --git a/textile/tools/sanitizer.py b/textile/tools/sanitizer.py
deleted file mode 100644
index 3c7209c6..00000000
--- a/textile/tools/sanitizer.py
+++ /dev/null
@@ -1,11 +0,0 @@
-def sanitize(string):
-    """
-    Ensure that the text does not contain any malicious HTML code which might
-    break the page.
-    """
-    from html5lib import parseFragment, serialize
-
-    parsed = parseFragment(string)
-    clean = serialize(parsed, sanitize=True, omit_optional_tags=False,
-                      quote_attr_values='always')
-    return clean
diff --git a/textile/utils.py b/textile/utils.py
index 1b18945a..578af4ed 100644
--- a/textile/utils.py
+++ b/textile/utils.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 try:
     import regex as re
 except ImportError:
@@ -14,16 +12,31 @@
 
 from textile.regex_strings import valign_re_s, halign_re_s
 
+# Regular expressions for stripping chunks of HTML,
+# leaving only content not wrapped in a tag or a comment
+RAW_TEXT_REVEALERS = (
+    # The php version orders the below list of tags differently.  The
+    # important thing to note here is that the pre must occur before the p or
+    # else the regex module doesn't properly match pre-s. It only matches the
+    # p in pre.
+    re.compile(r'<(pre|p|blockquote|div|form|table|ul|ol|dl|h[1-6])[^>]*?>.*</\1>',
+               re.S),
+    re.compile(r'<(hr|br)[^>]*?/>'),
+    re.compile(r'<!--.*?-->'),
+)
+
 
 def decode_high(text):
     """Decode encoded HTML entities."""
     text = '&#{0};'.format(text)
     return html.unescape(text)
 
+
 def encode_high(text):
     """Encode the text so that it is an appropriate HTML entity."""
     return ord(text)
 
+
 def encode_html(text, quotes=True):
     """Return text that's safe for an HTML attribute."""
     a = (
@@ -39,6 +52,7 @@ def encode_html(text, quotes=True):
         text = text.replace(k, v)
     return text
 
+
 def generate_tag(tag, content, attributes=None):
     """Generate a complete html tag using the ElementTree module.  tag and
     content are strings, the attributes argument is a dictionary.  As
@@ -59,49 +73,87 @@ def generate_tag(tag, content, attributes=None):
     # non-ascii text being html-entity encoded.  Not bad, but not entirely
     # matching php-textile either.
     element_tag = ElementTree.tostringlist(element, encoding=enc,
-            method='html')
+                                           method='html')
     element_tag.insert(len(element_tag) - 1, content)
     element_text = ''.join(element_tag)
     return element_text
 
+
+def getimagesize(url):
+    """
+    Attempts to determine an image's width and height, and returns a tuple,
+    (width, height), in pixels or an empty string in case of failure.
+    Requires that PIL is installed.
+    NOTE: falls through (returning None) if the data ends with no size found.
+    """
+
+    try:
+        from PIL import ImageFile
+    except ImportError:
+        return ''
+
+    from urllib.request import urlopen
+
+    try:
+        p = ImageFile.Parser()
+        f = urlopen(url)
+        while True:
+            s = f.read(1024)
+            if not s:
+                break
+            p.feed(s)
+            if p.image:
+                return p.image.size
+    except (IOError, ValueError):
+        return ''
+
+
 def has_raw_text(text):
     """checks whether the text has text not already enclosed by a block tag"""
-    # The php version has orders the below list of tags differently.  The
-    # important thing to note here is that the pre must occur before the p or
-    # else the regex module doesn't properly match pre-s. It only matches the
-    # p in pre.
-    r = re.compile(r'<(pre|p|blockquote|div|form|table|ul|ol|dl|h[1-6])[^>]*?>.*</\1>',
-                   re.S).sub('', text.strip()).strip()
-    r = re.compile(r'<(hr|br)[^>]*?/>').sub('', r)
-    return '' != r
+    r = text.strip()
+    for pattern in RAW_TEXT_REVEALERS:
+        r = pattern.sub('', r).strip()
+    return r != ''
+
+
+def human_readable_url(url):
+    """Drop everything up to the first "://" (or else the first ":") in url."""
+    for sep in ("://", ":"):
+        if sep in url:
+            return url.split(sep, 1)[1]
+    return url
+
 
 def is_rel_url(url):
     """Identify relative urls."""
     (scheme, netloc) = urlparse(url)[0:2]
     return not scheme and not netloc
 
+
 def is_valid_url(url):
     parsed = urlparse(url)
     if parsed.scheme == '':
         return True
     return False
 
+
 def list_type(list_string):
     listtypes = {
-        list_string.startswith('*'): 'u',
-        list_string.startswith('#'): 'o',
-        (not list_string.startswith('*') and not list_string.startswith('#')):
+        list_string.endswith('*'): 'u',
+        list_string.endswith('#'): 'o',
+        (not list_string.endswith('*') and not list_string.endswith('#')):
         'd'
     }
     return listtypes.get(True, False)
 
+
 def normalize_newlines(string):
-    out = string.strip()
-    out = re.sub(r'\r\n?', '\n', out)
+    out = re.sub(r'\r\n?', '\n', string)
     out = re.compile(r'^[ \t]*\n', flags=re.M).sub('\n', out)
-    out = re.sub(r'"$', '" ', out)
+    out = out.strip('\n')
     return out
 
+
 def parse_attributes(block_attributes, element=None, include_id=True, restricted=False):
     vAlign = {'^': 'top', '-': 'middle', '~': 'bottom'}
     hAlign = {'<': 'left', '=': 'center', '>': 'right', '<>': 'justify'}
@@ -146,8 +198,27 @@ def parse_attributes(block_attributes, element=None, include_id=True, restricted
 
     m = re.search(r'\(([^()]+)\)', matched, re.U)
     if m:
-        aclass = m.group(1)
         matched = matched.replace(m.group(0), '')
+        # Only allow a restricted subset of the CSS standard characters for classes/ids.
+        # No encoding markers allowed.
+        id_class_match = re.compile(r"^([-a-zA-Z 0-9_\/\[\]\.\:\#]+)$", re.U).match(m.group(1))
+        if id_class_match:
+            class_regex = re.compile(r"^([-a-zA-Z 0-9_\.\/\[\]]*)$")
+            id_class = id_class_match.group(1)
+            # If a textile class block attribute was found with a '#' in it
+            # split it into the css class and css id...
+            hashpos = id_class.find('#')
+            if hashpos >= 0:
+                id_match = re.match(r"^#([-a-zA-Z0-9_\.\:]*)$", id_class[hashpos:])
+                if id_match:
+                    block_id = id_match.group(1)
+
+                cls_match = class_regex.match(id_class[:hashpos])
+            else:
+                cls_match = class_regex.match(id_class)
+
+            if cls_match:
+                aclass = cls_match.group(1)
 
     m = re.search(r'([(]+)', matched)
     if m:
@@ -163,11 +234,6 @@ def parse_attributes(block_attributes, element=None, include_id=True, restricted
     if m:
         style.append("text-align:{0}".format(hAlign[m.group(1)]))
 
-    m = re.search(r'^(.*)#(.*)$', aclass)
-    if m:
-        block_id = m.group(2)
-        aclass = m.group(1)
-
     if element == 'col':
         pattern = r'(?:\\(\d+)\.?)?\s*(\d+)?'
         csp = re.match(pattern, matched)
@@ -195,6 +261,7 @@ def parse_attributes(block_attributes, element=None, include_id=True, restricted
         result['width'] = width
     return result
 
+
 def pba(block_attributes, element=None, include_id=True, restricted=False):
     """Parse block attributes."""
     attrs = parse_attributes(block_attributes, element, include_id, restricted)
diff --git a/textile/version.py b/textile/version.py
index f3c42a78..ad53acbb 100644
--- a/textile/version.py
+++ b/textile/version.py
@@ -1 +1 @@
-VERSION = '4.0.2'
+VERSION = '4.0.3'