Skip to content

Commit

Permalink
replace html5lib with bleach
Browse files Browse the repository at this point in the history
The sanitizer portion of html5lib will be deprecated, and the html5lib
project recommends replacing that functionality with bleach.
  • Loading branch information
ikirudennis committed Aug 9, 2024
1 parent 8a5053e commit d5e166b
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 17 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ classifiers = [
]
dynamic = ["version",]
dependencies = [
-'html5lib>=1.0.1',
+'bleach',
'regex>1.0; implementation_name != "pypy"',
]
requires-python = '>=3.8'
Expand Down
2 changes: 1 addition & 1 deletion tests/test_textile.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def test_sanitize():
assert result == expect

test = """<p style="width: expression(alert('evil'));">a paragraph of evil text</p>"""
-result = '<p style="">a paragraph of evil text</p>'
+result = '<p>a paragraph of evil text</p>'
expect = textile.Textile().parse(test, sanitize=True)
assert result == expect

Expand Down
10 changes: 6 additions & 4 deletions textile/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
from urllib.parse import urlparse, urlsplit, urlunsplit, quote, unquote
from collections import OrderedDict

-from textile.tools import sanitizer, imagesize
+from bleach import clean
+
+from textile.tools import imagesize
from textile.regex_strings import (align_re_s, cls_re_s, pnct_re_s,
regex_snippets, syms_re_s, table_span_re_s)
from textile.utils import (decode_high, encode_high, encode_html, generate_tag,
Expand Down Expand Up @@ -236,10 +238,10 @@ def parse(self, text, rel=None, sanitize=False):

if self.block_tags:
if self.lite:
-self.blocktag_whitelist = ['bq', 'p']
+self.blocktag_allowlist = ['bq', 'p', 'br']
text = self.block(text)
else:
-self.blocktag_whitelist = ['bq', 'p', 'bc', 'notextile',
+self.blocktag_allowlist = ['bq', 'p', 'br', 'bc', 'notextile',
'pre', 'h[1-6]', 'fn{0}+'.format(
regex_snippets['digit']), '###']
text = self.block(text)
Expand All @@ -263,7 +265,7 @@ def parse(self, text, rel=None, sanitize=False):
text = text.replace('{0}:glyph:'.format(self.uid), '')

if sanitize:
-text = sanitizer.sanitize(text)
+text = clean(text, tags=self.blocktag_allowlist)

text = self.retrieveTags(text)
text = self.retrieveURLs(text)
Expand Down
11 changes: 0 additions & 11 deletions textile/tools/sanitizer.py

This file was deleted.

0 comments on commit d5e166b

Please sign in to comment.