Skip to content

Commit

Permalink
chore(_balanced_quotes_shadow): another way of processing apostrophes
Browse files Browse the repository at this point in the history
Turned out not to affect performance in a meaningful way;
if anything, it may even be a little slower.
  • Loading branch information
5j9 committed Apr 18, 2024
1 parent 5fc587f commit 90bd864
Showing 1 changed file with 6 additions and 12 deletions.
18 changes: 6 additions & 12 deletions wikitextparser/_wikitext.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
IGNORECASE,
MULTILINE,
VERBOSE,
Match,
finditer,
match,
search,
Expand Down Expand Up @@ -113,11 +114,8 @@
DOTALL | MULTILINE | VERBOSE,
).finditer

substitute_apostrophes = rc( # bold-italic, bold, or italic tokens
rb"('\0*+){2,}+(?=[^']|$)",
MULTILINE | VERBOSE,
).sub
find_lines = rc(rb'(.*?)$').finditer
apostrophe_line_sub = rc(rb"^[^']*+[^\n]*+", MULTILINE).sub
apostrophes_sub = rc(rb"('\0*+){2,}+(?=[^']|$)", MULTILINE).sub

BOLD_FINDITER = rc(
rb"""
Expand Down Expand Up @@ -1021,8 +1019,9 @@ def _balanced_quotes_shadow(self) -> bytearray:
odd_bold_italics = False
append_bold_start = bold_starts.append

def process_line(line: bytes) -> bytes:
def process_line(line_match: Match) -> bytes:
nonlocal odd_italics, odd_bold_italics
line = apostrophes_sub(process_apostrophes, line_match[0])
if odd_italics and (len(bold_starts) + odd_bold_italics) % 2:
# one of the bold marks needs to be interpreted as italic
first_multi_letter_word = first_space = None
Expand Down Expand Up @@ -1078,12 +1077,7 @@ def process_apostrophes(m) -> bytes:
s = starts[-5]
return b'_' * (s - starts[0]) + m.string[s : m.end()]

return bytearray(b'\n').join(
[
process_line(substitute_apostrophes(process_apostrophes, line))
for line in self._shadow.splitlines()
]
)
return bytearray(apostrophe_line_sub(process_line, self._shadow))

def _bolds_italics_recurse(self, result: list, filter_cls: Optional[type]):
for prop in (
Expand Down

0 comments on commit 90bd864

Please sign in to comment.