-
Notifications
You must be signed in to change notification settings - Fork 2
/
filter_customs.py
141 lines (109 loc) · 4.29 KB
/
filter_customs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import json
import sys
import re
# ripped from
# https://stackoverflow.com/questions/4020539/process-escape-sequences-in-a-string-in-python
# this should not have been necessary.
import codecs
# Matches one backslash escape sequence as Python string literals spell them.
# The hex forms require real hex digits so that ordinary text which merely
# starts like an escape (e.g. "\usepackage", "C:\Users\...") is not matched.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U[0-9A-Fa-f]{8}    # 8-digit hex escapes
    | \\u[0-9A-Fa-f]{4}    # 4-digit hex escapes
    | \\x[0-9A-Fa-f]{2}    # 2-digit hex escapes
    | \\[0-7]{1,3}         # Octal escapes
    | \\N\{[^}]+\}         # Unicode characters by name
    | \\[\\'"abfnrtv]      # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)


def decode_escapes(s):
    """Return *s* with backslash escape sequences replaced by their characters.

    Only the sequences matched by ``ESCAPE_SEQUENCE_RE`` are decoded; every
    other character (including non-ASCII text) is passed through untouched.
    A matched sequence that the ``unicode-escape`` codec rejects (for example
    an unknown ``\\N{...}`` name or a code point beyond U+10FFFF) is left in
    the output exactly as written instead of raising.
    """
    def decode_match(match):
        escape = match.group(0)
        try:
            return codecs.decode(escape, 'unicode-escape')
        except UnicodeDecodeError:
            # Looks like an escape but is not a valid one: keep it verbatim.
            return escape

    return ESCAPE_SEQUENCE_RE.sub(decode_match, s)
# starting out with the default almost-no-op MD filter from
# https://rust-lang.github.io/mdBook/for_developers/preprocessors.html
# custom_matcher = re.compile(r'\[\[([^|]+)|(.*)\]\]')
# Opening marker of an aside at the start of a line: "[[name]]" or
# "[[name | title]]".  Group 1 is the name (no "|", spaces or tabs); group 3
# is the optional title text following the "|".
custom_matcher = re.compile(r'\[\[([^| \t]+)[ \t]*(\|[ \t]*([^\]]*))?\]\]')


def replace_customs(text):
    """Rewrite ``[[name | title]]`` aside blocks in *text* as HTML ``<div>``s.

    The text is first passed through ``decode_escapes`` and then processed
    line by line:

    * A line that starts with a ``[[...]]`` marker opens
      ``<div class="asside">``; a non-empty title adds a nested
      ``<div class="asside-title">``.  A blank line follows so the first body
      row starts a new paragraph.
    * While an aside is open, each line starting with ``|`` is emitted with
      that leading ``|`` removed.
    * The first line not starting with ``|`` closes the aside with ``</div>``
      and is then handled like any other line, so it may itself open the next
      aside.
    * An aside still open at the end of the text is closed as well.

    Returns the rewritten text with lines joined by ``"\\n"``.
    """
    out = []
    in_custom = False
    for line in decode_escapes(text).split("\n"):
        if in_custom:
            if line.startswith("|"):
                out.append(line[1:])
                continue
            # Any other line ends the aside; fall through so this same line
            # is still checked for a new opening marker.
            out.append("</div>")
            in_custom = False

        marker = custom_matcher.match(line)
        if marker is None:
            out.append(line)
            continue

        in_custom = True
        out.append("<div class=\"asside\">")
        title = marker.group(3)  # None when the marker has no "| title" part
        if title:
            out.append(f"<div class=\"asside-title\">{title}</div>")
        # Blank line, so the first body row becomes its own <p>.
        out.append("")

    if in_custom:
        # The text ended inside an aside: do not leave the <div> open.
        out.append("</div>")
    return "\n".join(out)
def process_chapter_rec(chapter):
    """Apply ``replace_customs`` to *chapter* and to every nested chapter.

    *chapter* is a dict whose ``'content'`` entry is replaced in place with
    the rewritten text.  When a ``'sub_items'`` entry is present, each of its
    items that contains a ``'Chapter'`` entry is processed the same way,
    recursively.  Nothing is returned.
    """
    chapter['content'] = replace_customs(chapter['content'])

    if 'sub_items' not in chapter:
        return

    # Items without a 'Chapter' entry are skipped.
    nested_chapters = (
        entry['Chapter'] for entry in chapter['sub_items'] if 'Chapter' in entry
    )
    for nested in nested_chapters:
        process_chapter_rec(nested)
def renderer_support_status(argv):
    """Return the exit status for a command-line invocation with arguments.

    *argv* is the full argument list including the program name.

    * ``None`` when no arguments were given (normal preprocessing run).
    * ``0``    when the arguments start with ``supports html``.
    * ``1``    for any other arguments, including a lone ``supports`` with
      no renderer name (which must not raise ``IndexError``).
    """
    if len(argv) < 2:
        return None
    # Slicing never raises, even when the renderer name is missing.
    return 0 if list(argv[1:3]) == ["supports", "html"] else 1


def main():
    """Run the preprocessor: read the book JSON from stdin, write it to stdout.

    stdin carries a JSON array of two elements, ``[context, book]``.  Every
    entry of ``book['sections']`` that has a ``'Chapter'`` entry is rewritten
    with ``process_chapter_rec``; the modified ``book`` is written back to
    stdout as JSON.  If any command-line argument is present the process
    exits with the status from ``renderer_support_status`` instead.
    """
    status = renderer_support_status(sys.argv)
    if status is not None:
        sys.exit(status)

    _context, book = json.loads(sys.stdin.read())
    for section in book['sections']:
        # The section headers show up as sections too, but they have no
        # 'Chapter' element.
        if 'Chapter' not in section:
            continue
        process_chapter_rec(section['Chapter'])
    sys.stdout.write(json.dumps(book))


if __name__ == '__main__':
    main()