-
Notifications
You must be signed in to change notification settings - Fork 0
/
reader.py
118 lines (99 loc) · 3.77 KB
/
reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import re
from mal_types import (_symbol, _keyword, _list, _vector, _hash_map, _s2u, _u)
class Blank(Exception):
    """Raised by the reader when the input contains no tokens to parse."""
class Reader:
    """Cursor over a sequence of tokens.

    ``tokens`` is the indexable token sequence and ``position`` is the index
    of the token that will be handed out next.
    """

    def __init__(self, tokens, position=0):
        """Store the token sequence and the starting cursor position."""
        self.tokens = tokens
        self.position = position

    def next(self):
        """Return the token under the cursor and move the cursor forward.

        The cursor is advanced before the token is looked up, so indexing
        past the end raises whatever ``tokens[...]`` raises (IndexError for
        a list) with the cursor already moved.
        """
        current = self.position
        self.position = current + 1
        return self.tokens[current]

    def peek(self):
        """Return the token under the cursor without consuming it.

        Returns None once the cursor has reached the end of the tokens.
        """
        return self.tokens[self.position] if self.position < len(self.tokens) else None
def tokenize(str):
    """Split source text into a list of token strings.

    Leading whitespace and commas before each token are skipped. A token is
    one of: ``~@``; a single special character (brackets, braces, parens,
    quote, backquote, ``~``, ``^``, ``@``); a double-quoted string (the
    closing quote is optional, so an unterminated string still yields a
    token); a ``;`` comment running to end of line; or a run of other
    non-delimiter characters. Comment tokens are discarded from the result.
    """
    token_pattern = re.compile(r"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:[\\].|[^\\"])*"?|;.*|[^\s\[\]{}()'"`@,;]+)""")
    kept = []
    for candidate in token_pattern.findall(str):
        # Tokens beginning with ';' are comments and never reach the parser.
        if candidate.startswith(';'):
            continue
        kept.append(candidate)
    return kept
def _unescape(s):
return s.replace('\\\\', _u('\u029e')).replace('\\"', '"').replace('\\n', '\n').replace(_u('\u029e'), '\\')
def read_atom(reader):
    """Consume one token from ``reader`` and convert it to a value.

    Conversion is attempted in this order:
      1. integer (optional ``-`` then digits)            -> ``int``
      2. digits/dots starting with a digit               -> ``float``
      3. complete double-quoted string                   -> unescaped string
      4. token starting with ``"`` but not terminated    -> Exception
      5. token starting with ``:``                       -> keyword
      6. one of the nil / true / false words             -> None / True / False
      7. anything else                                   -> symbol

    Raises:
        Exception: if a string token has no closing quote.
    """
    integer_pattern = re.compile(r"-?[0-9]+$")
    decimal_pattern = re.compile(r"-?[0-9][0-9.]*$")
    quoted_pattern = re.compile(r'"(?:[\\].|[^\\"])*"')

    # Words that read as constants; the boolean words exist in several languages.
    constants = {
        "nil": None,
        "true": True,
        "правда": True,
        "wahr": True,
        "false": False,
        "falsch": False,
        "ложь": False,
        "фальшивий": False,
    }

    tok = reader.next()

    if integer_pattern.match(tok):
        return int(tok)
    if decimal_pattern.match(tok):
        return float(tok)
    if quoted_pattern.match(tok):
        # Strip the surrounding quotes before decoding escapes.
        return _s2u(_unescape(tok[1:-1]))
    if tok[0] == '"':
        raise Exception("expected '\"', got EOF")
    if tok[0] == ':':
        return _keyword(tok[1:])
    if tok in constants:
        return constants[tok]
    return _symbol(tok)
def read_sequence(reader, typ=list, start='(', end=')'):
    """Read a delimited run of forms from ``reader`` into a ``typ()`` container.

    Args:
        reader: Token cursor providing ``next()`` and ``peek()``.
        typ: Zero-argument callable producing a container with ``append``.
        start: Token that must open the sequence.
        end: Token that closes the sequence.

    Returns:
        The container holding every form read between ``start`` and ``end``.

    Raises:
        Exception: if the first token is not ``start``, or the tokens run out
            before ``end`` is seen.
    """
    items = typ()
    opener = reader.next()
    if opener != start:
        raise Exception("expected '" + start + "'")

    while True:
        upcoming = reader.peek()
        if upcoming == end:
            break
        if not upcoming:
            raise Exception("expected '" + end + "', got EOF")
        items.append(read_form(reader))

    # Consume the closing delimiter.
    reader.next()
    return items
def read_hash_map(reader):
    """Read a ``{ ... }`` form and build a hash map from its elements.

    The elements between the braces are collected into a plain list and
    passed to ``_hash_map`` as positional arguments.
    """
    entries = read_sequence(reader, typ=list, start='{', end='}')
    return _hash_map(*entries)
def read_list(reader):
    """Read a ``( ... )`` form, collecting its elements into a ``_list()``."""
    return read_sequence(reader, typ=_list, start='(', end=')')
def read_vector(reader):
    """Read a ``[ ... ]`` form, collecting its elements into a ``_vector()``."""
    return read_sequence(reader, typ=_vector, start='[', end=']')
def read_form(reader):
    """Read the next complete form from ``reader``.

    Dispatches on the upcoming token:
      * a ``;`` comment token is consumed and read as None;
      * ``'``, `````, ``~``, ``~@`` and ``@`` wrap the following form in a
        list headed by ``quote``, ``quasiquote``, ``unquote``,
        ``splice-unquote`` or ``deref`` respectively;
      * ``^`` reads a metadata form and then a target form, producing
        ``(with-meta target meta)``;
      * ``(``, ``[`` and ``{`` start a list, vector or hash map;
      * anything else is read as an atom.

    Raises:
        Exception: if the tokens are exhausted where a form is required, or
            if a closing ``)``, ``]`` or ``}`` appears with no matching opener.
    """
    token = reader.peek()

    # peek() yields None at end of input (e.g. a trailing quote with nothing
    # after it); report that explicitly instead of failing on token[0].
    if token is None:
        raise Exception("unexpected EOF")

    # Reader macros that simply wrap the next form: token -> head symbol name.
    wrapping_macros = {
        '\'': 'quote',
        '`': 'quasiquote',
        '~': 'unquote',
        '~@': 'splice-unquote',
        '@': 'deref',
    }

    if token[0] == ';':
        reader.next()
        return None
    if token in wrapping_macros:
        reader.next()
        return _list(_symbol(wrapping_macros[token]), read_form(reader))
    if token == '^':
        reader.next()
        # Metadata is written first in the source but goes last in the result.
        meta = read_form(reader)
        return _list(_symbol('with-meta'), read_form(reader), meta)

    # A closing delimiter here means there was no matching opener.
    if token in (')', ']', '}'):
        raise Exception("unexpected '" + token + "'")

    if token == '(':
        return read_list(reader)
    if token == '[':
        return read_vector(reader)
    if token == '{':
        return read_hash_map(reader)
    return read_atom(reader)
def read_str(str):
    """Tokenize ``str`` and read the first form from it.

    Args:
        str: Source text to parse.

    Returns:
        The value produced by ``read_form`` for the first form in the text.

    Raises:
        Blank: if the text yields no tokens (empty, whitespace-only or
            comment-only input).
    """
    tokens = tokenize(str)
    # The emptiness check must come before any indexing into the token list;
    # otherwise blank input fails with IndexError instead of Blank. No
    # separate comment check is needed: tokenize() already drops every token
    # that starts with ';'.
    if not tokens:
        raise Blank("Blank Line")
    return read_form(Reader(tokens))