-
Notifications
You must be signed in to change notification settings - Fork 1
/
parser.go
136 lines (125 loc) · 3.4 KB
/
parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
package queryparser
import (
"fmt"
"io"
"strings"
)
// Parser builds a Query from the tokens produced by a scanner.
// It keeps a one-token buffer so that the most recently read token can be
// pushed back with unscan and handed out again by the next call to scan.
type Parser struct {
s *scanner // token source that scan reads from
buf struct {
tok token // most recently read token
lit string // literal text belonging to that token
n int // 1 while the buffered token is pushed back and pending re-read, else 0
}
}
// NewParser builds a Parser whose tokens come from a new scanner wrapped
// around r.
func NewParser(r io.Reader) *Parser {
	p := new(Parser)
	p.s = newScanner(r)
	return p
}
// Parse parses a query string.
func (p *Parser) Parse() (*Query, error) {
var phraseWords []string
var insideAPhrase bool
var currentSubquery *Query
var currentSubqueryParent *Query
query := &Query{Occur: MUST}
var currentTerms = &query.Terms
for {
tok, lit := p.scanIgnoreWhitespace()
switch tok {
case dquote:
// If this quote is end of a phrase
if insideAPhrase {
*currentTerms = append(*currentTerms, strings.Join(phraseWords, " "))
phraseWords = nil
insideAPhrase = false
// Otherwise assume it is start of a phrase
} else {
insideAPhrase = true
}
case lparen:
// Ignore parentheses inside phrases
if !insideAPhrase {
if currentSubquery == nil {
query.SubQueries = append(query.SubQueries, Query{Occur: MUST})
currentSubqueryParent = query
currentSubquery = &query.SubQueries[0]
} else {
currentSubquery.SubQueries = append(currentSubquery.SubQueries, Query{Occur: MUST})
currentSubqueryParent = currentSubquery
currentSubquery = ¤tSubquery.SubQueries[len(currentSubquery.SubQueries)-1]
}
currentTerms = ¤tSubquery.Terms
}
case rparen:
// Ignore parentheses inside phrases
if !insideAPhrase {
currentSubquery = currentSubqueryParent
currentTerms = ¤tSubquery.Terms
}
case literal:
if insideAPhrase {
phraseWords = append(phraseWords, lit)
} else {
*currentTerms = append(*currentTerms, lit)
}
case and:
if insideAPhrase {
phraseWords = append(phraseWords, lit)
} else {
if len(*currentTerms) > 0 {
if currentSubquery != nil {
currentSubquery.Occur = MUST
} else {
query.Occur = MUST
}
}
}
case or:
if insideAPhrase {
phraseWords = append(phraseWords, lit)
} else {
if currentSubquery != nil {
currentSubquery.Occur = SHOULD
// Because and has higher precedence than or, implicitly create subquery
} else if len(*currentTerms) > 1 && query.Occur == MUST {
query.SubQueries = append(query.SubQueries, Query{Occur: SHOULD})
currentSubqueryParent = query
currentSubquery = &query.SubQueries[0]
currentTerms = ¤tSubquery.Terms
} else {
query.Occur = SHOULD
}
}
case eof:
return query, nil
default:
fmt.Printf("default: tok=%v lit=%v\n", tok, lit)
}
}
}
// scan yields the next token together with its literal. A token that was
// pushed back with unscan is served from the buffer first; otherwise a fresh
// token is pulled from the scanner and remembered so it can be unscanned.
func (p *Parser) scan() (tok token, lit string) {
	if p.buf.n == 0 {
		// Nothing pending: read from the scanner and remember the result.
		tok, lit = p.s.scan()
		p.buf.tok = tok
		p.buf.lit = lit
		return tok, lit
	}

	// Hand out the pushed-back token and mark the buffer as consumed.
	p.buf.n = 0
	return p.buf.tok, p.buf.lit
}
// scanIgnoreWhitespace returns the next token, skipping over a single
// leading whitespace token if one is present.
func (p *Parser) scanIgnoreWhitespace() (tok token, lit string) {
	if tok, lit = p.scan(); tok != ws {
		return tok, lit
	}
	// The token was whitespace: drop it and return whatever follows.
	return p.scan()
}
// unscan marks the buffered token as pending so that the next call to scan
// returns it again instead of reading from the scanner.
func (p *Parser) unscan() {
	p.buf.n = 1
}