diff --git a/src/expr.jl b/src/expr.jl
index 6d1e8ed6..fd61243c 100644
--- a/src/expr.jl
+++ b/src/expr.jl
@@ -326,6 +326,10 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
                 args = Any[args[1], a2a...]
             end
         end
+    elseif k == K"catch"
+        if kind(childheads[1]) == K"Placeholder"
+            args[1] = false
+        end
     elseif k == K"try"
         # Try children in source order:
         #   try_block catch_var catch_block else_block finally_block
diff --git a/src/kinds.jl b/src/kinds.jl
index 30d16e3e..27d90c22 100644
--- a/src/kinds.jl
+++ b/src/kinds.jl
@@ -197,6 +197,7 @@ register_kinds!(JuliaSyntax, 0, [
     # Identifiers
     "BEGIN_IDENTIFIERS"
         "Identifier"
+        "Placeholder"  # Used for empty catch variables, and all-underscore identifiers in lowering
     # Macro names are modelled as special kinds of identifiers because the full
     # macro name may not appear as characters in the source: The `@` may be
     # detached from the macro name as in `@A.x` (ugh!!), or have a _str or _cmd
@@ -253,6 +254,7 @@ register_kinds!(JuliaSyntax, 0, [
     "END_KEYWORDS"

     "BEGIN_LITERAL"
+        "Bool"
         "Integer"
         "BinInt"
         "HexInt"
@@ -262,8 +264,6 @@ register_kinds!(JuliaSyntax, 0, [
         "String"
         "Char"
         "CmdString"
-        "true"
-        "false"
     "END_LITERAL"

     "BEGIN_DELIMITERS"
@@ -1067,7 +1067,7 @@ register_kinds!(JuliaSyntax, 0, [

     # Special tokens
     "TOMBSTONE"  # Empty placeholder for kind to be filled later
-    "None"       # Placeholder; never emitted by lexer
+    "None"       # Never emitted by lexer/parser
     "EndMarker"  # EOF

     "BEGIN_ERRORS"
@@ -1097,6 +1097,7 @@ const _nonunique_kind_names = Set([
    K"Whitespace"
    K"NewlineWs"
    K"Identifier"
+   K"Placeholder"

    K"ErrorEofMultiComment"
    K"ErrorInvalidNumericConstant"
@@ -1169,6 +1170,7 @@ const _token_error_descriptions = Dict{Kind, String}(

 #-------------------------------------------------------------------------------
 # Predicates
+is_identifier(k::Kind) = K"BEGIN_IDENTIFIERS" <= k <= K"END_IDENTIFIERS"
 is_contextual_keyword(k::Kind) = K"BEGIN_CONTEXTUAL_KEYWORDS" <= k <= K"END_CONTEXTUAL_KEYWORDS"
 is_error(k::Kind) = K"BEGIN_ERRORS" <= k <= K"END_ERRORS" || k == K"ErrorInvalidOperator" || k == K"Error**"
 is_keyword(k::Kind) = K"BEGIN_KEYWORDS" <= k <= K"END_KEYWORDS"
@@ -1177,6 +1179,7 @@ is_literal(k::Kind) = K"BEGIN_LITERAL" <= k <= K"END_LITERAL"
 is_operator(k::Kind) = K"BEGIN_OPS" <= k <= K"END_OPS"
 is_word_operator(k::Kind) = (k == K"in" || k == K"isa" || k == K"where")

+is_identifier(k) = is_identifier(kind(k))
 is_contextual_keyword(k) = is_contextual_keyword(kind(k))
 is_error(k) = is_error(kind(k))
 is_keyword(k) = is_keyword(kind(k))
diff --git a/src/literal_parsing.jl b/src/literal_parsing.jl
index 5a744f97..f2b99b86 100644
--- a/src/literal_parsing.jl
+++ b/src/literal_parsing.jl
@@ -406,10 +406,8 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
                            last(srcrange)+1, Diagnostic[])
         end
         return had_error ? ErrorVal() : String(take!(io))
-    elseif k == K"true"
-        return true
-    elseif k == K"false"
-        return false
+    elseif k == K"Bool"
+        return txtbuf[first(srcrange)] == u8"t"
     end

     # TODO: Avoid allocating temporary String here
@@ -418,7 +416,7 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
         parse_int_literal(val_str)
     elseif k in KSet"BinInt OctInt HexInt"
         parse_uint_literal(val_str, k)
-    elseif k == K"Identifier"
+    elseif k == K"Identifier" || k == K"Placeholder"
         if has_flags(head, RAW_STRING_FLAG)
             io = IOBuffer()
             unescape_raw_string(io, txtbuf, first(srcrange), last(srcrange)+1, false)
diff --git a/src/parser.jl b/src/parser.jl
index 8da46517..49697964 100644
--- a/src/parser.jl
+++ b/src/parser.jl
@@ -2254,7 +2254,7 @@ function parse_try(ps)
     if peek(ps) == K"else"
         # catch-else syntax: https://github.com/JuliaLang/julia/pull/42211
         #
-        #v1.8: try catch ; else end ==> (try (block) (catch false (block)) (else (block)))
+        #v1.8: try catch ; else end ==> (try (block) (catch □ (block)) (else (block)))
         else_mark = position(ps)
         bump(ps, TRIVIA_FLAG)
         parse_block(ps)
@@ -2262,7 +2262,7 @@ function parse_try(ps)
             #v1.8: try else x finally y end ==> (try (block) (else (error (block x))) (finally (block y)))
             emit(ps, else_mark, K"error", error="Expected `catch` before `else`")
         end
-        #v1.7: try catch ; else end ==> (try (block) (catch false (block)) (else (error (block))))
+        #v1.7: try catch ; else end ==> (try (block) (catch □ (block)) (else (error (block))))
         min_supported_version(v"1.8", ps, else_mark, "`else` after `catch`")
         emit(ps, else_mark, K"else")
     end
@@ -2302,10 +2302,10 @@ function parse_catch(ps::ParseState)
     bump(ps, TRIVIA_FLAG)
     k = peek(ps)
     if k in KSet"NewlineWs ;" || is_closing_token(ps, k)
-        # try x catch end      ==>  (try (block x) (catch false (block)))
-        # try x catch ; y end  ==>  (try (block x) (catch false (block y)))
-        # try x catch \n y end ==>  (try (block x) (catch false (block y)))
-        bump_invisible(ps, K"false")
+        # try x catch end      ==>  (try (block x) (catch □ (block)))
+        # try x catch ; y end  ==>  (try (block x) (catch □ (block y)))
+        # try x catch \n y end ==>  (try (block x) (catch □ (block y)))
+        bump_invisible(ps, K"Placeholder")
     else
         # try x catch e y end  ==>  (try (block x) (catch e (block y)))
         # try x catch $e y end ==>  (try (block x) (catch ($ e) (block y)))
diff --git a/src/syntax_tree.jl b/src/syntax_tree.jl
index af2d3e31..38d853c0 100644
--- a/src/syntax_tree.jl
+++ b/src/syntax_tree.jl
@@ -125,6 +125,19 @@ byte_range(node::AbstractSyntaxNode) = node.position:(node.position + span(node)

 sourcefile(node::AbstractSyntaxNode) = node.source

+function leaf_string(ex)
+    if !is_leaf(ex)
+        throw(ArgumentError("leaf_string should be used for leaf nodes only"))
+    end
+    k = kind(ex)
+    value = ex.val
+    # TODO: Dispatch on kind extension module (??)
+    return k == K"Placeholder" ? "□"*string(value) :
+           is_identifier(k)    ? string(value)     :
+           value isa Symbol    ? string(value)     : # see parse_julia_literal for other cases which go here
+           repr(value)
+end
+
 function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode,
                            indent, show_byte_offsets)
     fname = filename(node)
@@ -134,8 +147,7 @@ function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode,
         posstr *= "$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))│"
     end
     val = node.val
-    nodestr = !is_leaf(node) ? "[$(untokenize(head(node)))]" :
-              isa(val, Symbol) ? string(val) : repr(val)
+    nodestr = is_leaf(node) ? leaf_string(node) : "[$(untokenize(head(node)))]"
     treestr = string(indent, nodestr)
     # Add filename if it's changed from the previous node
     if fname != current_filename[]
@@ -157,8 +169,7 @@ function _show_syntax_node_sexpr(io, node::AbstractSyntaxNode)
         if is_error(node)
             print(io, "(", untokenize(head(node)), ")")
         else
-            val = node.val
-            print(io, val isa Symbol ? string(val) : repr(val))
+            print(io, leaf_string(node))
         end
     else
         print(io, "(", untokenize(head(node)))
diff --git a/src/tokenize.jl b/src/tokenize.jl
index de2daa0f..af78bee4 100644
--- a/src/tokenize.jl
+++ b/src/tokenize.jl
@@ -1319,8 +1319,10 @@ function lex_identifier(l::Lexer, c)

     if n > MAX_KW_LENGTH
         emit(l, K"Identifier")
+    elseif h == _true_hash || h == _false_hash
+        emit(l, K"Bool")
     else
-        emit(l, get(kw_hash, h, K"Identifier"))
+        emit(l, get(_kw_hash, h, K"Identifier"))
     end
 end

@@ -1374,8 +1376,6 @@ K"while",
 K"in",
 K"isa",
 K"where",
-K"true",
-K"false",

 K"abstract",
 K"as",
@@ -1387,6 +1387,8 @@ K"type",
 K"var",
 ]

-const kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws)
+const _true_hash = simple_hash("true")
+const _false_hash = simple_hash("false")
+const _kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws)

 end # module
diff --git a/test/parser.jl b/test/parser.jl
index 70842a4d..b4fac482 100644
--- a/test/parser.jl
+++ b/test/parser.jl
@@ -635,18 +635,18 @@ tests = [
         "(try (block x) (catch e (block y)) (finally (block z)))"
     ((v=v"1.8",), "try \n x \n catch e \n y \n else z finally \n w end") =>
         "(try (block x) (catch e (block y)) (else (block z)) (finally (block w)))"
-    "try x catch end"       =>  "(try (block x) (catch false (block)))"
-    "try x catch ; y end"   =>  "(try (block x) (catch false (block y)))"
-    "try x catch \n y end"  =>  "(try (block x) (catch false (block y)))"
+    "try x catch end"       =>  "(try (block x) (catch □ (block)))"
+    "try x catch ; y end"   =>  "(try (block x) (catch □ (block y)))"
+    "try x catch \n y end"  =>  "(try (block x) (catch □ (block y)))"
     "try x catch e y end"   =>  "(try (block x) (catch e (block y)))"
     "try x catch \$e y end" =>  "(try (block x) (catch (\$ e) (block y)))"
     "try x catch var\"#\" y end" => "(try (block x) (catch (var #) (block y)))"
     "try x catch e+3 y end" =>  "(try (block x) (catch (error (call-i e + 3)) (block y)))"
     "try x finally y end"   =>  "(try (block x) (finally (block y)))"
     # v1.8 only
-    ((v=v"1.8",), "try catch ; else end") => "(try (block) (catch false (block)) (else (block)))"
+    ((v=v"1.8",), "try catch ; else end") => "(try (block) (catch □ (block)) (else (block)))"
     ((v=v"1.8",), "try else x finally y end") => "(try (block) (else (error (block x))) (finally (block y)))"
-    ((v=v"1.7",), "try catch ; else end") => "(try (block) (catch false (block)) (else (error (block))))"
+    ((v=v"1.7",), "try catch ; else end") => "(try (block) (catch □ (block)) (else (error (block))))"
     # finally before catch :-(
     "try x finally y catch e z end" => "(try (block x) (finally (block y)) (catch e (block z)))"
     "try x end" => "(try (block x) (error-t))"
diff --git a/test/tokenize.jl b/test/tokenize.jl
index eb30370f..8913a20c 100644
--- a/test/tokenize.jl
+++ b/test/tokenize.jl
@@ -198,9 +198,10 @@ end
 end

 @testset "tokenizing true/false literals" begin
-    @test tok("somtext true", 3).kind == K"true"
-    @test tok("somtext false", 3).kind == K"false"
+    @test tok("somtext true", 3).kind == K"Bool"
+    @test tok("somtext false", 3).kind == K"Bool"
     @test tok("somtext tr", 3).kind == K"Identifier"
+    @test tok("somtext truething", 3).kind == K"Identifier"
     @test tok("somtext falsething", 3).kind == K"Identifier"
 end

@@ -962,9 +963,6 @@ const all_kws = Set([
     "primitive",
     "type",
     "var",
-    # Literals
-    "true",
-    "false",
     # Word-like operators
     "in",
     "isa",
@@ -974,14 +972,14 @@ const all_kws = Set([
 function check_kw_hashes(iter)
     for cs in iter
         str = String([cs...])
-        if Tokenize.simple_hash(str) in keys(Tokenize.kw_hash)
+        if Tokenize.simple_hash(str) in keys(Tokenize._kw_hash)
             @test str in all_kws
         end
     end
 end

 @testset "simple_hash" begin
-    @test length(all_kws) == length(Tokenize.kw_hash)
+    @test length(all_kws) == length(Tokenize._kw_hash)

     @testset "Length $len keywords" for len in 1:5
         check_kw_hashes(String([cs...]) for cs in Iterators.product(['a':'z' for _ in 1:len]...))