From 0a51f3d3135fbf63277860638c7f24421e04d562 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 7 Aug 2024 15:25:00 +1000 Subject: [PATCH 1/3] Clean up and document syntax tree child access API Here I commit to a more consistent but simpler child access API for syntax trees, as informed by the JuliaLowering work so far: * `is_leaf(node)` is given a precise definition (previously `!haschildren()` - but that had issues - see #483) * `children(node)` returns the child list, or `nothing` if the node is a leaf. The `nothing` might be seen as inconvenient, but mapping across the children of a leaf node is probably an error and one should probably branch on `is_leaf` first. * `numchildren(node)` is documented * `node[i]`, `node[i:j]` are documented to index into the child list We distinguish `GreenNode` and its implementation of `span` from `SyntaxNode` and its implementation of `byte_range` and `sourcetext` - these seem to just have very different APIs, at least as of now. I've deleted the questionable overloads of multidimensional `getindex` and the `child` function in favor of single-dimensional `getindex`. I don't know whether anyone ever ended up using these, but I didn't, and they didn't seem useful and consistent enough to justify the complexity. I've kept `setindex!` for now, to set a child of a `SyntaxNode`, though I'm not sure this is a good idea to support by default. 
--- docs/src/api.md | 39 ++++++++++++++-- src/green_tree.jl | 50 +++++++++++++++++---- src/hooks.jl | 7 +-- src/syntax_tree.jl | 107 ++++++++++++++------------------------------ test/green_node.jl | 9 ++++ test/runtests.jl | 6 --- test/syntax_tree.jl | 44 +++++++++--------- test/test_utils.jl | 5 ++- 8 files changed, 149 insertions(+), 118 deletions(-) diff --git a/docs/src/api.md b/docs/src/api.md index da9b81e1..9fa1a37f 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -124,13 +124,46 @@ JuliaSyntax.SHORT_FORM_FUNCTION_FLAG ## Syntax trees -Syntax tree types: +Access to the children of a tree node is provided by the functions + +```@docs +JuliaSyntax.is_leaf +JuliaSyntax.numchildren +JuliaSyntax.children +``` + +For convenient access to the children, we also provide `node[i]`, `node[i:j]` +and `node[begin:end]` by implementing `Base.getindex()`, `Base.firstindex()` and +`Base.lastindex()`. We choose to return a view from `node[i:j]` to make it +non-allocating. + +Tree traversal is supported by using these functions along with the predicates +such as [`kind`](@ref) listed above. + +### Trees referencing the source ```@docs JuliaSyntax.SyntaxNode -JuliaSyntax.GreenNode ``` -Functions applicable to syntax trees include everything in the sections on +Functions applicable to `SyntaxNode` include everything in the sections on heads/kinds as well as the accessor functions in the source code handling section. + +### Relocatable syntax trees + +[`GreenNode`](@ref) is a special low level syntax tree: it's "relocatable" in +the sense that it doesn't carry an absolute position in the source code or even +a reference to the source text. This allows it to be reused for incremental +parsing, but does make it a pain to work with directly! 
+ +```@docs +JuliaSyntax.GreenNode +``` + +Green nodes only have a relative position so implement `span()` instead of +`byte_range()`: + +```@docs +JuliaSyntax.span +``` diff --git a/src/green_tree.jl b/src/green_tree.jl index 8a4b4634..be55e4f6 100644 --- a/src/green_tree.jl +++ b/src/green_tree.jl @@ -23,24 +23,58 @@ As implementation choices, we choose that: struct GreenNode{Head} head::Head span::UInt32 - args::Union{Nothing,Vector{GreenNode{Head}}} + children::Union{Nothing,Vector{GreenNode{Head}}} end -function GreenNode(head::Head, span::Integer, args=nothing) where {Head} - GreenNode{Head}(head, span, args) +function GreenNode(head::Head, span::Integer, children=nothing) where {Head} + GreenNode{Head}(head, span, children) end # Accessors / predicates -is_leaf(node::GreenNode) = isnothing(node.args) -children(node::GreenNode) = isnothing(node.args) ? () : node.args -span(node::GreenNode) = node.span +is_leaf(node::GreenNode) = isnothing(node.children) +children(node::GreenNode) = node.children +numchildren(node::GreenNode) = isnothing(node.children) ? 0 : length(node.children) head(node::GreenNode) = node.head +""" + span(node) + +Get the number of bytes this node covers in the source text. +""" +span(node::GreenNode) = node.span + +Base.getindex(node::GreenNode, i::Int) = children(node)[i] +Base.getindex(node::GreenNode, rng::UnitRange) = view(children(node), rng) +Base.firstindex(node::GreenNode) = 1 +Base.lastindex(node::GreenNode) = length(children(node)) + +""" +Get absolute position and span of the child of `node` at the given tree `path`. +""" +function child_position_span(node::GreenNode, path::Int...) + n = node + p = 1 + for index in path + cs = children(n) + for i = 1:index-1 + p += span(cs[i]) + end + n = cs[index] + end + return n, p, n.span +end + +function highlight(io::IO, source::SourceFile, node::GreenNode, path::Int...; kws...) + _, p, span = child_position_span(node, path...) + q = p + span - 1 + highlight(io, source, p:q; kws...) 
+end + Base.summary(node::GreenNode) = summary(node.head) -Base.hash(node::GreenNode, h::UInt) = hash((node.head, node.span, node.args), h) +Base.hash(node::GreenNode, h::UInt) = hash((node.head, node.span, node.children), h) function Base.:(==)(n1::GreenNode, n2::GreenNode) - n1.head == n2.head && n1.span == n2.span && n1.args == n2.args + n1.head == n2.head && n1.span == n2.span && n1.children == n2.children end # Pretty printing diff --git a/src/hooks.jl b/src/hooks.jl index 97189321..228f14b0 100644 --- a/src/hooks.jl +++ b/src/hooks.jl @@ -45,11 +45,8 @@ function _incomplete_tag(n::SyntaxNode, codelen) return :none end end - if kind(c) == K"error" && begin - cs = children(c) - length(cs) > 0 - end - for cc in cs + if kind(c) == K"error" && numchildren(c) > 0 + for cc in children(c) if kind(cc) == K"error" return :other end diff --git a/src/syntax_tree.jl b/src/syntax_tree.jl index 02225795..aa3d4009 100644 --- a/src/syntax_tree.jl +++ b/src/syntax_tree.jl @@ -106,10 +106,36 @@ function _to_SyntaxNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int, end end +""" + is_leaf(node) + +Determine whether the node is a leaf of the tree. In our trees a "leaf" +corresponds to a single token in the source text. +""" is_leaf(node::TreeNode) = node.children === nothing -children(node::TreeNode) = (c = node.children; return c === nothing ? () : c) + +""" + children(node) + +Return an iterable list of children for the node. For leaves, return `nothing`. +""" +children(node::TreeNode) = node.children + +""" + numchildren(node) + +Return the number of children of `node`, or `0` for a leaf (where `children(node)` is `nothing`). +""" numchildren(node::TreeNode) = (isnothing(node.children) ? 
0 : length(node.children)) +Base.getindex(node::AbstractSyntaxNode, i::Int) = children(node)[i] +Base.getindex(node::AbstractSyntaxNode, rng::UnitRange) = view(children(node), rng) +Base.firstindex(node::AbstractSyntaxNode) = 1 +Base.lastindex(node::AbstractSyntaxNode) = length(children(node)) + +function Base.setindex!(node::SN, x::SN, i::Int) where {SN<:AbstractSyntaxNode} + children(node)[i] = x +end """ head(x) @@ -217,10 +243,12 @@ function Base.copy(node::TreeNode) # copy the container but not the data (ie, deep copy the tree, shallow copy the data). copy(::Expr) is similar # copy "un-parents" the top-level `node` that you're copying newnode = typeof(node)(nothing, is_leaf(node) ? nothing : typeof(node)[], copy(node.data)) - for child in children(node) - newchild = copy(child) - newchild.parent = newnode - push!(newnode, newchild) + if !is_leaf(node) + for child in children(node) + newchild = copy(child) + newchild.parent = newnode + push!(newnode, newchild) + end end return newnode end @@ -235,71 +263,4 @@ function build_tree(::Type{SyntaxNode}, stream::ParseStream; SyntaxNode(source, green_tree, position=first_byte(stream), keep_parens=keep_parens) end -#------------------------------------------------------------------------------- -# Tree utilities - -""" - child(node, i1, i2, ...) - -Get child at a tree path. If indexing accessed children, it would be -`node[i1][i2][...]` -""" -function child(node, path::Integer...) - n = node - for index in path - n = children(n)[index] - end - return n -end - -function setchild!(node::SyntaxNode, path, x) - n1 = child(node, path[1:end-1]...) - n1.children[path[end]] = x -end - -# We can overload multidimensional Base.getindex / Base.setindex! for node -# types. -# -# The justification for this is to view a tree as a multidimensional ragged -# array, where descending depthwise into the tree corresponds to dimensions of -# the array. -# -# However... 
this analogy is only good for complete trees at a given depth (= -# dimension). But the syntax is oh-so-handy! -function Base.getindex(node::Union{SyntaxNode,GreenNode}, path::Int...) - child(node, path...) -end -function Base.lastindex(node::Union{SyntaxNode,GreenNode}) - length(children(node)) -end - -function Base.setindex!(node::SyntaxNode, x::SyntaxNode, path::Int...) - setchild!(node, path, x) -end - -""" -Get absolute position and span of the child of `node` at the given tree `path`. -""" -function child_position_span(node::GreenNode, path::Int...) - n = node - p = 1 - for index in path - cs = children(n) - for i = 1:index-1 - p += span(cs[i]) - end - n = cs[index] - end - return n, p, n.span -end - -function child_position_span(node::SyntaxNode, path::Int...) - n = child(node, path...) - n, n.position, span(n) -end - -function highlight(io::IO, source::SourceFile, node::GreenNode, path::Int...; kws...) - _, p, span = child_position_span(node, path...) - q = p + span - 1 - highlight(io, source, p:q; kws...) 
-end +@deprecate haschildren(x) !is_leaf(x) false diff --git a/test/green_node.jl b/test/green_node.jl index 020c212d..c3c4da40 100644 --- a/test/green_node.jl +++ b/test/green_node.jl @@ -13,6 +13,15 @@ SyntaxHead(K"Identifier", 0x0000) ] + @test numchildren(t) == 5 + @test !is_leaf(t) + @test is_leaf(t[1]) + + @test t[1] === children(t)[1] + @test t[2:4] == [t[2],t[3],t[4]] + @test firstindex(t) == 1 + @test lastindex(t) == 5 + t2 = parsestmt(GreenNode, "aa + b") @test t == t2 @test t !== t2 diff --git a/test/runtests.jl b/test/runtests.jl index 0fc08d08..3fd62278 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,12 +1,6 @@ using JuliaSyntax using Test -using JuliaSyntax: SourceFile - -using JuliaSyntax: GreenNode, SyntaxNode, - flags, EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG, - children, child, setchild!, SyntaxHead - include("test_utils.jl") include("test_utils_tests.jl") include("fuzz_test.jl") diff --git a/test/syntax_tree.jl b/test/syntax_tree.jl index 97124384..f647f1ae 100644 --- a/test/syntax_tree.jl +++ b/test/syntax_tree.jl @@ -3,33 +3,35 @@ tt = "a*b + c" t = parsestmt(SyntaxNode, tt) - @test sourcetext(child(t, 1)) == "a*b" - @test sourcetext(child(t, 1, 1)) == "a" - @test sourcetext(child(t, 1, 2)) == "*" - @test sourcetext(child(t, 1, 3)) == "b" - @test sourcetext(child(t, 2)) == "+" - @test sourcetext(child(t, 3)) == "c" + @test sourcetext(t[1]) == "a*b" + @test sourcetext(t[1][1]) == "a" + @test sourcetext(t[1][2]) == "*" + @test sourcetext(t[1][3]) == "b" + @test sourcetext(t[2]) == "+" + @test sourcetext(t[3]) == "c" - @test JuliaSyntax.first_byte(child(t, 2)) == findfirst(==('+'), tt) - @test JuliaSyntax.source_line(child(t, 3)) == 1 - @test source_location(child(t, 3)) == (1, 7) + @test JuliaSyntax.first_byte(t[2]) == findfirst(==('+'), tt) + @test JuliaSyntax.source_line(t[3]) == 1 + @test source_location(t[3]) == (1, 7) # Child indexing - @test t[1] === child(t, 1) - @test t[1, 1] === child(t, 1, 1) - @test t[end] === child(t, 3) - # 
Unfortunately, can't make t[1, end] work - # as `lastindex(t, 2)` isn't well defined + @test t[end] === t[3] + @test sourcetext.(t[2:3]) == ["+", "c"] + @test sourcetext.(t[2:end]) == ["+", "c"] + @test firstindex(t) == 1 + @test lastindex(t) == 3 + @test !is_leaf(t) + @test is_leaf(t[3]) @test sprint(show, t) == "(call-i (call-i a * b) + c)" @test sprint(io->show(io, MIME("text/x.sexpression"), t, show_kind=true)) == "(call-i (call-i a::Identifier *::* b::Identifier) +::+ c::Identifier)" - @test sprint(highlight, child(t, 1, 3)) == "a*b + c\n# ╙" + @test sprint(highlight, t[1][3]) == "a*b + c\n# ╙" @test sprint(highlight, t.source, t.raw, 1, 3) == "a*b + c\n# ╙" # Pass-through field access - node = child(t, 1, 1) + node = t[1][1] @test node.val === :a # The specific error text has evolved over Julia versions. Check that it involves `SyntaxData` and immutability e = try node.val = :q catch e e end @@ -40,20 +42,20 @@ ct = copy(t) ct.data = nothing @test ct.data === nothing && t.data !== nothing - @test child(ct, 1).parent === ct - @test child(ct, 1) !== child(t, 1) + @test ct[1].parent === ct + @test ct[1] !== t[1] node = parsestmt(SyntaxNode, "f()") push!(node, parsestmt(SyntaxNode, "x")) @test length(children(node)) == 2 node[2] = parsestmt(SyntaxNode, "y") - @test sourcetext(child(node, 2)) == "y" + @test sourcetext(node[2]) == "y" # SyntaxNode with offsets t,_ = parsestmt(SyntaxNode, "begin a end\nbegin b end", 13) @test t.position == 13 - @test child(t,1).position == 19 - @test child(t,1).val == :b + @test t[1].position == 19 + @test t[1].val == :b # Unicode character ranges src = "ab + αβ" diff --git a/test/test_utils.jl b/test/test_utils.jl index 32a1df94..7553bf1c 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -1,6 +1,6 @@ using Test -# We need a relative include here as JuliaSyntax my come from Base. +# We need a relative include here as JuliaSyntax may come from Base. 
using .JuliaSyntax: # Parsing ParseStream, @@ -23,14 +23,15 @@ using .JuliaSyntax: # Node inspection kind, flags, + EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG, head, span, SyntaxHead, is_trivia, sourcetext, is_leaf, + numchildren, children, - child, fl_parseall, fl_parse, highlight, From bafc712102d6b0bf0e5b7885d83b53d65c670b61 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Wed, 7 Aug 2024 17:20:23 +1000 Subject: [PATCH 2/3] Mark public symbols with `public` keyword --- src/JuliaSyntax.jl | 84 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 71 insertions(+), 13 deletions(-) diff --git a/src/JuliaSyntax.jl b/src/JuliaSyntax.jl index 4e2144ee..4718824b 100644 --- a/src/JuliaSyntax.jl +++ b/src/JuliaSyntax.jl @@ -1,23 +1,81 @@ module JuliaSyntax -# Conservative list of exports - only export the most common/useful things -# here. +macro _public(syms) + if VERSION >= v"1.11" + names = syms isa Symbol ? [syms] : syms.args + esc(Expr(:public, names...)) + else + nothing + end +end + +# Public API, in the order of docs/src/api.md + +# Parsing. +export parsestmt, + parseall, + parseatom + +@_public parse!, + ParseStream, + build_tree -# Parsing. See also -# parse!(), ParseStream -export parsestmt, parseall, parseatom # Tokenization -export tokenize, Token, untokenize -# Source file handling. See also -# highlight() sourcetext() source_line() source_location() char_range() +@_public tokenize, + Token, + untokenize + +# Source file handling +@_public sourcefile, + byte_range, + char_range, + first_byte, + last_byte, + filename, + source_line, + source_location, + sourcetext, + highlight + export SourceFile -# Expression heads/kinds. See also -# flags() and related predicates. -export @K_str, kind, head -# Syntax tree types. 
See also -# GreenNode +@_public source_line_range + +# Expression predicates, kinds and flags +export @K_str, kind +@_public Kind + +@_public flags, + SyntaxHead, + head, + is_trivia, + is_prefix_call, + is_infix_op_call, + is_prefix_op_call, + is_postfix_op_call, + is_dotted, + is_suffixed, + is_decorated, + numeric_flags, + has_flags, + TRIPLE_STRING_FLAG, + RAW_STRING_FLAG, + PARENS_FLAG, + COLON_QUOTE, + TOPLEVEL_SEMICOLONS_FLAG, + MUTABLE_FLAG, + BARE_MODULE_FLAG, + SHORT_FORM_FUNCTION_FLAG + +# Syntax trees +@_public is_leaf, + numchildren, + children + export SyntaxNode +@_public GreenNode, + span + # Helper utilities include("utils.jl") From b644c879ff54e2a0accf23b60e66f892f457287b Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 8 Aug 2024 08:47:26 +1000 Subject: [PATCH 3/3] fixup! Mark public symbols with `public` keyword --- src/JuliaSyntax.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/JuliaSyntax.jl b/src/JuliaSyntax.jl index 4718824b..9afff872 100644 --- a/src/JuliaSyntax.jl +++ b/src/JuliaSyntax.jl @@ -21,7 +21,7 @@ export parsestmt, build_tree # Tokenization -@_public tokenize, +export tokenize, Token, untokenize