Skip to content

Commit

Permalink
Implement char_range() to respect string indices
Browse files Browse the repository at this point in the history
Also widen `byte_range()` signatures to accept other integer types
  • Loading branch information
c42f committed Aug 8, 2024
1 parent d8796c6 commit 1a8c666
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 5 deletions.
1 change: 1 addition & 0 deletions docs/src/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ JuliaSyntax.last_byte
JuliaSyntax.filename
JuliaSyntax.source_line
JuliaSyntax.source_location
JuliaSyntax.char_range
JuliaSyntax.sourcetext
JuliaSyntax.highlight
```
Expand Down
2 changes: 1 addition & 1 deletion src/JuliaSyntax.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ export parsestmt, parseall, parseatom
# Tokenization
export tokenize, Token, untokenize
# Source file handling. See also
# highlight() sourcetext() source_line() source_location()
# highlight() sourcetext() source_line() source_location() char_range()
export SourceFile
# Expression heads/kinds. See also
# flags() and related predicates.
Expand Down
19 changes: 16 additions & 3 deletions src/source_files.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,24 @@ end
"""
byte_range(x)
Return the range of bytes which `x` covers in the source text.
Return the range of bytes which `x` covers in the source text. See also
[`char_range`](@ref).
"""
# Generic function with no methods here; methods are added per syntax object type.
function byte_range end

"""
    char_range(x)

Return the range of *character indices* in the source text which is covered by
the syntax object `x`. Use this, rather than [`byte_range`](@ref), when you
want to index the source string: the upper end is moved back to the start of
the character containing the last byte of `x`.
"""
function char_range(x)
    bytes = byte_range(x)
    start = first(bytes)
    # The last byte may lie inside a multi-byte character; `thisind` snaps it
    # back to the index at which that character begins.
    stop = thisind(sourcefile(x), last(bytes))
    return start:stop
end

"""
first_byte(x)
Expand Down Expand Up @@ -232,11 +245,11 @@ function Base.view(source::SourceFile, rng::AbstractUnitRange)
SubString(source.code, i, j)
end

function Base.getindex(source::SourceFile, i::Int)
# Index into the source text at `i`, after shifting `i` down by
# `source.byte_offset` to get an index into `source.code`.
function Base.getindex(source::SourceFile, i::Integer)
    local_index = i - source.byte_offset
    return source.code[local_index]
end

function Base.thisind(source::SourceFile, i::Int)
# `thisind` for a `SourceFile`: shift `i` down by `source.byte_offset`, look up
# the character start in `source.code`, then shift the result back up.
function Base.thisind(source::SourceFile, i::Integer)
    offset = source.byte_offset
    return thisind(source.code, i - offset) + offset
end

Expand Down
9 changes: 9 additions & 0 deletions test/syntax_tree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,15 @@
@test t.position == 13
@test child(t,1).position == 19
@test child(t,1).val == :b

# Unicode character ranges
src = "ab + αβ"
t = parsestmt(SyntaxNode, src)
@test char_range(t[1]) == 1:2
@test char_range(t[2]) == 4:4
@test char_range(t[3]) == 6:8
# conversely, β takes two bytes so char_range(t[3]) != byte_range(t[3])
@test byte_range(t[3]) == 6:9
end

@testset "SyntaxNode pretty printing" begin
Expand Down
4 changes: 3 additions & 1 deletion test/test_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ using .JuliaSyntax:
highlight,
tokenize,
untokenize,
filename
filename,
byte_range,
char_range

if VERSION < v"1.6"
# Compat stuff which might not be in Base for older versions
Expand Down

0 comments on commit 1a8c666

Please sign in to comment.