Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prefix automaton -- julia bug? #75

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,18 @@ version = "0.4.0"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
MacroTools = "0.5"
ProgressMeter = "1"
SparseArrays = "1.6"
julia = "1.6"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "Documenter"]
9 changes: 6 additions & 3 deletions src/Automata/Automata.jl
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
module Automata

import ..KnuthBendix
import ..KnuthBendix: AbstractWord, RewritingOrdering, RewritingSystem, Rule
import ..KnuthBendix: alphabet, rules, word_type
import ..KnuthBendix:
AbstractWord, RewritingOrdering, AbstractRewritingSystem, Rule
import ..KnuthBendix: alphabet, ordering, rules, word_type

export IndexAutomaton
export IndexAutomaton, PrefixAutomaton

include("states.jl")
include("interface.jl")
include("index_automaton.jl")
include("rebuilding_idxA.jl")

include("prefix_automaton.jl")

include("backtrack.jl")

end # of module Automata
84 changes: 45 additions & 39 deletions src/Automata/index_automaton.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,57 +26,29 @@ mutable struct IndexAutomaton{S,O<:RewritingOrdering} <: Automaton{S}
end

initial(idxA::IndexAutomaton) = idxA.initial
KnuthBendix.ordering(idxA::IndexAutomaton) = idxA.ordering

hasedge(::IndexAutomaton, ::State, ::Integer) = true

addedge!(idxA::IndexAutomaton, src::State, dst::State, label) = src[label] = dst

isfail(idxA::IndexAutomaton, σ::State) = σ === idxA.fail
isaccepting(idxA::IndexAutomaton, σ::State) = !isdefined(σ, :value)

signature(idxA::IndexAutomaton, σ::State) = id(σ)
isaccepting(::IndexAutomaton, σ::State) = !isdefined(σ, :value)

Base.isempty(idxA::IndexAutomaton) = degree(initial(idxA)) == 0
hasedge(::IndexAutomaton, ::State, ::Integer) = true

function KnuthBendix.word_type(::Type{<:IndexAutomaton{S}}) where {S}
return eltype(valtype(S))
function addedge!(idxA::IndexAutomaton, src::State, dst::State, label)
src[label] = dst
return idxA
end

Base.Base.@propagate_inbounds function trace(
function trace(
label::Integer,
::IndexAutomaton,
σ::State,
)
return σ[label]
end

function IndexAutomaton(rws::RewritingSystem{W}) where {W}
if !KnuthBendix.isreduced(rws)
throw(
ArgumentError(
"""`IndexAutomaton` can be constructed from reduced rewriting systems only.
Call `KnuthBendix.reduce!(rws)` and try again.""",
),
)
end

id = @view one(W)[1:0]
S = State{typeof(id),UInt32,eltype(rules(rws))}
ord = KnuthBendix.ordering(rws)
A = alphabet(ord)
fail = S(Vector{S}(undef, length(A)), id, 0)
α = State(fail, id, 0)
Base.isempty(idxA::IndexAutomaton) = degree(initial(idxA)) == 0

idxA = IndexAutomaton(ord, α, fail, Vector{typeof(α)}[])
idxA = self_complete!(idxA, fail, override = true)
idxA = direct_edges!(idxA, rules(rws))
idxA = skew_edges!(idxA)
signature(::IndexAutomaton, σ::State) = id(σ)

return idxA
end

KnuthBendix.isreduced(idxA::Automata.IndexAutomaton) = true
# construction/modification

function direct_edges!(idxA::IndexAutomaton, rwrules)
for (idx, rule) in enumerate(rwrules)
Expand Down Expand Up @@ -179,11 +151,45 @@ function Base.show(io::IO, idxA::IndexAutomaton)
count(st -> !Automata.isaccepting(idxA, st), states) for
states in idxA.states
]
ord = KnuthBendix.ordering(idxA)
ord = ordering(idxA)
A = alphabet(ord)
println(io, "index automaton over $(typeof(ord)) with $(length(A)) letters")
nstates = sum(length, idxA.states)
nstates = sum(length, idxA.states) + 1 # the initial one
println(io, " • ", nstates, " state" * (nstates == 1 ? "" : "s"))
print(io, " • ", sum(rules_count), " non-accepting states (rw rules)")
return
end

# for using IndexAutomaton as rewriting struct in KnuthBendix
"""
    ordering(idxA::IndexAutomaton)

Return the rewriting ordering stored in `idxA`.
"""
function KnuthBendix.ordering(idxA::IndexAutomaton)
    return idxA.ordering
end

"""
    word_type(::Type{<:IndexAutomaton{S}})

Return the element type of the value type of the state type `S`.
"""
function KnuthBendix.word_type(::Type{<:IndexAutomaton{S}}) where {S}
    V = valtype(S)
    return eltype(V)
end

"""
    IndexAutomaton(rws::AbstractRewritingSystem)

Build an index automaton from the rules of the rewriting system `rws`.

The automaton is created with a fail state and an initial state, completed
with `self_complete!`, and then extended by `direct_edges!` (from
`rules(rws)`) and `skew_edges!`, in this order.

Throws `ArgumentError` when `KnuthBendix.isreduced(rws)` is `false`.
"""
function IndexAutomaton(rws::AbstractRewritingSystem{W}) where {W}
    KnuthBendix.isreduced(rws) || throw(
        ArgumentError(
            """`IndexAutomaton` can be constructed from reduced rewriting systems only.
            Call `KnuthBendix.reduce!(rws)` and try again.""",
        ),
    )

    # empty view into `one(W)`; used as the identifier of both special states
    trivial = @view one(W)[1:0]
    StateT = State{typeof(trivial),UInt32,eltype(rules(rws))}
    ord = ordering(rws)
    nletters = length(alphabet(ord))

    fail = StateT(Vector{StateT}(undef, nletters), trivial, 0)
    α = State(fail, trivial, 0)

    idxA = IndexAutomaton(ord, α, fail, Vector{typeof(α)}[])
    idxA = self_complete!(idxA, fail, override = true)
    idxA = direct_edges!(idxA, rules(rws))
    return skew_edges!(idxA)
end

"""
    isreduced(::IndexAutomaton)

Always return `true` for an `IndexAutomaton`.
"""
function KnuthBendix.isreduced(::IndexAutomaton)
    return true
end
8 changes: 4 additions & 4 deletions src/Automata/interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,11 @@ returned.
for (i, l) in enumerate(w)
if hasedge(A, σ, l)
τ = trace(l, A, σ)
isfail(A, τ) && return i - 1, σ
σ = τ
else
return return i - 1, σ
end
if isfail(A, τ)
return i - 1, σ
end
σ = τ
end
return length(w), σ
end
Expand Down
177 changes: 177 additions & 0 deletions src/Automata/prefix_automaton.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
using SparseArrays

"""
    PrefixAutomaton(ordering::RewritingOrdering, rules::AbstractVector)

Automaton whose states are integers and whose transitions are stored as one
sparse vector per state, indexed by letter.

Conventions for the values stored in `transitions`:
 * `1` is the initial state,
 * `0` is the fail state,
 * negative values are (negated) indices of rules stored in `rwrules`.

`__storage` holds indices of rows of `transitions` which may be reused by
`addstate!`.

The constructor allocates the initial state and then calls
`add_direct_path!` with `rule.lhs` and the negated index for every rule in
`rules`.
"""
struct PrefixAutomaton{O<:RewritingOrdering,V} <: Automaton{Int32}
    ordering::O
    transitions::Vector{SparseVector{Int32,UInt32}}
    __storage::BitSet
    rwrules::V

    function PrefixAutomaton(
        ordering::RewritingOrdering,
        rules::V,
    ) where {V<:AbstractVector}
        table = SparseVector{Int32,UInt32}[]
        recycled = BitSet()
        pfxA = new{typeof(ordering),V}(ordering, table, recycled, rules)
        # the first state ever added becomes the initial state
        addstate!(pfxA)
        for (idx, rule) in pairs(rules)
            add_direct_path!(pfxA, rule.lhs, -idx)
        end
        return pfxA
    end
end

# The initial state of a prefix automaton is always `Int32(1)`.
function initial(::PrefixAutomaton)
    return Int32(1)
end

# `0` is the fail state.
function isfail(::PrefixAutomaton, σ::Integer)
    return σ == zero(σ)
end

# A value is accepting when it is a valid index of a row in `transitions`;
# in particular `0` (fail) and negative values are not accepting.
function isaccepting(pfx::PrefixAutomaton, σ::Integer)
    return σ ≥ 1 && σ ≤ length(pfx.transitions)
end

# An edge labelled `lab` leaves `σ` when the stored transition is nonzero,
# i.e. it does not lead to the fail state.
function hasedge(pfxA::PrefixAutomaton, σ::Integer, lab)
    row = pfxA.transitions[σ]
    return row[lab] ≠ 0
end

"""
    addedge!(pfxA::PrefixAutomaton, src::Integer, dst::Integer, label)

Store `dst` as the target of the edge labelled `label` leaving `src`,
overwriting any previous value. Returns `pfxA`.
"""
function addedge!(
    pfxA::PrefixAutomaton,
    src::Integer,
    dst::Integer,
    label,
)
    row = pfxA.transitions[src]
    row[label] = dst
    return pfxA
end

# Follow the edge labelled `label` from `σ`; the returned value is whatever is
# stored in the transition table (a state, `0`, or a negative value).
function trace(label::Integer, pfxA::PrefixAutomaton, σ::Integer)
    row = pfxA.transitions[σ]
    return row[label]
end

# A prefix automaton is empty when no edge leaves its initial state.
function Base.isempty(pfxA::PrefixAutomaton)
    row = pfxA.transitions[initial(pfxA)]
    return !any(!iszero, row)
end

"""
    empty!(pfxA::PrefixAutomaton)

Mark every state except the first one as reusable (by adding it to
`pfxA.__storage`) and reset all transitions of the first state to `0`.
`pfxA.rwrules` is not modified. Returns `pfxA`.
"""
function Base.empty!(pfxA::PrefixAutomaton)
    for st in 2:length(pfxA.transitions)
        push!(pfxA.__storage, st)
    end
    pfxA.transitions[1] .= 0
    return pfxA
end

# construction/modification

"""
    addstate!(pfxA::PrefixAutomaton)

Return the index of a state with all transitions equal to `0`.

When `pfxA.__storage` is non-empty its first element is taken out and the
corresponding row of `transitions` is reset; otherwise a new empty row (of
length equal to the number of letters of the alphabet) is appended.
"""
function addstate!(pfxA::PrefixAutomaton)
    if isempty(pfxA.__storage)
        nletters = length(alphabet(ordering(pfxA)))
        push!(pfxA.transitions, SparseVector(nletters, UInt32[], Int32[]))
        return length(pfxA.transitions)
    end
    # recycle a previously released row
    st = popfirst!(pfxA.__storage)
    pfxA.transitions[st] .= 0
    return st
end

"""
    add_direct_path!(pfxA::PrefixAutomaton, lhs::AbstractWord, val::Integer)

Add to `pfxA` the path from the initial state labelled by the letters of
`lhs`, creating missing states with `addstate!`. The edge labelled by the
last letter of `lhs` is set to `val`, overwriting whatever was stored there.

`val` must be strictly negative: `0` denotes the fail state, so writing it
would erase the edge instead of marking a rule.

Returns the tuple `(added, pfxA)`:
 * `(true, pfxA)` when the final edge has been written;
 * `(false, pfxA)` when a proper prefix of `lhs` already leads to a
   non-accepting value (i.e. a rule), in which case no final edge is written;
 * `(false, pfxA)` (after logging an error) when `lhs` has no letters.
"""
function add_direct_path!(
    pfxA::PrefixAutomaton,
    lhs::AbstractWord,
    val::Integer,
)
    @assert val < 0
    σ = initial(pfxA)
    for (i, letter) in pairs(lhs)
        τ = trace(letter, pfxA, σ)
        if i == lastindex(lhs)
            addedge!(pfxA, σ, val, letter)
            return true, pfxA
        elseif isfail(pfxA, τ)
            # no edge yet: extend the path with a fresh state
            τ = addstate!(pfxA)
            addedge!(pfxA, σ, τ, letter)
        end
        σ = τ
        if !isaccepting(pfxA, σ)
            @debug "prefix of length $i of $lhs is already a lhs of a rule" σ

            # this may happen if the rule.lhs we push into pfxA
            # has a prefix that is reducible; then we return false,
            # and we don't enlarge pfxA
            return false, pfxA
        end
    end
    # only reachable when the loop body never ran
    @error "unintended exit"
    return false, pfxA
end

"""
    remove_direct_path!(pfxA::PrefixAutomaton, lhs::AbstractWord)

Remove from `pfxA` the path labelled by `lhs`, provided it ends at a rule
(a negative transition value) exactly at the last letter of `lhs`.

The edge leaving the deepest state that has to be kept is reset to `0`, and
every state visited after it is pushed to `pfxA.__storage` so that
`addstate!` can reuse it.

`pfxA` is left untouched when `lhs`
 * has no letters,
 * leaves the automaton (hits the fail state),
 * runs through a rule before its last letter, or
 * ends at an ordinary state (it is only a proper prefix of stored paths).

`pfxA.rwrules` is not modified. Returns `pfxA`.
"""
function remove_direct_path!(pfxA::PrefixAutomaton, lhs::AbstractWord)
    n = length(lhs)
    σ = initial(pfxA)
    # The edge labelled `lhs[cut_idx]` leaving `cut_src` is the first edge of
    # the path which is used by `lhs` only; `cut_src` itself must be kept.
    cut_src, cut_idx = σ, 1
    found = false

    for (i, letter) in enumerate(lhs)
        # analyze edge with (src=σ, label=letter, dst=τ)
        τ = trace(letter, pfxA, σ)
        isfail(pfxA, τ) && return pfxA
        if !isaccepting(pfxA, τ)
            # τ marks a rule; it is the one to remove only if lhs ends here,
            # otherwise lhs does not define a leaf and nothing is removed
            i == n || return pfxA
            found = true
            break
        end
        # NOTE(review): assumes `degree(pfxA, τ)` counts all outgoing edges of
        # τ, including those pointing to rules (negative values) — confirm.
        if degree(pfxA, τ) > 1
            # τ is shared with other paths, so everything up to τ stays
            cut_src, cut_idx = τ, i + 1
        end
        σ = τ
    end

    # lhs is empty or it ended at an ordinary state: there is no rule to remove
    found || return pfxA

    σ = cut_src
    for k in cut_idx:n
        letter = lhs[k]
        τ = trace(letter, pfxA, σ)
        pfxA.transitions[σ][letter] = 0
        # states on the unshared tail are released; the final (negative)
        # value is not a state and has nothing to release
        isaccepting(pfxA, τ) && push!(pfxA.__storage, τ)
        σ = τ
    end

    return pfxA
end

"""
    show(io::IO, ::MIME"text/plain", pfxA::PrefixAutomaton)

Print a three-line summary of `pfxA`: the ordering type and alphabet size,
the number of states, and the number of rules (negative transition values).
Rows listed in `pfxA.__storage` are not counted.
"""
function Base.show(io::IO, ::MIME"text/plain", pfxA::PrefixAutomaton)
    ord = ordering(pfxA)
    nletters = length(alphabet(ord))
    println(io, "prefix automaton over $(typeof(ord)) with $(nletters) letters")

    nrules = 0
    for (st, row) in pairs(pfxA.transitions)
        st in pfxA.__storage && continue
        nrules += sum(<(0), row)
    end
    nstates = length(pfxA.transitions) - length(pfxA.__storage) + nrules

    println(io, " • $(nstates) states")
    print(io, " • $(nrules) non-accepting states (rw rules)")
    return nothing
end

"""
    push!(pfxA::PrefixAutomaton, rule::KnuthBendix.Rule)

Try to add the path for `rule.lhs` to `pfxA`; the rule is appended to
`pfxA.rwrules` only when `add_direct_path!` reports that the path was added.
Returns `pfxA`.
"""
function Base.push!(pfxA::PrefixAutomaton, rule::KnuthBendix.Rule)
    # index the rule would get in `rwrules`, stored negated in the automaton
    idx = length(pfxA.rwrules) + 1
    added, _ = add_direct_path!(pfxA, rule.lhs, -idx)
    added && push!(pfxA.rwrules, rule)
    return pfxA
end

# for using PrefixAutomaton as rewriting struct in KnuthBendix
"""
    ordering(pfxA::PrefixAutomaton)

Return the rewriting ordering stored in `pfxA`.
"""
function KnuthBendix.ordering(pfxA::PrefixAutomaton)
    return pfxA.ordering
end

"""
    word_type(::Type{<:PrefixAutomaton{O,V}})

Return `KnuthBendix.word_type` of the element type of the rule container `V`.
"""
function KnuthBendix.word_type(::Type{<:PrefixAutomaton{O,V}}) where {O,V}
    R = eltype(V)
    return KnuthBendix.word_type(R)
end

"""
    PrefixAutomaton(rws::AbstractRewritingSystem)

Construct a prefix automaton from the ordering of `rws` and the value
returned by `KnuthBendix.__rawrules(rws)`.
"""
function PrefixAutomaton(rws::AbstractRewritingSystem)
    ord = ordering(rws)
    rawrules = KnuthBendix.__rawrules(rws)
    return PrefixAutomaton(ord, rawrules)
end
4 changes: 2 additions & 2 deletions src/Automata/rebuilding_idxA.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function _rebuild!(idxA::IndexAutomaton, rws::RewritingSystem)
function _rebuild!(idxA::IndexAutomaton, rws::AbstractRewritingSystem)
# Most of the information in idxA can be reused;
# however here we just rebuild it from scratch
at = IndexAutomaton(rws)
Expand All @@ -8,7 +8,7 @@ function _rebuild!(idxA::IndexAutomaton, rws::RewritingSystem)
return idxA
end

function rebuild!(idxA::IndexAutomaton, rws::RewritingSystem)
function rebuild!(idxA::IndexAutomaton, rws::AbstractRewritingSystem)
# mark all states as not up to date
for states in idxA.states
for σ in states
Expand Down
3 changes: 3 additions & 0 deletions src/KnuthBendix.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ export Alphabet, Word, RewritingSystem
export LenLex, WreathOrder, Recursive, WeightedLex
export alphabet, isconfluent, ordering, knuthbendix

const Stack{W} = Vector{Tuple{W,W}}

include("utils/packed_vector.jl")
include("Words/Words.jl")
using .Words
include("buffer_pair.jl")
Expand Down
2 changes: 1 addition & 1 deletion src/confluence_check.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ function check_confluence(
is_reduced = isreduced(rws),
)
W = word_type(rws)
stack = Vector{Tuple{W,W}}()
stack = Stack{W}()
return check_confluence!(stack, rws; is_reduced = is_reduced)
end

Expand Down
Loading
Loading