Merge pull request #73 from kalmarek/mk/rename_index_at_terminals

rename index automaton terminals
kalmarek · Jul 22, 2024 · f8e65d9 · f8e65d9
2 parents 3bef24c + ccace0b
commit f8e65d9
Show file tree

Hide file tree

Showing 13 changed files with 93 additions and 63 deletions.
diff --git a/docs/src/KB_implementations.md b/docs/src/KB_implementations.md
@@ -20,6 +20,7 @@ CurrentModule = KnuthBendix
 
 ```@docs
 knuthbendix
+Settings
 ```
 
 ## Knuth-Bendix on Monoids and Automatic Groups (`kbmag`)

diff --git a/docs/src/automata.md b/docs/src/automata.md
@@ -9,7 +9,7 @@ Automaton
 initial
 hasedge
 isfail
-isterminal
+isaccepting
 trace
 
 ```

diff --git a/docs/src/index_automaton.md b/docs/src/index_automaton.md
@@ -2,6 +2,6 @@
 CurrentModule = KnuthBendix.Automata
 ```
 
-<!-- ```@docs
+```@docs
 IndexAutomaton
-``` -->
+```
diff --git a/docs/src/knuthbendix_idxA.md b/docs/src/knuthbendix_idxA.md
@@ -6,7 +6,7 @@ CurrentModule = KnuthBendix
 
 Major speedups in rewriting can be obtained by using a specialized data
 structure for rewriting system. The structure is known in the contex[^Sims1994]
-of Knuth-Bendix completion as an index automaton, but a more widely name for
+of Knuth-Bendix completion as an [index automaton](@ref IndexAutomaton), but a more widely name for
 is the Aho-Corasik automaton[^Aho1975].
 This automaton provides an optimal complexity for string searching in a corpus,
 namely, once the automaton is built, the searching for the left hand sides of
@@ -15,7 +15,7 @@ rewritten. Thus we can obtain rewrites in time independent of the size of the
 rewriting system.
 
 To learn more about fast rewriting using the automaton see
-[Index automaton rewriting](@ref "Index automaton").
+[Index automaton rewriting](@ref).
 
 ```@docs
 KBIndex
@@ -85,7 +85,7 @@ different pairs of rules `(ri, rj)`.
 
 ## Backtrack search and the test for (local) confluence
 
-Another very important feature of `IndexAutomaton` is that it allows us to test
+Another very important feature of [`IndexAutomaton`](@ref) is that it allows us to test
 cheaply for confluence. Naively we need to check all pairs of rules for their
 suffix-prefix intersections and resolve the potentially critical pairs. However
 if we have an `idxA::IndexAutomaton` at our disposal checking for confluence

diff --git a/docs/src/rewriting.md b/docs/src/rewriting.md
@@ -74,7 +74,7 @@ The naive rewriting with  a rewriting system is therefore in the worst case
 **proportional** to the total size of the whole `rws` which makes it a very
 inefficient rewriting strategy.
 
-## Index automaton
+## Index automaton rewriting
 
 ```@docs
 rewrite!(::AbstractWord, ::AbstractWord, ::Automata.IndexAutomaton; history)
@@ -84,7 +84,7 @@ In practice the complexity of building and
 maintaining `idxA` synchronized with `ℒ` overwhelms gains made in rewriting
 (to construct `idxA` one need to _reduce_ `rws` first which is `O(N²)` (??)).
 
-## Non-deterministic prefix automaton
+## Prefix automaton rewriting
 
 Rewriting with a non-deterministic prefix automaton `pfxA` traces the whole
 set of paths in `pfxA` which are determined by `w`. Since `pfxA` contains an
@@ -103,7 +103,7 @@ This rewriting can be also understood differently: given the non-deterministic
 automaton `pfxA` one could determinize it through power-set construction and
 then trace deterministicaly in the automaton whose states are subsets of states
 of `pfxA`. Here we do it without realizing the power-set explicitly and we are
-tracing through procedute described in Sims as _accessible subset construction_.
+tracing through procedure described in Sims as _accessible subset construction_.
 
 In practice the history consists of the subsets of states (of `pfxA`) which are
 stored in a contiguous array and an additional vector of indices marking the

diff --git a/src/Automata/backtrack.jl b/src/Automata/backtrack.jl
@@ -177,19 +177,19 @@ function (::ConfluenceOracle)(bts::BacktrackSearch)
     current_state = last(bts.history)
     path_len = length(signature(bts.automaton, current_state))
 
-    terminal = isterminal(bts.automaton, current_state)
+    irreducible = isaccepting(bts.automaton, current_state)
     skew_path = path_len < length(bts.history)
 
     # backtrack when the depth of search exceeds the length of the signature of
     # the last step
     # Equivalently: the length of completed word is greater or equal to
     # length(lhs) so that the suffix contains the whole signature which means
     # that the overlap of the initial word and the suffix is empty.
-    # OR we reached a terminal state
-    bcktrck = terminal || skew_path
+    # OR we reached a non-accepting state
+    bcktrck = !irreducible || skew_path
 
-    # we return only the terminal states
-    rtrn = terminal && !skew_path
+    # we return only the non-accepting states
+    rtrn = !irreducible && !skew_path
 
     return bcktrck, rtrn
 end
@@ -233,16 +233,16 @@ return_value(::LoopSearchOracle, bts::BacktrackSearch) = last(bts.history)
 
 function (oracle::LoopSearchOracle)(bts::BacktrackSearch)
     current_state = last(bts.history)
-    # backtrack on terminal states (leafs)
-    bcktrck = isterminal(bts.automaton, current_state)
+    # backtrack on non-accepting states (leafs)
+    bcktrck = !isaccepting(bts.automaton, current_state)
     if !bcktrck
         oracle.n_visited += 1
         oracle.max_depth = max(oracle.max_depth, length(bts.history) - 1)
     end
 
     # return when loop is found
     rtrn = findfirst(==(current_state), bts.history) ≠ lastindex(bts.history)
-    # the loop can be read of bs.tape or stack returned by Base.iterate(bts)
+    # the loop can be read of bts.history or stack returned by Base.iterate(bts)
 
     return bcktrck, rtrn
 end
@@ -278,7 +278,7 @@ end
 
 function (oracle::IrreducibleWordsOracle)(bts::BacktrackSearch)
     current_state = last(bts.history)
-    leaf_node = isterminal(bts.automaton, current_state)
+    leaf_node = !isaccepting(bts.automaton, current_state)
     bcktrck = leaf_node || length(bts.path) > oracle.max_length
 
     length_fits = oracle.min_length ≤ length(bts.path) ≤ oracle.max_length
@@ -314,8 +314,8 @@ return_value(::WordCountOracle, ::BacktrackSearch) = nothing
 
 function (oracle::WordCountOracle)(bts::BacktrackSearch)
     current_state = last(bts.history)
-    # backtrack on terminal states (leafs)
-    leaf_node = isterminal(bts.automaton, current_state)
+    # backtrack on non-accepting states (leafs)
+    leaf_node = !isaccepting(bts.automaton, current_state)
     bcktrck = leaf_node || length(bts.path) ≥ oracle.max_depth
 
     # never return, just count

diff --git a/src/Automata/index_automaton.jl b/src/Automata/index_automaton.jl
@@ -1,5 +1,23 @@
 ## particular implementation of Index Automaton
 
+"""
+    IndexAutomaton{S, O<:RewritingOrdering}
+    IndexAutomaton(rws::RewritingSystem)
+Index Automaton related to a **reduced** rewriting system `rws`.
+
+A complete, deterministic automaton with
+* a single initial state (the empty word)
+* the set of accepting states (the proper prefixes of lhs of the rules of `rws`).
+
+The language of the automaton consists of words irreducible w.r.t. `rws`.
+
+An `IndexAutomaton` acts as a transducer (map), storing in the non-accepting
+states references to rules which can be used for rewriting. For more information
+see [Rewriting with IndexAutomaton](@ref "Index automaton rewriting").
+
+Moreover the automaton can be used to quickly perform
+[confluence checks](@ref KnuthBendix.check_confluence) via [Backtrack searches](@ref).
+"""
 mutable struct IndexAutomaton{S,O<:RewritingOrdering} <: Automaton{S}
     ordering::O
     initial::S
@@ -15,7 +33,7 @@ hasedge(::IndexAutomaton, ::State, ::Integer) = true
 addedge!(idxA::IndexAutomaton, src::State, dst::State, label) = src[label] = dst
 
 isfail(idxA::IndexAutomaton, σ::State) = σ === idxA.fail
-isterminal(idxA::IndexAutomaton, σ::State) = isdefined(σ, :value)
+isaccepting(idxA::IndexAutomaton, σ::State) = !isdefined(σ, :value)
 
 signature(idxA::IndexAutomaton, σ::State) = id(σ)
 
@@ -128,7 +146,7 @@ function skew_edges!(idxA::IndexAutomaton)
     # to ensure that trace(U, idxA) is successful
     for states in idxA.states
         for σ in states # states of particular radius
-            if isterminal(idxA, σ)
+            if !isaccepting(idxA, σ)
                 self_complete!(idxA, σ, override = true)
                 continue
             end
@@ -140,7 +158,7 @@ function skew_edges!(idxA::IndexAutomaton)
                 l, τ = Automata.trace(U, idxA) # we're tracing a shorter word, so...
                 @assert l == length(U) # the whole U defines a path in A and
                 # by the induction step edges from τ lead to non-fail states
-                isterminal(idxA, τ) &&
+                !isaccepting(idxA, τ) &&
                     @warn "rws doesn't seem to be reduced!"
                 # @assert iscomplete(τ, idxA)
                 τ
@@ -157,15 +175,15 @@ function skew_edges!(idxA::IndexAutomaton)
 end
 
 function Base.show(io::IO, idxA::IndexAutomaton)
-    terminal_count = [
-        count(st -> Automata.isterminal(idxA, st), states) for
+    rules_count = [
+        count(st -> !Automata.isaccepting(idxA, st), states) for
         states in idxA.states
     ]
     ord = KnuthBendix.ordering(idxA)
     A = alphabet(ord)
     println(io, "index automaton over $(typeof(ord)) with $(length(A)) letters")
     nstates = sum(length, idxA.states)
     println(io, "  • ", nstates, " state" * (nstates == 1 ? "" : "s"))
-    print(io, "  • ", sum(terminal_count), " accepting states (rw rules)")
+    print(io, "  • ", sum(rules_count), " non-accepting states (rw rules)")
     return
 end
diff --git a/src/Automata/interface.jl b/src/Automata/interface.jl
@@ -25,10 +25,10 @@ Check if state `σ` is a fail state in automaton `A`.
 function isfail(A::Automaton{S}, σ::S) where {S} end
 
 """
-    isterminal(A::Automaton, σ)
-Check if state `σ` is a terminal state of automaton `A`.
+    isaccepting(A::Automaton, σ)
+Check if state `σ` is an accepting (terminal) state of automaton `A`.
 """
-function isterminal(A::Automaton{S}, σ::S) where {S} end
+function isaccepting(A::Automaton{S}, σ::S) where {S} end
 
 """
 	trace(label, A::Automaton, σ)
@@ -43,7 +43,8 @@ Return a pair `(l, τ)`, where
   at `σ` in `A` and
  * `τ` is the last state (node) on the path.
 
-Note: if `w` defines a path to a _fail state_ the last non-fail state will be
+!!! note
+    If `w` defines a path to a _fail state_ the last non-fail state will be
 returned.
 """
 @inline function trace(

diff --git a/src/Automata/rebuilding_idxA.jl b/src/Automata/rebuilding_idxA.jl
@@ -45,9 +45,9 @@ function rebuild_direct_path!(idxA::IndexAutomaton, rule::Rule, age)
             addedge!(idxA, σ, τ, letter)
         else # σ[letter] is already defined
             # we're rebuilding so there's still some work to do
-            if isterminal(idxA, σl) && signature(idxA, σl) ≠ lhs
-                # the edge leads to a redundant terminal state
-                # @warn "terminal state in the middle of the direct path found:" rule σl
+            if !isaccepting(idxA, σl) && signature(idxA, σl) ≠ lhs
+                # the edge leads to a redundant non-accepting state
+                # @warn "non-accepting state in the middle of the direct path found:" rule σl
                 τ = typeof(σ)(σl.transitions, signature(idxA, σl), age)
                 addstate!(idxA, τ)
                 addedge!(idxA, σ, τ, letter)
@@ -83,7 +83,7 @@ function rebuild_skew_edges!(idxA::IndexAutomaton)
     # since we're rebuilding idxA the induction step is already done
     for states in idxA.states
         for σ in states # states of particular radius
-            if isterminal(idxA, σ)
+            if !isaccepting(idxA, σ)
                 self_complete!(idxA, σ, override = true)
                 continue
             end

diff --git a/src/examples.jl b/src/examples.jl
@@ -172,7 +172,7 @@ end
     Sims_Example_5_5_recursive()
 Rewriting system of the group presentation of the Heisenberg group
 > `⟨ a, b, c | 1 = [a,c] = [b,c], [a,b] = c⟩`
-ordered by [`Recursive` ordering](@ref) `c < C < b < B < a < A`.
+ordered by [`Recursive`](@ref) ordering `c < C < b < B < a < A`.
 
 Same as [`Heisenberg`](@ref).
 """
@@ -227,7 +227,7 @@ Hurwitz4() = triangle_237_quotient(4)
     Hurwitz8()
 Rewriting system of the presentation of
 [Hurwitz group](https://en.wikipedia.org/wiki/Hurwitz%27s_automorphisms_theorem)
-[`quotient_237(8)`](@ref quotient_237).
+[`triangle_237_quotient(8)`](@ref triangle_237_quotient).
 
 `Hurwitz8` is finite of order `10752`.
 """

diff --git a/src/rewriting.jl b/src/rewriting.jl
@@ -217,17 +217,16 @@ function rewrite!(
 
         push!(v, x)
         push!(history, τ)
-
-        if Automata.isterminal(idxA, τ)
-            lhs, rhs = Automata.value(τ)
-            # lhs is a suffix of v·x, so we delete it from v
-            resize!(v, length(v) - length(lhs))
-            # and prepend rhs to w
-            prepend!(w, rhs)
-            # now we need to rewind the history tape
-            resize!(history, length(history) - length(lhs))
-            # @assert trace(v, ia) == (length(v), last(path))
-        end
+        Automata.isaccepting(idxA, τ) && continue
+        # else ...
+        lhs, rhs = Automata.value(τ)
+        # lhs is a suffix of v, so we delete it from v
+        resize!(v, length(v) - length(lhs))
+        # and prepend rhs to w
+        prepend!(w, rhs)
+        # now we need to rewind the history tape
+        resize!(history, length(history) - length(lhs))
+        # @assert trace(v, ia) == (length(v), last(path))
     end
     return v
 end
diff --git a/src/settings_workspace.jl b/src/settings_workspace.jl
@@ -1,26 +1,37 @@
 abstract type CompletionAlgorithm end
 
+"""
+    Settings{CA<:CompletionAlgorithm}
+    Settings(alg; kwargs...)
+Struct encompassing knobs and switches for the `knuthbendix` completion.
+
+# Arguments:
+* `alg::CompletionAlgorithm`: the algorithms used for Knuth-Bendix completion
+
+# Keyword arguments
+* `max_rules`: forcefully terminate Knuth-Bendix completion if the number of
+   rules exceeds `max_rules`. Note: this is only a hint, the returned `rws` may
+   contain more or fewer rewriting rules.
+* `stack_size`: Reduce the rws and incorporate new rules into `rws` whenever
+  the stack of newly discovered rules exceeds `stack_size`.
+* `confluence_delay`: Attempt a confluence check whenever no new critical pairs
+  are discovered after `confluence_delay` iterations in the `knuthbendix` main loop.
+* `max_length_lhs`: The upper bound on the length of lhs of new rules considered in the algorithm.
+  (reserved for future use).
+* `max_length_lhs`: The upper bound on the length of rhs of new rules considered in the algorithm.
+  (reserved for future use).
+* `max_length_overlap`: The upper bound on the overlaps considered when finding new critical pairs.
+  (reserved for future use).
+* `verbosity`: Specifies the level of verbosity.
+"""
 mutable struct Settings{CA<:CompletionAlgorithm}
-    """The algorithms used for Knuth-Bendix completion"""
     algorithm::CA
-    """Terminate Knuth-Bendix completion if the number of rules exceeds `max_rules`."""
     max_rules::Int
-    """Reduce the rws and update the indexing automaton whenever the stack
-    of newly discovered rules exceeds `stack_size`.
-    Note: this is only a hint."""
     stack_size::Int
-    """
-    Attempt a confluence check whenever no new rules are added to stack after
-    `confluence_delay` iterations in the `knuthbendix` main loop.
-    """
     confluence_delay::Int
-    """Consider only new rules of lhs which does not exceed `max_length_lhs`."""
     max_length_lhs::Int
-    """Consider only the new rules of rhs which does not exceed `max_length_rhs`."""
     max_length_rhs::Int
-    """When finding critical pairs consider overlaps of length at most `max_overlap_length`."""
-    max_lenght_overlap::Int
-    """Specifies the level of verbosity"""
+    max_length_overlap::Int
     verbosity::Int
     dropped_rules::Bool
     update_progress::Any

diff --git a/test/backtrack.jl b/test/backtrack.jl
@@ -49,12 +49,12 @@
             lhs₁, _ = rule
             tests = map(btsearch(lhs₁[2:end])) do r
                 lhs₂, _ = r
-                t1 = Automata.isterminal(idxA, last(btsearch.history))
+                irr = Automata.isaccepting(idxA, last(btsearch.history))
                 lb = length(lhs₂) - length(btsearch.history) + 1
                 t2 = lb ≥ 1
                 t3 = lb < length(lhs₂)
                 t4 = lhs₁[end-lb+1:end] == lhs₂[1:lb]
-                return t1 && t2 && t3 && t4
+                return !irr && t2 && t3 && t4
             end
             @test all(tests)
         end