Skip to content

Commit

Permalink
Properly implement and document Table element
Browse files Browse the repository at this point in the history
X-ref: #6
  • Loading branch information
mortenpi committed Jul 5, 2022
1 parent 6610331 commit 8bb7f56
Show file tree
Hide file tree
Showing 5 changed files with 293 additions and 24 deletions.
116 changes: 109 additions & 7 deletions src/markdown.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,31 @@ elements that are only allowed to have inline child elements or to make sure tha
[`List`s](@ref List) only contain [`Item`s](@ref Item).
If the `parent` element is a leaf node (`iscontainer(parent) === false`)
# Extended help
When extending `can_contain` for custom abstract classes Markdown elements, similar to the
[`AbstractBlock`](@ref) or [`AbstractInline`](@ref) elements classes, the second argument to
[`can_contain`](@ref) should always be constrained exactly to `::AbstractElement`, in order
to avoid method ambiguities. I.e. for some abstract type `AbstractT <: AbstractElement`,
the method should be defined as
```julia
can_contain(parent::AbstractT, child::AbstractElement) = ...
```
For concrete parent types `T`, where the first argument is constrained as `parent::T` it
should then be fine to take advantage of multiple dispatch when implementing
[`can_contain`](@ref).
"""
function can_contain end
# These methods ensure that, by default, container blocks/inlines are allowed to contain
# any blocks/inlines (respectively):
can_contain(parent::AbstractBlock, child::AbstractBlock) = iscontainer(parent)
can_contain(parent::AbstractInline, child::AbstractInline) = iscontainer(parent)
# The following method will be called if one mixed blocks and inlines (unless specifically
# overridden):
# any blocks/inlines (respectively). However, we need the isa() calls here because the
# AbstractBlock and AbstractInline are abstract types.
can_contain(parent::AbstractBlock, child::AbstractElement) = iscontainer(parent) && isa(child, AbstractBlock)
can_contain(parent::AbstractInline, child::AbstractElement) = iscontainer(parent) && isa(child, AbstractInline)
# The following method will be called if one mixes blocks and inlines (unless specifically
# overridden), and also functions as the ultimate fallback for can_contain.
can_contain(parent::AbstractElement, child::AbstractElement) = false

"""
Expand Down Expand Up @@ -457,28 +474,113 @@ Base.:(==)(x::FootnoteLink, y::FootnoteLink) = (x.id == y.id)

# Markdown tables:
abstract type TableComponent <: AbstractBlock end
iscontainer(::TableComponent) = true

"""
mutable struct Table <: TableComponent
Container block representing a table, an extension of the CommonMark spec, and should be
interpreted as a rectangular grid of cells with a fixed number of rows and columns.
* A [`Table`](@ref) node can only contain either [`TableHeader`](@ref) or
[`TableBody`](@ref) nodes as children.
* [`TableHeader`](@ref) and [`TableBody`](@ref) can only contain [`TableRow`](@ref)s
as children. A [`TableHeader`](@ref) should contain only a single [`TableRow`](@ref), and
any additional ones should be ignored.
* Each [`TableRow`](@ref) contains only [`TableCell`](@ref)s as children. The row with the
largest number of cells determines the width (number of columns) of the table.
Since we can not constrain e.g. the number of children or in what order child nodes can
appear in a Markdown tree, it is possible to construct tables that can be difficult to
interpret. The following rules should be followed when interpreting tables:
* The decendants of a [`Table`](@ref) node should be exactly be a single
[`TableHeader`](@ref) followed by a [`TableBody`](@ref).
- If the first child is a [`TableBody`](@ref), the header row is assumed to be a list of
empty cells.
- The rows from any nodes following the first [`TableBody`](@ref) are interpreted as
additional table body rows, even if they are contained in a [`TableHeader`](@ref).
- A [`Table`](@ref) with no children is interpreted as a table with a single empty header
cell.
* A [`TableHeader`](@ref) that is the first child of a [`Table`](@ref) should only contain
one [`TableRow`](@ref).
- If a [`TableHeader`](@ref) that is the first child of a [`Table`](@ref) contains
additional rows, the additional rows are interpreted to be body rows.
- If a [`TableHeader`](@ref) that is the first child of a [`Table`](@ref) is empty, it is
assumed to represent a header row with empty cells.
* Each [`TableRow`](@ref) of a table should contain the same number of [`TableCell`](@ref).
- Any row that has fewer cells than the longest row should be interpreted as if it is
padded with additional empty cells.
"""
mutable struct Table <: TableComponent
spec::Vector{Symbol}
spec :: Vector{Symbol}

function Table(spec)
for (ncol, s) in enumerate(spec)
s in [:left, :right, :center] && continue
error("Invalid table specifier '$s' for column $ncol")
end
new(collect(spec))
end
end
Base.:(==)(x::Table, y::Table) = (x.spec == y.spec)
can_contain(::Table, e::AbstractElement) = isa(e, TableHeader) || isa(e, TableBody)

"""
struct TableHeader <: TableComponent
Represents the header portion of a Markdown table and should only occur as the first child
of a [`Table`](@ref) node and should only contain a single [`TableRow`](@ref) as a child.
See [`Table`](@ref) for information on how to handle other circumstances.
It can only contain [`TableRow`](@ref) elements.
"""
struct TableHeader <: TableComponent end
can_contain(::TableHeader, e::AbstractElement) = isa(e, TableRow)

"""
struct TableBody <: TableComponent
Represents the body of a Markdown table and should only occur as the second child of a
[`Table`](@ref) node. See [`Table`](@ref) for information on how to handle other
circumstances.
It can only contain [`TableRow`](@ref) elements.
"""
struct TableBody <: TableComponent end
can_contain(::TableBody, e::AbstractElement) = isa(e, TableRow)

"""
struct TableRow <: TableComponent
Represents a row of a Markdown table. Can only contain [`TableCell`](@ref)s as children.
"""
struct TableRow <: TableComponent end
struct TablePipe <: AbstractInline end # TODO: ???
can_contain(::TableRow, e::AbstractElement) = isa(e, TableCell)

"""
mutable struct TableCell <: TableComponent
Represents a single cell in a Markdown table. Can contain inline nodes.
* `align :: Symbol`: declares the alignment of a cell (can be `:left`, `:right`, or
`:center`), and should match the `.spec` field of the ancestor [`Table`](@ref)
* `header :: Bool`: `true` if the cell is part of a header row, and should only be true if
the cell belongs to a row that is the first
* `column :: Int`: the column index of the cell, which should match its position in the
Markdown tree
It is possible that the fields of [`TableCell`](@ref) are inconsistent with the real
structure of the Markdown tree, in which case the structure or the `.spec` field should take
precedence when interpreting the elements.
"""
mutable struct TableCell <: TableComponent
align :: Symbol
header :: Bool
column :: Int
end
Base.:(==)(x::TableCell, y::TableCell) = (x.align == y.align) && (x.header == y.header) && (x.column == y.column)
can_contain(::TableCell, e::AbstractElement) = isa(e, AbstractInline)

# src/extensions/interpolation.jl:struct JuliaValue <: AbstractInline
# struct Backslash <: AbstractInline end
Expand Down
51 changes: 43 additions & 8 deletions src/stdlib.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,33 +56,68 @@ function _convert_block(b::Markdown.List)
end

function _convert_block(b::Markdown.Table)
# TODO: I have no idea if this is even remotely correct.
@assert length(b.rows) >= 1
# If the Markdown table is somehow empty, we'll return an empty Table node
isempty(b.rows) && return Node(Table([]))
# We assume that the width of the table is the width of the widest row
ncols = maximum(length(row) for row in b.rows)
# Markdown uses :l / :r / :c for the table specs. We will also pad it with `:right`
# values (standard library's default) if need be, or drop the last ones if there are too
# many somehow.
spec = _convert_column_spec.(b.align)
if ncols > length(spec)
rpad_array!(spec, ncols, :right)
elseif ncols < length(spec)
spec = spec[1:ncols]
end
# A MD table should always contain a header. We'll split it off from the rest.
header_row, body_rows = Iterators.peel(b.rows)
tablenode = Node(Table(Symbol[]))
tablenode = Node(Table(spec))
headernode = Node(TableHeader())
# A MD table should always contain a header
push!(headernode.children, _convert_table_row(header_row))
push!(headernode.children, _convert_table_row(header_row, spec=spec, isheader = true))
push!(tablenode.children, headernode)
# If it doesn't have any more rows, then we don't append a TableBody
if length(b.rows) >= 2
bodynode = Node(TableBody())
for row in body_rows
push!(bodynode.children, _convert_table_row(row))
push!(bodynode.children, _convert_table_row(row, spec=spec, isheader = false))
end
push!(tablenode.children, bodynode)
end
return tablenode
end
function _convert_table_row(row)
function _convert_table_row(row; spec, isheader)
ncols = length(spec)
rownode = Node(TableRow())
for (i, cell) in enumerate(row)
c = TableCell(:nothing, false, i)
c = TableCell(spec[i], isheader, i)
cellnode = _convert(c, _convert_inline, cell)
push!(rownode.children, cellnode)
end
# If need be, we pad the row with empty TableCells, to make sure that each row has the
# same number of cells
if length(row) < ncols
for i in (length(row)+1):ncols
cell = TableCell(spec[i], isheader, i)
push!(rownode.children, Node(cell))
end
end
return rownode
end
_convert_column_spec(s :: Symbol) = (s === :r) ? :right :
(s === :l) ? :left : (s === :c) ? :center : begin
@warn "Invalid table spec in Markdown table: '$s'"
:right
end
function rpad_array!(xs::Vector{T}, n::Integer, e::T) where {T}
n > 0
length(xs) >= n && return xs
i = n - length(xs)
while i > 0
push!(xs, e)
i -= 1
end
return xs
end

# Inline nodes:
_convert_inline(s::Markdown.Bold) = _convert(Strong(), _convert_inline, s.text)
Expand Down
69 changes: 64 additions & 5 deletions test/markdown.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ using MarkdownAST
using MarkdownAST: AbstractElement, AbstractBlock, AbstractInline,
Document,
Admonition, BlockQuote, CodeBlock, DisplayMath, FootnoteDefinition, HTMLBlock, Heading,
Item, List, Paragraph, TableComponent, ThematicBreak,
Code, Emph, FootnoteLink, HTMLInline, Image, InlineMath, Link, Strong, TablePipe, Text,
Item, List, Paragraph, ThematicBreak,
Code, Emph, FootnoteLink, HTMLInline, Image, InlineMath, Link, Strong,
TableComponent, Table, TableHeader, TableBody, TableRow, TableCell,
iscontainer, can_contain, isblock, isinline
using Test

Expand Down Expand Up @@ -96,7 +97,7 @@ MarkdownAST.iscontainer(e::PseudoInline) = e.iscontainer
end
# (5) Inline leafs:
for e in [
Code("code"), FootnoteLink("id"), HTMLInline("html"), InlineMath("math"), Text("text")
Code("code"), FootnoteLink("id"), HTMLInline("html"), InlineMath("math"), MarkdownAST.Text("text")
]
@test ! iscontainer(e)
@test ! isblock(e)
Expand All @@ -110,6 +111,64 @@ MarkdownAST.iscontainer(e::PseudoInline) = e.iscontainer
# Lists are special:
# List, Item

# As are tables:
# TableComponent, TablePipe
# Tables
let e = Table([])
@test iscontainer(e)
@test can_contain(Document(), e)
@test ! can_contain(Paragraph(), e)
@test can_contain(e, TableHeader())
@test can_contain(e, TableBody())
@test ! can_contain(e, TableRow())
@test ! can_contain(e, TableCell(:c, true, 1))
@test ! can_contain(e, PseudoBlock(true))
@test ! can_contain(e, PseudoBlock(false))
@test ! can_contain(e, PseudoInline(true))
@test ! can_contain(e, PseudoInline(false))
end
for e = [TableHeader(), TableBody()]
@test iscontainer(e)
# Because all TableComponents are AbstractBlocks, we can actually put
# them in arbitrary nodes that can contain blocks..
@test_broken ! can_contain(Document(), e)
@test ! can_contain(Paragraph(), e)
@test ! can_contain(e, TableHeader())
@test ! can_contain(e, TableBody())
@test can_contain(e, TableRow())
@test ! can_contain(e, TableCell(:c, true, 1))
@test ! can_contain(e, PseudoBlock(true))
@test ! can_contain(e, PseudoBlock(false))
@test ! can_contain(e, PseudoInline(true))
@test ! can_contain(e, PseudoInline(false))
end
let e = TableRow()
@test iscontainer(e)
# Because all TableComponents are AbstractBlocks, we can actually put
# them in arbitrary nodes that can contain blocks..
@test_broken ! can_contain(Document(), e)
@test ! can_contain(Paragraph(), e)
@test ! can_contain(e, TableHeader())
@test ! can_contain(e, TableBody())
@test ! can_contain(e, TableRow())
@test can_contain(e, TableCell(:c, true, 1))
@test ! can_contain(e, PseudoBlock(true))
@test ! can_contain(e, PseudoBlock(false))
@test ! can_contain(e, PseudoInline(true))
@test ! can_contain(e, PseudoInline(false))
end
let e = TableCell(:c, true, 1)
@test iscontainer(e)
# Because all TableComponents are AbstractBlocks, we can actually put
# them in arbitrary nodes that can contain blocks..
@test_broken ! can_contain(Document(), e)
@test ! can_contain(Paragraph(), e)
@test ! can_contain(e, TableHeader())
@test ! can_contain(e, TableBody())
@test ! can_contain(e, TableRow())
@test ! can_contain(e, TableCell(:c, true, 1))
@test ! can_contain(e, PseudoBlock(true))
@test ! can_contain(e, PseudoBlock(false))
@test can_contain(e, PseudoInline(true))
@test can_contain(e, PseudoInline(false))
end
@test_throws ErrorException Table([:foo])
end
8 changes: 4 additions & 4 deletions test/node.jl
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,10 @@ _startswith(prefix) = s -> startswith(s, prefix)
stringvar = "bar"
containervar = CodeBlock("lang", "code()")
tree = @ast Document() do
Paragraph() do
Heading(1) do
"Foo"
end
BlockQuote() do
Paragraph() do
stringvar
"Foo"
MarkdownAST.Text(stringvar) # call expr
Expand All @@ -97,13 +97,13 @@ _startswith(prefix) = s -> startswith(s, prefix)
# Check the children:
cs = collect(tree.children)
# first child
@test cs[1].element isa Paragraph
@test cs[1].element isa Heading
@test length(cs[1].children) == 1
@test cs[1].parent === tree
@test cs[1].previous === nothing
@test cs[1].next === cs[2]
# second child
@test cs[2].element isa BlockQuote
@test cs[2].element isa Paragraph
@test length(cs[2].children) == 3
@test cs[2].parent === tree
@test cs[2].previous === cs[1]
Expand Down
Loading

0 comments on commit 8bb7f56

Please sign in to comment.