From 8bb7f56b63770c85c33cc8f67cc1755e76f478e9 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Tue, 5 Jul 2022 22:30:40 +1200 Subject: [PATCH] Properly implement and document Table element X-ref: #6 --- src/markdown.jl | 116 ++++++++++++++++++++++++++++++++++++++++++++--- src/stdlib.jl | 51 +++++++++++++++++---- test/markdown.jl | 69 ++++++++++++++++++++++++++-- test/node.jl | 8 ++-- test/stdlib.jl | 73 +++++++++++++++++++++++++++++ 5 files changed, 293 insertions(+), 24 deletions(-) create mode 100644 test/stdlib.jl diff --git a/src/markdown.jl b/src/markdown.jl index 171cec3..dafc568 100644 --- a/src/markdown.jl +++ b/src/markdown.jl @@ -83,14 +83,31 @@ elements that are only allowed to have inline child elements or to make sure tha [`List`s](@ref List) only contain [`Item`s](@ref Item). If the `parent` element is a leaf node (`iscontainer(parent) === false`) + +# Extended help + +When extending `can_contain` for custom abstract classes Markdown elements, similar to the +[`AbstractBlock`](@ref) or [`AbstractInline`](@ref) elements classes, the second argument to +[`can_contain`](@ref) should always be constrained exactly to `::AbstractElement`, in order +to avoid method ambiguities. I.e. for some abstract type `AbstractT <: AbstractElement`, +the method should be defined as + +```julia +can_contain(parent::AbstractT, child::AbstractElement) = ... +``` + +For concrete parent types `T`, where the first argument is constrained as `parent::T` it +should then be fine to take advantage of multiple dispatch when implementing +[`can_contain`](@ref). """ function can_contain end # These methods ensure that, by default, container blocks/inlines are allowed to contain -# any blocks/inlines (respectively): -can_contain(parent::AbstractBlock, child::AbstractBlock) = iscontainer(parent) -can_contain(parent::AbstractInline, child::AbstractInline) = iscontainer(parent) -# The following method will be called if one mixed blocks and inlines (unless specifically -# overridden): +# any blocks/inlines (respectively). However, we need the isa() calls here because the +# AbstractBlock and AbstractInline are abstract types. +can_contain(parent::AbstractBlock, child::AbstractElement) = iscontainer(parent) && isa(child, AbstractBlock) +can_contain(parent::AbstractInline, child::AbstractElement) = iscontainer(parent) && isa(child, AbstractInline) +# The following method will be called if one mixes blocks and inlines (unless specifically +# overridden), and also functions as the ultimate fallback for can_contain. can_contain(parent::AbstractElement, child::AbstractElement) = false """ @@ -457,21 +474,105 @@ Base.:(==)(x::FootnoteLink, y::FootnoteLink) = (x.id == y.id) # Markdown tables: abstract type TableComponent <: AbstractBlock end +iscontainer(::TableComponent) = true """ mutable struct Table <: TableComponent + +Container block representing a table, an extension of the CommonMark spec, and should be +interpreted as a rectangular grid of cells with a fixed number of rows and columns. + +* A [`Table`](@ref) node can only contain either [`TableHeader`](@ref) or + [`TableBody`](@ref) nodes as children. +* [`TableHeader`](@ref) and [`TableBody`](@ref) can only contain [`TableRow`](@ref)s + as children. A [`TableHeader`](@ref) should contain only a single [`TableRow`](@ref), and + any additional ones should be ignored. +* Each [`TableRow`](@ref) contains only [`TableCell`](@ref)s as children. The row with the + largest number of cells determines the width (number of columns) of the table. + +Since we can not constrain e.g. the number of children or in what order child nodes can +appear in a Markdown tree, it is possible to construct tables that can be difficult to +interpret. The following rules should be followed when interpreting tables: + +* The decendants of a [`Table`](@ref) node should be exactly be a single + [`TableHeader`](@ref) followed by a [`TableBody`](@ref). + - If the first child is a [`TableBody`](@ref), the header row is assumed to be a list of + empty cells. + - The rows from any nodes following the first [`TableBody`](@ref) are interpreted as + additional table body rows, even if they are contained in a [`TableHeader`](@ref). + - A [`Table`](@ref) with no children is interpreted as a table with a single empty header + cell. +* A [`TableHeader`](@ref) that is the first child of a [`Table`](@ref) should only contain + one [`TableRow`](@ref). + - If a [`TableHeader`](@ref) that is the first child of a [`Table`](@ref) contains + additional rows, the additional rows are interpreted to be body rows. + - If a [`TableHeader`](@ref) that is the first child of a [`Table`](@ref) is empty, it is + assumed to represent a header row with empty cells. +* Each [`TableRow`](@ref) of a table should contain the same number of [`TableCell`](@ref). + - Any row that has fewer cells than the longest row should be interpreted as if it is + padded with additional empty cells. """ mutable struct Table <: TableComponent - spec::Vector{Symbol} + spec :: Vector{Symbol} + + function Table(spec) + for (ncol, s) in enumerate(spec) + s in [:left, :right, :center] && continue + error("Invalid table specifier '$s' for column $ncol") + end + new(collect(spec)) + end end Base.:(==)(x::Table, y::Table) = (x.spec == y.spec) +can_contain(::Table, e::AbstractElement) = isa(e, TableHeader) || isa(e, TableBody) + +""" + struct TableHeader <: TableComponent + +Represents the header portion of a Markdown table and should only occur as the first child +of a [`Table`](@ref) node and should only contain a single [`TableRow`](@ref) as a child. +See [`Table`](@ref) for information on how to handle other circumstances. + +It can only contain [`TableRow`](@ref) elements. +""" struct TableHeader <: TableComponent end +can_contain(::TableHeader, e::AbstractElement) = isa(e, TableRow) + +""" + struct TableBody <: TableComponent + +Represents the body of a Markdown table and should only occur as the second child of a +[`Table`](@ref) node. See [`Table`](@ref) for information on how to handle other +circumstances. + +It can only contain [`TableRow`](@ref) elements. +""" struct TableBody <: TableComponent end +can_contain(::TableBody, e::AbstractElement) = isa(e, TableRow) + +""" + struct TableRow <: TableComponent + +Represents a row of a Markdown table. Can only contain [`TableCell`](@ref)s as children. +""" struct TableRow <: TableComponent end -struct TablePipe <: AbstractInline end # TODO: ??? +can_contain(::TableRow, e::AbstractElement) = isa(e, TableCell) """ mutable struct TableCell <: TableComponent + +Represents a single cell in a Markdown table. Can contain inline nodes. + +* `align :: Symbol`: declares the alignment of a cell (can be `:left`, `:right`, or + `:center`), and should match the `.spec` field of the ancestor [`Table`](@ref) +* `header :: Bool`: `true` if the cell is part of a header row, and should only be true if + the cell belongs to a row that is the first +* `column :: Int`: the column index of the cell, which should match its position in the + Markdown tree + +It is possible that the fields of [`TableCell`](@ref) are inconsistent with the real +structure of the Markdown tree, in which case the structure or the `.spec` field should take +precedence when interpreting the elements. """ mutable struct TableCell <: TableComponent align :: Symbol @@ -479,6 +580,7 @@ mutable struct TableCell <: TableComponent column :: Int end Base.:(==)(x::TableCell, y::TableCell) = (x.align == y.align) && (x.header == y.header) && (x.column == y.column) +can_contain(::TableCell, e::AbstractElement) = isa(e, AbstractInline) # src/extensions/interpolation.jl:struct JuliaValue <: AbstractInline # struct Backslash <: AbstractInline end diff --git a/src/stdlib.jl b/src/stdlib.jl index ba9b082..fcc1c30 100644 --- a/src/stdlib.jl +++ b/src/stdlib.jl @@ -56,33 +56,68 @@ function _convert_block(b::Markdown.List) end function _convert_block(b::Markdown.Table) - # TODO: I have no idea if this is even remotely correct. - @assert length(b.rows) >= 1 + # If the Markdown table is somehow empty, we'll return an empty Table node + isempty(b.rows) && return Node(Table([])) + # We assume that the width of the table is the width of the widest row + ncols = maximum(length(row) for row in b.rows) + # Markdown uses :l / :r / :c for the table specs. We will also pad it with `:right` + # values (standard library's default) if need be, or drop the last ones if there are too + # many somehow. + spec = _convert_column_spec.(b.align) + if ncols > length(spec) + rpad_array!(spec, ncols, :right) + elseif ncols < length(spec) + spec = spec[1:ncols] + end + # A MD table should always contain a header. We'll split it off from the rest. header_row, body_rows = Iterators.peel(b.rows) - tablenode = Node(Table(Symbol[])) + tablenode = Node(Table(spec)) headernode = Node(TableHeader()) - # A MD table should always contain a header - push!(headernode.children, _convert_table_row(header_row)) + push!(headernode.children, _convert_table_row(header_row, spec=spec, isheader = true)) push!(tablenode.children, headernode) # If it doesn't have any more rows, then we don't append a TableBody if length(b.rows) >= 2 bodynode = Node(TableBody()) for row in body_rows - push!(bodynode.children, _convert_table_row(row)) + push!(bodynode.children, _convert_table_row(row, spec=spec, isheader = false)) end push!(tablenode.children, bodynode) end return tablenode end -function _convert_table_row(row) +function _convert_table_row(row; spec, isheader) + ncols = length(spec) rownode = Node(TableRow()) for (i, cell) in enumerate(row) - c = TableCell(:nothing, false, i) + c = TableCell(spec[i], isheader, i) cellnode = _convert(c, _convert_inline, cell) push!(rownode.children, cellnode) end + # If need be, we pad the row with empty TableCells, to make sure that each row has the + # same number of cells + if length(row) < ncols + for i in (length(row)+1):ncols + cell = TableCell(spec[i], isheader, i) + push!(rownode.children, Node(cell)) + end + end return rownode end +_convert_column_spec(s :: Symbol) = (s === :r) ? :right : + (s === :l) ? :left : (s === :c) ? :center : begin + @warn "Invalid table spec in Markdown table: '$s'" + :right + end +function rpad_array!(xs::Vector{T}, n::Integer, e::T) where {T} + n > 0 + length(xs) >= n && return xs + i = n - length(xs) + while i > 0 + push!(xs, e) + i -= 1 + end + return xs +end # Inline nodes: _convert_inline(s::Markdown.Bold) = _convert(Strong(), _convert_inline, s.text) diff --git a/test/markdown.jl b/test/markdown.jl index ba46ea9..9eb2bc2 100644 --- a/test/markdown.jl +++ b/test/markdown.jl @@ -2,8 +2,9 @@ using MarkdownAST using MarkdownAST: AbstractElement, AbstractBlock, AbstractInline, Document, Admonition, BlockQuote, CodeBlock, DisplayMath, FootnoteDefinition, HTMLBlock, Heading, - Item, List, Paragraph, TableComponent, ThematicBreak, - Code, Emph, FootnoteLink, HTMLInline, Image, InlineMath, Link, Strong, TablePipe, Text, + Item, List, Paragraph, ThematicBreak, + Code, Emph, FootnoteLink, HTMLInline, Image, InlineMath, Link, Strong, + TableComponent, Table, TableHeader, TableBody, TableRow, TableCell, iscontainer, can_contain, isblock, isinline using Test @@ -96,7 +97,7 @@ MarkdownAST.iscontainer(e::PseudoInline) = e.iscontainer end # (5) Inline leafs: for e in [ - Code("code"), FootnoteLink("id"), HTMLInline("html"), InlineMath("math"), Text("text") + Code("code"), FootnoteLink("id"), HTMLInline("html"), InlineMath("math"), MarkdownAST.Text("text") ] @test ! iscontainer(e) @test ! isblock(e) @@ -110,6 +111,64 @@ MarkdownAST.iscontainer(e::PseudoInline) = e.iscontainer # Lists are special: # List, Item - # As are tables: - # TableComponent, TablePipe + # Tables + let e = Table([]) + @test iscontainer(e) + @test can_contain(Document(), e) + @test ! can_contain(Paragraph(), e) + @test can_contain(e, TableHeader()) + @test can_contain(e, TableBody()) + @test ! can_contain(e, TableRow()) + @test ! can_contain(e, TableCell(:c, true, 1)) + @test ! can_contain(e, PseudoBlock(true)) + @test ! can_contain(e, PseudoBlock(false)) + @test ! can_contain(e, PseudoInline(true)) + @test ! can_contain(e, PseudoInline(false)) + end + for e = [TableHeader(), TableBody()] + @test iscontainer(e) + # Because all TableComponents are AbstractBlocks, we can actually put + # them in arbitrary nodes that can contain blocks.. + @test_broken ! can_contain(Document(), e) + @test ! can_contain(Paragraph(), e) + @test ! can_contain(e, TableHeader()) + @test ! can_contain(e, TableBody()) + @test can_contain(e, TableRow()) + @test ! can_contain(e, TableCell(:c, true, 1)) + @test ! can_contain(e, PseudoBlock(true)) + @test ! can_contain(e, PseudoBlock(false)) + @test ! can_contain(e, PseudoInline(true)) + @test ! can_contain(e, PseudoInline(false)) + end + let e = TableRow() + @test iscontainer(e) + # Because all TableComponents are AbstractBlocks, we can actually put + # them in arbitrary nodes that can contain blocks.. + @test_broken ! can_contain(Document(), e) + @test ! can_contain(Paragraph(), e) + @test ! can_contain(e, TableHeader()) + @test ! can_contain(e, TableBody()) + @test ! can_contain(e, TableRow()) + @test can_contain(e, TableCell(:c, true, 1)) + @test ! can_contain(e, PseudoBlock(true)) + @test ! can_contain(e, PseudoBlock(false)) + @test ! can_contain(e, PseudoInline(true)) + @test ! can_contain(e, PseudoInline(false)) + end + let e = TableCell(:c, true, 1) + @test iscontainer(e) + # Because all TableComponents are AbstractBlocks, we can actually put + # them in arbitrary nodes that can contain blocks.. + @test_broken ! can_contain(Document(), e) + @test ! can_contain(Paragraph(), e) + @test ! can_contain(e, TableHeader()) + @test ! can_contain(e, TableBody()) + @test ! can_contain(e, TableRow()) + @test ! can_contain(e, TableCell(:c, true, 1)) + @test ! can_contain(e, PseudoBlock(true)) + @test ! can_contain(e, PseudoBlock(false)) + @test can_contain(e, PseudoInline(true)) + @test can_contain(e, PseudoInline(false)) + end + @test_throws ErrorException Table([:foo]) end diff --git a/test/node.jl b/test/node.jl index 0bca1ee..7668744 100644 --- a/test/node.jl +++ b/test/node.jl @@ -81,10 +81,10 @@ _startswith(prefix) = s -> startswith(s, prefix) stringvar = "bar" containervar = CodeBlock("lang", "code()") tree = @ast Document() do - Paragraph() do + Heading(1) do "Foo" end - BlockQuote() do + Paragraph() do stringvar "Foo" MarkdownAST.Text(stringvar) # call expr @@ -97,13 +97,13 @@ _startswith(prefix) = s -> startswith(s, prefix) # Check the children: cs = collect(tree.children) # first child - @test cs[1].element isa Paragraph + @test cs[1].element isa Heading @test length(cs[1].children) == 1 @test cs[1].parent === tree @test cs[1].previous === nothing @test cs[1].next === cs[2] # second child - @test cs[2].element isa BlockQuote + @test cs[2].element isa Paragraph @test length(cs[2].children) == 3 @test cs[2].parent === tree @test cs[2].previous === cs[1] diff --git a/test/stdlib.jl b/test/stdlib.jl new file mode 100644 index 0000000..3f8b8fe --- /dev/null +++ b/test/stdlib.jl @@ -0,0 +1,73 @@ +using MarkdownAST: MarkdownAST, Node, @ast, Document, + Table, TableHeader, TableBody, TableRow, TableCell, + Emph, Strong, Code, InlineMath +using Markdown: Markdown +using Test + +@testset "Markdown stdlib conversion" begin + # Helper functions + @test MarkdownAST.rpad_array!([], 3, 0) == [0, 0, 0] + @test MarkdownAST.rpad_array!([1], 3, 0) == [1, 0, 0] + @test MarkdownAST.rpad_array!([1, 2], 3, 0) == [1, 2, 0] + @test MarkdownAST.rpad_array!([1, 2, 3], 3, 0) == [1, 2, 3] + @test MarkdownAST.rpad_array!([1, 2, 3, 4], 3, 0) == [1, 2, 3, 4] + @test MarkdownAST.rpad_array!([], 0, 0) == [] + @test MarkdownAST.rpad_array!([], -2, 0) == [] + @test MarkdownAST.rpad_array!([1], 0, 0) == [1] + @test MarkdownAST.rpad_array!([1], -2, 0) == [1] + + @test convert(Node, Markdown.md""" + | Column One | Column Two | Column Three | + |:---------- | ---------- |:------------:| + | Row `1` | Column `2` | | + | *Row* 2 | **Row** 2 | Column ``3`` | + """) == @ast Document() do + Table([:left, :right, :center]) do + TableHeader() do + TableRow() do + TableCell(:left, true, 1) do + "Column One" + end + TableCell(:right, true, 2) do + "Column Two" + end + TableCell(:center, true, 3) do + "Column Three" + end + end + end + TableBody() do + TableRow() do + TableCell(:left, false, 1) do + "Row " + Code("1") + end + TableCell(:right, false, 2) do + "Column " + Code("2") + end + TableCell(:center, false, 3) + end + TableRow() do + TableCell(:left, false, 1) do + Emph() do + "Row" + end + " 2" + end + TableCell(:right, false, 2) do + Strong() do + "Row" + end + " 2" + end + TableCell(:center, false, 3) do + "Column " + InlineMath("3") + end + end + end + end + end + # TODO: tests for problematic cases +end