From c238f14a662d6568b614054fea31bd511a47e78d Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sun, 8 Sep 2024 13:46:44 -0400 Subject: [PATCH 1/4] test: update tests to reflect new libxml2 HTML5 parsing behaviors See https://gitlab.gnome.org/GNOME/libxml2/-/issues/758#note_2217350 --- test/html4/test_comments.rb | 55 +++++++++++++++++++++++++--- test/html4/test_document.rb | 2 + test/html4/test_document_fragment.rb | 24 +++++++++++- test/xml/test_node.rb | 33 ++++++++++++++--- 4 files changed, 102 insertions(+), 12 deletions(-) diff --git a/test/html4/test_comments.rb b/test/html4/test_comments.rb index bef48b134e..5fc4d55d0e 100644 --- a/test/html4/test_comments.rb +++ b/test/html4/test_comments.rb @@ -96,7 +96,19 @@ class TestComment < Nokogiri::TestCase let(:doc) { Nokogiri::HTML4(html) } let(:subject) { doc.at_css("div#under-test") } - if Nokogiri.uses_libxml? + if Nokogiri.uses_libxml?(">= 2.14.0") + it "behaves as if the comment is closed immediately before the end of the input stream" do # COMPLIANT + assert_pattern do + subject => { + name: "div", + attributes: [{ name: "id", value: "under-test" }], + children: [ + { name: "comment", content: "start of unterminated comment" } + ] + } + end + end + elsif Nokogiri.uses_libxml? it "behaves as if the comment is unterminated and doesn't exist" do # NON-COMPLIANT assert_equal 0, subject.children.length assert_equal 1, doc.errors.length @@ -132,8 +144,12 @@ class TestComment < Nokogiri::TestCase assert_equal inner_div, subject.children[1] assert_predicate subject.children[2], :comment? assert_equal "bar", subject.children[2].content - assert_equal 1, doc.errors.length - assert_match(/Comment incorrectly closed/, doc.errors.first.to_s) + if Nokogiri.uses_libxml?(">= 2.14.0") + assert_empty doc.errors + else + assert_equal 1, doc.errors.length + assert_match(/Comment incorrectly closed/, doc.errors.first.to_s) + end end else # jruby, or libxml2 system lib less than 2.9.11 it "behaves as if the comment encompasses the inner div" do # NON-COMPLIANT @@ -161,7 +177,22 @@ class TestComment < Nokogiri::TestCase let(:body) { doc.at_css("body") } let(:subject) { doc.at_css("div#under-test") } - if Nokogiri.uses_libxml?("= 2.9.14") + if Nokogiri.uses_libxml?(">= 2.14.0") + it "parses as comments" do # COMPLIANT + assert_pattern do + body.children => [ + { + name: "div", + children: [ + { name: "comment", content: " comment
hello" }, + ] + end + end + elsif Nokogiri.uses_libxml?("= 2.9.14") it "parses as PCDATA" do # NON-COMPLIANT assert_equal 1, body.children.length assert_equal subject, body.children.first @@ -212,7 +243,21 @@ class TestComment < Nokogiri::TestCase let(:body) { doc.at_css("body") } let(:subject) { doc.at_css("div#under-test") } - if Nokogiri.uses_libxml?("= 2.9.14") + if Nokogiri.uses_libxml?(">= 2.14.0") + it "parses the [ + { + name: "div", children: [ + { name: "comment", content: "[if foo]" }, + { name: "div", attributes: [{name: "id", value: "do-i-exist"}] }, + { name: "comment", content: "[endif]" }, + ] + } + ] + end + end + elsif Nokogiri.uses_libxml?("= 2.9.14") it "parses the = 2.14.0") + [Nokogiri::XML::Node::COMMENT_NODE, Nokogiri::XML::Node::COMMENT_NODE] elsif Nokogiri.uses_libxml?(">= 2.10.0") [Nokogiri::XML::Node::COMMENT_NODE] else diff --git a/test/html4/test_document_fragment.rb b/test/html4/test_document_fragment.rb index a157ff025e..eca57ce147 100644 --- a/test/html4/test_document_fragment.rb +++ b/test/html4/test_document_fragment.rb @@ -188,7 +188,29 @@ def test_element_children_counts def test_malformed_fragment_is_corrected fragment = Nokogiri::HTML4::DocumentFragment.parse("
") - assert_equal("
", fragment.to_s) + + if Nokogiri.uses_libxml?(">= 2.14.0") + assert_pattern do + fragment => [ + { name: "div", attributes: [ + { name: "<", value: ""}, + { name: "div", value: ""}, + ]} + ] + end + else + assert_equal("
", fragment.to_s) + end + end + + def test_malformed_html5_fragment_serializes_like_gumbo + skip_unless_libxml2(">= 2.14.0") + + fragment = Nokogiri::HTML4::DocumentFragment.parse("
") + + pending "libxml2 does not serialize HTML5 like gumbo (yet)" do + assert_equal('
', fragment.to_s) + end end def test_unclosed_script_tag diff --git a/test/xml/test_node.rb b/test/xml/test_node.rb index 4a8e1fb40c..abb11161a3 100644 --- a/test/xml/test_node.rb +++ b/test/xml/test_node.rb @@ -105,9 +105,19 @@ def test_node_context_parsing_of_malformed_html_fragment context_node = doc.at_css("div") nodeset = context_node.parse("
") - assert_equal(1, doc.errors.length) - assert_equal(1, nodeset.length) - assert_equal("
", nodeset.to_s) + if Nokogiri.uses_libxml?(">= 2.14.0") + assert_empty(doc.errors) + assert_pattern do + nodeset => [ + { name: "div", attributes: [{name: "<", value: ""}, { name: "div", value: ""}] }, + ] + end + else + assert_equal(1, doc.errors.length) + assert_equal(1, nodeset.length) + assert_equal("
", nodeset.to_s) + end + assert_instance_of(Nokogiri::HTML4::Document, nodeset.document) assert_instance_of(Nokogiri::HTML4::Document, nodeset.first.document) end @@ -117,14 +127,25 @@ def test_node_context_parsing_of_malformed_html_fragment_with_recover_is_correct context_node = doc.at_css("div") nodeset = context_node.parse("
", &:recover) - assert_equal(1, doc.errors.length) - assert_equal(1, nodeset.length) - assert_equal("
", nodeset.to_s) + if Nokogiri.uses_libxml?(">= 2.14.0") + assert_empty(doc.errors) + assert_pattern do + nodeset => [ + { name: "div", attributes: [{name: "<", value: ""}, { name: "div", value: ""}] }, + ] + end + else + assert_equal(1, doc.errors.length) + assert_equal(1, nodeset.length) + assert_equal("
", nodeset.to_s) + end assert_instance_of(Nokogiri::HTML4::Document, nodeset.document) assert_instance_of(Nokogiri::HTML4::Document, nodeset.first.document) end def test_node_context_parsing_of_malformed_html_fragment_without_recover_is_not_corrected + skip("libxml2 2.14.0 no longer raises this error") if Nokogiri.uses_libxml?(">= 2.14.0") + doc = HTML4.parse("
") context_node = doc.at_css("div") assert_raises(Nokogiri::XML::SyntaxError) do From 8df8693027ab9f16c9b5f94b85f3445297de8b8e Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sat, 5 Oct 2024 15:13:57 -0400 Subject: [PATCH 2/4] feat: DocumentFragment::{XML,HTML4}#parse_options and fix some libxml2 HTML5-related changes, specifically around errors that are no longer generated. --- CHANGELOG.md | 1 + lib/nokogiri/html4/document_fragment.rb | 1 + lib/nokogiri/xml/document_fragment.rb | 6 ++ test/html4/test_document_fragment.rb | 103 +++++++++++------------- 4 files changed, 57 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 025306b9a0..7be0b05520 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,7 @@ We've resolved many long-standing bugs in the various schema classes, validation * Introduce support for a new SAX callback `XML::SAX::Document#reference`, which is called to report some parsed XML entities when `XML::SAX::ParserContext#replace_entities` is set to the default value `false`. This is necessary functionality for some applications that were previously relying on incorrect entity error reporting which has been fixed (see below). For more information, read the docs for `Nokogiri::XML::SAX::Document`. [#1926] @flavorjones * `XML::SAX::Parser#parse_memory` and `#parse_file` now accept an optional `encoding` argument. When not provided, the parser will fall back to the encoding passed to the initializer, and then fall back to autodetection. [#3288] @flavorjones * `XML::SAX::ParserContext.memory` now accepts an optional `encoding` argument. When not provided, the encoding will be autodetected. [#3288] @flavorjones +* `XML::DocumentFragment#parse_options` and `HTML4::DocumentFragment#parse_options` return the options used to parse the document fragment. @flavorjones * [CRuby] `Nokogiri::HTML5::Builder` is similar to `HTML4::Builder` but returns an `HTML5::Document`. [#3119] @flavorjones * [CRuby] Attributes in an HTML5 document can be serialized individually, something that has always been supported by the HTML4 serializer. [#3125, #3127] @flavorjones * [CRuby] Introduce a compile-time option, `--disable-xml2-legacy`, to remove from libxml2 its dependencies on `zlib` and `liblzma` and disable implicit `HTTP` network requests. These all remain enabled by default, and are present in the precompiled native gems. This option is a precursor for removing these libraries in a future major release, but may be interesting for the security-minded who do not need features like automatic decompression and would like to remove these dependencies. You can read more and give feedback on these plans in #3168. [#3247] @flavorjones diff --git a/lib/nokogiri/html4/document_fragment.rb b/lib/nokogiri/html4/document_fragment.rb index 1681822acb..eae79bcb14 100644 --- a/lib/nokogiri/html4/document_fragment.rb +++ b/lib/nokogiri/html4/document_fragment.rb @@ -91,6 +91,7 @@ def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEF return self unless tags options = Nokogiri::XML::ParseOptions.new(options) if Integer === options + @parse_options = options yield options if block_given? if ctx diff --git a/lib/nokogiri/xml/document_fragment.rb b/lib/nokogiri/xml/document_fragment.rb index 40cc8f4fa1..dbdc46b424 100644 --- a/lib/nokogiri/xml/document_fragment.rb +++ b/lib/nokogiri/xml/document_fragment.rb @@ -4,6 +4,11 @@ module Nokogiri module XML class DocumentFragment < Nokogiri::XML::Node + # The options used to parse the document fragment. Returns the value of any options that were + # passed into the constructor as a parameter or set in a config block, else the default + # options for the specific subclass. + attr_reader :parse_options + #### # Create a Nokogiri::XML::DocumentFragment from +tags+ def self.parse(tags, options = ParseOptions::DEFAULT_XML, &block) @@ -20,6 +25,7 @@ def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_ return self unless tags options = Nokogiri::XML::ParseOptions.new(options) if Integer === options + @parse_options = options yield options if block_given? children = if ctx diff --git a/test/html4/test_document_fragment.rb b/test/html4/test_document_fragment.rb index eca57ce147..e5a00b8b90 100644 --- a/test/html4/test_document_fragment.rb +++ b/test/html4/test_document_fragment.rb @@ -220,31 +220,31 @@ def test_unclosed_script_tag end def test_error_propagation_on_fragment_parse - frag = Nokogiri::HTML4::DocumentFragment.parse("oh, hello there.") - assert(frag.errors.any? { |err| err.to_s.include?("Tag hello invalid") }, "errors should be copied to the fragment") + frag = Nokogiri::HTML4::DocumentFragment.parse("oh, hello there") + assert(frag.errors.any? { |err| err.to_s.include?("Unexpected end tag") }, "errors should be copied to the fragment") end def test_error_propagation_on_fragment_parse_in_node_context doc = Nokogiri::HTML4::Document.parse("
") context_node = doc.at_css("div") - frag = Nokogiri::HTML4::DocumentFragment.new(doc, "oh, hello there.", context_node) + frag = Nokogiri::HTML4::DocumentFragment.new(doc, "oh, hello there", context_node) assert( frag.errors.any? do |err| - err.to_s.include?("Tag hello invalid") + err.to_s.include?("Unexpected end tag") end, "errors should be on the context node's document", ) end def test_error_propagation_on_fragment_parse_in_node_context_should_not_include_preexisting_errors - doc = Nokogiri::HTML4::Document.parse("
") + doc = Nokogiri::HTML4::Document.parse("
") assert(doc.errors.any? { |err| err.to_s.include?("jimmy") }, "assert on setup") context_node = doc.at_css("div") - frag = Nokogiri::HTML4::DocumentFragment.new(doc, "oh, hello there.", context_node) + frag = Nokogiri::HTML4::DocumentFragment.new(doc, "oh, hello there.", context_node) assert( frag.errors.any? do |err| - err.to_s.include?("Tag hello invalid") + err.to_s.include?("goodbye") end, "errors should be on the context node's document", ) @@ -267,14 +267,14 @@ def test_capturing_nonparse_errors_during_fragment_clone def test_capturing_nonparse_errors_during_node_copy_between_fragments # Errors should be emitted while parsing only, and should not change when moving nodes. - frag1 = Nokogiri::HTML4.fragment("one") - frag2 = Nokogiri::HTML4.fragment("two") + frag1 = Nokogiri::HTML4.fragment("
one") + frag2 = Nokogiri::HTML4.fragment("
two") node1 = frag1.at_css("#unique") node2 = frag2.at_css("#unique") original_errors1 = frag1.errors.dup original_errors2 = frag2.errors.dup - assert(original_errors1.any? { |e| e.to_s.include?("Tag diva invalid") }, "it should complain about the tag name") - assert(original_errors2.any? { |e| e.to_s.include?("Tag dive invalid") }, "it should complain about the tag name") + assert(original_errors1.any? { |e| e.to_s.include?("Unexpected end tag") }) + assert(original_errors2.any? { |e| e.to_s.include?("Unexpected end tag") }) node1.add_child(node2) @@ -363,106 +363,101 @@ def test_parse_with_io Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::DEFAULT_HTML).norecover end - let(:input) { "
foofoo
" } it "sets the test up correctly" do assert_predicate(html4_strict, :strict?) end describe "HTML4.fragment" do - it "has sane defaults" do + it "has reasonable defaults" do frag = Nokogiri::HTML4.fragment(input) + assert_equal("
foo
", frag.to_html) - refute_empty(frag.errors) + assert_equal(html4_default, frag.parse_options) end it "accepts options" do - frag = Nokogiri::HTML4.fragment(input, nil, html4_default) - assert_equal("
foo
", frag.to_html) - refute_empty(frag.errors) + frag = Nokogiri::HTML4.fragment(input, nil, html4_strict) - assert_raises(Nokogiri::SyntaxError) do - Nokogiri::HTML4.fragment(input, nil, html4_strict) - end + assert_equal("
foo
", frag.to_html) + assert_equal(html4_strict, frag.parse_options) end it "takes a config block" do default_config = nil - Nokogiri::HTML4.fragment(input) do |config| - default_config = config + frag = Nokogiri::HTML4.fragment(input) do |config| + default_config = config.dup + config.strict end - refute_predicate(default_config, :strict?) - assert_raises(Nokogiri::SyntaxError) do - Nokogiri::HTML4.fragment(input, &:norecover) - end + assert_equal(html4_default, default_config) + refute_predicate(default_config, :strict?) + assert_predicate(frag.parse_options, :strict?) end end describe "HTML4::DocumentFragment.parse" do - it "has sane defaults" do + it "has reasonable defaults" do frag = Nokogiri::HTML4::DocumentFragment.parse(input) + assert_equal("
foo
", frag.to_html) - refute_empty(frag.errors) + assert_equal(html4_default, frag.parse_options) end it "accepts options" do - frag = Nokogiri::HTML4::DocumentFragment.parse(input, nil, html4_default) - assert_equal("
foo
", frag.to_html) - refute_empty(frag.errors) + frag = Nokogiri::HTML4::DocumentFragment.parse(input, nil, html4_strict) - assert_raises(Nokogiri::SyntaxError) do - Nokogiri::HTML4::DocumentFragment.parse(input, nil, html4_strict) - end + assert_equal("
foo
", frag.to_html) + assert_equal(html4_strict, frag.parse_options) end it "takes a config block" do default_config = nil - Nokogiri::HTML4::DocumentFragment.parse(input) do |config| - default_config = config + frag = Nokogiri::HTML4::DocumentFragment.parse(input) do |config| + default_config = config.dup + config.strict end - refute_predicate(default_config, :strict?) - assert_raises(Nokogiri::SyntaxError) do - Nokogiri::HTML4::DocumentFragment.parse(input, &:norecover) - end + assert_equal(html4_default, default_config) + refute_predicate(default_config, :strict?) + assert_predicate(frag.parse_options, :strict?) end end describe "HTML4::DocumentFragment.new" do describe "without a context node" do - it "has sane defaults" do + it "has reasonable defaults" do frag = Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input) + assert_equal("
foo
", frag.to_html) - refute_empty(frag.errors) + assert_equal(html4_default, frag.parse_options) end it "accepts options" do - frag = Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input, nil, html4_default) - assert_equal("
foo
", frag.to_html) - refute_empty(frag.errors) + frag = Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input, nil, html4_strict) - assert_raises(Nokogiri::SyntaxError) do - Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input, nil, html4_strict) - end + assert_equal("
foo
", frag.to_html) + assert_equal(html4_strict, frag.parse_options) end it "takes a config block" do default_config = nil - Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input) do |config| - default_config = config + frag = Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input) do |config| + default_config = config.dup + config.strict end - refute_predicate(default_config, :strict?) - assert_raises(Nokogiri::SyntaxError) do - Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input, &:norecover) - end + assert_equal(html4_default, default_config) + refute_predicate(default_config, :strict?) + assert_predicate(frag.parse_options, :strict?) end end describe "with a context node" do let(:document) { Nokogiri::HTML4::Document.parse("") } let(:context_node) { document.at_css("context") } + let(:input) { "
foo Date: Sat, 5 Oct 2024 16:04:11 -0400 Subject: [PATCH 3/4] test: update more tests to reflect new libxml2 HTML5 behaviors - starting to deprecate HTML element description data - working around changed error generation - updated CRLF test --- test/html4/sax/test_document_error.rb | 9 ++------- test/html4/test_document.rb | 18 ++++++++---------- test/html4/test_document_encoding.rb | 2 +- test/html4/test_element_description.rb | 17 ++++++++++++++--- test/html4/test_node.rb | 5 +---- 5 files changed, 26 insertions(+), 25 deletions(-) diff --git a/test/html4/sax/test_document_error.rb b/test/html4/sax/test_document_error.rb index 513164148f..fdd15ef694 100644 --- a/test/html4/sax/test_document_error.rb +++ b/test/html4/sax/test_document_error.rb @@ -20,15 +20,10 @@ def start_document end def test_warning_document_encounters_error_but_terminates_normally - # Probably I'm doing something wrong, but I can't make nekohtml report errors, - # despite setting http://cyberneko.org/html/features/report-errors. - # See https://nekohtml.sourceforge.net/settings.html for more info. - # I'd love some help here if someone finds this comment and cares enough to dig in. - skip_unless_libxml2("nekohtml sax parser does not seem to report errors?") - warning_parser = Nokogiri::HTML4::SAX::Parser.new(Nokogiri::SAX::TestCase::Doc.new) warning_parser.parse("<
-
one") - doc2 = Nokogiri::HTML4("two") + doc1 = Nokogiri::HTML4("
one") + doc2 = Nokogiri::HTML4("
two") node1 = doc1.at_css("#unique") node2 = doc2.at_css("#unique") original_errors1 = doc1.errors.dup original_errors2 = doc2.errors.dup - assert(original_errors1.any? { |e| e.to_s.include?("Tag diva invalid") }, "it should complain about the tag name") - assert(original_errors2.any? { |e| e.to_s.include?("Tag dive invalid") }, "it should complain about the tag name") + assert(original_errors1.any? { |e| e.to_s.include?("foo1") }, "it should complain about the tag name") + assert(original_errors2.any? { |e| e.to_s.include?("foo2") }, "it should complain about the tag name") node1.add_child(node2) @@ -804,7 +802,7 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262 end describe "read memory" do - let(:input) { "
" } describe "strict parsing" do let(:parse_options) { html_strict } @@ -826,7 +824,7 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262 end describe "read io" do - let(:input) { StringIO.new("
") } describe "strict parsing" do let(:parse_options) { html_strict } diff --git a/test/html4/test_document_encoding.rb b/test/html4/test_document_encoding.rb index 0abc705781..1cadf15158 100644 --- a/test/html4/test_document_encoding.rb +++ b/test/html4/test_document_encoding.rb @@ -148,7 +148,7 @@ def binopen(file) end describe "error handling" do - RAW = " RAW, "read_io" => StringIO.new(RAW) }.each do |flavor, input| it "#{flavor} should handle errors" do diff --git a/test/html4/test_element_description.rb b/test/html4/test_element_description.rb index bdc8d6ce65..fda891ab53 100644 --- a/test/html4/test_element_description.rb +++ b/test/html4/test_element_description.rb @@ -58,7 +58,9 @@ def test_description def test_subelements sub_elements = ElementDescription["body"].sub_elements - if Nokogiri.uses_libxml? + if Nokogiri.uses_libxml?(">= 2.14.0") + assert_equal(0, sub_elements.length) + elsif Nokogiri.uses_libxml? assert_equal(65, sub_elements.length) else assert_equal(105, sub_elements.length) @@ -66,7 +68,12 @@ def test_subelements end def test_default_sub_element - assert_equal("div", ElementDescription["body"].default_sub_element) + sub_element = ElementDescription["body"].default_sub_element + if Nokogiri.uses_libxml?(">= 2.14.0") + assert_nil(sub_element) + else + assert_equal("div", sub_element) + end end def test_null_default_sub_element @@ -86,7 +93,11 @@ def test_optional_attributes def test_deprecated_attributes attrs = ElementDescription["table"].deprecated_attributes assert(attrs) - assert_equal(2, attrs.length) + if Nokogiri.uses_libxml?(">= 2.14.0") + assert_equal(0, attrs.length) + else + assert_equal(2, attrs.length) + end end def test_required_attributes diff --git a/test/html4/test_node.rb b/test/html4/test_node.rb index 60759b9ceb..45ee8feb19 100644 --- a/test/html4/test_node.rb +++ b/test/html4/test_node.rb @@ -168,13 +168,10 @@ def test_fragment_serialization end def test_to_html_does_not_contain_entities - # as generated by a tool like NKF html = "\r\n

test paragraph\r\nfoo bar

\r\n\r\n" nokogiri = Nokogiri::HTML4.parse(html) - if RUBY_PLATFORM.include?("java") - # NKF linebreak modes are not supported as of jruby 1.2 - # see http://jira.codehaus.org/browse/JRUBY-3602 for status + if Nokogiri.jruby? || Nokogiri.uses_libxml?(">= 2.14.0") assert_equal( "

testparagraph\nfoobar

", nokogiri.at("p").to_html.delete(" "), From a31c095dba4fc4f046f62d3be7478887681adca8 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sat, 5 Oct 2024 16:32:27 -0400 Subject: [PATCH 4/4] test: update to accommodate JRuby --- test/html4/test_document.rb | 5 ++- test/html4/test_document_fragment.rb | 66 ++++++++++++++-------------- 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/test/html4/test_document.rb b/test/html4/test_document.rb index 763a72af1f..988989f5f1 100644 --- a/test/html4/test_document.rb +++ b/test/html4/test_document.rb @@ -664,8 +664,9 @@ def test_capturing_nonparse_errors_during_node_copy_between_docs node2 = doc2.at_css("#unique") original_errors1 = doc1.errors.dup original_errors2 = doc2.errors.dup - assert(original_errors1.any? { |e| e.to_s.include?("foo1") }, "it should complain about the tag name") - assert(original_errors2.any? { |e| e.to_s.include?("foo2") }, "it should complain about the tag name") + + refute_empty(original_errors1) + refute_empty(original_errors2) node1.add_child(node2) diff --git a/test/html4/test_document_fragment.rb b/test/html4/test_document_fragment.rb index e5a00b8b90..6ef3faac07 100644 --- a/test/html4/test_document_fragment.rb +++ b/test/html4/test_document_fragment.rb @@ -221,36 +221,28 @@ def test_unclosed_script_tag def test_error_propagation_on_fragment_parse frag = Nokogiri::HTML4::DocumentFragment.parse("oh, hello there") - assert(frag.errors.any? { |err| err.to_s.include?("Unexpected end tag") }, "errors should be copied to the fragment") + refute_empty(frag.errors) end def test_error_propagation_on_fragment_parse_in_node_context doc = Nokogiri::HTML4::Document.parse("
") context_node = doc.at_css("div") frag = Nokogiri::HTML4::DocumentFragment.new(doc, "oh, hello there", context_node) - assert( - frag.errors.any? do |err| - err.to_s.include?("Unexpected end tag") - end, - "errors should be on the context node's document", - ) + refute_empty(frag.errors) end def test_error_propagation_on_fragment_parse_in_node_context_should_not_include_preexisting_errors - doc = Nokogiri::HTML4::Document.parse("
") - assert(doc.errors.any? { |err| err.to_s.include?("jimmy") }, "assert on setup") + doc = Nokogiri::HTML4::Document.parse("
") + refute_empty(doc.errors) + doc_errors = doc.errors.map(&:to_s) context_node = doc.at_css("div") frag = Nokogiri::HTML4::DocumentFragment.new(doc, "oh, hello there.", context_node) - assert( - frag.errors.any? do |err| - err.to_s.include?("goodbye") - end, - "errors should be on the context node's document", - ) + refute_empty(frag.errors) + assert( frag.errors.none? do |err| - err.to_s.include?("jimmy") + doc_errors.include?(err.to_s) end, "errors should not include pre-existing document errors", ) @@ -273,8 +265,9 @@ def test_capturing_nonparse_errors_during_node_copy_between_fragments node2 = frag2.at_css("#unique") original_errors1 = frag1.errors.dup original_errors2 = frag2.errors.dup - assert(original_errors1.any? { |e| e.to_s.include?("Unexpected end tag") }) - assert(original_errors2.any? { |e| e.to_s.include?("Unexpected end tag") }) + + refute_empty(original_errors1) + refute_empty(original_errors2) node1.add_child(node2) @@ -363,10 +356,17 @@ def test_parse_with_io Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::DEFAULT_HTML).norecover end + let(:html4_huge) do + Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::DEFAULT_HTML).huge + end + let(:input) { "
foo
" } it "sets the test up correctly" do + refute_predicate(html4_default, :strict?) + refute_predicate(html4_default, :huge?) assert_predicate(html4_strict, :strict?) + assert_predicate(html4_huge, :huge?) end describe "HTML4.fragment" do @@ -378,22 +378,22 @@ def test_parse_with_io end it "accepts options" do - frag = Nokogiri::HTML4.fragment(input, nil, html4_strict) + frag = Nokogiri::HTML4.fragment(input, nil, html4_huge) assert_equal("
foo
", frag.to_html) - assert_equal(html4_strict, frag.parse_options) + assert_equal(html4_huge, frag.parse_options) end it "takes a config block" do default_config = nil frag = Nokogiri::HTML4.fragment(input) do |config| default_config = config.dup - config.strict + config.huge end assert_equal(html4_default, default_config) - refute_predicate(default_config, :strict?) - assert_predicate(frag.parse_options, :strict?) + refute_predicate(default_config, :huge?) + assert_predicate(frag.parse_options, :huge?) end end @@ -406,22 +406,22 @@ def test_parse_with_io end it "accepts options" do - frag = Nokogiri::HTML4::DocumentFragment.parse(input, nil, html4_strict) + frag = Nokogiri::HTML4::DocumentFragment.parse(input, nil, html4_huge) assert_equal("
foo
", frag.to_html) - assert_equal(html4_strict, frag.parse_options) + assert_equal(html4_huge, frag.parse_options) end it "takes a config block" do default_config = nil frag = Nokogiri::HTML4::DocumentFragment.parse(input) do |config| default_config = config.dup - config.strict + config.huge end assert_equal(html4_default, default_config) - refute_predicate(default_config, :strict?) - assert_predicate(frag.parse_options, :strict?) + refute_predicate(default_config, :huge?) + assert_predicate(frag.parse_options, :huge?) end end @@ -435,22 +435,22 @@ def test_parse_with_io end it "accepts options" do - frag = Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input, nil, html4_strict) + frag = Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input, nil, html4_huge) assert_equal("
foo
", frag.to_html) - assert_equal(html4_strict, frag.parse_options) + assert_equal(html4_huge, frag.parse_options) end it "takes a config block" do default_config = nil frag = Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input) do |config| default_config = config.dup - config.strict + config.huge end assert_equal(html4_default, default_config) - refute_predicate(default_config, :strict?) - assert_predicate(frag.parse_options, :strict?) + refute_predicate(default_config, :huge?) + assert_predicate(frag.parse_options, :huge?) end end