Cheat sheet

Nokolexbor cheat sheet

Creating documents

Everything in Nokolexbor starts with creating a Document.

# Create a blank document by `new`
doc = Nokolexbor::Document.new
# Create a blank document by parsing an empty string
doc = Nokolexbor::HTML('')
# Create a document by parsing a string of HTML
doc = Nokolexbor::HTML('<div></div>')
# Create a document by passing any object that responds to `#read`
doc = Nokolexbor::HTML(URI.open('https://github.com/serpapi/nokolexbor'))

Working with documents

doc = Nokolexbor.HTML <<-HTML
<html>
  <head><title>This is title</title></head>
  <body>
    <div class="a">
      <span>Text</span>
    </div>
  </body>
</html>
HTML

# Get the title
doc.title
# => "This is title"

# Set the title
doc.title = 'New title'
doc.at_css('head').to_html
# => "<head><title>New title</title></head>"

# Get the root node
doc.root
# => #<Nokolexbor::Element <html>>

# The document itself is a `Node` that refers to the root node of the document, so every method of `Node` also applies to `Document`. For example, searching with a CSS selector:
doc.at_css('div')
# => #<Nokolexbor::Element <div class="a">>

Creating nodes

# Before creating nodes, you need a document
doc = Nokolexbor::HTML('')

# Create an element
doc.create_element("div") # <div></div>
Nokolexbor::Element.new('div', doc) # <div></div>
doc.create_element("div", class: "a") # <div class="a"></div>
doc.create_element("div", "Text") # <div>Text</div>
doc.create_element("div", "Text", class: "a", style: "b") # <div class="a" style="b">Text</div>
doc.create_element("div") { |node| node['class'] = "a" } # <div class="a"></div>

# Create a text node
doc.create_text_node("Some text") # => Some text
Nokolexbor::Text.new("Some text", doc) # => Some text

# Create a comment
doc.create_comment("Some comment") # <!--Some comment-->
Nokolexbor::Comment.new("Some comment", doc) # <!--Some comment-->

# Create a cdata
doc.create_cdata("Some CDATA")
Nokolexbor::CDATA.new("Some CDATA", doc)

# Create a processing instruction
Nokolexbor::ProcessingInstruction.new("xml", "some data", doc) # <?xml some data>

Searching nodes

doc = Nokolexbor.HTML <<-HTML
  <div>
    <span class="a">Text 1</span>
    <span class="b">Text 2</span>
    <span class="a">Text 3</span>
    <span class="b">Text 4</span>
    <span class="a">Text 5</span>
    <span class="b">Text 6</span>
    <div><span>Text 7</span>Text 8</div>
  </div>
HTML
# Search for all the matching nodes with css selector
doc.css('div > span.a')
# Search for all the matching nodes with multiple css selectors
doc.css('div > span.a, div > div')
# Search for all the matching nodes among the children of the called node, with multiple css selectors
doc.at_css('div').css('> span.a, > div')
# Search for the first matching node with css selector
# `#at_css` is faster than `#css` if you only need the first result
doc.at_css('div > span.a')
# Search for the first matching text node with css selector
doc.at_css('div > div > ::text')

# -------------
# Search for all the matching nodes with xpath
doc.xpath('//div / span[@class="a"]')
# Search for all the matching nodes with multiple xpath expressions
doc.xpath('//div / span[@class="a"]', '//div / div')
# Search for the first matching node with xpath
doc.at_xpath('//div / span[@class="a"]')
# Search for the first matching text node with xpath
doc.at_xpath('//div / div / text()')

Working with nodes

node.document # Get owner document

node.name # Get node name
node.name= # Set node name

# Node type
node.type # enum of type
node.cdata?
node.comment?
node.element?
node.fragment?
node.document?
node.text?

# Attributes
node['href'] # Get attribute value (String)
node['href'] = 'http://example.com' # Set attribute value
node.key?('href') # Has attribute?
node.keys # Get an array of attribute names
node.values # Get an array of attribute values
node.delete('href') # Delete attribute
node.each { |attr_name, attr_value| } # Iterate attributes

# Attributes nodes (Nokolexbor::Attribute)
node.attribute('href') # Get attribute node
node.attribute_nodes # Get an array of the attribute nodes
node.attributes # Get a hash of the attribute nodes; its keys are attribute names

# Inserting / modifying nodes
# The param of all the methods of this section can be: String, Node, DocumentFragment, NodeSet
param = '<a>123</a><a>456</a>' # String
param = doc.create_element('div') # Node
param = doc.fragment('<a>123</a><a>456</a>') # DocumentFragment
param = doc.css('div') # NodeSet
# Add `param` as previous sibling
node.previous=(param) 
node.before(param)
node.add_previous_sibling(param)
# Add `param` as next sibling
node.next=(param) 
node.after(param)
node.add_next_sibling(param)
# Add `param` as its child
node.add_child(param)
node << param
# Replace the node (including itself) with `param`
node.replace(param)
node.swap(param)
# Wrap the node with another node.
node.wrap("<div class='container'></div>")
# Replace the node's children with `param`
node.children=(param)
node.inner_html=(param)
# Re-parent the node
node.parent=(node)
# Replace the node's content with a text node containing `string`.
node.content=(string) 

# Deleting nodes
node.remove # Removed nodes can be re-attached to doc.

# Traversing
node.traverse { |node| } # Yields all children recursively.
node.next # Get next node.
node.next_element # Get next node of type Element.
node.previous # Get previous node.
node.previous_element # Get previous node of type Element.
node.parent # Get parent node
node.child # Get first child
node.children # Get the list of children as a NodeSet
node.elements # Get the list of element children of this node as a NodeSet.
node.first_element_child # Get the first child node of this node that is an element.
node.last_element_child # Get the last child node of this node that is an element.
node.ancestors # List ancestor nodes, closest to furthest, as a NodeSet.
node.ancestors(selector) # Return only the ancestors that match the selector. (Slow; avoid in performance-critical code)

# Serialization
# Get the text content of the node
node.content 
node.text
node.inner_text
node.to_str
# Get the inner html of the node
node.inner_html
# Get the outer html of the node
node.outer_html
node.to_html
node.serialize
node.to_s

# Self testing
node.matches?(selector) # Does this node match this selector? (Slow; avoid in performance-critical code)

# Rubyisms
node == another_node # Returns true if the nodes are the same one in memory.
node.clone # Copy this node.

# Write it out to an IO object that responds to `#write`
node.write_to(io)

# Inspection
node.inspect

# Utility
node.fragment(param) # Create a DocumentFragment, relative to this context node, containing the tags in `param`.
node.parse(string) # Parse `string` as a document fragment within the context of this node.

Working with attributes

doc = Nokolexbor.HTML('<div class="a" href="b" style="c"></div>')
node = doc.at_css('div')

# Get attribute (Nokolexbor::Attribute)
attr = node.attribute('class')
# => #<Nokolexbor::Attribute class="a">

# Get name and value
attr.name # "class"
attr.value # "a"

# Set name and value
attr.name = "class1"
attr.value = "a1"
node.to_html # <div class1="a1" href="b" style="c"></div>

attr.parent # => #<Nokolexbor::Element <div class1="a1" href="b" style="c">>

# Traversing
attr.next # => #<Nokolexbor::Attribute href="b">
attr.next.previous == attr # => true

Working with NodeSet

# Create NodeSet
doc = Nokolexbor::HTML('<div class="a"></div><div class="b"></div>')
nodes = Nokolexbor::NodeSet.new(doc, [])

# Get NodeSet by searching methods
nodes = doc.css('div')
nodes = doc.xpath('//div')

# Set operations
nodes | other_nodeset # Union, return a new NodeSet with merged nodes, excluding duplicates
nodes + other_nodeset # Union, return a new NodeSet with merged nodes, excluding duplicates
nodes & other_nodeset # Intersection, return a new NodeSet with the common nodes only
nodes - other_nodeset # Difference, return a new NodeSet with the nodes in this NodeSet that aren't in other_nodeset
nodes.include?(node)
nodes.empty?
nodes.length
nodes.size
nodes.delete(node)

# List operations (includes Enumerable)
nodes.each { |node| }
nodes.map { |node| }
nodes.select { |node| }
nodes.find { |node| }
nodes.first
nodes.last
nodes.reverse
nodes.index(node) # Returns the numeric index or nil
nodes[3] # Get element at index 3
nodes[3, 4] # Return a NodeSet of size 4, starting at index 3
nodes[3..6] # Return a NodeSet using a range of indexes
nodes.pop
nodes.push(node)
nodes.shift

nodes.children # Returns a new NodeSet containing all the children of all the nodes in the NodeSet

# Serialization
nodes.content 
nodes.text
nodes.inner_text
nodes.to_str
# Get the inner html of the nodes
nodes.inner_html
# Get the outer html of the nodes
nodes.outer_html
nodes.to_html
nodes.serialize
nodes.to_s

# Batch operations on nodes
nodes.remove # Remove all its containing nodes.
nodes.wrap("<div class='container'></div>") # Wrap all containing nodes.
nodes.before(datum) # Insert datum before the first Node in this NodeSet # e.g. first.before(datum)
nodes.after(datum) # Insert datum after the last Node in this NodeSet # e.g. last.after(datum)
nodes.attr(key, value) # Set attribute on all containing nodes.
nodes.attr(key) { |node| 'value' } # Set attribute on all containing nodes.
nodes.remove_attr(name) # Remove attribute from all containing nodes.
nodes.add_class(name) # Append class to all containing nodes.

# Searching
nodes.css(selectors)
nodes.at_css(selectors)
nodes.xpath(paths)
nodes.at_xpath(paths)

# Convert to array
nodes.to_a

# Rubyisms
nodes == nodes # Two NodeSets are equal if they contain the same number of elements and if each element is equal to the corresponding element in the other NodeSet
nodes.dup # Duplicate this node set
nodes.inspect

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Cheat sheet

Nokolexbor cheat sheet

Creating documents

Working with documents

Creating nodes

Searching nodes

Working with nodes

Working with attributes

Working with NodeSet

Clone this wiki locally