From b45845fbea02225291e93be826fb46188d1dad35 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Thu, 18 Apr 2024 12:38:19 -0700
Subject: [PATCH 01/37] testing attribute-based protection  [skip ci]

I've modified the escape-text function escape text based on wether or
not it exists in an escapable range.

This commit implements a proof of concept that protects the first
escapable character and will not pass check.
---
 R/asis-nodes.R                  | 22 ++++++++++---
 inst/stylesheets/xml2md_gfm.xsl | 58 +++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/R/asis-nodes.R b/R/asis-nodes.R
index e3264dc..353883b 100644
--- a/R/asis-nodes.R
+++ b/R/asis-nodes.R
@@ -121,11 +121,12 @@ protect_inline_math <- function(body, ns) {
 
   # protect math that is strictly inline
   if (length(imath)) {
-    new_nodes <- purrr::map(imath, fix_fully_inline)
+    purrr::map(imath, label_fully_inline)
+    # new_nodes <- purrr::map(imath, fix_fully_inline)
     # since we split up the nodes, we have to do this node by node
-    for (i in seq(new_nodes)) {
-      add_node_siblings(imath[[i]], new_nodes[[i]], remove = TRUE)
-    }
+    # for (i in seq(new_nodes)) {
+    #   add_node_siblings(imath[[i]], new_nodes[[i]], remove = TRUE)
+    # }
   }
 
   # protect math that is broken across lines or markdown elements
@@ -205,6 +206,19 @@ fix_fully_inline <- function(math) {
   make_text_nodes(char)
 }
 
+label_fully_inline <- function(math) {
+  char <- xml2::xml_text(math)
+  locations <- gregexpr(pattern = inline_dollars_regex("full"), 
+    char, 
+    perl = TRUE
+  )
+  pos <- paste(locations[[1]], collapse = " ")
+  len <- paste(attr(locations[[1]], "match.len"),  collapse = " ")
+  xml2::xml_set_attr(math, "protect.pos", pos)
+  xml2::xml_set_attr(math, "protect.len", len)
+  
+}
+
 #' Transform a character vector of XML into text nodes
 #'
 #' This is useful in the case where we want to modify some text content to
diff --git a/inst/stylesheets/xml2md_gfm.xsl b/inst/stylesheets/xml2md_gfm.xsl
index 54816bc..6ea95c4 100644
--- a/inst/stylesheets/xml2md_gfm.xsl
+++ b/inst/stylesheets/xml2md_gfm.xsl
@@ -8,6 +8,7 @@
     <!-- Import commonmark XSL -->
 
     <xsl:import href="xml2md.xsl"/>
+
     <xsl:template match="/">
         <xsl:apply-imports/>
     </xsl:template>
@@ -16,8 +17,65 @@
 
     <xsl:output method="text" encoding="utf-8"/>
 
+
+    <xsl:template name="escape-text-protect">
+      <xsl:param name="text"/>
+      <xsl:param name="escape" select="'*_`&lt;[]&amp;'"/>
+      <xsl:param name="pos" select="0"/>
+      <xsl:param name="protect.pos" select="0"/>
+      <xsl:param name="protect.len" select="0"/>
+
+      <xsl:variable name="trans" select="translate($text, $escape, '\\\\\\\')"/>
+      <xsl:choose>
+        <xsl:when test="contains($trans, '\')">
+          <xsl:variable name="start" select="substring-before($protect.pos, ' ')"/>
+          <xsl:variable name="len" select="substring-before($protect.len, ' ')"/>
+          <xsl:variable name="end" select="$start + $len - 1"/>
+          <xsl:variable name="safe" select="substring-before($trans, '\')"/>
+          <xsl:variable name="l" select="string-length($safe)"/>
+          <xsl:variable name="newpos" select="$pos + $l - 1"/>
+          <xsl:message terminate="no">
+            <xsl:text>&#10;safe: </xsl:text>
+            <xsl:value-of select="$safe"/>
+            <xsl:text>&#10;length: </xsl:text>
+            <xsl:value-of select="$l"/>
+            <xsl:text>&#9;position: </xsl:text>
+            <xsl:value-of select="$newpos"/>
+            <xsl:text>&#9;range: </xsl:text>
+            <xsl:value-of select="$start"/>
+            <xsl:text> .. </xsl:text>
+            <xsl:value-of select="$end"/>
+            <xsl:text>&#10;translated: </xsl:text>
+            <xsl:value-of select="$trans"/>
+            <xsl:text>&#10;</xsl:text>
+          </xsl:message>
+          <xsl:value-of select="$safe"/>
+          <xsl:if test="($newpos &lt; $start) and ($newpos &gt; $end)">
+            <xsl:text>\</xsl:text>
+          </xsl:if>
+          <xsl:value-of select="substring($text, $l + 1, 1)"/>
+          <xsl:call-template name="escape-text-protect">
+            <xsl:with-param name="text" select="substring($text, $l + 2)"/>
+            <xsl:with-param name="escape" select="$escape"/>
+            <xsl:with-param name="pos" select="$newpos"/>
+          </xsl:call-template>
+        </xsl:when>
+        <xsl:otherwise>
+          <xsl:value-of select="$text"/>
+        </xsl:otherwise>
+      </xsl:choose>
+    </xsl:template>
+
       <!-- Text that needs to be preserved (e.g. math/checkboxes) -->
 
+    <xsl:template match="md:text[@protect.pos]">
+      <xsl:call-template name="escape-text-protect">
+          <xsl:with-param name="text" select="string(.)"/>
+          <xsl:with-param name="protect.pos" select="string(@protect.pos)"/>
+          <xsl:with-param name="protect.len" select="string(@protect.len)"/>
+      </xsl:call-template>
+    </xsl:template>
+
     <xsl:template match="md:emph[@asis='true']">
       <!-- 
         Multiple subscripts in a LaTeX equation will result in emph tags.

From 1adfd97ca1e082973879523579018181abc48da3 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Fri, 19 Apr 2024 09:15:55 -0700
Subject: [PATCH 02/37] first working version of protected escape text.

In this version, we no longer need to split nodes in order to protect
them if we also want them to be continuous. I've taken the XSL template
"escape-text" and modified it so that it takes in three new parameters:

1. `pos`..........the position of the current character
2. `protect.pos`..a space-separated list of starting positions for
   protection
3. `protect.end`..a space-separated list of ending positions for protection

I've also added three new helper templates to handle list contents:

`peek` returns the top of the list, `trim` trims off the first element
of the list (or returns the value if it's not a list), and `adjust-range`
trims a list depending on if the current value is within range.

There's a lot of printing here because I wasn't too confident with
debugging, but based on my test in inst/extdata/xml_protect.xml, it
produces results correctly.
---
 R/asis-nodes.R                  |   8 +--
 inst/extdata/xml_protect.xml    |   7 +++
 inst/stylesheets/xml2md_gfm.xsl | 104 +++++++++++++++++++++++++++++---
 3 files changed, 106 insertions(+), 13 deletions(-)
 create mode 100644 inst/extdata/xml_protect.xml

diff --git a/R/asis-nodes.R b/R/asis-nodes.R
index 353883b..fc1138c 100644
--- a/R/asis-nodes.R
+++ b/R/asis-nodes.R
@@ -212,10 +212,10 @@ label_fully_inline <- function(math) {
     char, 
     perl = TRUE
   )
-  pos <- paste(locations[[1]], collapse = " ")
-  len <- paste(attr(locations[[1]], "match.len"),  collapse = " ")
-  xml2::xml_set_attr(math, "protect.pos", pos)
-  xml2::xml_set_attr(math, "protect.len", len)
+  pos <- locations[[1]]
+  len <- attr(locations[[1]], "match.len")
+  xml2::xml_set_attr(math, "protect.pos", paste(pos, collapse = " "))
+  xml2::xml_set_attr(math, "protect.end", paste(pos + len, collapse = " "))
   
 }
 
diff --git a/inst/extdata/xml_protect.xml b/inst/extdata/xml_protect.xml
new file mode 100644
index 0000000..125c967
--- /dev/null
+++ b/inst/extdata/xml_protect.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE document SYSTEM "CommonMark.dtd">
+<document xmlns="http://commonmark.org/xml/1.0">
+  <paragraph>
+    <text xml:space="preserve" protect.pos="27 47 57 89" protect.end="31 51 71 103">This is a thing with some $x_i$ math elements $x_j$ and $let x_i = x_j$ [not math] with $\frac{i}{\pi}$ after all.</text>
+  </paragraph>
+</document>
diff --git a/inst/stylesheets/xml2md_gfm.xsl b/inst/stylesheets/xml2md_gfm.xsl
index 6ea95c4..44376c9 100644
--- a/inst/stylesheets/xml2md_gfm.xsl
+++ b/inst/stylesheets/xml2md_gfm.xsl
@@ -17,23 +17,92 @@
 
     <xsl:output method="text" encoding="utf-8"/>
 
+    <xsl:template name="adjust-range">
+      <xsl:param name="current" select="0"/>
+      <xsl:param name="list" select="0"/>
+      <xsl:param name="end" select="0"/>
+      <xsl:choose>
+        <xsl:when test="contains($list, ' ') and ($current &gt; $end)">
+          <xsl:call-template name="adjust-range">
+            <xsl:with-param name="current" select="$current"/>
+            <xsl:with-param name="list">
+              <xsl:call-template name="trim">
+                <xsl:with-param name="list" select="$list"/>
+              </xsl:call-template>
+            </xsl:with-param>
+            <xsl:with-param name="end" select="$current + $end"/>
+          </xsl:call-template>
+        </xsl:when>
+        <xsl:otherwise>
+          <xsl:value-of select="$list"/>
+        </xsl:otherwise>
+      </xsl:choose>
+    </xsl:template>
+
+    <xsl:template name="peek">
+      <xsl:param name="list" select="0"/>
+      <xsl:choose>
+        <xsl:when test="contains($list, ' ')">
+          <xsl:value-of select="substring-before($list, ' ')"/>
+        </xsl:when>
+        <xsl:otherwise>
+          <xsl:value-of select="$list"/>
+        </xsl:otherwise>
+      </xsl:choose>
+    </xsl:template>
+
+    <xsl:template name="trim">
+      <xsl:param name="list" select="0"/>
+      <xsl:choose>
+        <xsl:when test="contains($list, ' ')">
+          <xsl:value-of select="substring-after($list, ' ')"/>
+        </xsl:when>
+        <xsl:otherwise>
+          <xsl:value-of select="$list"/>
+        </xsl:otherwise>
+      </xsl:choose>
+    </xsl:template>
+
 
     <xsl:template name="escape-text-protect">
       <xsl:param name="text"/>
       <xsl:param name="escape" select="'*_`&lt;[]&amp;'"/>
-      <xsl:param name="pos" select="0"/>
+      <xsl:param name="pos" select="1"/>
       <xsl:param name="protect.pos" select="0"/>
-      <xsl:param name="protect.len" select="0"/>
+      <xsl:param name="protect.end" select="0"/>
 
       <xsl:variable name="trans" select="translate($text, $escape, '\\\\\\\')"/>
       <xsl:choose>
         <xsl:when test="contains($trans, '\')">
-          <xsl:variable name="start" select="substring-before($protect.pos, ' ')"/>
-          <xsl:variable name="len" select="substring-before($protect.len, ' ')"/>
-          <xsl:variable name="end" select="$start + $len - 1"/>
+          <xsl:variable name="i" select="substring-before($protect.pos, ' ')"/>
+          <xsl:variable name="k" select="substring-before($protect.end, ' ')"/>
           <xsl:variable name="safe" select="substring-before($trans, '\')"/>
           <xsl:variable name="l" select="string-length($safe)"/>
-          <xsl:variable name="newpos" select="$pos + $l - 1"/>
+          <xsl:variable name="newpos" select="$pos + $l"/>
+          <xsl:variable name="new.pos">
+            <xsl:call-template name="adjust-range">
+              <xsl:with-param name="current" select="$newpos"/>
+              <xsl:with-param name="list" select="$protect.pos"/>
+              <xsl:with-param name="end" select="$k"/>
+            </xsl:call-template>
+          </xsl:variable>
+          <xsl:variable name="new.end">
+            <xsl:call-template name="adjust-range">
+              <xsl:with-param name="current" select="$newpos"/>
+              <xsl:with-param name="list" select="$protect.end"/>
+              <xsl:with-param name="end" select="$k"/>
+            </xsl:call-template>
+          </xsl:variable>
+          <xsl:variable name="start">
+            <xsl:call-template name="peek">
+              <xsl:with-param name="list" select="$new.pos"/>
+            </xsl:call-template>
+          </xsl:variable>
+          <xsl:variable name="end">
+            <xsl:call-template name="peek">
+              <xsl:with-param name="list" select="$new.end"/>
+            </xsl:call-template>
+          </xsl:variable>
           <xsl:message terminate="no">
             <xsl:text>&#10;safe: </xsl:text>
             <xsl:value-of select="$safe"/>
@@ -45,19 +114,36 @@
             <xsl:value-of select="$start"/>
             <xsl:text> .. </xsl:text>
             <xsl:value-of select="$end"/>
+            <xsl:text>&#10;positions: </xsl:text>
+            <xsl:text>(</xsl:text>
+            <xsl:value-of select="$new.pos"/>
+            <xsl:text>) </xsl:text>
+            <xsl:text> (</xsl:text>
+            <xsl:value-of select="$new.end"/>
+            <xsl:text>)</xsl:text>
             <xsl:text>&#10;translated: </xsl:text>
             <xsl:value-of select="$trans"/>
             <xsl:text>&#10;</xsl:text>
           </xsl:message>
+          <!-- print the first part of the string which needs no escaping -->
           <xsl:value-of select="$safe"/>
-          <xsl:if test="($newpos &lt; $start) and ($newpos &gt; $end)">
+          <!-- escape -->
+          <xsl:if test="($newpos &lt; $start) or ($newpos &gt; $end)">
+            <xsl:message>
+              <xsl:text>===>Escaping</xsl:text>
+              <xsl:text>&#10;</xsl:text>
+            </xsl:message>
             <xsl:text>\</xsl:text>
           </xsl:if>
+          <!-- print the escaped character -->
           <xsl:value-of select="substring($text, $l + 1, 1)"/>
+          <!-- recurse until the string is complete -->
           <xsl:call-template name="escape-text-protect">
             <xsl:with-param name="text" select="substring($text, $l + 2)"/>
             <xsl:with-param name="escape" select="$escape"/>
-            <xsl:with-param name="pos" select="$newpos"/>
+            <xsl:with-param name="pos" select="$newpos + 1"/>
+            <xsl:with-param name="protect.pos" select="$new.pos"/>
+            <xsl:with-param name="protect.end" select="$new.end"/>
           </xsl:call-template>
         </xsl:when>
         <xsl:otherwise>
@@ -72,7 +158,7 @@
       <xsl:call-template name="escape-text-protect">
           <xsl:with-param name="text" select="string(.)"/>
           <xsl:with-param name="protect.pos" select="string(@protect.pos)"/>
-          <xsl:with-param name="protect.len" select="string(@protect.len)"/>
+          <xsl:with-param name="protect.end" select="string(@protect.end)"/>
       </xsl:call-template>
     </xsl:template>
 

From f1c57ce4bfc2d0250c3d21c48d635ebc2f34a473 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Fri, 19 Apr 2024 09:33:50 -0700
Subject: [PATCH 03/37] move helper templates; document; remove comments [skip
 ci]

---
 inst/stylesheets/tinkr-helpers.xsl | 136 +++++++++++++++++++++++++++++
 inst/stylesheets/xml2md_gfm.xsl    | 136 +----------------------------
 2 files changed, 137 insertions(+), 135 deletions(-)
 create mode 100644 inst/stylesheets/tinkr-helpers.xsl

diff --git a/inst/stylesheets/tinkr-helpers.xsl b/inst/stylesheets/tinkr-helpers.xsl
new file mode 100644
index 0000000..18fb060
--- /dev/null
+++ b/inst/stylesheets/tinkr-helpers.xsl
@@ -0,0 +1,136 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet
+    version="1.0"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:md="http://commonmark.org/xml/1.0">
+
+    <!-- trim the top of a space-separated list given current and end positions -->
+    <xsl:template name="adjust-range">
+      <xsl:param name="list" select="0"/>
+      <xsl:param name="current" select="0"/>
+      <xsl:param name="end" select="0"/>
+      <xsl:choose>
+        <!-- when the list can be trimmed and we are out of range, trim it -->
+        <xsl:when test="contains($list, ' ') and ($current &gt; $end)">
+          <xsl:call-template name="adjust-range">
+            <xsl:with-param name="current" select="$current"/>
+            <xsl:with-param name="list">
+              <xsl:call-template name="trim">
+                <xsl:with-param name="list" select="$list"/>
+              </xsl:call-template>
+            </xsl:with-param>
+            <xsl:with-param name="end" select="$current + $end"/>
+          </xsl:call-template>
+        </xsl:when>
+        <!-- either we are in range or the list cannot be trimmed, return the list -->
+        <xsl:otherwise>
+          <xsl:value-of select="$list"/>
+        </xsl:otherwise>
+      </xsl:choose>
+    </xsl:template>
+
+    <!-- get the top value of a space-separated list -->
+    <xsl:template name="peek">
+      <xsl:param name="list" select="0"/>
+      <xsl:choose>
+        <xsl:when test="contains($list, ' ')">
+          <xsl:value-of select="substring-before($list, ' ')"/>
+        </xsl:when>
+        <xsl:otherwise>
+          <xsl:value-of select="$list"/>
+        </xsl:otherwise>
+      </xsl:choose>
+    </xsl:template>
+
+    <!-- remove the top value of a space-separated list -->
+    <xsl:template name="trim">
+      <xsl:param name="list" select="0"/>
+      <xsl:choose>
+        <xsl:when test="contains($list, ' ')">
+          <xsl:value-of select="substring-after($list, ' ')"/>
+        </xsl:when>
+        <xsl:otherwise>
+          <xsl:value-of select="$list"/>
+        </xsl:otherwise>
+      </xsl:choose>
+    </xsl:template>
+
+
+    <!-- escape text except for protected text -->
+    <xsl:template name="escape-text-protect">
+      <xsl:param name="text"/>
+      <!-- a set of characters to escape (note that / is implied) -->
+      <xsl:param name="escape" select="'*_`&lt;[]&amp;'"/>
+      <!-- the current position of the text string (defaults to 1) -->
+      <xsl:param name="pos" select="1"/>
+      <!-- a space-separated list of starting positions to protect (defaults to 0) -->
+      <xsl:param name="protect.pos" select="0"/>
+      <!-- a space-separated list of ending positions to protect (defaults to 0) -->
+      <xsl:param name="protect.end" select="0"/>
+
+      <xsl:variable name="trans" select="translate($text, $escape, '\\\\\\\')"/>
+      <xsl:choose>
+        <xsl:when test="contains($trans, '\')">
+          <xsl:variable name="safe" select="substring-before($trans, '\')"/>
+          <xsl:variable name="l" select="string-length($safe)"/>
+          <xsl:variable name="newpos" select="$pos + $l"/>
+          <!-- Determine if we need to advance the protected range 
+            If the current end of the range (k) is less than the current
+            position, then it is outdated and we need to update, but only if
+            there continues to be range to update, which is why we pass it
+            into `adjust-range`.
+
+            NOTE: XPATH cannot "update" variables, it can only create new ones,
+            which is why we are using templates. 
+          --> 
+          <xsl:variable name="k" select="substring-before($protect.end, ' ')"/>
+          <xsl:variable name="new.pos">
+            <xsl:call-template name="adjust-range">
+              <xsl:with-param name="current" select="$newpos"/>
+              <xsl:with-param name="list" select="$protect.pos"/>
+              <xsl:with-param name="end" select="$k"/>
+            </xsl:call-template>
+          </xsl:variable>
+          <xsl:variable name="new.end">
+            <xsl:call-template name="adjust-range">
+              <xsl:with-param name="current" select="$newpos"/>
+              <xsl:with-param name="list" select="$protect.end"/>
+              <xsl:with-param name="end" select="$k"/>
+            </xsl:call-template>
+          </xsl:variable>
+          <!-- Get the current range of protection -->
+          <xsl:variable name="start">
+            <xsl:call-template name="peek">
+              <xsl:with-param name="list" select="$new.pos"/>
+            </xsl:call-template>
+          </xsl:variable>
+          <xsl:variable name="end">
+            <xsl:call-template name="peek">
+              <xsl:with-param name="list" select="$new.end"/>
+            </xsl:call-template>
+          </xsl:variable>
+          <!-- print the first part of the string which needs no escaping -->
+          <xsl:value-of select="$safe"/>
+          <!-- escape if we are in range -->
+          <xsl:if test="($newpos &lt; $start) or ($newpos &gt; $end)">
+            <xsl:text>\</xsl:text>
+          </xsl:if>
+          <!-- print the escaped character -->
+          <xsl:value-of select="substring($text, $l + 1, 1)"/>
+          <!-- recurse until the string is complete -->
+          <xsl:call-template name="escape-text-protect">
+            <xsl:with-param name="text" select="substring($text, $l + 2)"/>
+            <xsl:with-param name="escape" select="$escape"/>
+            <!-- do not forget to advance the position -->
+            <xsl:with-param name="pos" select="$newpos + 1"/>
+            <xsl:with-param name="protect.pos" select="$new.pos"/>
+            <xsl:with-param name="protect.end" select="$new.end"/>
+          </xsl:call-template>
+        </xsl:when>
+        <xsl:otherwise>
+          <xsl:value-of select="$text"/>
+        </xsl:otherwise>
+      </xsl:choose>
+    </xsl:template>
+
+</xsl:stylesheet>
diff --git a/inst/stylesheets/xml2md_gfm.xsl b/inst/stylesheets/xml2md_gfm.xsl
index 44376c9..8710940 100644
--- a/inst/stylesheets/xml2md_gfm.xsl
+++ b/inst/stylesheets/xml2md_gfm.xsl
@@ -8,6 +8,7 @@
     <!-- Import commonmark XSL -->
 
     <xsl:import href="xml2md.xsl"/>
+    <xsl:import href="tinkr-helpers.xsl"/>
 
     <xsl:template match="/">
         <xsl:apply-imports/>
@@ -17,141 +18,6 @@
 
     <xsl:output method="text" encoding="utf-8"/>
 
-    <xsl:template name="adjust-range">
-      <xsl:param name="current" select="0"/>
-      <xsl:param name="list" select="0"/>
-      <xsl:param name="end" select="0"/>
-      <xsl:choose>
-        <xsl:when test="contains($list, ' ') and ($current &gt; $end)">
-          <xsl:call-template name="adjust-range">
-            <xsl:with-param name="current" select="$current"/>
-            <xsl:with-param name="list">
-              <xsl:call-template name="trim">
-                <xsl:with-param name="list" select="$list"/>
-              </xsl:call-template>
-            </xsl:with-param>
-            <xsl:with-param name="end" select="$current + $end"/>
-          </xsl:call-template>
-        </xsl:when>
-        <xsl:otherwise>
-          <xsl:value-of select="$list"/>
-        </xsl:otherwise>
-      </xsl:choose>
-    </xsl:template>
-
-    <xsl:template name="peek">
-      <xsl:param name="list" select="0"/>
-      <xsl:choose>
-        <xsl:when test="contains($list, ' ')">
-          <xsl:value-of select="substring-before($list, ' ')"/>
-        </xsl:when>
-        <xsl:otherwise>
-          <xsl:value-of select="$list"/>
-        </xsl:otherwise>
-      </xsl:choose>
-    </xsl:template>
-
-    <xsl:template name="trim">
-      <xsl:param name="list" select="0"/>
-      <xsl:choose>
-        <xsl:when test="contains($list, ' ')">
-          <xsl:value-of select="substring-after($list, ' ')"/>
-        </xsl:when>
-        <xsl:otherwise>
-          <xsl:value-of select="$list"/>
-        </xsl:otherwise>
-      </xsl:choose>
-    </xsl:template>
-
-
-    <xsl:template name="escape-text-protect">
-      <xsl:param name="text"/>
-      <xsl:param name="escape" select="'*_`&lt;[]&amp;'"/>
-      <xsl:param name="pos" select="1"/>
-      <xsl:param name="protect.pos" select="0"/>
-      <xsl:param name="protect.end" select="0"/>
-
-      <xsl:variable name="trans" select="translate($text, $escape, '\\\\\\\')"/>
-      <xsl:choose>
-        <xsl:when test="contains($trans, '\')">
-          <xsl:variable name="i" select="substring-before($protect.pos, ' ')"/>
-          <xsl:variable name="k" select="substring-before($protect.end, ' ')"/>
-          <xsl:variable name="safe" select="substring-before($trans, '\')"/>
-          <xsl:variable name="l" select="string-length($safe)"/>
-          <xsl:variable name="newpos" select="$pos + $l"/>
-          <xsl:variable name="new.pos">
-            <xsl:call-template name="adjust-range">
-              <xsl:with-param name="current" select="$newpos"/>
-              <xsl:with-param name="list" select="$protect.pos"/>
-              <xsl:with-param name="end" select="$k"/>
-            </xsl:call-template>
-          </xsl:variable>
-          <xsl:variable name="new.end">
-            <xsl:call-template name="adjust-range">
-              <xsl:with-param name="current" select="$newpos"/>
-              <xsl:with-param name="list" select="$protect.end"/>
-              <xsl:with-param name="end" select="$k"/>
-            </xsl:call-template>
-          </xsl:variable>
-          <xsl:variable name="start">
-            <xsl:call-template name="peek">
-              <xsl:with-param name="list" select="$new.pos"/>
-            </xsl:call-template>
-          </xsl:variable>
-          <xsl:variable name="end">
-            <xsl:call-template name="peek">
-              <xsl:with-param name="list" select="$new.end"/>
-            </xsl:call-template>
-          </xsl:variable>
-          <xsl:message terminate="no">
-            <xsl:text>&#10;safe: </xsl:text>
-            <xsl:value-of select="$safe"/>
-            <xsl:text>&#10;length: </xsl:text>
-            <xsl:value-of select="$l"/>
-            <xsl:text>&#9;position: </xsl:text>
-            <xsl:value-of select="$newpos"/>
-            <xsl:text>&#9;range: </xsl:text>
-            <xsl:value-of select="$start"/>
-            <xsl:text> .. </xsl:text>
-            <xsl:value-of select="$end"/>
-            <xsl:text>&#10;positions: </xsl:text>
-            <xsl:text>(</xsl:text>
-            <xsl:value-of select="$new.pos"/>
-            <xsl:text>) </xsl:text>
-            <xsl:text> (</xsl:text>
-            <xsl:value-of select="$new.end"/>
-            <xsl:text>)</xsl:text>
-            <xsl:text>&#10;translated: </xsl:text>
-            <xsl:value-of select="$trans"/>
-            <xsl:text>&#10;</xsl:text>
-          </xsl:message>
-          <!-- print the first part of the string which needs no escaping -->
-          <xsl:value-of select="$safe"/>
-          <!-- escape -->
-          <xsl:if test="($newpos &lt; $start) or ($newpos &gt; $end)">
-            <xsl:message>
-              <xsl:text>===>Escaping</xsl:text>
-              <xsl:text>&#10;</xsl:text>
-            </xsl:message>
-            <xsl:text>\</xsl:text>
-          </xsl:if>
-          <!-- print the escaped character -->
-          <xsl:value-of select="substring($text, $l + 1, 1)"/>
-          <!-- recurse until the string is complete -->
-          <xsl:call-template name="escape-text-protect">
-            <xsl:with-param name="text" select="substring($text, $l + 2)"/>
-            <xsl:with-param name="escape" select="$escape"/>
-            <xsl:with-param name="pos" select="$newpos + 1"/>
-            <xsl:with-param name="protect.pos" select="$new.pos"/>
-            <xsl:with-param name="protect.end" select="$new.end"/>
-          </xsl:call-template>
-        </xsl:when>
-        <xsl:otherwise>
-          <xsl:value-of select="$text"/>
-        </xsl:otherwise>
-      </xsl:choose>
-    </xsl:template>
-
       <!-- Text that needs to be preserved (e.g. math/checkboxes) -->
 
     <xsl:template match="md:text[@protect.pos]">

From 30414604f81bd0d565441057c249f548d4ed2d6f Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Thu, 25 Apr 2024 12:59:58 -0700
Subject: [PATCH 04/37] [xml] use embedded str:tokenize function via EXSLT

I had initially found a tokenize template and had contacted the author
about license information (she gave permission):
<https://exslt.github.io/str/functions/tokenize/str.tokenize.template.xsl.html>

When I was working with it, I found that the function exists as part of
libxml because it bundles EXSLT functions, which allows me to do this
easier and more efficient by tracking and modifying a single index
instead of a pair of strings.
---
 inst/stylesheets/tinkr-helpers.xsl | 111 ++++++-----------------------
 inst/stylesheets/xml2md_gfm.xsl    |  12 ++--
 2 files changed, 30 insertions(+), 93 deletions(-)

diff --git a/inst/stylesheets/tinkr-helpers.xsl b/inst/stylesheets/tinkr-helpers.xsl
index 18fb060..c9652d8 100644
--- a/inst/stylesheets/tinkr-helpers.xsl
+++ b/inst/stylesheets/tinkr-helpers.xsl
@@ -2,60 +2,9 @@
 <xsl:stylesheet
     version="1.0"
     xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:str="http://exslt.org/strings"
     xmlns:md="http://commonmark.org/xml/1.0">
 
-    <!-- trim the top of a space-separated list given current and end positions -->
-    <xsl:template name="adjust-range">
-      <xsl:param name="list" select="0"/>
-      <xsl:param name="current" select="0"/>
-      <xsl:param name="end" select="0"/>
-      <xsl:choose>
-        <!-- when the list can be trimmed and we are out of range, trim it -->
-        <xsl:when test="contains($list, ' ') and ($current &gt; $end)">
-          <xsl:call-template name="adjust-range">
-            <xsl:with-param name="current" select="$current"/>
-            <xsl:with-param name="list">
-              <xsl:call-template name="trim">
-                <xsl:with-param name="list" select="$list"/>
-              </xsl:call-template>
-            </xsl:with-param>
-            <xsl:with-param name="end" select="$current + $end"/>
-          </xsl:call-template>
-        </xsl:when>
-        <!-- either we are in range or the list cannot be trimmed, return the list -->
-        <xsl:otherwise>
-          <xsl:value-of select="$list"/>
-        </xsl:otherwise>
-      </xsl:choose>
-    </xsl:template>
-
-    <!-- get the top value of a space-separated list -->
-    <xsl:template name="peek">
-      <xsl:param name="list" select="0"/>
-      <xsl:choose>
-        <xsl:when test="contains($list, ' ')">
-          <xsl:value-of select="substring-before($list, ' ')"/>
-        </xsl:when>
-        <xsl:otherwise>
-          <xsl:value-of select="$list"/>
-        </xsl:otherwise>
-      </xsl:choose>
-    </xsl:template>
-
-    <!-- remove the top value of a space-separated list -->
-    <xsl:template name="trim">
-      <xsl:param name="list" select="0"/>
-      <xsl:choose>
-        <xsl:when test="contains($list, ' ')">
-          <xsl:value-of select="substring-after($list, ' ')"/>
-        </xsl:when>
-        <xsl:otherwise>
-          <xsl:value-of select="$list"/>
-        </xsl:otherwise>
-      </xsl:choose>
-    </xsl:template>
-
-
     <!-- escape text except for protected text -->
     <xsl:template name="escape-text-protect">
       <xsl:param name="text"/>
@@ -63,53 +12,36 @@
       <xsl:param name="escape" select="'*_`&lt;[]&amp;'"/>
       <!-- the current position of the text string (defaults to 1) -->
       <xsl:param name="pos" select="1"/>
+      <xsl:param name="index" select="1"/>
       <!-- a space-separated list of starting positions to protect (defaults to 0) -->
       <xsl:param name="protect.pos" select="0"/>
       <!-- a space-separated list of ending positions to protect (defaults to 0) -->
       <xsl:param name="protect.end" select="0"/>
 
+
       <xsl:variable name="trans" select="translate($text, $escape, '\\\\\\\')"/>
       <xsl:choose>
         <xsl:when test="contains($trans, '\')">
           <xsl:variable name="safe" select="substring-before($trans, '\')"/>
           <xsl:variable name="l" select="string-length($safe)"/>
           <xsl:variable name="newpos" select="$pos + $l"/>
-          <!-- Determine if we need to advance the protected range 
-            If the current end of the range (k) is less than the current
-            position, then it is outdated and we need to update, but only if
-            there continues to be range to update, which is why we pass it
-            into `adjust-range`.
-
-            NOTE: XPATH cannot "update" variables, it can only create new ones,
-            which is why we are using templates. 
-          --> 
-          <xsl:variable name="k" select="substring-before($protect.end, ' ')"/>
-          <xsl:variable name="new.pos">
-            <xsl:call-template name="adjust-range">
-              <xsl:with-param name="current" select="$newpos"/>
-              <xsl:with-param name="list" select="$protect.pos"/>
-              <xsl:with-param name="end" select="$k"/>
-            </xsl:call-template>
-          </xsl:variable>
-          <xsl:variable name="new.end">
-            <xsl:call-template name="adjust-range">
-              <xsl:with-param name="current" select="$newpos"/>
-              <xsl:with-param name="list" select="$protect.end"/>
-              <xsl:with-param name="end" select="$k"/>
-            </xsl:call-template>
-          </xsl:variable>
-          <!-- Get the current range of protection -->
-          <xsl:variable name="start">
-            <xsl:call-template name="peek">
-              <xsl:with-param name="list" select="$new.pos"/>
-            </xsl:call-template>
-          </xsl:variable>
-          <xsl:variable name="end">
-            <xsl:call-template name="peek">
-              <xsl:with-param name="list" select="$new.end"/>
-            </xsl:call-template>
+          <!-- Update the index variable for the protection -->
+          <xsl:variable name="idx">
+            <xsl:choose>
+              <xsl:when test="count($protect.end) = $index">
+                <xsl:value-of select="$index + 0"/>
+              </xsl:when>
+              <xsl:when test="$newpos &gt; $protect.end[$index + 0]">
+                <xsl:value-of select="$index + 1"/>
+              </xsl:when>
+              <xsl:otherwise>
+                <xsl:value-of select="$index + 0"/>
+              </xsl:otherwise>
+            </xsl:choose>
           </xsl:variable>
-          <!-- print the first part of the string which needs no escaping -->
+          <!-- create a start and end variable for testing -->
+          <xsl:variable name="start" select="$protect.pos[$idx + 0]"/>
+          <xsl:variable name="end" select="$protect.end[$idx + 0]"/>
           <xsl:value-of select="$safe"/>
           <!-- escape if we are in range -->
           <xsl:if test="($newpos &lt; $start) or ($newpos &gt; $end)">
@@ -123,8 +55,9 @@
             <xsl:with-param name="escape" select="$escape"/>
             <!-- do not forget to advance the position -->
             <xsl:with-param name="pos" select="$newpos + 1"/>
-            <xsl:with-param name="protect.pos" select="$new.pos"/>
-            <xsl:with-param name="protect.end" select="$new.end"/>
+            <xsl:with-param name="index" select="$idx"/>
+            <xsl:with-param name="protect.pos" select="$protect.pos"/>
+            <xsl:with-param name="protect.end" select="$protect.end"/>
           </xsl:call-template>
         </xsl:when>
         <xsl:otherwise>
diff --git a/inst/stylesheets/xml2md_gfm.xsl b/inst/stylesheets/xml2md_gfm.xsl
index 8710940..2f1aeb9 100644
--- a/inst/stylesheets/xml2md_gfm.xsl
+++ b/inst/stylesheets/xml2md_gfm.xsl
@@ -2,6 +2,7 @@
 <xsl:stylesheet
     version="1.0"
     xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:str="http://exslt.org/strings"
     xmlns:md="http://commonmark.org/xml/1.0">
 
 
@@ -18,13 +19,16 @@
 
     <xsl:output method="text" encoding="utf-8"/>
 
-      <!-- Text that needs to be preserved (e.g. math/checkboxes) -->
-
+    <!-- Text that needs to be preserved (e.g. math/checkboxes) -->
+    <!-- We use tokenization -->
+    <!-- https://developer.mozilla.org/en-US/docs/Web/EXSLT/str/tokenize -->
     <xsl:template match="md:text[@protect.pos]">
       <xsl:call-template name="escape-text-protect">
           <xsl:with-param name="text" select="string(.)"/>
-          <xsl:with-param name="protect.pos" select="string(@protect.pos)"/>
-          <xsl:with-param name="protect.end" select="string(@protect.end)"/>
+          <xsl:with-param name="protect.pos" 
+            select="str:tokenize(@protect.pos, ' ')"/>
+          <xsl:with-param name="protect.end" 
+            select="str:tokenize(@protect.end, ' ')"/>
       </xsl:call-template>
     </xsl:template>
 

From 31e5a0fde3b1f6d97f88f3aaf720b4f588f6dafc Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Thu, 25 Apr 2024 13:15:36 -0700
Subject: [PATCH 05/37] update comments

---
 inst/stylesheets/tinkr-helpers.xsl | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/inst/stylesheets/tinkr-helpers.xsl b/inst/stylesheets/tinkr-helpers.xsl
index c9652d8..b7ddf59 100644
--- a/inst/stylesheets/tinkr-helpers.xsl
+++ b/inst/stylesheets/tinkr-helpers.xsl
@@ -12,10 +12,11 @@
       <xsl:param name="escape" select="'*_`&lt;[]&amp;'"/>
       <!-- the current position of the text string (defaults to 1) -->
       <xsl:param name="pos" select="1"/>
+      <!-- the index of the current protection -->
       <xsl:param name="index" select="1"/>
-      <!-- a space-separated list of starting positions to protect (defaults to 0) -->
+      <!-- nodeset of tokens generated from tokenizing a list of numbers -->
       <xsl:param name="protect.pos" select="0"/>
-      <!-- a space-separated list of ending positions to protect (defaults to 0) -->
+      <!-- nodeset of tokens generated from tokenizing a list of numbers -->
       <xsl:param name="protect.end" select="0"/>
 
 
@@ -26,11 +27,15 @@
           <xsl:variable name="l" select="string-length($safe)"/>
           <xsl:variable name="newpos" select="$pos + $l"/>
           <!-- Update the index variable for the protection -->
+          <!-- NOTE: it's strange, but the value of the index must be converted to -->
+          <!-- a number with a math operation -->
           <xsl:variable name="idx">
             <xsl:choose>
+              <!-- when the index is at the end of the list, we increment no more -->
               <xsl:when test="count($protect.end) = $index">
                 <xsl:value-of select="$index + 0"/>
               </xsl:when>
+              <!-- if our position extends beyond the boundary of the last protection, we increment -->
               <xsl:when test="$newpos &gt; $protect.end[$index + 0]">
                 <xsl:value-of select="$index + 1"/>
               </xsl:when>
@@ -42,6 +47,7 @@
           <!-- create a start and end variable for testing -->
           <xsl:variable name="start" select="$protect.pos[$idx + 0]"/>
           <xsl:variable name="end" select="$protect.end[$idx + 0]"/>
+
           <xsl:value-of select="$safe"/>
           <!-- escape if we are in range -->
           <xsl:if test="($newpos &lt; $start) or ($newpos &gt; $end)">
@@ -49,6 +55,7 @@
           </xsl:if>
           <!-- print the escaped character -->
           <xsl:value-of select="substring($text, $l + 1, 1)"/>
+
           <!-- recurse until the string is complete -->
           <xsl:call-template name="escape-text-protect">
             <xsl:with-param name="text" select="substring($text, $l + 2)"/>

From 90561ef7725d587ab020bd05fd67e31f7971ac77 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Fri, 26 Apr 2024 11:53:42 -0700
Subject: [PATCH 06/37] add range updator

---
 R/asis-nodes.R                        | 13 -----
 R/node-protection.R                   | 71 +++++++++++++++++++++++++++
 man/protect_unescaped.Rd              |  2 +-
 tests/testthat/test-node-protection.R | 69 ++++++++++++++++++++++++++
 4 files changed, 141 insertions(+), 14 deletions(-)
 create mode 100644 R/node-protection.R
 create mode 100644 tests/testthat/test-node-protection.R

diff --git a/R/asis-nodes.R b/R/asis-nodes.R
index fc1138c..5ba5c70 100644
--- a/R/asis-nodes.R
+++ b/R/asis-nodes.R
@@ -206,19 +206,6 @@ fix_fully_inline <- function(math) {
   make_text_nodes(char)
 }
 
-label_fully_inline <- function(math) {
-  char <- xml2::xml_text(math)
-  locations <- gregexpr(pattern = inline_dollars_regex("full"), 
-    char, 
-    perl = TRUE
-  )
-  pos <- locations[[1]]
-  len <- attr(locations[[1]], "match.len")
-  xml2::xml_set_attr(math, "protect.pos", paste(pos, collapse = " "))
-  xml2::xml_set_attr(math, "protect.end", paste(pos + len, collapse = " "))
-  
-}
-
 #' Transform a character vector of XML into text nodes
 #'
 #' This is useful in the case where we want to modify some text content to
diff --git a/R/node-protection.R b/R/node-protection.R
new file mode 100644
index 0000000..d52715a
--- /dev/null
+++ b/R/node-protection.R
@@ -0,0 +1,71 @@
+label_fully_inline <- function(math) {
+  char <- xml2::xml_text(math)
+  locations <- gregexpr(pattern = inline_dollars_regex("full"), 
+    char, 
+    perl = TRUE
+  )
+  add_protection(math, locations)
+}
+
+add_protection <- function(node, locations) {
+  start <- locations[[1]]
+  end <- start + attr(locations[[1]], "match.len")
+  if (xml2::xml_has_attr(node, "protect.pos")) {
+    # extract the ranges from the attributes
+    ostart <- strsplit(xml2::xml_attr(node, "protect.pos"), " ")[[1]]
+    oend <- strsplit(xml2::xml_attr(node, "protect.end"), " ")[[1]]
+    # update the ranges and the variables
+    new_ranges <- update_ranges(
+      start = c(as.integer(ostart), start),
+      end = c(as.integer(oend), end),
+    )
+    start <- new_ranges$start
+    end <- new_ranges$end
+  }
+  xml2::xml_set_attr(node, "protect.pos", paste(start, collapse = " "))
+  xml2::xml_set_attr(node, "protect.end", paste(end, collapse = " "))
+}
+
+inrange <- function(s1, e1, s2, e2) {
+  s1 <= s2 & e1 >= e2
+}
+
+overlap <- function(s1, e1, s2, e2) {
+  s1 <= e2 & s2 <= e1
+}
+
+
+# https://www.geeksforgeeks.org/merging-intervals/
+update_ranges <- function(start, end) {
+  # Sort the intervals based on the increasing order of starting time.
+  ord <- order(start)
+  start <- start[ord]
+  end <- end[ord]
+  n <- length(start)
+  nstart <- integer(n)
+  nend <- integer(n)
+  # Push the first interval into a stack.
+  nstart[1] <- start[1]
+  nend[1] <- end[1]
+  i <- 2
+  j <- 1
+  while (i <= n) {
+    # For each interval do the following:
+    if (overlap(nstart[j], nend[j], start[i], end[i])) {
+      # If the current interval overlap with the top of the stack then,
+      # update the stack top with the ending time of the current interval.
+      nend[j] <- max(nend[j], end[i])
+    } else {
+      # If the current interval does not overlap with the top of the stack
+      # then, push the current interval into the stack.
+      j <- j + 1
+      nstart[j] <- start[i]
+      nend[j] <- end[i]
+    }
+      i <- i + 1
+    # The end stack contains the merged intervals. 
+  }
+  keep <- seq(j)
+  return(list(start = nstart[keep], end = nend[keep]))
+
+}
diff --git a/man/protect_unescaped.Rd b/man/protect_unescaped.Rd
index 6d31652..44a9de4 100644
--- a/man/protect_unescaped.Rd
+++ b/man/protect_unescaped.Rd
@@ -79,7 +79,7 @@ md <- yarn$new(f, sourcepos = TRUE, unescaped = FALSE)
 md$show()
 if (requireNamespace("withr")) {
 lines <- readLines(f)[-length(md$yaml)]
-lnks <- withr::with_namespace("tinkr", 
+lnks <- withr::with_namespace("tinkr",
   protect_unescaped(body = md$body, txt = lines))
 md$body <- lnks
 md$show()
diff --git a/tests/testthat/test-node-protection.R b/tests/testthat/test-node-protection.R
new file mode 100644
index 0000000..93c8650
--- /dev/null
+++ b/tests/testthat/test-node-protection.R
@@ -0,0 +1,69 @@
+
+test_that("overlap returns false for separate overlaps", {
+  expect_false(overlap(1, 3, 5, 8))
+  expect_false(overlap(5, 8, 1, 3))
+  expect_false(overlap(1, 1, 5, 5))
+})
+test_that("overlap returns true for separate overlaps", {
+  expect_true(overlap(1, 8, 5, 13))
+  expect_true(overlap(5, 13, 1, 8))
+  expect_true(overlap(5, 9, 1, 10))
+  expect_true(overlap(1, 10, 5, 9))
+})
+
+
+test_that("update_ranges will add non-overlapping ranges", {
+  a = list(
+    start = c(1, 5, 10),
+    end = c(3, 8, 50)
+  )
+  b = list(
+    start = c(100, 500, 1000),
+    end = c(300, 800, 5000)
+  )
+  expect_equal(update_ranges(c(b$start, a$start), c(b$end, a$end)),
+    list(start = c(a$start, b$start), end = c(a$end, b$end))
+  )
+})
+
+test_that("update_ranges will merge overlapping ranges", {
+  # in this scenario, we get the ranges from (a) [5, 8], [10, 50], 
+  # joined by the first range from (b) [5, 35] 
+  # which ends up as [5, 50]
+  a = list(
+    start = c(1, 5, 10),
+    end   = c(3, 8, 50)
+  )
+  b = list(
+    start = c(5, 100, 500, 1000),
+    end   = c(35, 300, 800, 5000)
+  )
+  expect_equal(update_ranges(c(b$start, a$start), c(b$end, a$end)),
+    list(start = c(a$start[-3],  b$start[-1]), end = c(a$end[-2], b$end[-1]))
+  )
+})
+
+
+test_that("update_ranges will merge all overlapping ranges", {
+  a = list(
+    start = c(1, 5, 10),
+    end = c(3, 8, 50)
+  )
+  b = list(
+    start = c(100, 500, 1000),
+    end = c(300, 800, 5000)
+  )
+  c = list(
+    start = 1,
+    end = 2500
+  )
+  expect_equal(
+    update_ranges(
+      c(b$start, a$start, c$start), 
+      c(b$end, a$end, c$end)
+    ),
+    list(start = 1, end = 5000)
+  )
+
+})
+

From a7b33c8b812c955046ddea64aed709b383767bc8 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Mon, 29 Apr 2024 10:01:43 -0700
Subject: [PATCH 07/37] update math protection to use labels.

---
 R/asis-nodes.R      |  85 ++++++++++++++++++++--------------
 R/node-protection.R | 108 ++++++++++++++++++++++++++++++++++----------
 2 files changed, 134 insertions(+), 59 deletions(-)

diff --git a/R/asis-nodes.R b/R/asis-nodes.R
index 5ba5c70..1bfaa86 100644
--- a/R/asis-nodes.R
+++ b/R/asis-nodes.R
@@ -104,7 +104,6 @@ find_broken_math <- function(math) {
 #' cat(tinkr::to_md(list(body = protxt, yaml = "")), sep = "\n")
 #' }
 protect_inline_math <- function(body, ns) {
-  body  <- copy_xml(body)
   math  <- find_inline_math(body, ns)
   if (length(math) == 0) {
     return(body)
@@ -120,17 +119,12 @@ protect_inline_math <- function(body, ns) {
   bmath   <- math[!bespoke]
 
   # protect math that is strictly inline
-  if (length(imath)) {
-    purrr::map(imath, label_fully_inline)
-    # new_nodes <- purrr::map(imath, fix_fully_inline)
-    # since we split up the nodes, we have to do this node by node
-    # for (i in seq(new_nodes)) {
-    #   add_node_siblings(imath[[i]], new_nodes[[i]], remove = TRUE)
-    # }
+  if (length(imath) > 0L) {
+    purrr::walk(imath, label_fully_inline)
   }
 
   # protect math that is broken across lines or markdown elements
-  if (length(bmath)) {
+  if (length(bmath) > 0L) {
     if (any(broke$ambiguous)) {
       # ambiguous math may be due to inline r code that produces an answer:
       # $R^2 = `r runif(1)`$
@@ -156,7 +150,7 @@ protect_inline_math <- function(body, ns) {
       fix_partial_inline(i, body, ns)
     }
   }
-  copy_xml(body)
+  body
 }
 
 # Partial inline math are math elements that are not entirely embedded in a
@@ -172,38 +166,61 @@ fix_partial_inline <- function(tag, body, ns) {
   # find everything between the tagged pair
   math_lines <- find_between_inlines(body, ns, tag)
   # make sure everything between the tagged pair is labeled as 'asis'
+  # this is explicitly for symbols like `_`, which denote subscripts in LaTeX,
+  # but make _emph_ text in markdown.
   filling <- math_lines[is.na(xml2::xml_attr(math_lines, "latex-pair"))]
   set_asis(filling)
   filling <- xml2::xml_find_all(filling, ".//node()")
   set_asis(filling)
-  # paste the lines together and create new nodes
-  n <- length(math_lines)
-  char <- as.character(math_lines)
-  char[[1]] <- sub("[$]", "$</text><text asis='true'>", char[[1]])
-  char[[n]] <- sub("[<]text ", "<text asis='true' ", char[[n]])
-  nodes <- paste(char, collapse = "")
-  nodes <- make_text_nodes(nodes)
-  # add the new nodes to the bottom of the existing math lines
-  last_line <- math_lines[n]
-  to_remove <- math_lines[-n]
-  add_node_siblings(last_line, nodes, remove = TRUE)
-  # remove the duplicate lines
-  xml2::xml_remove(to_remove)
+  purrr::walk(math_lines, label_partial_inline)
 }
 
-fix_fully_inline <- function(math) {
-  char <- as.character(math)
-  # Find inline math that is complete and wrap it in text with asis
-  # <text>this is $\LaTeX$ text</text>
-  #   becomes
-  # <text>this is </text><text asis='true'>$\LaTeX$</text><text> text</text>
-  char <- gsub(
-    pattern = inline_dollars_regex("full"),
-    replacement = "</text><text asis='true'>\\1</text><text>",
-    x = char,
+label_partial_inline <- function(math) {
+  char <- xml2::xml_text(math)
+  # find lines that begin with `$` but do not have an end.
+  start <- gregexpr(inline_dollars_regex("start"),
+    char,
+    perl = TRUE
+  )
+  # find lines that end with `$` but do not have a beginning.
+  stop <- gregexpr(inline_dollars_regex("stop"),
+    char,
+    perl = TRUE
+  )
+  has_start <- start[[1]][1] > 0
+  has_end <- stop[[1]][1] > 0
+  if (has_start) {
+    # if the line contains the beginning of an inline math fragment,
+    # we start at the match and end at the end of the string
+    begin <- start[[1]]
+    end <- nchar(char)
+  } else if (has_end) {
+    # if the line contains the end of an inline math fragment,
+    # we start at the beginning of the string and end at the end of the match
+    begin <- 1
+    end <- stop[[1]] + attr(stop[[1]], "match.len")
+  } else {
+    # otherwise, the entire range should be protected.
+    begin <- 1
+    end <- nchar(char)
+  }
+  add_protected_ranges(math, begin, end)
+}
+
+label_fully_inline <- function(math) {
+  char <- xml2::xml_text(math)
+  # Find the locations of inline math that is complete
+  locations <- gregexpr(pattern = inline_dollars_regex("full"),
+    char,
     perl = TRUE
   )
-  make_text_nodes(char)
+  # add the ranges to the attributes
+  # <text>this is $\LaTeX$ text</text>
+  #   becomes
+  # <text protect.start='9' protect.end='16'>this is $\LaTeX$ text</text>
+  start <- locations[[1]]
+  end <- start + attr(locations[[1]], "match.len")
+  add_protected_ranges(math, start, end)
 }
 
 #' Transform a character vector of XML into text nodes
diff --git a/R/node-protection.R b/R/node-protection.R
index d52715a..654b162 100644
--- a/R/node-protection.R
+++ b/R/node-protection.R
@@ -1,41 +1,99 @@
-label_fully_inline <- function(math) {
-  char <- xml2::xml_text(math)
-  locations <- gregexpr(pattern = inline_dollars_regex("full"), 
-    char, 
-    perl = TRUE
-  )
-  add_protection(math, locations)
-}
-
-add_protection <- function(node, locations) {
-  start <- locations[[1]]
-  end <- start + attr(locations[[1]], "match.len")
-  if (xml2::xml_has_attr(node, "protect.pos")) {
+#' Handle protected ranges for a node
+#'
+#' @param node an XML `<text>` node.
+#' @param start `\[integer\]` a vector of starting indices of a set of ranges
+#' @param end `\[integer\]` a vector of ending indices that are paired with
+#'   `start`
+#' @return 
+#'   - `add_protected_ranges()`: the modified node
+#'   - `remove_protected_ranges()`: the modified node
+#'   - `is_protected()`: `TRUE` if the node has protection attributes
+#'   - `get_protected_ranges()` a list containing integer vectors `start` and
+#'     `end` if the node is protected, otherwise, it returns NULL
+#' @rdname protected_ranges
+add_protected_ranges <- function(node, start, end) {
+  if (any(start < 1)) {
+    # return early if there are no ranges to protect
+    return(node)
+  }
+  if (is_protected(node)) {
     # extract the ranges from the attributes
-    ostart <- strsplit(xml2::xml_attr(node, "protect.pos"), " ")[[1]]
-    oend <- strsplit(xml2::xml_attr(node, "protect.end"), " ")[[1]]
+    orig <- get_protected_ranges(node)
     # update the ranges and the variables
-    new_ranges <- update_ranges(
-      start = c(as.integer(ostart), start),
-      end = c(as.integer(oend), end),
-    )
-    start <- new_ranges$start
-    end <- new_ranges$end
+    new <- update_ranges(start = c(start, orig$start), end = c(end, orig$end))
+    start <- new$start
+    end <- new$end
   }
   xml2::xml_set_attr(node, "protect.pos", paste(start, collapse = " "))
   xml2::xml_set_attr(node, "protect.end", paste(end, collapse = " "))
+  return(node)
 }
 
-inrange <- function(s1, e1, s2, e2) {
-  s1 <= s2 & e1 >= e2
+#' @rdname protected_ranges
+is_protected <- function(node) {
+  xml2::xml_has_attr(node, "protect.pos") && 
+    xml2::xml_has_attr(node, "protect.end")
 }
 
+#' @rdname protected_ranges
+get_protected_ranges <- function(node) {
+  if (is_protected(node)) {
+    start <- strsplit(xml2::xml_attr(node, "protect.pos"), " ")[[1]]
+    end <- strsplit(xml2::xml_attr(node, "protect.end"), " ")[[1]]
+  } else {
+    return(NULL)
+  }
+  return(list(start = start, end = end))
+}
+
+#' @rdname protected_ranges
+remove_protected_ranges <- function(node) {
+  xml2::xml_set_attr(node, "protect.pos", NULL)
+  xml2::xml_set_attr(node, "protect.end", NULL)
+  return(node)
+}
+
+#' Detect if two ranges are overlapping
+#' 
+#' @param s1 \[integer\] starting index of first range
+#' @param e1 \[integer\] ending index of first range
+#' @param s2 \[integer\] starting index of second range
+#' @param e2 \[integer\] ending index of second range
+#' @return `TRUE` if the ranges overlap and `FALSE` if they do not
+#'
+#' @noRd
+#' @examples
+#' overlap(1, 10, 5, 15) # TRUE
+#' overlap(1, 4, 5, 15) # FALSE
 overlap <- function(s1, e1, s2, e2) {
   s1 <= e2 & s2 <= e1
 }
 
-
-# https://www.geeksforgeeks.org/merging-intervals/
+#' Update a set of ranges
+#' 
+#' @param start `\[integer\]` a vector of starting indices of a set of ranges
+#' @param end `\[integer\]` a vector of ending indices that are paired with
+#'   `start`
+#' @return a list of two integer vectors each with a length of at least one and
+#'   at most the same length as the input. 
+#'   - `start`
+#'   - `end`
+#'
+#' @details
+#' This function merges a set of ranges based on the algorithm presented in
+#' <https://www.geeksforgeeks.org/merging-intervals/>. If none of the intervals
+#' overlap, then the original `start` and `end` variables are returned sorted by
+#' the starting order.
+#'
+#' If there are overlaps, they will be condensed, removing up to n - 1 intervals
+#'
+#' @noRd
+#' @examples
+#' # in this example, the ranges of [10, 20] overlaps with [5, 15]
+#' ranges <- data.frame(start = c(1, 10, 100), end = c(2, 20, 200))
+#' new <- data.frame(start = c(5, 50, 500), end = c(15, 150, 1500))
+#' # thus they become [5, 20]
+#' update_ranges(c(ranges$start, new$start), c(ranges$end, new$end))
 update_ranges <- function(start, end) {
   # Sort the intervals based on the increasing order of starting time.
   ord <- order(start)

From aeb3f58b92ed01510e3a7f0cb831c63ad84ab66d Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Mon, 29 Apr 2024 11:04:06 -0700
Subject: [PATCH 08/37] make sure node protection exists with an empty set

---
 R/node-protection.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/node-protection.R b/R/node-protection.R
index 654b162..b7ce509 100644
--- a/R/node-protection.R
+++ b/R/node-protection.R
@@ -12,7 +12,7 @@
 #'     `end` if the node is protected, otherwise, it returns NULL
 #' @rdname protected_ranges
 add_protected_ranges <- function(node, start, end) {
-  if (any(start < 1)) {
+  if (length(start) == 0 || any(start < 1)) {
     # return early if there are no ranges to protect
     return(node)
   }

From 8aa181b5e9b7f97423ab4df00e3b36c74bd0ab43 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Mon, 29 Apr 2024 11:31:06 -0700
Subject: [PATCH 09/37] use attributes to protect square bracket nodes

---
 R/asis-nodes.R | 66 +++++++++++++++++++++++++-------------------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/R/asis-nodes.R b/R/asis-nodes.R
index 1bfaa86..42581b6 100644
--- a/R/asis-nodes.R
+++ b/R/asis-nodes.R
@@ -391,11 +391,10 @@ protect_unescaped <- function(body, txt, ns = md_ns()) {
     warning(msg, call. = FALSE)
     return(body)
   }
-  body <- copy_xml(body)
   XPATH <- ".//md:text[not(@asis)][contains(text(), '[') or contains(text(), ']')]"
   snodes <- xml2::xml_find_all(body, XPATH, ns = ns)
   fix_unescaped_squares(snodes, txt)
-  copy_xml(body)
+  return(body)
 }
 
 #' Find the escaped square braces in text vector
@@ -425,9 +424,8 @@ find_escaped_squares <- function(txt) {
 #' not represented as markup, we use their `sourcepos` attributes to determine
 #' the lines and columns of the `txt` where _escaped_ square braces are.
 #'
-#' Knowing this, we can process each node by its line number and wrap all
-#' unescpaed square braces in text nodes with the `@asis` attribute, which is
-#' performed with the [fix_unescaped()] function.
+#' Knowing this, we can add protection attributes to the positions that should
+#' not be escaped.
 #'
 #' @return nothing, invisibly. This function is called for its side-effect.
 #' @noRd
@@ -456,7 +454,7 @@ fix_unescaped_squares <- function(nodes, txt) {
       }
     }
   }
-  invisible()
+  return(invisible())
 }
 
 
@@ -474,17 +472,17 @@ fix_unescaped_squares <- function(nodes, txt) {
 #' will produce a text node like this:
 #'
 #' ```html
-#' <text sourcepos='1:1-1:43'>this is [unescaped] and this is [escaped]</text>
+#' <text sourcepos='1:1-1:43'>
+#' this is [unescaped] and this is [escaped]
+#' </text>
 #' ```
 #'
 #' This function will replace the text node with this:
 #'
 #' ```html
-#' <text sourcepos='1:1-1:43'>this is </text>
-#' <text asis='true'>[</text>
-#' <text>unescaped</text>
-#' <text asis='true'>]</text>
-#' <text> and this is [escaped]</text>
+#' <text sourcepos='1:1-1:43' protect.pos='9 19' protect.end='9 19'>
+#' this is [unescaped] and this is [escaped]
+#' </text>
 #' ```
 #'
 #' This will ensure that the unescaped markdown remains unescaped.
@@ -497,35 +495,37 @@ fix_unescaped_squares <- function(nodes, txt) {
 #'   list items will have an offset of 4L because they are preceeded by ` - `.
 #'   Defaults to `1L`, indicating that this text node starts as a paragraph
 #'   whose parent is the root of the document.
-#' @return new XML nodes, invisibly
+#' @return modified XML nodes, invisibly
 #' @noRd
 fix_unescaped <- function(node, escaped = integer(0), offset = 1L) {
 
   txt <- as.character(node)
-  if (length(escaped) == 0) {
-    # If we have no escaped characters, then we can do a broad substitution
-    unescaped <- TRUE
-  } else {
-    # Converted to text, the node becomes <text ...>Actual text</text> Because
-    # the position is based on the actual text, we need to find the start of
-    # the actual text in the node text
-    text_start <- gregexpr("[>]", txt)[[1]][[1]] + 1L
+  if (length(escaped) > 0) {
     # Because the escaped characters were stripped off, we have to account for
     # a rolling count of the number of escapes
     missing_chars <- seq_along(escaped) - 1L
-    # If the source starts with markup, we have to take into account the offset
-    # position. This will set the escaped to start at the end of the XML markup
-    unescaped <- -(escaped + text_start - offset - missing_chars)
+    # If the source starts with markup (e.g. a list item), we have to take into
+    # account the offset position. This will set the escaped to start at the
+    # end of the XML markup
+    escaped <- escaped - missing_chars - offset + 1L
   }
-  # Here we split the character and exclude the escaped braces, protecting
-  # the unescaped braces.
-  chars <- strsplit(txt, "")[[1]]
-  chars[unescaped] <- sub(
+  return(label_unescaped(node, except = escaped))
+}
+
+
+label_unescaped <- function(node, except = integer(0)) {
+  char <- xml2::xml_text(node)
+  # Find the locations of inline chars that is complete
+  locations <- gregexpr(
     pattern = "(\\[|\\])",
-    replacement = "</text><text asis='true'>\\1</text><text>",
-    x = chars[unescaped]
+    char,
+    perl = TRUE
   )
-  new_nodes <- make_text_nodes(paste(chars, collapse = ""))
-  add_node_siblings(node, new_nodes, remove = TRUE)
+  # add the ranges to the attributes
+  # <text>this is $\LaTeX$ text</text>
+  #   becomes
+  # <text protect.start='9' protect.end='16'>this is $\LaTeX$ text</text>
+  pos <- locations[[1]]
+  pos <- pos[!pos %in% except]
+  return(add_protected_ranges(node, pos, pos))
 }
-

From e51eceb4005e7aba9147564a67ec3e18ae13fe31 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Mon, 29 Apr 2024 11:31:54 -0700
Subject: [PATCH 10/37] use attributes to protect curly nodes

This will address #105
---
 R/attr-nodes.R | 47 ++++++++++++++++++++++-------------------------
 1 file changed, 22 insertions(+), 25 deletions(-)

diff --git a/R/attr-nodes.R b/R/attr-nodes.R
index 488ec3a..ff5cf6d 100644
--- a/R/attr-nodes.R
+++ b/R/attr-nodes.R
@@ -25,25 +25,27 @@ find_curly <- function(body, ns) {
 }
 
 digest_curly <- function(curly, ns) {
-  char <- as.character(curly)
-  curlies <- regmatches(char, gregexpr("\\{.*?\\}", char))[[1]]
-  for (curl in curlies) {
-    attributes <- "curly='true'"
+  label_curly_nodes(curly)
+  char <- xml2::xml_text(curly)
+  alt_fragment <- regmatches(char, gregexpr("alt=['\"].*?['\"]", char))[[1]]
+  if (length(alt_fragment) > 0) {
+    alt_text <- sub("^alt=", "", alt_fragment)
+    xml2::xml_set_attr(curly, "alt", alt_text)
+  }
+}
 
-    alt_fragment <- regmatches(curl, gregexpr("alt=['\"].*?['\"]", curl))[[1]]
-    if (length(alt_fragment) > 0) {
-      alt_text <- sub("^alt=", "", alt_fragment)
-      attributes <- sprintf("%s alt=%s", attributes, alt_text)
-    }
+label_curly_nodes <- function(node) {
+  char <- xml2::xml_text(node)
+  # Find the locations of inline chars that is complete
+  locations <- gregexpr(
+    pattern = "\\{.*?\\}",
+    char,
+    perl = TRUE
+  )
+  start <- locations[[1]]
+  end   <- start + attr(locations[[1]], "match.len")
+  return(add_protected_ranges(node, start, end))
 
-    char <- sub(
-      curl,
-      sprintf("</text><text %s>%s</text><text>", attributes, curl),
-      char,
-      fixed = TRUE
-    )
-  }
-  make_text_nodes(char)
 }
 
 #' Protect curly elements for further processing
@@ -71,12 +73,7 @@ digest_curly <- function(curly, ns) {
 #' m$body <- protect_curly(m$body)
 #' xml2::xml_child(m$body)
 protect_curly <- function(body, ns = md_ns()) {
-  body  <- copy_xml(body)
-  curly  <- find_curly(body, ns)
-  new_nodes <- purrr::map(curly, digest_curly, ns = ns)
-  # since we split up the nodes, we have to do this node by node
-  for (i in seq(new_nodes)) {
-    add_node_siblings(curly[[i]], new_nodes[[i]], remove = TRUE)
-  }
-  copy_xml(body)
+  curly <- find_curly(body, ns)
+  purrr::walk(curly, digest_curly, ns = ns)
+  return(body)
 }

From 1b9ae6ab29a3a8bd4dcb1190d6598fd0816786fd Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Mon, 29 Apr 2024 11:32:32 -0700
Subject: [PATCH 11/37] update tests

---
 tests/testthat/_snaps/attr-nodes.md | 26 +++++---------------------
 1 file changed, 5 insertions(+), 21 deletions(-)

diff --git a/tests/testthat/_snaps/attr-nodes.md b/tests/testthat/_snaps/attr-nodes.md
index 55f8e57..2e0b7c4 100644
--- a/tests/testthat/_snaps/attr-nodes.md
+++ b/tests/testthat/_snaps/attr-nodes.md
@@ -7,19 +7,13 @@
       <!DOCTYPE document SYSTEM "CommonMark.dtd">
       <document xmlns="http://commonmark.org/xml/1.0" sourcepos="1:1-16:20">
         <heading sourcepos="2:1-2:33" level="1">
-          <text sourcepos="2:3-2:33" xml:space="preserve">preface </text>
-          <text curly="true">{#pre-face .unnumbered}</text>
-          <text/>
+          <text sourcepos="2:3-2:33" xml:space="preserve" protect.pos="9" protect.end="32">preface {#pre-face .unnumbered}</text>
         </heading>
         <paragraph sourcepos="4:1-4:5">
           <text sourcepos="4:1-4:5" xml:space="preserve">hello</text>
         </paragraph>
         <paragraph sourcepos="6:1-6:51">
-          <text sourcepos="6:1-6:51" xml:space="preserve">I like </text>
-          <text curly="true">{xml2}</text>
-          <text> but of course </text>
-          <text curly="true">{tinkr}</text>
-          <text> is even cooler!</text>
+          <text sourcepos="6:1-6:51" xml:space="preserve" protect.pos="8 29" protect.end="14 36">I like {xml2} but of course {tinkr} is even cooler!</text>
         </paragraph>
         <paragraph sourcepos="8:1-8:110">
           <text sourcepos="8:1-8:110" xml:space="preserve">Images that use pandoc style will have curlies with content that should be translated and should be protected.</text>
@@ -28,29 +22,19 @@
           <image sourcepos="10:1-10:51" destination="https://placekitten.com/200/300" title="">
             <text sourcepos="10:3-10:17" xml:space="preserve">a pretty kitten</text>
           </image>
-          <text sourcepos="10:52-10:88" xml:space="preserve"/>
-          <text curly="true" alt="a picture of a kitten">{#kitteh alt='a picture of a kitten'}</text>
-          <text/>
+          <text sourcepos="10:52-10:88" xml:space="preserve" protect.pos="1" protect.end="38" alt="'a picture of a kitten'">{#kitteh alt='a picture of a kitten'}</text>
         </paragraph>
         <paragraph sourcepos="12:1-13:10">
           <image sourcepos="12:1-12:47" destination="https://placedog.net/200/300" title="">
             <text sourcepos="12:3-12:16" xml:space="preserve">a pretty puppy</text>
           </image>
-          <text sourcepos="12:48-12:68" xml:space="preserve"/>
-          <text curly="true" alt="a picture of a dog">{#dog alt="a picture
+          <text sourcepos="12:48-12:68" xml:space="preserve" alt="&quot;a picture&#10;of a dog&quot;">{#dog alt="a picture
       of a dog"}</text>
-          <text/>
           <softbreak/>
         </paragraph>
         <paragraph sourcepos="15:1-16:20">
-          <text sourcepos="15:1-15:47" xml:space="preserve"/>
-          <text asis="true">[</text>
-          <text>a span with attributes</text>
-          <text asis="true">]</text>
-          <text/>
-          <text curly="true">{.span-with-attributes
+          <text sourcepos="15:1-15:47" xml:space="preserve" protect.pos="1 24" protect.end="1 24">[a span with attributes]{.span-with-attributes
       style='color: red;'}</text>
-          <text/>
           <softbreak/>
         </paragraph>
       </document>

From 32d2ffdd52053300626241561e2cba103c684645 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Mon, 29 Apr 2024 12:14:53 -0700
Subject: [PATCH 12/37] add NEWS; bump description

---
 DESCRIPTION |  2 +-
 NEWS.md     | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index bb4cc73..1c487b2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: tinkr
 Title: Cast '(R)Markdown' Files to 'XML' and Back Again
-Version: 0.2.0
+Version: 0.2.0.9000
 Authors@R: 
     c(person(given = "Maëlle",
              family = "Salmon",
diff --git a/NEWS.md b/NEWS.md
index 1f0533a..3ecfcbb 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,32 @@
+# tinkr 1.0.0 
+
+## BREAKING CHANGES
+
+Node protection will no longer fragment text nodes into groups of 'asis' and
+regular text nodes. Instead, two attributes `protect.pos` and `protect.end`
+are added that record the ranges of the protected characters. If you have been
+relying on using `@asis` nodes to manipulate your documents, your code will
+break. We are instead exporting helper functions to handle this.
+
+ - `protect_math()`, `protect_curly()`, and `protect_unescaped()` now modify
+   the document in place.
+
+## NEW FUNCTIONS
+
+ - `add_protected_ranges()` adds and updates protected ranges for a given text
+   node
+ - `is_protected()` an indicator if a node has protection or not
+ - `get_protected_ranges()` returns a list of integer vectors that indicate the
+   protected ranges
+ - `remove_protected_ranges()` removes the `protect.pos` and `protect.end`
+   attributes from a node
+
+## BUG FIX
+
+ - `protect_math()` no longer failes if `protect_curly()` was run before it
+   (reported: @maelle, #105; fixed: @zkamvar)
+
+
 # tinkr 0.2.0 
 
 ## BUG FIX

From cd95f58619e4c0c168170f6b7f9da88931f90356 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Mon, 29 Apr 2024 12:17:20 -0700
Subject: [PATCH 13/37] export protection functions

---
 NAMESPACE               |  4 ++++
 R/node-protection.R     |  4 ++++
 man/protected_ranges.Rd | 37 +++++++++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+)
 create mode 100644 man/protected_ranges.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 93033f3..5c0412a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,9 +1,13 @@
 # Generated by roxygen2: do not edit by hand
 
+export(add_protected_ranges)
 export(find_between)
+export(get_protected_ranges)
+export(is_protected)
 export(md_ns)
 export(protect_curly)
 export(protect_math)
+export(remove_protected_ranges)
 export(stylesheet)
 export(to_md)
 export(to_xml)
diff --git a/R/node-protection.R b/R/node-protection.R
index b7ce509..3a09a1a 100644
--- a/R/node-protection.R
+++ b/R/node-protection.R
@@ -11,6 +11,7 @@
 #'   - `get_protected_ranges()` a list containing integer vectors `start` and
 #'     `end` if the node is protected, otherwise, it returns NULL
 #' @rdname protected_ranges
+#' @export
 add_protected_ranges <- function(node, start, end) {
   if (length(start) == 0 || any(start < 1)) {
     # return early if there are no ranges to protect
@@ -30,12 +31,14 @@ add_protected_ranges <- function(node, start, end) {
 }
 
 #' @rdname protected_ranges
+#' @export
 is_protected <- function(node) {
   xml2::xml_has_attr(node, "protect.pos") && 
     xml2::xml_has_attr(node, "protect.end")
 }
 
 #' @rdname protected_ranges
+#' @export
 get_protected_ranges <- function(node) {
   if (is_protected(node)) {
     start <- strsplit(xml2::xml_attr(node, "protect.pos"), " ")[[1]]
@@ -47,6 +50,7 @@ get_protected_ranges <- function(node) {
 }
 
 #' @rdname protected_ranges
+#' @export
 remove_protected_ranges <- function(node) {
   xml2::xml_set_attr(node, "protect.pos", NULL)
   xml2::xml_set_attr(node, "protect.end", NULL)
diff --git a/man/protected_ranges.Rd b/man/protected_ranges.Rd
new file mode 100644
index 0000000..1b471ab
--- /dev/null
+++ b/man/protected_ranges.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/node-protection.R
+\name{add_protected_ranges}
+\alias{add_protected_ranges}
+\alias{is_protected}
+\alias{get_protected_ranges}
+\alias{remove_protected_ranges}
+\title{Handle protected ranges for a node}
+\usage{
+add_protected_ranges(node, start, end)
+
+is_protected(node)
+
+get_protected_ranges(node)
+
+remove_protected_ranges(node)
+}
+\arguments{
+\item{node}{an XML \verb{<text>} node.}
+
+\item{start}{\verb{\[integer\]} a vector of starting indices of a set of ranges}
+
+\item{end}{\verb{\[integer\]} a vector of ending indices that are paired with
+\code{start}}
+}
+\value{
+\itemize{
+\item \code{add_protected_ranges()}: the modified node
+\item \code{remove_protected_ranges()}: the modified node
+\item \code{is_protected()}: \code{TRUE} if the node has protection attributes
+\item \code{get_protected_ranges()} a list containing integer vectors \code{start} and
+\code{end} if the node is protected, otherwise, it returns NULL
+}
+}
+\description{
+Handle protected ranges for a node
+}

From 7c8d35b5e0a9a5d1f767f8ab4ee86b36b360950e Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Mon, 29 Apr 2024 15:50:37 -0700
Subject: [PATCH 14/37] add test for #105

---
 tests/testthat/test-asis-nodes.R | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tests/testthat/test-asis-nodes.R b/tests/testthat/test-asis-nodes.R
index 206548d..7fab492 100644
--- a/tests/testthat/test-asis-nodes.R
+++ b/tests/testthat/test-asis-nodes.R
@@ -100,3 +100,26 @@ test_that("protect_unescaped() will throw a warning if no sourcpos is available"
   })
 })
 
+
+test_that("(105) protection of one element does not impede protection of another", {
+  
+  expected <- "example\n\n$a_{ij}$ \n"
+
+  temp_file <- withr::local_tempfile()
+  brio::write_lines(expected, temp_file)
+  wool <- tinkr::yarn$new(temp_file)
+  n <- xml2::xml_find_all(wool$body, ".//md:text[@protect.pos]", ns = md_ns())
+  expect_length(n, 0)
+
+  wool$protect_curly()
+
+  n <- xml2::xml_find_all(wool$body, ".//md:text[@protect.pos]", ns = md_ns())
+  expect_length(n, 1)
+  expect_equal(get_protected_ranges(n[[1]]), list(start = 4L, end = 7L))
+  expect_no_error(wool$protect_math())
+  n <- xml2::xml_find_all(wool$body, ".//md:text[@protect.pos]", ns = md_ns())
+  expect_length(n, 1)
+  expect_equal(get_protected_ranges(n[[1]]), list(start = 1L, end = 8L))
+  expect_snapshot(show_user(wool$show(), force = TRUE))
+})
+

From fd45728abd705d3b38c5aab418b30584824e81e7 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Mon, 29 Apr 2024 15:51:20 -0700
Subject: [PATCH 15/37] ensure output of get_protected_ranges is integer

---
 R/node-protection.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/node-protection.R b/R/node-protection.R
index 3a09a1a..ca8b0b4 100644
--- a/R/node-protection.R
+++ b/R/node-protection.R
@@ -46,7 +46,7 @@ get_protected_ranges <- function(node) {
   } else {
     return(NULL)
   }
-  return(list(start = start, end = end))
+  return(list(start = as.integer(start), end = as.integer(end)))
 }
 
 #' @rdname protected_ranges

From f79c1239bf7763020dff8f8531ccc80bf1650526 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Mon, 29 Apr 2024 15:51:41 -0700
Subject: [PATCH 16/37] fix off-by-one errors

---
 R/asis-nodes.R | 6 +++---
 R/attr-nodes.R | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/asis-nodes.R b/R/asis-nodes.R
index 42581b6..33195b4 100644
--- a/R/asis-nodes.R
+++ b/R/asis-nodes.R
@@ -198,7 +198,7 @@ label_partial_inline <- function(math) {
     # if the line contains the end of an inline math fragment,
     # we start at the beginning of the string and end at the end of the match
     begin <- 1
-    end <- stop[[1]] + attr(stop[[1]], "match.len")
+    end <- stop[[1]] + attr(stop[[1]], "match.len") - 1L
   } else {
     # otherwise, the entire range should be protected.
     begin <- 1
@@ -219,7 +219,7 @@ label_fully_inline <- function(math) {
   #   becomes
   # <text protect.start='9' protect.end='16'>this is $\LaTeX$ text</text>
   start <- locations[[1]]
-  end <- start + attr(locations[[1]], "match.len")
+  end <- start + attr(locations[[1]], "match.len") - 1L
   add_protected_ranges(math, start, end)
 }
 
@@ -403,7 +403,7 @@ protect_unescaped <- function(body, txt, ns = md_ns()) {
 #' @return the same output as [base::gregexpr()]: a list the same length as
 #' `txt` with integer vectors indicating the character positions of the matches
 #' with attributes:
-#'   1. match.length the length of the match (will be '2')
+#'   1. match.len the length of the match (will be '2')
 #' @noRd
 find_escaped_squares <- function(txt) {
   gregexpr("(\\\\\\])|(\\\\\\[)", txt, useBytes = FALSE)
diff --git a/R/attr-nodes.R b/R/attr-nodes.R
index ff5cf6d..2d1ca1e 100644
--- a/R/attr-nodes.R
+++ b/R/attr-nodes.R
@@ -43,7 +43,7 @@ label_curly_nodes <- function(node) {
     perl = TRUE
   )
   start <- locations[[1]]
-  end   <- start + attr(locations[[1]], "match.len")
+  end   <- start + attr(locations[[1]], "match.len") - 1L
   return(add_protected_ranges(node, start, end))
 
 }

From 29b9b3d1f9516bcd877f3b705208b0479767a589 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Mon, 29 Apr 2024 15:51:51 -0700
Subject: [PATCH 17/37] rerun snaps

---
 tests/testthat/_snaps/asis-nodes.md | 10 ++++++++++
 tests/testthat/_snaps/attr-nodes.md |  6 +++---
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/tests/testthat/_snaps/asis-nodes.md b/tests/testthat/_snaps/asis-nodes.md
index 9f14f0b..18f62d9 100644
--- a/tests/testthat/_snaps/asis-nodes.md
+++ b/tests/testthat/_snaps/asis-nodes.md
@@ -87,3 +87,13 @@
       - [This is a link](https://ropensci.org)
       - \[this is an example\]
 
+# (105) protection of one element does not impede protection of another
+
+    Code
+      show_user(wool$show(), force = TRUE)
+    Output
+      example
+      
+      $a_{ij}$
+      
+
diff --git a/tests/testthat/_snaps/attr-nodes.md b/tests/testthat/_snaps/attr-nodes.md
index 2e0b7c4..a5658b4 100644
--- a/tests/testthat/_snaps/attr-nodes.md
+++ b/tests/testthat/_snaps/attr-nodes.md
@@ -7,13 +7,13 @@
       <!DOCTYPE document SYSTEM "CommonMark.dtd">
       <document xmlns="http://commonmark.org/xml/1.0" sourcepos="1:1-16:20">
         <heading sourcepos="2:1-2:33" level="1">
-          <text sourcepos="2:3-2:33" xml:space="preserve" protect.pos="9" protect.end="32">preface {#pre-face .unnumbered}</text>
+          <text sourcepos="2:3-2:33" xml:space="preserve" protect.pos="9" protect.end="31">preface {#pre-face .unnumbered}</text>
         </heading>
         <paragraph sourcepos="4:1-4:5">
           <text sourcepos="4:1-4:5" xml:space="preserve">hello</text>
         </paragraph>
         <paragraph sourcepos="6:1-6:51">
-          <text sourcepos="6:1-6:51" xml:space="preserve" protect.pos="8 29" protect.end="14 36">I like {xml2} but of course {tinkr} is even cooler!</text>
+          <text sourcepos="6:1-6:51" xml:space="preserve" protect.pos="8 29" protect.end="13 35">I like {xml2} but of course {tinkr} is even cooler!</text>
         </paragraph>
         <paragraph sourcepos="8:1-8:110">
           <text sourcepos="8:1-8:110" xml:space="preserve">Images that use pandoc style will have curlies with content that should be translated and should be protected.</text>
@@ -22,7 +22,7 @@
           <image sourcepos="10:1-10:51" destination="https://placekitten.com/200/300" title="">
             <text sourcepos="10:3-10:17" xml:space="preserve">a pretty kitten</text>
           </image>
-          <text sourcepos="10:52-10:88" xml:space="preserve" protect.pos="1" protect.end="38" alt="'a picture of a kitten'">{#kitteh alt='a picture of a kitten'}</text>
+          <text sourcepos="10:52-10:88" xml:space="preserve" protect.pos="1" protect.end="37" alt="'a picture of a kitten'">{#kitteh alt='a picture of a kitten'}</text>
         </paragraph>
         <paragraph sourcepos="12:1-13:10">
           <image sourcepos="12:1-12:47" destination="https://placedog.net/200/300" title="">

From bbc1e610e576a157d04d6f1cd1dd9773bd694db8 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Tue, 30 Apr 2024 08:46:04 -0700
Subject: [PATCH 18/37] add extra checks for `add_protected_ranges()`

---
 R/node-protection.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/node-protection.R b/R/node-protection.R
index ca8b0b4..eb270ad 100644
--- a/R/node-protection.R
+++ b/R/node-protection.R
@@ -13,7 +13,9 @@
 #' @rdname protected_ranges
 #' @export
 add_protected_ranges <- function(node, start, end) {
-  if (length(start) == 0 || any(start < 1)) {
+  no_beginning <- length(start) == 0 || any(start < 1)
+  can_protect <- inherits(node, "xml_node") && xml2::xml_name(node) == "text"
+  if (no_beginning || !can_protect) {
     # return early if there are no ranges to protect
     return(node)
   }

From b7c94ee9844b6b7269bd68840ff438fe2865f0df Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Tue, 30 Apr 2024 09:46:32 -0700
Subject: [PATCH 19/37] fix failing CI test

The square bracket _should_ be escaped since it's outside of the
protected range.
---
 tests/testthat/test-asis-nodes.R | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/tests/testthat/test-asis-nodes.R b/tests/testthat/test-asis-nodes.R
index 7fab492..6633930 100644
--- a/tests/testthat/test-asis-nodes.R
+++ b/tests/testthat/test-asis-nodes.R
@@ -5,14 +5,36 @@ test_that("mal-formed inline math throws an informative error", {
 })
 
 test_that("multi-line inline math can have punctutation after", {
-  template <- "C) $E(\\text{Weight}) = 81.37 + 1.26 \\times x_1 +\n2.65 \\times x_2$punk\n"
-  for (punk in c('--', '---', ',', ';', '.', '?', ')', ']', '}', '>')) {
-    expected <- sub("punk", punk, template)
+  template <- c(
+    "C) $E(\\text{Weight}) = 81.37 + 1.26 \\times x_1 +",
+    "2.65 \\times x_2$punk\n"
+  )
+  for (punk in c('--', '---', ',', ';', '.', '?', ')', '\\\\]', '}', '>')) {
+    expected <- paste(sub("punk", punk, template), collapse = "\n")
     math <- commonmark::markdown_xml(expected)
     txt <- xml2::read_xml(math)
+    nodes <- xml2::xml_find_all(txt, ".//md:text", ns = md_ns())
+    # no protection initially
+    expect_equal(
+      xml2::xml_attr(nodes, "protect.pos"),
+      c(NA_character_, NA_character_)
+    )
     protxt <- protect_inline_math(txt, md_ns())
+    # the transformed content is identical.
+    expect_identical(txt, protxt)
+    # protection exists
+    expect_equal(
+      xml2::xml_attr(nodes, "protect.pos"),
+      c('4', '1')
+    )
+    expect_equal(
+      xml2::xml_attr(nodes, "protect.end"),
+      c('48', '16')
+    )
     actual <- to_md(list(yaml = NULL, body = protxt))
-    expect_equal(actual, expected)
+    act <- substring(actual, nchar(actual) - 2, nchar(actual) - 1)
+
+    expect_equal(actual, expected, label = act, expected.label = punk)
   }
 })
 

From b478e97f0b30459e2db91d6f628e0347df4c59eb Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Tue, 30 Apr 2024 09:48:40 -0700
Subject: [PATCH 20/37] add text node boolean functions

---
 R/node-protection.R | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/R/node-protection.R b/R/node-protection.R
index eb270ad..23d852b 100644
--- a/R/node-protection.R
+++ b/R/node-protection.R
@@ -14,8 +14,7 @@
 #' @export
 add_protected_ranges <- function(node, start, end) {
   no_beginning <- length(start) == 0 || any(start < 1)
-  can_protect <- inherits(node, "xml_node") && xml2::xml_name(node) == "text"
-  if (no_beginning || !can_protect) {
+  if (no_beginning || not_text_node(node)) {
     # return early if there are no ranges to protect
     return(node)
   }
@@ -32,6 +31,12 @@ add_protected_ranges <- function(node, start, end) {
   return(node)
 }
 
+is_text_node <- function(node) {
+  inherits(node, "xml_node") && xml2::xml_name(node) == "text"
+}
+
+not_text_node <- Negate(is_text_node)
+
 #' @rdname protected_ranges
 #' @export
 is_protected <- function(node) {
@@ -42,7 +47,7 @@ is_protected <- function(node) {
 #' @rdname protected_ranges
 #' @export
 get_protected_ranges <- function(node) {
-  if (is_protected(node)) {
+  if (is_text_node(node) && is_protected(node)) {
     start <- strsplit(xml2::xml_attr(node, "protect.pos"), " ")[[1]]
     end <- strsplit(xml2::xml_attr(node, "protect.end"), " ")[[1]]
   } else {

From 741fbd781bcc82180f22dd64bdd478565b037da7 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Tue, 30 Apr 2024 09:49:27 -0700
Subject: [PATCH 21/37] add comments to test file

---
 tests/testthat/test-asis-nodes.R | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/testthat/test-asis-nodes.R b/tests/testthat/test-asis-nodes.R
index 6633930..745978c 100644
--- a/tests/testthat/test-asis-nodes.R
+++ b/tests/testthat/test-asis-nodes.R
@@ -130,17 +130,23 @@ test_that("(105) protection of one element does not impede protection of another
   temp_file <- withr::local_tempfile()
   brio::write_lines(expected, temp_file)
   wool <- tinkr::yarn$new(temp_file)
+  # no protection initially 
   n <- xml2::xml_find_all(wool$body, ".//md:text[@protect.pos]", ns = md_ns())
   expect_length(n, 0)
 
   wool$protect_curly()
 
+  # protection exists
   n <- xml2::xml_find_all(wool$body, ".//md:text[@protect.pos]", ns = md_ns())
   expect_length(n, 1)
+  # the ranges are initially betwen the curly braces
   expect_equal(get_protected_ranges(n[[1]]), list(start = 4L, end = 7L))
+
+  # protecting for math does not throw an error
   expect_no_error(wool$protect_math())
   n <- xml2::xml_find_all(wool$body, ".//md:text[@protect.pos]", ns = md_ns())
   expect_length(n, 1)
+  # the protected range now extends to the whole line
   expect_equal(get_protected_ranges(n[[1]]), list(start = 1L, end = 8L))
   expect_snapshot(show_user(wool$show(), force = TRUE))
 })

From 2066ba78c16720e6adcc4140c326f51fd0f3c74b Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Tue, 30 Apr 2024 09:49:40 -0700
Subject: [PATCH 22/37] add protection tests

---
 tests/testthat/test-node-protection.R | 112 ++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)

diff --git a/tests/testthat/test-node-protection.R b/tests/testthat/test-node-protection.R
index 93c8650..716b4bd 100644
--- a/tests/testthat/test-node-protection.R
+++ b/tests/testthat/test-node-protection.R
@@ -67,3 +67,115 @@ test_that("update_ranges will merge all overlapping ranges", {
 
 })
 
+
+
+
+test_that("protection can be added and removed", {
+  expected <- c(
+    "\\a\\b\\c\\d",
+    "\\e\\f\\g\\h",
+    ""
+  )
+  temp_file <- withr::local_tempfile()
+  brio::write_lines(expected, temp_file)
+  wool <- tinkr::yarn$new(temp_file, unescaped = FALSE)
+  nodes <- xml2::xml_find_all(wool$body, ".//md:text", ns = md_ns())
+
+  # NO RANGES -------------------------------------------------------
+  # the body is not a text node
+  expect_true(not_text_node(wool$body))
+  # the text nodes are not a single text node
+  expect_true(not_text_node(nodes))
+  # individual text nodes are nodes
+  expect_true(is_text_node(nodes[[1]]))
+  expect_false(not_text_node(nodes[[1]]))
+  expect_false(is_protected(nodes[[1]]))
+  expect_false(is_protected(nodes[[2]]))
+
+  # no range protection exists
+  expect_null(get_protected_ranges(nodes))
+  expect_equal(lapply(nodes, get_protected_ranges),
+    vector(mode = "list", length = 2) # empty list
+  )
+  
+  # no protection applied, the text is all escaped
+  no_protection <- wool$show()
+  expect_equal(no_protection, gsub("\\\\", "\\\\\\\\", expected))
+
+  # ADDING PROTECTION -----------------------------------------------
+  # protecting second and fourth entities
+  expect_false(is_protected(nodes[[1]]))
+
+  add_protected_ranges(nodes[[1]], start = c(3, 7), end = c(4, 8))
+
+  expect_true(is_protected(nodes[[1]]))
+
+  # protecting everything
+  expect_false(is_protected(nodes[[2]]))
+
+  add_protected_ranges(nodes[[2]], start = 1, end = 8)
+
+  expect_true(is_protected(nodes[[2]]))
+
+  expect_equal(get_protected_ranges(nodes[[1]]),
+    list(start = c(3, 7), end = c(4, 8))
+  )
+  expect_equal(get_protected_ranges(nodes[[2]]),
+    list(start = 1, end = 8)
+  )
+  some_protection <- wool$show()
+  # we expect all but the first and third entities to be protected
+  some_expected <- gsub("\\\\([ac])", "\\\\\\\\\\1", expected)
+  expect_equal(object = some_protection, expected = some_expected)
+
+  # OVERLAPPING PROTECTIONS ----------------------------------------
+  # adding identical protections do not duplicate them
+  add_protected_ranges(nodes[[1]], start = c(3, 7), end = c(4, 8))
+  expect_equal(get_protected_ranges(nodes[[1]]),
+    list(start = c(3, 7), end = c(4, 8))
+  )
+  # adding completely overlapping protections does not cause an error
+  add_protected_ranges(nodes[[1]], start = c(3, 7), end = c(4, 8))
+  expect_equal(get_protected_ranges(nodes[[2]]),
+    list(start = 1, end = 8)
+  )
+  some_protection <- wool$show()
+  # we expect all but the first and third entities to be protected
+  some_expected <- gsub("\\\\([ac])", "\\\\\\\\\\1", expected)
+  expect_equal(object = some_protection, expected = some_expected)
+
+  # NEW PROTECTIONS -----------------------------------------------
+  # we can add a new range that does not overlap (but it can abut)
+  add_protected_ranges(nodes[[1]], start = 1, end = 2)
+  expect_equal(get_protected_ranges(nodes[[1]]),
+    list(start = c(1, 3, 7), end = c(2, 4, 8))
+  )
+  some_protection <- wool$show()
+  some_expected <- gsub("\\\\([c])", "\\\\\\\\\\1", expected)
+  expect_equal(object = some_protection, expected = some_expected)
+  
+  # if we add an overlapping range, they are connected
+  add_protected_ranges(nodes[[1]], start = 1, end = 4)
+  expect_equal(get_protected_ranges(nodes[[1]]),
+    list(start = c(1, 7), end = c(4, 8))
+  )
+  some_protection <- wool$show()
+  some_expected <- gsub("\\\\([c])", "\\\\\\\\\\1", expected)
+  expect_equal(object = some_protection, expected = some_expected)
+  
+  # REMOVING PROTECTIONS -----------------------------------------
+  expect_true(is_protected(nodes[[1]]))
+
+  remove_protected_ranges(nodes[[1]])
+
+  expect_false(is_protected(nodes[[1]]))
+
+  expect_null(get_protected_ranges(nodes[[1]]))
+
+  # now the top line is unprotected 
+  some_protection <- wool$show()
+  some_expected <- gsub("\\\\([abcd])", "\\\\\\\\\\1", expected)
+  expect_equal(object = some_protection, expected = some_expected)
+
+})
+

From 739020a16df3cf97f20d435a7ee2a1d0096e6583 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Tue, 30 Apr 2024 09:49:55 -0700
Subject: [PATCH 23/37] update documentation a bit

---
 R/asis-nodes.R      | 2 +-
 man/protect_math.Rd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/asis-nodes.R b/R/asis-nodes.R
index 33195b4..8fb5962 100644
--- a/R/asis-nodes.R
+++ b/R/asis-nodes.R
@@ -17,7 +17,7 @@
 #' txt <- textConnection(tinkr::to_md(m))
 #' cat(tail(readLines(txt)), sep = "\n") # broken math
 #' close(txt)
-#' m$body <- protect_math(m$body)
+#' protect_math(m$body)
 #' txt <- textConnection(tinkr::to_md(m))
 #' cat(tail(readLines(txt)), sep = "\n") # fixed math
 #' close(txt)
diff --git a/man/protect_math.Rd b/man/protect_math.Rd
index bbf25bf..5dc4daa 100644
--- a/man/protect_math.Rd
+++ b/man/protect_math.Rd
@@ -32,7 +32,7 @@ m <- tinkr::to_xml(system.file("extdata", "math-example.md", package = "tinkr"))
 txt <- textConnection(tinkr::to_md(m))
 cat(tail(readLines(txt)), sep = "\n") # broken math
 close(txt)
-m$body <- protect_math(m$body)
+protect_math(m$body)
 txt <- textConnection(tinkr::to_md(m))
 cat(tail(readLines(txt)), sep = "\n") # fixed math
 close(txt)

From 9357b95874193ea79a3b65bd943c13d0be147b86 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Tue, 30 Apr 2024 10:03:39 -0700
Subject: [PATCH 24/37] document node protection

---
 DESCRIPTION             |  1 +
 R/node-protection.R     | 46 ++++++++++++++++++++++++++++++++++++++++
 man/protected_ranges.Rd | 47 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 94 insertions(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index 1c487b2..a3e7e73 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -50,6 +50,7 @@ Suggests:
     rmarkdown,
     covr,
     testthat (>= 3.0.0),
+    brio,
     withr
 Config/testthat/edition: 3
 Encoding: UTF-8
diff --git a/R/node-protection.R b/R/node-protection.R
index 23d852b..dc979b7 100644
--- a/R/node-protection.R
+++ b/R/node-protection.R
@@ -12,6 +12,52 @@
 #'     `end` if the node is protected, otherwise, it returns NULL
 #' @rdname protected_ranges
 #' @export
+#' @examples
+#' # example of text to protect
+#' # SETUP ---------------------
+#' expected <- c(
+#'   "\\a\\b\\c\\d",
+#'   "\\e\\f\\g\\h",
+#'   ""
+#' )
+#' temp_file <- tempfile()
+#' writeLines(expected, temp_file)
+#' wool <- tinkr::yarn$new(temp_file)
+#' nodes <- xml2::xml_find_all(wool$body, ".//md:text", ns = md_ns())
+#' writeLines(expected) # this is how it should appear
+#' wool$show() # nothing is protected, so the '\' are escaped
+#' # ADDING PROTECTION ----------
+#' # protections are added _per node_
+#' add_protected_ranges(nodes[[1]], start = 1, end = 8) # protect whole range
+#' add_protected_ranges(nodes[[2]], start = c(1, 7), end = c(2, 8)) # partial
+#' wool$show() # the first row and "\e" and "\h" are protected
+#' 
+#' # extract the ranges
+#' is_protected(nodes[[1]])
+#' is_protected(nodes[[2]])
+#' get_protected_ranges(nodes[[1]])
+#' get_protected_ranges(nodes[[2]])
+#' 
+#' # Add additional protection that overlaps.
+#' # The current range is [1, 2] and [7, 8]. If we add [5, 8], the range
+#' # will be updated
+#' add_protected_ranges(nodes[[2]], start = 5, end = 8) 
+#' get_protected_ranges(nodes[[2]])
+#' 
+#' # overlapping protection is not duplicated
+#' add_protected_ranges(nodes[[1]], start = 1, end = 4) 
+#' get_protected_ranges(nodes[[1]])
+#'
+#' wool$show() # the first row and "\e", "\g", and "\h" are protected
+#' 
+#' # REMOVING PROTECTION --------
+#' remove_protected_ranges(nodes[[2]])
+#' is_protected(nodes[[2]])
+#' get_protected_ranges(nodes[[2]])
+#' wool$show()
+#' 
+#' # CLEAN UP -------------------
+#' if (file.exists(temp_file)) unlink(temp_file)
 add_protected_ranges <- function(node, start, end) {
   no_beginning <- length(start) == 0 || any(start < 1)
   if (no_beginning || not_text_node(node)) {
diff --git a/man/protected_ranges.Rd b/man/protected_ranges.Rd
index 1b471ab..5892110 100644
--- a/man/protected_ranges.Rd
+++ b/man/protected_ranges.Rd
@@ -35,3 +35,50 @@ remove_protected_ranges(node)
 \description{
 Handle protected ranges for a node
 }
+\examples{
+# example of text to protect
+# SETUP ---------------------
+expected <- c(
+  "\\\\a\\\\b\\\\c\\\\d",
+  "\\\\e\\\\f\\\\g\\\\h",
+  ""
+)
+temp_file <- tempfile()
+writeLines(expected, temp_file)
+wool <- tinkr::yarn$new(temp_file)
+nodes <- xml2::xml_find_all(wool$body, ".//md:text", ns = md_ns())
+writeLines(expected) # this is how it should appear
+wool$show() # nothing is protected, so the '\' are escaped
+# ADDING PROTECTION ----------
+# protections are added _per node_
+add_protected_ranges(nodes[[1]], start = 1, end = 8) # protect whole range
+add_protected_ranges(nodes[[2]], start = c(1, 7), end = c(2, 8)) # partial
+wool$show() # the first row and "\e" and "\h" are protected
+
+# extract the ranges
+is_protected(nodes[[1]])
+is_protected(nodes[[2]])
+get_protected_ranges(nodes[[1]])
+get_protected_ranges(nodes[[2]])
+
+# Add additional protection that overlaps.
+# The current range is [1, 2] and [7, 8]. If we add [5, 8], the range
+# will be updated
+add_protected_ranges(nodes[[2]], start = 5, end = 8) 
+get_protected_ranges(nodes[[2]])
+
+# overlapping protection is not duplicated
+add_protected_ranges(nodes[[1]], start = 1, end = 4) 
+get_protected_ranges(nodes[[1]])
+
+wool$show() # the first row and "\e", "\g", and "\h" are protected
+
+# REMOVING PROTECTION --------
+remove_protected_ranges(nodes[[2]])
+is_protected(nodes[[2]])
+get_protected_ranges(nodes[[2]])
+wool$show()
+
+# CLEAN UP -------------------
+if (file.exists(temp_file)) unlink(temp_file)
+}

From 2614c20b832dddb6200e6d07ad9d5570a9c031d3 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Tue, 30 Apr 2024 10:08:55 -0700
Subject: [PATCH 25/37] rename protect.pos -> protect.start

---
 NEWS.md                             |  4 ++--
 R/asis-nodes.R                      |  2 +-
 R/node-protection.R                 |  8 ++++----
 inst/extdata/xml_protect.xml        |  2 +-
 inst/stylesheets/tinkr-helpers.xsl  |  6 +++---
 inst/stylesheets/xml2md_gfm.xsl     |  6 +++---
 tests/testthat/_snaps/attr-nodes.md |  8 ++++----
 tests/testthat/test-asis-nodes.R    | 10 +++++-----
 8 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 3ed330e..30b9c64 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -3,7 +3,7 @@
 ## BREAKING CHANGES
 
 Node protection will no longer fragment text nodes into groups of 'asis' and
-regular text nodes. Instead, two attributes `protect.pos` and `protect.end`
+regular text nodes. Instead, two attributes `protect.start` and `protect.end`
 are added that record the ranges of the protected characters. If you have been
 relying on using `@asis` nodes to manipulate your documents, your code will
 break. We are instead exporting helper functions to handle this.
@@ -18,7 +18,7 @@ break. We are instead exporting helper functions to handle this.
  - `is_protected()` an indicator if a node has protection or not
  - `get_protected_ranges()` returns a list of integer vectors that indicate the
    protected ranges
- - `remove_protected_ranges()` removes the `protect.pos` and `protect.end`
+ - `remove_protected_ranges()` removes the `protect.start` and `protect.end`
    attributes from a node
 
 ## BUG FIX
diff --git a/R/asis-nodes.R b/R/asis-nodes.R
index 8fb5962..d3dcc0d 100644
--- a/R/asis-nodes.R
+++ b/R/asis-nodes.R
@@ -480,7 +480,7 @@ fix_unescaped_squares <- function(nodes, txt) {
 #' This function will replace the text node with this:
 #'
 #' ```html
-#' <text sourcepos='1:1-1:43' protect.pos='9 19' protect.end='9 19'>
+#' <text sourcepos='1:1-1:43' protect.start='9 19' protect.end='9 19'>
 #' this is [unescaped] and this is [escaped]
 #' </text>
 #' ```
diff --git a/R/node-protection.R b/R/node-protection.R
index dc979b7..a3dc802 100644
--- a/R/node-protection.R
+++ b/R/node-protection.R
@@ -72,7 +72,7 @@ add_protected_ranges <- function(node, start, end) {
     start <- new$start
     end <- new$end
   }
-  xml2::xml_set_attr(node, "protect.pos", paste(start, collapse = " "))
+  xml2::xml_set_attr(node, "protect.start", paste(start, collapse = " "))
   xml2::xml_set_attr(node, "protect.end", paste(end, collapse = " "))
   return(node)
 }
@@ -86,7 +86,7 @@ not_text_node <- Negate(is_text_node)
 #' @rdname protected_ranges
 #' @export
 is_protected <- function(node) {
-  xml2::xml_has_attr(node, "protect.pos") && 
+  xml2::xml_has_attr(node, "protect.start") && 
     xml2::xml_has_attr(node, "protect.end")
 }
 
@@ -94,7 +94,7 @@ is_protected <- function(node) {
 #' @export
 get_protected_ranges <- function(node) {
   if (is_text_node(node) && is_protected(node)) {
-    start <- strsplit(xml2::xml_attr(node, "protect.pos"), " ")[[1]]
+    start <- strsplit(xml2::xml_attr(node, "protect.start"), " ")[[1]]
     end <- strsplit(xml2::xml_attr(node, "protect.end"), " ")[[1]]
   } else {
     return(NULL)
@@ -105,7 +105,7 @@ get_protected_ranges <- function(node) {
 #' @rdname protected_ranges
 #' @export
 remove_protected_ranges <- function(node) {
-  xml2::xml_set_attr(node, "protect.pos", NULL)
+  xml2::xml_set_attr(node, "protect.start", NULL)
   xml2::xml_set_attr(node, "protect.end", NULL)
   return(node)
 }
diff --git a/inst/extdata/xml_protect.xml b/inst/extdata/xml_protect.xml
index 125c967..3213fa5 100644
--- a/inst/extdata/xml_protect.xml
+++ b/inst/extdata/xml_protect.xml
@@ -2,6 +2,6 @@
 <!DOCTYPE document SYSTEM "CommonMark.dtd">
 <document xmlns="http://commonmark.org/xml/1.0">
   <paragraph>
-    <text xml:space="preserve" protect.pos="27 47 57 89" protect.end="31 51 71 103">This is a thing with some $x_i$ math elements $x_j$ and $let x_i = x_j$ [not math] with $\frac{i}{\pi}$ after all.</text>
+    <text xml:space="preserve" protect.start="27 47 57 89" protect.end="31 51 71 103">This is a thing with some $x_i$ math elements $x_j$ and $let x_i = x_j$ [not math] with $\frac{i}{\pi}$ after all.</text>
   </paragraph>
 </document>
diff --git a/inst/stylesheets/tinkr-helpers.xsl b/inst/stylesheets/tinkr-helpers.xsl
index b7ddf59..a8969e1 100644
--- a/inst/stylesheets/tinkr-helpers.xsl
+++ b/inst/stylesheets/tinkr-helpers.xsl
@@ -15,7 +15,7 @@
       <!-- the index of the current protection -->
       <xsl:param name="index" select="1"/>
       <!-- nodeset of tokens generated from tokenizing a list of numbers -->
-      <xsl:param name="protect.pos" select="0"/>
+      <xsl:param name="protect.start" select="0"/>
       <!-- nodeset of tokens generated from tokenizing a list of numbers -->
       <xsl:param name="protect.end" select="0"/>
 
@@ -45,7 +45,7 @@
             </xsl:choose>
           </xsl:variable>
           <!-- create a start and end variable for testing -->
-          <xsl:variable name="start" select="$protect.pos[$idx + 0]"/>
+          <xsl:variable name="start" select="$protect.start[$idx + 0]"/>
           <xsl:variable name="end" select="$protect.end[$idx + 0]"/>
 
           <xsl:value-of select="$safe"/>
@@ -63,7 +63,7 @@
             <!-- do not forget to advance the position -->
             <xsl:with-param name="pos" select="$newpos + 1"/>
             <xsl:with-param name="index" select="$idx"/>
-            <xsl:with-param name="protect.pos" select="$protect.pos"/>
+            <xsl:with-param name="protect.start" select="$protect.start"/>
             <xsl:with-param name="protect.end" select="$protect.end"/>
           </xsl:call-template>
         </xsl:when>
diff --git a/inst/stylesheets/xml2md_gfm.xsl b/inst/stylesheets/xml2md_gfm.xsl
index 2f1aeb9..ac6eaa3 100644
--- a/inst/stylesheets/xml2md_gfm.xsl
+++ b/inst/stylesheets/xml2md_gfm.xsl
@@ -22,11 +22,11 @@
     <!-- Text that needs to be preserved (e.g. math/checkboxes) -->
     <!-- We use tokenization -->
     <!-- https://developer.mozilla.org/en-US/docs/Web/EXSLT/str/tokenize -->
-    <xsl:template match="md:text[@protect.pos]">
+    <xsl:template match="md:text[@protect.start]">
       <xsl:call-template name="escape-text-protect">
           <xsl:with-param name="text" select="string(.)"/>
-          <xsl:with-param name="protect.pos" 
-            select="str:tokenize(@protect.pos, ' ')"/>
+          <xsl:with-param name="protect.start" 
+            select="str:tokenize(@protect.start, ' ')"/>
           <xsl:with-param name="protect.end" 
             select="str:tokenize(@protect.end, ' ')"/>
       </xsl:call-template>
diff --git a/tests/testthat/_snaps/attr-nodes.md b/tests/testthat/_snaps/attr-nodes.md
index a5658b4..bd2becb 100644
--- a/tests/testthat/_snaps/attr-nodes.md
+++ b/tests/testthat/_snaps/attr-nodes.md
@@ -7,13 +7,13 @@
       <!DOCTYPE document SYSTEM "CommonMark.dtd">
       <document xmlns="http://commonmark.org/xml/1.0" sourcepos="1:1-16:20">
         <heading sourcepos="2:1-2:33" level="1">
-          <text sourcepos="2:3-2:33" xml:space="preserve" protect.pos="9" protect.end="31">preface {#pre-face .unnumbered}</text>
+          <text sourcepos="2:3-2:33" xml:space="preserve" protect.start="9" protect.end="31">preface {#pre-face .unnumbered}</text>
         </heading>
         <paragraph sourcepos="4:1-4:5">
           <text sourcepos="4:1-4:5" xml:space="preserve">hello</text>
         </paragraph>
         <paragraph sourcepos="6:1-6:51">
-          <text sourcepos="6:1-6:51" xml:space="preserve" protect.pos="8 29" protect.end="13 35">I like {xml2} but of course {tinkr} is even cooler!</text>
+          <text sourcepos="6:1-6:51" xml:space="preserve" protect.start="8 29" protect.end="13 35">I like {xml2} but of course {tinkr} is even cooler!</text>
         </paragraph>
         <paragraph sourcepos="8:1-8:110">
           <text sourcepos="8:1-8:110" xml:space="preserve">Images that use pandoc style will have curlies with content that should be translated and should be protected.</text>
@@ -22,7 +22,7 @@
           <image sourcepos="10:1-10:51" destination="https://placekitten.com/200/300" title="">
             <text sourcepos="10:3-10:17" xml:space="preserve">a pretty kitten</text>
           </image>
-          <text sourcepos="10:52-10:88" xml:space="preserve" protect.pos="1" protect.end="37" alt="'a picture of a kitten'">{#kitteh alt='a picture of a kitten'}</text>
+          <text sourcepos="10:52-10:88" xml:space="preserve" protect.start="1" protect.end="37" alt="'a picture of a kitten'">{#kitteh alt='a picture of a kitten'}</text>
         </paragraph>
         <paragraph sourcepos="12:1-13:10">
           <image sourcepos="12:1-12:47" destination="https://placedog.net/200/300" title="">
@@ -33,7 +33,7 @@
           <softbreak/>
         </paragraph>
         <paragraph sourcepos="15:1-16:20">
-          <text sourcepos="15:1-15:47" xml:space="preserve" protect.pos="1 24" protect.end="1 24">[a span with attributes]{.span-with-attributes
+          <text sourcepos="15:1-15:47" xml:space="preserve" protect.start="1 24" protect.end="1 24">[a span with attributes]{.span-with-attributes
       style='color: red;'}</text>
           <softbreak/>
         </paragraph>
diff --git a/tests/testthat/test-asis-nodes.R b/tests/testthat/test-asis-nodes.R
index 745978c..2a1bb2d 100644
--- a/tests/testthat/test-asis-nodes.R
+++ b/tests/testthat/test-asis-nodes.R
@@ -16,7 +16,7 @@ test_that("multi-line inline math can have punctutation after", {
     nodes <- xml2::xml_find_all(txt, ".//md:text", ns = md_ns())
     # no protection initially
     expect_equal(
-      xml2::xml_attr(nodes, "protect.pos"),
+      xml2::xml_attr(nodes, "protect.start"),
       c(NA_character_, NA_character_)
     )
     protxt <- protect_inline_math(txt, md_ns())
@@ -24,7 +24,7 @@ test_that("multi-line inline math can have punctutation after", {
     expect_identical(txt, protxt)
     # protection exists
     expect_equal(
-      xml2::xml_attr(nodes, "protect.pos"),
+      xml2::xml_attr(nodes, "protect.start"),
       c('4', '1')
     )
     expect_equal(
@@ -131,20 +131,20 @@ test_that("(105) protection of one element does not impede protection of another
   brio::write_lines(expected, temp_file)
   wool <- tinkr::yarn$new(temp_file)
   # no protection initially 
-  n <- xml2::xml_find_all(wool$body, ".//md:text[@protect.pos]", ns = md_ns())
+  n <- xml2::xml_find_all(wool$body, ".//md:text[@protect.start]", ns = md_ns())
   expect_length(n, 0)
 
   wool$protect_curly()
 
   # protection exists
-  n <- xml2::xml_find_all(wool$body, ".//md:text[@protect.pos]", ns = md_ns())
+  n <- xml2::xml_find_all(wool$body, ".//md:text[@protect.start]", ns = md_ns())
   expect_length(n, 1)
   # the ranges are initially betwen the curly braces
   expect_equal(get_protected_ranges(n[[1]]), list(start = 4L, end = 7L))
 
   # protecting for math does not throw an error
   expect_no_error(wool$protect_math())
-  n <- xml2::xml_find_all(wool$body, ".//md:text[@protect.pos]", ns = md_ns())
+  n <- xml2::xml_find_all(wool$body, ".//md:text[@protect.start]", ns = md_ns())
   expect_length(n, 1)
   # the protected range now extends to the whole line
   expect_equal(get_protected_ranges(n[[1]]), list(start = 1L, end = 8L))

From afef059f841992371736d9ef332ea18018b4f6b7 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Wed, 1 May 2024 16:46:15 -0700
Subject: [PATCH 26/37] create utils for playing with sourcepos

---
 R/resolve-links.R   | 18 -----------------
 R/utils-sourcepos.R | 49 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 18 deletions(-)
 create mode 100644 R/utils-sourcepos.R

diff --git a/R/resolve-links.R b/R/resolve-links.R
index 94e43f5..f7b3c49 100644
--- a/R/resolve-links.R
+++ b/R/resolve-links.R
@@ -198,21 +198,3 @@ al_title <- function(link) {
   escape_ampersand(titles)
 }
 
-#nocov start
-# Get the position of an element
-get_pos <- function(x, e = 1) {
-  as.integer(
-    gsub(
-      "^(\\d+?):(\\d+?)[-](\\d+?):(\\d+?)$",
-      glue::glue("\\{e}"),
-      xml2::xml_attr(x, "sourcepos")
-    )
-  )
-}
-
-# helpers for get_pos
-get_linestart <- function(x) get_pos(x, e = 1)
-get_colstart  <- function(x) get_pos(x, e = 2)
-get_lineend   <- function(x) get_pos(x, e = 3)
-get_colend    <- function(x) get_pos(x, e = 4)
-#nocov end
diff --git a/R/utils-sourcepos.R b/R/utils-sourcepos.R
new file mode 100644
index 0000000..02f25da
--- /dev/null
+++ b/R/utils-sourcepos.R
@@ -0,0 +1,49 @@
+has_sourcepos <- function(node) {
+  xml2::xml_has_attr(node, "sourcepos")
+}
+# Get the position of an element
+get_pos <- function(x, e = 1) {
+  as.integer(
+    gsub(
+      "^(\\d+?):(\\d+?)[-](\\d+?):(\\d+?)$",
+      glue::glue("\\{e}"),
+      xml2::xml_attr(x, "sourcepos")
+    )
+  )
+}
+
+# helpers for get_pos
+get_linestart <- function(x) get_pos(x, e = 1)
+get_colstart  <- function(x) get_pos(x, e = 2)
+get_lineend   <- function(x) get_pos(x, e = 3)
+get_colend    <- function(x) get_pos(x, e = 4)
+
+get_sourcepos <- function(node) {
+  list(
+    linestart = get_linestart(node),
+    colstart = get_colstart(node),
+    lineend = get_lineend(node),
+    colend = get_colend(node)
+  )
+}
+make_sourcepos <- function(pos) {
+  glue::glue("{pos$linestart}:{pos$colstart}-{pos$lineend}:{pos$colend}")
+}
+
+split_sourcepos <- function(node) {
+  pos <- get_sourcepos(node)
+  ranges <- get_full_ranges(node)
+  offset <- pos$colstart - 1L
+  pos$colstart <- ranges$start + offset
+  pos$colend <- ranges$end
+  make_sourcepos(pos)
+}
+
+join_sourcepos <- function(nodes) {
+  pos <- get_sourcepos(nodes)
+  pos$linestart <- min(pos$linestart)
+  pos$colstart <- min(pos$colstart)
+  pos$lineend <- max(pos$lineend)
+  pos$colend <- max(pos$colend)
+  make_sourcepos(pos)
+}

From 54c1d1f70ad25b6f5e38bcda5dbb098d8a48909d Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Wed, 1 May 2024 16:46:42 -0700
Subject: [PATCH 27/37] add capability to split and rejoin protected nodes

This begins to address limitations of the attribute-based protection
by providing a way to separate and rejoin nodes that were previously
split.
---
 DESCRIPTION                       |   2 +-
 NAMESPACE                         |   4 +
 R/asis-nodes.R                    |   2 +-
 R/node-protection.R               |  57 ++++++++++++
 R/node-splits.R                   | 142 ++++++++++++++++++++++++++++++
 man/protected_ranges.Rd           |  13 +++
 man/split_protected_nodes.Rd      |  49 +++++++++++
 tests/testthat/test-node-splits.R |  75 ++++++++++++++++
 8 files changed, 342 insertions(+), 2 deletions(-)
 create mode 100644 R/node-splits.R
 create mode 100644 man/split_protected_nodes.Rd
 create mode 100644 tests/testthat/test-node-splits.R

diff --git a/DESCRIPTION b/DESCRIPTION
index a3e7e73..fc95947 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -56,5 +56,5 @@ Config/testthat/edition: 3
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
index 5c0412a..dbd28aa 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,15 +2,19 @@
 
 export(add_protected_ranges)
 export(find_between)
+export(get_protected_nodes)
 export(get_protected_ranges)
 export(is_protected)
+export(join_split_nodes)
 export(md_ns)
 export(protect_curly)
 export(protect_math)
 export(remove_protected_ranges)
+export(split_protected_nodes)
 export(stylesheet)
 export(to_md)
 export(to_xml)
+export(xpath_protected)
 export(yarn)
 importFrom(R6,R6Class)
 importFrom(magrittr,"%>%")
diff --git a/R/asis-nodes.R b/R/asis-nodes.R
index d3dcc0d..8b55f4b 100644
--- a/R/asis-nodes.R
+++ b/R/asis-nodes.R
@@ -391,7 +391,7 @@ protect_unescaped <- function(body, txt, ns = md_ns()) {
     warning(msg, call. = FALSE)
     return(body)
   }
-  XPATH <- ".//md:text[not(@asis)][contains(text(), '[') or contains(text(), ']')]"
+  XPATH <- ".//md:text[contains(text(), '[') or contains(text(), ']')]"
   snodes <- xml2::xml_find_all(body, XPATH, ns = ns)
   fix_unescaped_squares(snodes, txt)
   return(body)
diff --git a/R/node-protection.R b/R/node-protection.R
index a3dc802..af83f8b 100644
--- a/R/node-protection.R
+++ b/R/node-protection.R
@@ -90,6 +90,10 @@ is_protected <- function(node) {
     xml2::xml_has_attr(node, "protect.end")
 }
 
+#' @rdname protected_ranges
+#' @export
+xpath_protected <- ".//node()[@protect.start and @protect.end]"
+
 #' @rdname protected_ranges
 #' @export
 get_protected_ranges <- function(node) {
@@ -110,6 +114,26 @@ remove_protected_ranges <- function(node) {
   return(node)
 }
 
+#' @rdname protected_ranges
+#' @param find all nodes that 
+#' @export
+get_protected_nodes <- function(body) {
+  xml2::xml_find_all(body, xpath_protected)
+}
+
+get_full_ranges <- function(node) {
+  if (!is_protected(node)) {
+    return(node)
+  }
+  ranges <- get_protected_ranges(node)
+  txt <- xml2::xml_text(node)
+  inv <- inverse_ranges(nchar(txt), ranges$start, ranges$end)
+  full <- update_ranges(c(inv$start, ranges$start), c(inv$end, ranges$end))
+  full$protected <- full$start %in% ranges$start
+  return(full)
+}
+
+
 #' Detect if two ranges are overlapping
 #' 
 #' @param s1 \[integer\] starting index of first range
@@ -184,3 +208,36 @@ update_ranges <- function(start, end) {
   return(list(start = nstart[keep], end = nend[keep]))
 
 }
+
+inverse_ranges <- function(upper, start, end) {
+  original <- seq.int(upper)
+  n <- length(start) + 1
+  res <- list(start = integer(n), end = integer(n))
+  i <- 1
+  j <- 1
+  # create a mask to kee track of where we have been
+  mask <- original > 0
+  while (i < n) {
+    mask <- mask & (original > end[i] | original < start[i])
+    keep <- mask & original < end[i]
+    if (!any(keep)) {
+      i <- i + 1
+      next
+    }
+    rng <- range(original[keep])
+    res$start[j] <- rng[1]
+    res$end[j] <- rng[2]
+    # update the mask to make sure we do not add this again
+    mask[rng[1]:rng[2]] <- FALSE
+    j <- j + 1
+    i <- i + 1
+  }
+  if (end[n - 1] < upper) {
+    res$start[j] <- end[n - 1] + 1
+    res$end[j] <- upper
+  }
+  # remove any empty cells
+  res$start <- res$start[res$start != 0]
+  res$end <- res$end[res$end != 0]
+  return(res)
+}
diff --git a/R/node-splits.R b/R/node-splits.R
new file mode 100644
index 0000000..ed8e876
--- /dev/null
+++ b/R/node-splits.R
@@ -0,0 +1,142 @@
+#' Split and Join nodes that have been protected
+#'
+#' @param body an `xml_document` class object
+#' @return 
+#'  - `split_protected_nodes()` - a copy of `body` where text nodes with 
+#'    protection are split into 'asis' and regular nodes.
+#'  - `join_split_nodes()` - the `body` object where text nodes that have been
+#'    split (those with the `@split-id` attribute) are joined into a single text
+#'    node.
+#' @rdname split_protected_nodes
+#' @export
+#' @examples
+#' ex <- system.file("extdata", "math-example.md", package = "tinkr")
+#' m <- tinkr::yarn$new(ex)
+#' m$protect_math()
+#' # protection gives us protected nodes
+#' get_protected_nodes(m$body)
+#' original_body <- m$body
+#' # ---- splitting --------------------------------------------
+#' # splitting transforms those nodes into split text nodes
+#' split_body <- split_protected_nodes(m$body)
+#' get_protected_nodes(split_body)
+#' xml2::xml_find_all(split_body, ".//node()[@split-id]")
+#' # The effect is the same
+#' m$head(10)
+#' m$body <- split_body
+#' m$head(10)
+#' # ---- joining ----------------------------------------------
+#' # joining is done in place
+#' join_split_nodes(m$body)
+#' m$head(10)
+#' get_protected_nodes(m$body)
+#' get_protected_nodes(original_body)
+split_protected_nodes <- function(body) {
+  body <- copy_xml(body)
+  protected <- get_protected_nodes(body)
+  if (length(protected) == 0) {
+    return(body)
+  }
+  purrr::iwalk(protected, split_node)
+  copy_xml(body)
+}
+
+# split a node into adjacent text nodes
+split_node <- function(node, id) {
+  if (!is_protected(node)) {
+    return(node)
+  }
+  frag <- split_node_text(node)
+  new_nodes <- glue::glue("<text>{frag$string}</text>")
+  new_nodes <- make_text_nodes(new_nodes)
+  attrs <- xml2::xml_attrs(node)
+  attrs <- attrs[!startsWith(names(attrs), "protect")]
+  for (attr in names(attrs)) {
+    val <- attrs[[attr]]
+    if (attr == "space") {
+      attr <- "xml:space"
+    }
+    xml2::xml_set_attr(new_nodes, attr, val)
+  }
+  if (has_sourcepos(node)) {
+    xml2::xml_set_attr(new_nodes, "sourcepos", split_sourcepos(node))
+  }
+  remove_protected_ranges(node)
+  xml2::xml_set_attr(new_nodes, "split-id", id)
+  xml2::xml_set_attr(new_nodes[frag$protected], "asis", "true")
+  add_node_siblings(node, new_nodes, remove = TRUE)
+}
+
+# splits node text based on protected ranges
+split_node_text <- function(node) {
+  if (!is_protected(node)) {
+    return(node)
+  }
+  full <- get_full_ranges(node)
+  txt <- xml2::xml_text(node)
+  parts <- list(
+    string = substring(txt, full$start, full$end),
+    protected = full$protected
+  )
+  return(parts)
+}
+
+#' @rdname split_protected_nodes
+#' @export
+join_split_nodes <- function(body) {
+  nodes <- xml2::xml_find_all(body, ".//md:text[@split-id]", ns = md_ns())
+  if (length(nodes) == 0) {
+    return(body)
+  }
+  ids <- xml2::xml_attr(nodes, "split-id")
+  purrr::walk(unique(ids), function(i) join_text_nodes(nodes[ids == i]))
+  return(body)
+}
+
+join_text_nodes <- function(nodes) {
+  nodetxt <- xml2::xml_text(nodes)
+  asis_nodes <- xml2::xml_has_attr(nodes, "asis")
+  # if there is a single node (e.g. it was part of a multiline math problem)
+  # and it was _all_ protected, then we need to catch it and return it early
+  if (length(nodes) == 1) {
+    new_node <- nodes[[1]]
+    if (has_sourcepos(nodes)) {
+      start <- 1
+      end <- get_colend(nodes)[asis_nodes] - get_colstart(nodes)[asis_nodes] + 1
+    } else {
+      start <- 1
+      end <- nchar(nodetxt)
+    }
+    add_protected_ranges(new_node, start, end)
+    xml2::xml_set_attr(new_node, "split-id", NULL)
+    xml2::xml_set_attr(new_node, "asis", NULL)
+    return(new_node)
+  }
+  # In this nodeset, we need to make sure to relabel the asis nodes 
+  txt <- paste(nodetxt, collapse = "")
+  # our new node is the donor for all other nodes
+  new_node <- nodes[[1]]
+  xml2::xml_set_text(new_node, txt)
+  if (has_sourcepos(new_node)) {
+    pos <- join_sourcepos(nodes)
+    offset <- get_colstart(new_node) - 1
+    start <- get_colstart(nodes)[asis_nodes] - offset
+    end <- get_colend(nodes)[asis_nodes] - offset
+    xml2::xml_set_attr(new_node, "sourcepos", pos)
+  } else {
+    # compute the protection from the string lengths
+    n <- cumsum(nchar(nodetxt))
+    start <- n[!asis_nodes] + 1
+    end <- n[asis_nodes]
+    # if the first node is an asis node, th
+    if (isTRUE(asis_nodes[1])) {
+      start <- c(1, n[!asis_nodes] + 1)
+    }
+    start <- start[seq_along(end)]
+  }
+  add_protected_ranges(new_node, start, end)
+  xml2::xml_set_attr(new_node, "split-id", NULL)
+  xml2::xml_set_attr(new_node, "asis", NULL)
+  xml2::xml_remove(nodes[-1])
+}
+
diff --git a/man/protected_ranges.Rd b/man/protected_ranges.Rd
index 5892110..54dad63 100644
--- a/man/protected_ranges.Rd
+++ b/man/protected_ranges.Rd
@@ -1,19 +1,29 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/node-protection.R
+\docType{data}
 \name{add_protected_ranges}
 \alias{add_protected_ranges}
 \alias{is_protected}
+\alias{xpath_protected}
 \alias{get_protected_ranges}
 \alias{remove_protected_ranges}
+\alias{get_protected_nodes}
 \title{Handle protected ranges for a node}
+\format{
+An object of class \code{character} of length 1.
+}
 \usage{
 add_protected_ranges(node, start, end)
 
 is_protected(node)
 
+xpath_protected
+
 get_protected_ranges(node)
 
 remove_protected_ranges(node)
+
+get_protected_nodes(body)
 }
 \arguments{
 \item{node}{an XML \verb{<text>} node.}
@@ -22,6 +32,8 @@ remove_protected_ranges(node)
 
 \item{end}{\verb{\[integer\]} a vector of ending indices that are paired with
 \code{start}}
+
+\item{find}{all nodes that}
 }
 \value{
 \itemize{
@@ -82,3 +94,4 @@ wool$show()
 # CLEAN UP -------------------
 if (file.exists(temp_file)) unlink(temp_file)
 }
+\keyword{datasets}
diff --git a/man/split_protected_nodes.Rd b/man/split_protected_nodes.Rd
new file mode 100644
index 0000000..2503c87
--- /dev/null
+++ b/man/split_protected_nodes.Rd
@@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/node-splits.R
+\name{split_protected_nodes}
+\alias{split_protected_nodes}
+\alias{join_split_nodes}
+\title{Split and Join nodes that have been protected}
+\usage{
+split_protected_nodes(body)
+
+join_split_nodes(body)
+}
+\arguments{
+\item{body}{an \code{xml_document} class object}
+}
+\value{
+\itemize{
+\item \code{split_protected_nodes()} - a copy of \code{body} where text nodes with
+protection are split into 'asis' and regular nodes.
+\item \code{join_split_nodes()} - the \code{body} object where text nodes that have been
+split (those with the \verb{@split-id} attribute) are joined into a single text
+node.
+}
+}
+\description{
+Split and Join nodes that have been protected
+}
+\examples{
+ex <- system.file("extdata", "math-example.md", package = "tinkr")
+m <- tinkr::yarn$new(ex)
+m$protect_math()
+# protection gives us protected nodes
+get_protected_nodes(m$body)
+original_body <- m$body
+# ---- splitting --------------------------------------------
+# splitting transforms those nodes into split text nodes
+split_body <- split_protected_nodes(m$body)
+get_protected_nodes(split_body)
+xml2::xml_find_all(split_body, ".//node()[@split-id]")
+# The effect is the same
+m$head(10)
+m$body <- split_body
+m$head(10)
+# ---- joining ----------------------------------------------
+# joining is done in place
+join_split_nodes(m$body)
+m$head(10)
+get_protected_nodes(m$body)
+get_protected_nodes(original_body)
+}
diff --git a/tests/testthat/test-node-splits.R b/tests/testthat/test-node-splits.R
new file mode 100644
index 0000000..a44577b
--- /dev/null
+++ b/tests/testthat/test-node-splits.R
@@ -0,0 +1,75 @@
+
+test_that("splitting and joining protected nodes will work round trip", {
+
+  ex <- system.file("extdata", "math-example.md", package = "tinkr")
+  m <- tinkr::yarn$new(ex)
+  m$protect_math()
+  # protection gives us protected nodes
+  protected <- length(get_protected_nodes(m$body))
+  expect_gt(protected, 0)
+  # ---- splitting --------------------------------------------
+  # splitting transforms those nodes into split text nodes
+  split_body <- split_protected_nodes(m$body)
+  # no protected nodes exist
+  expect_false(identical(split_body, m$body))
+  expect_length(get_protected_nodes(split_body), 0)
+
+  # there should be the same number of unique ids as the protected
+  splits <- xml2::xml_find_all(split_body, ".//node()[@split-id]")
+  expect_gt(length(splits), 0)
+  ids <- unique(xml2::xml_attr(splits, "split-id"))
+  expect_equal(length(ids), protected)
+
+  # The effect is the same
+  h1 <- m$head(10)
+  m$body <- split_body
+  h2 <- m$head(10)
+  expect_equal(h1, h2)
+  # ---- joining ----------------------------------------------
+  # joining is done in place
+  join_split_nodes(m$body)
+  expect_identical(m$body, split_body)
+  h3 <- m$head(10)
+  expect_equal(h1, h3)
+  expect_equal(length(get_protected_nodes(m$body)), protected)
+
+})
+
+
+test_that("splitting and joining protected nodes will work round trip with sourcepos", {
+
+  ex <- system.file("extdata", "math-example.md", package = "tinkr")
+  m <- tinkr::yarn$new(ex, sourcepos = TRUE)
+  m$protect_math()
+  # the source positions exist
+  expect_true(has_sourcepos(m$body))
+  # protection gives us protected nodes
+  protected <- length(get_protected_nodes(m$body))
+  expect_gt(protected, 0)
+  # ---- splitting --------------------------------------------
+  # splitting transforms those nodes into split text nodes
+  split_body <- split_protected_nodes(m$body)
+  # no protected nodes exist
+  expect_false(identical(split_body, m$body))
+  expect_length(get_protected_nodes(split_body), 0)
+
+  # there should be the same number of unique ids as the protected
+  splits <- xml2::xml_find_all(split_body, ".//node()[@split-id]")
+  expect_gt(length(splits), 0)
+  ids <- unique(xml2::xml_attr(splits, "split-id"))
+  expect_equal(length(ids), protected)
+
+  # The effect is the same
+  h1 <- m$head(10)
+  m$body <- split_body
+  h2 <- m$head(10)
+  expect_equal(h1, h2)
+  # ---- joining ----------------------------------------------
+  # joining is done in place
+  join_split_nodes(m$body)
+  expect_identical(m$body, split_body)
+  h3 <- m$head(10)
+  expect_equal(h1, h3)
+  expect_equal(length(get_protected_nodes(m$body)), protected)
+
+})

From febfa2f8dd73f2419622c662eb5f814e2c36f610 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Wed, 1 May 2024 16:56:28 -0700
Subject: [PATCH 28/37] fix doc booboo

---
 R/node-protection.R     | 4 +++-
 man/protected_ranges.Rd | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/R/node-protection.R b/R/node-protection.R
index af83f8b..6b6e449 100644
--- a/R/node-protection.R
+++ b/R/node-protection.R
@@ -4,12 +4,14 @@
 #' @param start `\[integer\]` a vector of starting indices of a set of ranges
 #' @param end `\[integer\]` a vector of ending indices that are paired with
 #'   `start`
+#' @param body an XML document
 #' @return 
 #'   - `add_protected_ranges()`: the modified node
 #'   - `remove_protected_ranges()`: the modified node
 #'   - `is_protected()`: `TRUE` if the node has protection attributes
 #'   - `get_protected_ranges()` a list containing integer vectors `start` and
 #'     `end` if the node is protected, otherwise, it returns NULL
+#'   - `get_protected_nodes()` a nodelist
 #' @rdname protected_ranges
 #' @export
 #' @examples
@@ -49,6 +51,7 @@
 #' get_protected_ranges(nodes[[1]])
 #'
 #' wool$show() # the first row and "\e", "\g", and "\h" are protected
+#' get_protected_nodes(wool$body) # showing the nodes that are protected
 #' 
 #' # REMOVING PROTECTION --------
 #' remove_protected_ranges(nodes[[2]])
@@ -115,7 +118,6 @@ remove_protected_ranges <- function(node) {
 }
 
 #' @rdname protected_ranges
-#' @param find all nodes that 
 #' @export
 get_protected_nodes <- function(body) {
   xml2::xml_find_all(body, xpath_protected)
diff --git a/man/protected_ranges.Rd b/man/protected_ranges.Rd
index 54dad63..123d891 100644
--- a/man/protected_ranges.Rd
+++ b/man/protected_ranges.Rd
@@ -33,7 +33,7 @@ get_protected_nodes(body)
 \item{end}{\verb{\[integer\]} a vector of ending indices that are paired with
 \code{start}}
 
-\item{find}{all nodes that}
+\item{body}{an XML document}
 }
 \value{
 \itemize{
@@ -42,6 +42,7 @@ get_protected_nodes(body)
 \item \code{is_protected()}: \code{TRUE} if the node has protection attributes
 \item \code{get_protected_ranges()} a list containing integer vectors \code{start} and
 \code{end} if the node is protected, otherwise, it returns NULL
+\item \code{get_protected_nodes()} a nodelist
 }
 }
 \description{
@@ -84,6 +85,7 @@ add_protected_ranges(nodes[[1]], start = 1, end = 4)
 get_protected_ranges(nodes[[1]])
 
 wool$show() # the first row and "\e", "\g", and "\h" are protected
+get_protected_nodes(wool$body) # showing the nodes that are protected
 
 # REMOVING PROTECTION --------
 remove_protected_ranges(nodes[[2]])

From 70612418bbe47f5c78cc40acbf0d63f871f98641 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Thu, 2 May 2024 09:31:25 -0700
Subject: [PATCH 29/37] ensure complete round trip

The previous iteration was not quite correct because it had assumed that
the sourcepos would match up exactly with the protection ranges, but
these were two separate numbers.

This does the following:

1. when a protected range spans the entire node, then it is labeled "asis"
2. `split_sourcepos()` now reflects the actual end of the sourcepos
   instead of the computed end
3. an awkward catch for single nodes in `join_split_nodes()` is now
   eliminated
4. `join_split_nodes()` no longer re-comuputes the protected ranges from
   the sourcepos
---
 R/asis-nodes.R                        | 42 ++-----------
 R/node-protection.R                   | 13 +++-
 R/node-splits.R                       | 87 ++++++++++++++++++---------
 R/utils-sourcepos.R                   |  9 ++-
 man/split_protected_nodes.Rd          |  3 +
 tests/testthat/_snaps/asis-nodes.md   |  2 +-
 tests/testthat/_snaps/attr-nodes.md   |  2 +-
 tests/testthat/test-asis-nodes.R      | 16 ++---
 tests/testthat/test-node-protection.R | 16 ++---
 tests/testthat/test-node-splits.R     |  4 ++
 10 files changed, 106 insertions(+), 88 deletions(-)

diff --git a/R/asis-nodes.R b/R/asis-nodes.R
index 8b55f4b..ac874c6 100644
--- a/R/asis-nodes.R
+++ b/R/asis-nodes.R
@@ -32,6 +32,10 @@ set_asis <- function(nodes) {
   xml2::xml_set_attr(nodes[xml2::xml_name(nodes) != "softbreak"], "asis", "true")
 }
 
+is_asis <- function(node) {
+  xml2::xml_has_attr(node, "asis")
+}
+
 # INLINE MATH ------------------------------------------------------------------
 
 # finding inline math consists of searching for $ and excluding $$
@@ -223,44 +227,6 @@ label_fully_inline <- function(math) {
   add_protected_ranges(math, start, end)
 }
 
-#' Transform a character vector of XML into text nodes
-#'
-#' This is useful in the case where we want to modify some text content to
-#' split it and label a portion of it 'asis' to protect it from commonmark's
-#' escape processing.
-#'
-#' `fix_fully_inline()` uses `make_text_nodes()` to modify a single text node
-#' into several text nodes. It first takes a string of a single text node like
-#' below...
-#'
-#' ```html
-#' <text>this is $\LaTeX$ text</text>
-#' ```
-#'
-#' ...and splits it into three text nodes, surrounding the LaTeX math with text
-#' tags that have the 'asis' attribute.
-#'
-#' ```html
-#' <text>this is </text><text asis='true'>$\LaTeX$</text><text> text</text>
-#' ```
-#'
-#' The `make_text_nodes()` function takes the above text string and converts it
-#' into nodes so that the original text node can be replaced.
-#'
-#' @param a character vector of modified text nodes
-#' @return a nodeset with no associated namespace
-#' @noRd
-make_text_nodes <- function(txt) {
-  # We are hijacking commonmark here to produce an XML markdown document with
-  # a single element: {paste(txt, collapse = ''). This gets passed to glue where
-  # it is expanded into nodes that we can read in via {xml2}, strip the
-  # namespace, and extract all nodes below
-  doc <- glue::glue(commonmark::markdown_xml("{paste(txt, collapse = '')}"))
-  nodes <- xml2::xml_ns_strip(xml2::read_xml(doc))
-  xml2::xml_find_all(nodes, ".//paragraph/text/*")
-}
-
-
 # BLOCK MATH ------------------------------------------------------------------
 
 find_block_math <- function(body, ns) {
diff --git a/R/node-protection.R b/R/node-protection.R
index 6b6e449..0a6c339 100644
--- a/R/node-protection.R
+++ b/R/node-protection.R
@@ -63,8 +63,9 @@
 #' if (file.exists(temp_file)) unlink(temp_file)
 add_protected_ranges <- function(node, start, end) {
   no_beginning <- length(start) == 0 || any(start < 1)
-  if (no_beginning || not_text_node(node)) {
-    # return early if there are no ranges to protect
+  if (is_asis(node) || no_beginning || not_text_node(node)) {
+    # return early if node is already protected
+    # or there are no ranges to protect
     return(node)
   }
   if (is_protected(node)) {
@@ -75,6 +76,14 @@ add_protected_ranges <- function(node, start, end) {
     start <- new$start
     end <- new$end
   }
+  n <- nchar(xml2::xml_text(node))
+  if (length(end) == 1 && start == 1 && end == n ) {
+    # if the protection ends up spanning the entire node, just return asis
+    xml2::xml_set_attr(node, "asis", "true")
+    xml2::xml_set_attr(node, "protect.start", NULL)
+    xml2::xml_set_attr(node, "protect.end", NULL)
+    return(node)
+  }
   xml2::xml_set_attr(node, "protect.start", paste(start, collapse = " "))
   xml2::xml_set_attr(node, "protect.end", paste(end, collapse = " "))
   return(node)
diff --git a/R/node-splits.R b/R/node-splits.R
index ed8e876..a5ecfb7 100644
--- a/R/node-splits.R
+++ b/R/node-splits.R
@@ -1,3 +1,40 @@
+#' Transform a character vector of XML into text nodes
+#'
+#' This is useful in the case where we want to modify some text content to
+#' split it and label a portion of it 'asis' to protect it from commonmark's
+#' escape processing.
+#'
+#' `fix_fully_inline()` uses `make_text_nodes()` to modify a single text node
+#' into several text nodes. It first takes a string of a single text node like
+#' below...
+#'
+#' ```html
+#' <text>this is $\LaTeX$ text</text>
+#' ```
+#'
+#' ...and splits it into three text nodes, surrounding the LaTeX math with text
+#' tags that have the 'asis' attribute.
+#'
+#' ```html
+#' <text>this is </text><text asis='true'>$\LaTeX$</text><text> text</text>
+#' ```
+#'
+#' The `make_text_nodes()` function takes the above text string and converts it
+#' into nodes so that the original text node can be replaced.
+#'
+#' @param a character vector of modified text nodes
+#' @return a nodeset with no associated namespace
+#' @noRd
+make_text_nodes <- function(txt) {
+  # We are hijacking commonmark here to produce an XML markdown document with
+  # a single element: {paste(txt, collapse = ''). This gets passed to glue where
+  # it is expanded into nodes that we can read in via {xml2}, strip the
+  # namespace, and extract all nodes below
+  doc <- glue::glue(commonmark::markdown_xml("{paste(txt, collapse = '')}"))
+  nodes <- xml2::xml_ns_strip(xml2::read_xml(doc))
+  xml2::xml_find_all(nodes, ".//paragraph/text/*")
+}
+
 #' Split and Join nodes that have been protected
 #'
 #' @param body an `xml_document` class object
@@ -31,6 +68,9 @@
 #' m$head(10)
 #' get_protected_nodes(m$body)
 #' get_protected_nodes(original_body)
+#' 
+#' # the context is identical even after transformation
+#' identical(as.character(m$body), as.character(original_body))
 split_protected_nodes <- function(body) {
   body <- copy_xml(body)
   protected <- get_protected_nodes(body)
@@ -47,6 +87,14 @@ split_node <- function(node, id) {
     return(node)
   }
   frag <- split_node_text(node)
+  if (length(frag$string) == 1) {
+    # If we have a single fragment, we can label it "asis" and be on our way
+    # This is likely the situation where a math equation contains 
+    # two underscores, which the interpreter reads as <emph> nodes.
+    xml2::xml_set_attr(node, "asis", "true")
+    remove_protected_ranges(node)
+    return(node)
+  }
   new_nodes <- glue::glue("<text>{frag$string}</text>")
   new_nodes <- make_text_nodes(new_nodes)
   attrs <- xml2::xml_attrs(node)
@@ -96,44 +144,25 @@ join_split_nodes <- function(body) {
 join_text_nodes <- function(nodes) {
   nodetxt <- xml2::xml_text(nodes)
   asis_nodes <- xml2::xml_has_attr(nodes, "asis")
-  # if there is a single node (e.g. it was part of a multiline math problem)
-  # and it was _all_ protected, then we need to catch it and return it early
-  if (length(nodes) == 1) {
-    new_node <- nodes[[1]]
-    if (has_sourcepos(nodes)) {
-      start <- 1
-      end <- get_colend(nodes)[asis_nodes] - get_colstart(nodes)[asis_nodes] + 1
-    } else {
-      start <- 1
-      end <- nchar(nodetxt)
-    }
-    add_protected_ranges(new_node, start, end)
-    xml2::xml_set_attr(new_node, "split-id", NULL)
-    xml2::xml_set_attr(new_node, "asis", NULL)
-    return(new_node)
-  }
   # In this nodeset, we need to make sure to relabel the asis nodes 
   txt <- paste(nodetxt, collapse = "")
   # our new node is the donor for all other nodes
   new_node <- nodes[[1]]
   xml2::xml_set_text(new_node, txt)
   if (has_sourcepos(new_node)) {
+    # restore the sourcepos of the original nodes
     pos <- join_sourcepos(nodes)
-    offset <- get_colstart(new_node) - 1
-    start <- get_colstart(nodes)[asis_nodes] - offset
-    end <- get_colend(nodes)[asis_nodes] - offset
     xml2::xml_set_attr(new_node, "sourcepos", pos)
-  } else {
-    # compute the protection from the string lengths
-    n <- cumsum(nchar(nodetxt))
-    start <- n[!asis_nodes] + 1
-    end <- n[asis_nodes]
-    # if the first node is an asis node, th
-    if (isTRUE(asis_nodes[1])) {
-      start <- c(1, n[!asis_nodes] + 1)
-    }
-    start <- start[seq_along(end)]
   }
+  # compute the protection from the string lengths
+  n <- cumsum(nchar(nodetxt))
+  start <- n[!asis_nodes] + 1
+  end <- n[asis_nodes]
+  # if the first node is an asis node, th
+  if (isTRUE(asis_nodes[1])) {
+    start <- c(1, n[!asis_nodes] + 1)
+  }
+  start <- start[seq_along(end)]
   add_protected_ranges(new_node, start, end)
   xml2::xml_set_attr(new_node, "split-id", NULL)
   xml2::xml_set_attr(new_node, "asis", NULL)
diff --git a/R/utils-sourcepos.R b/R/utils-sourcepos.R
index 02f25da..33396f2 100644
--- a/R/utils-sourcepos.R
+++ b/R/utils-sourcepos.R
@@ -32,10 +32,17 @@ make_sourcepos <- function(pos) {
 
 split_sourcepos <- function(node) {
   pos <- get_sourcepos(node)
+  # sometimes the source position will run beyond the ranges of the node
+  # this preserves the original end for the source position
+  original_end <- pos$colend
   ranges <- get_full_ranges(node)
+  # the colstart and ends should be offset by the original colstart 
+  # in the case of lists or indented paragraphs
   offset <- pos$colstart - 1L
   pos$colstart <- ranges$start + offset
-  pos$colend <- ranges$end
+  pos$colend <- ranges$end + offset
+  # we append the original end
+  pos$colend[length(pos$colend)] <- original_end
   make_sourcepos(pos)
 }
 
diff --git a/man/split_protected_nodes.Rd b/man/split_protected_nodes.Rd
index 2503c87..86a8597 100644
--- a/man/split_protected_nodes.Rd
+++ b/man/split_protected_nodes.Rd
@@ -46,4 +46,7 @@ join_split_nodes(m$body)
 m$head(10)
 get_protected_nodes(m$body)
 get_protected_nodes(original_body)
+
+# the context is identical even after transformation
+identical(as.character(m$body), as.character(original_body))
 }
diff --git a/tests/testthat/_snaps/asis-nodes.md b/tests/testthat/_snaps/asis-nodes.md
index 18f62d9..3935f69 100644
--- a/tests/testthat/_snaps/asis-nodes.md
+++ b/tests/testthat/_snaps/asis-nodes.md
@@ -92,7 +92,7 @@
     Code
       show_user(wool$show(), force = TRUE)
     Output
-      example
+      example inline $b_{ij}$
       
       $a_{ij}$
       
diff --git a/tests/testthat/_snaps/attr-nodes.md b/tests/testthat/_snaps/attr-nodes.md
index bd2becb..d6a39fc 100644
--- a/tests/testthat/_snaps/attr-nodes.md
+++ b/tests/testthat/_snaps/attr-nodes.md
@@ -22,7 +22,7 @@
           <image sourcepos="10:1-10:51" destination="https://placekitten.com/200/300" title="">
             <text sourcepos="10:3-10:17" xml:space="preserve">a pretty kitten</text>
           </image>
-          <text sourcepos="10:52-10:88" xml:space="preserve" protect.start="1" protect.end="37" alt="'a picture of a kitten'">{#kitteh alt='a picture of a kitten'}</text>
+          <text sourcepos="10:52-10:88" xml:space="preserve" asis="true" alt="'a picture of a kitten'">{#kitteh alt='a picture of a kitten'}</text>
         </paragraph>
         <paragraph sourcepos="12:1-13:10">
           <image sourcepos="12:1-12:47" destination="https://placedog.net/200/300" title="">
diff --git a/tests/testthat/test-asis-nodes.R b/tests/testthat/test-asis-nodes.R
index 2a1bb2d..2d381ca 100644
--- a/tests/testthat/test-asis-nodes.R
+++ b/tests/testthat/test-asis-nodes.R
@@ -125,7 +125,7 @@ test_that("protect_unescaped() will throw a warning if no sourcpos is available"
 
 test_that("(105) protection of one element does not impede protection of another", {
   
-  expected <- "example\n\n$a_{ij}$ \n"
+  expected <- "example inline $b_{ij}$\n\n$a_{ij}$ \n"
 
   temp_file <- withr::local_tempfile()
   brio::write_lines(expected, temp_file)
@@ -137,17 +137,17 @@ test_that("(105) protection of one element does not impede protection of another
   wool$protect_curly()
 
   # protection exists
-  n <- xml2::xml_find_all(wool$body, ".//md:text[@protect.start]", ns = md_ns())
-  expect_length(n, 1)
+  the_nodes <- xml2::xml_find_all(wool$body, ".//node()[@protect.start]")
+  expect_length(the_nodes, 2)
   # the ranges are initially betwen the curly braces
-  expect_equal(get_protected_ranges(n[[1]]), list(start = 4L, end = 7L))
+  expect_equal(get_protected_ranges(the_nodes[[1]]), list(start = 19L, end = 22L))
+  expect_equal(get_protected_ranges(the_nodes[[2]]), list(start = 4L, end = 7L))
 
   # protecting for math does not throw an error
   expect_no_error(wool$protect_math())
-  n <- xml2::xml_find_all(wool$body, ".//md:text[@protect.start]", ns = md_ns())
-  expect_length(n, 1)
-  # the protected range now extends to the whole line
-  expect_equal(get_protected_ranges(n[[1]]), list(start = 1L, end = 8L))
+  # the protected range now extends to the whole line without the trailing space
+  expect_equal(get_protected_ranges(the_nodes[[1]]), list(start = 16L, end = 23L))
+  expect_equal(get_protected_ranges(the_nodes[[2]]), NULL)
   expect_snapshot(show_user(wool$show(), force = TRUE))
 })
 
diff --git a/tests/testthat/test-node-protection.R b/tests/testthat/test-node-protection.R
index 716b4bd..fd8e867 100644
--- a/tests/testthat/test-node-protection.R
+++ b/tests/testthat/test-node-protection.R
@@ -115,14 +115,14 @@ test_that("protection can be added and removed", {
 
   add_protected_ranges(nodes[[2]], start = 1, end = 8)
 
-  expect_true(is_protected(nodes[[2]]))
+  expect_false(is_protected(nodes[[2]]))
+  expect_true(is_asis(nodes[[2]]))
 
   expect_equal(get_protected_ranges(nodes[[1]]),
     list(start = c(3, 7), end = c(4, 8))
   )
-  expect_equal(get_protected_ranges(nodes[[2]]),
-    list(start = 1, end = 8)
-  )
+  expect_equal(get_protected_ranges(nodes[[2]]), NULL)
+
   some_protection <- wool$show()
   # we expect all but the first and third entities to be protected
   some_expected <- gsub("\\\\([ac])", "\\\\\\\\\\1", expected)
@@ -135,10 +135,10 @@ test_that("protection can be added and removed", {
     list(start = c(3, 7), end = c(4, 8))
   )
   # adding completely overlapping protections does not cause an error
-  add_protected_ranges(nodes[[1]], start = c(3, 7), end = c(4, 8))
-  expect_equal(get_protected_ranges(nodes[[2]]),
-    list(start = 1, end = 8)
-  )
+  add_protected_ranges(nodes[[2]], start = c(3, 7), end = c(4, 8))
+  expect_true(is_asis(nodes[[2]]))
+  expect_equal(get_protected_ranges(nodes[[2]]), NULL)
+
   some_protection <- wool$show()
   # we expect all but the first and third entities to be protected
   some_expected <- gsub("\\\\([ac])", "\\\\\\\\\\1", expected)
diff --git a/tests/testthat/test-node-splits.R b/tests/testthat/test-node-splits.R
index a44577b..2ba79b6 100644
--- a/tests/testthat/test-node-splits.R
+++ b/tests/testthat/test-node-splits.R
@@ -6,6 +6,7 @@ test_that("splitting and joining protected nodes will work round trip", {
   m$protect_math()
   # protection gives us protected nodes
   protected <- length(get_protected_nodes(m$body))
+  orig <- m$body
   expect_gt(protected, 0)
   # ---- splitting --------------------------------------------
   # splitting transforms those nodes into split text nodes
@@ -32,6 +33,7 @@ test_that("splitting and joining protected nodes will work round trip", {
   h3 <- m$head(10)
   expect_equal(h1, h3)
   expect_equal(length(get_protected_nodes(m$body)), protected)
+  expect_equal(as.character(m$body), as.character(orig))
 
 })
 
@@ -41,6 +43,7 @@ test_that("splitting and joining protected nodes will work round trip with sourc
   ex <- system.file("extdata", "math-example.md", package = "tinkr")
   m <- tinkr::yarn$new(ex, sourcepos = TRUE)
   m$protect_math()
+  orig <- m$body
   # the source positions exist
   expect_true(has_sourcepos(m$body))
   # protection gives us protected nodes
@@ -71,5 +74,6 @@ test_that("splitting and joining protected nodes will work round trip with sourc
   h3 <- m$head(10)
   expect_equal(h1, h3)
   expect_equal(length(get_protected_nodes(m$body)), protected)
+  expect_equal(object = as.character(m$body), expected = as.character(orig))
 
 })

From 9e7de7281293f254a8f2c0628f824ca891c9b544 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Thu, 2 May 2024 10:58:14 -0700
Subject: [PATCH 30/37] add find_between_nodes from pegboard

This allows us to search for internal nodes using their identities
---
 R/utils.R | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/R/utils.R b/R/utils.R
index 741c099..70f7709 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -3,6 +3,22 @@ show_user <- function(out, force = FALSE) {
   invisible(out)
 }
 
+# taken from pegboard
+find_between_nodes <- function(a, b, include = TRUE) {
+  the_parent <- xml2::xml_parent(a)
+  if (!identical(the_parent, xml2::xml_parent(b))) {
+    # we cannot return a space between nodes on different levels
+    return(xml2::xml_missing())
+  }
+  the_children <- xml2::xml_children(the_parent)
+  # find the node in question by testing for identity since they represent the
+  # same object, they will be identical. 
+  offset <- if (include) 0 else 1
+  ida <- which(purrr::map_lgl(the_children, identical, a)) + offset
+  idb <- which(purrr::map_lgl(the_children, identical, b)) - offset
+  the_children[seq(ida, idb)]
+}
+
 unbalanced_math_error <- function(bmath, endless, headless, le, lh) {
   no_end <- xml2::xml_text(bmath[endless])
   no_beginning <- xml2::xml_text(bmath[headless])

From acd560f4e9bd7b99651b14082941d2a8959a6696 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Thu, 2 May 2024 10:59:05 -0700
Subject: [PATCH 31/37] use helpers for asis

---
 R/node-splits.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/node-splits.R b/R/node-splits.R
index a5ecfb7..0fbf0f9 100644
--- a/R/node-splits.R
+++ b/R/node-splits.R
@@ -91,7 +91,7 @@ split_node <- function(node, id) {
     # If we have a single fragment, we can label it "asis" and be on our way
     # This is likely the situation where a math equation contains 
     # two underscores, which the interpreter reads as <emph> nodes.
-    xml2::xml_set_attr(node, "asis", "true")
+    set_asis(node)
     remove_protected_ranges(node)
     return(node)
   }
@@ -143,7 +143,7 @@ join_split_nodes <- function(body) {
 
 join_text_nodes <- function(nodes) {
   nodetxt <- xml2::xml_text(nodes)
-  asis_nodes <- xml2::xml_has_attr(nodes, "asis")
+  asis_nodes <- is_asis(nodes)
   # In this nodeset, we need to make sure to relabel the asis nodes 
   txt <- paste(nodetxt, collapse = "")
   # our new node is the donor for all other nodes

From f22d2aac7270098a5fec0138a8a72be55fa3830c Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Thu, 2 May 2024 10:59:24 -0700
Subject: [PATCH 32/37] remove "curly" attr when setting a math node

---
 R/asis-nodes.R | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/R/asis-nodes.R b/R/asis-nodes.R
index ac874c6..8709362 100644
--- a/R/asis-nodes.R
+++ b/R/asis-nodes.R
@@ -208,6 +208,9 @@ label_partial_inline <- function(math) {
     begin <- 1
     end <- nchar(char)
   }
+  # if any of the nodes have been set as curly, unset
+  xml2::xml_set_attr(math, "curly", NULL)
+  xml2::xml_set_attr(math, "curly-id", NULL)
   add_protected_ranges(math, begin, end)
 }
 
@@ -224,6 +227,9 @@ label_fully_inline <- function(math) {
   # <text protect.start='9' protect.end='16'>this is $\LaTeX$ text</text>
   start <- locations[[1]]
   end <- start + attr(locations[[1]], "match.len") - 1L
+  # if any of the nodes have been set as curly, unset
+  xml2::xml_set_attr(math, "curly", NULL)
+  xml2::xml_set_attr(math, "curly-id", NULL)
   add_protected_ranges(math, start, end)
 }
 
@@ -242,6 +248,9 @@ protect_block_math <- function(body, ns) {
   bm <- find_block_math(body, ns)
   # get all of the internal nodes
   bm <- xml2::xml_find_all(bm, ".//descendant-or-self::md:*", ns = ns)
+  # if any of the nodes have been set as curly, unset
+  xml2::xml_set_attr(bm, "curly", NULL)
+  xml2::xml_set_attr(math, "curly-id", NULL)
   set_asis(bm)
 }
 

From 7875dea9bc64bf62d9f3909aa36f6bec42b100fa Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Thu, 2 May 2024 11:00:28 -0700
Subject: [PATCH 33/37] update curly processing to better handle alt text

---
 R/attr-nodes.R                            | 75 +++++++++++++++++------
 tests/testthat/_snaps/attr-nodes.md       | 18 +++---
 tests/testthat/_snaps/attr-nodes/yarn.Rmd |  2 -
 tests/testthat/_snaps/attr-nodes/yarn.md  |  2 -
 tests/testthat/test-node-splits.R         |  6 +-
 5 files changed, 71 insertions(+), 32 deletions(-)

diff --git a/R/attr-nodes.R b/R/attr-nodes.R
index 2d1ca1e..b6ce752 100644
--- a/R/attr-nodes.R
+++ b/R/attr-nodes.R
@@ -6,46 +6,85 @@ find_curly <- function(body, ns) {
   attr_texts <- xml2::xml_text(curlies)
   no_closing <- !grepl("[}]", attr_texts)
   if (any(no_closing)) {
+    curlies <- lapply(curlies, function(i) i)
     close_xpath <- "self::*/following-sibling::md:text[contains(text(), '}')]"
-    for (not_closed in curlies[no_closing]) {
-      closing <- xml2::xml_find_all(
+    for (i in which(no_closing)) {
+      not_closed <- curlies[[i]]
+      closing <- xml2::xml_find_first(
         not_closed,
         glue::glue("./{close_xpath}"),
         ns
       )
-      xml2::xml_text(not_closed) <- paste(
-        xml2::xml_text(not_closed),
-        xml2::xml_text(closing),
-        sep = "\n"
-      )
-      xml2::xml_remove(closing)
+      all_nodes <- find_between_nodes(not_closed, closing, include = TRUE)
+      curlies[[i]] <- all_nodes
+      # xml2::xml_text(not_closed) <- paste(
+      #   xml2::xml_text(all_nodes),
+      #   collapse = "\n"
+      # )
+      # xml2::xml_remove(all_nodes[-1])
     }
   }
   curlies
 }
 
-digest_curly <- function(curly, ns) {
-  label_curly_nodes(curly)
-  char <- xml2::xml_text(curly)
+digest_curly <- function(curly, id, ns) {
+  xml2::xml_set_attr(curly, "curly-id", id)
+  if (inherits(curly, "xml_node")) {
+    label_curly_node(curly)
+  } else {
+    n <- length(curly)
+    label_curly_node(curly[[1]], type = "start")
+    label_curly_node(curly[[n]], type = "stop")
+    if (n > 2) {
+      set_asis(curly[-c(1, n)])
+    }
+  }
+  label_alt(curly)
+  return(curly)
+}
+
+label_alt <- function(curly) {
+  char <- paste(trimws(xml2::xml_text(curly)), collapse = " ")
+  res <- if (inherits(curly, "xml_node")) curly else curly[[1]]
   alt_fragment <- regmatches(char, gregexpr("alt=['\"].*?['\"]", char))[[1]]
   if (length(alt_fragment) > 0) {
     alt_text <- sub("^alt=", "", alt_fragment)
-    xml2::xml_set_attr(curly, "alt", alt_text)
+    xml2::xml_set_attr(res, "alt", alt_text)
   }
 }
 
-label_curly_nodes <- function(node) {
+label_curly_node <- function(node, type = c("full", "start", "stop")) {
   char <- xml2::xml_text(node)
   # Find the locations of inline chars that is complete
+  pattern <- switch(match.arg(type),
+    full = "\\{.*?\\}",
+    start = "\\{[^}]*?",
+    stop = "[^{]*?\\}",
+  )
   locations <- gregexpr(
-    pattern = "\\{.*?\\}",
+    pattern = pattern,
     char,
     perl = TRUE
   )
   start <- locations[[1]]
-  end   <- start + attr(locations[[1]], "match.len") - 1L
-  return(add_protected_ranges(node, start, end))
-
+  end   <- if (match.arg(type) == "start") {
+    nchar(char)
+  } else {
+    start + attr(locations[[1]], "match.len") - 1L
+  }
+  add_protected_ranges(node, start, end)
+  if (is_asis(node)) {
+    xml2::xml_set_attr(node, "curly", "true")
+  } else {
+    xml2::xml_set_attr(node, "curly",
+      paste(
+        paste(start, collapse = " "),
+        paste(end, collapse = " "),
+        sep = ":"
+      )
+    )
+  }
+  return(node)
 }
 
 #' Protect curly elements for further processing
@@ -74,6 +113,6 @@ label_curly_nodes <- function(node) {
 #' xml2::xml_child(m$body)
 protect_curly <- function(body, ns = md_ns()) {
   curly <- find_curly(body, ns)
-  purrr::walk(curly, digest_curly, ns = ns)
+  purrr::iwalk(curly, digest_curly, ns = ns)
   return(body)
 }
diff --git a/tests/testthat/_snaps/attr-nodes.md b/tests/testthat/_snaps/attr-nodes.md
index d6a39fc..f33e365 100644
--- a/tests/testthat/_snaps/attr-nodes.md
+++ b/tests/testthat/_snaps/attr-nodes.md
@@ -7,13 +7,13 @@
       <!DOCTYPE document SYSTEM "CommonMark.dtd">
       <document xmlns="http://commonmark.org/xml/1.0" sourcepos="1:1-16:20">
         <heading sourcepos="2:1-2:33" level="1">
-          <text sourcepos="2:3-2:33" xml:space="preserve" protect.start="9" protect.end="31">preface {#pre-face .unnumbered}</text>
+          <text sourcepos="2:3-2:33" xml:space="preserve" curly-id="1" protect.start="9" protect.end="31" curly="9:31">preface {#pre-face .unnumbered}</text>
         </heading>
         <paragraph sourcepos="4:1-4:5">
           <text sourcepos="4:1-4:5" xml:space="preserve">hello</text>
         </paragraph>
         <paragraph sourcepos="6:1-6:51">
-          <text sourcepos="6:1-6:51" xml:space="preserve" protect.start="8 29" protect.end="13 35">I like {xml2} but of course {tinkr} is even cooler!</text>
+          <text sourcepos="6:1-6:51" xml:space="preserve" curly-id="2" protect.start="8 29" protect.end="13 35" curly="8 29:13 35">I like {xml2} but of course {tinkr} is even cooler!</text>
         </paragraph>
         <paragraph sourcepos="8:1-8:110">
           <text sourcepos="8:1-8:110" xml:space="preserve">Images that use pandoc style will have curlies with content that should be translated and should be protected.</text>
@@ -22,20 +22,20 @@
           <image sourcepos="10:1-10:51" destination="https://placekitten.com/200/300" title="">
             <text sourcepos="10:3-10:17" xml:space="preserve">a pretty kitten</text>
           </image>
-          <text sourcepos="10:52-10:88" xml:space="preserve" asis="true" alt="'a picture of a kitten'">{#kitteh alt='a picture of a kitten'}</text>
+          <text sourcepos="10:52-10:88" xml:space="preserve" curly-id="3" asis="true" curly="true" alt="'a picture of a kitten'">{#kitteh alt='a picture of a kitten'}</text>
         </paragraph>
         <paragraph sourcepos="12:1-13:10">
           <image sourcepos="12:1-12:47" destination="https://placedog.net/200/300" title="">
             <text sourcepos="12:3-12:16" xml:space="preserve">a pretty puppy</text>
           </image>
-          <text sourcepos="12:48-12:68" xml:space="preserve" alt="&quot;a picture&#10;of a dog&quot;">{#dog alt="a picture
-      of a dog"}</text>
-          <softbreak/>
+          <text sourcepos="12:48-12:68" xml:space="preserve" curly-id="4" asis="true" curly="true" alt="&quot;a picture  of a dog&quot;">{#dog alt="a picture</text>
+          <softbreak curly-id="4"/>
+          <text sourcepos="13:1-13:10" xml:space="preserve" curly-id="4" asis="true" curly="true">of a dog"}</text>
         </paragraph>
         <paragraph sourcepos="15:1-16:20">
-          <text sourcepos="15:1-15:47" xml:space="preserve" protect.start="1 24" protect.end="1 24">[a span with attributes]{.span-with-attributes
-      style='color: red;'}</text>
-          <softbreak/>
+          <text sourcepos="15:1-15:47" xml:space="preserve" protect.start="1 24 25" protect.end="1 24 46" curly-id="5" curly="25:46">[a span with attributes]{.span-with-attributes</text>
+          <softbreak curly-id="5"/>
+          <text sourcepos="16:1-16:20" xml:space="preserve" curly-id="5" asis="true" curly="true">style='color: red;'}</text>
         </paragraph>
       </document>
 
diff --git a/tests/testthat/_snaps/attr-nodes/yarn.Rmd b/tests/testthat/_snaps/attr-nodes/yarn.Rmd
index ef2353e..bd8c761 100644
--- a/tests/testthat/_snaps/attr-nodes/yarn.Rmd
+++ b/tests/testthat/_snaps/attr-nodes/yarn.Rmd
@@ -22,9 +22,7 @@ Images that use pandoc style will have curlies with content that should be trans
 ![a pretty puppy](https://placedog.net/200/300){#dog alt="a picture
 of a dog"}
 
-
 [a span with attributes]{.span-with-attributes
 style='color: red;'}
 
 
-
diff --git a/tests/testthat/_snaps/attr-nodes/yarn.md b/tests/testthat/_snaps/attr-nodes/yarn.md
index 972133a..811ef46 100644
--- a/tests/testthat/_snaps/attr-nodes/yarn.md
+++ b/tests/testthat/_snaps/attr-nodes/yarn.md
@@ -15,9 +15,7 @@ Images that use pandoc style will have curlies with content that should be trans
 ![a pretty puppy](https://placedog.net/200/300){#dog alt="a picture
 of a dog"}
 
-
 [a span with attributes]{.span-with-attributes
 style='color: red;'}
 
 
-
diff --git a/tests/testthat/test-node-splits.R b/tests/testthat/test-node-splits.R
index 2ba79b6..17209fc 100644
--- a/tests/testthat/test-node-splits.R
+++ b/tests/testthat/test-node-splits.R
@@ -33,6 +33,8 @@ test_that("splitting and joining protected nodes will work round trip", {
   h3 <- m$head(10)
   expect_equal(h1, h3)
   expect_equal(length(get_protected_nodes(m$body)), protected)
+
+  # CONFIRM ROUND TRIP ----------------------------------------
   expect_equal(as.character(m$body), as.character(orig))
 
 })
@@ -49,7 +51,7 @@ test_that("splitting and joining protected nodes will work round trip with sourc
   # protection gives us protected nodes
   protected <- length(get_protected_nodes(m$body))
   expect_gt(protected, 0)
-  # ---- splitting --------------------------------------------
+  # ---- splitting ---------------------------------------------
   # splitting transforms those nodes into split text nodes
   split_body <- split_protected_nodes(m$body)
   # no protected nodes exist
@@ -74,6 +76,8 @@ test_that("splitting and joining protected nodes will work round trip with sourc
   h3 <- m$head(10)
   expect_equal(h1, h3)
   expect_equal(length(get_protected_nodes(m$body)), protected)
+
+  # CONFIRM ROUND TRIP ----------------------------------------
   expect_equal(object = as.character(m$body), expected = as.character(orig))
 
 })

From 37332f70d05cda3890091801162ba0120a861547 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Thu, 2 May 2024 11:11:20 -0700
Subject: [PATCH 34/37] update processing of alt text to remove breaks

---
 R/attr-nodes.R                      |  3 +-
 tests/testthat/_snaps/attr-nodes.md | 58 ++++++++++++++++++++++++++++-
 tests/testthat/test-attr-nodes.R    | 22 +++++++++++
 3 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/R/attr-nodes.R b/R/attr-nodes.R
index b6ce752..36d5c5f 100644
--- a/R/attr-nodes.R
+++ b/R/attr-nodes.R
@@ -44,7 +44,8 @@ digest_curly <- function(curly, id, ns) {
 }
 
 label_alt <- function(curly) {
-  char <- paste(trimws(xml2::xml_text(curly)), collapse = " ")
+  char <- trimws(xml2::xml_text(curly))
+  char <- paste(char[char != ""], collapse = " ")
   res <- if (inherits(curly, "xml_node")) curly else curly[[1]]
   alt_fragment <- regmatches(char, gregexpr("alt=['\"].*?['\"]", char))[[1]]
   if (length(alt_fragment) > 0) {
diff --git a/tests/testthat/_snaps/attr-nodes.md b/tests/testthat/_snaps/attr-nodes.md
index f33e365..5f8e94e 100644
--- a/tests/testthat/_snaps/attr-nodes.md
+++ b/tests/testthat/_snaps/attr-nodes.md
@@ -28,7 +28,7 @@
           <image sourcepos="12:1-12:47" destination="https://placedog.net/200/300" title="">
             <text sourcepos="12:3-12:16" xml:space="preserve">a pretty puppy</text>
           </image>
-          <text sourcepos="12:48-12:68" xml:space="preserve" curly-id="4" asis="true" curly="true" alt="&quot;a picture  of a dog&quot;">{#dog alt="a picture</text>
+          <text sourcepos="12:48-12:68" xml:space="preserve" curly-id="4" asis="true" curly="true" alt="&quot;a picture of a dog&quot;">{#dog alt="a picture</text>
           <softbreak curly-id="4"/>
           <text sourcepos="13:1-13:10" xml:space="preserve" curly-id="4" asis="true" curly="true">of a dog"}</text>
         </paragraph>
@@ -39,3 +39,59 @@
         </paragraph>
       </document>
 
+# multiline alt text can be processed
+
+    Code
+      cat(as.character(protect_curly(curly$body)))
+    Output
+      <?xml version="1.0" encoding="UTF-8"?>
+      <!DOCTYPE document SYSTEM "CommonMark.dtd">
+      <document xmlns="http://commonmark.org/xml/1.0" sourcepos="1:1-21:64">
+        <heading sourcepos="2:1-2:33" level="1">
+          <text sourcepos="2:3-2:33" xml:space="preserve" curly-id="1" protect.start="9" protect.end="31" curly="9:31">preface {#pre-face .unnumbered}</text>
+        </heading>
+        <paragraph sourcepos="4:1-4:5">
+          <text sourcepos="4:1-4:5" xml:space="preserve">hello</text>
+        </paragraph>
+        <paragraph sourcepos="6:1-6:51">
+          <text sourcepos="6:1-6:51" xml:space="preserve" curly-id="2" protect.start="8 29" protect.end="13 35" curly="8 29:13 35">I like {xml2} but of course {tinkr} is even cooler!</text>
+        </paragraph>
+        <paragraph sourcepos="8:1-8:110">
+          <text sourcepos="8:1-8:110" xml:space="preserve">Images that use pandoc style will have curlies with content that should be translated and should be protected.</text>
+        </paragraph>
+        <paragraph sourcepos="10:1-10:88">
+          <image sourcepos="10:1-10:51" destination="https://placekitten.com/200/300" title="">
+            <text sourcepos="10:3-10:17" xml:space="preserve">a pretty kitten</text>
+          </image>
+          <text sourcepos="10:52-10:88" xml:space="preserve" curly-id="3" asis="true" curly="true" alt="'a picture of a kitten'">{#kitteh alt='a picture of a kitten'}</text>
+        </paragraph>
+        <paragraph sourcepos="12:1-13:10">
+          <image sourcepos="12:1-12:47" destination="https://placedog.net/200/300" title="">
+            <text sourcepos="12:3-12:16" xml:space="preserve">a pretty puppy</text>
+          </image>
+          <text sourcepos="12:48-12:68" xml:space="preserve" curly-id="4" asis="true" curly="true" alt="&quot;a picture of a dog&quot;">{#dog alt="a picture</text>
+          <softbreak curly-id="4"/>
+          <text sourcepos="13:1-13:10" xml:space="preserve" curly-id="4" asis="true" curly="true">of a dog"}</text>
+        </paragraph>
+        <paragraph sourcepos="15:1-19:23">
+          <text sourcepos="15:1-15:47" xml:space="preserve" protect.start="1 24 25" protect.end="1 24 46" curly-id="5" curly="25:46">[a span with attributes]{.span-with-attributes</text>
+          <softbreak curly-id="5"/>
+          <text sourcepos="16:1-16:20" xml:space="preserve" curly-id="5" asis="true" curly="true">style='color: red;'}</text>
+          <softbreak/>
+          <image sourcepos="17:1-17:38" destination="image.png" title="">
+            <text sourcepos="17:3-17:26" xml:space="preserve">image with long alt text</text>
+          </image>
+          <text sourcepos="17:39-17:58" xml:space="preserve" curly-id="6" asis="true" curly="true" alt="'this is long alt text that should be all included in the image'">{#image alt='this is</text>
+          <softbreak curly-id="6"/>
+          <text sourcepos="18:1-18:32" xml:space="preserve" curly-id="6" asis="true">long alt text that should be all</text>
+          <softbreak curly-id="6"/>
+          <text sourcepos="19:1-19:23" xml:space="preserve" curly-id="6" asis="true" curly="true">included in the image'}</text>
+        </paragraph>
+        <paragraph sourcepos="21:1-21:64">
+          <image sourcepos="21:1-21:37" destination="img.png" title="">
+            <text sourcepos="21:3-21:27" xml:space="preserve">image with short alt text</text>
+          </image>
+          <text sourcepos="21:38-21:64" xml:space="preserve" curly-id="7" asis="true" curly="true" alt="'short alt text'">{#img alt='short alt text'}</text>
+        </paragraph>
+      </document>
+
diff --git a/tests/testthat/test-attr-nodes.R b/tests/testthat/test-attr-nodes.R
index f35a200..201538d 100644
--- a/tests/testthat/test-attr-nodes.R
+++ b/tests/testthat/test-attr-nodes.R
@@ -4,6 +4,28 @@ test_that("protect_curly() works", {
   expect_snapshot(cat(as.character(protect_curly(curly$body))))
 })
 
+
+test_that("multiline alt text can be processed", {
+  pathcurly <- system.file("extdata", "basic-curly.md", package = "tinkr")
+  tmpfile <- withr::local_tempfile()
+  file.copy(pathcurly, tmpfile, overwrite = TRUE)
+  cat(c("![image with long alt text](image.png){#image alt='this is",
+    "long alt text that should be all",
+    "included in the image'}", 
+    "",
+    "![image with short alt text](img.png){#img alt='short alt text'}",
+    ""
+    ),
+    sep = "\n",
+    file = tmpfile,
+    append = TRUE
+  )
+  curly <- yarn$new(tmpfile, sourcepos = TRUE)
+  expect_snapshot(cat(as.character(protect_curly(curly$body))))
+})
+
+
+
 test_that("a curly-protected yarn object can be written back to markdown", {
   md_pathcurly <- system.file("extdata", "basic-curly.md", package = "tinkr")
   rmd_pathcurly <- system.file("extdata", "basic-curly2.Rmd", package = "tinkr")

From bb0032cf5adc99252c6e717a1af2b3dcbfe3bc35 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Thu, 2 May 2024 11:12:44 -0700
Subject: [PATCH 35/37] fix typo

---
 R/asis-nodes.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/asis-nodes.R b/R/asis-nodes.R
index 8709362..0ab7258 100644
--- a/R/asis-nodes.R
+++ b/R/asis-nodes.R
@@ -250,7 +250,7 @@ protect_block_math <- function(body, ns) {
   bm <- xml2::xml_find_all(bm, ".//descendant-or-self::md:*", ns = ns)
   # if any of the nodes have been set as curly, unset
   xml2::xml_set_attr(bm, "curly", NULL)
-  xml2::xml_set_attr(math, "curly-id", NULL)
+  xml2::xml_set_attr(bm, "curly-id", NULL)
   set_asis(bm)
 }
 

From 4db21899bae6189b6c9c5366ae3024f5ff1860de Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Thu, 2 May 2024 13:50:02 -0700
Subject: [PATCH 36/37] simplify node joinery

---
 R/node-splits.R | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/R/node-splits.R b/R/node-splits.R
index 0fbf0f9..b76ccd0 100644
--- a/R/node-splits.R
+++ b/R/node-splits.R
@@ -142,30 +142,32 @@ join_split_nodes <- function(body) {
 }
 
 join_text_nodes <- function(nodes) {
-  nodetxt <- xml2::xml_text(nodes)
-  asis_nodes <- is_asis(nodes)
-  # In this nodeset, we need to make sure to relabel the asis nodes 
-  txt <- paste(nodetxt, collapse = "")
+  txt <- paste(xml2::xml_text(nodes), collapse = "")
+  prtct <- protected_ranges_from_fragments(nodes)
   # our new node is the donor for all other nodes
   new_node <- nodes[[1]]
+  xml2::xml_set_attr(new_node, "asis", NULL)
+  add_protected_ranges(new_node, prtct$start, prtct$end)
   xml2::xml_set_text(new_node, txt)
   if (has_sourcepos(new_node)) {
     # restore the sourcepos of the original nodes
     pos <- join_sourcepos(nodes)
     xml2::xml_set_attr(new_node, "sourcepos", pos)
   }
-  # compute the protection from the string lengths
-  n <- cumsum(nchar(nodetxt))
-  start <- n[!asis_nodes] + 1
-  end <- n[asis_nodes]
-  # if the first node is an asis node, th
-  if (isTRUE(asis_nodes[1])) {
-    start <- c(1, n[!asis_nodes] + 1)
-  }
-  start <- start[seq_along(end)]
-  add_protected_ranges(new_node, start, end)
+  # need to remove 'asis' before setting the protected range
   xml2::xml_set_attr(new_node, "split-id", NULL)
-  xml2::xml_set_attr(new_node, "asis", NULL)
   xml2::xml_remove(nodes[-1])
 }
 
+protected_ranges_from_fragments <- function(nodes) {
+  txt <- xml2::xml_text(nodes)
+  asis_nodes <- is_asis(nodes)
+  # number of characters
+  N <- nchar(txt)
+  # ending positions of the characters
+  P <- cumsum(N)
+  start <- P[asis_nodes] - N[asis_nodes] + 1
+  end <- P[asis_nodes]
+  return(list(start = start, end = end))
+}
+

From 37fc979930ac77769aa19755badba7ea69231b02 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Thu, 2 May 2024 14:10:02 -0700
Subject: [PATCH 37/37] update tests for curly

---
 tests/testthat/_snaps/attr-nodes.md | 124 +++++++++++++++++++++++++++-
 tests/testthat/test-attr-nodes.R    |  47 ++++++++++-
 2 files changed, 169 insertions(+), 2 deletions(-)

diff --git a/tests/testthat/_snaps/attr-nodes.md b/tests/testthat/_snaps/attr-nodes.md
index 5f8e94e..b8e6fbe 100644
--- a/tests/testthat/_snaps/attr-nodes.md
+++ b/tests/testthat/_snaps/attr-nodes.md
@@ -1,7 +1,7 @@
 # protect_curly() works
 
     Code
-      cat(as.character(protect_curly(curly$body)))
+      cat(as.character(protec))
     Output
       <?xml version="1.0" encoding="UTF-8"?>
       <!DOCTYPE document SYSTEM "CommonMark.dtd">
@@ -39,6 +39,128 @@
         </paragraph>
       </document>
 
+# protect_curly() can be reversed
+
+    Code
+      cat(as.character(splitsville))
+    Output
+      <?xml version="1.0" encoding="UTF-8"?>
+      <!DOCTYPE document SYSTEM "CommonMark.dtd">
+      <document xmlns="http://commonmark.org/xml/1.0" sourcepos="1:1-22:64">
+        <heading sourcepos="2:1-2:33" level="1">
+          <text sourcepos="2:3-2:10" xml:space="preserve" curly-id="1" curly="9:31" split-id="1">preface </text>
+          <text sourcepos="2:11-2:33" xml:space="preserve" curly-id="1" curly="9:31" split-id="1" asis="true">{#pre-face .unnumbered}</text>
+        </heading>
+        <paragraph sourcepos="4:1-4:5">
+          <text sourcepos="4:1-4:5" xml:space="preserve">hello</text>
+        </paragraph>
+        <paragraph sourcepos="6:1-6:51">
+          <text sourcepos="6:1-6:7" xml:space="preserve" curly-id="2" curly="8 29:13 35" split-id="2">I like </text>
+          <text sourcepos="6:8-6:13" xml:space="preserve" curly-id="2" curly="8 29:13 35" split-id="2" asis="true">{xml2}</text>
+          <text sourcepos="6:14-6:28" xml:space="preserve" curly-id="2" curly="8 29:13 35" split-id="2"> but of course </text>
+          <text sourcepos="6:29-6:35" xml:space="preserve" curly-id="2" curly="8 29:13 35" split-id="2" asis="true">{tinkr}</text>
+          <text sourcepos="6:36-6:51" xml:space="preserve" curly-id="2" curly="8 29:13 35" split-id="2"> is even cooler!</text>
+        </paragraph>
+        <paragraph sourcepos="8:1-8:110">
+          <text sourcepos="8:1-8:110" xml:space="preserve">Images that use pandoc style will have curlies with content that should be translated and should be protected.</text>
+        </paragraph>
+        <paragraph sourcepos="10:1-10:88">
+          <image sourcepos="10:1-10:51" destination="https://placekitten.com/200/300" title="">
+            <text sourcepos="10:3-10:17" xml:space="preserve">a pretty kitten</text>
+          </image>
+          <text sourcepos="10:52-10:88" xml:space="preserve" curly-id="3" asis="true" curly="true" alt="'a picture of a kitten'">{#kitteh alt='a picture of a kitten'}</text>
+        </paragraph>
+        <paragraph sourcepos="12:1-13:10">
+          <image sourcepos="12:1-12:47" destination="https://placedog.net/200/300" title="">
+            <text sourcepos="12:3-12:16" xml:space="preserve">a pretty puppy</text>
+          </image>
+          <text sourcepos="12:48-12:68" xml:space="preserve" curly-id="4" asis="true" curly="true" alt="&quot;a picture of a dog&quot;">{#dog alt="a picture</text>
+          <softbreak curly-id="4"/>
+          <text sourcepos="13:1-13:10" xml:space="preserve" curly-id="4" asis="true" curly="true">of a dog"}</text>
+        </paragraph>
+        <paragraph sourcepos="15:1-16:20">
+          <text sourcepos="15:1-15:1" xml:space="preserve" curly-id="5" curly="25:46" split-id="3" asis="true">[</text>
+          <text sourcepos="15:2-15:23" xml:space="preserve" curly-id="5" curly="25:46" split-id="3">a span with attributes</text>
+          <text sourcepos="15:24-15:24" xml:space="preserve" curly-id="5" curly="25:46" split-id="3" asis="true">]</text>
+          <text sourcepos="15:25-15:47" xml:space="preserve" curly-id="5" curly="25:46" split-id="3" asis="true">{.span-with-attributes</text>
+          <softbreak curly-id="5"/>
+          <text sourcepos="16:1-16:20" xml:space="preserve" curly-id="5" asis="true" curly="true">style='color: red;'}</text>
+        </paragraph>
+        <paragraph sourcepos="18:1-20:23">
+          <image sourcepos="18:1-18:38" destination="image.png" title="">
+            <text sourcepos="18:3-18:26" xml:space="preserve">image with long alt text</text>
+          </image>
+          <text sourcepos="18:39-18:58" xml:space="preserve" curly-id="6" asis="true" curly="true" alt="'this is long alt text that should be all included in the image'">{#image alt='this is</text>
+          <softbreak curly-id="6"/>
+          <text sourcepos="19:1-19:32" xml:space="preserve" curly-id="6" asis="true">long alt text that should be all</text>
+          <softbreak curly-id="6"/>
+          <text sourcepos="20:1-20:23" xml:space="preserve" curly-id="6" asis="true" curly="true">included in the image'}</text>
+        </paragraph>
+        <paragraph sourcepos="22:1-22:64">
+          <image sourcepos="22:1-22:37" destination="img.png" title="">
+            <text sourcepos="22:3-22:27" xml:space="preserve">image with short alt text</text>
+          </image>
+          <text sourcepos="22:38-22:64" xml:space="preserve" curly-id="7" asis="true" curly="true" alt="'short alt text'">{#img alt='short alt text'}</text>
+        </paragraph>
+      </document>
+
+---
+
+    Code
+      cat(as.character(joinsville))
+    Output
+      <?xml version="1.0" encoding="UTF-8"?>
+      <!DOCTYPE document SYSTEM "CommonMark.dtd">
+      <document xmlns="http://commonmark.org/xml/1.0" sourcepos="1:1-22:64">
+        <heading sourcepos="2:1-2:33" level="1">
+          <text sourcepos="2:3-2:33" xml:space="preserve" curly-id="1" curly="9:31" protect.start="9" protect.end="31">preface {#pre-face .unnumbered}</text>
+        </heading>
+        <paragraph sourcepos="4:1-4:5">
+          <text sourcepos="4:1-4:5" xml:space="preserve">hello</text>
+        </paragraph>
+        <paragraph sourcepos="6:1-6:51">
+          <text sourcepos="6:1-6:51" xml:space="preserve" curly-id="2" curly="8 29:13 35" protect.start="8 29" protect.end="13 35">I like {xml2} but of course {tinkr} is even cooler!</text>
+        </paragraph>
+        <paragraph sourcepos="8:1-8:110">
+          <text sourcepos="8:1-8:110" xml:space="preserve">Images that use pandoc style will have curlies with content that should be translated and should be protected.</text>
+        </paragraph>
+        <paragraph sourcepos="10:1-10:88">
+          <image sourcepos="10:1-10:51" destination="https://placekitten.com/200/300" title="">
+            <text sourcepos="10:3-10:17" xml:space="preserve">a pretty kitten</text>
+          </image>
+          <text sourcepos="10:52-10:88" xml:space="preserve" curly-id="3" asis="true" curly="true" alt="'a picture of a kitten'">{#kitteh alt='a picture of a kitten'}</text>
+        </paragraph>
+        <paragraph sourcepos="12:1-13:10">
+          <image sourcepos="12:1-12:47" destination="https://placedog.net/200/300" title="">
+            <text sourcepos="12:3-12:16" xml:space="preserve">a pretty puppy</text>
+          </image>
+          <text sourcepos="12:48-12:68" xml:space="preserve" curly-id="4" asis="true" curly="true" alt="&quot;a picture of a dog&quot;">{#dog alt="a picture</text>
+          <softbreak curly-id="4"/>
+          <text sourcepos="13:1-13:10" xml:space="preserve" curly-id="4" asis="true" curly="true">of a dog"}</text>
+        </paragraph>
+        <paragraph sourcepos="15:1-16:20">
+          <text sourcepos="15:1-15:47" xml:space="preserve" curly-id="5" curly="25:46" protect.start="1 24 25" protect.end="1 24 46">[a span with attributes]{.span-with-attributes</text>
+          <softbreak curly-id="5"/>
+          <text sourcepos="16:1-16:20" xml:space="preserve" curly-id="5" asis="true" curly="true">style='color: red;'}</text>
+        </paragraph>
+        <paragraph sourcepos="18:1-20:23">
+          <image sourcepos="18:1-18:38" destination="image.png" title="">
+            <text sourcepos="18:3-18:26" xml:space="preserve">image with long alt text</text>
+          </image>
+          <text sourcepos="18:39-18:58" xml:space="preserve" curly-id="6" asis="true" curly="true" alt="'this is long alt text that should be all included in the image'">{#image alt='this is</text>
+          <softbreak curly-id="6"/>
+          <text sourcepos="19:1-19:32" xml:space="preserve" curly-id="6" asis="true">long alt text that should be all</text>
+          <softbreak curly-id="6"/>
+          <text sourcepos="20:1-20:23" xml:space="preserve" curly-id="6" asis="true" curly="true">included in the image'}</text>
+        </paragraph>
+        <paragraph sourcepos="22:1-22:64">
+          <image sourcepos="22:1-22:37" destination="img.png" title="">
+            <text sourcepos="22:3-22:27" xml:space="preserve">image with short alt text</text>
+          </image>
+          <text sourcepos="22:38-22:64" xml:space="preserve" curly-id="7" asis="true" curly="true" alt="'short alt text'">{#img alt='short alt text'}</text>
+        </paragraph>
+      </document>
+
 # multiline alt text can be processed
 
     Code
diff --git a/tests/testthat/test-attr-nodes.R b/tests/testthat/test-attr-nodes.R
index 201538d..b881610 100644
--- a/tests/testthat/test-attr-nodes.R
+++ b/tests/testthat/test-attr-nodes.R
@@ -1,10 +1,55 @@
 test_that("protect_curly() works", {
   pathcurly <- system.file("extdata", "basic-curly.md", package = "tinkr")
   curly <- yarn$new(pathcurly, sourcepos = TRUE)
-  expect_snapshot(cat(as.character(protect_curly(curly$body))))
+  protec <- protect_curly(curly$body)
+  expect_snapshot(cat(as.character(protec)))
+})
+
+
+test_that("protect_curly() can be reversed", {
+  pathcurly <- system.file("extdata", "basic-curly.md", package = "tinkr")
+  tmpfile <- withr::local_tempfile()
+  file.copy(pathcurly, tmpfile, overwrite = TRUE)
+  cat(c(
+    "",
+    "![image with long alt text](image.png){#image alt='this is",
+    "long alt text that should be all",
+    "included in the image'}", 
+    "",
+    "![image with short alt text](img.png){#img alt='short alt text'}",
+    ""
+    ),
+    sep = "\n",
+    file = tmpfile,
+    append = TRUE
+  )
+  curly <- yarn$new(tmpfile, sourcepos = TRUE)
+  curly$protect_curly()
+  orig <- copy_xml(curly$body)
+
+  expect_length(get_protected_nodes(curly$body), 3)
+  expect_length(xml2::xml_find_all(curly$body, ".//node()[@curly]"), 10)
+
+  # when split the protected nodes go away
+  splitsville <- split_protected_nodes(curly$body)
+  expect_snapshot(cat(as.character(splitsville)))
+  expect_length(get_protected_nodes(splitsville), 0)
+
+  joinsville <- join_split_nodes(splitsville)
+  expect_identical(joinsville, splitsville)
+  # joining restores these nodes
+  sprotec <- get_protected_nodes(splitsville)
+  expect_length(sprotec, 3)
+  expect_equal(
+    lapply(sprotec, get_protected_ranges),
+    lapply(get_protected_nodes(orig), get_protected_ranges)
+  )
+  expect_length(xml2::xml_find_all(splitsville, ".//node()[@curly]"), 10)
+  expect_snapshot(cat(as.character(joinsville)))
 })
 
 
+
 test_that("multiline alt text can be processed", {
   pathcurly <- system.file("extdata", "basic-curly.md", package = "tinkr")
   tmpfile <- withr::local_tempfile()