replace inf, cleanup

smucclaw · Oct 7, 2023 · 2b1fe71 · 2b1fe71
1 parent 77b2cac
commit 2b1fe71
Show file tree

Hide file tree

Showing 3 changed files with 54 additions and 44 deletions.
diff --git a/lib/haskell/natural4/src/LS/Utils.hs b/lib/haskell/natural4/src/LS/Utils.hs
@@ -9,6 +9,7 @@ module LS.Utils
     runMonoidValidate,
     swallowErrs,
     MonoidValidate,
+    compose,
     (<||>),
     (<&&>)
   )
@@ -22,8 +23,8 @@ import Control.Monad.Validate
   )
 import Data.Coerce (coerce)
 import Data.Either (rights, partitionEithers)
-import Data.Monoid (Ap (Ap))
-import Flow ((|>))
+import Data.Monoid (Ap (Ap), Endo (Endo))
+import Flow ((|>), (.>))
 
 infixl 0 |$>
 
@@ -77,6 +78,10 @@ swallowErrs = mapThenSwallowErrs id
 runMonoidValidate :: MonoidValidate e a -> Either e a
 runMonoidValidate x = x |> coerce |> runValidate 
 
+-- | Function composition via the endomorphism monoid.
+compose :: forall a. [a -> a] -> a -> a
+compose = (coerce :: [a -> a] -> [Endo a]) .> mconcat .> coerce
+
 -- | A simple lifted ('||'), copied from Control.Bool
 (<||>) :: Applicative f => f Bool -> f Bool -> f Bool
 (<||>) = liftA2 (||)

diff --git a/lib/haskell/natural4/src/LS/XPile/CoreL4.hs b/lib/haskell/natural4/src/LS/XPile/CoreL4.hs
@@ -211,6 +211,7 @@ import Prettyprinter.Interpolate (__di, di)
 import Text.Regex.TDFA (AllTextMatches (getAllTextMatches), (=~))
 import Text.XML.HXT.Core qualified as HXT
 import ToDMN.FromL4 (genXMLTreeNoType)
+import LS.Utils (compose)
 
 -- type ExprM a = Either String (Expr a)
 type ExprM ann a = MonoidValidate (Doc ann) (Expr a)
@@ -716,7 +717,7 @@ prettyDefnCs rname cs = do
       replacements =
         [ T.replace (T.pack t) $ T.pack $ show n
         | (t, n) <- zip (nub myterms) x123 ]
-      outstr = chain replacements $ mt2text rhs
+      outstr = compose replacements $ mt2text rhs
       returntype = "Integer"
 
   pure $ if null myterms
@@ -743,9 +744,6 @@ prettyDefnCs rname cs = do
       <> Prettyprinter.line <> commentShow "#" cl
     -- defn aPlusB : Integer -> Integer -> Integer = \x : Integer -> \y : Integer -> x + y
   where
-    -- Function composition via the endomorphism monoid
-    chain :: [a -> a] -> a -> a
-    chain = (coerce :: [a -> a] -> [Endo a]) .> mconcat .> coerce
 
     x123 = [[di|x#{n}|] | (n :: Int) <- [1..]]
 

diff --git a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/IdVars.hs b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/IdVars.hs
@@ -96,55 +96,62 @@ a config file that is kept in sync with the downstream stuff
 (since have to do this kind of replacement in the converse direction when generating justification)
 -}
 replaceTxt :: T.Text -> T.Text
-replaceTxt =
-  replaceClauseNums . replacePeriod .
-  toStrict . replaceWithTrie replacements . fromStrict
+replaceTxt = replacePeriod . replaceTxtPlain
+
+replaceTxtPlain :: T.Text -> T.Text
+replaceTxtPlain = toStrict . replaceWithTrie replacements . fromStrict
+  where
+    replacements = listToTrie $ mconcat [replaceCommaPercent, replaceInf]
+
+    replaceInf =
+      [ Replace inf " inf "
+        | inf <- [" infinity ", " INFINITY ", " INF "]
+      ]
+
+    replaceCommaPercent =
+      [ Replace "," " COMMA",
+        Replace "%" " PERCENT"
+      ]
+      {- ^ it's cleaner not to put a space after `percent`
+        because it's usually something like "100% blah blah" in the encoding
+        So if you add a space after, you end up getting "100 percent  blah blah", which doesn't look as nice.
+        And similarly with `comma`.
+
+        Couldn't figure out quickly how to get doc tests to work for this function, so not bothering with that for now. (TODO)
+        >>> replaceTxt ""
+        ""
+        >>> replaceTxt "100% guarantee"
+        "100 PERCENT guarantee"
+
+        >>> replaceTxt "rocks, stones, and trees"
+        "rocks COMMA stones COMMA and trees"
+      -}
+
+-- LE has no trouble parsing dots that appear in numbers, ie things like
+-- "clause 2.1 applies" is fine.
+-- However, dots used as a full-stop, as in "The car is blue." is not ok
+-- and so that "." needs to be turned into "PERIOD".
+-- Also, references to clause numbers of the form "14.1.3" are not ok and so
+-- must be replaced with "14.1 PERIOD 3".
+replacePeriod :: T.Text -> T.Text
+replacePeriod = replaceClauseNums . replaceFullStop
   where
-    replacements =
-      listToTrie
-        [ Replace "," " COMMA",
-          Replace "%" " PERCENT"
-          {- ^ it's cleaner not to put a space after `percent`
-           because it's usually something like "100% blah blah" in the encoding
-           So if you add a space after, you end up getting "100 percent  blah blah", which doesn't look as nice.
-           And similarly with `comma`.
-
-           Couldn't figure out quickly how to get doc tests to work for this function, so not bothering with that for now. (TODO)
-            >>> replaceTxt ""
-            ""
-
-            >>> replaceTxt ("100.5 * 2" :: T.Text)
-            "100 DOT 5 * 2"
-
-            >>> replaceTxt "100% guarantee"
-            "100 PERCENT guarantee"
-
-            >>> replaceTxt "rocks, stones, and trees"
-            "rocks COMMA stones COMMA and trees"
-          -}
-        ]
-
-    -- LE has no trouble parsing dots that appear in numbers, ie things like
-    -- "clause 2.1 applies" is fine.
-    -- However, dots used as a full-stop, as in "The car is blue." is not ok
-    -- and so that "." needs to be turned into "PERIOD".
-    replacePeriod =
+    replaceFullStop =
       PCRE.gsub
         -- https://stackoverflow.com/a/45616898 
         [PCRE.re|[a-zA-z] + [^0-9\s.]+|\.(?!\d)|]
         (" PERIOD " :: T.Text)
 
-    -- Replace references to clause numbers of the form "14.1.3" with "14.1 PERIOD 3".
     replaceClauseNums =
       PCRE.gsub
         [PCRE.re|(\d+\.\d+)\.(\d+)|]
         \(x:y:_ :: [T.Text]) -> [i|#{x} PERIOD #{y}|] :: T.Text
 
-    -- replaceHyphen =
-    --   PCRE.gsub
-    --     -- https://stackoverflow.com/a/31911114
-    --     [PCRE.re|(?=\S*[-])([a-zA-Z]+)\-([a-zA-Z]+)|]
-    --     \(s0:s1:_) -> mconcat [s0, " HYPHEN ", s1] :: T.Text
+-- replaceHyphen =
+--   PCRE.gsub
+--     -- https://stackoverflow.com/a/31911114
+--     [PCRE.re|(?=\S*[-])([a-zA-Z]+)\-([a-zA-Z]+)|]
+--     \(s0:s1:_) -> mconcat [s0, " HYPHEN ", s1] :: T.Text
 
 {- | Convert a SimplifiedL4 Cell to a VCell
 The code for simplifying L4 AST has established these invariants: