Skip to content

Commit

Permalink
LE: Don't generate NLA for conditions with cells that end with "'s" (…
Browse files Browse the repository at this point in the history
…but do add a 'a ' prefix for a cell that matches GIVEN var and ends with "'s")
  • Loading branch information
ym-han committed Oct 6, 2023
1 parent 5605b82 commit 77b2cac
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 59 deletions.
9 changes: 7 additions & 2 deletions lib/haskell/natural4/src/LS/Utils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ module LS.Utils
runMonoidValidate,
swallowErrs,
MonoidValidate,
(<||>)
(<||>),
(<&&>)
)
where

Expand Down Expand Up @@ -79,4 +80,8 @@ runMonoidValidate x = x |> coerce |> runValidate
-- | A simple lifted ('||'), copied from Control.Bool
(<||>) :: Applicative f => f Bool -> f Bool -> f Bool
(<||>) = liftA2 (||)
{-# INLINE (<||>) #-}
{-# INLINE (<||>) #-}

(<&&>) :: Applicative f => f Bool -> f Bool -> f Bool
(<&&>) = liftA2 (&&)
{-# INLINE (<&&>) #-}
7 changes: 3 additions & 4 deletions lib/haskell/natural4/src/LS/XPile/LogicalEnglish/GenLEHCs.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ module LS.XPile.LogicalEnglish.GenLEHCs (leHCFromVarsHC) where
import Data.Text qualified as T
import Data.HashSet qualified as HS
-- import Data.Foldable (toList)
-- import Debug.Trace (trace)
import Data.Coerce (coerce)
-- import Data.String.Interpolate ( i )
import Data.Traversable
Expand Down Expand Up @@ -184,9 +183,9 @@ simplifyVAtomicP = fmap simplifyVCells

simplifyVCells :: VCell -> LEhcCell
simplifyVCells = \case
PredOrNonAposAtom txt -> NotVar txt
AposAtom prefix -> NotVar prefix <> aposSuffix
TempVar tv -> tvar2lecell tv
NonVarOrNonAposAtom txt -> NotVar txt
AposAtom prefix -> NotVar $ prefix <> aposSuffix
TempVar tv -> tvar2lecell tv

tvar2lecell :: TemplateVar -> LEhcCell
tvar2lecell = \case
Expand Down
27 changes: 18 additions & 9 deletions lib/haskell/natural4/src/LS/XPile/LogicalEnglish/GenNLAs.hs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import Data.Maybe (catMaybes)
-- import Debug.Trace (trace)
import Data.Coerce (coerce)

import LS.Utils ((<&&>))
import LS.XPile.LogicalEnglish.Types
( -- Common types
BoolPropn(..)
Expand All @@ -53,8 +54,9 @@ import LS.XPile.LogicalEnglish.Types
, VarsRule
, AtomicPWithVars
, VCell(..)
, _TempVar
, _TempVar, _AposAtom
, _EndsInApos
, aposSuffix
)
import LS.XPile.LogicalEnglish.Utils (setInsert)
import Data.String (IsString)
Expand Down Expand Up @@ -149,9 +151,11 @@ regexifyVCells :: NE (Seq VCell) -> Either String Regex
regexifyVCells = makeRegex . textify strdelimitr regexf . fromNonEmpty
where
strdelimitr :: String = " "
escapeTxt = PCRE.escape . T.unpack
regexf = \case
TempVar tvar -> tvar2WordsOrVIregex tvar
Pred nonvartxt -> (PCRE.escape . T.unpack $ nonvartxt)
AposAtom prefix -> escapeTxt $ prefix <> aposSuffix
NonVarOrNonAposAtom txt -> escapeTxt txt


textify :: (Foldable t, Monoid c, SemiSequence (t c), Functor t) => Element (t c) -> (a -> c) -> t a -> c
Expand Down Expand Up @@ -367,25 +371,30 @@ nlasFromBody varsABP =

-- | Keeps only those VCells that we do want to generate an NLA from
keepVCells :: (Foldable f) => f VCell -> Maybe (f VCell)
keepVCells vcells = if wantToGenNLAFromTheseVCells vcells then Just vcells else Nothing
where
wantToGenNLAFromTheseVCells = allOf folded (isn't $ _TempVar % _EndsInApos)
keepVCells vcells = if allOf folded notEndInApos vcells then Just vcells else Nothing
where
notEndInApos = isn't (_TempVar % _EndsInApos) <&&>
isn't _AposAtom

nlaLoneFromVAtomicP :: AtomicPWithVars -> Maybe NLA
nlaLoneFromVAtomicP = \case
ABPatomic vcells -> keepVCells vcells >>= mkNLA
ABPIsOpSuchTt _ _ vcells -> mkNLA vcells
ABPatomic φvcs -> selectivelyMkNLA φvcs
ABPIsOpSuchTt _ _ φvcs -> selectivelyMkNLA φvcs

-- the other cases are accounted for by lib NLAs/templates, or are just built into LE
ABPBaseIs{} -> Nothing
ABPIsIn{} -> Nothing
ABPIsDiffFr{} -> Nothing
ABPIsOpOf{} -> Nothing

where
selectivelyMkNLA φvcells = keepVCells φvcells >>= mkNLA

vcell2NLAtxt :: VCell -> NLATxt
vcell2NLAtxt = \case
TempVar tvar -> tvar2NLAtxt tvar
Pred nonvartxt -> coerce nonvartxt
TempVar tvar -> tvar2NLAtxt tvar
AposAtom prefix -> coerce $ prefix <> aposSuffix
NonVarOrNonAposAtom txt -> coerce txt

tvar2NLAtxt :: TemplateVar -> NLATxt
tvar2NLAtxt = \case
Expand Down
41 changes: 19 additions & 22 deletions lib/haskell/natural4/src/LS/XPile/LogicalEnglish/IdVars.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,11 @@ import LS.XPile.LogicalEnglish.Types
, AtomicBPropn(..)
, L4AtomicP

-- Intermediate representation types
-- Intermediate representation types, prisms, and constants
, TemplateVar(..)
-- , _TempVar
--, _AposAtom, _NonVarOrNonAposAtom
-- , aposSuffix
, VarsHC(MkVarsFact,
MkVarsRule,
vfhead,
Expand Down Expand Up @@ -83,8 +86,9 @@ idVarsInBody gvars = fmap (postprocAP . idVarsInAP gvars)
-- | Replace text in VCells
replaceTxtVCell :: VCell -> VCell
replaceTxtVCell = \case
tv@(TempVar _) -> tv
Pred txt -> Pred $ replaceTxt txt
tv@(TempVar _) -> tv
apAtm@(AposAtom _) -> apAtm
NonVarOrNonAposAtom txt -> NonVarOrNonAposAtom (replaceTxt txt)

{- |
TODO: Would be better to read in a dictionary of what/how to replace from some config file,
Expand Down Expand Up @@ -152,29 +156,22 @@ In other words, we can convert an arbitrary Cell to a VCell as long as we know t
-}
cell2vcell :: GVarSet -> Cell -> VCell
cell2vcell gvars = \case
MkCellT celltxt ->
if txtIsAGivenVar gvars celltxt
then TempVar (MatchGVar celltxt)
else
let (prefix, isAposV) = isAposVar gvars celltxt
in if isAposV
then TempVar (EndsInApos prefix)
else Pred celltxt
MkCellT celltxt -> celltxt2vcell gvars celltxt
MkCellIsNum numtxt -> TempVar (IsNum numtxt)

celltxt2vcell :: GVarSet -> T.Text -> VCell
celltxt2vcell gvars (T.stripSuffix "'s" -> Just prefix) =
-- NOTE / TODO: this matching on "'s" is a bit brittle cos unicode
if txtIsAGivenVar gvars prefix
then TempVar (EndsInApos prefix)
else AposAtom prefix
celltxt2vcell gvars celltxt
| txtIsAGivenVar gvars celltxt = TempVar (MatchGVar celltxt)
| otherwise = NonVarOrNonAposAtom celltxt

txtIsAGivenVar :: GVarSet -> T.Text -> Bool
txtIsAGivenVar gvars txt = HS.member (coerce txt) gvars

type PrefixAposVar = T.Text
isAposVar :: GVarSet -> T.Text -> (PrefixAposVar, Bool)
isAposVar gvs (T.stripSuffix "'s" -> Just prefix) =
if txtIsAGivenVar gvs prefix
then (prefix, True)
else ("", False)
isAposVar _ _ = ("", False)
-- TODO: this matching on "'s" is a bit brittle cos unicode



-- {- Deprecating this and the next fn b/c the encoding suggests terms other than the args for op of might not just be either MatchGVar or EndsInApos --- they can also be atoms / non-variables
-- -}
Expand All @@ -186,7 +183,7 @@ isAposVar _ _ = ("", False)
-- whichTVar :: T.Text -> TemplateVar
-- whichTVar trm
-- | txtIsAGivenVar gvars trm = MatchGVar trm
-- | isAposVar gvars trm = EndsInApos trm
-- | checkApos gvars trm = EndsInApos trm
-- | otherwise = error "shouldn't be anything else"
-- -- TODO: add a check upfront for this
-- optOfArg :: Cell -> TemplateVar
Expand Down
41 changes: 24 additions & 17 deletions lib/haskell/natural4/src/LS/XPile/LogicalEnglish/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ module LS.XPile.LogicalEnglish.Types (
, pattern MkIsDiffFr
, pattern MkIsIn

-- Intermediate representation types
-- Intermediate representation types, prisms, and consts
, TemplateVar(..)
, _MatchGVar, _EndsInApos, _IsNum
, aposSuffix
, OrigVarPrefix
, OrigVarSeq
, VarsHC(MkVarsFact,
Expand All @@ -50,7 +51,7 @@ module LS.XPile.LogicalEnglish.Types (
, VarsRule
, AtomicPWithVars
, VCell(..)
, _TempVar, _Pred
, _TempVar, _NonVarOrNonAposAtom, _AposAtom

-- LE-related types
, LEhcCell(..)
Expand Down Expand Up @@ -260,6 +261,27 @@ from https://hackage.haskell.org/package/hashable-generics-1.1.7/docs/Data-Hasha

type OrigVarSeq = [TemplateVar] -- TODO: Look into replacing [] with a more general Sequence type?

aposSuffix :: T.Text
aposSuffix = "'s"

{-| This is best understood in the context of the other VarsX data types -}
data VCell = TempVar TemplateVar
| AposAtom !OrigVarPrefix
| NonVarOrNonAposAtom !T.Text
deriving stock (Eq, Ord, Show)
deriving (Generic, Hashable)
makePrisms ''VCell

{- | This might seem a bit confusing, because now there can be template variables both within a VCell and outside of it (e.g., if it's a ABPIsOpSuchTt).
But I wanted to retain information about what the original variant of AtomicBPropn was for p printing afterwards.
Also, it's helpful to have tt info for generating NLAs,
since the only time we need to generate an NLA is when we have a `baseprop` / `VCell` --- we don't need to do tt for, e.g., ABPIsDiffFr and ABPIsOpOf.
TODO: add more comments / references to the relevant code
-}
type AtomicPWithVars = AtomicBPropn VCell


{-| Intermediate representation from which we can generate either LE natl lang annotations or LE rules.
Things to note / think about:
Expand Down Expand Up @@ -291,21 +313,6 @@ pattern MkVarsRule{vrhead, vrbody}
, rbody = vrbody})
{-# COMPLETE MkVarsFact, MkVarsRule #-}

{- | This might seem a bit confusing, because now there can be template variables both within a VCell and outside of it (e.g., if it's a ABPIsOpSuchTt).
But I wanted to retain information about what the original variant of AtomicBPropn was for p printing afterwards.
Also, it's helpful to have tt info for generating NLAs,
since the only time we need to generate an NLA is when we have a `baseprop` / `VCell` --- we don't need to do tt for, e.g., ABPIsDiffFr and ABPIsOpOf.
TODO: add more comments / references to the relevant code
-}
type AtomicPWithVars = AtomicBPropn VCell

{-| This is best understood in the context of the other VarsX data types -}
data VCell = TempVar TemplateVar
| Pred !T.Text
deriving stock (Eq, Ord, Show)
deriving (Generic, Hashable)
makePrisms ''VCell

{-------------------------------------------------------------------------------
LE data types
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
the target language is: prolog.

the templates are:
not gvar's one two three four five six is val,

*a number* <= *a number*,
*a date* is before *a date*,
*a date* is after *a date*,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
GIVEN,bleh,IS A,Blehy
,buh,IS A,Blehy
DECIDE,not gvar's,one two three four five six,is,val,
// not gvar and a lot of words in field pos, so would expect this to generate an NLA, even tho there's an apos
IF,bleh's,one two three four five six,is,val,
// gvar, so the second one shouldn't generate NLA even tho there's also more than the max num of words in field pos
IF,bleh's,one two three four five six,is,val,
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
the target language is: prolog.

the templates are:
not gvar's one two three four five six is val,

*a number* <= *a number*,
*a date* is before *a date*,
*a date* is after *a date*,
Expand Down

0 comments on commit 77b2cac

Please sign in to comment.