From fb6a7bd10d7b9f821ef4a363130ed28c8082eca3 Mon Sep 17 00:00:00 2001 From: Valentin Robert Date: Wed, 27 Sep 2023 11:41:01 -0700 Subject: [PATCH] implement prototype type inference discovery loop This commit factors out the discovery / type inference code, so that it may be run repeatedly in a loop until we reach a fixed point. At each iteration, the type inference may be able to identify certain addresses as function pointer and figure out their types, which means we could now recover them and explore more code. This is not the final version of the algorithm, but a working minimal version. More care should be taken in the function type reconstruction. --- deps/macaw | 2 +- reopt-explore/LLVM.hs | 2 +- reopt-explore/Residual.hs | 2 +- reopt.cabal | 1 + reopt/Main_reopt.hs | 148 +++----- src/Reopt.hs | 198 ++++++++-- src/Reopt/CFG/FnRep.hs | 51 ++- src/Reopt/CFG/LLVM.hs | 401 ++++++++++++--------- src/Reopt/CFG/Recovery.hs | 16 +- src/Reopt/TypeInference/ConstraintGen.hs | 32 +- src/Reopt/TypeInference/Header.hs | 25 +- src/Reopt/TypeInference/HeaderTypes.hs | 5 + src/Reopt/TypeInference/Solver.hs | 22 ++ src/Reopt/TypeInference/Solver/Finalise.hs | 4 + src/Reopt/TypeInference/Solver/Types.hs | 42 ++- tests/ReoptTests.hs | 2 +- 16 files changed, 606 insertions(+), 347 deletions(-) diff --git a/deps/macaw b/deps/macaw index 21e3b8f4..e05a9db2 160000 --- a/deps/macaw +++ b/deps/macaw @@ -1 +1 @@ -Subproject commit 21e3b8f461ac4eb64287ca696443d5e421420d32 +Subproject commit e05a9db243ce28cc9115d33d490bdd56c3a44dc1 diff --git a/reopt-explore/LLVM.hs b/reopt-explore/LLVM.hs index 746d0ca2..d212a813 100644 --- a/reopt-explore/LLVM.hs +++ b/reopt-explore/LLVM.hs @@ -182,7 +182,7 @@ exploreBinary args opts totalCount (index, fPath) = do recoverLogEvent summaryRef statsRef let annDecl = emptyAnnDeclarations hdrInfo <- handleEitherStringWithExit $ parseElfHeaderInfo64 fPath bs - (os, _, recovOut, constraints) <- + (os, _, recovOut, _, constraints) <- -- (os, _, recMod, constraints, _, logEvents) <- handleEitherWithExit =<< runReoptM logger (recoverX86Elf lOpts opts annDecl unnamedFunPrefix hdrInfo) diff --git a/reopt-explore/Residual.hs b/reopt-explore/Residual.hs index 64dd7a35..5f0cd5ab 100644 --- a/reopt-explore/Residual.hs +++ b/reopt-explore/Residual.hs @@ -151,7 +151,7 @@ performRecovery residualOpts reoptOpts (_idx, fPath) = do >>= either (error . show) return hdrInfo <- handleEitherStringWithExit $ parseElfHeaderInfo64 fPath bs logger <- createLogger reoptOpts fPath - (_os, ds, recovOut, _) <- + (_os, ds, recovOut, _, _) <- handleEitherWithExit =<< runReoptM logger diff --git a/reopt.cabal b/reopt.cabal index 47446664..1137e4c5 100644 --- a/reopt.cabal +++ b/reopt.cabal @@ -158,6 +158,7 @@ executable reopt containers, directory, elf-edit, + extra, filepath, generic-lens, lens, diff --git a/reopt/Main_reopt.hs b/reopt/Main_reopt.hs index fc248b2b..77f567c4 100644 --- a/reopt/Main_reopt.hs +++ b/reopt/Main_reopt.hs @@ -18,44 +18,32 @@ import Data.ElfEdit ( ) import Data.ElfEdit qualified as Elf import Data.Generics.Labels () -import Data.IORef ( - IORef, - modifyIORef', - newIORef, - readIORef, - ) -import Data.List ( - intercalate, - nub, - stripPrefix, - (\\), - ) -import Data.Macaw.Analysis.RegisterUse ( - ppRegisterUseErrorReason, - ruReason, - ) -import Data.Macaw.DebugLogging -import Data.Macaw.Discovery ( - DiscoveryOptions (..), - defaultDiscoveryOptions, - memory, - ppDiscoveryStateBlocks, - ) -import Data.Maybe ( - fromMaybe, - isJust, - isNothing, - ) -import Data.Parameterized.Some (Some (Some)) +import Data.IORef (IORef, modifyIORef', newIORef, readIORef) +import Data.List qualified as List +import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Text qualified as T import Data.Version (Version (versionBranch)) import Data.Word (Word64) import GHC.Generics (Generic) import Numeric (readHex) import Options.Applicative -import Paths_reopt (version) import Prettyprinter qualified as PP import Prettyprinter.Render.Text qualified as PP +import System.Exit (exitFailure) +import System.FilePath (splitFileName) +import System.IO qualified as IO +import System.IO.Error ( + ioeGetErrorString, + ioeGetErrorType, + isUserError, + ) +import Text.Printf (printf) + +import Data.Macaw.Analysis.RegisterUse qualified as Macaw +import Data.Macaw.DebugLogging qualified as Macaw +import Data.Macaw.Discovery qualified as Macaw +import Data.Parameterized.Some (Some (Some)) + import Reopt import Reopt.ELFArchInfo (getElfArchInfo) import Reopt.EncodeInvariants ( @@ -77,11 +65,7 @@ import Reopt.Occam ( toOccamManifest, ) import Reopt.Server (runServer) -import Reopt.TypeInference.ConstraintGen ( - ModuleConstraints (mcNamedTypes, mcWarnings), - genModuleConstraints, - showInferredTypes, - ) +import Reopt.TypeInference.ConstraintGen (ModuleConstraints (..), genModuleConstraints) import Reopt.TypeInference.Pretty (ppFunction) import Reopt.Utils.Exit ( checkedReadFile, @@ -96,20 +80,13 @@ import Reopt.X86 ( osLinkName, osPersonality, ) -import System.Exit (exitFailure) -import System.FilePath (splitFileName) -import System.IO qualified as IO -import System.IO.Error ( - ioeGetErrorString, - ioeGetErrorType, - isUserError, - ) -import Text.Printf (printf) + +import Paths_reopt (version) reoptVersion :: String reoptVersion = printf "Reopt binary reoptimizer (reopt) %s" v where - v = intercalate "." $ map (printf "%d") $ versionBranch version + v = List.intercalate "." $ map (printf "%d") $ versionBranch version -- | Write a builder object to a file if defined or standard out if not. writeOutput :: Maybe FilePath -> (IO.Handle -> IO a) -> IO a @@ -125,7 +102,7 @@ unintercalate punct = reverse . go [] "" go acc "" [] = acc go acc thisAcc [] = reverse thisAcc : acc go acc thisAcc str'@(x : xs) - | Just sfx <- stripPrefix punct str' = go (reverse thisAcc : acc) "" sfx + | Just sfx <- List.stripPrefix punct str' = go (reverse thisAcc : acc) "" sfx | otherwise = go acc (x : thisAcc) xs ------------------------------------------------------------------------ @@ -155,7 +132,7 @@ data Action -- | Command line arguments. data Args = Args { reoptAction :: !Action - , debugKeys :: [DebugClass] + , debugKeys :: [Macaw.DebugClass] -- ^ Debug information TODO: See if we can omit this. , outputPath :: !(Maybe FilePath) -- ^ Path to output @@ -199,7 +176,7 @@ data Args = Args -- ^ List of function entry points that we exclude for translation. , loadBaseAddress :: !(Maybe Word64) -- ^ Address to load binary at if relocatable. - , discOpts :: !DiscoveryOptions + , discOpts :: !Macaw.DiscoveryOptions -- ^ Options affecting discovery , unnamedFunPrefix :: !BS.ByteString -- ^ Prefix for unnamed functions identified in code discovery. @@ -359,22 +336,22 @@ llvmVersionP = Just c -> pure c Nothing -> Left $ printf "Unsupported LLVM version %s" s -parseDebugFlags :: [DebugClass] -> String -> Either String [DebugClass] +parseDebugFlags :: [Macaw.DebugClass] -> String -> Either String [Macaw.DebugClass] parseDebugFlags oldKeys cl = case cl of '-' : cl' -> do ks <- getKeys cl' - return (oldKeys \\ ks) + return (oldKeys List.\\ ks) cl' -> do ks <- getKeys cl' - return (nub $ oldKeys ++ ks) + return (List.nub $ oldKeys ++ ks) where - getKeys "all" = Right allDebugKeys - getKeys s = case parseDebugKey s of + getKeys "all" = Right Macaw.allDebugKeys + getKeys s = case Macaw.parseDebugKey s of Nothing -> Left $ "Unknown debug key `" ++ s ++ "'" Just k -> Right [k] -debugKeysP :: Parser [DebugClass] +debugKeysP :: Parser [Macaw.DebugClass] debugKeysP = option (eitherReader validate) $ long "debug" @@ -387,7 +364,7 @@ debugKeysP = ++ "with comma-separated keys. Keys may be preceded by a '-' which " ++ "means disable that key.\n" ++ "Supported keys: all, " - ++ intercalate ", " (map debugKeyName allDebugKeys) + ++ List.intercalate ", " (map Macaw.debugKeyName Macaw.allDebugKeys) ) where validate s = do @@ -624,9 +601,9 @@ arguments = <*> many includeAddrP <*> many excludeAddrP <*> optional loadBaseAddressP - <*> ( DiscoveryOptions + <*> ( Macaw.DiscoveryOptions -- This was never exposed to the CLI - (exploreFunctionSymbols defaultDiscoveryOptions) + (Macaw.exploreFunctionSymbols Macaw.defaultDiscoveryOptions) <$> exploreCodeAddrInMemP <*> logAtAnalyzeFunctionP <*> logAtAnalyzeBlockP @@ -727,7 +704,7 @@ showCFG args elfPath = do initState <- reoptRunInit $ doInit (loadOptions args) hdrInfo ainfo pltFn reoptOpts (_, discState) <- doDiscovery hdrAnn hdrInfo ainfo initState reoptOpts -- Print discovery - pure $ show $ ppDiscoveryStateBlocks discState + pure $ show $ Macaw.ppDiscoveryStateBlocks discState handleEitherWithExit mr -- | Show the constraints generated by the type inference step. @@ -760,7 +737,7 @@ showConstraints args elfPath = do doRecoverX86 funPrefix sysp symAddrMap debugTypeMap discState let recMod = recoveredModule recoverX86Output - pure $ genModuleConstraints recMod (memory discState) (traceTypeUnification args) (traceConstraintOrigins args) + pure $ genModuleConstraints recMod (Macaw.memory discState) (traceTypeUnification args) (traceConstraintOrigins args) mc <- handleEitherWithExit mr @@ -803,7 +780,7 @@ collectInvariants ref evt = do let enc = encodeInvariantMsg addr invMap seq enc $ modifyIORef' ref (enc :) ReoptFunStepFailed InvariantInference (FunId addr _mnm) e -> do - let enc = encodeInvariantFailedMsg addr (ppRegisterUseErrorReason (ruReason e)) + let enc = encodeInvariantFailedMsg addr (Macaw.ppRegisterUseErrorReason (Macaw.ruReason e)) seq enc $ modifyIORef' ref (enc :) _ -> do pure () @@ -856,28 +833,33 @@ performReopt args elfPath = do funPrefix :: BSC.ByteString funPrefix = unnamedFunPrefix args - (os, initState) <- reoptX86Init (loadOptions args) rOpts origElf - let symAddrMap = initDiscSymAddrMap initState + (os, symAddrMap, debugTypeMap, discState) <- + reoptPrepareForRecovery (loadOptions args) rOpts hdrAnn funPrefix origElf when (shouldRecover args) $ checkSymbolUnused funPrefix symAddrMap - let ainfo = osArchitectureInfo os - (debugTypeMap, discState) <- doDiscovery hdrAnn origElf ainfo initState rOpts - case cfgExportPath args of Nothing -> pure () Just path -> do reoptWrite CfgFileType path $ \h -> do - PP.hPutDoc h (ppDiscoveryStateBlocks discState) + PP.hPutDoc h (Macaw.ppDiscoveryStateBlocks discState) unless (shouldRecover args) $ reoptEndNow () let sysp = osPersonality os - recoverX86Output <- - doRecoverX86 funPrefix sysp symAddrMap debugTypeMap discState - let recMod = recoveredModule recoverX86Output + (_, recoverX86Output, recMod, moduleConstraints) <- + reoptRecoveryLoop symAddrMap rOpts funPrefix sysp debugTypeMap discState + + -- forM_ (recoveredDefs recMod) $ \ f -> do + -- trace "FUNCTION" (pure ()) + -- trace (show (PP.pretty f)) (pure ()) + let relinkerInfo = mergeRelations recoverX86Output + case relinkerInfoExportPath args of + Nothing -> pure () + Just path -> do + reoptWriteByteString RelinkerInfoFileType path (Aeson.encode relinkerInfo) case fnsExportPath args of Nothing -> pure () @@ -893,19 +875,6 @@ performReopt args elfPath = do let buffer = AE.encodingToLazyByteString (AE.list id invariants) reoptWriteByteString AnnotationsFileType path buffer - case relinkerInfoExportPath args of - Nothing -> pure () - Just path -> do - reoptWriteByteString RelinkerInfoFileType path (Aeson.encode relinkerInfo) - - -- Generate constraints - let moduleConstraints = - genModuleConstraints - recMod - (memory discState) - (traceTypeUnification args) - (traceConstraintOrigins args) - -- FIXME: move let prettyDefs = @@ -1053,17 +1022,18 @@ displayConstraintsInformation :: ModuleConstraints arch -> IO () displayConstraintsInformation moduleConstraints = do putStrLn "Warnings" putStrLn (unlines (map ((++) "\t" . show) (mcWarnings moduleConstraints))) - -- putStrLn "Constraints (generated)" - -- putStrLn (unlines (map (show . PP.indent 4 . PP.pretty) (mcConstraints moduleConstraints))) - -- putStrLn "Constraints (solving)" - -- putStrLn (unlines (map (show . PP.indent 4 . PP.pretty) (mcTyConstraints moduleConstraints))) - putStrLn "Inferred types" - putStrLn (showInferredTypes moduleConstraints) + +-- putStrLn "Constraints (generated)" +-- putStrLn (unlines (map (show . PP.indent 4 . PP.pretty) (mcConstraints moduleConstraints))) +-- putStrLn "Constraints (solving)" +-- putStrLn (unlines (map (show . PP.indent 4 . PP.pretty) (mcTyConstraints moduleConstraints))) +-- putStrLn "Inferred types" +-- putStrLn (showInferredTypes moduleConstraints) main' :: IO () main' = do args <- getCommandLineArgs - setDebugKeys (args ^. #debugKeys) + Macaw.setDebugKeys (args ^. #debugKeys) case args ^. #reoptAction of DumpDisassembly file -> dumpDisassembly args file ShowCFG file -> diff --git a/src/Reopt.hs b/src/Reopt.hs index a148b32b..df7f9408 100644 --- a/src/Reopt.hs +++ b/src/Reopt.hs @@ -41,6 +41,7 @@ module Reopt ( InitDiscovery, initDiscSymAddrMap, doDiscovery, + reoptRunDiscovery, -- * Debug info discovery reoptHomeDir, @@ -54,6 +55,8 @@ module Reopt ( Reopt.TypeInference.HeaderTypes.emptyAnnDeclarations, RecoveredModule, recoveredDefs, + reoptPrepareForRecovery, + reoptRecoveryLoop, resolveHeader, updateRecoveredModule, @@ -235,7 +238,9 @@ import Reopt.ArgResolver ( ) import Reopt.CFG.FnRep ( FnArchStmt, - FnValue (FnFunctionEntryValue), + FnBlock (fbStmts), + FnStmt (FnCall), + FnValue (FnCodePointer, FnFunctionEntryValue), FoldFnValue (foldFnValue), Function (fnAddr, fnName), FunctionDecl ( @@ -309,7 +314,7 @@ import Reopt.TypeInference.FunTypeMaps ( ) import Reopt.TypeInference.Header (parseHeader) import Reopt.TypeInference.HeaderTypes ( - AnnDeclarations (funDecls), + AnnDeclarations (..), AnnFunArg (..), AnnFunType (..), AnnType (..), @@ -361,6 +366,9 @@ import Text.LLVM.PP qualified as LPP import Text.PrettyPrint.HughesPJ qualified as HPJ import Text.Printf (printf) +import Control.Monad.Extra (concatForM, concatMapM) +import Data.Macaw.CFG qualified as Macaw +import Debug.Trace import Reopt.ELFArchInfo ( InitDiscM, ProcessPLTEntries, @@ -1717,6 +1725,11 @@ headerTypeMap :: Map (ArchSegmentOff arch) Macaw.NoReturnFunStatus -> ReoptM arch r (FunTypeMaps (Macaw.ArchAddrWidth arch)) headerTypeMap hdrAnn dynDepsTypeMap symAddrMap noretMap = do + -- trace "typeDefs" $ forM_ (Map.assocs (typeDefs hdrAnn)) $ \ (bs, ty) -> + -- trace (show bs <> " ↦ " <> show ty) (pure ()) + -- trace "funDecls" $ forM_ (Map.assocs (funDecls hdrAnn)) $ \ (bs, ty) -> + -- trace (show bs <> " ↦ " <> show ty) (pure ()) + globalStepStarted Events.HeaderTypeInference let voidPtrType = PtrAnnType VoidAnnType @@ -2099,6 +2112,8 @@ resolveArgType nm tp0 = addGPReg64 nm TypedefAnnType _ tp -> resolveArgType nm tp + FunPtrAnnType _ret _args -> + addGPReg64 nm -- | This parses the types extracted from header function arguments to the -- machine code registers that the function will expect. @@ -2109,15 +2124,15 @@ argsToRegisters :: V.Vector AnnFunArg -> ArgResolver m () argsToRegisters args = go 0 - where - go :: Int -> ArgResolver m () - go argIx - | argIx >= V.length args = pure () - | otherwise = do - let arg = args V.! argIx - let nm = fromMaybe ("arg" ++ show argIx) (funArgName arg) - resolveArgType nm (funArgType arg) - go (argIx + 1) + where + go :: Int -> ArgResolver m () + go argIx + | argIx >= V.length args = pure () + | otherwise = do + let arg = args V.! argIx + let nm = fromMaybe ("arg" ++ show argIx) (funArgName arg) + resolveArgType nm (funArgType arg) + go (argIx + 1) parseReturnType :: AnnType -> Either ArgResolverError [Some X86RetInfo] parseReturnType tp0 = @@ -2130,6 +2145,7 @@ parseReturnType tp0 = DoubleAnnType -> Right [Some (RetZMM ZMMDouble 0)] PtrAnnType _ -> Right [Some (RetBV64 F.RAX)] TypedefAnnType _ tp -> parseReturnType tp + FunPtrAnnType{} -> Left $ UnsupportedReturnType (ppAnnType tp0) resolveAnnFunType :: Monad m => @@ -2263,7 +2279,7 @@ x86ArgumentAnalysis :: -- | Map from addresses to function name. (MemSegmentOff 64 -> Maybe BSC.ByteString) -> -- | Map from address to the name at that address along with type - (BSC.ByteString -> Maybe X86FunTypeInfo) -> + Map BSC.ByteString (MemSegmentOff 64, X86FunTypeInfo) -> Macaw.DiscoveryState X86_64 -> ReoptM X86_64 @@ -2271,7 +2287,8 @@ x86ArgumentAnalysis :: ( Map (MemSegmentOff 64) X86FunTypeInfo , Map (MemSegmentOff 64) (FunctionArgAnalysisFailure 64) ) -x86ArgumentAnalysis sysp resolveFunName resolveFunType discState = do +x86ArgumentAnalysis sysp resolveFunName funTypeMap discState = do + let resolveFunType fnm = snd <$> Map.lookup fnm funTypeMap -- Generate map from symbol names to known type. let mem = Macaw.memory discState -- Compute only those functions whose types are not known. @@ -2292,6 +2309,8 @@ x86ArgumentAnalysis sysp resolveFunName resolveFunType discState = do RegState X86Reg (Value X86_64 ids) -> Either String [Some (Value X86_64 ids)] resolveFn callSite callRegs = do + -- trace ("[!!!] Resolving " <> show callSite <> ", regs: " <> show callRegs) $ + -- trace ("FunMap:\n" <> show funTypeMap) $ case x86CallRegs mem resolveFunName resolveFunType callSite callRegs of Left rsn -> Left (ppRegisterUseErrorReason rsn) Right r -> Right (callArgValues r) @@ -2311,6 +2330,12 @@ x86ArgumentAnalysis sysp resolveFunName resolveFunType discState = do printf "%s: Could not determine signature at callsite %s:\n %s" (Events.ppFnEntry dnm faddr) (Events.ppSegOff callSite) msg globalStepFinished Events.FunctionArgInference () + -- traceM "Demand set: " + -- forM_ (Map.assocs dems) $ \(off, dem) -> + -- traceM (show off <> " ↦ " <> show dem) + -- let fty = inferFunctionTypeFromDemands dems + -- traceM $ "Inferred function type: " <> show fty + pure (inferFunctionTypeFromDemands dems, summaryFails) data RecoverX86Output = RecoverX86Output @@ -2330,6 +2355,10 @@ doRecoverX86 :: Macaw.DiscoveryState X86_64 -> ReoptM X86_64 r RecoverX86Output doRecoverX86 unnamedFunPrefix sysp symAddrMap debugTypeMap discState = do + -- trace "Potential fun type map:" $ + -- forM_ (Map.assocs (addrTypeMap debugTypeMap)) $ \ (k, v) -> do + -- trace (show k <> " ↦ " <> show v) (pure ()) + -- Map names to known function types when we have explicit information. let knownFunTypeMap :: Map BS.ByteString (MemSegmentOff 64, X86FunTypeInfo) @@ -2366,8 +2395,17 @@ doRecoverX86 unnamedFunPrefix sysp symAddrMap debugTypeMap discState = do -- Infer registers each function demands. (fDems, summaryFailures) <- do let resolveFunName a = Map.lookup a funNameMap - let resolveFunType fnm = snd <$> Map.lookup fnm knownFunTypeMap - x86ArgumentAnalysis sysp resolveFunName resolveFunType discState + x86ArgumentAnalysis sysp resolveFunName knownFunTypeMap discState + + -- let explored = + -- [ nm + -- | Some finfo <- Macaw.exploredFunctions discState + -- , let faddr = Macaw.discoveredFunAddr finfo + -- , let nm = Map.findWithDefault (error "Address undefined in funNameMap") faddr funNameMap + -- ] + + -- trace "Functions explored by Macaw:" $ forM_ explored $ \ nm -> + -- trace (" → " <> show nm) (pure ()) let funTypeMap :: Map BS.ByteString (MemSegmentOff 64, X86FunTypeInfo) @@ -2381,9 +2419,27 @@ doRecoverX86 unnamedFunPrefix sysp symAddrMap debugTypeMap discState = do , tp <- maybeToList $ Map.lookup faddr fDems ] + -- trace "fDems:" $ forM_ (Map.assocs fDems) $ \ (k, v) -> + -- trace (show k <> " ↦ " <> show v) (pure ()) + -- trace "Candidates were:" (pure ()) + -- forM_ + -- [ (faddr, nm) + -- | Some finfo <- Macaw.exploredFunctions discState + -- , let faddr = Macaw.discoveredFunAddr finfo + -- , let nm = Map.findWithDefault (error "Address undefined in funNameMap") faddr funNameMap + -- -- , tp <- maybeToList $ Map.lookup faddr fDems + -- ] $ \ (a, nm) -> + -- trace (show a <> " ↦ " <> show nm) (pure ()) + + -- trace "Actual fun type map:" $ + -- forM_ (Map.assocs funTypeMap) $ \ kv -> do + -- trace (show kv) (pure ()) + fnDefsAndLogEvents <- fmap catMaybes $ forM (Macaw.exploredFunctions discState) $ \(Some finfo) -> do + -- trace ("Considering recovering " <> show (Macaw.discoveredFunAddr finfo)) $ do let faddr = Macaw.discoveredFunAddr finfo + let _ = trace ("2: " <> show faddr) () let dnm = Macaw.discoveredFunSymbol finfo let fnId = Events.funId faddr dnm let nm = Map.findWithDefault (error "Address undefined in funNameMap") faddr funNameMap @@ -2574,7 +2630,89 @@ checkSymbolUnused unnamedFunPrefix symAddrMap = do "No symbol in the binary may start with the prefix %d." (BSC.unpack unnamedFunPrefix) --- | Analyze an elf binary to extract information. +-- | Checks whether a given `FnStmt` has a potential code pointer address we +-- want to try and investigate. We intended for such addresses to be identified +-- via type reconstruction, but it turns out that we can just get away with +-- identifying code-pointer-sized values pointing into an executable segment. +-- However, we could double-check with the results of type reconstruction to +-- potentially avoid some spurious pointers. +fnStmtHasCandidate :: + MemWidth (Macaw.ArchAddrWidth arch) => + Monad m => + FnStmt arch -> + m [Macaw.ArchMemAddr arch] +fnStmtHasCandidate (FnCall _fn args _mRet) = do + concatForM args $ \(Some fnValue) -> + case fnValue of + FnCodePointer addr -> return [addr] + _ -> return [] +fnStmtHasCandidate _ = return [] + +-- | Repeatedly perform Macaw recovery and discover new potential function entry +-- points. +reoptRecoveryLoop :: + SymAddrMap 64 -> + ReoptOptions -> + BSC.ByteString -> + SyscallPersonality -> + FunTypeMaps 64 -> + Macaw.DiscoveryState X86_64 -> + ReoptM + X86_64 + r + ( Macaw.DiscoveryState X86_64 + , RecoverX86Output + , RecoveredModule X86_64 + , ModuleConstraints X86_64 + ) +reoptRecoveryLoop symAddrMap rOpts funPrefix sysp debugTypeMap = go + where + go previousDiscState = do + discState <- + reoptRunDiscovery (getAddrSymMap symAddrMap) $ + Macaw.incCompleteDiscovery previousDiscState (roDiscoveryOptions rOpts) + + recoverX86Output <- doRecoverX86 funPrefix sysp symAddrMap debugTypeMap discState + let recMod = recoveredModule recoverX86Output + + let moduleConstraints = + genModuleConstraints + recMod + (Macaw.memory discState) + (roTraceUnification rOpts) + (roTraceConstraintOrigins rOpts) + + -- Search for new candidate function entry points + let allBlocks = concatMap fnBlocks (recoveredDefs recMod) + let allStmts = concatMap fbStmts allBlocks + candidateAddresses <- concatMapM fnStmtHasCandidate allStmts + let candidateAddressesAsSegOffs = mapMaybe (asSegmentOff (Macaw.memory discState)) candidateAddresses + -- NOTE: if we mark addresses that have already been tried (even if they + -- have failed), Macaw will not add them to the unexplored frontier, so + -- there is no risk here. + let newDiscState = Macaw.markAddrsAsFunction Macaw.UserRequest candidateAddressesAsSegOffs discState + let unexplored = newDiscState ^. Macaw.unexploredFunctions + + if null unexplored + then traceM "NOLOOP" >> return (newDiscState, recoverX86Output, recMod, moduleConstraints) + else traceM "LOOP" >> go newDiscState + +reoptPrepareForRecovery :: + LoadOptions -> + ReoptOptions -> + AnnDeclarations -> + BSC.ByteString -> + Elf.ElfHeaderInfo 64 -> + ReoptM X86_64 r (X86OS, SymAddrMap 64, FunTypeMaps 64, Macaw.DiscoveryState X86_64) +reoptPrepareForRecovery loadOpts reoptOpts hdrAnn unnamedFunPrefix hdrInfo = do + (os, initState) <- reoptX86Init loadOpts reoptOpts hdrInfo + let symAddrMap = initDiscSymAddrMap initState + checkSymbolUnused unnamedFunPrefix symAddrMap + let ainfo = osArchitectureInfo os + (debugTypeMap, discState) <- doDiscovery hdrAnn hdrInfo ainfo initState reoptOpts + return (os, symAddrMap, debugTypeMap, discState) + +-- | Analyze an ELF binary to extract information. recoverX86Elf :: -- | Option to load the binary at the given address LoadOptions -> @@ -2590,30 +2728,15 @@ recoverX86Elf :: ( X86OS , Macaw.DiscoveryState X86_64 , RecoverX86Output + , RecoveredModule X86_64 , ModuleConstraints X86_64 ) recoverX86Elf loadOpts reoptOpts hdrAnn unnamedFunPrefix hdrInfo = do - (os, initState) <- reoptX86Init loadOpts reoptOpts hdrInfo - let symAddrMap = initDiscSymAddrMap initState - checkSymbolUnused unnamedFunPrefix symAddrMap - - let ainfo = osArchitectureInfo os - (debugTypeMap, discState) <- - doDiscovery hdrAnn hdrInfo ainfo initState reoptOpts - + (os, symAddrMap, debugTypeMap, discState) <- reoptPrepareForRecovery loadOpts reoptOpts hdrAnn unnamedFunPrefix hdrInfo let sysp = osPersonality os - recoverX86Output <- - doRecoverX86 unnamedFunPrefix sysp symAddrMap debugTypeMap discState - - let recMod = recoveredModule recoverX86Output - let constraints = - genModuleConstraints - recMod - (Macaw.memory discState) - (roTraceUnification reoptOpts) - (roTraceConstraintOrigins reoptOpts) - - pure (os, discState, recoverX86Output, constraints) + (finalDiscState, recoverX86Output, recMod, moduleConstraints) <- + reoptRecoveryLoop symAddrMap reoptOpts unnamedFunPrefix sysp debugTypeMap discState + pure (os, finalDiscState, recoverX86Output, recMod, moduleConstraints) -------------------------------------------------------------------------------- -- Compile the LLVM @@ -2725,7 +2848,8 @@ renderLLVMIR llvmGenOpt llvmConfig os recMod constraints = -- Generate LLVM module let archOps = LLVM.x86LLVMArchOps (show os) - (m, ann, ext, logEvents) = moduleForFunctions archOps llvmGenOpt recMod constraints + aInfo = osArchitectureInfo os + (m, ann, ext, logEvents) = moduleForFunctions aInfo archOps llvmGenOpt recMod constraints -- Render into LLVM out = HPJ.fullRender HPJ.PageMode 10000 1 pp mempty (ppLLVM llvmConfig m) in diff --git a/src/Reopt/CFG/FnRep.hs b/src/Reopt/CFG/FnRep.hs index 639dc2a0..49605a6c 100644 --- a/src/Reopt/CFG/FnRep.hs +++ b/src/Reopt/CFG/FnRep.hs @@ -69,7 +69,7 @@ import Data.Macaw.CFG ( ppApp, sexpr, ) -import Data.Macaw.Memory +import Data.Macaw.Memory qualified as Macaw import Data.Macaw.Types ( BVType, BoolType, @@ -234,11 +234,18 @@ data FnValue (arch :: Type) (tp :: M.Type) where -- | Symbol name of this function. BSC.ByteString -> FnValue arch (BVType (ArchAddrWidth arch)) - -- | Value is a function. - -- - -- The int should be in the range @[0..argCount)@, and the type repr - -- is the type of the argument. + -- | Value is a function argument. The int should be in the range + -- @[0..argCount)@, and the type repr is the type of the argument. FnArg :: !Int -> !(TypeRepr tp) -> FnValue arch tp + -- | Value is a constant pointer into the executable segment + FnCodePointer :: + Macaw.MemAddr (ArchAddrWidth arch) -> + FnValue arch (BVType (ArchAddrWidth arch)) + -- | Value is a constant pointer into the executable segment + FnTypedCodePointer :: + Macaw.MemAddr (ArchAddrWidth arch) -> + !(FunctionType arch) -> + FnValue arch (BVType (ArchAddrWidth arch)) ------------------------------------------------------------------------ -- FoldFnValue @@ -260,12 +267,12 @@ class FoldFnValue (v :: Type -> Type) where type FnArchConstraints arch = ( IsArchFn (ArchFn arch) , IsArchStmt (FnArchStmt arch) - , MemWidth (ArchAddrWidth arch) + , Macaw.MemWidth (ArchAddrWidth arch) , HasRepr (ArchFn arch (FnValue arch)) TypeRepr , HasRepr (ArchReg arch) TypeRepr ) -instance MemWidth (ArchAddrWidth arch) => PP.Pretty (FnValue arch tp) where +instance Macaw.MemWidth (ArchAddrWidth arch) => PP.Pretty (FnValue arch tp) where pretty (FnUndefined{}) = "undef" pretty (FnConstantBool b) = if b then "true" else "false" pretty (FnConstantValue w i) @@ -276,6 +283,8 @@ instance MemWidth (ArchAddrWidth arch) => PP.Pretty (FnValue arch tp) where pretty (FnReturn var) = PP.pretty var pretty (FnFunctionEntryValue _ n) = PP.pretty (BSC.unpack n) pretty (FnArg i _) = "arg" <> PP.pretty i + pretty (FnCodePointer addr) = "codeptr" <> PP.pretty addr + pretty (FnTypedCodePointer addr _fty) = "typedcodeptr" <> PP.pretty addr instance FnArchConstraints arch => PP.Pretty (FnAssignRhs arch (FnValue arch) tp) where pretty rhs = @@ -289,7 +298,7 @@ instance FnArchConstraints arch => PP.Pretty (FnAssignRhs arch (FnValue arch) tp | i >= 0 -> PP.parens ( "0x" <> PP.pretty (showHex i "") <> " : " <> "bv" - PP.<+> PP.pretty (8 * addrSize (Proxy :: Proxy (ArchAddrWidth arch))) + PP.<+> PP.pretty (8 * Macaw.addrSize (Proxy :: Proxy (ArchAddrWidth arch))) ) | otherwise -> error ("FnAddrWidthConstant given negative value: " ++ show i) @@ -307,13 +316,13 @@ instance FnArchConstraints arch => ShowF (FnAssignment arch) archWidthTypeRepr :: forall p arch. - MemWidth (ArchAddrWidth arch) => + Macaw.MemWidth (ArchAddrWidth arch) => p arch -> TypeRepr (BVType (ArchAddrWidth arch)) -archWidthTypeRepr _ = BVTypeRepr (addrWidthNatRepr (addrWidthRepr (Proxy :: Proxy (ArchAddrWidth arch)))) +archWidthTypeRepr _ = BVTypeRepr (Macaw.addrWidthNatRepr (Macaw.addrWidthRepr (Proxy :: Proxy (ArchAddrWidth arch)))) instance - (MemWidth (ArchAddrWidth arch), HasRepr (ArchFn arch f) TypeRepr) => + (Macaw.MemWidth (ArchAddrWidth arch), HasRepr (ArchFn arch f) TypeRepr) => HasRepr (FnAssignRhs arch f) TypeRepr where typeRepr rhs = @@ -336,6 +345,8 @@ instance FnArchConstraints arch => HasRepr (FnValue arch) TypeRepr where FnReturn ret -> frReturnType ret FnFunctionEntryValue{} -> archWidthTypeRepr (Proxy :: Proxy arch) FnArg _ tp -> tp + FnCodePointer{} -> archWidthTypeRepr (Proxy :: Proxy arch) + FnTypedCodePointer{} -> archWidthTypeRepr (Proxy :: Proxy arch) ------------------------------------------------------------------------ -- FnStmt @@ -406,20 +417,20 @@ instance FoldFnValue FnStmt where -- FnBlockLabel -- | A block label -newtype FnBlockLabel w = FnBlockLabel {fnBlockLabelAddr :: MemSegmentOff w} +newtype FnBlockLabel w = FnBlockLabel {fnBlockLabelAddr :: Macaw.MemSegmentOff w} deriving (Eq, Ord) -- | Render block label from segment offset. -fnBlockLabelFromAddr :: MemSegmentOff w -> FnBlockLabel w +fnBlockLabelFromAddr :: Macaw.MemSegmentOff w -> FnBlockLabel w fnBlockLabelFromAddr = FnBlockLabel instance PP.Pretty (FnBlockLabel w) where pretty (FnBlockLabel s) = let - a = segoffAddr s - o = memWordToUnsigned (addrOffset a) + a = Macaw.segoffAddr s + o = Macaw.memWordToUnsigned (Macaw.addrOffset a) in - "block_" <> PP.pretty (addrBase a) <> "_" <> PP.pretty (showHex o "") + "block_" <> PP.pretty (Macaw.addrBase a) <> "_" <> PP.pretty (showHex o "") -- | Render block label as a string fnBlockLabelString :: FnBlockLabel w -> String @@ -439,7 +450,7 @@ data FnJumpTarget arch = FnJumpTarget -- These must match the type of the jump target. } -instance MemWidth (ArchAddrWidth arch) => PP.Pretty (FnJumpTarget arch) where +instance Macaw.MemWidth (ArchAddrWidth arch) => PP.Pretty (FnJumpTarget arch) where pretty tgt = PP.pretty (fnJumpLabel tgt) PP.<+> PP.encloseSep PP.lbracket PP.rbracket " " phiVals where phiVals = V.toList $ viewSome PP.pretty <$> fnJumpPhiValues tgt @@ -514,7 +525,7 @@ data FnBlockInvariant arch where -- -- @o@ is typically negative on processors whose stacks grow down. FnStackOff :: - !(MemInt (ArchAddrWidth arch)) -> + !(Macaw.MemInt (ArchAddrWidth arch)) -> !(BoundLoc (ArchReg arch) (BVType (ArchAddrWidth arch))) -> FnBlockInvariant arch @@ -595,7 +606,7 @@ instance FoldFnValue FnBlock where -- This currently isn't the case, as Phi nodes still use `ArchReg` to index the -- nodes. However, this will be changed. data Function arch = Function - { fnAddr :: !(MemSegmentOff (ArchAddrWidth arch)) + { fnAddr :: !(Macaw.MemSegmentOff (ArchAddrWidth arch)) -- ^ The address for this function , fnType :: !(FunctionType arch) -- ^ Type of this function @@ -641,7 +652,7 @@ instance -- | A function declaration that has type information, but no recovered definition. data FunctionDecl arch = FunctionDecl - { funDeclAddr :: !(MemSegmentOff (ArchAddrWidth arch)) + { funDeclAddr :: !(Macaw.MemSegmentOff (ArchAddrWidth arch)) -- ^ Address of function in binary. , funDeclName :: !BSC.ByteString -- ^ Symbol name for function. diff --git a/src/Reopt/CFG/LLVM.hs b/src/Reopt/CFG/LLVM.hs index a633cc76..744937e5 100644 --- a/src/Reopt/CFG/LLVM.hs +++ b/src/Reopt/CFG/LLVM.hs @@ -87,7 +87,7 @@ import GHC.TypeLits #if __GLASGOW_HASKELL__ < 902 import Numeric.Natural (Natural) #endif -import Prettyprinter (pretty, viaShow) +import Prettyprinter qualified as PP import Text.LLVM qualified as L import Text.LLVM.PP qualified as L (ppType) import Text.PrettyPrint.HughesPJ qualified as HPJ @@ -96,7 +96,7 @@ import Text.Printf import Data.Macaw.Analysis.RegisterUse (BoundLoc (..)) import Data.Macaw.CFG import Data.Macaw.Types -import Data.Macaw.X86 (X86BlockPrecond (..), X86Reg (..), X86_64) +import Data.Macaw.X86 (ArchitectureInfo, X86BlockPrecond (..), X86Reg (..), X86_64, archAddrWidth) import Data.Bits (testBit) import Reopt.CFG.FnRep @@ -110,8 +110,10 @@ import Reopt.TypeInference.Solver ( TyVar, tyToLLVMType, pattern FConflictTy, + pattern FFunPtrTy, pattern FNumTy, pattern FPtrTy, + pattern FUnknownFunPtrTy, pattern FUnknownTy, ) import Reopt.VCG.Annotations qualified as Ann @@ -228,10 +230,12 @@ llvmMaskedLoad :: L.Type -> Intrinsic llvmMaskedLoad n tp tpv = do - let vstr = "v" ++ show n ++ tp - mnem = "llvm.masked.load." ++ vstr ++ ".p0" ++ vstr - args = [L.PtrTo (L.Vector n tpv), L.iT 32, L.Vector n (L.iT 1), L.Vector n tpv] - in intrinsic mnem (L.Vector n tpv) args + let + vstr = "v" ++ show n ++ tp + mnem = "llvm.masked.load." ++ vstr ++ ".p0" ++ vstr + args = [L.PtrTo (L.Vector n tpv), L.iT 32, L.Vector n (L.iT 1), L.Vector n tpv] + in + intrinsic mnem (L.Vector n tpv) args -- | @llvm.masked.store.*@ intrinsic llvmMaskedStore :: @@ -243,10 +247,12 @@ llvmMaskedStore :: L.Type -> Intrinsic llvmMaskedStore n tp tpv = do - let vstr = "v" ++ show n ++ tp - mnem = "llvm.masked.store." ++ vstr ++ ".p0" ++ vstr - args = [L.PtrTo (L.Vector n tpv), L.iT 32, L.Vector n (L.iT 1), L.Vector n tpv] - in intrinsic mnem (L.Vector n tpv) args + let + vstr = "v" ++ show n ++ tp + mnem = "llvm.masked.store." ++ vstr ++ ".p0" ++ vstr + args = [L.PtrTo (L.Vector n tpv), L.iT 32, L.Vector n (L.iT 1), L.Vector n tpv] + in + intrinsic mnem (L.Vector n tpv) args llvmIntrinsics :: [Intrinsic] llvmIntrinsics = @@ -507,7 +513,8 @@ type LLVMArchConstraints arch = -- -- This information is the same for all blocks within the function. data FunLLVMContext arch = FunLLVMContext - { archFns :: !(LLVMArchSpecificOps arch) + { archInfo :: !(ArchitectureInfo arch) + , archFns :: !(LLVMArchSpecificOps arch) -- ^ Architecture-specific functions , funLLVMGenOptions :: !LLVMGenOptions -- ^ Options for generating LLVM @@ -628,7 +635,7 @@ setAssignIdValue :: setAssignIdValue fid v = do m <- gets bbAssignValMap case Map.lookup fid m of - Just{} -> error $ "internal: Assign id " ++ show (pretty fid) ++ " already assigned." + Just{} -> error $ "internal: Assign id " ++ show (PP.pretty fid) ++ " already assigned." Nothing -> pure () modify' $ \s -> s{bbAssignValMap = Map.insert fid v (bbAssignValMap s)} @@ -646,6 +653,7 @@ valueToLLVM :: FnValue arch tp -> BBLLVM arch (L.Typed L.Value) valueToLLVM ctx avmap val = withArchConstraints ctx $ do + aInfo <- asks archInfo let ptrWidth = addrWidthNatRepr (addrWidthRepr (Proxy :: Proxy (ArchAddrWidth arch))) case val of -- A value that is actually undefined, like a non-argument register at @@ -659,7 +667,7 @@ valueToLLVM ctx avmap val = withArchConstraints ctx $ do case Map.lookup lhs avmap of Just v -> pure v Nothing -> - error $ "Could not find assignment value " ++ show (pretty lhs) + error $ "Could not find assignment value " ++ show (PP.pretty lhs) -- Value from a phi node FnPhiValue phiVar -> do case Map.lookup (unFnPhiVar phiVar) avmap of @@ -671,14 +679,14 @@ valueToLLVM ctx avmap val = withArchConstraints ctx $ do case Map.lookup lhs avmap of Just v -> pure v Nothing -> - error $ "Could not find return variable " ++ show (pretty lhs) - -- The entry pointer to a function. We do the cast as a const - -- expr as function addresses appear as constants in e.g. phi - -- nodes + error $ "Could not find return variable " ++ show (PP.pretty lhs) + -- The entry pointer to a function. We do the cast as a const expr as + -- function addresses appear as constants in e.g. phi nodes FnFunctionEntryValue ftp nm -> do let typ = natReprToLLVMType ptrWidth - let fptr :: L.Typed L.Value - fptr = L.Typed (functionTypeToLLVM ftp) (L.ValSymbol (L.Symbol (BSC.unpack nm))) + let + fptr :: L.Typed L.Value + fptr = L.Typed (functionTypeToLLVM ftp) (L.ValSymbol (L.Symbol (BSC.unpack nm))) logEvent $ LLVMLogEvent "FnFunctionEntryValue" $ LogInfoPtrToInt $ @@ -688,6 +696,39 @@ valueToLLVM ctx avmap val = withArchConstraints ctx $ do FnArg i _tp | 0 <= i, i < V.length (funArgs ctx) -> pure $ funArgs ctx V.! i | otherwise -> error $ "Illegal argument index " ++ show i + FnCodePointer addr -> do + let + ty = + L.ptrT $ + L.FunTy + L.Opaque -- TODO + [] -- TODO + False + -- FIXME: should use the base and the offset + value = + L.ValConstExpr $ + L.ConstConv + L.IntToPtr + ( L.Typed + (L.PrimType (L.Integer (fromInteger (8 * toInteger (addrWidthReprByteCount (archAddrWidth aInfo)))))) + (L.ValInteger (toInteger (addrOffset addr))) + ) + ty + return $ L.Typed ty value + FnTypedCodePointer addr fty -> do + let + ty = functionTypeToLLVM fty + -- FIXME: should use the base and the offset + value = + L.ValConstExpr $ + L.ConstConv + L.IntToPtr + ( L.Typed + (L.PrimType (L.Integer (fromInteger (8 * toInteger (addrWidthReprByteCount (archAddrWidth aInfo)))))) + (L.ValInteger (toInteger (addrOffset addr))) + ) + ty + return $ L.Typed ty value mkLLVMValue :: HasCallStack => @@ -849,15 +890,16 @@ appToLLVM :: BBLLVM arch (L.Typed L.Value) appToLLVM lhs app = bbArchConstraints $ do let typ = typeToLLVMType $ typeRepr app - let binop :: - (L.Typed L.Value -> L.Value -> BBLLVM arch (L.Typed L.Value)) -> - FnValue arch utp -> - FnValue arch utp -> - BBLLVM arch (L.Typed L.Value) - binop f x y = do - x' <- mkLLVMValue x - y' <- mkLLVMValue y - f x' (L.typedValue y') + let + binop :: + (L.Typed L.Value -> L.Value -> BBLLVM arch (L.Typed L.Value)) -> + FnValue arch utp -> + FnValue arch utp -> + BBLLVM arch (L.Typed L.Value) + binop f x y = do + x' <- mkLLVMValue x + y' <- mkLLVMValue y + f x' (L.typedValue y') case app of Eq x y -> binop (icmpop L.Ieq) x y Mux _tp c t f -> do @@ -876,23 +918,25 @@ appToLLVM lhs app = bbArchConstraints $ do MkTuple fieldTypes fields -> do let structType = L.Struct (toListFC typeToLLVMType fieldTypes) let initUndef = L.Typed structType L.ValUndef - let f :: - forall utp. - FnValue arch utp -> - (Int32 -> L.Typed L.Value -> BBLLVM arch (L.Typed L.Value)) -> - (Int32 -> L.Typed L.Value -> BBLLVM arch (L.Typed L.Value)) - f fld c i s = do - llvmFieldValue <- mkLLVMValue fld - s' <- insertValue s llvmFieldValue i - c (i + 1) s' + let + f :: + forall utp. + FnValue arch utp -> + (Int32 -> L.Typed L.Value -> BBLLVM arch (L.Typed L.Value)) -> + (Int32 -> L.Typed L.Value -> BBLLVM arch (L.Typed L.Value)) + f fld c i s = do + llvmFieldValue <- mkLLVMValue fld + s' <- insertValue s llvmFieldValue i + c (i + 1) s' foldrFC f (\_ r -> pure r) fields 0 initUndef -- :: !(P.List TypeRepr l) -> !(f (TupleType l)) -> !(P.Index l r) -> App f r TupleField _fieldTypes macawStruct idx -> do -- Make a struct llvmStruct <- mkLLVMValue macawStruct -- Get index as an Int32 - let idxVal :: Integer - idxVal = PL.indexValue idx + let + idxVal :: Integer + idxVal = PL.indexValue idx when (idxVal >= toInteger (maxBound :: Int32)) $ error $ "Index out of range " ++ show idxVal ++ "." @@ -1112,8 +1156,9 @@ llvmGEPFromPtr :: BBLLVM arch (L.Typed L.Value) llvmGEPFromPtr pointeeType ofs ptrV = do let pointerType = L.PtrTo pointeeType - let zeroV = L.Typed (L.iT 32) (L.int 0) - ofsV = L.Typed (L.iT 32) (L.int ofs) + let + zeroV = L.Typed (L.iT 32) (L.int 0) + ofsV = L.Typed (L.iT 32) (L.int ofs) -- https://llvm.org/docs/GetElementPtr.html#what-is-the-first-index-of-the-gep-instruction L.Typed pointerType <$> evalInstr (L.GEP False ptrV [zeroV, ofsV]) @@ -1208,7 +1253,7 @@ pointerForMemOp ctx ptr pointeeType = do getInferredType ptr >>= \case Just FPtrTy{} -> llvmGEPFromPtr pointeeType 0 ptrV Just FConflictTy{} -> llvmAsPtr ctx pointeeType ptrV - t -> error $ "Unexpected type at pointerForMemOp " ++ show (pretty t) + t -> error $ "Unexpected type at pointerForMemOp " ++ show (PP.pretty t) -- | Convert an assignment to a llvm expression rhsToLLVM :: @@ -1289,9 +1334,10 @@ resolveFunctionEntry dest = fromMaybe (error "fnTypes 1") (Map.lookup nm (mcExtFunTypes constraints)) - let resolvetv tv = Map.lookup tv (mcTypeMap constraints) - args = map resolvetv (fttvArgs fty) - retty = fmap resolvetv (fttvRet fty) + let + resolvetv tv = Map.lookup tv (mcTypeMap constraints) + args = map resolvetv (fttvArgs fty) + retty = fmap resolvetv (fttvRet fty) return ( L.Typed (functionTypeToLLVM' dest_ftp args retty) (L.ValSymbol sym) , args @@ -1312,7 +1358,8 @@ stmtToLLVM :: FnStmt arch -> BBLLVM arch () stmtToLLVM stmt = bbArchConstraints $ do - comment (show $ pretty stmt) + -- This prints the FnStmt alongside the LLVM code, for debugging purposes + -- comment (show $ PP.pretty stmt) case stmt of FnComment _ -> return () FnAssignStmt (FnAssignment lhs rhs) -> do @@ -1390,16 +1437,18 @@ coerceForSubtype m_vTy m_tgtTy v = do (FPtrTy{}, _) -> llvmPtrAsBV "coerceForSubtype" v (_, FPtrTy ty) -> llvmAsPtr "coerceForSubtype" (tyToLLVMType ptrWidth ty) v + (FUnknownTy, FFunPtrTy{}) -> pure v -- TODO (val) ? + (FUnknownTy, FUnknownFunPtrTy{}) -> pure v -- TODO (val) ? (t, t') -> do thisLabel <- gets bbThisLabel :: BBLLVM arch L.BlockLabel error $ show $ "Type mismatch at " - <> viaShow thisLabel + <> PP.viaShow thisLabel <> ": " - <> pretty t + <> PP.pretty t <> " and " - <> pretty t' + <> PP.pretty t' mkLLVMSubtypeValue :: forall arch tp. @@ -1414,8 +1463,9 @@ mkLLVMSubtypeValue v m_tgtTy = do addTargetPhiValues :: forall arch. FnJumpTarget arch -> BBLLVM arch () addTargetPhiValues tgt = do thisLabel <- gets bbThisLabel :: BBLLVM arch L.BlockLabel - let tgtLbl :: FnBlockLabel (ArchAddrWidth arch) - tgtLbl = fnJumpLabel tgt + let + tgtLbl :: FnBlockLabel (ArchAddrWidth arch) + tgtLbl = fnJumpLabel tgt tgtPhis <- asks (fromMaybe (error "Missing block") . Map.lookup tgtLbl . funBlockPhis) let doSubtype (Some v) (Some phiv) = @@ -1423,19 +1473,20 @@ addTargetPhiValues tgt = do values <- V.zipWithM doSubtype (fnJumpPhiValues tgt) tgtPhis -- Add this block value to phi assignment - let updateVar :: - Map L.BlockLabel (Int, L.Value) -> - L.Typed L.Value -> - BBLLVM arch (Map L.BlockLabel (Int, L.Value)) - -- updateVar prevVars (Some v@(FnPhiValue phiVar)) = do - -- constraints <- asks moduleConstraints - -- fn <- asks funName - -- let tyV = mcAssignTyVars constraints Map.! fn Map.! unFnPhiVar phiVar - -- let inferredType = mcTypeMap constraints Map.! tyV - -- thisVal <- mkLLVMValue v - -- pure $! Map.insertWith (const (first (+ 1))) thisLabel (1, L.typedValue thisVal) prevVars - updateVar prevVars v = do - pure $! Map.insertWith (const (first (+ 1))) thisLabel (1, L.typedValue v) prevVars + let + updateVar :: + Map L.BlockLabel (Int, L.Value) -> + L.Typed L.Value -> + BBLLVM arch (Map L.BlockLabel (Int, L.Value)) + -- updateVar prevVars (Some v@(FnPhiValue phiVar)) = do + -- constraints <- asks moduleConstraints + -- fn <- asks funName + -- let tyV = mcAssignTyVars constraints Map.! fn Map.! unFnPhiVar phiVar + -- let inferredType = mcTypeMap constraints Map.! tyV + -- thisVal <- mkLLVMValue v + -- pure $! Map.insertWith (const (first (+ 1))) thisLabel (1, L.typedValue thisVal) prevVars + updateVar prevVars v = do + pure $! Map.insertWith (const (first (+ 1))) thisLabel (1, L.typedValue v) prevVars m <- BBLLVM $ use $ funStateLens . funBlockPhiMapLens let curEntries = phiAssignmentForBlock m tgtLbl newEntries <- V.zipWithM updateVar curEntries values @@ -1646,9 +1697,10 @@ callAsm :: [L.Typed L.Value] -> BBLLVM arch (L.Typed L.Value) callAsm attrs resType asmCode asmArgs args = do - let argTypes = L.typedType <$> args - ftp = L.PtrTo (L.FunTy resType argTypes False) - f = L.ValAsm (asmSideeffect attrs) False asmCode asmArgs + let + argTypes = L.typedType <$> args + ftp = L.PtrTo (L.FunTy resType argTypes False) + f = L.ValAsm (asmSideeffect attrs) False asmCode asmArgs L.Typed resType <$> evalInstr (L.Call False ftp f args) -- | Call some inline assembly that does not return a value. @@ -1665,8 +1717,9 @@ callAsm_ :: callAsm_ attrs asmCode asmArgs args = do let argTypes = L.typedType <$> args let ftp = L.PtrTo (L.FunTy (L.PrimType L.Void) argTypes False) - let f :: L.Value - f = L.ValAsm (asmSideeffect attrs) False asmCode asmArgs + let + f :: L.Value + f = L.ValAsm (asmSideeffect attrs) False asmCode asmArgs call_ (L.Typed ftp f) args ------------------------------------------------------------------------ @@ -1708,9 +1761,11 @@ mkStackExpr o in Ann.BVSub (Ann.Var Ann.StackHigh) oExpr | o == 0 = Ann.Var Ann.StackHigh | otherwise = - let oExpr :: Ann.Expr Ann.BlockVar - oExpr = Ann.BVDecimal (fromInteger o) 64 - in Ann.BVAdd (Ann.Var Ann.StackHigh) oExpr + let + oExpr :: Ann.Expr Ann.BlockVar + oExpr = Ann.BVDecimal (fromInteger o) 64 + in + Ann.BVAdd (Ann.Var Ann.StackHigh) oExpr newtype BlockAnnGen a = BlockAnnGen (Except String a) deriving (Functor, Applicative, Monad, MonadError String) @@ -1735,8 +1790,9 @@ mkBoundLocExpr (StackOffLoc o tp) = if o < 0 then case tp of BVMemRepr byteCount LittleEndian -> do - let stackExpr = mkStackExpr (toInteger o) - bitCount = 8 * natValue byteCount + let + stackExpr = mkStackExpr (toInteger o) + bitCount = 8 * natValue byteCount pure $! Ann.Var (Ann.MCStack stackExpr bitCount) _ -> throwError $ "Do not support stack references with type " ++ show tp @@ -1749,10 +1805,11 @@ addPhiPrecond b prev0 = case phiFnRepVar b of Some phiVar -> do -- Get expression representing LLVM value. - let phiExpr :: Ann.Expr Ann.BlockVar - phiExpr = Ann.Var (Ann.LLVMVar (Text.pack (phiLLVMIdent b))) - -- Assert pfi expression is equal to each machine location. - vars = fnPhiVarRep phiVar : fnPhiVarLocations phiVar + let + phiExpr :: Ann.Expr Ann.BlockVar + phiExpr = Ann.Var (Ann.LLVMVar (Text.pack (phiLLVMIdent b))) + -- Assert pfi expression is equal to each machine location. + vars = fnPhiVarRep phiVar : fnPhiVarLocations phiVar let fn v prev = seq prev $ do e <- mkBoundLocExpr v let expr = Ann.Eq phiExpr e @@ -1821,6 +1878,7 @@ getBlockAnn fnm blockRes = do defineFunction :: forall arch. (LLVMArchConstraints arch, arch ~ X86_64) => + ArchitectureInfo arch -> -- | Architecture specific operations LLVMArchSpecificOps arch -> -- | Options for generating LLVM @@ -1830,19 +1888,21 @@ defineFunction :: -- | Function to translate Function arch -> LLVMTrans (L.Define, Either String Ann.FunctionAnn) -defineFunction archOps genOpts constraints f = do +defineFunction aInfo archOps genOpts constraints f = do let ptrWidth = widthVal $ addrWidthNatRepr (addrWidthRepr (Proxy :: Proxy (ArchAddrWidth arch))) - let mkInputReg :: (Some TypeRepr, TyVar) -> Int -> L.Typed L.Ident - mkInputReg (Some tp, tyv) i = - case Map.lookup tyv (mcTypeMap constraints) of - Just (FPtrTy pointee) -> L.Typed (L.PtrTo (tyToLLVMType ptrWidth pointee)) (argIdent i) - _ -> L.Typed (typeToLLVMType tp) (argIdent i) + let + mkInputReg :: (Some TypeRepr, TyVar) -> Int -> L.Typed L.Ident + mkInputReg (Some tp, tyv) i = + case Map.lookup tyv (mcTypeMap constraints) of + Just (FPtrTy pointee) -> L.Typed (L.PtrTo (tyToLLVMType ptrWidth pointee)) (argIdent i) + _ -> L.Typed (typeToLLVMType tp) (argIdent i) let fty = fromMaybe (error "fty") (Map.lookup (fnName f) (mcExtFunTypes constraints)) let argsWithTyVars = zip (fnArgTypes (fnType f)) (fttvArgs fty) - let inputArgs :: [L.Typed L.Ident] - inputArgs = zipWith mkInputReg argsWithTyVars [0 ..] + let + inputArgs :: [L.Typed L.Ident] + inputArgs = zipWith mkInputReg argsWithTyVars [0 ..] let fret = fmap (\tv -> Map.lookup tv (mcTypeMap constraints)) (fttvRet fty) @@ -1851,31 +1911,35 @@ defineFunction archOps genOpts constraints f = do id' :: (LLVMArchConstraints arch => a) -> a id' a = a - let ctx :: FunLLVMContext arch - ctx = - FunLLVMContext - { archFns = archOps - , funLLVMGenOptions = genOpts - , funAddr = fnAddr f - , funName = fnName f - , funArgs = V.fromList $ fmap L.ValIdent <$> inputArgs - , funRetType = fret - , funAllocaCount = 0 - , moduleConstraints = constraints - , funBlockPhis = phiMapFromFunction f - , withArchConstraints = id' - } + let + ctx :: FunLLVMContext arch + ctx = + FunLLVMContext + { archInfo = aInfo + , archFns = archOps + , funLLVMGenOptions = genOpts + , funAddr = fnAddr f + , funName = fnName f + , funArgs = V.fromList $ fmap L.ValIdent <$> inputArgs + , funRetType = fret + , funAllocaCount = 0 + , moduleConstraints = constraints + , funBlockPhis = phiMapFromFunction f + , withArchConstraints = id' + } -- Create ordinary blocks m0 <- gets llvmTransIntrinsicMap - let initFunState :: FunState arch - initFunState = - FunState - { nmCounter = 0 - , funIntrinsicMap = m0 - , needSwitchFailLabel = False - , funBlockPhiMap = initResolvePhiMap f - } + let + initFunState :: FunState arch + initFunState = + FunState + { nmCounter = 0 + , funIntrinsicMap = m0 + , needSwitchFailLabel = False + , funBlockPhiMap = initResolvePhiMap f + } + -- trace (show (PP.pretty (fnEntryBlock f))) (pure ()) let (postEntryFunState, entryBlockRes) = addLLVMBlock ctx initFunState (fnEntryBlock f) @@ -1892,11 +1956,13 @@ defineFunction archOps genOpts constraints f = do ++ llvmTransLogEvents s } - let entryLLVMBlock :: L.BasicBlock - entryLLVMBlock = toBasicBlock constraints (fnName f) (funBlockPhiMap finalFunState) entryBlockRes + let + entryLLVMBlock :: L.BasicBlock + entryLLVMBlock = toBasicBlock constraints (fnName f) (funBlockPhiMap finalFunState) entryBlockRes - let blocks :: [L.BasicBlock] - blocks = toBasicBlock constraints (fnName f) (funBlockPhiMap finalFunState) <$> finalBlocks + let + blocks :: [L.BasicBlock] + blocks = toBasicBlock constraints (fnName f) (funBlockPhiMap finalFunState) <$> finalBlocks let finBlocks | needSwitchFailLabel finalFunState = entryLLVMBlock : (blocks ++ [failBlock]) @@ -1916,22 +1982,23 @@ defineFunction archOps genOpts constraints f = do , L.defMetadata = Map.empty , L.defComdat = Nothing } - let funAnn :: Either String Ann.FunctionAnn - funAnn = do - blockAnnEntries <- mapM (getBlockAnn (fnName f)) (V.fromList (entryBlockRes : finalBlocks)) - let finBlockAnnMap - | needSwitchFailLabel finalFunState = - V.snoc blockAnnEntries (switchFailLabel, Ann.UnreachableBlock) - | otherwise = - blockAnnEntries - let blockObjMap = uncurry Ann.blockAnnToJSON <$> finBlockAnnMap - let addr = fromIntegral $ addrOffset $ segoffAddr $ fnAddr f - pure $! - Ann.FunctionAnn - { Ann.llvmFunName = BSC.unpack (fnName f) - , Ann.faStartAddr = addr - , Ann.blocks = blockObjMap - } + let + funAnn :: Either String Ann.FunctionAnn + funAnn = do + blockAnnEntries <- mapM (getBlockAnn (fnName f)) (V.fromList (entryBlockRes : finalBlocks)) + let finBlockAnnMap + | needSwitchFailLabel finalFunState = + V.snoc blockAnnEntries (switchFailLabel, Ann.UnreachableBlock) + | otherwise = + blockAnnEntries + let blockObjMap = uncurry Ann.blockAnnToJSON <$> finBlockAnnMap + let addr = fromIntegral $ addrOffset $ segoffAddr $ fnAddr f + pure $! + Ann.FunctionAnn + { Ann.llvmFunName = BSC.unpack (fnName f) + , Ann.faStartAddr = addr + , Ann.blocks = blockObjMap + } pure (funDef, funAnn) -- | Create function annotation from declaration. @@ -1971,12 +2038,12 @@ declareIntrinsic i = -- behavior. moduleForFunctions :: forall arch. - ( LLVMArchConstraints arch - , Show (FunctionType arch) - , FoldableFC (ArchFn arch) - , FoldableF (FnArchStmt arch) - , arch ~ X86_64 - ) => + arch ~ X86_64 => + FoldableF (FnArchStmt arch) => + FoldableFC (ArchFn arch) => + LLVMArchConstraints arch => + Show (FunctionType arch) => + ArchitectureInfo arch -> -- | Architecture specific functions LLVMArchSpecificOps arch -> -- | Options for generating LLVM @@ -1989,37 +2056,39 @@ moduleForFunctions :: , [Ann.ExternalFunctionAnn] , [LLVMLogEvent] ) -moduleForFunctions archOps genOpts recMod constraints = - let (dynIntrinsics, logEvents, definesAndAnn) = runLLVMTrans $ - forM (recoveredDefs recMod) $ \f -> do - let fId = funId (fnAddr f) (Just (fnName f)) - (d, ma) <- defineFunction archOps genOpts constraints f - pure (d, (fId, ma)) - -- FIXME: this is repeated in a bunch of places - ptrWidth = widthVal $ addrWidthNatRepr (addrWidthRepr (Proxy :: Proxy (ArchAddrWidth arch))) - namedTypes = - [ L.TypeDecl (L.Ident s) (tyToLLVMType ptrWidth ty) - | (s, ty) <- mcNamedTypes constraints - ] - llvmMod = - L.Module - { L.modSourceName = Nothing - , L.modDataLayout = [] - , L.modTypes = namedTypes - , L.modNamedMd = [] - , L.modUnnamedMd = [] - , L.modGlobals = [] - , L.modDeclares = - fmap declareIntrinsic llvmIntrinsics - ++ fmap declareIntrinsic dynIntrinsics - ++ fmap declareFunction (recoveredDecls recMod) - , L.modDefines = fst <$> definesAndAnn - , L.modInlineAsm = [] - , L.modAliases = [] - , L.modComdat = Map.empty - } - annDecls = mkExternalFunctionAnn <$> recoveredDecls recMod - in (llvmMod, snd <$> definesAndAnn, annDecls, logEvents) +moduleForFunctions aInfo archOps genOpts recMod constraints = + let + (dynIntrinsics, logEvents, definesAndAnn) = runLLVMTrans $ + forM (recoveredDefs recMod) $ \f -> do + let fId = funId (fnAddr f) (Just (fnName f)) + (d, ma) <- defineFunction aInfo archOps genOpts constraints f + pure (d, (fId, ma)) + -- FIXME: this is repeated in a bunch of places + ptrWidth = widthVal $ addrWidthNatRepr (addrWidthRepr (Proxy :: Proxy (ArchAddrWidth arch))) + namedTypes = + [ L.TypeDecl (L.Ident s) (tyToLLVMType ptrWidth ty) + | (s, ty) <- mcNamedTypes constraints + ] + llvmMod = + L.Module + { L.modSourceName = Nothing + , L.modDataLayout = [] + , L.modTypes = namedTypes + , L.modNamedMd = [] + , L.modUnnamedMd = [] + , L.modGlobals = [] + , L.modDeclares = + fmap declareIntrinsic llvmIntrinsics + ++ fmap declareIntrinsic dynIntrinsics + ++ fmap declareFunction (recoveredDecls recMod) + , L.modDefines = fst <$> definesAndAnn + , L.modInlineAsm = [] + , L.modAliases = [] + , L.modComdat = Map.empty + } + annDecls = mkExternalFunctionAnn <$> recoveredDecls recMod + in + (llvmMod, snd <$> definesAndAnn, annDecls, logEvents) -- | Returns the type that was inferred for the given value via constraint -- solving, if any. @@ -2034,6 +2103,8 @@ getInferredType (FnReturn (FnReturnVar retVar _)) = getInferredTypeForAssignIdBB getInferredType (FnArg arg _typ) = do fn <- asks funName getInferredFunctionArgType fn arg +getInferredType (FnCodePointer{}) = pure Nothing -- TODO +getInferredType (FnTypedCodePointer _args _ret) = error "TODO" -- TODO getInferredFunctionArgType :: BSC.ByteString -> Int -> BBLLVM arch (Maybe FTy) getInferredFunctionArgType fn arg = do diff --git a/src/Reopt/CFG/Recovery.hs b/src/Reopt/CFG/Recovery.hs index 4b9bb2d4..488f24f7 100644 --- a/src/Reopt/CFG/Recovery.hs +++ b/src/Reopt/CFG/Recovery.hs @@ -89,7 +89,6 @@ import Data.Macaw.Discovery.State import Data.Macaw.Memory.Permissions qualified as Perm import Data.Macaw.Types hiding (Type) import Data.Macaw.Types qualified as M (Type) - import Data.Macaw.X86 ( x86DemandContext, x86_64CallParams, @@ -547,7 +546,8 @@ recoverCValue cv = do RelocatableCValue _w addr | Just addrRef <- asSegmentOff mem addr , Perm.isExecutable (segmentFlags (segoffSegment addrRef)) -> do - throwErrorAt ReoptUnsupportedFnValueTag "Cannot lift code pointers." + pure $ FnCodePointer addr + -- throwErrorAt ReoptUnsupportedFnValueTag "Cannot lift code pointers." | otherwise -> case asAbsoluteAddr addr of Just absAddr -> emitNewAssign (toInteger absAddr) @@ -928,6 +928,7 @@ recoverStmt :: Stmt X86_64 ids -> Recover ids () recoverStmt stmtIdx stmt = do + -- trace ("Recovering " <> show (ppStmt pretty stmt)) (pure ()) case stmt of AssignStmt asgn -> do recoverAssign stmtIdx asgn @@ -955,11 +956,12 @@ recoverStmt stmtIdx stmt = do Comment msg -> do addFnStmt $ FnComment msg ExecArchStmt astmt0 -> do - -- Architecture-specific statements are assumed to always - -- have side effects. + -- Architecture-specific statements are assumed to always have side + -- effects. astmt <- traverseF recoverValue astmt0 addFnStmt (FnArchStmt (X86FnStmt astmt)) - InstructionStart o _ -> do + InstructionStart o asm -> do + addFnStmt $ FnComment asm -- added by val -- Set recovery instruction offset modify $ \s -> s{rsBlockOff = o} ArchState _ _ -> do @@ -1868,8 +1870,7 @@ x86CallRegs mem funNameMap funTypeMap _callSite regs = do Left $ Reason CallTargetNotFunctionEntryPoint (memWordValue (addrOffset faddr)) SymbolValue _ (SymbolRelocation nm _ver) -> do pure nm - _ -> - Left $ Reason IndirectCallTarget () + _ -> Left $ Reason IndirectCallTarget () case funTypeMap nm of Just tp -> x86TranslateCallType mem nm regs tp Nothing -> Left $ Reason UnknownCallTargetArguments nm @@ -1969,7 +1970,6 @@ recoverFunction sysp mem fInfo invMap nm curArgs curRets = do } runFunRecover funCtx $ do let entryBlk = fromJust $ Map.lookup entryAddr (fInfo ^. parsedBlocks) - -- Insert uninitialized register into initial block location map. let insUninit :: diff --git a/src/Reopt/TypeInference/ConstraintGen.hs b/src/Reopt/TypeInference/ConstraintGen.hs index dd5c96e9..2da43f22 100644 --- a/src/Reopt/TypeInference/ConstraintGen.hs +++ b/src/Reopt/TypeInference/ConstraintGen.hs @@ -55,13 +55,9 @@ import Data.Macaw.Types ( typeRepr, ) import Data.Parameterized (FoldableF, FoldableFC) -import Data.Parameterized.NatRepr ( - NatRepr, - intValue, - testEquality, - widthVal, - ) +import Data.Parameterized.NatRepr (NatRepr, intValue, testEquality, widthVal) import Data.Parameterized.Some (Some (Some), viewSome) +import Data.Parameterized.TraversableFC (toListFC) import Reopt.CFG.FnRep ( FnArchConstraints, FnArchStmt, @@ -89,9 +85,10 @@ import Reopt.TypeInference.Solver ( RowVar, SolverM, StructName, - Ty, + Ty (Ty), TyVar, eqTC, + funPtrTy, isNumTC, numTy, ptrAddTC, @@ -99,14 +96,16 @@ import Reopt.TypeInference.Solver ( ptrTC, runSolverM, subTypeTC, + tupleTy, unifyConstraints, - varTy, + varTy, vecTy, ) import Reopt.TypeInference.Solver qualified as S import Reopt.TypeInference.Solver.Constraints ( ConstraintProvenance (..), FnRepProvenance (..), ) +import Reopt.TypeInference.Solver.Types (TyF (..)) -- This algorithm proceeds in stages: -- 1. Give type variables to the arguments to all functions @@ -505,6 +504,16 @@ emitPtr prov pointee pointer = -- ----------------------------------------------------------------------------- -- Core algorithm +macawTypeToReoptTy :: Some TypeRepr -> Ty +macawTypeToReoptTy = viewSome go + where + go :: TypeRepr ty -> Ty + go BoolTypeRepr = numTy 1 + go (BVTypeRepr n) = numTy (fromInteger (intValue n)) + go (FloatTypeRepr _flt) = error "TODO: support float in type inference" + go (TupleTypeRepr s) = tupleTy $ toListFC go s + go (VecTypeRepr w tp) = vecTy (fromInteger (intValue w)) (go tp) + genFnValue :: FnArchConstraints arch => FnValue arch tp -> CGenM CGenBlockContext arch Ty genFnValue v = case v of @@ -516,6 +525,13 @@ genFnValue v = FnReturn frv -> funRetType frv FnFunctionEntryValue{} -> punt FnArg i _ -> argumentType i + FnCodePointer _addr -> pure $ Ty UnknownFunPtrTy + -- NOTE: not sure what to do about varags yet + FnTypedCodePointer _addr fty -> + pure $ + funPtrTy + (map macawTypeToReoptTy (fnArgTypes fty)) + (maybe (error "No return type, investigate...") macawTypeToReoptTy (fnReturnType fty)) -- FIXME (val) type? where punt = do warn "Punting on FnValue" diff --git a/src/Reopt/TypeInference/Header.hs b/src/Reopt/TypeInference/Header.hs index 81eb6915..d2194ac8 100644 --- a/src/Reopt/TypeInference/Header.hs +++ b/src/Reopt/TypeInference/Header.hs @@ -145,15 +145,34 @@ parseStructUnion (C.CStruct tag _mi _mdecl _attrs n) = C.CStructTag -> errorAt n "Struct is not supported." C.CUnionTag -> errorAt n "Union is not supported." --- | Parser derived declarators. +parseTypeDecl :: C.NodeInfo -> C.CDeclarationSpecifier C.NodeInfo -> CParser C.CTypeSpec +parseTypeDecl _ (C.CTypeSpec spec) = return spec +parseTypeDecl n _ = errorAt n "Expected type specification" + +parseTypeDecls :: C.CDeclaration C.NodeInfo -> CParser C.CTypeSpec +parseTypeDecls (C.CDecl [decl] _ n) = parseTypeDecl n decl +-- NOTE (val) So far I'm only seeing singleton lists here, not sure why. +parseTypeDecls (C.CDecl _decls _ n) = errorAt n "Expected single type spec, investigate." +parseTypeDecls (C.CStaticAssert _ _ n) = errorAt n "TODO" + +-- | Parse derived declarators. parseTypeDerivedDecl :: [C.CDerivedDeclarator C.NodeInfo] -> AnnType -> CParser AnnType parseTypeDerivedDecl [] tp = pure tp parseTypeDerivedDecl (C.CPtrDeclr _ _ : rest) tp = do parseTypeDerivedDecl rest $! PtrAnnType tp parseTypeDerivedDecl (C.CArrDeclr _ _ n : _) _tp = do errorAt n "Arrays are not supported." -parseTypeDerivedDecl (C.CFunDeclr _ _ n : _) _tp = do - errorAt n "Function declarations are not supported in this context." +parseTypeDerivedDecl (C.CFunDeclr (Right (decls, False)) _attrs _n : rest) tp = do + typeSpecs <- mapM parseTypeDecls decls + args <- mapM (parseType emptyQualMods) typeSpecs + parseTypeDerivedDecl rest $! FunPtrAnnType tp args + -- errorAt n $ "decls: " ++ show decls ++ "\nattrs: " ++ show attrs ++ "\ntp: " ++ show tp +parseTypeDerivedDecl (C.CFunDeclr (Right (_, True)) _boop n : _) _tp = do + errorAt n "True" +parseTypeDerivedDecl (C.CFunDeclr (Left {}) _boop n : _) _tp = do + errorAt n "Left" +-- parseTypeDerivedDecl (C.CFunDeclr _ _ n : _) _tp = do +-- errorAt n "Function declarations are not supported in this context." parseFullType :: [C.CDeclarationSpecifier C.NodeInfo] -> diff --git a/src/Reopt/TypeInference/HeaderTypes.hs b/src/Reopt/TypeInference/HeaderTypes.hs index d08204b6..d6d5744e 100644 --- a/src/Reopt/TypeInference/HeaderTypes.hs +++ b/src/Reopt/TypeInference/HeaderTypes.hs @@ -11,6 +11,7 @@ module Reopt.TypeInference.HeaderTypes ( ) where import Data.ByteString.Char8 qualified as BSC +import Data.List (intercalate) import Data.Map.Strict (Map) import Data.Map.Strict qualified as Map import Data.Vector qualified as V @@ -33,6 +34,7 @@ data AnnType PtrAnnType !AnnType | -- | A typedef with the name and resolved right hand side. TypedefAnnType !BSC.ByteString !AnnType + | FunPtrAnnType !AnnType ![AnnType] deriving (Eq, Show, Read) -- | Pretty print the header type for the end user. @@ -44,6 +46,7 @@ ppAnnType = \case DoubleAnnType -> "double" PtrAnnType tp -> ppAnnType tp ++ "*" TypedefAnnType nm _ -> BSC.unpack nm + FunPtrAnnType ret args -> ppAnnType ret ++ "(*?)(" ++ intercalate ", " (map ppAnnType args) ++ ")" instance PP.Pretty AnnType where pretty = \case @@ -53,6 +56,8 @@ instance PP.Pretty AnnType where DoubleAnnType -> "double" PtrAnnType tp -> PP.pretty tp <> "*" TypedefAnnType nm _ -> PP.pretty (BSC.unpack nm) + FunPtrAnnType ret args -> + PP.pretty ret PP.<+> PP.parens (PP.hcat (PP.punctuate PP.comma (map PP.pretty args))) -- | Information about function argument with optional name -- information. diff --git a/src/Reopt/TypeInference/Solver.hs b/src/Reopt/TypeInference/Solver.hs index d52b61c2..cec475c0 100644 --- a/src/Reopt/TypeInference/Solver.hs +++ b/src/Reopt/TypeInference/Solver.hs @@ -4,10 +4,13 @@ module Reopt.TypeInference.Solver ( Ty (..), TyVar, RowVar, + funPtrTy, numTy, ptrTy, ptrTy', + tupleTy, varTy, + vecTy, SolverM, runSolverM, eqTC, @@ -28,6 +31,8 @@ module Reopt.TypeInference.Solver ( FTy, pattern FNumTy, pattern FPtrTy, + pattern FFunPtrTy, + pattern FUnknownFunPtrTy, pattern FUnknownTy, pattern FNamedStruct, pattern FStructTy, @@ -96,9 +101,18 @@ ptrTy = Ty . PtrTy ptrTy' :: Ty -> Ty ptrTy' = Ty . PtrTy . singletonFieldMap 0 +tupleTy :: [Ty] -> Ty +tupleTy = Ty . TupleTy + +vecTy :: Int -> Ty -> Ty +vecTy i = Ty . VecTy i + varTy :: TyVar -> Ty varTy = Var +funPtrTy :: [Ty] -> Ty -> Ty +funPtrTy args ret = Ty (FunPtrTy args ret) + -------------------------------------------------------------------------------- -- Compilers from Ty into ITy @@ -114,6 +128,8 @@ compileTy (Ty ty) = PtrTy fm -> do fm' <- traverse nameTy fm PtrTy . RowExprVar <$> freshRowVarFM fm' + UnknownFunPtrTy -> pure UnknownFunPtrTy + FunPtrTy args ret -> FunPtrTy <$> mapM nameTy args <*> nameTy ret ConflictTy n -> pure (ConflictTy n) TupleTy ts -> TupleTy <$> traverse nameTy ts VecTy n ty' -> VecTy n <$> nameTy ty' @@ -374,6 +390,12 @@ pattern FNumTy sz = FTy (NumTy sz) pattern FPtrTy :: FTy -> FTy pattern FPtrTy ty = FTy (PtrTy ty) +pattern FFunPtrTy :: [FTy] -> FTy -> FTy +pattern FFunPtrTy args ret = FTy (FunPtrTy args ret) + +pattern FUnknownFunPtrTy :: FTy +pattern FUnknownFunPtrTy = FTy UnknownFunPtrTy + pattern FUnknownTy :: FTy pattern FUnknownTy = UnknownTy diff --git a/src/Reopt/TypeInference/Solver/Finalise.hs b/src/Reopt/TypeInference/Solver/Finalise.hs index 47cee445..3b49e4e7 100644 --- a/src/Reopt/TypeInference/Solver/Finalise.hs +++ b/src/Reopt/TypeInference/Solver/Finalise.hs @@ -78,6 +78,8 @@ finalizeTypeDefs = do if off == 0 then pure (PtrTy (rowExprVar re')) else PtrTy <$> freshRowVarFM (dropFieldMap off fm) + UnknownFunPtrTy -> pure UnknownFunPtrTy + FunPtrTy args ret -> FunPtrTy <$> mapM lookupTyVarRep args <*> lookupTyVarRep ret NumTy n -> pure (NumTy n) ConflictTy n -> pure (ConflictTy n) TupleTy ts -> TupleTy <$> traverse lookupTyVarRep ts @@ -149,6 +151,8 @@ finaliseTyF (ty, tv, _) r = where norm = \case PtrTy rv -> FTy (PtrTy (Map.findWithDefault (StructTy emptyFieldMap) rv (csRowVars r))) + UnknownFunPtrTy -> FTy UnknownFunPtrTy + FunPtrTy args ret -> FTy (FunPtrTy (map normTy args) (normTy ret)) NumTy n -> FTy (NumTy n) ConflictTy n -> FTy (ConflictTy n) TupleTy ts -> FTy (TupleTy (map normTy ts)) diff --git a/src/Reopt/TypeInference/Solver/Types.hs b/src/Reopt/TypeInference/Solver/Types.hs index 7655a37c..f618c6d7 100644 --- a/src/Reopt/TypeInference/Solver/Types.hs +++ b/src/Reopt/TypeInference/Solver/Types.hs @@ -30,6 +30,10 @@ data TyF rvar f TupleTy [f] | -- | A vector VecTy Int f + | -- | An unknown function pointer type + UnknownFunPtrTy + | -- | A known function pointer type + FunPtrTy [f] f deriving (Eq, Ord, Show, Functor, Foldable, Traversable) -- | An unrolled ITy @@ -69,8 +73,8 @@ recTyByteWidth ptrSz = offsetAfterLast . last where offsetAfterLast (o, ty) = fromIntegral o + tyByteWidth ptrSz ty --- | This shoold only be called on types which can occur within a --- RecTy, i.e., not records. +-- | This should only be called on types which can occur within a RecTy, i.e., +-- not records. tyByteWidth :: Int -> FTy -> Integer tyByteWidth ptrSz UnknownTy = fromIntegral ptrSz `div` 8 tyByteWidth _ptrSz StructTy{} = error "Saw a StructTy in tyByteWidth" @@ -79,6 +83,8 @@ tyByteWidth ptrSz (FTy ty) = case ty of NumTy n -> fromIntegral n `div` 8 PtrTy _ -> fromIntegral ptrSz `div` 8 + UnknownFunPtrTy -> fromIntegral ptrSz `div` 8 + FunPtrTy{} -> fromIntegral ptrSz `div` 8 ConflictTy n -> fromIntegral n `div` 8 TupleTy{} -> error "Saw a TupleTy in tyByteWidth" VecTy{} -> error "Saw a VecTy in tyByteWidth" @@ -98,17 +104,21 @@ recTyToLLVMType ptrSz fields = L.Struct (go 0 fields) -- c.f. typeToLLVMType tyToLLVMType :: Int -> FTy -> L.Type -tyToLLVMType ptrSz UnknownTy = - L.PrimType (L.Integer (fromIntegral ptrSz)) -tyToLLVMType _ptrSz (NamedStruct s) = L.Alias (L.Ident s) -tyToLLVMType ptrSz (StructTy fm) = recTyToLLVMType ptrSz (Map.assocs (getFieldMap fm)) -tyToLLVMType ptrSz (FTy ty) = - case ty of - NumTy n -> L.PrimType (L.Integer (fromIntegral n)) - PtrTy ty' -> L.PtrTo $ tyToLLVMType ptrSz ty' - ConflictTy n -> L.PrimType (L.Integer (fromIntegral n)) - TupleTy ts -> L.Struct (map (tyToLLVMType ptrSz) ts) - VecTy n ty' -> L.Vector (fromIntegral n) (tyToLLVMType ptrSz ty') +tyToLLVMType ptrSz = go + where + go :: FTy -> L.Type + go UnknownTy = L.PrimType (L.Integer (fromIntegral ptrSz)) + go (NamedStruct s) = L.Alias (L.Ident s) + go (StructTy fm) = recTyToLLVMType ptrSz (Map.assocs (getFieldMap fm)) + go (FTy ty) = + case ty of + NumTy n -> L.PrimType (L.Integer (fromIntegral n)) + PtrTy ty' -> L.PtrTo $ tyToLLVMType ptrSz ty' + UnknownFunPtrTy -> L.PtrTo L.Opaque + FunPtrTy args ret -> L.PtrTo $ L.FunTy (go ret) (map go args) False + ConflictTy n -> L.PrimType (L.Integer (fromIntegral n)) + TupleTy ts -> L.Struct (map go ts) + VecTy n ty' -> L.Vector (fromIntegral n) (go ty') -------------------------------------------------------------------------------- -- Instances @@ -124,6 +134,8 @@ instance (PP.Pretty f, PP.Pretty rv) => PP.Pretty (TyF rv f) where pretty = \case NumTy sz -> "i" <> PP.pretty sz PtrTy t -> "ptr " <> PP.pretty t + UnknownFunPtrTy -> "? (???)*" + FunPtrTy args ret -> PP.pretty ret <> " (" <> PP.hcat (PP.punctuate PP.comma (map PP.pretty args)) <> ")*" ConflictTy n -> "![" <> PP.pretty n <> "]" TupleTy ts -> PP.tupled (map PP.pretty ts) VecTy n ty -> "< " <> PP.pretty n <> " x " <> PP.pretty ty <> " >" @@ -150,6 +162,8 @@ instance (FreeTyVars rvar, FreeTyVars f) => FreeTyVars (TyF rvar f) where freeTyVars = \case NumTy _ -> Set.empty PtrTy t -> freeTyVars t + UnknownFunPtrTy -> Set.empty + FunPtrTy args ret -> freeTyVars ret `Set.union` Set.unions (map freeTyVars args) ConflictTy{} -> Set.empty TupleTy ts -> foldMap freeTyVars ts VecTy _ ty -> freeTyVars ty @@ -181,6 +195,8 @@ instance (FreeRowVars r, FreeRowVars f) => FreeRowVars (TyF r f) where freeRowVars = \case NumTy _ -> Set.empty PtrTy t -> freeRowVars t + UnknownFunPtrTy -> Set.empty + FunPtrTy args ret -> freeRowVars ret `Set.union` Set.unions (map freeRowVars args) ConflictTy{} -> Set.empty TupleTy ts -> foldMap freeRowVars ts VecTy _ ty -> freeRowVars ty diff --git a/tests/ReoptTests.hs b/tests/ReoptTests.hs index fd060815..db309c45 100644 --- a/tests/ReoptTests.hs +++ b/tests/ReoptTests.hs @@ -51,7 +51,7 @@ mkTest fp = T.testCase fp $ do mr <- runReoptM logger $ do recoverX86Elf loadOpts reoptOpts hdrAnn "reopt" hdrInfo - (os, discState, recovOut, moduleConstraints) <- either (fail . show) pure mr + (os, discState, recovOut, _, moduleConstraints) <- either (fail . show) pure mr let recMod = recoveredModule recovOut writeFile blocks_path $ show $ ppDiscoveryStateBlocks discState