diff --git a/fourmolu.yaml b/fourmolu.yaml index b85cb99e..3c07ba91 100644 --- a/fourmolu.yaml +++ b/fourmolu.yaml @@ -1,3 +1,10 @@ +comma-style: leading +function-arrows: trailing haddock-style: single-line +import-export-style: diff-friendly indentation: 2 +in-style: right-align +let-style: mixed +record-brace-space: false +single-constraint-parens: never unicode: never diff --git a/reopt-explore/Common.hs b/reopt-explore/Common.hs index aa23631e..c90730e6 100644 --- a/reopt-explore/Common.hs +++ b/reopt-explore/Common.hs @@ -1,10 +1,9 @@ - module Common where -import Control.Monad (foldM) -import Data.IORef (IORef, modifyIORef', newIORef, readIORef) +import Control.Monad (foldM) +import Data.IORef (IORef, modifyIORef', newIORef, readIORef) -import Reopt.Utils.Dir (withElfExeFilesInDir) +import Reopt.Utils.Dir (withElfExeFilesInDir) findAllElfFilesInDirs :: [FilePath] -> @@ -13,8 +12,9 @@ findAllElfFilesInDirs paths = do counter <- newIORef 1 files <- foldM (withElfExeFilesInDir (recordFile counter)) [] paths pure $ reverse files - where recordFile :: IORef Int -> [(Int, FilePath)] -> FilePath -> IO [(Int, FilePath)] - recordFile counter ps p = do - index <- readIORef counter - modifyIORef' counter (+ 1) - pure $ (index, p):ps + where + recordFile :: IORef Int -> [(Int, FilePath)] -> FilePath -> IO [(Int, FilePath)] + recordFile counter ps p = do + index <- readIORef counter + modifyIORef' counter (+ 1) + pure $ (index, p) : ps diff --git a/reopt-explore/Residual.hs b/reopt-explore/Residual.hs index 788e73a9..fa75462c 100644 --- a/reopt-explore/Residual.hs +++ b/reopt-explore/Residual.hs @@ -1,81 +1,98 @@ -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE TypeApplications #-} -{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE ViewPatterns #-} module Residual (runResidual) where -import Control.Monad (forM_) -import qualified Data.ByteString.Char8 as BSC -import Data.IORef (newIORef) -import qualified Data.Map as Map -import qualified Data.Vector as Vec -import Data.Word (Word64) -import Numeric (showHex) - -import Data.Macaw.Discovery (DiscoveryState (memory)) -import Data.Macaw.Memory (MemChunk (ByteRegion), - MemSegment (segmentFlags, segmentOffset), - MemWord (memWordValue), Memory, - forcedTakeMemChunks, - memSegments, memWord, - resolveAbsoluteAddr, - segmentSize, - segoffAsAbsoluteAddr, - segoffContentsAfter, - segoffOffset, segoffSegment) -import qualified Data.Macaw.Memory.Permissions as Perm - -import Reopt (LoadOptions (LoadOptions), - RecoverX86Output (recoveredModule, summaryFailures), - ReoptOptions, X86_64, - loadOffset, - parseElfHeaderInfo64, - recoverX86Elf, resolveHeader, - roVerboseMode, runReoptM) -import Reopt.CFG.FnRep (FnBlock, RecoveredModule, - fbLabel, fbSize, - fnBlockLabelAddr, fnBlocks, - recoveredDefs) -import Reopt.Events (ReoptLogEvent, initReoptSummary, - joinLogEvents, printLogEvent, - recoverLogEvent) -import Reopt.Utils.Exit (checkedReadFile, - handleEitherStringWithExit, - handleEitherWithExit) - -import CommandLine (Options, - ResidualOptions (roClangPath, roHeader, roOutputForSpreadsheet, roPaths)) -import Common (findAllElfFilesInDirs) -import Data.Either (fromRight) -import Data.ElfEdit (ElfHeaderInfo, - Shdr (shdrAddr, shdrName, shdrSize), - Symtab (symtabEntries), - SymtabEntry (steName, steValue), - decodeHeaderSymtab, - headerNamedShdrs) -import Data.List (find, partition) -import Data.Maybe (isJust, mapMaybe) -import Flexdis86 (DisassembledAddr (disInstruction, disOffset), - disassembleBuffer, - ppInstruction) -import qualified Prettyprinter as PP -import Prettyprinter.Render.String (renderString) -import Residual.Recognizers (ResidualExplanation (BecauseFailure), - classifyInstrs, - ppResidualExplanation) -import Text.Printf (printf) - -newtype InclusiveRange w = InclusiveRange { getInclusiveRange :: (w, w) } +import Control.Monad (forM_) +import Data.ByteString.Char8 qualified as BSC +import Data.IORef (newIORef) +import Data.Map qualified as Map +import Data.Vector qualified as Vec +import Data.Word (Word64) +import Numeric (showHex) + +import Data.Macaw.Discovery (DiscoveryState (memory)) +import Data.Macaw.Memory (MemChunk, Memory) +import Data.Macaw.Memory qualified as Mem +import Data.Macaw.Memory.Permissions qualified as Perm + +import Reopt ( + LoadOptions (LoadOptions), + RecoverX86Output (recoveredModule, summaryFailures), + ReoptOptions, + X86_64, + loadOffset, + parseElfHeaderInfo64, + recoverX86Elf, + resolveHeader, + roVerboseMode, + runReoptM, + ) +import Reopt.CFG.FnRep ( + FnBlock, + RecoveredModule, + fbLabel, + fbSize, + fnBlockLabelAddr, + fnBlocks, + recoveredDefs, + ) +import Reopt.Events ( + ReoptLogEvent, + initReoptSummary, + joinLogEvents, + printLogEvent, + recoverLogEvent, + ) +import Reopt.Utils.Exit ( + checkedReadFile, + handleEitherStringWithExit, + handleEitherWithExit, + ) + +import CommandLine ( + Options, + ResidualOptions (roClangPath, roHeader, roOutputForSpreadsheet, roPaths), + ) +import Common (findAllElfFilesInDirs) +import Data.Either (fromRight) +import Data.ElfEdit ( + ElfHeaderInfo, + Shdr (shdrAddr, shdrName, shdrSize), + Symtab (symtabEntries), + SymtabEntry (steName, steValue), + decodeHeaderSymtab, + headerNamedShdrs, + ) +import Data.List (find, partition) +import Data.Maybe (isJust, mapMaybe) +import Flexdis86 ( + DisassembledAddr (disInstruction, disOffset), + disassembleBuffer, + ppInstruction, + ) +import Prettyprinter qualified as PP +import Prettyprinter.Render.String (renderString) +import Residual.Recognizers ( + ResidualExplanation (BecauseFailure), + classifyInstrs, + ppResidualExplanation, + ) +import Text.Printf (printf) + +newtype InclusiveRange w = InclusiveRange {getInclusiveRange :: (w, w)} instance (Integral w, Num w, Show w) => Show (InclusiveRange w) where show r = "0x" <> showHex (rangeLowerBound r) "" <> " - 0x" <> showHex (rangeUpperBound r) "" - -- sometimes nice when debugging: - -- <> " (size: " <> show (rangeSize r) <> "B)" + +-- sometimes nice when debugging: +-- <> " (size: " <> show (rangeSize r) <> "B)" rangeSize :: Num a => InclusiveRange a -> a rangeSize (getInclusiveRange -> (lo, hi)) = hi - lo + 1 @@ -94,10 +111,10 @@ ppInclusiveRange (getInclusiveRange -> (lo, hi)) = showHex lo "" <> " - " <> sho shdrInclusiveRange :: (Eq w, Ord w, Num w) => Shdr nm w -> Maybe (InclusiveRange w) shdrInclusiveRange s = - let addr = shdrAddr s in - if addr == 0 || shdrSize s == 0 - then Nothing - else Just $ InclusiveRange (shdrAddr s, shdrAddr s + shdrSize s - 1) + let addr = shdrAddr s + in if addr == 0 || shdrSize s == 0 + then Nothing + else Just $ InclusiveRange (shdrAddr s, shdrAddr s + shdrSize s - 1) inInclusiveRange :: (Num w, Ord w, Show w) => InclusiveRange w -> w -> Bool inInclusiveRange r v = rangeLowerBound r <= v && v <= rangeUpperBound r @@ -105,7 +122,7 @@ inInclusiveRange r v = rangeLowerBound r <= v && v <= rangeUpperBound r type RangedShdr nm w = (InclusiveRange w, Shdr nm w) rangedShdr :: (Num w, Ord w) => Shdr nm w -> Maybe (RangedShdr nm w) -rangedShdr s = (, s) <$> shdrInclusiveRange s +rangedShdr s = (,s) <$> shdrInclusiveRange s createLogger :: ReoptOptions -> FilePath -> IO (ReoptLogEvent arch -> IO ()) createLogger reoptOpts filePath = do @@ -122,21 +139,23 @@ performRecovery :: (a, FilePath) -> IO (ElfHeaderInfo 64, DiscoveryState X86_64, RecoverX86Output) performRecovery residualOpts reoptOpts (_idx, fPath) = do - let lOpts = LoadOptions {loadOffset = Nothing} + let lOpts = LoadOptions{loadOffset = Nothing} let unnamedFunPrefix = BSC.pack "reopt" -- hPutStrLn stderr $ "[" ++ (show index) ++ " of " ++ (show totalCount) -- ++ "] Analyzing " ++ fPath ++ " ..." bs <- checkedReadFile fPath annDecl <- - runReoptM printLogEvent - (resolveHeader (roHeader residualOpts) (roClangPath residualOpts)) >>= - either (error . show) return + runReoptM + printLogEvent + (resolveHeader (roHeader residualOpts) (roClangPath residualOpts)) + >>= either (error . show) return hdrInfo <- handleEitherStringWithExit $ parseElfHeaderInfo64 fPath bs logger <- createLogger reoptOpts fPath (_os, ds, recovOut, _) <- - handleEitherWithExit =<< - runReoptM logger - (recoverX86Elf lOpts reoptOpts annDecl unnamedFunPrefix hdrInfo) + handleEitherWithExit + =<< runReoptM + logger + (recoverX86Elf lOpts reoptOpts annDecl unnamedFunPrefix hdrInfo) return (hdrInfo, ds, recovOut) data PartitionedSegments = PartitionedSegments @@ -159,13 +178,13 @@ computeResidualSegments discoveryState recoveredModule = do } let blocks = concatMap fnBlocks (recoveredDefs recoveredModule) let blockSeg (b :: FnBlock X86_64) = - let addr = fnBlockLabelAddr (fbLabel b) in - case segoffAsAbsoluteAddr addr of - Nothing -> - inclusiveRangeFromBaseAndSize - (memWordValue (segmentOffset (segoffSegment addr)) + memWordValue (segoffOffset addr)) - (fbSize b) - Just w -> inclusiveRangeFromBaseAndSize (memWordValue w) (fbSize b) + let addr = fnBlockLabelAddr (fbLabel b) + in case Mem.segoffAsAbsoluteAddr addr of + Nothing -> + inclusiveRangeFromBaseAndSize + (Mem.memWordValue (Mem.segmentOffset (Mem.segoffSegment addr)) + Mem.memWordValue (Mem.segoffOffset addr)) + (fbSize b) + Just w -> inclusiveRangeFromBaseAndSize (Mem.memWordValue w) (fbSize b) let blockSegs = map blockSeg blocks return $ foldl registerAsBlockSegment allMemorySegments blockSegs @@ -186,7 +205,7 @@ displayResiduals residualOpts parts residualInfos = do runResidual :: Options -> ResidualOptions -> ReoptOptions -> IO () runResidual _opts residualOpts reoptOpts = do files <- findAllElfFilesInDirs (roPaths residualOpts) - forM_ files $ \ file -> do + forM_ files $ \file -> do (hdrInfo, discoveryState, recovOut) <- performRecovery residualOpts reoptOpts file let mem = memory discoveryState partitionedSegs <- computeResidualSegments discoveryState $ recoveredModule recovOut @@ -198,13 +217,13 @@ segmentsFootprint = sum . map (uncurry subtract . getInclusiveRange) memoryToSegmentList :: Memory 64 -> [InclusiveRange Word64] memoryToSegmentList m = map segBounds esegs - where - esegs = filter (Perm.isExecutable . segmentFlags) (memSegments m) - -- assumes sorted, non-overlapping - segBounds eseg = - inclusiveRangeFromBaseAndSize - (memWordValue (segmentOffset eseg)) - (memWordValue (segmentSize eseg)) + where + esegs = filter (Perm.isExecutable . Mem.segmentFlags) (Mem.memSegments m) + -- assumes sorted, non-overlapping + segBounds eseg = + inclusiveRangeFromBaseAndSize + (Mem.memWordValue (Mem.segmentOffset eseg)) + (Mem.memWordValue (Mem.segmentSize eseg)) -------------------------------------------------------------------------------- -- Segment lists @@ -247,9 +266,11 @@ displayResidualsForHuman ranges fps = ppDisInstr :: Int -> DisassembledAddr -> String ppDisInstr ofs da = offset <> " " <> instr da - where - offset = printf "0x%08x" (ofs + disOffset da) - instr = maybe "???\n" + where + offset = printf "0x%08x" (ofs + disOffset da) + instr = + maybe + "???\n" (renderString . PP.layoutCompact . (<> PP.hardline) . ppInstruction) . disInstruction @@ -261,7 +282,7 @@ ppDisSegment ofs = concatMap (ppDisInstr (fromIntegral ofs)) splitSegmentAtAddresses :: [Word64] -> Segment -> [Segment] splitSegmentAtAddresses addrs seg@(getInclusiveRange -> (lo, hi)) = case find (inInclusiveRange (InclusiveRange (lo + 1, hi))) addrs of - Nothing -> [seg] + Nothing -> [seg] Just split -> InclusiveRange (lo, split - 1) : splitSegmentAtAddresses addrs (InclusiveRange (split, hi)) -- | Splits a list of segments into a more fine-grained list of segments, based @@ -301,8 +322,8 @@ registerAsBlockSegment part@(residualSegments -> map getInclusiveRange -> sl) (g chunkBytes :: MemChunk 64 -> Maybe BSC.ByteString chunkBytes = \case - ByteRegion bs -> Just bs - _ -> error "chunkBytes: not a ByteRegion" + Mem.ByteRegion bs -> Just bs + _ -> error "chunkBytes: not a ByteRegion" -- chunksBytes :: [MemChunk 64] -> Maybe [BSC.ByteString] -- chunksBytes = traverse chunkBytes @@ -317,27 +338,27 @@ chunkBytes = \case segmentInstrs :: Memory 64 -> InclusiveRange Word64 -> Maybe [DisassembledAddr] segmentInstrs m r = do - ofs <- resolveAbsoluteAddr m (memWord (rangeLowerBound r)) - case segoffContentsAfter ofs of + ofs <- Mem.resolveAbsoluteAddr m (Mem.memWord (rangeLowerBound r)) + case Mem.segoffContentsAfter ofs of Right chunks -> do - let seg = forcedTakeMemChunks chunks (memWord (rangeSize r)) + let seg = Mem.forcedTakeMemChunks chunks (Mem.memWord (rangeSize r)) concat <$> traverse (fmap disassembleBuffer . chunkBytes) seg _ -> Nothing symbolAtAddress :: Maybe (Symtab 64) -> Word64 -> Maybe String symbolAtAddress mSymTab addr = - let symbolEntryForSegment symTab = Vec.find ((== addr) . steValue) $ symtabEntries @64 symTab in - let symbolForSegment = fmap (BSC.unpack . steName) . symbolEntryForSegment in - symbolForSegment =<< mSymTab + let symbolEntryForSegment symTab = Vec.find ((== addr) . steValue) $ symtabEntries @64 symTab + in let symbolForSegment = fmap (BSC.unpack . steName) . symbolEntryForSegment + in symbolForSegment =<< mSymTab data ResidualRangeInfo = ResidualRangeInfo - { rriRange :: InclusiveRange Word64 - , -- | Just the difference between the bounds of the range, for convenience - rriFootprint :: Word64 - , rriSection :: Maybe (Shdr BSC.ByteString Word64) - , rriSymbolName :: Maybe String + { rriRange :: InclusiveRange Word64 + , rriFootprint :: Word64 + -- ^ Just the difference between the bounds of the range, for convenience + , rriSection :: Maybe (Shdr BSC.ByteString Word64) + , rriSymbolName :: Maybe String , rriInstructions :: Maybe [DisassembledAddr] - , rriExplanation :: Maybe ResidualExplanation + , rriExplanation :: Maybe ResidualExplanation } classifyResidual :: @@ -348,12 +369,12 @@ classifyResidual :: classifyResidual recovOut range instrs = case classifyInstrs =<< instrs of Just expl -> Just expl - Nothing -> + Nothing -> let failures = Map.toList $ summaryFailures recovOut - overlaps (k, _) = maybe False (inInclusiveRange range . memWordValue) (segoffAsAbsoluteAddr k) - in - BecauseFailure . snd <$> find overlaps failures + overlaps (k, _) = maybe False (inInclusiveRange range . Mem.memWordValue) (Mem.segoffAsAbsoluteAddr k) + in + BecauseFailure . snd <$> find overlaps failures constructResidualRangeInfos :: ElfHeaderInfo 64 -> diff --git a/src/Reopt.hs b/src/Reopt.hs index 03093ea2..704ca6ee 100644 --- a/src/Reopt.hs +++ b/src/Reopt.hs @@ -218,7 +218,7 @@ import Data.Parameterized.Some (Some (..)) import Data.Parameterized.TraversableF (FoldableF) import Data.Set qualified as Set import Data.String (IsString (..)) -import qualified Data.Text as T +import Data.Text qualified as T import Data.Vector qualified as V import Data.Word (Word16, Word32, Word64) import Flexdis86 qualified as F @@ -624,8 +624,9 @@ checkBlockError b = do , Events.discErrorBlockAddr = a , Events.discErrorBlockSize = Macaw.blockSize b , Events.discErrorBlockInsnIndex = length (Macaw.pblockStmts b) - , Events.discErrorMessage = "Unclassified control flow transfer.\n" - <> T.intercalate "\n" (map (T.pack . ("→ " <>)) reasons) + , Events.discErrorMessage = + "Unclassified control flow transfer.\n" + <> T.intercalate "\n" (map (T.pack . ("→ " <>)) reasons) } _ -> Nothing @@ -716,27 +717,28 @@ addPLTSyms :: addPLTSyms hdr strtab symtab mem regIdx m0 p = do let cl = Elf.headerClass hdr let dta = Elf.headerData hdr - let ins :: - Elf.ElfWordType w -> - (ResolvedPLTEntry, Elf.ElfWordType w) -> - (SymAddrMap w -> InitDiscM r (SymAddrMap w)) -> - (SymAddrMap w -> InitDiscM r (SymAddrMap w)) - ins o (f, _sz) cont m = do - case resolveRegionOff mem regIdx (fromIntegral o) of - Nothing -> do - initWarning $ printf "Unexpected symbol offset %s." (showHex (toInteger o) "") - cont m - Just a -> do - case f of - Reopt.PLTStub idx -> - case Elf.decodeSymtabEntry cl dta strtab symtab idx of - Left e -> do - initWarning (show e) - cont m - Right sym -> do - cont $! symAddrMapInsert sym a m - PLTNotCallable -> - cont m + let + ins :: + Elf.ElfWordType w -> + (ResolvedPLTEntry, Elf.ElfWordType w) -> + (SymAddrMap w -> InitDiscM r (SymAddrMap w)) -> + (SymAddrMap w -> InitDiscM r (SymAddrMap w)) + ins o (f, _sz) cont m = do + case resolveRegionOff mem regIdx (fromIntegral o) of + Nothing -> do + initWarning $ printf "Unexpected symbol offset %s." (showHex (toInteger o) "") + cont m + Just a -> do + case f of + Reopt.PLTStub idx -> + case Elf.decodeSymtabEntry cl dta strtab symtab idx of + Left e -> do + initWarning (show e) + cont m + Right sym -> do + cont $! symAddrMapInsert sym a m + PLTNotCallable -> + cont m Map.foldrWithKey' ins pure (pltMap p) m0 shdrContents :: @@ -745,11 +747,13 @@ shdrContents :: Elf.Shdr nm (Elf.ElfWordType w) -> BS.ByteString shdrContents hdrInfo shdr = - let fileOff = Elf.shdrOff shdr - size = Elf.shdrSize shdr - in BS.take (fromIntegral size) $ - BS.drop (fromIntegral fileOff) $ - Elf.headerFileContents hdrInfo + let + fileOff = Elf.shdrOff shdr + size = Elf.shdrSize shdr + in + BS.take (fromIntegral size) $ + BS.drop (fromIntegral fileOff) $ + Elf.headerFileContents hdrInfo ------------------------------------------------------------------------ -- Explore a control flow graph. @@ -798,8 +802,9 @@ insSymbol mem baseAddr (idx, symEntry) let nm = Elf.steName symEntry let val = Elf.steValue symEntry -- Get memory address of symbol - let symAddr :: MemAddr w - symAddr = incAddr (toInteger val) baseAddr + let + symAddr :: MemAddr w + symAddr = incAddr (toInteger val) baseAddr -- Resolve address as segment offset. case asSegmentOff mem symAddr of Just addr -> @@ -850,10 +855,12 @@ addDefinedSymbolTableFuns hdrInfo mem baseAddr symtabData strtab = do let hdr = Elf.header hdrInfo let cl = Elf.headerClass hdr let dta = Elf.headerData hdr - let symEntrySize :: Int - symEntrySize = Elf.symtabEntrySize cl - let cnt :: Word32 - cnt = fromIntegral (BS.length symtabData `quot` symEntrySize) + let + symEntrySize :: Int + symEntrySize = Elf.symtabEntrySize cl + let + cnt :: Word32 + cnt = fromIntegral (BS.length symtabData `quot` symEntrySize) let go idx | idx >= cnt = pure () | otherwise = @@ -987,14 +994,15 @@ discoverSymbolNames hdrInfo mem baseAddr = do case Elf.headerNamedShdrs hdrInfo of Left _ -> V.empty Right r -> r - let shdrNameMap :: Map BS.ByteString [Word16] - shdrNameMap = - Map.fromListWith - (++) - [ (Elf.shdrName s, [fromIntegral (idx - 1)]) - | idx <- [1 .. V.length shdrs] - , let s = shdrs V.! (idx - 1) - ] + let + shdrNameMap :: Map BS.ByteString [Word16] + shdrNameMap = + Map.fromListWith + (++) + [ (Elf.shdrName s, [fromIntegral (idx - 1)]) + | idx <- [1 .. V.length shdrs] + , let s = shdrs V.! (idx - 1) + ] symAddrMap0 <- do let nm = "static symbol table" withSymtab hdrInfo shdrNameMap nm ".symtab" symAddrMapEmpty $ \symtab strtab -> do @@ -1168,33 +1176,37 @@ initExecDiscovery baseAddr hdrInfo ainfo pltFn reoptOpts = elfInstances hdrInfo pure symAddrMap0 -- Exclude PLT bounds - let addrIsNotInPLT :: ArchSegmentOff arch -> Bool - addrIsNotInPLT = - case mPLTRes of - Nothing -> const True - Just pltRes -> \a -> do - let off = fromIntegral $ addrOffset (segoffAddr a) - case Map.lookupLE off (pltMap pltRes) of - Just (entryOff, (_pltFn, entrySize)) - | off - entryOff < entrySize -> - False - _ -> True + let + addrIsNotInPLT :: ArchSegmentOff arch -> Bool + addrIsNotInPLT = + case mPLTRes of + Nothing -> const True + Just pltRes -> \a -> do + let off = fromIntegral $ addrOffset (segoffAddr a) + case Map.lookupLE off (pltMap pltRes) of + Just (entryOff, (_pltFn, entrySize)) + | off - entryOff < entrySize -> + False + _ -> True -- Maps section header names to the section headers for it. - let shdrMap :: Map BS.ByteString [Elf.Shdr BS.ByteString (Elf.ElfWordType (Macaw.ArchAddrWidth arch))] - shdrMap = - Map.fromListWith (++) [(Elf.shdrName s, [s]) | s <- V.toList shdrs] + let + shdrMap :: Map BS.ByteString [Elf.Shdr BS.ByteString (Elf.ElfWordType (Macaw.ArchAddrWidth arch))] + shdrMap = + Map.fromListWith (++) [(Elf.shdrName s, [s]) | s <- V.toList shdrs] let addrInRodata = case Map.findWithDefault [] ".rodata" shdrMap of [shdr] -> - let sOff :: Word64 - sOff = fromIntegral $ Elf.shdrAddr shdr - sSize :: Word64 - sSize = fromIntegral (Elf.shdrSize shdr) - in \a -> - let aOff = memWordValue $ addrOffset (segoffAddr a) - in sOff <= aOff && (aOff - sOff) < sSize + let + sOff :: Word64 + sOff = fromIntegral $ Elf.shdrAddr shdr + sSize :: Word64 + sSize = fromIntegral (Elf.shdrSize shdr) + in + \a -> + let aOff = memWordValue $ addrOffset (segoffAddr a) + in sOff <= aOff && (aOff - sOff) < sSize _ -> const False let explorePred a = addrIsNotInPLT a && not (addrInRodata a) @@ -1210,8 +1222,9 @@ initExecDiscovery baseAddr hdrInfo ainfo pltFn reoptOpts = elfInstances hdrInfo ehframeEntryPoints hdrInfo shdrMap mem (addrBase baseAddr) (maybeToList entryAddr) -- Create initial discovery state. - let regInfo :: RegionInfo - regInfo = HasDefaultRegion (addrBase baseAddr) + let + regInfo :: RegionInfo + regInfo = HasDefaultRegion (addrBase baseAddr) s <- case runExcept (initDiscState mem ehFrameAddrs regInfo symAddrMap explorePred ainfo reoptOpts) of Left e -> initError e @@ -1294,8 +1307,9 @@ doInit loadOpts hdrInfo ainfo pltFn reoptOpts = elfInstances hdrInfo $ do let entryAddr = Nothing -- Get region of text segment let regIdx = segmentBase (segoffSegment textBaseAddr) - let regInfo :: RegionInfo - regInfo = HasDefaultRegion regIdx + let + regInfo :: RegionInfo + regInfo = HasDefaultRegion regIdx let explorePred = const True s <- case runExcept (initDiscState mem (maybeToList entryAddr) regInfo symAddrMap explorePred ainfo reoptOpts) of Left e -> initError e @@ -1310,17 +1324,19 @@ doInit loadOpts hdrInfo ainfo pltFn reoptOpts = elfInstances hdrInfo $ do -- Executable Elf.ET_EXEC -> do -- Get base address to use for computing section offsets. - let baseAddr :: MemAddr (Macaw.ArchAddrWidth arch) - baseAddr = MemAddr{addrBase = 0, addrOffset = fromInteger (loadRegionBaseOffset loadOpts)} + let + baseAddr :: MemAddr (Macaw.ArchAddrWidth arch) + baseAddr = MemAddr{addrBase = 0, addrOffset = fromInteger (loadRegionBaseOffset loadOpts)} initExecDiscovery baseAddr hdrInfo ainfo pltFn reoptOpts -- Shared library or position-independent executable. Elf.ET_DYN -> do -- Get base address to use for computing section offsets. - let baseAddr :: MemAddr (Macaw.ArchAddrWidth arch) - baseAddr = - case loadOffset loadOpts of - Just o -> MemAddr{addrBase = 0, addrOffset = fromIntegral o} - Nothing -> MemAddr{addrBase = 1, addrOffset = 0} + let + baseAddr :: MemAddr (Macaw.ArchAddrWidth arch) + baseAddr = + case loadOffset loadOpts of + Just o -> MemAddr{addrBase = 0, addrOffset = fromIntegral o} + Nothing -> MemAddr{addrBase = 1, addrOffset = 0} initExecDiscovery baseAddr hdrInfo ainfo pltFn reoptOpts Elf.ET_CORE -> do initError "Core files unsupported." @@ -1415,19 +1431,20 @@ findGnuDebugLinkSection :: Elf.ElfHeaderInfo w -> Maybe String findGnuDebugLinkSection elfInfo = do - let secDataMap :: - Map - BSC.ByteString - [ ( Elf.FileRange (Elf.ElfWordType w) - , Elf.ElfSection (Elf.ElfWordType w) - ) - ] - secDataMap = - Map.fromListWith - (++) - [ (Elf.elfSectionName sec, [(r, sec)]) - | (r, sec) <- V.toList (Elf.headerSections elfInfo) - ] + let + secDataMap :: + Map + BSC.ByteString + [ ( Elf.FileRange (Elf.ElfWordType w) + , Elf.ElfSection (Elf.ElfWordType w) + ) + ] + secDataMap = + Map.fromListWith + (++) + [ (Elf.elfSectionName sec, [(r, sec)]) + | (r, sec) <- V.toList (Elf.headerSections elfInfo) + ] case Map.findWithDefault [] ".gnu_debuglink" secDataMap of [] -> Nothing (_, s) : _ -> @@ -1695,31 +1712,33 @@ headerTypeMap hdrAnn dynDepsTypeMap symAddrMap noretMap = do -- -- This is used when we see a function jumps to a defined address. addrAnnTypeMap <- do - let insSymType :: - Map (ArchSegmentOff arch) ReoptFunType -> - (BS.ByteString, ReoptFunType) -> - ReoptM arch r (Map (ArchSegmentOff arch) ReoptFunType) - insSymType m (sym, annTp) = do - case symAddrMapLookup symAddrMap sym of - Left SymAddrMapNotFound -> do - -- Silently drop symbols without addresses as they may be undefined. - pure m - Left SymAddrMapAmbiguous -> do - globalStepWarning Events.HeaderTypeInference $ - "Ambiguous symbol " ++ BSC.unpack sym ++ "." - pure m - Right addr -> do - pure $! Map.insert addr annTp m + let + insSymType :: + Map (ArchSegmentOff arch) ReoptFunType -> + (BS.ByteString, ReoptFunType) -> + ReoptM arch r (Map (ArchSegmentOff arch) ReoptFunType) + insSymType m (sym, annTp) = do + case symAddrMapLookup symAddrMap sym of + Left SymAddrMapNotFound -> do + -- Silently drop symbols without addresses as they may be undefined. + pure m + Left SymAddrMapAmbiguous -> do + globalStepWarning Events.HeaderTypeInference $ + "Ambiguous symbol " ++ BSC.unpack sym ++ "." + pure m + Right addr -> do + pure $! Map.insert addr annTp m foldlM insSymType Map.empty (Map.toList nameAnnTypeMap) - let annTypeMap :: FunTypeMaps (Macaw.ArchAddrWidth arch) - annTypeMap = - FunTypeMaps - { nameToAddrMap = symAddrMap - , nameTypeMap = nameAnnTypeMap - , addrTypeMap = addrAnnTypeMap - , noreturnMap = noretMap - } + let + annTypeMap :: FunTypeMaps (Macaw.ArchAddrWidth arch) + annTypeMap = + FunTypeMaps + { nameToAddrMap = symAddrMap + , nameTypeMap = nameAnnTypeMap + , addrTypeMap = addrAnnTypeMap + , noreturnMap = noretMap + } globalStepFinished Events.HeaderTypeInference () pure annTypeMap @@ -2129,42 +2148,46 @@ inferFunctionTypeFromDemands :: Map (MemSegmentOff 64) (DemandSet X86Reg) -> Map (MemSegmentOff 64) X86FunTypeInfo inferFunctionTypeFromDemands dm = - let go :: - DemandSet X86Reg -> - Map (MemSegmentOff 64) (RegisterSet X86Reg) -> - Map (MemSegmentOff 64) (RegisterSet X86Reg) - go ds = Map.unionWith Set.union (functionResultDemands ds) - - retDemands :: Map (MemSegmentOff 64) (RegisterSet X86Reg) - retDemands = foldr go Map.empty dm - - -- drop the suffix which isn't a member of the arg set. This - -- allows e.g. arg0, arg2 to go to arg0, arg1, arg2. - dropArgSuffix :: - (a -> X86Reg tp) -> - [a] -> - RegisterSet X86Reg -> - [a] - dropArgSuffix f regs rs = - reverse $ dropWhile (not . (`Set.member` rs) . Some . f) $ reverse regs - - -- Turns a set of arguments into a prefix of x86 argument registers and friends - orderPadArgs :: (RegisterSet X86Reg, RegisterSet X86Reg) -> X86FunTypeInfo - orderPadArgs (argSet, retSet) = - let args = - fmap ArgBV64 (dropArgSuffix X86_GP x86GPPArgumentRegs argSet) - ++ fmap (ArgZMM ZMM512D) (dropArgSuffix X86_ZMMReg [0 .. 7] argSet) - rets = - fmap (Some . RetBV64) (dropArgSuffix X86_GP [F.RAX, F.RDX] retSet) - ++ fmap (Some . RetZMM ZMM512D) (dropArgSuffix X86_ZMMReg [0, 1] retSet) - in X86NonvarargFunType args rets - in orderPadArgs - <$> Map.mergeWithKey - (\_ ds rets -> Just (registerDemands ds, rets)) - (fmap (\ds -> (registerDemands ds, mempty))) - (fmap (mempty,)) - dm - retDemands + let + go :: + DemandSet X86Reg -> + Map (MemSegmentOff 64) (RegisterSet X86Reg) -> + Map (MemSegmentOff 64) (RegisterSet X86Reg) + go ds = Map.unionWith Set.union (functionResultDemands ds) + + retDemands :: Map (MemSegmentOff 64) (RegisterSet X86Reg) + retDemands = foldr go Map.empty dm + + -- drop the suffix which isn't a member of the arg set. This + -- allows e.g. arg0, arg2 to go to arg0, arg1, arg2. + dropArgSuffix :: + (a -> X86Reg tp) -> + [a] -> + RegisterSet X86Reg -> + [a] + dropArgSuffix f regs rs = + reverse $ dropWhile (not . (`Set.member` rs) . Some . f) $ reverse regs + + -- Turns a set of arguments into a prefix of x86 argument registers and friends + orderPadArgs :: (RegisterSet X86Reg, RegisterSet X86Reg) -> X86FunTypeInfo + orderPadArgs (argSet, retSet) = + let + args = + fmap ArgBV64 (dropArgSuffix X86_GP x86GPPArgumentRegs argSet) + ++ fmap (ArgZMM ZMM512D) (dropArgSuffix X86_ZMMReg [0 .. 7] argSet) + rets = + fmap (Some . RetBV64) (dropArgSuffix X86_GP [F.RAX, F.RDX] retSet) + ++ fmap (Some . RetZMM ZMM512D) (dropArgSuffix X86_ZMMReg [0, 1] retSet) + in + X86NonvarargFunType args rets + in + orderPadArgs + <$> Map.mergeWithKey + (\_ ds rets -> Just (registerDemands ds, rets)) + (fmap (\ds -> (registerDemands ds, mempty))) + (fmap (mempty,)) + dm + retDemands resolveReoptFunType :: Monad m => @@ -2207,24 +2230,26 @@ x86ArgumentAnalysis sysp resolveFunName resolveFunType discState = do -- Generate map from symbol names to known type. let mem = Macaw.memory discState -- Compute only those functions whose types are not known. - let known :: Macaw.DiscoveryFunInfo X86_64 ids -> Bool - known f = - case resolveFunName (Macaw.discoveredFunAddr f) of - Just nm -> isJust (resolveFunType nm) - Nothing -> False + let + known :: Macaw.DiscoveryFunInfo X86_64 ids -> Bool + known f = + case resolveFunName (Macaw.discoveredFunAddr f) of + Just nm -> isJust (resolveFunType nm) + Nothing -> False let shouldPropagate (Some f) = not (known f) && isNothing (matchPLT f) globalStepStarted Events.FunctionArgInference let (dems, summaryFails) = do - let resolveFn :: - MemSegmentOff 64 -> - RegState X86Reg (Value X86_64 ids) -> - Either String [Some (Value X86_64 ids)] - resolveFn callSite callRegs = do - case x86CallRegs mem resolveFunName resolveFunType callSite callRegs of - Left rsn -> Left (ppRegisterUseErrorReason rsn) - Right r -> Right (callArgValues r) + let + resolveFn :: + MemSegmentOff 64 -> + RegState X86Reg (Value X86_64 ids) -> + Either String [Some (Value X86_64 ids)] + resolveFn callSite callRegs = do + case x86CallRegs mem resolveFunName resolveFunType callSite callRegs of + Left rsn -> Left (ppRegisterUseErrorReason rsn) + Right r -> Right (callArgValues r) functionDemands (x86DemandInfo sysp) mem resolveFn $ filter shouldPropagate $ Macaw.exploredFunctions discState @@ -2261,14 +2286,15 @@ doRecoverX86 :: ReoptM X86_64 r RecoverX86Output doRecoverX86 unnamedFunPrefix sysp symAddrMap debugTypeMap discState = do -- Map names to known function types when we have explicit information. - let knownFunTypeMap :: Map BS.ByteString (MemSegmentOff 64, X86FunTypeInfo) - knownFunTypeMap = - Map.fromList - [ (recoveredFunctionName symAddrMap unnamedFunPrefix addr, (addr, xtp)) - | (addr, rtp) <- Map.toList (addrTypeMap debugTypeMap) - , Right xtp <- [runExcept (resolveReoptFunType rtp)] - ] - <> Map.mapMaybeWithKey (resolveX86Type symAddrMap) (nameTypeMap debugTypeMap) + let + knownFunTypeMap :: Map BS.ByteString (MemSegmentOff 64, X86FunTypeInfo) + knownFunTypeMap = + Map.fromList + [ (recoveredFunctionName symAddrMap unnamedFunPrefix addr, (addr, xtp)) + | (addr, rtp) <- Map.toList (addrTypeMap debugTypeMap) + , Right xtp <- [runExcept (resolveReoptFunType rtp)] + ] + <> Map.mapMaybeWithKey (resolveX86Type symAddrMap) (nameTypeMap debugTypeMap) -- Used to compute sizes of functions for overwriting purposes. let addrUseMap = mkFunUseMap discState @@ -2276,20 +2302,21 @@ doRecoverX86 unnamedFunPrefix sysp symAddrMap debugTypeMap discState = do let mem = Macaw.memory discState -- Maps address to name of function to use. - let funNameMap :: Map (MemSegmentOff 64) BS.ByteString - funNameMap = - Map.mapWithKey - (localFunctionName unnamedFunPrefix) - (samAddrMap symAddrMap) - <> Map.fromList - [ (addr, nm) - | Some finfo <- Macaw.exploredFunctions discState - , let addr = Macaw.discoveredFunAddr finfo - , Map.notMember addr (samAddrMap symAddrMap) - , let nm = case matchPLT finfo of - Just sym -> versymName sym - Nothing -> nosymFunctionName unnamedFunPrefix addr - ] + let + funNameMap :: Map (MemSegmentOff 64) BS.ByteString + funNameMap = + Map.mapWithKey + (localFunctionName unnamedFunPrefix) + (samAddrMap symAddrMap) + <> Map.fromList + [ (addr, nm) + | Some finfo <- Macaw.exploredFunctions discState + , let addr = Macaw.discoveredFunAddr finfo + , Map.notMember addr (samAddrMap symAddrMap) + , let nm = case matchPLT finfo of + Just sym -> versymName sym + Nothing -> nosymFunctionName unnamedFunPrefix addr + ] -- Infer registers each function demands. (fDems, summaryFailures) <- do @@ -2297,16 +2324,17 @@ doRecoverX86 unnamedFunPrefix sysp symAddrMap debugTypeMap discState = do let resolveFunType fnm = snd <$> Map.lookup fnm knownFunTypeMap x86ArgumentAnalysis sysp resolveFunName resolveFunType discState - let funTypeMap :: Map BS.ByteString (MemSegmentOff 64, X86FunTypeInfo) - funTypeMap = - knownFunTypeMap - <> Map.fromList - [ (nm, (faddr, tp)) - | Some finfo <- Macaw.exploredFunctions discState - , let faddr = Macaw.discoveredFunAddr finfo - , let nm = Map.findWithDefault (error "Address undefined in funNameMap") faddr funNameMap - , tp <- maybeToList $ Map.lookup faddr fDems - ] + let + funTypeMap :: Map BS.ByteString (MemSegmentOff 64, X86FunTypeInfo) + funTypeMap = + knownFunTypeMap + <> Map.fromList + [ (nm, (faddr, tp)) + | Some finfo <- Macaw.exploredFunctions discState + , let faddr = Macaw.discoveredFunAddr finfo + , let nm = Map.findWithDefault (error "Address undefined in funNameMap") faddr funNameMap + , tp <- maybeToList $ Map.lookup faddr fDems + ] fnDefsAndLogEvents <- fmap catMaybes $ forM (Macaw.exploredFunctions discState) $ \(Some finfo) -> do @@ -2349,17 +2377,20 @@ doRecoverX86 unnamedFunPrefix sysp symAddrMap debugTypeMap discState = do Right fn -> do funStepFinished Events.Recovery fnId () pure (Just fn) - let fnDefs = map recoveredFunction fnDefsAndLogEvents - logEvents = concatMap llvmLogEvents fnDefsAndLogEvents + let + fnDefs = map recoveredFunction fnDefsAndLogEvents + logEvents = concatMap llvmLogEvents fnDefsAndLogEvents -- Get list of names of functions defined - let definedNames :: Set.Set BSC.ByteString - definedNames = - Set.fromList $ - recoveredFunctionName symAddrMap unnamedFunPrefix . fnAddr <$> fnDefs + let + definedNames :: Set.Set BSC.ByteString + definedNames = + Set.fromList $ + recoveredFunctionName symAddrMap unnamedFunPrefix . fnAddr <$> fnDefs -- Get all functions that are referenced, but not defined in the module. - let declFunTypeMap :: FunctionTypeMap X86_64 - declFunTypeMap = foldl (getReferencedFunctions definedNames) Map.empty fnDefs + let + declFunTypeMap :: FunctionTypeMap X86_64 + declFunTypeMap = foldl (getReferencedFunctions definedNames) Map.empty fnDefs let fnDecls = [ FunctionDecl @@ -2382,23 +2413,25 @@ doRecoverX86 unnamedFunPrefix sysp symAddrMap debugTypeMap discState = do , recoveredDefs = fnDefs } - let mkObjFunDef :: Function X86_64 -> ObjFunDef - mkObjFunDef f = - ObjFunDef - { ofdObjName = fnName f - , ofdBinAddr = memWordValue (addrOffset (segoffAddr (fnAddr f))) - , ofdBinSize = lookupFunSize (fnAddr f) addrUseMap - } + let + mkObjFunDef :: Function X86_64 -> ObjFunDef + mkObjFunDef f = + ObjFunDef + { ofdObjName = fnName f + , ofdBinAddr = memWordValue (addrOffset (segoffAddr (fnAddr f))) + , ofdBinSize = lookupFunSize (fnAddr f) addrUseMap + } -- Map name of declared functions to the address in binary (if any) - let undefinedFuns :: V.Vector ObjFunRef - undefinedFuns = - V.fromList - [ ObjFunRef{ofrObjName = name, ofrBinAddr = memWordValue addr} - | (segOff, name) <- Map.toList funNameMap - , not (Set.member name definedNames) - , let addr = addrOffset (segoffAddr segOff) - ] + let + undefinedFuns :: V.Vector ObjFunRef + undefinedFuns = + V.fromList + [ ObjFunRef{ofrObjName = name, ofrBinAddr = memWordValue addr} + | (segOff, name) <- Map.toList funNameMap + , not (Set.member name definedNames) + , let addr = addrOffset (segoffAddr segOff) + ] let mergeRelations = MergeRelations @@ -2642,11 +2675,13 @@ renderLLVMBitcode :: ) renderLLVMBitcode llvmGenOpt cfg os recMod constraints = -- Generate LLVM module - let archOps = LLVM.x86LLVMArchOps (show os) - (m, ann, ext, logEvents) = moduleForFunctions archOps llvmGenOpt recMod constraints - -- Render into LLVM - out = HPJ.fullRender HPJ.PageMode 10000 1 pp mempty (ppLLVM cfg m) - in (out, ann, ext, logEvents) + let + archOps = LLVM.x86LLVMArchOps (show os) + (m, ann, ext, logEvents) = moduleForFunctions archOps llvmGenOpt recMod constraints + -- Render into LLVM + out = HPJ.fullRender HPJ.PageMode 10000 1 pp mempty (ppLLVM cfg m) + in + (out, ann, ext, logEvents) where pp :: HPJ.TextDetails -> Builder.Builder -> Builder.Builder pp (HPJ.Chr c) b = Builder.charUtf8 c <> b diff --git a/src/Reopt/Analysis/Domains/DiffEquations.hs b/src/Reopt/Analysis/Domains/DiffEquations.hs index 4e2cdeae..f4584df6 100644 --- a/src/Reopt/Analysis/Domains/DiffEquations.hs +++ b/src/Reopt/Analysis/Domains/DiffEquations.hs @@ -112,12 +112,14 @@ mapPrepend k v = Map.alter (\o -> Just (v : fromMaybe [] o)) k -- | Add a difference equation to constraint set. addEq :: Ord v => v -> v -> Integer -> DiffEquations v -> Maybe (DiffEquations v) addEq x y c p = do - let Diff xr xo = getRep' x 0 p - Diff yr yo = getRep' y c p + let + Diff xr xo = getRep' x 0 p + Diff yr yo = getRep' y c p if xr /= yr then do - let x_sz = repSize xr p - y_sz = repSize yr p + let + x_sz = repSize xr p + y_sz = repSize yr p -- Merge yr into xr if x_sz >= y_sz then @@ -168,12 +170,10 @@ ccSet v p = Map.delete v (ccSet' [Diff vr vo] p (Map.singleton vr vo)) ccSet' :: Ord v => [Diff v] -> DiffEquations v -> Map v Integer -> Map v Integer ccSet' [] _ m = m ccSet' (Diff _h o : r) p m = - let - -- l0 = Map.findWithDefault [] h (p^.eqRevMap) - -- Add offset o to differences. - l = (diffOffset +~ o) <$> l - in - ccSet' (l ++ r) p (foldl' (\s (Diff v d) -> Map.insert v d s) m l) + let -- l0 = Map.findWithDefault [] h (p^.eqRevMap) + -- Add offset o to differences. + l = (diffOffset +~ o) <$> l + in ccSet' (l ++ r) p (foldl' (\s (Diff v d) -> Map.insert v d s) m l) -- | @join old new@ returns `Nothing` if all the constraints in `old` are -- implied by constraints in `new`. Otherwise, it returns the constraints diff --git a/src/Reopt/ArgResolver.hs b/src/Reopt/ArgResolver.hs index 8d08af3d..e50fd0a3 100644 --- a/src/Reopt/ArgResolver.hs +++ b/src/Reopt/ArgResolver.hs @@ -1,34 +1,44 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} -module Reopt.ArgResolver - ( ArgResolver - , runArgResolver - , ArgResolverError(..) - , showArgResolverError - , addGPReg64 - , addDoubleArg - ) where -import Control.Monad.Except -import Control.Monad.State -import Data.Word -import qualified Flexdis86 as F -import Text.Printf (printf) +module Reopt.ArgResolver ( + ArgResolver, + runArgResolver, + ArgResolverError (..), + showArgResolverError, + addGPReg64, + addDoubleArg, +) where -import Reopt.CFG.Recovery (X86ArgInfo(..), ZMMType(..)) +import Control.Monad.Except ( + ExceptT, + MonadError (throwError), + MonadIO, + ) +import Control.Monad.State ( + StateT (StateT), + execStateT, + gets, + modify, + ) +import Data.Word (Word8) +import Flexdis86 qualified as F +import Text.Printf (printf) + +import Reopt.CFG.Recovery (X86ArgInfo (..), ZMMType (..)) -- | An error from parsing the external header for arguments data ArgResolverError - = OutOfGPRegs !String - -- ^ @OutOfGPRegs nm@ The argument @nm@ could not be added due to limit on general-purpose registers. - | OutOfSSERegs !String - -- ^ @OutOfSSERegs nm@ The argument @nm@ could not be added due to limit on SSE/AVX registers. - | MissingArgType !String - -- ^ Return type for argument is missing - | UnsupportedArgType !String !String - -- ^ @UnsupportedArgType nm tp@ Argument @nm@ does not support type. - | UnsupportedReturnType !String - | DebugResolveError !String - | VarArgsUnsupported + = -- | @OutOfGPRegs nm@ The argument @nm@ could not be added due to limit on general-purpose registers. + OutOfGPRegs !String + | -- | @OutOfSSERegs nm@ The argument @nm@ could not be added due to limit on SSE/AVX registers. + OutOfSSERegs !String + | -- | Return type for argument is missing + MissingArgType !String + | -- | @UnsupportedArgType nm tp@ Argument @nm@ does not support type. + UnsupportedArgType !String !String + | UnsupportedReturnType !String + | DebugResolveError !String + | VarArgsUnsupported -- | Pretty print for header errors. showArgResolverError :: ArgResolverError -> String @@ -48,30 +58,32 @@ showArgResolverError VarArgsUnsupported = "Do not support vararg functions." -- | State monad for resolving arguments. -data ArgResolverState = ARS { arsPrev :: [X86ArgInfo] - -- ^ Arguments identified in reverse order. - , arsNextGPP :: [F.Reg64] - -- ^ General purpose registers still - -- available for arguments. - , arsXMMCount :: !Word8 - -- ^ Number of xmm registers used so far. - } +data ArgResolverState = ARS + { arsPrev :: [X86ArgInfo] + -- ^ Arguments identified in reverse order. + , arsNextGPP :: [F.Reg64] + -- ^ General purpose registers still + -- available for arguments. + , arsXMMCount :: !Word8 + -- ^ Number of xmm registers used so far. + } -- | Monad used for recording arguments to resolve. -newtype ArgResolver m a = - ArgResolver (StateT ArgResolverState (ExceptT ArgResolverError m) a) +newtype ArgResolver m a + = ArgResolver (StateT ArgResolverState (ExceptT ArgResolverError m) a) deriving (Functor, Applicative, Monad, MonadError ArgResolverError, MonadIO) -- | Run the aergument resolver and get the next state. runArgResolver :: Monad m => ArgResolver m () -> ExceptT ArgResolverError m [X86ArgInfo] runArgResolver (ArgResolver m) = - let s0 = ARS { arsPrev = [] - , arsNextGPP = [ F.RDI, F.RSI, F.RDX, F.RCX, F.R8, F.R9 ] - , arsXMMCount = 0 - } + let s0 = + ARS + { arsPrev = [] + , arsNextGPP = [F.RDI, F.RSI, F.RDX, F.RCX, F.R8, F.R9] + , arsXMMCount = 0 + } in reverse . arsPrev <$> execStateT m s0 - -- | Reserve a 64-bit register for an argument addGPReg64 :: Monad m => String -> ArgResolver m () addGPReg64 nm = ArgResolver $ do @@ -79,15 +91,16 @@ addGPReg64 nm = ArgResolver $ do case regs of [] -> throwError $ OutOfGPRegs nm - (r:rest) -> do - modify $ \s -> s { arsPrev = ArgBV64 r : arsPrev s - , arsNextGPP = rest - } + (r : rest) -> do + modify $ \s -> + s + { arsPrev = ArgBV64 r : arsPrev s + , arsNextGPP = rest + } addDoubleArg :: Monad m => String -> ArgResolver m () addDoubleArg nm = ArgResolver $ do cnt <- gets arsXMMCount - if cnt >= 8 then - throwError $ OutOfSSERegs nm - else - modify $ \s -> s { arsPrev = ArgZMM ZMMDouble cnt : arsPrev s, arsXMMCount = cnt+1 } \ No newline at end of file + if cnt >= 8 + then throwError $ OutOfSSERegs nm + else modify $ \s -> s{arsPrev = ArgZMM ZMMDouble cnt : arsPrev s, arsXMMCount = cnt + 1} diff --git a/src/Reopt/CFG/FnRep.hs b/src/Reopt/CFG/FnRep.hs index 1f833f1f..639dc2a0 100644 --- a/src/Reopt/CFG/FnRep.hs +++ b/src/Reopt/CFG/FnRep.hs @@ -33,8 +33,15 @@ module Reopt.CFG.FnRep ( import Control.Monad.Identity (Identity (runIdentity)) import Data.ByteString.Char8 qualified as BSC -import Data.Kind -import Data.Parameterized.Classes +import Data.Kind (Type) +import Data.Parameterized.Classes ( + OrdF (compareF), + OrderingF (EQF, GTF, LTF), + ShowF, + TestEquality (..), + orderingF_refl, + type (:~:) (Refl), + ) import Data.Parameterized.NatRepr (type (<=)) import Data.Parameterized.Some (Some (..), viewSome) import Data.Parameterized.TraversableF (FoldableF (foldlF')) @@ -378,11 +385,13 @@ instance FnWriteMem addr val -> "*" <> PP.pretty addr PP.<+> ":=" PP.<+> PP.pretty val FnCondWriteMem cond addr val _repr -> "cond_write" PP.<+> PP.pretty cond PP.<+> PP.pretty addr PP.<+> PP.pretty val FnCall f args mret -> - let argDocs = (\(Some v) -> PP.pretty v) <$> args - retDoc = case mret of - Just (Some r) -> PP.pretty r <> " := " - Nothing -> mempty - in retDoc <> "call" PP.<+> PP.pretty f <> PP.parens (commas argDocs) + let + argDocs = (\(Some v) -> PP.pretty v) <$> args + retDoc = case mret of + Just (Some r) -> PP.pretty r <> " := " + Nothing -> mempty + in + retDoc <> "call" PP.<+> PP.pretty f <> PP.parens (commas argDocs) FnArchStmt stmt -> ppArchStmt PP.pretty stmt instance FoldFnValue FnStmt where @@ -406,9 +415,11 @@ fnBlockLabelFromAddr = FnBlockLabel instance PP.Pretty (FnBlockLabel w) where pretty (FnBlockLabel s) = - let a = segoffAddr s - o = memWordToUnsigned (addrOffset a) - in "block_" <> PP.pretty (addrBase a) <> "_" <> PP.pretty (showHex o "") + let + a = segoffAddr s + o = memWordToUnsigned (addrOffset a) + in + "block_" <> PP.pretty (addrBase a) <> "_" <> PP.pretty (showHex o "") -- | Render block label as a string fnBlockLabelString :: FnBlockLabel w -> String @@ -610,21 +621,23 @@ instance PP.Pretty (Function arch) where pretty fn = - let nm = PP.pretty (BSC.unpack (fnName fn)) - addr = PP.pretty (fnAddr fn) - ftp = fnType fn - ppArg :: Integer -> Some TypeRepr -> PP.Doc a - ppArg i (Some tp) = "arg" <> PP.pretty i <> " : " <> PP.pretty tp - atp = PP.parens (commas (zipWith ppArg [0 ..] (fnArgTypes ftp))) - rtp = case fnReturnType ftp of - Nothing -> "void" - Just (Some tp) -> PP.pretty tp - in PP.vcat - [ "function " <> nm <> " @ " <> addr <> atp <> " : " <> rtp - , PP.lbrace - , PP.indent 4 $ PP.vcat (PP.pretty <$> fnBlocks fn) - , PP.rbrace - ] + let + nm = PP.pretty (BSC.unpack (fnName fn)) + addr = PP.pretty (fnAddr fn) + ftp = fnType fn + ppArg :: Integer -> Some TypeRepr -> PP.Doc a + ppArg i (Some tp) = "arg" <> PP.pretty i <> " : " <> PP.pretty tp + atp = PP.parens (commas (zipWith ppArg [0 ..] (fnArgTypes ftp))) + rtp = case fnReturnType ftp of + Nothing -> "void" + Just (Some tp) -> PP.pretty tp + in + PP.vcat + [ "function " <> nm <> " @ " <> addr <> atp <> " : " <> rtp + , PP.lbrace + , PP.indent 4 $ PP.vcat (PP.pretty <$> fnBlocks fn) + , PP.rbrace + ] -- | A function declaration that has type information, but no recovered definition. data FunctionDecl arch = FunctionDecl diff --git a/src/Reopt/CFG/LLVM/X86.hs b/src/Reopt/CFG/LLVM/X86.hs index 89776dde..50f3d6f5 100644 --- a/src/Reopt/CFG/LLVM/X86.hs +++ b/src/Reopt/CFG/LLVM/X86.hs @@ -383,9 +383,10 @@ emitX86ArchStmt _ (X86FnStmt stmt) = WordRepVal -> "rep movsw" DWordRepVal -> "rep movsd" QWordRepVal -> "rep movsq" - let dfAsm = if df - then "std\n" ++ movsAsm ++ "\ncld" - else movsAsm + let dfAsm = + if df + then "std\n" ++ movsAsm ++ "\ncld" + else movsAsm callAsm_ noSideEffect (dfAsm ++ "\n" ++ movsAsm) @@ -404,9 +405,10 @@ emitX86ArchStmt _ (X86FnStmt stmt) = WordRepVal -> "rep stosw" DWordRepVal -> "rep stosd" QWordRepVal -> "rep stosq" - let dfAsm = if df - then "std\n" ++ stosAsm ++ "\ncld" - else stosAsm + let dfAsm = + if df + then "std\n" ++ stosAsm ++ "\ncld" + else stosAsm callAsm_ noSideEffect dfAsm diff --git a/src/Reopt/CFG/Recovery.hs b/src/Reopt/CFG/Recovery.hs index e9e3d339..4b9bb2d4 100644 --- a/src/Reopt/CFG/Recovery.hs +++ b/src/Reopt/CFG/Recovery.hs @@ -1045,12 +1045,13 @@ recoverJumpTarget retVarMap tgtAddr = do let postValues = let emsg = "Could not find post values for target." in Map.findWithDefault (error emsg) thisAddr (biPredPostValues tgtInv) - let recoverVec :: - Some (BoundLoc X86Reg) -> - Recover ids (Some (FnValue X86_64)) - recoverVec (Some (RegLoc r)) - | Just v <- MapF.lookup r retVarMap = pure (Some v) - recoverVec (Some l) = resolveInferValue tgtAddr postValues l + let + recoverVec :: + Some (BoundLoc X86Reg) -> + Recover ids (Some (FnValue X86_64)) + recoverVec (Some (RegLoc r)) + | Just v <- MapF.lookup r retVarMap = pure (Some v) + recoverVec (Some l) = resolveInferValue tgtAddr postValues l values <- traverse recoverVec (V.fromList (biPhiLocs tgtInv)) pure $! FnJumpTarget @@ -1190,21 +1191,23 @@ inferRetRegRelations (retInfo P.:< P.Nil) = } in Just (Some frv) inferRetRegRelations fields = - let insField :: - P.Index fields tp -> - X86RetInfo tp -> - MapF X86Reg (RetRegRelation (TupleType fields)) -> - MapF X86Reg (RetRegRelation (TupleType fields)) - insField idx retInfo m = - case retInfoRegPair retInfo of - Pair reg fieldRegRel -> - MapF.insert reg (RetRegRelation (IndexField idx) fieldRegRel) m - frv = - RetRegRelations - { fnRetValuesType = TupleTypeRepr (fmapFC typeRepr fields) - , fnRetValueMap = P.ifoldr insField MapF.empty fields - } - in Just (Some frv) + let + insField :: + P.Index fields tp -> + X86RetInfo tp -> + MapF X86Reg (RetRegRelation (TupleType fields)) -> + MapF X86Reg (RetRegRelation (TupleType fields)) + insField idx retInfo m = + case retInfoRegPair retInfo of + Pair reg fieldRegRel -> + MapF.insert reg (RetRegRelation (IndexField idx) fieldRegRel) m + frv = + RetRegRelations + { fnRetValuesType = TupleTypeRepr (fmapFC typeRepr fields) + , fnRetValueMap = P.ifoldr insField MapF.empty fields + } + in + Just (Some frv) getRetField :: FnValue X86_64 retType -> @@ -1357,18 +1360,19 @@ recoverBlock b = do v <- mkReturnVar tp addFnStmt (FnCall callTarget args (Just (Some v))) let rv = FnReturn v - let g :: - forall fields tp. - P.List TypeRepr fields -> - FnValue X86_64 (TupleType fields) -> - MapF X86Reg (FnValue X86_64) -> - P.Index fields tp -> - EmbeddingInv X86Reg tp -> - Recover ids (MapF X86Reg (FnValue X86_64)) - g flds s m idx (EmbeddingInv emb r) = do - fv <- evalAssignRhs $ FnEvalApp $ TupleField flds s idx - v' <- coerceRegValue (EmbeddingApp fv emb) - pure $! MapF.insert r v' m + let + g :: + forall fields tp. + P.List TypeRepr fields -> + FnValue X86_64 (TupleType fields) -> + MapF X86Reg (FnValue X86_64) -> + P.Index fields tp -> + EmbeddingInv X86Reg tp -> + Recover ids (MapF X86Reg (FnValue X86_64)) + g flds s m idx (EmbeddingInv emb r) = do + fv <- evalAssignRhs $ FnEvalApp $ TupleField flds s idx + v' <- coerceRegValue (EmbeddingApp fv emb) + pure $! MapF.insert r v' m P.ifoldlM (g fieldTypes rv) MapF.empty retEmbeddings FnJump <$> recoverJumpTarget retMap retAddr PLTStub{} -> do @@ -1702,21 +1706,25 @@ inferPrintfArgs mem nm regs initState = do Left msg -> Left $ "printf error: " ++ show s ++ "\n" ++ msg Right pas -> do -- Type in declaration - let funType :: FunctionType X86_64 - funType = do - let declArgTypes = argRegTypeRepr <$> reverse (pasArgRegs initState') - FunctionType - { fnArgTypes = declArgTypes - , fnReturnType = Just (Some (BVTypeRepr n64)) - , fnVarArgs = True - } - let fnEntry :: FnValue X86_64 (BVType 64) - fnEntry = FnFunctionEntryValue funType nm - - let x86ArgInfo :: [X86ArgInfo] - x86ArgInfo = reverse (pasArgRegs pas) - let x86RetInfo :: [Some X86RetInfo] - x86RetInfo = [Some (RetBV64 F.RAX)] + let + funType :: FunctionType X86_64 + funType = do + let declArgTypes = argRegTypeRepr <$> reverse (pasArgRegs initState') + FunctionType + { fnArgTypes = declArgTypes + , fnReturnType = Just (Some (BVTypeRepr n64)) + , fnVarArgs = True + } + let + fnEntry :: FnValue X86_64 (BVType 64) + fnEntry = FnFunctionEntryValue funType nm + + let + x86ArgInfo :: [X86ArgInfo] + x86ArgInfo = reverse (pasArgRegs pas) + let + x86RetInfo :: [Some X86RetInfo] + x86RetInfo = [Some (RetBV64 F.RAX)] Right $ CallRegs { callRegsFnType = (fnEntry, x86ArgInfo, x86RetInfo) @@ -1746,13 +1754,15 @@ mkX86FunctionType (X86PrintfFunType icnt0) = , fnVarArgs = True } mkX86FunctionType X86OpenFunType = - let stringPtrType = BVTypeRepr n64 - intType = BVTypeRepr n64 - in FunctionType - { fnArgTypes = [Some stringPtrType, Some intType] - , fnReturnType = Just (Some intType) - , fnVarArgs = True - } + let + stringPtrType = BVTypeRepr n64 + intType = BVTypeRepr n64 + in + FunctionType + { fnArgTypes = [Some stringPtrType, Some intType] + , fnReturnType = Just (Some intType) + , fnVarArgs = True + } x86TranslateCallType :: Memory 64 -> @@ -1804,10 +1814,12 @@ x86TranslateCallType _mem nm regs x86Ftp@X86OpenFunType = do -- Get number of arguments for open let argCnt = if isCreat then 3 else 2 - let args :: [X86ArgInfo] - args = fmap ArgBV64 (take argCnt x86GPPArgumentRegs) - let rets :: [Some X86RetInfo] - rets = [Some (RetBV64 F.RAX)] + let + args :: [X86ArgInfo] + args = fmap ArgBV64 (take argCnt x86GPPArgumentRegs) + let + rets :: [Some X86RetInfo] + rets = [Some (RetBV64 F.RAX)] Right CallRegs { callRegsFnType = (v, args, rets) @@ -1959,33 +1971,36 @@ recoverFunction sysp mem fInfo invMap nm curArgs curRets = do let entryBlk = fromJust $ Map.lookup entryAddr (fInfo ^. parsedBlocks) -- Insert uninitialized register into initial block location map. - let insUninit :: - Pair X86Reg (FnRegValue X86_64) -> - MapF (BoundLoc X86Reg) (FnRegValue X86_64) -> - MapF (BoundLoc X86Reg) (FnRegValue X86_64) - insUninit (Pair r v) = MapF.insertWith (\_n old -> old) (RegLoc r) v + let + insUninit :: + Pair X86Reg (FnRegValue X86_64) -> + MapF (BoundLoc X86Reg) (FnRegValue X86_64) -> + MapF (BoundLoc X86Reg) (FnRegValue X86_64) + insUninit (Pair r v) = MapF.insertWith (\_n old -> old) (RegLoc r) v -- Compute registers for first block - let locMap :: MapF (BoundLoc X86Reg) (FnRegValue X86_64) - locMap = - MapF.empty - -- Set df to 0 at function start. - & MapF.insert (RegLoc DF) (mkIdentEmbeddingApp (FnConstantBool False)) - -- Populate used arguments. - & initializeArgumentValues curArgs - -- Populate unused registers with default values. - & flip (foldr insUninit) uninitRegs + let + locMap :: MapF (BoundLoc X86Reg) (FnRegValue X86_64) + locMap = + MapF.empty + -- Set df to 0 at function start. + & MapF.insert (RegLoc DF) (mkIdentEmbeddingApp (FnConstantBool False)) + -- Populate used arguments. + & initializeArgumentValues curArgs + -- Populate unused registers with default values. + & flip (foldr insUninit) uninitRegs entryInv <- getBlockInvariants entryAddr recoveredEntryBlk <- evalRecover entryBlk entryInv [] V.empty locMap -- Recover all blocks after first. - let recoverBlk :: - MemSegmentOff 64 -> - ParsedBlock X86_64 ids -> - FunRecover ids (Maybe (FnBlock X86_64)) - recoverBlk a blk - | a == entryAddr = pure Nothing - | otherwise = Just <$> recoverInnerBlock blk + let + recoverBlk :: + MemSegmentOff 64 -> + ParsedBlock X86_64 ids -> + FunRecover ids (Maybe (FnBlock X86_64)) + recoverBlk a blk + | a == entryAddr = pure Nothing + | otherwise = Just <$> recoverInnerBlock blk blks <- Map.traverseMaybeWithKey recoverBlk (fInfo ^. parsedBlocks) let fn = diff --git a/src/Reopt/CFG/StackDepth.hs b/src/Reopt/CFG/StackDepth.hs index 7aaa305c..58b7fe0c 100644 --- a/src/Reopt/CFG/StackDepth.hs +++ b/src/Reopt/CFG/StackDepth.hs @@ -127,9 +127,11 @@ minimizeStackDepthValues = Set.fromList . Set.fold go [] . Set.map discardPositi discardPositive v = v{dynamicPart = Set.filter isNegativeDepth (dynamicPart v)} -- FIXME: can we use ordering to simplify this? go v xs = - let (_subs, xs') = partition (subsumes v) xs - dominated = any (`subsumes` v) xs' - in if not dominated then v : xs' else xs' + let + (_subs, xs') = partition (subsumes v) xs + dominated = any (`subsumes` v) xs' + in + if not dominated then v : xs' else xs' -- ----------------------------------------------------------------------------- diff --git a/src/Reopt/Events.hs b/src/Reopt/Events.hs index 20cb8510..4b42dde9 100644 --- a/src/Reopt/Events.hs +++ b/src/Reopt/Events.hs @@ -347,10 +347,10 @@ heraldLength = 11 -- [HERALD] THING withHerald :: String -> String -> IO () withHerald herald = hPutStrLn stderr . (printf formatString ("[" <> heraldText <> "] ") <>) - where - -- NOTE: 3 accounts for '[' and '] ' - heraldText = take (heraldLength - 3) herald - formatString = "%-" <> show heraldLength <> "s" + where + -- NOTE: 3 accounts for '[' and '] ' + heraldText = take (heraldLength - 3) herald + formatString = "%-" <> show heraldLength <> "s" logBeginOf :: String -> IO () logBeginOf = withHerald "BEGIN" @@ -775,9 +775,11 @@ summaryRows stats = map toCsvRow $ Map.toList $ summaryFnResults stats where toCsvRow :: (FunId, FnRecoveryResult) -> [String] toCsvRow (FunId faddr nm, res) = - let name = BS.unpack nm - hexAddr = "0x" ++ showHex faddr "" - in [summaryBinaryPath stats, name, hexAddr, show res] + let + name = BS.unpack nm + hexAddr = "0x" ++ showHex faddr "" + in + [summaryBinaryPath stats, name, hexAddr, show res] -- | Function for updating statistics using events capturing during run. recoverLogEvent :: diff --git a/src/Reopt/FunUseMap.hs b/src/Reopt/FunUseMap.hs index cebcfadf..ddf4b250 100644 --- a/src/Reopt/FunUseMap.hs +++ b/src/Reopt/FunUseMap.hs @@ -114,13 +114,15 @@ recordRegionUse :: FunUseMap w -> FunUseMap w recordRegionUse f so sz (FunUseMap regionMap) = do - let a = segoffAddr so - -- Offset of block - off :: BlockOff - off = memWordValue (addrOffset a) - initMap = initFunUseOffsetMap f off (off + sz) - updMap _ = updateFunUseOffsetMap f off (off + sz) - in FunUseMap (Map.insertWith updMap (addrBase a) initMap regionMap) + let + a = segoffAddr so + -- Offset of block + off :: BlockOff + off = memWordValue (addrOffset a) + initMap = initFunUseOffsetMap f off (off + sz) + updMap _ = updateFunUseOffsetMap f off (off + sz) + in + FunUseMap (Map.insertWith updMap (addrBase a) initMap regionMap) -- | Record memory used by block to function address recordBlockUse :: @@ -132,8 +134,9 @@ recordBlockUse f b = do -- Record jumptable backing as well. case pblockTermStmt b of ParsedLookupTable layout _ _ _ -> do - let a = jtlBackingAddr layout - sz = jtlBackingSize layout + let + a = jtlBackingAddr layout + sz = jtlBackingSize layout modify $ recordRegionUse f a sz _ -> pure () @@ -166,15 +169,17 @@ endOwnedByFun f o m regionSize = -- point in the segment that are exclusive allocated to function. lookupFunSize :: MemWidth w => MemSegmentOff w -> FunUseMap w -> Word64 lookupFunSize f (FunUseMap m) = - let seg = segoffSegment f - a = segoffAddr f - o = memWordValue (addrOffset a) - err = error "internal error: Function register index" - offMap = Map.findWithDefault err (addrBase a) m - maxValue = memWordValue (segmentSize seg) - memWordValue (segoffOffset f) - in case Map.lookup o offMap of - Just _ -> endOwnedByFun f o offMap maxValue - o - Nothing -> error $ "Unknown function " ++ showHex o "" + let + seg = segoffSegment f + a = segoffAddr f + o = memWordValue (addrOffset a) + err = error "internal error: Function register index" + offMap = Map.findWithDefault err (addrBase a) m + maxValue = memWordValue (segmentSize seg) - memWordValue (segoffOffset f) + in + case Map.lookup o offMap of + Just _ -> endOwnedByFun f o offMap maxValue - o + Nothing -> error $ "Unknown function " ++ showHex o "" totalFunUseOffsetMapSize :: FunUseOffsetMap w -> Word64 totalFunUseOffsetMapSize = Map.foldlWithKey' (\z s (e, _) -> z + (e - s)) 0 diff --git a/src/Reopt/Hints.hs b/src/Reopt/Hints.hs index 0a582eb7..fe9f5f9f 100644 --- a/src/Reopt/Hints.hs +++ b/src/Reopt/Hints.hs @@ -1,33 +1,31 @@ -{-| -This defines a JSON format for providing hints to Reopt to help its -analysis. --} -module Reopt.Hints - ( FunIdent(..) - , resolveSymName - ) where +-- | +-- This defines a JSON format for providing hints to Reopt to help its +-- analysis. +module Reopt.Hints ( + FunIdent (..), + resolveSymName, +) where -import qualified Data.ByteString.Char8 as BSC -import Data.Hashable -import Data.Word -import Numeric +import Data.ByteString.Char8 qualified as BSC +import Data.Hashable (Hashable (hashWithSalt)) +import Data.Word (Word64) +import Numeric (readHex) -- | A function identifier data FunIdent - = AddrIdent !Word64 - -- ^ A function identified by an offset in the virtual address - -- space. - | SymbolIdent !BSC.ByteString - -- ^ A function identified by a symbol name. + = -- | A function identified by an offset in the virtual address + -- space. + AddrIdent !Word64 + | -- | A function identified by a symbol name. + SymbolIdent !BSC.ByteString deriving (Eq) - instance Hashable FunIdent where - hashWithSalt i (AddrIdent w) = hashWithSalt (hashWithSalt i (0::Int)) w - hashWithSalt i (SymbolIdent w) = hashWithSalt (hashWithSalt i (1::Int)) w + hashWithSalt i (AddrIdent w) = hashWithSalt (hashWithSalt i (0 :: Int)) w + hashWithSalt i (SymbolIdent w) = hashWithSalt (hashWithSalt i (1 :: Int)) w -- | Resolve a hex string or other string as a address of symbol name. resolveSymName :: String -> FunIdent -resolveSymName ('0':'x': nm) | [(w,"")] <- readHex nm = AddrIdent w -resolveSymName ('0':'X': nm) | [(w,"")] <- readHex nm = AddrIdent w +resolveSymName ('0' : 'x' : nm) | [(w, "")] <- readHex nm = AddrIdent w +resolveSymName ('0' : 'X' : nm) | [(w, "")] <- readHex nm = AddrIdent w resolveSymName nm = SymbolIdent (BSC.pack nm) diff --git a/src/Reopt/PLTParser.hs b/src/Reopt/PLTParser.hs index 4aa1969c..0e47f139 100644 --- a/src/Reopt/PLTParser.hs +++ b/src/Reopt/PLTParser.hs @@ -50,9 +50,11 @@ ppBuffer = go . BS.unpack phdrContents :: Integral (Elf.ElfWordType w) => Elf.ElfHeaderInfo w -> Elf.Phdr w -> BS.ByteString phdrContents elf p = - let off = fromIntegral (Elf.phdrFileStart p) - sz = fromIntegral (Elf.phdrFileSize p) - in BS.take sz $ BS.drop off (Elf.headerFileContents elf) + let + off = fromIntegral (Elf.phdrFileStart p) + sz = fromIntegral (Elf.phdrFileSize p) + in + BS.take sz $ BS.drop off (Elf.headerFileContents elf) word32LE :: Word32 -> BS.ByteString word32LE x = BS.pack [w 0, w 1, w 2, w 3] @@ -289,10 +291,12 @@ endbrBndJmpInsns pltBaseAddr gotMap idx actual = do unless (BS.take 7 actual == expectedInsn) $ do Left $ "ebj: " ++ ppBuffer expectedInsn -- Compute index we are jumping to. - let actualDelta :: Word32 - actualDelta = getWord32LE actual 7 - let actualAddr :: Word64 - actualAddr = fromIntegral pltEntryAddr + 11 + fromIntegral actualDelta + let + actualDelta :: Word32 + actualDelta = getWord32LE actual 7 + let + actualAddr :: Word64 + actualAddr = fromIntegral pltEntryAddr + 11 + fromIntegral actualDelta -- Check 5-byte no-op instruction at end. let actualNop = BS.drop 11 actual let expectedNop5 = [nopl5Insn, nopl4Insn <> nopInsn] @@ -373,14 +377,16 @@ jmp8Insns pltAddr idx actual = do unless (BS.take 2 actual == indRelJmpInsnPrefix) $ do Left $ "jm8: Jmp " ++ ppBuffer indRelJmpInsnPrefix -- Indirect jump target - let actualDelta :: Word32 - actualDelta = getWord32LE actual 2 - let actualAddr :: Word64 - actualAddr = - fromIntegral pltEntryAddr - + fromIntegral idx - + 4 - + fromIntegral actualDelta + let + actualDelta :: Word32 + actualDelta = getWord32LE actual 2 + let + actualAddr :: Word64 + actualAddr = + fromIntegral pltEntryAddr + + fromIntegral idx + + 4 + + fromIntegral actualDelta unless (BS.take 2 (BS.drop 6 actual) == nopl2Insn) $ do Left $ "jm8: Nop " ++ ppBuffer nopl2Insn pure $! PLTJmp8 actualAddr @@ -413,33 +419,35 @@ checkPLT elf mpltgotAddr gotMap shdr = Elf.elfClassInstances (Elf.headerClass (E let (cnt, m) = fileSize `quotRem` entrySize when (m /= 0) $ do throwError $ printf "%s is not a multiple of expected entry size." shdrName - let pltAddr :: Elf.ElfWordType w - pltAddr = Elf.shdrAddr shdr + let + pltAddr :: Elf.ElfWordType w + pltAddr = Elf.shdrAddr shdr -- Check entries v <- forM (V.generate cnt fromIntegral) $ \(idx :: Word32) -> do -- Recognizers - let recognizers :: [BS.ByteString -> Either String PLTEntry] - recognizers - | hasPLT8Size pltContents = - [jmp8Insns pltAddr idx] - | otherwise = - case Elf.shdrName shdr of - ".plt" - | Just pltgotAddr <- mpltgotAddr - , idx == 0 -> - [ jmpPushJumpDispatch pltAddr pltgotAddr - , bndPLTInitInsn pltAddr pltgotAddr - ] - | Just pltgotAddr <- mpltgotAddr -> - [ pltIdxInsns pltAddr pltgotAddr idx - , pltBndIdxInsns idx - , endbrBndJmpInsns pltAddr gotMap idx - ] - | otherwise -> [] - ".plt.got" -> [endbrBndJmpInsns pltAddr gotMap idx] - ".plt.sec" -> [endbrBndJmpInsns pltAddr gotMap idx] - _ -> error "Unexpected section header." + let + recognizers :: [BS.ByteString -> Either String PLTEntry] + recognizers + | hasPLT8Size pltContents = + [jmp8Insns pltAddr idx] + | otherwise = + case Elf.shdrName shdr of + ".plt" + | Just pltgotAddr <- mpltgotAddr + , idx == 0 -> + [ jmpPushJumpDispatch pltAddr pltgotAddr + , bndPLTInitInsn pltAddr pltgotAddr + ] + | Just pltgotAddr <- mpltgotAddr -> + [ pltIdxInsns pltAddr pltgotAddr idx + , pltBndIdxInsns idx + , endbrBndJmpInsns pltAddr gotMap idx + ] + | otherwise -> [] + ".plt.got" -> [endbrBndJmpInsns pltAddr gotMap idx] + ".plt.sec" -> [endbrBndJmpInsns pltAddr gotMap idx] + _ -> error "Unexpected section header." -- Get entry contents let off = entrySize * fromIntegral idx let entryContents = BS.take entrySize $ BS.drop off pltContents @@ -496,8 +504,9 @@ checkRela nm dta bs allowed cnt m idx = do if idx >= cnt then pure m else do - let rela :: Elf.RelaEntry Elf.X86_64_RelocationType - rela = Elf.decodeRelaEntry dta bs (fromIntegral idx) + let + rela :: Elf.RelaEntry Elf.X86_64_RelocationType + rela = Elf.decodeRelaEntry dta bs (fromIntegral idx) when (Elf.relaType rela `notElem` allowed) $ do throwError $ printf "%s: Unexpected relocation type %s." nm (show (Elf.relaType rela)) case Elf.relaType rela of @@ -635,28 +644,31 @@ extractPLTEntries elf shdrs = runExcept $ do -- Note. There are binaries with `.plt.got` sections but no DT_PLTGOT section. pltgotAddr <- tryGetDynamicWord dynSection Elf.DT_PLTGOT - let gotAddrMap :: Map.Map Word64 Word32 - gotAddrMap = relaMap <> jmprelMap - let ins :: - PLTMap w -> - Elf.Shdr BS.ByteString (Elf.ElfWordType w) -> - Except String (PLTMap w) - ins m0 shdr - | Elf.shdrName shdr `elem` [".plt", ".plt.got", ".plt.sec"] - , Elf.shdrType shdr /= Elf.SHT_NOBITS = do - (entrySize, pltEntries) <- checkPLT elf pltgotAddr gotAddrMap shdr - let insEntry :: - PLTMap w -> - Int -> - Except String (PLTMap w) - insEntry m i = do - let addr = Elf.shdrAddr shdr + fromIntegral (entrySize * i) - let entry = pltEntries V.! i - e <- resolvePLTEntry gotAddrMap addr entry - pure $! Map.insert addr (e, fromIntegral entrySize) m - foldlM insEntry m0 (V.generate (V.length pltEntries) id) - | otherwise = - pure m0 + let + gotAddrMap :: Map.Map Word64 Word32 + gotAddrMap = relaMap <> jmprelMap + let + ins :: + PLTMap w -> + Elf.Shdr BS.ByteString (Elf.ElfWordType w) -> + Except String (PLTMap w) + ins m0 shdr + | Elf.shdrName shdr `elem` [".plt", ".plt.got", ".plt.sec"] + , Elf.shdrType shdr /= Elf.SHT_NOBITS = do + (entrySize, pltEntries) <- checkPLT elf pltgotAddr gotAddrMap shdr + let + insEntry :: + PLTMap w -> + Int -> + Except String (PLTMap w) + insEntry m i = do + let addr = Elf.shdrAddr shdr + fromIntegral (entrySize * i) + let entry = pltEntries V.! i + e <- resolvePLTEntry gotAddrMap addr entry + pure $! Map.insert addr (e, fromIntegral entrySize) m + foldlM insEntry m0 (V.generate (V.length pltEntries) id) + | otherwise = + pure m0 m <- foldlM ins Map.empty shdrs pure $ Just $ diff --git a/src/Reopt/Relinker.hs b/src/Reopt/Relinker.hs index e185bd90..242e5de1 100644 --- a/src/Reopt/Relinker.hs +++ b/src/Reopt/Relinker.hs @@ -385,18 +385,21 @@ substituteShdrName shstrtab shdr = getShdrTable :: Elf.ElfHeaderInfo 64 -> PureRelinkM (V.Vector (Elf.Shdr BS.ByteString Word64)) getShdrTable binHeaderInfo = do -- Index of section header entry for section name table. - let shstrtabShdrIndex :: Word16 - shstrtabShdrIndex = Elf.shstrtabIndex binHeaderInfo + let + shstrtabShdrIndex :: Word16 + shstrtabShdrIndex = Elf.shstrtabIndex binHeaderInfo when (shstrtabShdrIndex == 0) $ do throwError "Require non-zero shstrtab index." -- Sections in binary - let rawBinShdrs :: V.Vector (Elf.Shdr Word32 Word64) - rawBinShdrs = Elf.headerShdrs binHeaderInfo + let + rawBinShdrs :: V.Vector (Elf.Shdr Word32 Word64) + rawBinShdrs = Elf.headerShdrs binHeaderInfo when (fromIntegral shstrtabShdrIndex >= V.length rawBinShdrs) $ do throwError "Invalid binary section header table" - let binShstrtab :: BS.ByteString - binShstrtab = getShdrContents (rawBinShdrs V.! fromIntegral shstrtabShdrIndex) binHeaderInfo + let + binShstrtab :: BS.ByteString + binShstrtab = getShdrContents (rawBinShdrs V.! fromIntegral shstrtabShdrIndex) binHeaderInfo traverse (substituteShdrName binShstrtab) rawBinShdrs -------------------------------------------------------------------------------- @@ -414,9 +417,11 @@ mkAddrToSectionMap = V.ifoldl' ins Map.empty where ins m idx shdr | (Elf.shdrFlags shdr .&. Elf.shf_alloc) == Elf.shf_alloc = - let addr = Elf.shdrAddr shdr - sz = Elf.shdrSize shdr - in Map.insert addr (sz, fromIntegral idx) m + let + addr = Elf.shdrAddr shdr + sz = Elf.shdrSize shdr + in + Map.insert addr (sz, fromIntegral idx) m | otherwise = m -------------------------------------------------------------------------------- @@ -478,13 +483,15 @@ performObjRelocs :: performObjRelocs elfHdr objShdrs objRelocInfo (NewAddr addr) secIdx = do let elfDta = Elf.headerData (Elf.header elfHdr) let contents = Elf.headerFileContents elfHdr - let shdrCount :: Word32 - shdrCount = fromIntegral (V.length objShdrs) + let + shdrCount :: Word32 + shdrCount = fromIntegral (V.length objShdrs) when (secIdx >= fromIntegral shdrCount) $ do error "Internal error: invalid shdr index" let shdr = objShdrs V.! fromIntegral secIdx - let nm :: String - nm = BSC.unpack (Elf.shdrName shdr) + let + nm :: String + nm = BSC.unpack (Elf.shdrName shdr) code <- pureCheckedSlice (nm <> " section") (Elf.shdrFileRange shdr) contents case findRelaSection objRelocInfo secIdx of Nothing -> do @@ -518,23 +525,24 @@ generateOverflowSection :: [ObjectSectionIndex] -> PureRelinkM Bld.Builder generateOverflowSection objHeaderInfo objShdrs objRelocInfo overflowAddr overflowSections = do - let insObjFun :: - ObjectSectionIndex -> - (NewAddr -> Bld.Builder -> PureRelinkM Bld.Builder) -> - -- \^ Continuation that takes next address and - -- object code built so far. - NewAddr -> - -- \^ Current address - Bld.Builder -> - -- \^ Bytestring built so far. - PureRelinkM Bld.Builder - insObjFun secIdx cont addr prev = do - bytes <- - case runExcept (performObjRelocs objHeaderInfo objShdrs objRelocInfo addr secIdx) of - Left e -> throwError e - Right r -> pure r - let newAddr = incNewAddr addr (BS.length bytes) - cont newAddr (prev <> Bld.byteString bytes) + let + insObjFun :: + ObjectSectionIndex -> + (NewAddr -> Bld.Builder -> PureRelinkM Bld.Builder) -> + -- \^ Continuation that takes next address and + -- object code built so far. + NewAddr -> + -- \^ Current address + Bld.Builder -> + -- \^ Bytestring built so far. + PureRelinkM Bld.Builder + insObjFun secIdx cont addr prev = do + bytes <- + case runExcept (performObjRelocs objHeaderInfo objShdrs objRelocInfo addr secIdx) of + Left e -> throwError e + Right r -> pure r + let newAddr = incNewAddr addr (BS.length bytes) + cont newAddr (prev <> Bld.byteString bytes) foldr insObjFun (\_ p -> pure p) overflowSections overflowAddr mempty -- | Make the new code program header @@ -561,37 +569,39 @@ mkBinCodeContent binContents objHeaderInfo objShdrs objRelocInfo objInfo codeAdd let codeEndAddr = codeAddr + fromIntegral codeSize -- Map from addresses of functions to be replaced in -- binary to the section index of that object - let objBinCodeMap :: Map Word64 ObjectSectionIndex - objBinCodeMap = - Map.takeWhileAntitone (< codeEndAddr) $ - Map.dropWhileAntitone (< codeAddr) $ - replaceAddrSectionMap objInfo + let + objBinCodeMap :: Map Word64 ObjectSectionIndex + objBinCodeMap = + Map.takeWhileAntitone (< codeEndAddr) $ + Map.dropWhileAntitone (< codeAddr) $ + replaceAddrSectionMap objInfo -- Infer bytes in regions - let insObjBin :: - Word64 -> - -- \^ Address of next function in object file. - ObjectSectionIndex -> - -- \^ Section index in object file. - ((Word64, BS.ByteString, Bld.Builder) -> Except String a) -> - (Word64, BS.ByteString, Bld.Builder) -> - Except String a - insObjBin nextObjAddr secIdx cont (prevEndAddr, curBytes, prev) = do - when (nextObjAddr < prevEndAddr) $ do - throwError $ - printf - "Expected next function %s after previous function ends %s." - (showHex nextObjAddr "") - (showHex prevEndAddr "") - -- Get size of binary data to copy from last address to new one. - let binSize = fromIntegral (nextObjAddr - prevEndAddr) - -- Get bytes in binary in front of object code. - let binCopy = Bld.byteString (BS.take binSize curBytes) - -- Get relocation code - bytes <- performObjRelocs objHeaderInfo objShdrs objRelocInfo (NewAddr nextObjAddr) secIdx - -- Go to next - let thisEndAddr = nextObjAddr + fromIntegral (BS.length bytes) - let nextBytes = BS.drop (fromIntegral (thisEndAddr - prevEndAddr)) curBytes - cont (thisEndAddr, nextBytes, prev <> binCopy <> Bld.byteString bytes) + let + insObjBin :: + Word64 -> + -- \^ Address of next function in object file. + ObjectSectionIndex -> + -- \^ Section index in object file. + ((Word64, BS.ByteString, Bld.Builder) -> Except String a) -> + (Word64, BS.ByteString, Bld.Builder) -> + Except String a + insObjBin nextObjAddr secIdx cont (prevEndAddr, curBytes, prev) = do + when (nextObjAddr < prevEndAddr) $ do + throwError $ + printf + "Expected next function %s after previous function ends %s." + (showHex nextObjAddr "") + (showHex prevEndAddr "") + -- Get size of binary data to copy from last address to new one. + let binSize = fromIntegral (nextObjAddr - prevEndAddr) + -- Get bytes in binary in front of object code. + let binCopy = Bld.byteString (BS.take binSize curBytes) + -- Get relocation code + bytes <- performObjRelocs objHeaderInfo objShdrs objRelocInfo (NewAddr nextObjAddr) secIdx + -- Go to next + let thisEndAddr = nextObjAddr + fromIntegral (BS.length bytes) + let nextBytes = BS.drop (fromIntegral (thisEndAddr - prevEndAddr)) curBytes + cont (thisEndAddr, nextBytes, prev <> binCopy <> Bld.byteString bytes) let finish (_, remaining, prev) = pure $ prev <> Bld.byteString remaining Map.foldrWithKey insObjBin finish objBinCodeMap (codeAddr, codeBytes, mempty) @@ -612,17 +622,20 @@ getObjectSymbols objHeaderInfo objShdrs objSymtabIndex = do throwError "Could not find object file symbol table." pure $ objShdrs V.! fromIntegral objSymtabIndex - let objSymtab :: BS.ByteString - objSymtab = getShdrContents objSymtabShdr objHeaderInfo + let + objSymtab :: BS.ByteString + objSymtab = getShdrContents objSymtabShdr objHeaderInfo -- Get object string table. - let objStrtabIdx :: Int - objStrtabIdx = fromIntegral (Elf.shdrLink objSymtabShdr) + let + objStrtabIdx :: Int + objStrtabIdx = fromIntegral (Elf.shdrLink objSymtabShdr) when (objStrtabIdx >= V.length objShdrs) $ do throwError "Invalid binary string table index." - let objStrtab :: BS.ByteString - objStrtab = getShdrContents (objShdrs V.! objStrtabIdx) objHeaderInfo + let + objStrtab :: BS.ByteString + objStrtab = getShdrContents (objShdrs V.! objStrtabIdx) objHeaderInfo case Elf.decodeSymtab cl elfDta objStrtab objSymtab of Left _e -> throwError "Could not parse object file symbol table." @@ -683,23 +696,26 @@ mergeObject binHeaderInfo objName objContents ctx = runPureRelinkM $ do -- Code program header in binary let binCodePhdrIndex = Bin.eclCodePhdrIndex binLayout - let binPhdrs :: V.Vector (Elf.Phdr 64) - binPhdrs = - V.generate - (fromIntegral (Elf.phdrCount binHeaderInfo)) - (Elf.phdrByIndex binHeaderInfo . fromIntegral) + let + binPhdrs :: V.Vector (Elf.Phdr 64) + binPhdrs = + V.generate + (fromIntegral (Elf.phdrCount binHeaderInfo)) + (Elf.phdrByIndex binHeaderInfo . fromIntegral) - let binCodePhdr :: Elf.Phdr 64 - binCodePhdr = binPhdrs V.! fromIntegral binCodePhdrIndex + let + binCodePhdr :: Elf.Phdr 64 + binCodePhdr = binPhdrs V.! fromIntegral binCodePhdrIndex binShdrIndexInfo <- mkBinarySectionLayout binShdrs -- End of code section - let binCodeEndOffset :: Elf.FileOffset Word64 - binCodeEndOffset = - Elf.incOffset - (Elf.phdrFileStart binCodePhdr) - (Elf.phdrFileSize binCodePhdr) + let + binCodeEndOffset :: Elf.FileOffset Word64 + binCodeEndOffset = + Elf.incOffset + (Elf.phdrFileStart binCodePhdr) + (Elf.phdrFileSize binCodePhdr) ----------------------------------------------------------------------------- -- 2. Collect object information. @@ -716,8 +732,9 @@ mergeObject binHeaderInfo objName objContents ctx = runPureRelinkM $ do -- Check object assumptions checkObjectHeaderAssumptions objHdr - let objectNameInfoMap :: Map BS.ByteString ObjFunDef - objectNameInfoMap = mapFromFuns ofdObjName id (mrObjectFuns ctx) + let + objectNameInfoMap :: Map BS.ByteString ObjFunDef + objectNameInfoMap = mapFromFuns ofdObjName id (mrObjectFuns ctx) -- Get section headers for objects objShdrs <- @@ -755,10 +772,12 @@ mergeObject binHeaderInfo objName objContents ctx = runPureRelinkM $ do objSymbols <- getObjectSymbols objHeaderInfo objShdrs (objinfoSymtab objInfo) -- Number of bytes in overflow sections - let codeOverflowSize :: Word64 - codeOverflowSize = objOverflowSize objInfo - let rodataOverflowSize :: Word64 - rodataOverflowSize = objRodataSize objInfo + let + codeOverflowSize :: Word64 + codeOverflowSize = objOverflowSize objInfo + let + rodataOverflowSize :: Word64 + rodataOverflowSize = objRodataSize objInfo let codeEndOff = Elf.incOffset (Elf.phdrFileStart binCodePhdr) (Elf.phdrFileSize binCodePhdr) @@ -783,28 +802,30 @@ mergeObject binHeaderInfo objName objContents ctx = runPureRelinkM $ do Just idx -> pure (Just (fromIntegral idx :: Word16)) -- Map section indices in binary to number of sections inserted before that section. - let insertedSectionIndices :: Map Word16 Word16 - insertedSectionIndices = Map.fromList $ - case codeOverflowShdrIndex of - Nothing -> - case rodataOverflowShdrIndex of - Nothing -> [] - Just ridx -> [(ridx, 1)] - Just cidx -> - case rodataOverflowShdrIndex of - Nothing -> [(cidx, 1)] - Just ridx -> - case compare cidx ridx of - LT -> [(cidx, 1), (ridx, 2)] - EQ -> [(cidx, 2)] - GT -> [(ridx, 1), (cidx, 2)] + let + insertedSectionIndices :: Map Word16 Word16 + insertedSectionIndices = Map.fromList $ + case codeOverflowShdrIndex of + Nothing -> + case rodataOverflowShdrIndex of + Nothing -> [] + Just ridx -> [(ridx, 1)] + Just cidx -> + case rodataOverflowShdrIndex of + Nothing -> [(cidx, 1)] + Just ridx -> + case compare cidx ridx of + LT -> [(cidx, 1), (ridx, 2)] + EQ -> [(cidx, 2)] + GT -> [(ridx, 1), (cidx, 2)] -- Map a section heder in binary to new section - let binShdrIndexMap :: Word16 -> Word16 - binShdrIndexMap i = - case Map.lookupLE i insertedSectionIndices of - Just (_, c) -> i + c - Nothing -> i + let + binShdrIndexMap :: Word16 -> Word16 + binShdrIndexMap i = + case Map.lookupLE i insertedSectionIndices of + Just (_, c) -> i + c + Nothing -> i newSymtab <- if bslSymtabIndex binShdrIndexInfo == 0 @@ -813,20 +834,21 @@ mergeObject binHeaderInfo objName objContents ctx = runPureRelinkM $ do let addrToSec = mkAddrToSectionMap binShdrs -- Map symbol names in object file to new section index and offset -- within new section. - let addrOfObjSymbol :: BS.ByteString -> Maybe (New.NewSectionIndex, Word64) - addrOfObjSymbol nm = do - case Map.lookup nm (objinfoSymbolAddrMap objInfo) of - Nothing -> Nothing - Just (OrigBinaryAddr addr) -> do - case Map.lookupLE addr addrToSec of - Just (base, (sz, idx)) - | addr < base + sz -> - Just (NewSectionIndex (binShdrIndexMap idx), addr - base) - _ -> Nothing - Just (OverflowSection off) -> do - case codeOverflowShdrIndex of - Nothing -> error "internal: Did not expect function in overflow section." - Just idx -> Just (NewSectionIndex idx, off) + let + addrOfObjSymbol :: BS.ByteString -> Maybe (New.NewSectionIndex, Word64) + addrOfObjSymbol nm = do + case Map.lookup nm (objinfoSymbolAddrMap objInfo) of + Nothing -> Nothing + Just (OrigBinaryAddr addr) -> do + case Map.lookupLE addr addrToSec of + Just (base, (sz, idx)) + | addr < base + sz -> + Just (NewSectionIndex (binShdrIndexMap idx), addr - base) + _ -> Nothing + Just (OverflowSection off) -> do + case codeOverflowShdrIndex of + Nothing -> error "internal: Did not expect function in overflow section." + Just idx -> Just (NewSectionIndex idx, off) case runExcept $ mkNewSymtab ctx binHeaderInfo binShdrs (bslSymtabIndex binShdrIndexInfo) binShdrIndexMap objSymbols addrOfObjSymbol of Left e -> throwError e Right r -> pure (Just r) @@ -835,53 +857,59 @@ mergeObject binHeaderInfo objName objContents ctx = runPureRelinkM $ do -- 3. Compute file layout -- Start of overflow section offset. - let overflowOffset :: Elf.FileOffset Word64 - overflowOffset = Elf.alignFileOffset 16 binCodeEndOffset + let + overflowOffset :: Elf.FileOffset Word64 + overflowOffset = Elf.alignFileOffset 16 binCodeEndOffset -- File offset for end of overflow let codeOverflowEndOffset = Elf.incOffset overflowOffset codeOverflowSize -- Number of bytes in padding between end of old code and overflow code section. - let overflowPadding :: Word64 - overflowPadding = Elf.fromFileOffset overflowOffset - Elf.fromFileOffset binCodeEndOffset + let + overflowPadding :: Word64 + overflowPadding = Elf.fromFileOffset overflowOffset - Elf.fromFileOffset binCodeEndOffset -- .shstrtab contents - let newShstrtabContents :: BS.ByteString - shstrtabOffsetMap :: Map BS.ByteString Word32 - (newShstrtabContents, shstrtabOffsetMap) = - let binNames = Elf.shdrName <$> V.toList binShdrs - overflowCodeShdrNames - | codeOverflowSize > 0 = [newCodeSectionName] - | otherwise = [] - overflowRodataShdrNames - | rodataOverflowSize > 0 = [newRodataSectionName] - | otherwise = [] - allNames = binNames ++ overflowCodeShdrNames ++ overflowRodataShdrNames - in Elf.encodeStringTable allNames + let + newShstrtabContents :: BS.ByteString + shstrtabOffsetMap :: Map BS.ByteString Word32 + (newShstrtabContents, shstrtabOffsetMap) = + let + binNames = Elf.shdrName <$> V.toList binShdrs + overflowCodeShdrNames + | codeOverflowSize > 0 = [newCodeSectionName] + | otherwise = [] + overflowRodataShdrNames + | rodataOverflowSize > 0 = [newRodataSectionName] + | otherwise = [] + allNames = binNames ++ overflowCodeShdrNames ++ overflowRodataShdrNames + in + Elf.encodeStringTable allNames -- Create file indices where new data is added. - let layoutSegmentAppends :: [(Int, Int)] - layoutSegmentAppends - | objRodataSize objInfo == 0 = do - let newCodeSize = Elf.fromFileOffset (codeOverflowEndOffset - binCodeEndOffset) - [(fromIntegral binCodeEndOffset, fromIntegral newCodeSize)] - | otherwise = - case Bin.eclRodataPhdrIndex binLayout of - -- No rodata -- put rodata after overflow. - Nothing -> do - let newRodataOffset = Elf.alignFileOffset 16 codeOverflowEndOffset - let newRodataEndOffset = Elf.incOffset newRodataOffset (objRodataSize objInfo) - let sz = fromIntegral $ newRodataEndOffset - binCodeEndOffset - [(fromIntegral binCodeEndOffset, sz)] - -- Separate rodata - Just rodataPhdrIdx -> do - let rodataEndOffset = phdrEndOff $ Elf.phdrByIndex binHeaderInfo rodataPhdrIdx - let newCodeSize = Elf.fromFileOffset $ codeOverflowEndOffset - binCodeEndOffset - let newRodataOffset = Elf.alignFileOffset 16 rodataEndOffset - let newRodataEndOffset = Elf.incOffset newRodataOffset (objRodataSize objInfo) - let newRodataSize = newRodataEndOffset - rodataEndOffset - [ (fromIntegral binCodeEndOffset, fromIntegral newCodeSize) - , (fromIntegral rodataEndOffset, fromIntegral newRodataSize) - ] + let + layoutSegmentAppends :: [(Int, Int)] + layoutSegmentAppends + | objRodataSize objInfo == 0 = do + let newCodeSize = Elf.fromFileOffset (codeOverflowEndOffset - binCodeEndOffset) + [(fromIntegral binCodeEndOffset, fromIntegral newCodeSize)] + | otherwise = + case Bin.eclRodataPhdrIndex binLayout of + -- No rodata -- put rodata after overflow. + Nothing -> do + let newRodataOffset = Elf.alignFileOffset 16 codeOverflowEndOffset + let newRodataEndOffset = Elf.incOffset newRodataOffset (objRodataSize objInfo) + let sz = fromIntegral $ newRodataEndOffset - binCodeEndOffset + [(fromIntegral binCodeEndOffset, sz)] + -- Separate rodata + Just rodataPhdrIdx -> do + let rodataEndOffset = phdrEndOff $ Elf.phdrByIndex binHeaderInfo rodataPhdrIdx + let newCodeSize = Elf.fromFileOffset $ codeOverflowEndOffset - binCodeEndOffset + let newRodataOffset = Elf.alignFileOffset 16 rodataEndOffset + let newRodataEndOffset = Elf.incOffset newRodataOffset (objRodataSize objInfo) + let newRodataSize = newRodataEndOffset - rodataEndOffset + [ (fromIntegral binCodeEndOffset, fromIntegral newCodeSize) + , (fromIntegral rodataEndOffset, fromIntegral newRodataSize) + ] -- Create new binary layout newBinLayout <- do @@ -921,48 +949,53 @@ mergeObject binHeaderInfo objName objContents ctx = runPureRelinkM $ do -- Address of start of overflow section. -- This is the of the binary code segment rounded up to nearest multiple of 16. - let codeOverflowAddr :: Word64 - codeOverflowAddr = codeEndAddr + overflowPadding - let codeOverflowEndAddr :: Word64 - codeOverflowEndAddr = codeOverflowAddr + codeOverflowSize + let + codeOverflowAddr :: Word64 + codeOverflowAddr = codeEndAddr + overflowPadding + let + codeOverflowEndAddr :: Word64 + codeOverflowEndAddr = codeOverflowAddr + codeOverflowSize -- rodataOverflowEndOffAndAddr - let rodataOverflowEndOffAndAddr :: Maybe (Elf.FileOffset Word64, Word64) - rodataOverflowEndOffAndAddr - | objRodataSize objInfo == 0 = Nothing - | otherwise = - case Bin.eclRodataPhdrIndex binLayout of - -- No rodata -- put rodata after overflow. - Nothing -> do - let newRodataOffset = Elf.alignFileOffset 16 codeOverflowEndOffset - let padding = Elf.fromFileOffset (newRodataOffset - codeOverflowEndOffset) - Just (codeOverflowEndOffset, codeOverflowEndAddr + padding) - -- Separate rodata - Just rodataPhdrIdx -> do - let rodataPhdr = Elf.phdrByIndex binHeaderInfo rodataPhdrIdx - let rodataAddr = Elf.phdrSegmentVirtAddr rodataPhdr - let rodataOffset = Elf.phdrFileStart rodataPhdr - let rodataEndOffset = Elf.incOffset rodataOffset (Elf.phdrFileSize rodataPhdr) - let newRodataOffset = Elf.alignFileOffset 16 rodataEndOffset - let size = Elf.fromFileOffset (newRodataOffset - rodataOffset) - Just (rodataEndOffset, rodataAddr + size) + let + rodataOverflowEndOffAndAddr :: Maybe (Elf.FileOffset Word64, Word64) + rodataOverflowEndOffAndAddr + | objRodataSize objInfo == 0 = Nothing + | otherwise = + case Bin.eclRodataPhdrIndex binLayout of + -- No rodata -- put rodata after overflow. + Nothing -> do + let newRodataOffset = Elf.alignFileOffset 16 codeOverflowEndOffset + let padding = Elf.fromFileOffset (newRodataOffset - codeOverflowEndOffset) + Just (codeOverflowEndOffset, codeOverflowEndAddr + padding) + -- Separate rodata + Just rodataPhdrIdx -> do + let rodataPhdr = Elf.phdrByIndex binHeaderInfo rodataPhdrIdx + let rodataAddr = Elf.phdrSegmentVirtAddr rodataPhdr + let rodataOffset = Elf.phdrFileStart rodataPhdr + let rodataEndOffset = Elf.incOffset rodataOffset (Elf.phdrFileSize rodataPhdr) + let newRodataOffset = Elf.alignFileOffset 16 rodataEndOffset + let size = Elf.fromFileOffset (newRodataOffset - rodataOffset) + Just (rodataEndOffset, rodataAddr + size) -- Compute information needed to resolve relocations in object file. objRelocInfo <- do - let addrOfObjSec :: ObjectSectionIndex -> Maybe Word64 - addrOfObjSec idx = - case Map.lookup idx (objSectionAddrMap objInfo) of - Nothing -> Nothing - Just (BinAddr a) -> Just a - Just (OverflowAddr o) -> Just (codeOverflowEndAddr + o) - Just (RodataAddr o) -> - case rodataOverflowEndOffAndAddr of - Nothing -> error "Unexpected rodata address." - Just (_, a) -> Just (a + o) - let undefAddrMap :: Map BS.ByteString Word64 - undefAddrMap = - let ins m r = Map.insert (ofrObjName r) (ofrBinAddr r) m - in foldl' ins Map.empty (mrUndefinedFuns ctx) + let + addrOfObjSec :: ObjectSectionIndex -> Maybe Word64 + addrOfObjSec idx = + case Map.lookup idx (objSectionAddrMap objInfo) of + Nothing -> Nothing + Just (BinAddr a) -> Just a + Just (OverflowAddr o) -> Just (codeOverflowEndAddr + o) + Just (RodataAddr o) -> + case rodataOverflowEndOffAndAddr of + Nothing -> error "Unexpected rodata address." + Just (_, a) -> Just (a + o) + let + undefAddrMap :: Map BS.ByteString Word64 + undefAddrMap = + let ins m r = Map.insert (ofrObjName r) (ofrBinAddr r) m + in foldl' ins Map.empty (mrUndefinedFuns ctx) case mkRelocInfo (`Map.lookup` undefAddrMap) addrOfObjSec objSymbols objShdrs of Left err -> throwError $ "Object file error: " ++ err Right r -> pure r @@ -979,86 +1012,91 @@ mergeObject binHeaderInfo objName objContents ctx = runPureRelinkM $ do generateOverflowSection objHeaderInfo objShdrs objRelocInfo (NewAddr addr) overflowSections -- List of program headers, new size and contents - let phdrAppends :: [(Word16, Word64, [(Word64, Bld.Builder)])] - phdrAppends - | rodataOverflowSize == 0 = do - let sz = Elf.fromFileOffset (codeOverflowEndOffset - Elf.phdrFileStart binCodePhdr) - [(binCodePhdrIndex, sz, [(codeOverflowSize, objOverflowCode)])] - | otherwise = - case Bin.eclRodataPhdrIndex binLayout of - -- No rodata -- put rodata after overflow. - Nothing -> do - let newRodataOffset = Elf.alignFileOffset 16 codeOverflowEndOffset - let newRodataEndOffset = Elf.incOffset newRodataOffset (objRodataSize objInfo) - [ - ( binCodePhdrIndex - , Elf.fromFileOffset (newRodataEndOffset - Elf.phdrFileStart binCodePhdr) - , [(codeOverflowSize, objOverflowCode), (objRodataSize objInfo, newRodataContents)] - ) - ] - -- Separate rodata - Just rodataPhdrIdx -> do - let rodataPhdr = Elf.phdrByIndex binHeaderInfo rodataPhdrIdx - let rodataEndOffset = phdrEndOff rodataPhdr - let newRodataOffset = Elf.alignFileOffset 16 rodataEndOffset - let newRodataEndOffset = Elf.incOffset newRodataOffset (objRodataSize objInfo) - [ - ( binCodePhdrIndex - , Elf.fromFileOffset (codeOverflowEndOffset - Elf.phdrFileStart binCodePhdr) - , [(codeOverflowSize, objOverflowCode)] - ) + let + phdrAppends :: [(Word16, Word64, [(Word64, Bld.Builder)])] + phdrAppends + | rodataOverflowSize == 0 = do + let sz = Elf.fromFileOffset (codeOverflowEndOffset - Elf.phdrFileStart binCodePhdr) + [(binCodePhdrIndex, sz, [(codeOverflowSize, objOverflowCode)])] + | otherwise = + case Bin.eclRodataPhdrIndex binLayout of + -- No rodata -- put rodata after overflow. + Nothing -> do + let newRodataOffset = Elf.alignFileOffset 16 codeOverflowEndOffset + let newRodataEndOffset = Elf.incOffset newRodataOffset (objRodataSize objInfo) + [ + ( binCodePhdrIndex + , Elf.fromFileOffset (newRodataEndOffset - Elf.phdrFileStart binCodePhdr) + , [(codeOverflowSize, objOverflowCode), (objRodataSize objInfo, newRodataContents)] + ) + ] + -- Separate rodata + Just rodataPhdrIdx -> do + let rodataPhdr = Elf.phdrByIndex binHeaderInfo rodataPhdrIdx + let rodataEndOffset = phdrEndOff rodataPhdr + let newRodataOffset = Elf.alignFileOffset 16 rodataEndOffset + let newRodataEndOffset = Elf.incOffset newRodataOffset (objRodataSize objInfo) + [ + ( binCodePhdrIndex + , Elf.fromFileOffset (codeOverflowEndOffset - Elf.phdrFileStart binCodePhdr) + , [(codeOverflowSize, objOverflowCode)] + ) + , + ( rodataPhdrIdx + , Elf.fromFileOffset (newRodataEndOffset - Elf.phdrFileStart rodataPhdr) , - ( rodataPhdrIdx - , Elf.fromFileOffset (newRodataEndOffset - Elf.phdrFileStart rodataPhdr) - , - [ (objRodataSize objInfo, newRodataContents) - ] - ) - ] + [ (objRodataSize objInfo, newRodataContents) + ] + ) + ] -- Return offset in new .shstrtab of name - let shdrNameFn :: BS.ByteString -> Word32 - shdrNameFn nm = - let msg = "internal failure: Missing section header name." - in Map.findWithDefault (error msg) nm shstrtabOffsetMap + let + shdrNameFn :: BS.ByteString -> Word32 + shdrNameFn nm = + let msg = "internal failure: Missing section header name." + in Map.findWithDefault (error msg) nm shstrtabOffsetMap -- Map from section indices to contents to insert before them. - let insSecMap :: Map Word16 [Elf.Shdr Word32 Word64] - insSecMap = Map.fromList $ do - let mcodeShdr = - case codeOverflowShdrIndex of - Nothing -> Nothing - Just idx -> - let codeShdr = - New.newBinaryCodeSection - (shdrNameFn newCodeSectionName) - (Elf.shf_alloc .|. Elf.shf_execinstr) - codeOverflowAddr - binCodeEndOffset - codeOverflowSize - in Just (idx, codeShdr) - let mrodataShdr = - case (rodataOverflowShdrIndex, rodataOverflowEndOffAndAddr) of - (Just idx, Just (endOff, addr)) -> - let off = Elf.alignFileOffset 16 endOff - nm = shdrNameFn newRodataSectionName - sz = rodataOverflowSize - shdr = New.newBinaryCodeSection nm Elf.shf_alloc addr off sz - in Just (idx, shdr) - _ -> Nothing - case mcodeShdr of - Nothing -> - case mrodataShdr of - Nothing -> [] - Just (ridx, rshdr) -> [(ridx, [rshdr])] - Just (cidx, cshdr) -> do - case mrodataShdr of - Nothing -> [(cidx, [cshdr])] - Just (ridx, rshdr) -> - case compare cidx ridx of - LT -> [(,) cidx [cshdr], (,) ridx [rshdr]] - EQ -> [(,) cidx [cshdr, rshdr]] - GT -> [(,) ridx [rshdr], (,) cidx [cshdr]] + let + insSecMap :: Map Word16 [Elf.Shdr Word32 Word64] + insSecMap = Map.fromList $ do + let mcodeShdr = + case codeOverflowShdrIndex of + Nothing -> Nothing + Just idx -> + let codeShdr = + New.newBinaryCodeSection + (shdrNameFn newCodeSectionName) + (Elf.shf_alloc .|. Elf.shf_execinstr) + codeOverflowAddr + binCodeEndOffset + codeOverflowSize + in Just (idx, codeShdr) + let mrodataShdr = + case (rodataOverflowShdrIndex, rodataOverflowEndOffAndAddr) of + (Just idx, Just (endOff, addr)) -> + let + off = Elf.alignFileOffset 16 endOff + nm = shdrNameFn newRodataSectionName + sz = rodataOverflowSize + shdr = New.newBinaryCodeSection nm Elf.shf_alloc addr off sz + in + Just (idx, shdr) + _ -> Nothing + case mcodeShdr of + Nothing -> + case mrodataShdr of + Nothing -> [] + Just (ridx, rshdr) -> [(ridx, [rshdr])] + Just (cidx, cshdr) -> do + case mrodataShdr of + Nothing -> [(cidx, [cshdr])] + Just (ridx, rshdr) -> + case compare cidx ridx of + LT -> [(,) cidx [cshdr], (,) ridx [rshdr]] + EQ -> [(,) cidx [cshdr, rshdr]] + GT -> [(,) ridx [rshdr], (,) cidx [cshdr]] -- Compute context for new binary layout. let newBuildCtx = @@ -1076,9 +1114,11 @@ mergeObject binHeaderInfo objName objContents ctx = runPureRelinkM $ do , New.bctxAppendMap = Map.fromList $ let f (idx, _, l) = - let p = Elf.phdrByIndex binHeaderInfo idx - endOff = Elf.incOffset (Elf.phdrFileStart p) (Elf.phdrFileSize p) - in (endOff, l) + let + p = Elf.phdrByIndex binHeaderInfo idx + endOff = Elf.incOffset (Elf.phdrFileStart p) (Elf.phdrFileSize p) + in + (endOff, l) in f <$> phdrAppends , New.bctxNewSymtab = newSymtab , New.bctxFileOffsetFn = New.nblFindNewOffset newBinLayout diff --git a/src/Reopt/TypeInference/ConstraintGen.hs b/src/Reopt/TypeInference/ConstraintGen.hs index e2c35fd9..dd5c96e9 100644 --- a/src/Reopt/TypeInference/ConstraintGen.hs +++ b/src/Reopt/TypeInference/ConstraintGen.hs @@ -12,30 +12,10 @@ module Reopt.TypeInference.ConstraintGen ( showInferredTypes, ) where -import Control.Lens ( - At (at), - Getting, - Ixed (ix), - Lens', - makeLenses, - non, - set, - use, - view, - (<>=), - (?=), - (^?), - ) -import Control.Monad (mapAndUnzipM) -import Control.Monad.Reader ( - MonadReader (ask), - ReaderT (..), - asks, - join, - local, - withReaderT, - zipWithM_, - ) +import Control.Lens ((<>=), (?=), (^?)) +import Control.Lens qualified as L +import Control.Monad (join, mapAndUnzipM, zipWithM_) +import Control.Monad.Reader qualified as Reader import Control.Monad.State.Strict (MonadState, StateT, evalStateT) import Control.Monad.Trans (lift) import Data.Bits (testBit) @@ -171,7 +151,7 @@ data CGenGlobalContext arch = CGenGlobalContext -- ^ The map from memory segments to their row types. } -makeLenses ''CGenGlobalContext +L.makeLenses ''CGenGlobalContext -- | Context available when generating constraints for a given module data CGenModuleContext arch = CGenModuleContext @@ -184,7 +164,7 @@ data CGenModuleContext arch = CGenModuleContext -- ^ Enclosing global context } -makeLenses ''CGenModuleContext +L.makeLenses ''CGenModuleContext -- | Context available when generating constraints for a given function data CGenFunctionContext arch = CGenFunctionContext @@ -201,7 +181,7 @@ data CGenFunctionContext arch = CGenFunctionContext -- ^ Enclosing module context } -makeLenses ''CGenFunctionContext +L.makeLenses ''CGenFunctionContext -- | Context available when generating constraints for a given block. At the -- moment, I managed to make it so that we don't need anything special, but it's @@ -215,7 +195,7 @@ data CGenBlockContext arch = CGenBlockContext -- ^ Enclosing function context } -makeLenses ''CGenBlockContext +L.makeLenses ''CGenBlockContext data CGenState arch = CGenState { _assignTyVars :: Map BSC.ByteString (Map FnAssignId TyVar) @@ -225,11 +205,11 @@ data CGenState arch = CGenState _warnings :: [Warning] } -makeLenses ''CGenState +L.makeLenses ''CGenState newtype CGenM ctx arch a = CGenM { _getCGenM :: - ReaderT + Reader.ReaderT (ctx arch) (StateT (CGenState arch) SolverM) a @@ -238,7 +218,7 @@ newtype CGenM ctx arch a = CGenM ( Functor , Applicative , Monad - , MonadReader (ctx arch) + , Reader.MonadReader (ctx arch) , MonadState (CGenState arch) ) @@ -246,7 +226,7 @@ withinContext :: (outer arch -> inner arch) -> CGenM inner arch a -> CGenM outer arch a -withinContext f (CGenM m) = CGenM (withReaderT f m) +withinContext f (CGenM m) = CGenM (Reader.withReaderT f m) inSolverM :: SolverM a -> CGenM ctxt arch a inSolverM = CGenM . lift . lift @@ -266,7 +246,7 @@ runCGenM mem trace orig (CGenM m) = runSolverM trace orig ptrWidth $ do { _cgenMemory = mem , _cgenMemoryRegions = memRows } - evalStateT (runReaderT m ctxt0) st0 + evalStateT (Reader.runReaderT m ctxt0) st0 where ptrWidth = widthVal (memWidth mem) @@ -288,8 +268,8 @@ warn s = CGenM $ warnings <>= [Warning s] atFnAssignId :: BSC.ByteString -> FnAssignId -> - Lens' (CGenState arch) (Maybe TyVar) -atFnAssignId fn aId = assignTyVars . at fn . non Map.empty . at aId + L.Lens' (CGenState arch) (Maybe TyVar) +atFnAssignId fn aId = assignTyVars . L.at fn . L.non Map.empty . L.at aId -- | Retrieves the type variable associated to the given `FnAssignId`, if any, -- otherwise creates and registers a fresh type variable for it. @@ -303,7 +283,7 @@ tyVarForAssignId :: FnAssignId -> CGenM ctx arch TyVar tyVarForAssignId fn aId = do - mtv <- CGenM $ use (atFnAssignId fn aId) + mtv <- CGenM $ L.use (atFnAssignId fn aId) case mtv of Just tv -> pure tv Nothing -> do @@ -323,7 +303,7 @@ tyVarForAssignId fn aId = do assignIdTyVar :: BSC.ByteString -> FnAssignId -> CGenM ctx arch TyVar assignIdTyVar fn aId = do -- fn <- askContext cgenCurrentFunName - mTyVar <- CGenM $ use (atFnAssignId fn aId) + mTyVar <- CGenM $ L.use (atFnAssignId fn aId) case mTyVar of Nothing -> tyVarForAssignId fn aId Just tyVar -> pure tyVar @@ -358,12 +338,12 @@ phiType = assignIdType . unFnPhiVar argumentType :: Int -> CGenM CGenBlockContext arch Ty argumentType i = do tys <- fttvArgs <$> askContext (cgenFunctionContext . cgenCurrentFun) - case tys ^? ix i of + case tys ^? L.ix i of Nothing -> error "Missing argument" Just ty -> pure (varTy ty) -askContext :: Getting a (ctx arch) a -> CGenM ctx arch a -askContext = CGenM . ask . view +askContext :: L.Getting a (ctx arch) a -> CGenM ctx arch a +askContext = CGenM . Reader.ask . L.view addrWidth :: CGenM CGenBlockContext arch (NatRepr (ArchAddrWidth arch)) addrWidth = @@ -412,9 +392,9 @@ functionTypeTyVars :: CGenM CGenBlockContext arch (Maybe FunctionTypeTyVars) functionTypeTyVars saddr = do moduleContext <- askContext (cgenFunctionContext . cgenModuleContext) - let ftypes = view cgenFunTypes moduleContext - let namedftypes = view cgenNamedFunTypes moduleContext - let mem = view (cgenGlobalContext . cgenMemory) moduleContext + let ftypes = L.view cgenFunTypes moduleContext + let namedftypes = L.view cgenNamedFunTypes moduleContext + let mem = L.view (cgenGlobalContext . cgenMemory) moduleContext let aWidth = memAddrWidth mem case saddr of @@ -549,7 +529,7 @@ genApp :: App (FnValue arch) tp -> CGenM CGenBlockContext arch () genApp (ty, outSize) app = do - prov <- asks $ view cgenConstraintProv + prov <- Reader.asks $ L.view cgenConstraintProv case app of Eq l r -> do join (emitEq prov <$> genFnValue l <*> genFnValue r) @@ -692,7 +672,7 @@ genMemOp :: Some TypeRepr -> CGenM CGenBlockContext arch () genMemOp ty ptr (Some tp) = do - prov <- asks $ view cgenConstraintProv + prov <- Reader.asks $ L.view cgenConstraintProv ptrWidth <- widthVal <$> addrWidth emitPtr prov ty =<< genFnValue ptr case tp of @@ -730,7 +710,7 @@ genFnAssignment :: FnArchConstraints arch => FnAssignment arch tp -> CGenM CGenBlockContext arch () -genFnAssignment a = local (set cgenConstraintProv prov) $ do +genFnAssignment a = Reader.local (L.set cgenConstraintProv prov) $ do fn <- askContext (cgenFunctionContext . cgenCurrentFunName) ty <- varTy <$> tyVarForAssignId fn (fnAssignId a) case rhs of @@ -769,7 +749,7 @@ genFnStmt :: FnStmt arch -> CGenM CGenBlockContext arch () genFnStmt stmt = - local (set cgenConstraintProv prov) $ + Reader.local (L.set cgenConstraintProv prov) $ case stmt of FnComment _ -> pure () FnAssignStmt a -> genFnAssignment a @@ -972,16 +952,17 @@ genModuleConstraints m mem trace orig = runCGenM mem trace orig $ do (defAddrs, defSyms) <- mapAndUnzipM doDef (recoveredDefs m) - let symMap = Map.fromList (defSyms ++ declSyms) - addrMap = Map.fromList (defAddrs ++ declAddrs) + let + symMap = Map.fromList (defSyms ++ declSyms) + addrMap = Map.fromList (defAddrs ++ declAddrs) withinContext (CGenModuleContext addrMap symMap) (mapM_ genFunction (recoveredDefs m)) -- FIXME: abstract - tyVars <- CGenM $ use assignTyVars - warns <- CGenM $ use warnings + tyVars <- CGenM $ L.use assignTyVars + warns <- CGenM $ L.use warnings tyMap <- inSolverM unifyConstraints diff --git a/src/Reopt/TypeInference/DebugTypes.hs b/src/Reopt/TypeInference/DebugTypes.hs index 69bff65d..7049282d 100644 --- a/src/Reopt/TypeInference/DebugTypes.hs +++ b/src/Reopt/TypeInference/DebugTypes.hs @@ -251,11 +251,13 @@ resolveDwarfSubprogramDebugName sub moff case moff of Nothing -> Nothing Just o -> - let nmVal :: String - nmVal - | Dwarf.subName sub == "" = "Unnamed function" - | otherwise = BSC.unpack (Dwarf.nameVal (Dwarf.subName sub)) - in Just $! printf "%s (0x%x)" nmVal (toInteger o) + let + nmVal :: String + nmVal + | Dwarf.subName sub == "" = "Unnamed function" + | otherwise = BSC.unpack (Dwarf.nameVal (Dwarf.subName sub)) + in + Just $! printf "%s (0x%x)" nmVal (toInteger o) -- | Resolve type information from subroutine. resolveSubprogramType :: @@ -275,8 +277,9 @@ resolveSubprogramType cu annMap sub entryAddr -- Var args functions have a special usage. | Dwarf.subUnspecifiedParams sub = do -- Get name as an external symbol - let externalName :: Maybe BSC.ByteString - externalName = dwarfExternalName sub + let + externalName :: Maybe BSC.ByteString + externalName = dwarfExternalName sub -- Get entry address in terms of memory. case resolveDwarfSubprogramDebugName sub (dwarfSubEntry sub) of Nothing -> pure annMap @@ -286,8 +289,9 @@ resolveSubprogramType cu annMap sub entryAddr pure annMap | otherwise = do -- Get name as an external symbol - let externalName :: Maybe BSC.ByteString - externalName = dwarfExternalName sub + let + externalName :: Maybe BSC.ByteString + externalName = dwarfExternalName sub -- Get origin if this is an inlined or specialized instance of a source subprogram. let emorigin = case Dwarf.subOrigin sub of @@ -393,19 +397,20 @@ resolveDebugFunTypes :: IncCompM (ReoptLogEvent arch) r (FunTypeMaps (ArchAddrWidth arch)) resolveDebugFunTypes resolveFn annMap elfInfo = do let hdr = Elf.header elfInfo - let secDataMap :: - Map - BSC.ByteString - [ ( Elf.FileRange (Elf.ElfWordType (ArchAddrWidth arch)) - , Elf.ElfSection (Elf.ElfWordType (ArchAddrWidth arch)) - ) - ] - secDataMap = - Map.fromListWith - (++) - [ (Elf.elfSectionName sec, [(r, sec)]) - | (r, sec) <- V.toList (Elf.headerSections elfInfo) - ] + let + secDataMap :: + Map + BSC.ByteString + [ ( Elf.FileRange (Elf.ElfWordType (ArchAddrWidth arch)) + , Elf.ElfSection (Elf.ElfWordType (ArchAddrWidth arch)) + ) + ] + secDataMap = + Map.fromListWith + (++) + [ (Elf.elfSectionName sec, [(r, sec)]) + | (r, sec) <- V.toList (Elf.headerSections elfInfo) + ] case Map.findWithDefault [] ".debug_info" secDataMap of [] -> do -- No debug information diff --git a/src/Reopt/TypeInference/Header.hs b/src/Reopt/TypeInference/Header.hs index f9dff90e..81eb6915 100644 --- a/src/Reopt/TypeInference/Header.hs +++ b/src/Reopt/TypeInference/Header.hs @@ -1,21 +1,20 @@ -{-| -Type information pulled from user-provided header file. --} {-# LANGUAGE BlockArguments #-} -module Reopt.TypeInference.Header - ( parseHeader - ) where +-- | +-- Type information pulled from user-provided header file. +module Reopt.TypeInference.Header ( + parseHeader, +) where -import Control.Monad.Except -import Control.Monad.State -import qualified Data.ByteString.Char8 as BSC -import Data.Foldable -import qualified Data.Map.Strict as Map -import qualified Data.Vector as V -import qualified Language.C as C +import Control.Monad.Except +import Control.Monad.State +import Data.ByteString.Char8 qualified as BSC +import Data.Foldable +import Data.Map.Strict qualified as Map +import Data.Vector qualified as V +import Language.C qualified as C -import Reopt.TypeInference.HeaderTypes +import Reopt.TypeInference.HeaderTypes identToByteString :: C.Ident -> BSC.ByteString identToByteString = BSC.pack . C.identToString @@ -23,7 +22,7 @@ identToByteString = BSC.pack . C.identToString type CParser = StateT AnnDeclarations (Except (C.NodeInfo, String)) errorAt :: C.NodeInfo -> String -> CParser a -errorAt n s = throwError (n,s) +errorAt n s = throwError (n, s) resolveTypedef :: C.NodeInfo -> C.Ident -> CParser AnnType resolveTypedef n typeName = do @@ -34,68 +33,70 @@ resolveTypedef n typeName = do Just tp -> pure (TypedefAnnType nm tp) data DeclLenMod - = NoLenMod - | ShortLenMod - | LongLenMod - | LongLongLenMod + = NoLenMod + | ShortLenMod + | LongLenMod + | LongLongLenMod -- | Integer type when an int is used. declLenModIntType :: DeclLenMod -> AnnType declLenModIntType dlm = - case dlm of - NoLenMod -> IAnnType 32 - ShortLenMod -> IAnnType 16 - LongLenMod -> IAnnType 64 - LongLongLenMod -> IAnnType 64 + case dlm of + NoLenMod -> IAnnType 32 + ShortLenMod -> IAnnType 16 + LongLenMod -> IAnnType 64 + LongLongLenMod -> IAnnType 64 -data QualMods = QM { qmLenMod :: !DeclLenMod - , qmTypeSpec :: !(Maybe C.CTypeSpec) - } +data QualMods = QM + { qmLenMod :: !DeclLenMod + , qmTypeSpec :: !(Maybe C.CTypeSpec) + } emptyQualMods :: QualMods -emptyQualMods = QM { qmLenMod = NoLenMod - , qmTypeSpec = Nothing - } +emptyQualMods = + QM + { qmLenMod = NoLenMod + , qmTypeSpec = Nothing + } parseType :: QualMods -> C.CTypeSpec -> CParser AnnType parseType qm tp = case tp of - C.CVoidType _ -> pure $! VoidAnnType - C.CCharType _ -> pure $! IAnnType 8 - C.CShortType _ -> pure $! IAnnType 16 - C.CIntType _ -> pure $! declLenModIntType (qmLenMod qm) - C.CLongType _ -> pure $! IAnnType 64 - C.CFloatType _ -> pure $! FloatAnnType - C.CDoubleType _ -> pure $! DoubleAnnType + C.CVoidType _ -> pure VoidAnnType + C.CCharType _ -> pure $! IAnnType 8 + C.CShortType _ -> pure $! IAnnType 16 + C.CIntType _ -> pure $! declLenModIntType (qmLenMod qm) + C.CLongType _ -> pure $! IAnnType 64 + C.CFloatType _ -> pure FloatAnnType + C.CDoubleType _ -> pure DoubleAnnType C.CSignedType _ -> pure $! IAnnType 32 - C.CUnsigType _ -> pure $! IAnnType 32 - C.CBoolType _ -> pure $! IAnnType 1 + C.CUnsigType _ -> pure $! IAnnType 32 + C.CBoolType _ -> pure $! IAnnType 1 C.CComplexType n -> errorAt n "_Complex is not supported." - C.CInt128Type n -> errorAt n "__int128 is not supported." - C.CUInt128Type n -> errorAt n "__uint128 is not supported." + C.CInt128Type n -> errorAt n "__int128 is not supported." + C.CUInt128Type n -> errorAt n "__uint128 is not supported." C.CFloatNType _ _ n -> errorAt n "Floating point extensions are not supported." - C.CEnumType en _ -> parseEnum en C.CSUType su _ -> parseStructUnion su - C.CTypeDef typeName n -> resolveTypedef n typeName + C.CTypeDef typeName n -> resolveTypedef n typeName C.CTypeOfExpr _ n -> errorAt n "typeof unsupported." C.CTypeOfType _ n -> errorAt n "typeof unsupported." C.CAtomicType _ n -> errorAt n "atomic is unsupported." - longShortError :: C.NodeInfo -> CParser a -longShortError n = errorAt n $ "Both 'long' and 'short in declaration specifier." +longShortError n = errorAt n "Both 'long' and 'short in declaration specifier." -- | Parse the declaration specifiers to get a header type. -parseQualType :: QualMods - -> [C.CDeclarationSpecifier C.NodeInfo] - -> CParser AnnType +parseQualType :: + QualMods -> + [C.CDeclarationSpecifier C.NodeInfo] -> + CParser AnnType -- We ignore qualifiers as they do not change layout. parseQualType qm qtp = case qtp of - C.CFunSpec _ : r -> parseQualType qm r - C.CAlignSpec _ : r -> parseQualType qm r - C.CTypeQual _ : r -> parseQualType qm r + C.CFunSpec _ : r -> parseQualType qm r + C.CAlignSpec _ : r -> parseQualType qm r + C.CTypeQual _ : r -> parseQualType qm r C.CStorageSpec _ : r -> parseQualType qm r C.CTypeSpec s : r -> case s of @@ -103,29 +104,29 @@ parseQualType qm qtp = C.CCharType _ -> pure $! IAnnType 8 C.CShortType n -> case qmLenMod qm of - NoLenMod -> parseQualType qm { qmLenMod = ShortLenMod } r - ShortLenMod -> errorAt n $ "Duplicate short" - LongLenMod -> longShortError n + NoLenMod -> parseQualType qm{qmLenMod = ShortLenMod} r + ShortLenMod -> errorAt n "Duplicate short" + LongLenMod -> longShortError n LongLongLenMod -> longShortError n C.CIntType _ -> parseQualType qm r C.CLongType n -> case qmLenMod qm of - NoLenMod -> parseQualType qm { qmLenMod = LongLenMod } r + NoLenMod -> parseQualType qm{qmLenMod = LongLenMod} r ShortLenMod -> longShortError n - LongLenMod -> parseQualType qm { qmLenMod = LongLongLenMod } r - LongLongLenMod -> errorAt n $ "Type is too long." - C.CFloatType _ -> pure FloatAnnType + LongLenMod -> parseQualType qm{qmLenMod = LongLongLenMod} r + LongLongLenMod -> errorAt n "Type is too long." + C.CFloatType _ -> pure FloatAnnType C.CDoubleType _ -> pure DoubleAnnType C.CSignedType _ -> parseQualType qm r - C.CUnsigType _ -> parseQualType qm r - C.CBoolType _ -> pure $! IAnnType 1 + C.CUnsigType _ -> parseQualType qm r + C.CBoolType _ -> pure $! IAnnType 1 C.CComplexType n -> errorAt n "_Complex is not supported." - C.CInt128Type n -> errorAt n "__int128 is not supported." - C.CUInt128Type n -> errorAt n "__uint128 is not supported." + C.CInt128Type n -> errorAt n "__int128 is not supported." + C.CUInt128Type n -> errorAt n "__uint128 is not supported." C.CFloatNType _ _ n -> errorAt n "Floating point extensions are not supported." C.CEnumType en _ -> parseEnum en C.CSUType su _ -> parseStructUnion su - C.CTypeDef typeName n -> resolveTypedef n typeName + C.CTypeDef typeName n -> resolveTypedef n typeName C.CTypeOfExpr _ n -> errorAt n "typeof unsupported." C.CTypeOfType _ n -> errorAt n "typeof unsupported." C.CAtomicType _ n -> errorAt n "atomic is unsupported." @@ -142,46 +143,52 @@ parseStructUnion :: C.CStructUnion -> CParser AnnType parseStructUnion (C.CStruct tag _mi _mdecl _attrs n) = case tag of C.CStructTag -> errorAt n "Struct is not supported." - C.CUnionTag -> errorAt n "Union is not supported." + C.CUnionTag -> errorAt n "Union is not supported." -- | Parser derived declarators. parseTypeDerivedDecl :: [C.CDerivedDeclarator C.NodeInfo] -> AnnType -> CParser AnnType parseTypeDerivedDecl [] tp = pure tp -parseTypeDerivedDecl (C.CPtrDeclr _ _:rest) tp = do +parseTypeDerivedDecl (C.CPtrDeclr _ _ : rest) tp = do parseTypeDerivedDecl rest $! PtrAnnType tp -parseTypeDerivedDecl (C.CArrDeclr _ _ n:_) _tp = do - errorAt n $ "Arrays are not supported." -parseTypeDerivedDecl (C.CFunDeclr _ _ n:_) _tp = do - errorAt n $ "Function declarations are not supported in this context." +parseTypeDerivedDecl (C.CArrDeclr _ _ n : _) _tp = do + errorAt n "Arrays are not supported." +parseTypeDerivedDecl (C.CFunDeclr _ _ n : _) _tp = do + errorAt n "Function declarations are not supported in this context." -parseFullType :: [C.CDeclarationSpecifier C.NodeInfo] - -> [C.CDerivedDeclarator C.NodeInfo] - -> CParser AnnType +parseFullType :: + [C.CDeclarationSpecifier C.NodeInfo] -> + [C.CDerivedDeclarator C.NodeInfo] -> + CParser AnnType parseFullType tp derived = do parseTypeDerivedDecl derived =<< parseQualType emptyQualMods tp - parseFunDeclArg :: C.CDeclaration C.NodeInfo -> CParser AnnFunArg parseFunDeclArg (C.CDecl ctype initDeclList n) = do tp <- parseQualType emptyQualMods ctype case initDeclList of -- Only type [] -> do - pure $! AnnFunArg { funArgName = Nothing - , funArgType = tp - } + pure $! + AnnFunArg + { funArgName = Nothing + , funArgType = tp + } [(Just declr, Nothing, Nothing)] -> do case declr of C.CDeclr mnm typeMod Nothing [] _ -> do qtp <- parseTypeDerivedDecl typeMod tp - pure $! AnnFunArg { funArgName = C.identToString <$> mnm - , funArgType = qtp - } + pure $! + AnnFunArg + { funArgName = C.identToString <$> mnm + , funArgType = qtp + } _ -> errorAt n $ "Unsupported declarator:\n" ++ show declr ++ "\n" _ -> do - errorAt n $ "Invalid declaration list:\n" - ++ show initDeclList ++ "\n" + errorAt n $ + "Invalid declaration list:\n" + ++ show initDeclList + ++ "\n" parseFunDeclArg (C.CStaticAssert _ _ n) = errorAt n "Unexpected static assertion inside argument." @@ -190,18 +197,20 @@ parseExtDeclaration :: C.CExternalDeclaration C.NodeInfo -> CParser () parseExtDeclaration (C.CDeclExt d) = do case d of -- Parse typedef - C.CDecl (C.CStorageSpec (C.CTypedef _) : ctype) - [(Just (C.CDeclr (Just typeIdent) typeMod Nothing [] _), Nothing, Nothing)] - _n -> do - m <- gets typeDefs - let nm = identToByteString typeIdent - when (Map.member nm m) $ - errorAt (C.nodeInfo typeIdent) $ BSC.unpack nm ++ " already defines a type." - tp <- parseFullType ctype typeMod - modify $ \s -> s { typeDefs = Map.insert nm tp (typeDefs s) } + C.CDecl + (C.CStorageSpec (C.CTypedef _) : ctype) + [(Just (C.CDeclr (Just typeIdent) typeMod Nothing [] _), Nothing, Nothing)] + _n -> do + m <- gets typeDefs + let nm = identToByteString typeIdent + when (Map.member nm m) $ + errorAt (C.nodeInfo typeIdent) $ + BSC.unpack nm ++ " already defines a type." + tp <- parseFullType ctype typeMod + modify $ \s -> s{typeDefs = Map.insert nm tp (typeDefs s)} C.CDecl [C.CTypeSpec ctype] [(Just declr, Nothing, Nothing)] n -> do case declr of - C.CDeclr (Just fnIdent) (C.CFunDeclr cparams attrs _:derived) Nothing [] _ -> do + C.CDeclr (Just fnIdent) (C.CFunDeclr cparams attrs _ : derived) Nothing [] _ -> do unless (null attrs) $ do errorAt n "Functions may not have attributes." case cparams of @@ -209,33 +218,44 @@ parseExtDeclaration (C.CDeclExt d) = do Right (cArgs, varArgs) -> do args <- traverse parseFunDeclArg (V.fromList cArgs) retType <- parseTypeDerivedDecl derived =<< parseType emptyQualMods ctype - when varArgs $ errorAt n $ "Vararg functions unsupported." - let fd = AnnFunType { funRet = retType - , funArgs = args - } + when varArgs $ errorAt n "Vararg functions unsupported." + let fd = + AnnFunType + { funRet = retType + , funArgs = args + } m <- gets funDecls let nm = BSC.pack (C.identToString fnIdent) when (Map.member nm m) $ do errorAt n $ "Function " ++ C.identToString fnIdent ++ " already defined." - modify $ \s -> s { funDecls = Map.insert nm fd (funDecls s) } + modify $ \s -> s{funDecls = Map.insert nm fd (funDecls s)} C.CDeclr mname indirections masmName attrs declNode -> do - errorAt n $ "Unexpected external declarator:\n" - ++ show mname ++ "\n" - ++ show indirections ++ "\n" - ++ show masmName ++ "\n" - ++ show attrs ++ "\n" - ++ show declNode ++ "\n" - C.CDecl [C.CTypeSpec _] (_:_:_) n -> errorAt n "More than one thing in the list" + errorAt n $ + "Unexpected external declarator:\n" + ++ show mname + ++ "\n" + ++ show indirections + ++ "\n" + ++ show masmName + ++ "\n" + ++ show attrs + ++ "\n" + ++ show declNode + ++ "\n" + C.CDecl [C.CTypeSpec _] (_ : _ : _) n -> errorAt n "More than one thing in the list" C.CDecl [C.CTypeSpec _] [_] n -> errorAt n "Not (J, N, N)" C.CDecl specs bleh n -> do - errorAt n $ "Unexpected declaration:\n" - ++ show specs ++ "\n" - ++ show bleh ++ "\n" - ++ show n + errorAt n $ + "Unexpected declaration:\n" + ++ show specs + ++ "\n" + ++ show bleh + ++ "\n" + ++ show n C.CStaticAssert _ _ n -> do - errorAt n $ "Static assertions not supported." + errorAt n "Static assertions not supported." parseExtDeclaration (C.CFDefExt d) = do - errorAt (C.annotation d) "Function definitions are not supported." + errorAt (C.annotation d) "Function definitions are not supported." parseExtDeclaration (C.CAsmExt _ n) = do errorAt n "Assembly is not supported." @@ -257,17 +277,19 @@ parseHeader fname inputStream = do ctu <- case C.parseC inputStream (C.initPos fname) of Left e -> do - Left $ unlines - [ "Language.C could not parse header " ++ fname, - " " ++ show e - ] + Left $ + unlines + [ "Language.C could not parse header " ++ fname + , " " ++ show e + ] Right ctu -> pure ctu -- Parse the compiled code. case parseTransUnit ctu of - Left (_,e) -> do - Left $ unlines $ - ("Error parsing header in " ++ fname) - : [ " " ++ m | m <- lines e ] + Left (_, e) -> do + Left $ + unlines $ + ("Error parsing header in " ++ fname) + : [" " ++ m | m <- lines e] Right r -> do pure $! r diff --git a/src/Reopt/TypeInference/Pretty.hs b/src/Reopt/TypeInference/Pretty.hs index 756197cf..d64127b9 100644 --- a/src/Reopt/TypeInference/Pretty.hs +++ b/src/Reopt/TypeInference/Pretty.hs @@ -98,15 +98,17 @@ ppFunction :: ppFunction mcs fn | Just tyvs <- Map.lookup (fnName fn) (mcAssignTyVars mcs) , Just fty <- Map.lookup (fnAddr fn) (mcFunTypes mcs) = - let tyvs' = Map.compose (mcTypeMap mcs) tyvs - atp = parens (commas (zipWith ppArg [0 ..] (fttvArgs fty))) - rtp = maybe "void" pretty (fttvRet fty) - in vcat - [ "function " <> nm <> " @ " <> addr <> atp <> " : " <> rtp - , lbrace - , nest 4 $ vcat (ppBlock tyvs' <$> fnBlocks fn) - , rbrace - ] + let + tyvs' = Map.compose (mcTypeMap mcs) tyvs + atp = parens (commas (zipWith ppArg [0 ..] (fttvArgs fty))) + rtp = maybe "void" pretty (fttvRet fty) + in + vcat + [ "function " <> nm <> " @ " <> addr <> atp <> " : " <> rtp + , lbrace + , nest 4 $ vcat (ppBlock tyvs' <$> fnBlocks fn) + , rbrace + ] where nm = pretty (BSC.unpack (fnName fn)) addr = pretty (fnAddr fn) diff --git a/src/Reopt/Utils/Exit.hs b/src/Reopt/Utils/Exit.hs index 995dfe16..90935304 100644 --- a/src/Reopt/Utils/Exit.hs +++ b/src/Reopt/Utils/Exit.hs @@ -1,21 +1,25 @@ -module Reopt.Utils.Exit - ( handleEitherStringWithExit, - handleEitherWithExit, - handleExceptTStringWithExit, - handleExceptTWithExit, - runReoptInIO, - reportErrorAndExit, - checkedReadFile, - ) +module Reopt.Utils.Exit ( + handleEitherStringWithExit, + handleEitherWithExit, + handleExceptTStringWithExit, + handleExceptTWithExit, + runReoptInIO, + reportErrorAndExit, + checkedReadFile, +) where -import Control.Exception -import Control.Monad.Except -import qualified Data.ByteString as BS -import Data.Macaw.Utils.IncComp -import System.Exit -import System.IO -import System.IO.Error +import Control.Exception (catch) +import Control.Monad.Except (ExceptT, runExceptT) +import Data.ByteString qualified as BS +import Data.Macaw.Utils.IncComp ( + IncCompM, + processIncCompLogs, + runIncCompM, + ) +import System.Exit (exitFailure) +import System.IO (hPrint, hPutStrLn, stderr) +import System.IO.Error (ioeGetErrorType, isDoesNotExistError) handleEitherWithExit :: Show e => Either e a -> IO a handleEitherWithExit r = do @@ -59,10 +63,10 @@ checkedReadFile :: FilePath -> IO BS.ByteString checkedReadFile path = do let h e | isDoesNotExistError e = do - hPutStrLn stderr $ path ++ " does not exist." - exitFailure + hPutStrLn stderr $ path ++ " does not exist." + exitFailure | otherwise = do - hPutStrLn stderr (show e) - hPutStrLn stderr (show (ioeGetErrorType e)) - exitFailure + hPrint stderr e + hPrint stderr (ioeGetErrorType e) + exitFailure BS.readFile path `catch` h diff --git a/src/Reopt/Utils/Hex.hs b/src/Reopt/Utils/Hex.hs index adb0171b..9781030a 100644 --- a/src/Reopt/Utils/Hex.hs +++ b/src/Reopt/Utils/Hex.hs @@ -1,4 +1,4 @@ -module Reopt.Utils.Hex (Hex(..)) where +module Reopt.Utils.Hex (Hex (..)) where import Numeric (showHex) @@ -6,5 +6,6 @@ newtype Hex a = Hex a deriving (Eq, Ord) instance (Integral a, Show a) => Show (Hex a) where - show (Hex v) | v >= 0 = showHex v "" - | otherwise = '-' : showHex (negate (toInteger v)) "" + show (Hex v) + | v >= 0 = showHex v "" + | otherwise = '-' : showHex (negate (toInteger v)) "" diff --git a/src/Reopt/Utils/Printf.hs b/src/Reopt/Utils/Printf.hs index 236ba6fe..8a50e71f 100644 --- a/src/Reopt/Utils/Printf.hs +++ b/src/Reopt/Utils/Printf.hs @@ -1,44 +1,57 @@ -module Reopt.Utils.Printf - ( unpackFormat - , UnpackedRep(..) - , Specifier(..) - , UnpackError(..) - , Flag(..) - , IntSpecifier(..) - , Length(..) - , ppLength - , Conversion(..) - ) where - -import Control.Monad.Except -import Control.Monad.Identity -import Control.Monad.Reader -import Control.Monad.State -import Data.ByteString (ByteString) -import qualified Data.ByteString.Char8 as BSC -import GHC.Natural +module Reopt.Utils.Printf ( + Conversion (..), + Flag (..), + IntSpecifier (..), + Length (..), + Specifier (..), + UnpackedRep (..), + UnpackError (..), + ppLength, + unpackFormat, +) where + +import Control.Monad (when) +import Control.Monad.Except ( + ExceptT, + MonadError (throwError), + runExceptT, + ) +import Control.Monad.Identity (Identity (Identity)) +import Control.Monad.Reader (ReaderT (ReaderT, runReaderT)) +import Control.Monad.State ( + MonadState (get), + State, + StateT (StateT), + runState, + ) +import Control.Monad.Trans (lift) +import Data.ByteString (ByteString) +import Data.ByteString.Char8 qualified as BSC +import Data.Char (isDigit) +import Data.Functor (($>)) +import GHC.Natural (Natural) ----------------------------------------------------------------------- -- Utilities slice :: ByteString -> Int -> Int -> ByteString -slice s l h = BSC.take (h-l) (BSC.drop l s) +slice s l h = BSC.take (h - l) (BSC.drop l s) ----------------------------------------------------------------------- -- Declarations -- | A flag in the printf representation data Flag - = LeftAlign - | PrependPlus - | PrependSpace - | PrependZero - | ThousandsGrouping - | AlternateForm - | AlternateDigits + = LeftAlign + | PrependPlus + | PrependSpace + | PrependZero + | ThousandsGrouping + | AlternateForm + | AlternateDigits deriving (Show) --- | Match a flag character +-- | Match a flag character matchFlag :: Char -> Maybe Flag matchFlag '-' = Just LeftAlign matchFlag '+' = Just PrependPlus @@ -51,41 +64,45 @@ matchFlag _ = Nothing -- | An optional integer specifier for the width or precision fields. data IntSpecifier - -- | Indicates no argument was given - = UnspecifiedInt - -- ! @IntLit x@ is a natural literal of @x@. - | IntLit !Integer - -- ! @NextArg@ indicates specifier of '*' which indicates - -- next argument has the value. - | NextArg - -- | @SpecificArgWidth n@ indicates width specifier with form '*n$'. - -- - -- This indicates argument at position @n@ gives the width. - -- The argument should be positive. - | SpecificArg !Natural + = -- | Indicates no argument was given + UnspecifiedInt + | -- ! @IntLit x@ is a natural literal of @x@. + IntLit !Integer + | -- ! @NextArg@ indicates specifier of '*' which indicates + -- next argument has the value. + NextArg + | -- | @SpecificArgWidth n@ indicates width specifier with form '*n$'. + -- + -- This indicates argument at position @n@ gives the width. + -- The argument should be positive. + SpecificArg !Natural deriving (Show) -- | A printf length specifier. data Length - = HH-- ^ @hh@. - | H -- ^ @h@ - | LongInt -- ^ @l@ - | LLongInt -- ^ @ll@ - | LongDouble -- @L@ - | Q -- @q@ -- synonym for @ll@ that should not be used. - | J -- @j@ - | Z -- @z@ - | T -- @t@s - | NoLength + = -- | @hh@. + HH + | -- | @h@ + H + | -- | @l@ + LongInt + | -- | @ll@ + LLongInt + | LongDouble -- @L@ + | Q -- @q@ -- synonym for @ll@ that should not be used. + | J -- @j@ + | Z -- @z@ + | T -- @t@s + | NoLength deriving (Show) ppLength :: Length -> String ppLength len = case len of HH -> "hh" - H -> "h" - LongInt -> "l" - LLongInt -> "ll" + H -> "h" + LongInt -> "l" + LLongInt -> "ll" LongDouble -> "L" Q -> "q" J -> "j" @@ -95,19 +112,28 @@ ppLength len = -- | A printf conversion specifier. data Conversion - = IntDecimal -- ^ @d@ or @i@. - | UnsignedOctal -- ^ @o@ - | UnsignedDecimal -- ^ @u@. - | UnsignedHexadecimal -- ^ @x@ and @X@ - | FloatExponent -- ^ @e@ or @E@ - | FloatDecimal -- @f@ or @F@ - | FloatGeneral -- @g@ or @G@. - | FloatHexadecimal -- @a@ or @A@ - | Char -- ^ @c@ - | String -- ^ @s@ - | Pointer -- ^ @p@ - | CharCount -- ^ @n@ - | PrintStderr -- @m@ Glibc extension saying to use stderr + = -- | @d@ or @i@. + IntDecimal + | -- | @o@ + UnsignedOctal + | -- | @u@. + UnsignedDecimal + | -- | @x@ and @X@ + UnsignedHexadecimal + | -- | @e@ or @E@ + FloatExponent + | FloatDecimal -- @f@ or @F@ + | FloatGeneral -- @g@ or @G@. + | FloatHexadecimal -- @a@ or @A@ + | -- | @c@ + Char + | -- | @s@ + String + | -- | @p@ + Pointer + | -- | @n@ + CharCount + | PrintStderr -- @m@ Glibc extension saying to use stderr deriving (Show) -- | Match a character to a conversion @@ -136,50 +162,55 @@ matchConversion c = _ -> Nothing -- | An argument specifier. -data Specifier = - Specifier { specifierParameter :: Natural --- ^ 1-based index of argument or zero if parameters not used. - , specifierFlags :: ![Flag] -- ^ Flags read so far. - , specifierWidth :: !IntSpecifier - , specifierPrecision :: !IntSpecifier - , specifierLength :: !Length - , specifierConv :: !Conversion - } +data Specifier = Specifier + { specifierParameter :: Natural --- ^ 1-based index of argument or zero if parameters not used. + , specifierFlags :: ![Flag] + -- ^ Flags read so far. + , specifierWidth :: !IntSpecifier + , specifierPrecision :: !IntSpecifier + , specifierLength :: !Length + , specifierConv :: !Conversion + } deriving (Show) type StringIndex = Int data UnpackError - -- | String terminated while parsing a specifier that started at given index. - = UnexpectedEnd !StringIndex - -- | @UnexpectedConversion i c@ an unexpected conversion character @c@ at index @i@. - | UnexpectedConversion !StringIndex !Char - -- | @MissingArgTerminator i@ indicates we expected a '$' at index i to - -- terminate an argugment index in a width or length. - | MissingArgTerminator !StringIndex - -- | @InvalidFieldWidth i@ could not parse field width at index i. - | InvalidFieldWidth !StringIndex + = -- | String terminated while parsing a specifier that started at given index. + UnexpectedEnd !StringIndex + | -- | @UnexpectedConversion i c@ an unexpected conversion character @c@ at index @i@. + UnexpectedConversion !StringIndex !Char + | -- | @MissingArgTerminator i@ indicates we expected a '$' at index i to + -- terminate an argugment index in a width or length. + MissingArgTerminator !StringIndex + | -- | @InvalidFieldWidth i@ could not parse field width at index i. + InvalidFieldWidth !StringIndex deriving (Show) -- | The decoded representation of a printf format string. data UnpackedRep = UnpackedTerm !ByteString - -- A string that had an escape sequence we needed to escape - | UnpackedLiteral !ByteString !UnpackedRep + | -- A string that had an escape sequence we needed to escape + UnpackedLiteral !ByteString !UnpackedRep | UnpackedSpecifier !Specifier !UnpackedRep | UnpackedError !UnpackError deriving (Show) -- | Add a literal string to the front of the rep. -unpackPrefix :: BSC.ByteString -- ^ Bytestring we are extractng from - -> Int -- ^ Starting index to extract from - -> Int -- ^ One past the last index included in bytestring - -> UnpackedRep -- ^ Unpacked representation to include fixed bits. - -> UnpackedRep -unpackPrefix bs s i r = - if s == i then - r - else - UnpackedLiteral (slice bs s i) r +unpackPrefix :: + -- | Bytestring we are extractng from + BSC.ByteString -> + -- | Starting index to extract from + Int -> + -- | One past the last index included in bytestring + Int -> + -- | Unpacked representation to include fixed bits. + UnpackedRep -> + UnpackedRep +unpackPrefix bs s i r = + if s == i + then r + else UnpackedLiteral (slice bs s i) r ----------------------------------------------------------------------- -- Unpacking function @@ -201,13 +232,13 @@ atEnd = ReaderT $ \bs -> StateT $ \i -> Identity (i >= BSC.length bs, i) peekChar :: Parser Char peekChar = ReaderT $ \bs -> StateT $ \i -> let c = if i < BSC.length bs then BSC.index bs i else toEnum 0 - in seq c $ Identity $ (c, i) + in seq c $ Identity (c, i) -- | Skip next character in parser skipChar :: Parser () skipChar = ReaderT $ \bs -> StateT $ \i -> - let j = if i < BSC.length bs then i+1 else i - in seq j $ Identity $ ((), j) + let j = if i < BSC.length bs then i + 1 else i + in seq j $ Identity ((), j) asDigit :: Char -> Int asDigit c = fromEnum c - fromEnum '0' @@ -216,22 +247,22 @@ asDigit c = fromEnum c - fromEnum '0' -- adds them to @c@. parseNatDigits :: Natural -> Parser Natural parseNatDigits prev = do - c <- peekChar - if '0' <= c && c <= '9' then do - skipChar - parseNatDigits $! 10*prev + fromIntegral (asDigit c) - else - pure prev + c <- peekChar + if isDigit c + then do + skipChar + parseNatDigits $! 10 * prev + fromIntegral (asDigit c) + else pure prev -- | Parse a number that starts with 1..9 or return 0 parseNzNat :: Parser Natural parseNzNat = do c <- peekChar - if '1' <= c && c <= '9' then do - skipChar - parseNatDigits (fromIntegral (asDigit c)) - else - pure 0 + if '1' <= c && c <= '9' + then do + skipChar + parseNatDigits (fromIntegral (asDigit c)) + else pure 0 -- | Read the length from the character stream. parseLength :: Parser Length @@ -242,19 +273,19 @@ parseLength = do skipChar d <- peekChar case d of - 'h' -> skipChar *> pure HH + 'h' -> skipChar $> HH _ -> pure H 'l' -> do skipChar d <- peekChar case d of - 'l' -> skipChar *> pure LLongInt + 'l' -> skipChar $> LLongInt _ -> pure LongInt - 'L' -> skipChar *> pure LongDouble - 'q' -> skipChar *> pure Q - 'j' -> skipChar *> pure J - 'z' -> skipChar *> pure Z - 't' -> skipChar *> pure T + 'L' -> skipChar $> LongDouble + 'q' -> skipChar $> Q + 'j' -> skipChar $> J + 'z' -> skipChar $> Z + 't' -> skipChar $> T _ -> pure NoLength -- | Parse the flags @@ -262,7 +293,7 @@ parseFlags' :: [Flag] -> Parser [Flag] parseFlags' prev = do c <- peekChar case matchFlag c of - Just f -> skipChar *> parseFlags' (f:prev) + Just f -> skipChar *> parseFlags' (f : prev) Nothing -> pure (reverse prev) -- | Parse the flags in a specifier. @@ -274,16 +305,16 @@ parseFlags = parseFlags' [] parseArgIndex :: ExceptT UnpackError Parser IntSpecifier parseArgIndex = do argIndex <- lift parseNzNat - if argIndex == 0 then - pure NextArg - else do - d <- lift peekChar - when (d /= '$') $ do - idx <- lift getIndex - throwError $ MissingArgTerminator idx - lift skipChar - pure (SpecificArg argIndex) - + if argIndex == 0 + then pure NextArg + else do + d <- lift peekChar + when (d /= '$') $ do + idx <- lift getIndex + throwError $ MissingArgTerminator idx + lift skipChar + pure (SpecificArg argIndex) + parseWidth :: ExceptT UnpackError Parser IntSpecifier parseWidth = do i <- lift getIndex @@ -298,30 +329,32 @@ parseWidth = do when (d == 0) $ do throwError $ InvalidFieldWidth i pure $! IntLit (negate (toInteger d)) - _ | '1' <= c, c <= '9' -> lift $ do - skipChar - val <- parseNatDigits (fromIntegral (asDigit c)) - pure $! IntLit (toInteger val) + _ + | '1' <= c + , c <= '9' -> lift $ do + skipChar + val <- parseNatDigits (fromIntegral (asDigit c)) + pure $! IntLit (toInteger val) | otherwise -> do - pure $! UnspecifiedInt + pure UnspecifiedInt -- | Peek the precision setting parsePrecision :: ExceptT UnpackError Parser IntSpecifier parsePrecision = do d <- lift peekChar - if d == '.' then do - c <- lift peekChar - case c of - '*' -> do - lift skipChar - parseArgIndex - _ -> lift $ do - skipChar - val <- parseNatDigits 0 - pure $! IntLit (toInteger val) - else - pure $! UnspecifiedInt - + if d == '.' + then do + c <- lift peekChar + case c of + '*' -> do + lift skipChar + parseArgIndex + _ -> lift $ do + skipChar + val <- parseNatDigits 0 + pure $! IntLit (toInteger val) + else pure UnspecifiedInt + -- | Parse a specifier -- returning either an error or the specifier. parseSpecifier :: ExceptT UnpackError Parser Specifier parseSpecifier = do @@ -329,62 +362,68 @@ parseSpecifier = do lift skipChar paramOrWidth <- lift parseNzNat (param, flags, width) <- - if paramOrWidth == 0 then do - flags <- lift parseFlags - width <- parseWidth - pure (0, flags, width) - else do - c <- lift peekChar - if c == '$' then do - lift skipChar - flags <- lift parseFlags - width <- parseWidth - pure (paramOrWidth, flags, width) - else - pure (0, [], IntLit (toInteger paramOrWidth)) + if paramOrWidth == 0 + then do + flags <- lift parseFlags + width <- parseWidth + pure (0, flags, width) + else do + c <- lift peekChar + if c == '$' + then do + lift skipChar + flags <- lift parseFlags + width <- parseWidth + pure (paramOrWidth, flags, width) + else pure (0, [], IntLit (toInteger paramOrWidth)) prec <- parsePrecision len <- lift parseLength e <- lift atEnd - if e then - throwError $ UnexpectedEnd specStart - else do - c <- lift peekChar - case matchConversion c of - Nothing -> do - idx <- lift getIndex - throwError $ UnexpectedConversion idx c - Just co -> do - lift skipChar - pure $! Specifier { specifierParameter = param - , specifierFlags = flags - , specifierWidth = width - , specifierPrecision = prec - , specifierLength = len - , specifierConv = co - } + if e + then throwError $ UnexpectedEnd specStart + else do + c <- lift peekChar + case matchConversion c of + Nothing -> do + idx <- lift getIndex + throwError $ UnexpectedConversion idx c + Just co -> do + lift skipChar + pure $! + Specifier + { specifierParameter = param + , specifierFlags = flags + , specifierWidth = width + , specifierPrecision = prec + , specifierLength = len + , specifierConv = co + } -- | Unpack after a percent -unpackSpecifier :: BSC.ByteString -- ^ String to parse - -> Int -- ^ Start of specifier - -> UnpackedRep +unpackSpecifier :: + -- | String to parse + BSC.ByteString -> + -- | Start of specifier + Int -> + UnpackedRep unpackSpecifier bs i = case runParser bs i (runExceptT parseSpecifier) of (Left e, _) -> UnpackedError e (Right s, j) -> UnpackedSpecifier s (unpackFormat' bs j j) unpackFormat' :: BSC.ByteString -> Int -> Int -> UnpackedRep -unpackFormat' bs s i = - if i >= BSC.length bs then do - UnpackedTerm (BSC.drop s bs) - else if BSC.index bs i == '%' then - if i+1 >= BSC.length bs then - UnpackedError (UnexpectedEnd i) - else if BSC.index bs (i+1) == '%' then - unpackPrefix bs s (i+1) (unpackFormat' bs (i+2) (i+2)) - else - unpackPrefix bs s i (unpackSpecifier bs i) - else - unpackFormat' bs s (i+1) - +unpackFormat' bs s i + | i >= BSC.length bs = UnpackedTerm (BSC.drop s bs) + | otherwise = + if BSC.index bs i == '%' + then + if i + 1 >= BSC.length bs + then UnpackedError (UnexpectedEnd i) + else + if BSC.index bs (i + 1) == '%' + then unpackPrefix bs s (i + 1) (unpackFormat' bs (i + 2) (i + 2)) + else unpackPrefix bs s i (unpackSpecifier bs i) + else unpackFormat' bs s (i + 1) + unpackFormat :: BSC.ByteString -> UnpackedRep unpackFormat bs = unpackFormat' bs 0 0