From 78b2bbe0c56b1a294fb3ba9bf98693a271ed18f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Nicklisch-Franken?= Date: Wed, 18 Sep 2024 15:44:44 +0200 Subject: [PATCH 1/7] cardano-node: added metric served.block.latest --- .../src/Cardano/Node/Tracing/Tracers.hs | 3 + .../Cardano/Node/Tracing/Tracers/Consensus.hs | 59 +++++++++++++++++++ .../Cardano/Node/Tracing/Tracers/Startup.hs | 4 +- 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/cardano-node/src/Cardano/Node/Tracing/Tracers.hs b/cardano-node/src/Cardano/Node/Tracing/Tracers.hs index b105e66bc08..4fab3056553 100644 --- a/cardano-node/src/Cardano/Node/Tracing/Tracers.hs +++ b/cardano-node/src/Cardano/Node/Tracing/Tracers.hs @@ -276,6 +276,8 @@ mkConsensusTracers configReflection trBase trForward mbTrEKG _trDataPoint trConf ["BlockFetch", "Server"] configureTracers configReflection trConfig [blockFetchServerTr] + !servedBlockLatestTr <- servedBlockLatest mbTrEKG + !forgeKESInfoTr <- mkCardanoTracer trBase trForward mbTrEKG ["Forge", "StateInfo"] @@ -350,6 +352,7 @@ mkConsensusTracers configReflection trBase trForward mbTrEKG _trDataPoint trConf <> traceWith blockFetchClientMetricsTr , Consensus.blockFetchServerTracer = Tracer $ traceWith blockFetchServerTr + <> traceWith servedBlockLatestTr , Consensus.forgeStateInfoTracer = Tracer $ traceWith (traceAsKESInfo (Proxy @blk) forgeKESInfoTr) , Consensus.gddTracer = Tracer $ diff --git a/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs b/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs index 0ec5a5db4bc..73c10c61389 100644 --- a/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs +++ b/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs @@ -19,6 +19,7 @@ module Cardano.Node.Tracing.Tracers.Consensus , forgeTracerTransform , initialClientMetrics , calculateBlockFetchClientMetrics + , servedBlockLatest , ClientMetrics ) where @@ -981,6 +982,64 @@ instance LogFormatting SanityCheckIssue where "Configuration contains multiple security parameters: " <> Text.pack (show e) +-------------------------------------------------------------------------------- +-- Metric for server block latest +-------------------------------------------------------------------------------- + +data ServedBlock = ServedBlock { + maxSlotNo :: SlotNo + , localUp :: Word64 + , servedBlocksLatest :: Word64 +} + +instance MetaTrace ServedBlock where + namespaceFor ServedBlock {} = + Namespace [] ["ServedBlockLatest"] + + severityFor (Namespace [] ["ServedBlockLatest"]) _ = Just + Debug + severityFor _ _ = Nothing + + documentFor _ = Nothing + + metricsDocFor (Namespace [] ["ServedBlockLatest"]) = + [("served.block.latest", "")] + metricsDocFor _ = [] + + allNamespaces = [Namespace [] ["ServedBlockLatest"]] + + +instance LogFormatting ServedBlock where + forMachine _mDtal ServedBlock {} = mempty + + asMetrics ServedBlock {..} = + [IntM "served.block.latest" (fromIntegral servedBlocksLatest)] + + +emptyServedBlocks :: ServedBlock +emptyServedBlocks = ServedBlock 0 0 0 + +servedBlockLatest :: + Maybe (Trace IO FormattedMessage) + -> IO (Trace IO (TraceLabelPeer peer (TraceBlockFetchServerEvent blk))) +servedBlockLatest mbTrEKG = + foldTraceM calculateServedBlockLatest emptyServedBlocks + (metricsFormatter + (mkMetricsTracer mbTrEKG)) + +calculateServedBlockLatest :: ServedBlock + -> LoggingContext + -> TraceLabelPeer peer (TraceBlockFetchServerEvent blk) + -> IO ServedBlock +calculateServedBlockLatest ServedBlock{..} _lc (TraceLabelPeer _ (TraceBlockFetchServerSendBlock p)) = + case pointSlot p of + Origin -> return $ ServedBlock maxSlotNo localUp servedBlocksLatest + At slotNo -> + case compare maxSlotNo slotNo of + LT -> return $ ServedBlock slotNo (localUp + 1) (localUp + 1) + GT -> return $ ServedBlock maxSlotNo localUp servedBlocksLatest + EQ -> return $ ServedBlock maxSlotNo (localUp + 1) (localUp + 1) + -------------------------------------------------------------------------------- -- TxInbound Tracer -------------------------------------------------------------------------------- diff --git a/cardano-node/src/Cardano/Node/Tracing/Tracers/Startup.hs b/cardano-node/src/Cardano/Node/Tracing/Tracers/Startup.hs index 3553093949c..74d39504abc 100644 --- a/cardano-node/src/Cardano/Node/Tracing/Tracers/Startup.hs +++ b/cardano-node/src/Cardano/Node/Tracing/Tracers/Startup.hs @@ -279,7 +279,9 @@ instance ( Show (BlockNodeToNodeVersion blk) NotEffective -> 0 )] asMetrics (BICommon BasicInfoCommon {..}) = - [ PrometheusM "basicInfo" [("nodeStartTime", (pack . show) biNodeStartTime)]] + [ PrometheusM "basicInfo" [("nodeStartTime", (pack . show) biNodeStartTime)] + , IntM "node.start.time" ((ceiling . utcTimeToPOSIXSeconds) biNodeStartTime) + ] asMetrics _ = [] instance MetaTrace (StartupTrace blk) where From 06ece678d50ee080169cf27fb52a9ccabc7fb56f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Nicklisch-Franken?= Date: Mon, 23 Sep 2024 10:37:05 +0200 Subject: [PATCH 2/7] Fixes --- .../Cardano/Node/Tracing/Tracers/Consensus.hs | 102 ++++++++---------- .../Cardano/Node/Tracing/Tracers/Startup.hs | 3 +- 2 files changed, 46 insertions(+), 59 deletions(-) diff --git a/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs b/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs index 73c10c61389..f488b8cc98f 100644 --- a/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs +++ b/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs @@ -865,7 +865,9 @@ instance MetaTrace (TraceBlockFetchServerEvent blk) where severityFor _ _ = Nothing metricsDocFor (Namespace [] ["SendBlock"]) = - [("served.block", "")] + [("served.block", "") + ,("served.block.latest", "")] + metricsDocFor _ = [] documentFor (Namespace [] ["SendBlock"]) = Just @@ -874,6 +876,47 @@ instance MetaTrace (TraceBlockFetchServerEvent blk) where allNamespaces = [Namespace [] ["SendBlock"]] +-------------------------------------------------------------------------------- +-- Metric for server block latest +-------------------------------------------------------------------------------- + +data ServedBlock = ServedBlock { + maxSlotNo :: SlotNo + , localUp :: Word64 + , servedBlocksLatest :: Word64 +} + +instance LogFormatting ServedBlock where + forMachine _mDtal ServedBlock {} = mempty + + asMetrics ServedBlock {..} = + [IntM "served.block.latest" (fromIntegral servedBlocksLatest)] + +emptyServedBlocks :: ServedBlock +emptyServedBlocks = ServedBlock 0 0 0 + +servedBlockLatest :: + Maybe (Trace IO FormattedMessage) + -> IO (Trace IO (TraceLabelPeer peer (TraceBlockFetchServerEvent blk))) +servedBlockLatest mbTrEKG = + foldTraceM calculateServedBlockLatest emptyServedBlocks + (metricsFormatter + (mkMetricsTracer mbTrEKG)) + +calculateServedBlockLatest :: ServedBlock + -> LoggingContext + -> TraceLabelPeer peer (TraceBlockFetchServerEvent blk) + -> IO ServedBlock +calculateServedBlockLatest ServedBlock{..} _lc (TraceLabelPeer _ (TraceBlockFetchServerSendBlock p)) = + case pointSlot p of + Origin -> return $ ServedBlock maxSlotNo localUp servedBlocksLatest + At slotNo -> + case compare maxSlotNo slotNo of + LT -> return $ ServedBlock slotNo (localUp + 1) (localUp + 1) + GT -> return $ ServedBlock maxSlotNo localUp servedBlocksLatest + EQ -> return $ ServedBlock maxSlotNo (localUp + 1) (localUp + 1) + + -------------------------------------------------------------------------------- -- Gdd Tracer -------------------------------------------------------------------------------- @@ -982,63 +1025,6 @@ instance LogFormatting SanityCheckIssue where "Configuration contains multiple security parameters: " <> Text.pack (show e) --------------------------------------------------------------------------------- --- Metric for server block latest --------------------------------------------------------------------------------- - -data ServedBlock = ServedBlock { - maxSlotNo :: SlotNo - , localUp :: Word64 - , servedBlocksLatest :: Word64 -} - -instance MetaTrace ServedBlock where - namespaceFor ServedBlock {} = - Namespace [] ["ServedBlockLatest"] - - severityFor (Namespace [] ["ServedBlockLatest"]) _ = Just - Debug - severityFor _ _ = Nothing - - documentFor _ = Nothing - - metricsDocFor (Namespace [] ["ServedBlockLatest"]) = - [("served.block.latest", "")] - metricsDocFor _ = [] - - allNamespaces = [Namespace [] ["ServedBlockLatest"]] - - -instance LogFormatting ServedBlock where - forMachine _mDtal ServedBlock {} = mempty - - asMetrics ServedBlock {..} = - [IntM "served.block.latest" (fromIntegral servedBlocksLatest)] - - -emptyServedBlocks :: ServedBlock -emptyServedBlocks = ServedBlock 0 0 0 - -servedBlockLatest :: - Maybe (Trace IO FormattedMessage) - -> IO (Trace IO (TraceLabelPeer peer (TraceBlockFetchServerEvent blk))) -servedBlockLatest mbTrEKG = - foldTraceM calculateServedBlockLatest emptyServedBlocks - (metricsFormatter - (mkMetricsTracer mbTrEKG)) - -calculateServedBlockLatest :: ServedBlock - -> LoggingContext - -> TraceLabelPeer peer (TraceBlockFetchServerEvent blk) - -> IO ServedBlock -calculateServedBlockLatest ServedBlock{..} _lc (TraceLabelPeer _ (TraceBlockFetchServerSendBlock p)) = - case pointSlot p of - Origin -> return $ ServedBlock maxSlotNo localUp servedBlocksLatest - At slotNo -> - case compare maxSlotNo slotNo of - LT -> return $ ServedBlock slotNo (localUp + 1) (localUp + 1) - GT -> return $ ServedBlock maxSlotNo localUp servedBlocksLatest - EQ -> return $ ServedBlock maxSlotNo (localUp + 1) (localUp + 1) -------------------------------------------------------------------------------- -- TxInbound Tracer diff --git a/cardano-node/src/Cardano/Node/Tracing/Tracers/Startup.hs b/cardano-node/src/Cardano/Node/Tracing/Tracers/Startup.hs index 74d39504abc..65e82a1d844 100644 --- a/cardano-node/src/Cardano/Node/Tracing/Tracers/Startup.hs +++ b/cardano-node/src/Cardano/Node/Tracing/Tracers/Startup.hs @@ -403,7 +403,8 @@ instance MetaTrace (StartupTrace blk) where metricsDocFor (Namespace _ ["BlockForgingUpdate"]) = [("forging_enabled","Can this node forge blocks? (Is it provided with block forging credentials) 0 = no, 1 = yes")] metricsDocFor (Namespace _ ["Common"]) = - [("systemStartTime","The UTC time this node was started.")] + [("systemStartTime","The UTC time this node was started."), + ("node.start.time","The UTC time this node was started represented in POSIX seconds.")] metricsDocFor _ = [] From 39d5e0311432a06dcaef0cbe579edfadbd2f6d44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Nicklisch-Franken?= Date: Fri, 27 Sep 2024 18:46:10 +0200 Subject: [PATCH 3/7] cardano-node: Rewrite of client tracer --- .../Cardano/Node/Tracing/Tracers/Consensus.hs | 172 ++++++++++-------- 1 file changed, 92 insertions(+), 80 deletions(-) diff --git a/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs b/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs index f488b8cc98f..d5906797044 100644 --- a/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs +++ b/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs @@ -84,6 +84,7 @@ import qualified Data.Text as Text import Data.Time (DiffTime, NominalDiffTime) import Data.Word (Word32, Word64) + instance (LogFormatting adr, Show adr) => LogFormatting (ConnectionId adr) where forMachine _dtal (ConnectionId local' remote) = mconcat [ "connectionId" .= String (showT local' @@ -517,14 +518,16 @@ incCdf v cdf = then cdf {counter = counter cdf + 1} else cdf + data ClientMetrics = ClientMetrics { - cmSlotMap :: IntPSQ Word64 NominalDiffTime - , cmCdf1sVar :: CdfCounter - , cmCdf3sVar :: CdfCounter - , cmCdf5sVar :: CdfCounter - , cmDelay :: Double + cmSlotMap :: IntPSQ Word64 NominalDiffTime + , cmCdf1sVar :: CdfCounter + , cmCdf3sVar :: CdfCounter + , cmCdf5sVar :: CdfCounter + , cmDelay :: Double , cmBlockSize :: Word32 - , cmTraceIt :: Bool + , cmTraceIt :: Bool + , cmTraceVars :: Bool } instance LogFormatting ClientMetrics where @@ -532,29 +535,32 @@ instance LogFormatting ClientMetrics where asMetrics ClientMetrics {..} = if cmTraceIt then - let size = Pq.size cmSlotMap - msgs = - [ DoubleM "blockfetchclient.blockdelay" - cmDelay - , IntM "blockfetchclient.blocksize" - (fromIntegral cmBlockSize) - , DoubleM "blockfetchclient.blockdelay.cdfOne" - (fromIntegral (counter cmCdf1sVar) / fromIntegral size) - , DoubleM "blockfetchclient.blockdelay.cdfThree" - (fromIntegral (counter cmCdf3sVar) / fromIntegral size) - , DoubleM "blockfetchclient.blockdelay.cdfFive" - (fromIntegral (counter cmCdf5sVar) / fromIntegral size) - ] - in if cmDelay > 5 - then - CounterM "blockfetchclient.lateblocks" Nothing - : msgs - else msgs - else [] + let size = Pq.size cmSlotMap + msgs = + [ DoubleM "blockfetchclient.blockdelay" cmDelay + , IntM "blockfetchclient.blocksize" (fromIntegral cmBlockSize) + ] + <> if cmTraceVars + then cdfMetric "blockfetchclient.blockdelay.cdfOne" cmCdf1sVar + <> cdfMetric "blockfetchclient.blockdelay.cdfThree" cmCdf3sVar + <> cdfMetric "blockfetchclient.blockdelay.cdfFive" cmCdf5sVar + <> lateBlockMetric cmDelay + else [] + where + cdfMetric name var = + [ DoubleM name (fromIntegral (counter var) / fromIntegral size) + ] + + lateBlockMetric delay = + [ CounterM "blockfetchclient.lateblocks" Nothing + | delay > 5 + ] + in msgs + else [] instance MetaTrace ClientMetrics where namespaceFor _ = Namespace [] ["ClientMetrics"] - severityFor _ _ = Just Info + severityFor _ _ = Nothing documentFor _ = Just "" metricsDocFor (Namespace _ ["ClientMetrics"]) = @@ -581,6 +587,7 @@ initialClientMetrics = 0 0 False + False calculateBlockFetchClientMetrics :: ClientMetrics @@ -588,61 +595,66 @@ calculateBlockFetchClientMetrics :: -> BlockFetch.TraceLabelPeer peer (BlockFetch.TraceFetchClientState header) -> IO ClientMetrics calculateBlockFetchClientMetrics cm@ClientMetrics {..} _lc - (TraceLabelPeer _ (BlockFetch.CompletedBlockFetch p _ _ _ forgeDelay blockSize)) = - case pointSlot p of - Origin -> pure cm {cmTraceIt = False} -- Nothing to do. - At (SlotNo slotNo) -> do - if Pq.null cmSlotMap && forgeDelay > 20 - then pure cm {cmTraceIt = False} -- During startup wait until we are in sync - else case Pq.lookup (fromIntegral slotNo) cmSlotMap of - Just _ -> pure cm {cmTraceIt = False} -- dupe, we only track the first - Nothing -> do - let slotMap' = Pq.insert (fromIntegral slotNo) slotNo forgeDelay cmSlotMap - if Pq.size slotMap' > 1080 -- TODO k/2, should come from config file - then case Pq.minView slotMap' of - Nothing -> pure cm {cmTraceIt = False} -- Err. We just inserted an element! - Just (_, minSlotNo, minDelay, slotMap'') -> - if minSlotNo == slotNo - then pure cm {cmTraceIt = False, cmSlotMap = slotMap'} - else let - cdf1sVar = decCdf minDelay cmCdf1sVar - cdf3sVar = decCdf minDelay cmCdf3sVar - cdf5sVar = decCdf minDelay cmCdf5sVar - cdf1sVar' = incCdf forgeDelay cdf1sVar - cdf3sVar' = incCdf forgeDelay cdf3sVar - cdf5sVar' = incCdf forgeDelay cdf5sVar - in pure cm { - cmCdf1sVar = cdf1sVar' - , cmCdf3sVar = cdf3sVar' - , cmCdf5sVar = cdf5sVar' - , cmDelay = realToFrac forgeDelay - , cmBlockSize = getSizeInBytes blockSize - , cmTraceIt = True - , cmSlotMap = slotMap''} - else let - cdf1sVar' = incCdf forgeDelay cmCdf1sVar - cdf3sVar' = incCdf forgeDelay cmCdf3sVar - cdf5sVar' = incCdf forgeDelay cmCdf5sVar - -- -- Wait until we have at least 45 samples before we start providing - -- -- cdf estimates. - in if Pq.size slotMap' >= 45 - then pure cm { - cmCdf1sVar = cdf1sVar' - , cmCdf3sVar = cdf3sVar' - , cmCdf5sVar = cdf5sVar' - , cmDelay = realToFrac forgeDelay - , cmBlockSize = getSizeInBytes blockSize - , cmTraceIt = True - , cmSlotMap = slotMap'} - else pure cm { - cmCdf1sVar = cdf1sVar' - , cmCdf3sVar = cdf3sVar' - , cmCdf5sVar = cdf5sVar' - , cmTraceIt = False - , cmSlotMap = slotMap'} + (TraceLabelPeer _ (BlockFetch.CompletedBlockFetch p _ _ _ forgeDelay blockSize)) = + case pointSlot p of + Origin -> pure cm {cmTraceIt = False} -- Nothing to do for Origin + At (SlotNo slotNo) -> + if Pq.null cmSlotMap && forgeDelay > 20 + then pure cm {cmTraceIt = False} -- During startup wait until we are in sync + else processSlot slotNo + where + processSlot slotNo = + case Pq.lookup (fromIntegral slotNo) cmSlotMap of + Just _ -> pure cm {cmTraceIt = False} -- Duplicate, only track the first + Nothing -> let slotMap' = Pq.insert (fromIntegral slotNo) slotNo forgeDelay cmSlotMap + in if Pq.size slotMap' > 1080 + then trimSlotMap slotMap' slotNo + else updateMetrics slotMap' slotNo + + trimSlotMap slotMap' slotNo = + case Pq.minView slotMap' of + Nothing -> pure cm {cmTraceIt = False} -- Error: Just inserted element + Just (_, minSlotNo, minDelay, slotMap'') -> + if minSlotNo == slotNo + then pure cm { cmTraceIt = False, cmSlotMap = slotMap' } + else let updatedMetrics = updateCDFs minDelay forgeDelay + in pure cm + { cmCdf1sVar = fst3 updatedMetrics + , cmCdf3sVar = snd3 updatedMetrics + , cmCdf5sVar = thd3 updatedMetrics + , cmDelay = realToFrac forgeDelay + , cmBlockSize = getSizeInBytes blockSize + , cmSlotMap = slotMap'' } + + updateMetrics slotMap' _slotNo = + let updatedMetrics = updateCDFs 0 forgeDelay + in if Pq.size slotMap' >= 45 + then pure cm + { cmCdf1sVar = fst3 updatedMetrics + , cmCdf3sVar = snd3 updatedMetrics + , cmCdf5sVar = thd3 updatedMetrics + , cmDelay = realToFrac forgeDelay + , cmBlockSize = getSizeInBytes blockSize + , cmSlotMap = slotMap' } + else pure cm + { cmCdf1sVar = fst3 updatedMetrics + , cmCdf3sVar = snd3 updatedMetrics + , cmCdf5sVar = thd3 updatedMetrics + , cmTraceVars = False + , cmSlotMap = slotMap' } + + updateCDFs minDelay forgeDelay' = + ( incCdf forgeDelay' (decCdf minDelay cmCdf1sVar) + , incCdf forgeDelay' (decCdf minDelay cmCdf3sVar) + , incCdf forgeDelay' (decCdf minDelay cmCdf5sVar) ) + + fst3 (x, _, _) = x + snd3 (_, y, _) = y + thd3 (_, _, z) = z calculateBlockFetchClientMetrics cm _lc _ = pure cm + -------------------------------------------------------------------------------- -- BlockFetchDecision Tracer -------------------------------------------------------------------------------- @@ -865,8 +877,8 @@ instance MetaTrace (TraceBlockFetchServerEvent blk) where severityFor _ _ = Nothing metricsDocFor (Namespace [] ["SendBlock"]) = - [("served.block", "") - ,("served.block.latest", "")] + [("served.block", "This counter metric indicates how many blocks this node has served.") + ,("served.block.latest", "This counter metric indicates how many chain tip blocks this node has served.")] metricsDocFor _ = [] From 468ba675a42b2c17649360d9d669bc76aab2e4b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Nicklisch-Franken?= Date: Fri, 27 Sep 2024 18:46:46 +0200 Subject: [PATCH 4/7] cardano-node: rename P2P metrics --- .../src/Cardano/Node/Tracing/Tracers/P2P.hs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cardano-node/src/Cardano/Node/Tracing/Tracers/P2P.hs b/cardano-node/src/Cardano/Node/Tracing/Tracers/P2P.hs index 316c9122369..c8e3ce4b7e0 100644 --- a/cardano-node/src/Cardano/Node/Tracing/Tracers/P2P.hs +++ b/cardano-node/src/Cardano/Node/Tracing/Tracers/P2P.hs @@ -921,29 +921,29 @@ instance LogFormatting PeerSelectionCounters where PeerSelectionCountersHWC {..} -> -- Deprecated metrics; they will be removed in a future version. [ IntM - "peerSelection.cold" + "peerSelection.Cold" (fromIntegral numberOfColdPeers) , IntM - "peerSelection.warm" + "peerSelection.Warm" (fromIntegral numberOfWarmPeers) , IntM - "peerSelection.hot" + "peerSelection.Hot" (fromIntegral numberOfHotPeers) , IntM - "peerSelection.coldBigLedgerPeers" + "peerSelection.ColdBigLedgerPeers" (fromIntegral numberOfColdBigLedgerPeers) , IntM - "peerSelection.warmBigLedgerPeers" + "peerSelection.WarmBigLedgerPeers" (fromIntegral numberOfWarmBigLedgerPeers) , IntM - "peerSelection.hotBigLedgerPeers" + "peerSelection.HotBigLedgerPeers" (fromIntegral numberOfHotBigLedgerPeers) , IntM - "peerSelection.warmLocalRoots" + "peerSelection.WarmLocalRoots" (fromIntegral $ numberOfActiveLocalRootPeers psc) , IntM - "peerSelection.hotLocalRoots" + "peerSelection.HotLocalRoots" (fromIntegral $ numberOfEstablishedLocalRootPeers psc - numberOfActiveLocalRootPeers psc) ] From a5b99cc2cb19dd39de3ffd4077720a53d0ade5f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Nicklisch-Franken?= Date: Thu, 24 Oct 2024 11:43:11 +0200 Subject: [PATCH 5/7] cardano-node: Forge metrics and CDFCounters --- .../Cardano/Node/Tracing/Tracers/Consensus.hs | 57 ++++++++----------- .../Tracing/OrphanInstances/Consensus.hs | 10 ++-- 2 files changed, 31 insertions(+), 36 deletions(-) diff --git a/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs b/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs index d5906797044..50385c148c2 100644 --- a/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs +++ b/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs @@ -6,6 +6,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE UndecidableInstances #-} @@ -486,8 +487,8 @@ instance MetaTrace (TraceChainSyncServerEvent blk) where severityFor _ _ = Nothing metricsDocFor (Namespace _ ["Update"]) = - [ ("headersServed", "A counter triggered on any header event") - , ("headersServed.falling", + [ ("served.header", "A counter triggered on any header event") + , ("served.header.falling", "A counter triggered only on header event with falling edge")] metricsDocFor _ = [] @@ -502,15 +503,12 @@ instance MetaTrace (TraceChainSyncServerEvent blk) where -------------------------------------------------------------------------------- data CdfCounter = CdfCounter { - limit :: Int64 - , counter :: Int64 + limit :: ! Int64 + , counter :: ! Int64 } -decCdf :: Ord a => Num a => a -> CdfCounter -> CdfCounter -decCdf v cdf = - if v < fromIntegral (limit cdf) - then cdf {counter = counter cdf - 1} - else cdf +decCdf :: a -> CdfCounter -> CdfCounter +decCdf _v cdf = cdf {counter = counter cdf - 1} incCdf ::Ord a => Num a => a -> CdfCounter -> CdfCounter incCdf v cdf = @@ -521,9 +519,9 @@ incCdf v cdf = data ClientMetrics = ClientMetrics { cmSlotMap :: IntPSQ Word64 NominalDiffTime - , cmCdf1sVar :: CdfCounter - , cmCdf3sVar :: CdfCounter - , cmCdf5sVar :: CdfCounter + , cmCdf1sVar :: ! CdfCounter + , cmCdf3sVar :: ! CdfCounter + , cmCdf5sVar :: ! CdfCounter , cmDelay :: Double , cmBlockSize :: Word32 , cmTraceIt :: Bool @@ -1710,41 +1708,36 @@ instance ( tx ~ GenTx blk 0 ]) + asMetrics (TraceStartLeadershipCheck slot) = - [IntM "aboutToLeadSlotLast" (fromIntegral $ unSlotNo slot)] + [IntM "Forge.about-to-lead" (fromIntegral $ unSlotNo slot)] asMetrics (TraceSlotIsImmutable slot _tipPoint _tipBlkNo) = - [IntM "slotIsImmutable" (fromIntegral $ unSlotNo slot)] + [IntM "Forge.slot-is-immutable" (fromIntegral $ unSlotNo slot)] asMetrics (TraceBlockFromFuture slot _slotNo) = - [IntM "blockFromFuture" (fromIntegral $ unSlotNo slot)] - asMetrics (TraceBlockContext slot _tipBlkNo _tipPoint) = - [IntM "blockContext" (fromIntegral $ unSlotNo slot)] + [IntM "Forge.block-from-future" (fromIntegral $ unSlotNo slot)] asMetrics (TraceNoLedgerState slot _) = - [IntM "couldNotForgeSlotLast" (fromIntegral $ unSlotNo slot)] - asMetrics (TraceLedgerState slot _) = - [IntM "ledgerState" (fromIntegral $ unSlotNo slot)] + [IntM "Forge.could-not-forge" (fromIntegral $ unSlotNo slot)] asMetrics (TraceNoLedgerView slot _) = - [IntM "couldNotForgeSlotLast" (fromIntegral $ unSlotNo slot)] - asMetrics (TraceLedgerView slot) = - [IntM "ledgerView" (fromIntegral $ unSlotNo slot)] - -- see above + [IntM "Forge.could-not-forge" (fromIntegral $ unSlotNo slot)] + asMetrics (TraceLedgerView slot) = [] asMetrics (TraceNodeCannotForge slot _reason) = - [IntM "nodeCannotForge" (fromIntegral $ unSlotNo slot)] + [IntM "Forge.could-not-forge" (fromIntegral $ unSlotNo slot)] asMetrics (TraceNodeNotLeader slot) = - [IntM "nodeNotLeader" (fromIntegral $ unSlotNo slot)] + [IntM "Forge.nodeNotLeader" (fromIntegral $ unSlotNo slot)] asMetrics (TraceNodeIsLeader slot) = - [IntM "nodeIsLeader" (fromIntegral $ unSlotNo slot)] + [IntM "Forge.node-is-leader" (fromIntegral $ unSlotNo slot)] asMetrics TraceForgeTickedLedgerState {} = [] asMetrics TraceForgingMempoolSnapshot {} = [] asMetrics (TraceForgedBlock slot _ _ _) = - [IntM "forgedSlotLast" (fromIntegral $ unSlotNo slot)] + [IntM "Forge.forged" (fromIntegral $ unSlotNo slot)] asMetrics (TraceDidntAdoptBlock slot _) = - [IntM "notAdoptedSlotLast" (fromIntegral $ unSlotNo slot)] + [IntM "Forge.didnt-adopt" (fromIntegral $ unSlotNo slot)] asMetrics (TraceForgedInvalidBlock slot _ _) = - [IntM "forgedInvalidSlotLast" (fromIntegral $ unSlotNo slot)] + [IntM "Forge.forged-invalid" (fromIntegral $ unSlotNo slot)] asMetrics (TraceAdoptedBlock slot _ _) = - [IntM "adoptedOwnBlockSlotLast" (fromIntegral $ unSlotNo slot)] + [IntM "Forge.adopted" (fromIntegral $ unSlotNo slot)] asMetrics (TraceAdoptionThreadDied slot _) = - [IntM "adoptionThreadDied" (fromIntegral $ unSlotNo slot)] + [IntM "Forge.adoption-thread-died" (fromIntegral $ unSlotNo slot)] instance MetaTrace (TraceForgeEvent blk) where namespaceFor TraceStartLeadershipCheck {} = diff --git a/cardano-node/src/Cardano/Tracing/OrphanInstances/Consensus.hs b/cardano-node/src/Cardano/Tracing/OrphanInstances/Consensus.hs index 2a3c60f141b..c685dc7b9b1 100644 --- a/cardano-node/src/Cardano/Tracing/OrphanInstances/Consensus.hs +++ b/cardano-node/src/Cardano/Tracing/OrphanInstances/Consensus.hs @@ -34,14 +34,15 @@ import Ouroboros.Consensus.Block (BlockProtocol, BlockSupportsProtocol Header, RealPoint, blockNo, blockPoint, blockPrevHash, getHeader, headerPoint, pointHash, realPointHash, realPointSlot, withOriginToMaybe) import Ouroboros.Consensus.Block.SupportsSanityCheck -import Ouroboros.Consensus.Genesis.Governor (DensityBounds (..), TraceGDDEvent (..), GDDDebugInfo (..)) +import Ouroboros.Consensus.Genesis.Governor (DensityBounds (..), GDDDebugInfo (..), + TraceGDDEvent (..)) import Ouroboros.Consensus.HeaderValidation import Ouroboros.Consensus.Ledger.Abstract import Ouroboros.Consensus.Ledger.Extended import Ouroboros.Consensus.Ledger.Inspect (InspectLedger, LedgerEvent (..), LedgerUpdate, LedgerWarning) -import Ouroboros.Consensus.Ledger.SupportsMempool (ApplyTxErr, GenTx, GenTxId, HasTxId, - LedgerSupportsMempool, TxId, txForgetValidated, txId, ByteSize32 (..)) +import Ouroboros.Consensus.Ledger.SupportsMempool (ApplyTxErr, ByteSize32 (..), GenTx, + GenTxId, HasTxId, LedgerSupportsMempool, TxId, txForgetValidated, txId) import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) import Ouroboros.Consensus.Mempool (MempoolSize (..), TraceEventMempool (..)) import Ouroboros.Consensus.MiniProtocol.BlockFetch.Server @@ -65,7 +66,8 @@ import qualified Ouroboros.Consensus.Storage.ImmutableDB.API as ImmDB import Ouroboros.Consensus.Storage.ImmutableDB.Chunks.Internal (ChunkNo (..), chunkNoToInt) import qualified Ouroboros.Consensus.Storage.ImmutableDB.Impl.Types as ImmDB -import Ouroboros.Consensus.Storage.LedgerDB (PushGoal (..), PushStart (..), Pushing (..), ReplayStart (..)) +import Ouroboros.Consensus.Storage.LedgerDB (PushGoal (..), PushStart (..), Pushing (..), + ReplayStart (..)) import qualified Ouroboros.Consensus.Storage.LedgerDB as LedgerDB import qualified Ouroboros.Consensus.Storage.VolatileDB.Impl as VolDb import Ouroboros.Consensus.Util.Condense From 387a454628d12c5b13929ce21607d428bc9ae3e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Nicklisch-Franken?= Date: Thu, 24 Oct 2024 12:17:12 +0200 Subject: [PATCH 6/7] cardano-node: fetch metrics --- .../Cardano/Node/Tracing/Tracers/Consensus.hs | 41 +++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs b/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs index 50385c148c2..115dcfd8632 100644 --- a/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs +++ b/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs @@ -1719,11 +1719,13 @@ instance ( tx ~ GenTx blk [IntM "Forge.could-not-forge" (fromIntegral $ unSlotNo slot)] asMetrics (TraceNoLedgerView slot _) = [IntM "Forge.could-not-forge" (fromIntegral $ unSlotNo slot)] - asMetrics (TraceLedgerView slot) = [] + asMetrics (TraceLedgerView _) = [] + asMetrics (TraceBlockContext _ _ _) = [] + asMetrics (TraceLedgerState _ _) = [] asMetrics (TraceNodeCannotForge slot _reason) = [IntM "Forge.could-not-forge" (fromIntegral $ unSlotNo slot)] asMetrics (TraceNodeNotLeader slot) = - [IntM "Forge.nodeNotLeader" (fromIntegral $ unSlotNo slot)] + [IntM "Forge.node-not-leader" (fromIntegral $ unSlotNo slot)] asMetrics (TraceNodeIsLeader slot) = [IntM "Forge.node-is-leader" (fromIntegral $ unSlotNo slot)] asMetrics TraceForgeTickedLedgerState {} = [] @@ -1801,21 +1803,18 @@ instance MetaTrace (TraceForgeEvent blk) where severityFor _ _ = Nothing metricsDocFor (Namespace _ ["StartLeadershipCheck"]) = - [("aboutToLeadSlotLast", "")] + [("Forge.about-to-lead", "")] metricsDocFor (Namespace _ ["SlotIsImmutable"]) = - [("slotIsImmutable", "")] + [("Forge.slot-is-immutable", "")] metricsDocFor (Namespace _ ["BlockFromFuture"]) = - [("blockFromFuture", "")] - metricsDocFor (Namespace _ ["BlockContext"]) = - [("blockContext", "")] + [("Forge.block-from-future", "")] + metricsDocFor (Namespace _ ["BlockContext"]) = [] metricsDocFor (Namespace _ ["NoLedgerState"]) = - [("couldNotForgeSlotLast", "")] - metricsDocFor (Namespace _ ["LedgerState"]) = - [("ledgerState", "")] + [("Forge.could-not-forge", "")] + metricsDocFor (Namespace _ ["LedgerState"]) = [] metricsDocFor (Namespace _ ["NoLedgerView"]) = - [("couldNotForgeSlotLast", "")] - metricsDocFor (Namespace _ ["LedgerView"]) = - [("ledgerView", "")] + [("Forge.could-not-forge", "")] + metricsDocFor (Namespace _ ["LedgerView"]) = [] metricsDocFor (Namespace _ ["ForgeStateUpdateError"]) = [ ("operationalCertificateStartKESPeriod", "") , ("operationalCertificateExpiryKESPeriod", "") @@ -1823,23 +1822,23 @@ instance MetaTrace (TraceForgeEvent blk) where , ("remainingKESPeriods", "") ] metricsDocFor (Namespace _ ["NodeCannotForge"]) = - [("nodeCannotForge", "")] + [("Forge.could-not-forge", "")] metricsDocFor (Namespace _ ["NodeNotLeader"]) = - [("nodeNotLeader", "")] + [("Forge.node-not-leader", "")] metricsDocFor (Namespace _ ["NodeIsLeader"]) = - [("nodeIsLeader", "")] + [("Forge.node-is-leader", "")] metricsDocFor (Namespace _ ["ForgeTickedLedgerState"]) = [] metricsDocFor (Namespace _ ["ForgingMempoolSnapshot"]) = [] metricsDocFor (Namespace _ ["ForgedBlock"]) = - [("forgedSlotLast", "")] + [("Forge.forged", "")] metricsDocFor (Namespace _ ["DidntAdoptBlock"]) = - [("notAdoptedSlotLast", "")] + [("Forge.didnt-adopt", "")] metricsDocFor (Namespace _ ["ForgedInvalidBlock"]) = - [("forgedInvalidSlotLast", "")] + [("Forge.forged-invalid", "")] metricsDocFor (Namespace _ ["AdoptedBlock"]) = - [("adoptedOwnBlockSlotLast", "")] + [("Forge.adopted", "")] metricsDocFor (Namespace _ ["AdoptionThreadDied"]) = - [("adoptionThreadDied", "")] + [("Forge.adoption-thread-died", "")] metricsDocFor _ = [] documentFor (Namespace _ ["StartLeadershipCheck"]) = Just From c84c8fce69bbc79ccd5015ddd4e528f1d02e5f02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Nicklisch-Franken?= Date: Thu, 24 Oct 2024 15:14:44 +0200 Subject: [PATCH 7/7] cardano-node: fix for client metrics --- .../src/Cardano/Node/Tracing/Tracers/Consensus.hs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs b/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs index 115dcfd8632..40fc409eb58 100644 --- a/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs +++ b/cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs @@ -470,9 +470,8 @@ instance ConvertRawHash blk <> [ "risingEdge" .= True | RisingEdge <- [enclosing] ] asMetrics (TraceChainSyncServerUpdate _tip (AddBlock _pt) _blocking FallingEdge) = - [CounterM "headersServed.falling" Nothing] - asMetrics (TraceChainSyncServerUpdate _tip (AddBlock _pt) _blocking _) = - [CounterM "headersServed" Nothing] + [CounterM "served.header" Nothing] + asMetrics (TraceChainSyncServerUpdate _tip (AddBlock _pt) _blocking _) = [] asMetrics _ = [] instance MetaTrace (TraceChainSyncServerEvent blk) where @@ -487,8 +486,7 @@ instance MetaTrace (TraceChainSyncServerEvent blk) where severityFor _ _ = Nothing metricsDocFor (Namespace _ ["Update"]) = - [ ("served.header", "A counter triggered on any header event") - , ("served.header.falling", + [ ("served.header", "A counter triggered only on header event with falling edge")] metricsDocFor _ = [] @@ -622,6 +620,8 @@ calculateBlockFetchClientMetrics cm@ClientMetrics {..} _lc , cmCdf5sVar = thd3 updatedMetrics , cmDelay = realToFrac forgeDelay , cmBlockSize = getSizeInBytes blockSize + , cmTraceVars = True + , cmTraceIt = True , cmSlotMap = slotMap'' } updateMetrics slotMap' _slotNo = @@ -633,12 +633,17 @@ calculateBlockFetchClientMetrics cm@ClientMetrics {..} _lc , cmCdf5sVar = thd3 updatedMetrics , cmDelay = realToFrac forgeDelay , cmBlockSize = getSizeInBytes blockSize + , cmTraceVars = True + , cmTraceIt = True , cmSlotMap = slotMap' } else pure cm { cmCdf1sVar = fst3 updatedMetrics , cmCdf3sVar = snd3 updatedMetrics , cmCdf5sVar = thd3 updatedMetrics + , cmDelay = realToFrac forgeDelay + , cmBlockSize = getSizeInBytes blockSize , cmTraceVars = False + , cmTraceIt = True , cmSlotMap = slotMap' } updateCDFs minDelay forgeDelay' =