diff --git a/fluffy/conf.nim b/fluffy/conf.nim index b165b3f2c..510b8c891 100644 --- a/fluffy/conf.nim +++ b/fluffy/conf.nim @@ -302,6 +302,13 @@ type name: "force-prune" .}: bool + contentRequestRetries* {. + hidden, + desc: "Max number of retries when requesting content over the network.", + defaultValue: 3, + name: "content-request-retries" + .}: int + contentCacheSize* {. hidden, desc: diff --git a/fluffy/fluffy.nim b/fluffy/fluffy.nim index b3084b85d..84a3f6084 100644 --- a/fluffy/fluffy.nim +++ b/fluffy/fluffy.nim @@ -196,6 +196,7 @@ proc run( portalConfig: portalProtocolConfig, dataDir: string config.dataDir, storageCapacity: config.storageCapacityMB * 1_000_000, + contentRequestRetries: config.contentRequestRetries, ) node = PortalNode.new( diff --git a/fluffy/network/history/history_network.nim b/fluffy/network/history/history_network.nim index 37a42067d..311498eb0 100644 --- a/fluffy/network/history/history_network.nim +++ b/fluffy/network/history/history_network.nim @@ -39,6 +39,7 @@ type historicalRoots*: HistoricalRoots processContentLoop: Future[void] statusLogLoop: Future[void] + contentRequestRetries: int Block* = (Header, BlockBody) @@ -407,8 +408,7 @@ proc getContent( ## Public API to get the history network specific types, either from database ## or through a lookup on the Portal Network -const requestRetries = 4 -# TODO: Currently doing 4 retries on lookups but only when the validation fails. +# TODO: Currently doing 3 retries on lookups but only when the validation fails. # This is to avoid nodes that provide garbage from blocking us with getting the # requested data. Might want to also do that on a failed lookup, as perhaps this # could occur when being really unlucky with nodes timing out on requests. 
@@ -440,7 +440,7 @@ proc getVerifiedBlockHeader*( info "Fetched block header from database" return headerFromDb - for i in 0 ..< requestRetries: + for i in 0 ..< (1 + n.contentRequestRetries): let headerContent = (await n.portalProtocol.contentLookup(contentKey, contentId)).valueOr: warn "Failed fetching block header with proof from the network" @@ -469,7 +469,7 @@ proc getVerifiedBlockHeader*( return Opt.some(header) - # Headers were requested `requestRetries` times and all failed on validation + # All `1 + contentRequestRetries` header lookups failed on validation return Opt.none(Header) proc getBlockBody*( @@ -492,7 +492,7 @@ proc getBlockBody*( info "Fetched block body from database" return bodyFromDb - for i in 0 ..< requestRetries: + for i in 0 ..< (1 + n.contentRequestRetries): let bodyContent = (await n.portalProtocol.contentLookup(contentKey, contentId)).valueOr: warn "Failed fetching block body from the network" @@ -513,7 +513,7 @@ proc getBlockBody*( return Opt.some(body) - # Bodies were requested `requestRetries` times and all failed on validation + # All `1 + contentRequestRetries` body lookups failed on validation return Opt.none(BlockBody) proc getBlock*( @@ -567,7 +567,7 @@ proc getReceipts*( info "Fetched receipts from database" return receiptsFromDb - for i in 0 ..< requestRetries: + for i in 0 ..< (1 + n.contentRequestRetries): let receiptsContent = (await n.portalProtocol.contentLookup(contentKey, contentId)).valueOr: warn "Failed fetching receipts from the network" @@ -587,6 +587,9 @@ proc getReceipts*( return Opt.some(receipts) + # All `1 + contentRequestRetries` receipts lookups failed on validation + return Opt.none(seq[Receipt]) + proc validateContent( n: HistoryNetwork, content: seq[byte], contentKey: ContentKeyByteList ): Future[bool] {.async: (raises: [CancelledError]).} = @@ -661,6 +664,7 @@ proc new*( historicalRoots: HistoricalRoots = loadHistoricalRoots(), bootstrapRecords: openArray[Record] = [], 
portalConfig: PortalProtocolConfig = defaultPortalProtocolConfig, + contentRequestRetries = 3, ): T = let contentQueue = newAsyncQueue[(Opt[NodeId], ContentKeysList, seq[seq[byte]])](50) @@ -685,6 +689,7 @@ proc new*( contentQueue: contentQueue, accumulator: accumulator, historicalRoots: historicalRoots, + contentRequestRetries: contentRequestRetries, ) proc validateContent( diff --git a/fluffy/network/state/state_network.nim b/fluffy/network/state/state_network.nim index 1c2a5f77b..26fdd1710 100644 --- a/fluffy/network/state/state_network.nim +++ b/fluffy/network/state/state_network.nim @@ -32,6 +32,7 @@ type StateNetwork* = ref object statusLogLoop: Future[void] historyNetwork: Opt[HistoryNetwork] validateStateIsCanonical: bool + contentRequestRetries: int func toContentIdHandler(contentKey: ContentKeyByteList): results.Opt[ContentId] = ok(toContentId(contentKey)) @@ -46,6 +47,7 @@ proc new*( portalConfig: PortalProtocolConfig = defaultPortalProtocolConfig, historyNetwork = Opt.none(HistoryNetwork), validateStateIsCanonical = true, + contentRequestRetries = 3, ): T = let cq = newAsyncQueue[(Opt[NodeId], ContentKeysList, seq[seq[byte]])](50) @@ -67,6 +69,7 @@ proc new*( contentQueue: cq, historyNetwork: historyNetwork, validateStateIsCanonical: validateStateIsCanonical, + contentRequestRetries: contentRequestRetries, ) proc getContent( @@ -87,27 +90,32 @@ proc getContent( info "Fetched state local content value" return Opt.some(contentValue) - let - contentLookupResult = ( - await n.portalProtocol.contentLookup(contentKeyBytes, contentId) - ).valueOr: - warn "Failed fetching state content from the network" - return Opt.none(V) - contentValueBytes = contentLookupResult.content - - let contentValue = V.decode(contentValueBytes).valueOr: - warn "Unable to decode state content value from content lookup" - return Opt.none(V) - - validateRetrieval(key, contentValue).isOkOr: - warn "Validation of retrieved state content failed" - return Opt.none(V) + for i in 0 ..< (1 + 
n.contentRequestRetries): + let + contentLookupResult = ( + await n.portalProtocol.contentLookup(contentKeyBytes, contentId) + ).valueOr: + warn "Failed fetching state content from the network" + return Opt.none(V) + contentValueBytes = contentLookupResult.content + + let contentValue = V.decode(contentValueBytes).valueOr: + warn "Unable to decode state content value from content lookup" + continue + + validateRetrieval(key, contentValue).isOkOr: + warn "Validation of retrieved state content failed" + continue + + info "Fetched valid state content from the network" + n.portalProtocol.storeContent( + contentKeyBytes, contentId, contentValueBytes, cacheContent = true + ) - n.portalProtocol.storeContent( - contentKeyBytes, contentId, contentValueBytes, cacheContent = true - ) + return Opt.some(contentValue) - Opt.some(contentValue) + # All `1 + contentRequestRetries` lookups failed on decoding or validation + Opt.none(V) proc getAccountTrieNode*( n: StateNetwork, key: AccountTrieNodeKey diff --git a/fluffy/portal_node.nim b/fluffy/portal_node.nim index d6b5bbd8f..2d1736edf 100644 --- a/fluffy/portal_node.nim +++ b/fluffy/portal_node.nim @@ -36,6 +36,7 @@ type portalConfig*: PortalProtocolConfig dataDir*: string storageCapacity*: uint64 + contentRequestRetries*: int PortalNode* = ref object state*: PortalNodeState @@ -136,6 +137,7 @@ proc new*( accumulator, bootstrapRecords = bootstrapRecords, portalConfig = config.portalConfig, + contentRequestRetries = config.contentRequestRetries, ) ) else: @@ -153,6 +155,7 @@ proc new*( portalConfig = config.portalConfig, historyNetwork = historyNetwork, not config.disableStateRootValidation, + contentRequestRetries = config.contentRequestRetries, ) ) else: