Commit 9329deb

perf(trie): batch storage proof jobs at worker level
Implement worker-level batching for storage proofs to reduce redundant trie traversals when multiple proof requests arrive for the same account. When storage proof requests queue up faster than workers can process them, jobs for the same account are now merged into a single proof computation. This reduces trie I/O and computation overhead significantly.
1 parent ba58b84 commit 9329deb
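To make the batching idea concrete, here is a minimal, self-contained sketch of the merge step under simplified assumptions: `ProofJob`, `drain_batch`, and the plain `VecDeque`/`HashMap`/`HashSet` containers are illustrative stand-ins for the diff's `StorageProofInput`, the worker's crossbeam channel, and `B256Map`, not the crate's actual API.

// Illustrative sketch only: merge queued storage-proof jobs that target the same
// account so each account's trie is traversed once per batch.
use std::collections::{hash_map::Entry, HashMap, HashSet, VecDeque};

/// Simplified stand-in for a storage proof request (hypothetical type).
#[derive(Debug)]
struct ProofJob {
    hashed_address: [u8; 32],
    target_slots: HashSet<[u8; 32]>,
}

/// Drain up to `limit` queued jobs, merging same-account jobs into one entry.
fn drain_batch(queue: &mut VecDeque<ProofJob>, limit: usize) -> HashMap<[u8; 32], ProofJob> {
    let mut batches: HashMap<[u8; 32], ProofJob> = HashMap::new();
    let mut taken = 0;
    while taken < limit {
        let Some(job) = queue.pop_front() else { break };
        taken += 1;
        match batches.entry(job.hashed_address) {
            // Same account already queued: union the target slots instead of
            // scheduling a second trie traversal.
            Entry::Occupied(mut existing) => {
                existing.get_mut().target_slots.extend(job.target_slots);
            }
            Entry::Vacant(slot) => {
                slot.insert(job);
            }
        }
    }
    batches
}

fn main() {
    let account = [0xaa; 32];
    let mut queue = VecDeque::from([
        ProofJob { hashed_address: account, target_slots: HashSet::from([[1u8; 32]]) },
        ProofJob { hashed_address: account, target_slots: HashSet::from([[2u8; 32]]) },
        ProofJob { hashed_address: [0xbb; 32], target_slots: HashSet::from([[3u8; 32]]) },
    ]);
    let batches = drain_batch(&mut queue, 32);
    // Three requests collapse into two proof computations; the two jobs for
    // `account` are merged and cover both slots.
    assert_eq!(batches.len(), 2);
    assert_eq!(batches[&account].target_slots.len(), 2);
}

In the diff itself, the drain happens on the worker's crossbeam channel via try_recv(), the batch is capped at STORAGE_PROOF_BATCH_LIMIT (32 jobs), prefix sets are merged alongside target slots, and blinded-node jobs pulled out during the drain are deferred rather than dropped.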

File tree

1 file changed: +249 −54 lines

crates/trie/parallel/src/proof_task.rs

Lines changed: 249 additions & 54 deletions
@@ -41,6 +41,7 @@ use alloy_primitives::{
 use alloy_rlp::{BufMut, Encodable};
 use crossbeam_channel::{unbounded, Receiver as CrossbeamReceiver, Sender as CrossbeamSender};
 use dashmap::DashMap;
+use metrics::Histogram;
 use reth_execution_errors::{SparseTrieError, SparseTrieErrorKind};
 use reth_provider::{DatabaseProviderROFactory, ProviderError, ProviderResult};
 use reth_storage_errors::db::DatabaseError;
@@ -79,6 +80,93 @@ use crate::proof_task_metrics::{
 type StorageProofResult = Result<DecodedStorageMultiProof, ParallelStateRootError>;
 type TrieNodeProviderResult = Result<Option<RevealedNode>, SparseTrieError>;
 
+/// Maximum number of storage proof jobs to batch together per account.
+const STORAGE_PROOF_BATCH_LIMIT: usize = 32;
+
+/// Holds batched storage proof jobs for the same account.
+///
+/// When multiple storage proof requests arrive for the same account, they can be merged
+/// into a single proof computation with combined prefix sets and target slots.
+#[derive(Debug)]
+struct BatchedStorageProof {
+    /// The merged prefix set from all batched jobs.
+    prefix_set: PrefixSetMut,
+    /// The merged target slots from all batched jobs.
+    target_slots: B256Set,
+    /// Whether any job requested branch node masks.
+    with_branch_node_masks: bool,
+    /// The `multi_added_removed_keys` from the first job (they should all share the same `Arc`).
+    multi_added_removed_keys: Option<Arc<MultiAddedRemovedKeys>>,
+    /// All senders that need to receive the result.
+    senders: Vec<ProofResultContext>,
+}
+
+impl BatchedStorageProof {
+    /// Creates a new batch from the first storage proof input.
+    fn new(input: StorageProofInput, sender: ProofResultContext) -> Self {
+        // Convert frozen PrefixSet to mutable PrefixSetMut by collecting its keys.
+        let prefix_set = PrefixSetMut::from(input.prefix_set.iter().copied());
+        Self {
+            prefix_set,
+            target_slots: input.target_slots,
+            with_branch_node_masks: input.with_branch_node_masks,
+            multi_added_removed_keys: input.multi_added_removed_keys,
+            senders: vec![sender],
+        }
+    }
+
+    /// Merges another storage proof job into this batch.
+    fn merge(&mut self, input: StorageProofInput, sender: ProofResultContext) {
+        self.prefix_set.extend_keys(input.prefix_set.iter().copied());
+        self.target_slots.extend(input.target_slots);
+        self.with_branch_node_masks |= input.with_branch_node_masks;
+        self.senders.push(sender);
+    }
+
+    /// Converts this batch into a single `StorageProofInput` for computation.
+    fn into_input(self, hashed_address: B256) -> (StorageProofInput, Vec<ProofResultContext>) {
+        let input = StorageProofInput {
+            hashed_address,
+            prefix_set: self.prefix_set.freeze(),
+            target_slots: self.target_slots,
+            with_branch_node_masks: self.with_branch_node_masks,
+            multi_added_removed_keys: self.multi_added_removed_keys,
+        };
+        (input, self.senders)
+    }
+}
+
+/// Metrics for storage worker batching.
+#[derive(Clone, Default)]
+struct StorageWorkerBatchMetrics {
+    /// Histogram of batch sizes (number of jobs merged per computation).
+    #[cfg(feature = "metrics")]
+    batch_size_histogram: Option<Histogram>,
+}
+
+impl StorageWorkerBatchMetrics {
+    #[cfg(feature = "metrics")]
+    fn new() -> Self {
+        Self {
+            batch_size_histogram: Some(metrics::histogram!(
+                "trie.proof_task.storage_worker_batch_size"
+            )),
+        }
+    }
+
+    #[cfg(not(feature = "metrics"))]
+    fn new() -> Self {
+        Self {}
+    }
+
+    fn record_batch_size(&self, _size: usize) {
+        #[cfg(feature = "metrics")]
+        if let Some(h) = &self.batch_size_histogram {
+            h.record(_size as f64);
+        }
+    }
+}
+
 /// A handle that provides type-safe access to proof worker pools.
 ///
 /// The handle stores direct senders to both storage and account worker pools,
@@ -552,7 +640,7 @@ impl TrieNodeProvider for ProofTaskTrieNodeProvider {
     }
 }
 /// Result of a proof calculation, which can be either an account multiproof or a storage proof.
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub enum ProofResult {
     /// Account multiproof with statistics
     AccountMultiproof {
@@ -708,11 +796,18 @@ where
     /// 2. Advertises availability
     /// 3. Processes jobs in a loop:
     ///    - Receives job from channel
+    ///    - Drains additional same-account storage proof jobs (batching)
     ///    - Marks worker as busy
-    ///    - Processes the job
+    ///    - Processes the batched jobs as a single proof computation
     ///    - Marks worker as available
     /// 4. Shuts down when channel closes
     ///
+    /// # Batching Strategy
+    ///
+    /// When multiple storage proof requests arrive for the same account, they are merged
+    /// into a single proof computation. This reduces redundant trie traversals when state
+    /// updates arrive faster than proof computation can process them.
+    ///
     /// # Panic Safety
     ///
     /// If this function panics, the worker thread terminates but other workers
@@ -732,6 +827,7 @@ where
         // Create provider from factory
         let provider = task_ctx.factory.database_provider_ro()?;
         let proof_tx = ProofTaskTx::new(provider, worker_id);
+        let batch_metrics = StorageWorkerBatchMetrics::new();
 
         trace!(
             target: "trie::proof_task",
@@ -746,20 +842,98 @@ where
         // Initially mark this worker as available.
         available_workers.fetch_add(1, Ordering::Relaxed);
 
+        // Deferred blinded node jobs to process after batched storage proofs.
+        let mut deferred_blinded_nodes: Vec<(B256, Nibbles, Sender<TrieNodeProviderResult>)> =
+            Vec::new();
+
         while let Ok(job) = work_rx.recv() {
             // Mark worker as busy.
             available_workers.fetch_sub(1, Ordering::Relaxed);
 
             match job {
                 StorageWorkerJob::StorageProof { input, proof_result_sender } => {
-                    Self::process_storage_proof(
-                        worker_id,
-                        &proof_tx,
-                        input,
-                        proof_result_sender,
-                        &mut storage_proofs_processed,
-                        &mut cursor_metrics_cache,
+                    // Start batching: group storage proofs by account.
+                    let mut batches: B256Map<BatchedStorageProof> = B256Map::default();
+                    batches.insert(
+                        input.hashed_address,
+                        BatchedStorageProof::new(input, proof_result_sender),
                     );
+                    let mut total_jobs = 1usize;
+
+                    // Drain additional jobs from the queue.
+                    while total_jobs < STORAGE_PROOF_BATCH_LIMIT {
+                        match work_rx.try_recv() {
+                            Ok(StorageWorkerJob::StorageProof {
+                                input: next_input,
+                                proof_result_sender: next_sender,
+                            }) => {
+                                total_jobs += 1;
+                                let addr = next_input.hashed_address;
+                                match batches.entry(addr) {
+                                    alloy_primitives::map::Entry::Occupied(mut entry) => {
+                                        entry.get_mut().merge(next_input, next_sender);
+                                    }
+                                    alloy_primitives::map::Entry::Vacant(entry) => {
+                                        entry.insert(BatchedStorageProof::new(
+                                            next_input,
+                                            next_sender,
+                                        ));
+                                    }
+                                }
+                            }
+                            Ok(StorageWorkerJob::BlindedStorageNode {
+                                account,
+                                path,
+                                result_sender,
+                            }) => {
+                                // Defer blinded node jobs to process after batched proofs.
+                                deferred_blinded_nodes.push((account, path, result_sender));
+                            }
+                            Err(_) => break,
+                        }
+                    }
+
+                    // Process all batched storage proofs.
+                    for (hashed_address, batch) in batches {
+                        let batch_size = batch.senders.len();
+                        batch_metrics.record_batch_size(batch_size);
+
+                        let (merged_input, senders) = batch.into_input(hashed_address);
+
+                        trace!(
+                            target: "trie::proof_task",
+                            worker_id,
+                            ?hashed_address,
+                            batch_size,
+                            prefix_set_len = merged_input.prefix_set.len(),
+                            target_slots_len = merged_input.target_slots.len(),
+                            "Processing batched storage proof"
+                        );
+
+                        Self::process_batched_storage_proof(
+                            worker_id,
+                            &proof_tx,
+                            hashed_address,
+                            merged_input,
+                            senders,
+                            &mut storage_proofs_processed,
+                            &mut cursor_metrics_cache,
+                        );
+                    }
+
+                    // Process any deferred blinded node jobs.
+                    for (account, path, result_sender) in
+                        std::mem::take(&mut deferred_blinded_nodes)
+                    {
+                        Self::process_blinded_node(
+                            worker_id,
+                            &proof_tx,
+                            account,
+                            path,
+                            result_sender,
+                            &mut storage_nodes_processed,
+                        );
+                    }
                 }
 
                 StorageWorkerJob::BlindedStorageNode { account, path, result_sender } => {
@@ -795,82 +969,103 @@ where
         Ok(())
     }
 
-    /// Processes a storage proof request.
-    fn process_storage_proof<Provider>(
+    /// Processes a batched storage proof request and sends results to all waiting receivers.
+    ///
+    /// This computes a single storage proof with merged targets and sends the same result
+    /// to all original requestors, reducing redundant trie traversals.
+    fn process_batched_storage_proof<Provider>(
         worker_id: usize,
         proof_tx: &ProofTaskTx<Provider>,
+        hashed_address: B256,
         input: StorageProofInput,
-        proof_result_sender: ProofResultContext,
+        senders: Vec<ProofResultContext>,
         storage_proofs_processed: &mut u64,
         cursor_metrics_cache: &mut ProofTaskCursorMetricsCache,
     ) where
         Provider: TrieCursorFactory + HashedCursorFactory,
     {
-        let hashed_address = input.hashed_address;
-        let ProofResultContext { sender, sequence_number: seq, state, start_time } =
-            proof_result_sender;
-
         let mut trie_cursor_metrics = TrieCursorMetricsCache::default();
         let mut hashed_cursor_metrics = HashedCursorMetricsCache::default();
 
-        trace!(
-            target: "trie::proof_task",
-            worker_id,
-            hashed_address = ?hashed_address,
-            prefix_set_len = input.prefix_set.len(),
-            target_slots_len = input.target_slots.len(),
-            "Processing storage proof"
-        );
-
         let proof_start = Instant::now();
         let result = proof_tx.compute_storage_proof(
             input,
             &mut trie_cursor_metrics,
             &mut hashed_cursor_metrics,
         );
-
         let proof_elapsed = proof_start.elapsed();
-        *storage_proofs_processed += 1;
-
-        let result_msg = result.map(|storage_proof| ProofResult::StorageProof {
-            hashed_address,
-            proof: storage_proof,
-        });
 
-        if sender
-            .send(ProofResultMessage {
-                sequence_number: seq,
-                result: result_msg,
-                elapsed: start_time.elapsed(),
-                state,
-            })
-            .is_err()
-        {
-            trace!(
-                target: "trie::proof_task",
-                worker_id,
-                hashed_address = ?hashed_address,
-                storage_proofs_processed,
-                "Proof result receiver dropped, discarding result"
-            );
+        // Send the result to all waiting receivers.
+        let num_senders = senders.len();
+        match result {
+            Ok(storage_proof) => {
+                // Success case: clone the proof for each sender.
+                let proof_result =
+                    ProofResult::StorageProof { hashed_address, proof: storage_proof };
+
+                for ProofResultContext { sender, sequence_number, state, start_time } in senders {
+                    *storage_proofs_processed += 1;
+
+                    if sender
+                        .send(ProofResultMessage {
+                            sequence_number,
+                            result: Ok(proof_result.clone()),
+                            elapsed: start_time.elapsed(),
+                            state,
+                        })
+                        .is_err()
+                    {
+                        trace!(
+                            target: "trie::proof_task",
+                            worker_id,
+                            ?hashed_address,
+                            sequence_number,
+                            "Proof result receiver dropped, discarding result"
+                        );
+                    }
+                }
+            }
+            Err(error) => {
+                // Error case: convert to string for cloning, then send to all receivers.
+                let error_msg = error.to_string();
+
+                for ProofResultContext { sender, sequence_number, state, start_time } in senders {
+                    *storage_proofs_processed += 1;
+
+                    if sender
+                        .send(ProofResultMessage {
+                            sequence_number,
+                            result: Err(ParallelStateRootError::Other(error_msg.clone())),
+                            elapsed: start_time.elapsed(),
+                            state,
+                        })
+                        .is_err()
+                    {
+                        trace!(
+                            target: "trie::proof_task",
+                            worker_id,
+                            ?hashed_address,
+                            sequence_number,
+                            "Proof result receiver dropped, discarding result"
+                        );
+                    }
+                }
+            }
         }
 
         trace!(
             target: "trie::proof_task",
             worker_id,
-            hashed_address = ?hashed_address,
+            ?hashed_address,
             proof_time_us = proof_elapsed.as_micros(),
-            total_processed = storage_proofs_processed,
+            num_senders,
             trie_cursor_duration_us = trie_cursor_metrics.total_duration.as_micros(),
             hashed_cursor_duration_us = hashed_cursor_metrics.total_duration.as_micros(),
-            ?trie_cursor_metrics,
-            ?hashed_cursor_metrics,
-            "Storage proof completed"
+            "Batched storage proof completed"
         );
 
         #[cfg(feature = "metrics")]
         {
-            // Accumulate per-proof metrics into the worker's cache
             let per_proof_cache = ProofTaskCursorMetricsCache {
                 account_trie_cursor: TrieCursorMetricsCache::default(),
                 account_hashed_cursor: HashedCursorMetricsCache::default(),
