feat(op-reth): proof store v2 (#22)
dhyaniarun1993 wants to merge 8 commits into `chore/proof-store-improvement` from the author's branch.
Conversation
| // Installs the ExEx and RPC overrides for the given `mdbx` handle and `storage`. | ||
| // Defined as a macro because the builder chain must be monomorphized for each concrete | ||
| // storage type; a generic function cannot be used due to the type-state builder pattern. | ||
| macro_rules! install_proof_history { |
There was a problem hiding this comment.
I don't really understand why we need a macro here. The builder can be generic over the storage and we can use a generic method over the storage instead.
| ctx.node().task_executor().clone(), | ||
| ctx.node().evm_config().clone(), | ||
| let storage: OpProofsStorage<Arc<MdbxProofsStorage>> = mdbx.clone().into(); | ||
| install_proof_history!(mdbx, storage); |
There was a problem hiding this comment.
Also the storage can be directly built from mdbx into that helper method so no need to have two args
| impl Encode for HashedAccountShardedKey { | ||
| type Encoded = [u8; 40]; // 32 (B256) + 8 (BlockNumber) | ||
|
|
||
| fn encode(self) -> Self::Encoded { | ||
| let mut buf = [0u8; 40]; | ||
| buf[..32].copy_from_slice(self.0.key.as_slice()); | ||
| buf[32..].copy_from_slice(&self.0.highest_block_number.to_be_bytes()); | ||
| buf | ||
| } | ||
| } | ||
|
|
||
| impl Decode for HashedAccountShardedKey { | ||
| fn decode(value: &[u8]) -> Result<Self, DatabaseError> { | ||
| if value.len() != 40 { | ||
| return Err(DatabaseError::Decode); | ||
| } | ||
| let key = B256::from_slice(&value[..32]); | ||
| let highest_block_number = | ||
| u64::from_be_bytes(value[32..].try_into().map_err(|_| DatabaseError::Decode)?); | ||
| Ok(Self(ShardedKey::new(key, highest_block_number))) | ||
| } | ||
| } |
There was a problem hiding this comment.
ShardedKey already implements Encode and Decode. Why do we need this newtype? We can just reuse the sharded key type directly. If not, we should just reuse the implementations from ShardedKey instead of redefining new ones
There was a problem hiding this comment.
This makes sense for AccountTrieShardedKey but this implementation looks very similar to ShardedKey
| > + NodeTypesWithDB, | ||
| S: OpProofsStore + Send + Sync + Clone + std::fmt::Debug + 'static, | ||
| { | ||
| #[allow(clippy::useless_conversion)] |
| let proof_window = self.get_proof_window_inner()?; | ||
|
|
||
| if latest_common_block.number < proof_window.earliest.number || | ||
| latest_common_block.number > proof_window.latest.number | ||
| { | ||
| return Err(OpProofsStorageError::ReorgBaseOutOfWindow { | ||
| block_number: latest_common_block.number, | ||
| earliest_block_number: proof_window.earliest.number, | ||
| latest_block_number: proof_window.latest.number, | ||
| }); | ||
| } |
There was a problem hiding this comment.
This makes sense. Let's make sure to add unit tests here as well to prevent regressions in v1.
| use alloy_eips::NumHash; | ||
|
|
||
| pub(crate) struct ProofWindowValue { | ||
| pub earliest: NumHash, | ||
| pub latest: NumHash, | ||
| } |
There was a problem hiding this comment.
nit: Let's move that to mod.rs instead of adding a new file
| /// Keys Storage Trie History by: Hashed Address + Nibbles + Sharded Block. | ||
| /// | ||
| /// Uses **length-prefixed encoding** for the nibble portion to avoid sort | ||
| /// ambiguity in MDBX (same rationale as | ||
| /// [`AccountTrieShardedKey`](super::key::AccountTrieShardedKey)): | ||
| /// | ||
| /// ```text | ||
| /// [hashed_address: 32 bytes] ++ [nibble_count: 1 byte] ++ [nibble_bytes] ++ [block_number: 8 BE bytes] | ||
| /// ``` | ||
| #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)] | ||
| pub struct StorageTrieShardedKey { | ||
| /// The hashed address of the account owning the storage trie. | ||
| pub hashed_address: B256, | ||
| /// The trie path (nibbles). | ||
| pub key: StoredNibbles, | ||
| /// Highest block number in this shard (or `u64::MAX` for the sentinel). | ||
| pub highest_block_number: u64, | ||
| } |
There was a problem hiding this comment.
Why is that in value.rs and not in key.rs?
| pub hashed_address: B256, | ||
| /// The sharded key combining the storage key and sharded block number. | ||
| pub sharded_key: ShardedKey<B256>, | ||
| } |
There was a problem hiding this comment.
Same question here, why is it in value.rs?
| pub hashed_address: B256, | ||
| /// Account state before the block. `None` means the account didn't exist. | ||
| pub info: Option<Account>, | ||
| } |
| pub nibbles: StoredNibblesSubKey, | ||
| /// Node value prior to the block being processed, None indicating it didn't exist. | ||
| pub node: Option<BranchNodeCompact>, | ||
| } |
| fn encode(self) -> Self::Encoded { | ||
| let nibble_bytes: Vec<u8> = self.key.0.iter().collect(); | ||
| let nibble_count = nibble_bytes.len() as u8; | ||
| let mut buf = Vec::with_capacity(32 + 1 + nibble_bytes.len() + 8); |
There was a problem hiding this comment.
Can we have some tests to ensure we don't have sort ambiguity with the length-prefixed encoding? This should prevent regressions.
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { |
There was a problem hiding this comment.
This file is mostly tests. It is really hard to deal with such a big file; let's split it up into its own module and have one file per cursor struct.
Let's also split the test cases in different files.
theochap
left a comment
There was a problem hiding this comment.
Reviewed all the files except cursor and store. Let's split those in their own submodules and break them up so that each file is ~200-300 loc. Each one is currently 3.5k loc and mostly tests. Makes it really hard to review
| /// 1. Seek the first history shard with `highest_block_number >= max_block_number`. | ||
| /// 2. Within that shard, find the first block strictly `> max_block_number`. | ||
| /// 3. If found → `FromChangeset(block)`. | ||
| /// 4. If the shard boundary was hit (all entries ≤ `max_block_number`), advance to the next shard | ||
| /// for the same key. If found → use its first entry. |
There was a problem hiding this comment.
I think we can remove 4. if we seek the first history shard with highest_block_number > max_block_number and just return FromCurrentState if it doesn't exist.
Also, what is the motivation behind storing the before value from the changeset at a given block. Why not store the after value? Then we only have to do one seek operation
| // 1. Seek the first shard with highest_block_number >= max_block_number. | ||
| let shard = cursor.seek(seek_key)?.filter(|(k, _)| key_filter(k)); |
There was a problem hiding this comment.
Does the key_filter incorporate the inequality check with max_block_number? The comment just before is misleading - we're seeking against seek_key and filtering according to key_filter, this has nothing to do with max_block_number.
| self.cs_next = self.cursor.seek(key)?; | ||
| self.hist_next_key = self | ||
| .history_walk_cursor | ||
| .seek(HashedAccountShardedKey::new(key, 0))? |
There was a problem hiding this comment.
Why is highest_block_number zero here?
| let (min_key, cs_value) = match (&self.cs_next, &self.hist_next_key) { | ||
| (Some((cs_k, cs_v)), Some(h_k)) => { | ||
| if cs_k <= h_k { | ||
| (*cs_k, Some(*cs_v)) | ||
| } else { | ||
| (*h_k, None) | ||
| } | ||
| } | ||
| (Some((cs_k, cs_v)), None) => (*cs_k, Some(*cs_v)), | ||
| (None, Some(h_k)) => (*h_k, None), | ||
| (None, None) => return Ok(None), | ||
| }; |
There was a problem hiding this comment.
Let's add a comment to explain the logic behind this match
| // Single-scan: restore state, collect affected keys, delete changesets | ||
| let acct_trie_keys = self.unwind_and_collect_account_trie(&range)?; | ||
| let stor_trie_keys = self.unwind_and_collect_storage_trie(&range)?; | ||
| let acct_keys = self.unwind_and_collect_hashed_accounts(&range)?; | ||
| let stor_keys = self.unwind_and_collect_hashed_storages(&range)?; | ||
|
|
||
| // Phase B: remove old block numbers from history bitmaps | ||
| self.prune_account_trie_history(&range, &acct_trie_keys)?; | ||
| self.prune_storage_trie_history(&range, &stor_trie_keys)?; | ||
| self.prune_hashed_account_history(&range, &acct_keys)?; | ||
| self.prune_hashed_storage_history(&range, &stor_keys)?; |
There was a problem hiding this comment.
The unwind and prune calls should be refactored into a separate method to improve readability. The same applies to the other methods in this file.
| } | ||
| } | ||
|
|
||
| #[cfg(test)] |
There was a problem hiding this comment.
Same comment regarding splitting up that file. It is way too big
|
|
||
| metrics | ||
| } | ||
| } |
Note: this PR is just for reviewing the changes — it isn't meant to be merged.