Skip to content

Commit

Permalink
Replace all uses of FxHashMap with IntMap
Browse files Browse the repository at this point in the history
* We either already have a hash, or just want an int mapping.
  • Loading branch information
wcampbell0x2a committed Aug 29, 2024
1 parent 8270fd9 commit 6a10006
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 19 deletions.
14 changes: 7 additions & 7 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion backhand/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ xz2 = { version = "0.1.7", optional = true }
rust-lzo = { version = "0.6.2", optional = true }
zstd = { version = "0.13.1", optional = true }
zstd-safe = { version = "7.2.1", optional = true }
rustc-hash = "2.0.0"
document-features = { version = "0.2.10", optional = true }
xxhash-rust = { version = "0.8.12", features = ["xxh64"] }
solana-nohash-hasher = "0.2.1"

[features]
default = ["xz", "gzip", "zstd"]
Expand Down
53 changes: 50 additions & 3 deletions backhand/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use std::collections::HashMap;
use std::io::{Read, Seek, Write};

use deku::prelude::*;
use rustc_hash::FxHashMap;
use solana_nohash_hasher::IntMap;
use tracing::trace;
use xxhash_rust::xxh64::xxh64;

use crate::compressor::CompressionAction;
Expand Down Expand Up @@ -104,7 +105,8 @@ pub(crate) struct DataWriter<'a> {
block_size: u32,
fs_compressor: FilesystemCompressor,
/// If some, cache of IntMap<file_len, IntMap<hash, (file_len, Added)>>
dup_cache: Option<HashMap<u64, HashMap<u64, (usize, Added)>>>,
#[allow(clippy::type_complexity)]
dup_cache: Option<IntMap<u64, IntMap<u64, (usize, Added)>>>,
/// Un-written fragment_bytes
pub(crate) fragment_bytes: Vec<u8>,
pub(crate) fragment_table: Vec<Fragment>,
Expand Down Expand Up @@ -193,6 +195,9 @@ impl<'a> DataWriter<'a> {
}

/// Add to data writer, either a Data or Fragment
///
/// If `self.dup_cache` is on, return already added `(usize, Added)` if duplicate
/// is found
// TODO: support tail-end fragments (off by default in squashfs-tools/mksquashfs)
pub(crate) fn add_bytes<W: WriteSeek>(
&mut self,
Expand Down Expand Up @@ -233,6 +238,7 @@ impl<'a> DataWriter<'a> {
if let Some(c) = dup_cache.get(&(chunk.len() as u64)) {
let hash = xxh64(chunk, 0);
if let Some(res) = c.get(&hash) {
trace!("duplicate file data found");
return Ok(res.clone());
}
}
Expand Down Expand Up @@ -266,7 +272,7 @@ impl<'a> DataWriter<'a> {
if let Some(entry) = dup_cache.get_mut(&(chunk_len as u64)) {
entry.insert(hash, added.clone());
} else {
let mut hashmap = HashMap::new();
let mut hashmap = IntMap::default();
hashmap.insert(hash, added.clone());
dup_cache.insert(chunk_len as u64, hashmap);
}
Expand Down Expand Up @@ -295,3 +301,44 @@ impl<'a> DataWriter<'a> {
Ok(())
}
}

#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use super::*;
    use crate::{
        compression::{Compressor, DefaultCompressor},
        DEFAULT_BLOCK_SIZE,
    };

    /// With the duplicate-file cache enabled, writing the same bytes twice
    /// must hand back the identical `(usize, Added)` result for both calls.
    #[test]
    fn test_duplicate_check() {
        let mut writer = DataWriter::new(
            &DefaultCompressor,
            FilesystemCompressor::new(Compressor::Gzip, None).unwrap(),
            DEFAULT_BLOCK_SIZE,
            true,
        );
        // Two full blocks of identical data so the block path (not fragments) is hit.
        let input = [0xff_u8; DEFAULT_BLOCK_SIZE as usize * 2];
        let mut sink = Cursor::new(vec![]);

        let first = writer.add_bytes(&input[..], &mut sink).unwrap();
        let second = writer.add_bytes(&input[..], &mut sink).unwrap();
        assert_eq!(first, second);
    }

    /// With the duplicate-file cache disabled, identical input is written out
    /// twice and the two results must therefore differ.
    #[test]
    fn test_no_duplicate_check() {
        let mut writer = DataWriter::new(
            &DefaultCompressor,
            FilesystemCompressor::new(Compressor::Gzip, None).unwrap(),
            DEFAULT_BLOCK_SIZE,
            false,
        );
        // Same payload as the positive test; only the cache flag differs.
        let input = [0xff_u8; DEFAULT_BLOCK_SIZE as usize * 2];
        let mut sink = Cursor::new(vec![]);

        let first = writer.add_bytes(&input[..], &mut sink).unwrap();
        let second = writer.add_bytes(&input[..], &mut sink).unwrap();
        assert_ne!(first, second);
    }
}
8 changes: 4 additions & 4 deletions backhand/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::collections::HashMap;
use std::io::{BufRead, Cursor, Read, Seek, SeekFrom, Write};

use deku::prelude::*;
use rustc_hash::FxHashMap;
use solana_nohash_hasher::IntMap;
use tracing::{error, trace};

use crate::error::BackhandError;
Expand Down Expand Up @@ -86,15 +86,15 @@ pub trait SquashFsReader: BufReadSeek + Sized {
&mut self,
superblock: &SuperBlock,
kind: &Kind,
) -> Result<(Inode, FxHashMap<u32, Inode>), BackhandError> {
) -> Result<(Inode, IntMap<u32, Inode>), BackhandError> {
let (map, bytes) = self.uncompress_metadatas(
superblock.inode_table,
superblock,
superblock.dir_table,
kind,
)?;

let mut inodes = FxHashMap::default();
let mut inodes = IntMap::default();
inodes.try_reserve(superblock.inode_count as usize)?;

let byte_len = bytes.len();
Expand Down Expand Up @@ -152,7 +152,7 @@ pub trait SquashFsReader: BufReadSeek + Sized {
superblock: &SuperBlock,
end_ptr: u64,
kind: &Kind,
) -> Result<(FxHashMap<u64, u64>, Vec<u8>), BackhandError> {
) -> Result<(IntMap<u64, u64>, Vec<u8>), BackhandError> {
self.seek(SeekFrom::Start(seek))?;
let mut map = HashMap::default();
let mut all_bytes = vec![];
Expand Down
8 changes: 4 additions & 4 deletions backhand/src/squashfs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::sync::Mutex;
use std::sync::{Arc, RwLock};

use deku::prelude::*;
use rustc_hash::FxHashMap;
use solana_nohash_hasher::IntMap;
use tracing::{error, info, trace};

use crate::compressor::{CompressionOptions, Compressor};
Expand Down Expand Up @@ -188,7 +188,7 @@ pub enum Flags {
pub(crate) struct Cache {
/// The first time a fragment bytes is read, those bytes are added to this map with the key
/// representing the start position
pub(crate) fragment_cache: FxHashMap<u64, Vec<u8>>,
pub(crate) fragment_cache: IntMap<u64, Vec<u8>>,
}

/// Squashfs Image initial read information
Expand All @@ -200,11 +200,11 @@ pub struct Squashfs<'b> {
/// Compression options that are used for the Compressor located after the Superblock
pub compression_options: Option<CompressionOptions>,
// Inode Cache `<InodeNumber, Inode>`
pub inodes: FxHashMap<u32, Inode>,
pub inodes: IntMap<u32, Inode>,
/// Root Inode
pub root_inode: Inode,
/// Bytes containing Directory Table `(<OffsetFromImage, OffsetInData>, Data)`
pub dir_blocks: (FxHashMap<u64, u64>, Vec<u8>),
pub dir_blocks: (IntMap<u64, u64>, Vec<u8>),
/// Fragments Lookup Table Cache
pub fragments: Option<Vec<Fragment>>,
/// Export Lookup Table Cache
Expand Down

0 comments on commit 6a10006

Please sign in to comment.