Skip to content

Commit

Permalink
wip counts hard links only once for each dir
Browse files Browse the repository at this point in the history
  • Loading branch information
wookietreiber committed Aug 19, 2024
1 parent 8e7aabf commit b431716
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 49 deletions.
12 changes: 12 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,17 @@ pub fn build() -> Command {
)
.value_parser(value_parser!(usize));

let count_links = Arg::new("count-links")
.short('l')
.long("count-links")
.action(ArgAction::SetTrue)
.hide_short_help(true)
.long_help(
"Count sizes many times if hard linked. The default behavior is to count each \
hard linked object only once for each point of interest, i.e. once for every \
DIR and once for every sub-directory if max depth is used."
);

let kb_allocated = Arg::new("kb-allocated")
.long("kb-allocated")
.action(ArgAction::SetTrue)
Expand Down Expand Up @@ -95,6 +106,7 @@ pub fn build() -> Command {
.args(filter())
.args(mmapplypolicy())
.arg(max_depth)
.arg(count_links)
.arg(kb_allocated)
.arg(debug)
.arg(help)
Expand Down
4 changes: 4 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ pub struct Config {
pub dirs: Option<Vec<PathBuf>>,
pub debug: bool,
pub filter: Filter,
pub count_links: bool,
pub max_depth: Option<usize>,
pub mm_nodes: Option<String>,
pub mm_local_work_dir: Option<PathBuf>,
Expand All @@ -60,6 +61,8 @@ impl TryFrom<ArgMatches> for Config {

let filter = Filter::try_from(&args)?;

let count_links = args.get_flag("count-links");

let max_depth = args
.get_one::<usize>("max-depth")
.copied()
Expand All @@ -85,6 +88,7 @@ impl TryFrom<ArgMatches> for Config {
dirs,
debug,
filter,
count_links,
max_depth,
mm_nodes,
mm_local_work_dir,
Expand Down
67 changes: 64 additions & 3 deletions src/policy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ use std::fs::File;
use std::io::{self, Write};
use std::path::Path;

use anyhow::{anyhow, Context, Result};
use bstr::ByteSlice;
use libc::{gid_t, uid_t};

use crate::config::{Config, Filter};
Expand Down Expand Up @@ -56,7 +58,7 @@ fn policy_group(group: gid_t, attribute: &str) -> String {
RULE 'TOTAL'
LIST 'size'
DIRECTORIES_PLUS
SHOW(VARCHAR({attribute}))
SHOW(VARCHAR({attribute}) || ' ' || VARCHAR(NLINK))
WHERE GROUP_ID = {group}
"
)
Expand All @@ -71,7 +73,7 @@ fn policy_user(user: uid_t, attribute: &str) -> String {
RULE 'TOTAL'
LIST 'size'
DIRECTORIES_PLUS
SHOW(VARCHAR({attribute}))
SHOW(VARCHAR({attribute}) || ' ' || VARCHAR(NLINK))
WHERE USER_ID = {user}
"
)
Expand All @@ -86,7 +88,66 @@ fn policy_default(attribute: &str) -> String {
RULE 'TOTAL'
LIST 'size'
DIRECTORIES_PLUS
SHOW(VARCHAR({attribute}))
SHOW(VARCHAR({attribute}) || ' ' || VARCHAR(NLINK))
"
)
}

/// One line of the policy report, split into its parts without copying.
///
/// Line layout: `inode generation snapid X Y Z -- path`
///
/// * `.0` holds the space-separated fields that precede the ` -- ` separator
///   (index 0 is read as the inode, index 4 as the byte count and index 5 as
///   the number of links by the accessors of this type).
/// * `.1` holds the raw path bytes that follow the separator.
///
/// Both parts borrow from the line the entry was created from (lifetime `'a`).
pub struct Entry<'a>(Vec<&'a [u8]>, &'a [u8]);

impl Entry<'_> {
    /// Error message used when a report line does not have the expected shape.
    const INVALID: &'static str = "invalid line in policy report";

    /// Decodes the metadata field at `index` as UTF-8.
    ///
    /// `what` is attached as error context if the field is not valid UTF-8.
    /// Indexing relies on the entry holding all of the fields the accessors
    /// below ask for (indices 0, 4 and 5).
    fn field_str(&self, index: usize, what: &'static str) -> Result<&str> {
        let raw = self.0[index];
        raw.to_str().context(what)
    }

    /// Returns the inode field (index 0) as text.
    pub fn inode_str(&self) -> Result<&str> {
        self.field_str(0, "reading inode field from policy report")
    }

    /// Returns the bytes field (index 4) as text.
    pub fn bytes_str(&self) -> Result<&str> {
        self.field_str(4, "reading bytes field from policy report")
    }

    /// Returns the bytes field parsed as an unsigned 64-bit integer.
    pub fn bytes(&self) -> Result<u64> {
        let text = self.bytes_str()?;

        text.parse::<u64>()
            .context("parsing bytes field from policy report")
    }

    /// Returns the number-of-links field (index 5) as text.
    pub fn nlink_str(&self) -> Result<&str> {
        self.field_str(5, "reading number of links field from policy report")
    }

    /// Returns the path part of the line (everything after the separator).
    pub fn path(&self) -> Result<&Path> {
        let raw = self.1;
        raw.to_path()
            .context("parsing path field from policy report")
    }
}

impl<'a> TryFrom<&'a Vec<u8>> for Entry<'a> {
type Error = anyhow::Error;

fn try_from(line: &'a Vec<u8>) -> Result<Self> {
let groups = line.split_str(" -- ").collect::<Vec<_>>();

if groups.len() != 2 {
return Err(anyhow!(Entry::INVALID));
}

let fields = groups[0].splitn_str(7, " ").take(6).collect::<Vec<_>>();
let path = groups[1];

if fields.len() == 6 {
Ok(Self(fields, path))
} else {
Err(anyhow!(Entry::INVALID))
}
}
}
141 changes: 95 additions & 46 deletions src/usage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

use std::collections::BTreeMap;
use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::io::{BufReader, Read};
use std::ops::AddAssign;
Expand All @@ -32,14 +32,13 @@ use std::process::{Command, Stdio};

use anyhow::{anyhow, Context, Result};
use bstr::io::BufReadExt;
use bstr::ByteSlice;
use clap::crate_name;
use tempfile::{tempdir, tempdir_in};

use crate::config::Config;
use crate::log;
use crate::output::output;
use crate::policy;
use crate::policy::{self, Entry};

pub fn run(dir: &Path, config: &Config) -> Result<()> {
let tmp = if let Some(ref local_work_dir) = config.mm_local_work_dir {
Expand Down Expand Up @@ -116,17 +115,14 @@ fn sum(dir: &Path, report: &Path, config: &Config) -> Result<()> {
})?;

if let Some(depth) = config.max_depth {
let sizes = sum_depth(dir, depth, report, config.debug)?;
let sizes =
sum_depth(dir, depth, report, config.count_links, config.debug)?;

for (dir, Acc { inodes, bytes }) in sizes {
// drop files and empty directories
// they each have only one entry
if inodes > 1 {
output(&dir, inodes, bytes, config);
}
output(&dir, inodes, bytes, config);
}
} else {
let Acc { inodes, bytes } = sum_total(report)?;
let Acc { inodes, bytes } = sum_total(report, config.count_links)?;
output(dir, inodes, bytes, config);
};

Expand All @@ -137,25 +133,25 @@ fn sum_depth(
dir: &Path,
depth: usize,
report: impl Read,
count_links: bool,
debug: bool,
) -> Result<BTreeMap<PathBuf, Acc>> {
let report = BufReader::new(report);

let mut dir_sums = BTreeMap::new();
// let mut hard_links = HashMap::new();

let prefix_depth = Path::new(dir).iter().count();

for line in report.byte_lines() {
let line = line.context("reading line from policy report")?;
let entry = Entry::try_from(&line)?;

let mut groups = line.splitn_str(2, " -- ");

let meta = groups.next().unwrap();
let bytes = entry.bytes()?;

let bytes = meta.splitn_str(6, " ").nth(4).unwrap();
let bytes = bytes.to_str().unwrap();
let bytes: u64 = bytes.parse().unwrap();
// let nlink = entry.nlink_str()?;

let path = groups.next().unwrap().to_path().unwrap();
let path = entry.path()?;
let path_depth = path.iter().count();
let path_suffix_depth = path_depth - prefix_depth;

Expand All @@ -167,29 +163,52 @@ fn sum_depth(

log::debug(format!("prefix: {prefix:?}"), debug);

dir_sums
.entry(prefix)
.and_modify(|x| *x += bytes)
.or_insert_with(|| Acc { inodes: 1, bytes });
if count_links {
dir_sums
.entry(prefix)
.and_modify(|x| *x += bytes)
.or_insert_with(|| Acc { inodes: 1, bytes });
}
}
}

Ok(dir_sums)
Ok(dir_sums
.into_iter()
.filter(|(_, acc)| acc.inodes > 1)
.collect())
}

fn sum_total(report: impl Read) -> Result<Acc> {
let report = BufReader::new(report);

fn sum_total(report: impl Read, count_links: bool) -> Result<Acc> {
let mut sum = Acc::default();
let mut hard_links = HashMap::new();

for line in report.byte_lines() {
for line in BufReader::new(report).byte_lines() {
let line = line.context("reading line from policy report")?;
let entry = Entry::try_from(&line)?;

let bytes = line.splitn_str(6, " ").nth(4).unwrap();
let bytes = bytes.to_str().unwrap();
let bytes: u64 = bytes.parse().unwrap();
let bytes = entry.bytes()?;

if count_links {
sum += bytes;
continue;
}

sum += bytes;
let nlink = entry.nlink_str()?;

if nlink == "1" {
sum += bytes;
continue;
}

let inode = entry.inode_str()?;
let inode = hard_links
.entry(inode.to_owned())
.and_modify(|c| *c += 1)
.or_insert(1);

if *inode == 1 {
sum += bytes;
}
}

Ok(sum)
Expand Down Expand Up @@ -230,29 +249,59 @@ mod test {
use super::*;

#[test]
fn total_simple() {
let source = "1 1 0 1024 1 -- /data/test/foo\n\
2 1 0 1024 1 -- /data/test/bar\n";
fn total() {
let source = "1 1 0 1024 3 -- /data/test/foo\n\
1 1 0 1024 3 -- /data/test/bar\n\
1 1 0 1024 3 -- /data/test/baz\n\
2 1 0 1024 2 -- /data/test/other\n";

let result = sum_total(source.as_bytes()).unwrap();
let once = sum_total(source.as_bytes(), false).unwrap();
assert_eq!(Acc::from((2, 2048)), once);

assert_eq!(Acc::from((2, 2048)), result);
let many = sum_total(source.as_bytes(), true).unwrap();
assert_eq!(Acc::from((4, 4096)), many);
}

#[test]
fn depth_simple() {
let source = "1 1 0 1024 1 -- /data/test/a/foo\n\
2 1 0 1024 1 -- /data/test/b/bar\n";
fn depth() {
let source = "1 1 0 1024 5 -- /data/test/foo\n\
1 1 0 1024 5 -- /data/test/bar\n\
2 1 0 1024 2 -- /data/test/other\n\
1 1 0 1024 5 -- /data/test/a/foo\n\
1 1 0 1024 5 -- /data/test/a/bar\n\
1 1 0 1024 5 -- /data/test/b/foo\n\
2 1 0 1024 2 -- /data/test/b/other\n";

let mut once = BTreeMap::new();
once.insert("/data/test".into(), Acc::from((2, 2048)));
once.insert("/data/test/a".into(), Acc::from((1, 1024)));
once.insert("/data/test/b".into(), Acc::from((2, 2048)));

let result = sum_depth(
Path::new("/data/test"),
1,
source.as_bytes(),
false,
false,
)
.unwrap();

let mut expected = BTreeMap::new();
expected.insert("/data/test".into(), Acc::from((2, 2048)));
expected.insert("/data/test/a".into(), Acc::from((1, 1024)));
expected.insert("/data/test/b".into(), Acc::from((1, 1024)));
assert_eq!(once, result);

let result =
sum_depth(Path::new("/data/test"), 1, source.as_bytes(), false)
.unwrap();
let mut many = BTreeMap::new();
many.insert("/data/test".into(), Acc::from((7, 7168)));
many.insert("/data/test/a".into(), Acc::from((2, 2048)));
many.insert("/data/test/b".into(), Acc::from((2, 2048)));

let result = sum_depth(
Path::new("/data/test"),
1,
source.as_bytes(),
true,
false,
)
.unwrap();

assert_eq!(expected, result);
assert_eq!(many, result);
}
}

0 comments on commit b431716

Please sign in to comment.