Skip to content

Commit

Permalink
add --gitignore option
Browse files Browse the repository at this point in the history
Using this option will ignore all *.md and *.html files which are
currently ignored by git.

This is implemented via `git ls-files --ignored --others --exclude-standard`.
  • Loading branch information
willcl-ark committed Jun 24, 2024
1 parent a764234 commit 1afd1c7
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 15 deletions.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ Another example is to call *mlc* on a certain directory or file:
mlc ./docs
```

Alternatively, you may want to ignore all files currently ignored by `git` (requires the `git` binary to be found on $PATH) and set a root-dir for relative links:

```bash
mlc --gitignore --root-dir .
```

Call *mlc* with the `--help` flag to display all available cli arguments:

``` bash
Expand All @@ -120,6 +126,7 @@ The following arguments are available:
| `--match-file-extension` | `-e` | Set the flag, if the file extension shall be checked as well. For example the following markup link `[link](dir/file)` matches if for example a file called `file.md` exists in `dir`, but would fail when the `--match-file-extension` flag is set. |
| `--version` | `-V` | Print current version of mlc |
| `--ignore-path` | `-p` | Comma separated list of directories or files which shall be ignored. For example |
| `--gitignore` | `-g` | Ignore all files currently ignored by git (requires `git` binary to be available on $PATH). |
| `--ignore-links` | `-i` | Comma separated list of links which shall be ignored. Use simple `?` and `*` wildcards. For example `--ignore-links "http*://crates.io*"` will skip all links to the crates.io website. See the [used lib](https://github.com/becheran/wildmatch) for more information. |
| `--markup-types` | `-t` | Comma separated list of markup types which shall be checked [possible values: md, html] |
| `--root-dir` | `-r` | All links to the file system starting with a slash on linux or backslash on windows will use another virtual root dir. For example the link in a file `[link](/dir/other/file.md)` checked with the cli arg `--root-dir /env/another/dir` will let *mlc* check the existence of `/env/another/dir/dir/other/file.md`. |
Expand All @@ -138,6 +145,8 @@ offline = true
match-file-extension= true
# List of files and directories which will be ignored
ignore-path=["./ignore-me","./src"]
# Ignore all files ignored by git
gitignore = true
# List of links which will be ignored
ignore-links=["http://ignore-me.de/*","http://*.ignoresub-domain/*"]
# List of markup types which shall be checked
Expand Down
29 changes: 22 additions & 7 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ pub fn parse_args() -> Config {
.help("Path to the root folder used to resolve all relative paths")
.required(false),
)

.arg(
Arg::new("gitignore")
.long("gitignore")
.short('g')
.value_name("GIT")
.help("Ignore all files ignored by git")
.action(ArgAction::SetTrue)
.required(false),
)
.get_matches();

let default_dir = format!(".{}", &MAIN_SEPARATOR);
Expand Down Expand Up @@ -148,18 +158,23 @@ pub fn parse_args() -> Config {
}

if let Some(ignore_path) = matches.get_many::<String>("ignore-path") {
opt.ignore_path = Some(ignore_path.map(|x| Path::new(x).to_path_buf()).collect());
}
if opt.ignore_path.is_some() {
opt.ignore_path.as_mut().unwrap().iter_mut().for_each(|p| {
let mut paths: Vec<_> = ignore_path.map(|x| Path::new(x).to_path_buf()).collect();
for p in paths.iter_mut() {
match fs::canonicalize(&p) {
Ok(p) => &p,
Ok(canonical_path) => {
*p = canonical_path;
}
Err(e) => {
println!("⚠ Warn: Ignore path {:?} not found. {:?}.", p, e);
&p
panic!("Exiting due to invalid ignore path.");
}
};
});
}
opt.ignore_path = Some(paths);
}

if matches.get_flag("gitignore") {
opt.gitignore = Some(true);
}

if let Some(root_dir) = matches.get_one::<String>("root-dir") {
Expand Down
84 changes: 76 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ use serde::Deserialize;
use std::collections::HashMap;
use std::env;
use std::fmt;
use std::fs;
use std::path::Path;
use std::path::PathBuf;
use std::process::Command;
use std::sync::Arc;
use tokio::sync::Mutex;
use tokio::time::{sleep_until, Duration, Instant};
Expand Down Expand Up @@ -49,6 +52,8 @@ pub struct OptionalConfig {
pub ignore_path: Option<Vec<PathBuf>>,
#[serde(rename(deserialize = "root-dir"))]
pub root_dir: Option<PathBuf>,
#[serde(rename(deserialize = "gitignore"))]
pub gitignore: Option<bool>,
pub throttle: Option<u32>,
}

Expand Down Expand Up @@ -80,13 +85,14 @@ impl fmt::Display for Config {
f,
"
Debug: {:?}
Dir: {}
Dir: {}
DoNotWarnForRedirectTo: {:?}
Types: {:?}
Types: {:?}
Offline: {}
MatchExt: {}
RootDir: {}
IgnoreLinks: {}
Gitignore: {}
IgnoreLinks: {}
IgnorePath: {:?}
Throttle: {} ms",
self.optional.debug.unwrap_or(false),
Expand All @@ -96,6 +102,7 @@ Throttle: {} ms",
self.optional.offline.unwrap_or_default(),
self.optional.match_file_extension.unwrap_or_default(),
root_dir_str,
self.optional.gitignore.unwrap_or_default(),
ignore_str.join(","),
ignore_path_str,
self.optional.throttle.unwrap_or(0)
Expand Down Expand Up @@ -125,6 +132,33 @@ fn find_all_links(config: &Config) -> Vec<MarkupLink> {
links
}

/// Collects the markup files (`*.md` / `*.html`) that git currently ignores.
///
/// Runs `git ls-files --ignored --others --exclude-standard -z` and turns
/// every reported entry ending in `.md` or `.html` into a canonical path.
/// Entries that are not valid UTF-8 or that cannot be canonicalized are
/// skipped instead of aborting the whole run.
///
/// Returns `None` when the `git` binary cannot be started or when the command
/// exits unsuccessfully; the reason is reported on stderr in both cases.
fn find_git_ignored_files() -> Option<Vec<PathBuf>> {
    // `-z` makes git emit NUL-terminated, unquoted paths, so file names with
    // spaces, quotes or non-ASCII characters are reported verbatim.
    let output = match Command::new("git")
        .args(["ls-files", "--ignored", "--others", "--exclude-standard", "-z"])
        .output()
    {
        Ok(output) => output,
        Err(e) => {
            // A missing `git` binary is an environment problem, not a bug:
            // report it and fall back to "nothing ignored" instead of panicking.
            eprintln!("Failed to execute 'git' command: {}", e);
            return None;
        }
    };

    if !output.status.success() {
        eprintln!(
            "git ls-files command failed: {}",
            String::from_utf8_lossy(&output.stderr)
        );
        return None;
    }

    let ignored_files = output
        .stdout
        .split(|byte| *byte == 0)
        // Skip entries that are not valid UTF-8 rather than failing everything.
        .filter_map(|entry| std::str::from_utf8(entry).ok())
        .filter(|entry| entry.ends_with(".md") || entry.ends_with(".html"))
        .filter_map(|entry| fs::canonicalize(Path::new(entry)).ok())
        .collect::<Vec<_>>();
    Some(ignored_files)
}


fn print_helper(
link: &MarkupLink,
status_code: &colored::ColoredString,
Expand Down Expand Up @@ -168,7 +202,41 @@ pub async fn run(config: &Config) -> Result<(), ()> {
Some(s) => s.iter().map(|m| WildMatch::new(m)).collect(),
None => vec![],
};

// Only query git when the option is explicitly switched on. Checking
// `is_some()` would also enable it for `gitignore = false` in the config
// file, which is stored as `Some(false)`.
let gitignored_files: Option<Vec<PathBuf>> = if config.optional.gitignore.unwrap_or_default() {
    let files = find_git_ignored_files();
    debug!("Found gitignored files: {:?}", files);
    files
} else {
    None
};

let is_gitignore_enabled = gitignored_files.is_some();

for link in &links {
let canonical_link_source = match fs::canonicalize(&link.source) {
Ok(path) => path,
Err(e) => {
warn!("Failed to canonicalize link source: {}. Error: {:?}", link.source, e);
continue;
}
};

if is_gitignore_enabled {
if let Some(ref gif) = gitignored_files {
if gif.iter().any(|path| path == &canonical_link_source) {
print_helper(
link,
&"Skip".green(),
"Ignore link because it is ignored by git.",
false,
);
skipped += 1;
continue;
}
}
}

if ignore_links.iter().any(|m| m.matches(&link.target)) {
print_helper(
link,
Expand All @@ -179,6 +247,7 @@ pub async fn run(config: &Config) -> Result<(), ()> {
skipped += 1;
continue;
}

let link_type = get_link_type(&link.target);
let target = resolve_target_link(link, &link_type, config).await;
let t = Target { target, link_type };
Expand All @@ -190,11 +259,10 @@ pub async fn run(config: &Config) -> Result<(), ()> {
}
}

let do_not_warn_for_redirect_to: Arc<Vec<WildMatch>> =
Arc::new(match &config.optional.do_not_warn_for_redirect_to {
Some(s) => s.iter().map(|m| WildMatch::new(m)).collect(),
None => vec![],
});
let do_not_warn_for_redirect_to: Arc<Vec<WildMatch>> = Arc::new(match &config.optional.do_not_warn_for_redirect_to {
Some(s) => s.iter().map(|m| WildMatch::new(m)).collect(),
None => vec![],
});

let throttle = config.optional.throttle.unwrap_or_default() > 0;
info!("Throttle HTTP requests to same host: {:?}", throttle);
Expand Down
2 changes: 2 additions & 0 deletions tests/end_to_end.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ async fn end_to_end() {
fs::canonicalize("./benches/benchmark/markdown/ignore_me_dir").unwrap(),
]),
root_dir: None,
gitignore: None,
},
};
if let Err(e) = mlc::run(&config).await {
Expand All @@ -46,6 +47,7 @@ async fn end_to_end_different_root() {
ignore_path: None,
throttle: None,
root_dir: Some(test_files),
gitignore: None,
},
};
if let Err(e) = mlc::run(&config).await {
Expand Down

0 comments on commit 1afd1c7

Please sign in to comment.