Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Find executable code for coverage analysis #23

Merged
merged 7 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ hakana-analyzer = { path = "src/analyzer" }
hakana-str = { path = "src/str" }
hakana-language-server = { path = "src/language_server" }
hakana-workhorse = { path = "src/file_scanner_analyzer" }
executable-finder = { path = "src/executable_code_finder" }
mimalloc = { version = "*", default-features = false }
tower-lsp = { version = "=0.20.0", features = ["proposed"] }
tokio = { version = "1.26.0", features = ["full"] }
Expand All @@ -33,7 +34,8 @@ members = [
"src/file_scanner_analyzer",
"src/language_server",
"src/logger",
"src/js_interop"
"src/js_interop",
"src/executable_code_finder",
]
exclude = ["third-party"]

Expand Down
1 change: 1 addition & 0 deletions src/cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ edition = "2021"

[dependencies]
hakana-workhorse = { path = "../file_scanner_analyzer" }
executable-finder = { path = "../executable_code_finder" }
hakana-analyzer = { path = "../analyzer" }
hakana-logger = { path = "../logger" }
hakana-code-info = { path = "../code_info" }
Expand Down
58 changes: 58 additions & 0 deletions src/cli/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,16 @@ pub fn init(
.arg(arg!(<TEST> "The test to run"))
.arg_required_else_help(true),
)
.subcommand(
Command::new("find-executable")
.about("Finds all executable lines of code")
.arg(arg!(--"root" <PATH>).required(false).help(
"The root directory that Hakana runs in. Defaults to the current directory",
))
.arg(arg!(--"output" <PATH>).required(true).help(
"File to save output to"
)),
)
.get_matches();

let cwd = (env::current_dir()).unwrap().to_str().unwrap().to_string();
Expand Down Expand Up @@ -650,6 +660,15 @@ pub fn init(
random_seed,
);
}
Some(("find-executable", sub_matches)) => {
do_find_executable(
sub_matches,
&root_dir,
&cwd,
threads,
logger,
);
}
_ => unreachable!(), // If all subcommands are defined above, anything else is unreachable!()
}

Expand Down Expand Up @@ -718,6 +737,45 @@ fn do_fix(
}
}

fn do_find_executable(
sub_matches: &clap::ArgMatches,
root_dir: &str,
cwd: &String,
threads: u8,
logger: Logger,
) {
let output_file = sub_matches.value_of("output").unwrap().to_string();
let config = config::Config::new(root_dir.to_string(), FxHashSet::default());

match executable_finder::scan_files(
&vec![root_dir.to_string()],
None,
&Arc::new(config),
threads,
Arc::new(logger),
) {
Ok(file_infos) => {
let output_path = if output_file.starts_with('/') {
output_file
} else {
format!("{}/{}", cwd, output_file)
};
let mut out = fs::File::create(Path::new(&output_path)).unwrap();
match write!(out, "{}", serde_json::to_string_pretty(&file_infos).unwrap()) {
Ok(_) => {
println!("Done")
}
Err(err) => {
println!("error: {}", err)
}
}
}
Err(_) => {
println!("error")
}
}
}

fn do_remove_unused_fixmes(
sub_matches: &clap::ArgMatches,
root_dir: &String,
Expand Down
18 changes: 18 additions & 0 deletions src/executable_code_finder/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[package]
name = "executable-finder"
version = "0.1.0"
edition = "2021"

[dependencies]
hakana-analyzer = { path = "../analyzer" }
hakana-logger = { path = "../logger" }
hakana-code-info = { path = "../code_info" }
hakana-str = { path = "../str" }
hakana-workhorse = { path = "../file_scanner_analyzer" }
oxidized = { path = "../../third-party/hhvm/hphp/hack/src/oxidized" }
indicatif = "0.17.0-rc.11"
rustc-hash = "1.1.0"
serde = { version = "1.0", features = ["derive"] }

[lib]
path = "lib.rs"
271 changes: 271 additions & 0 deletions src/executable_code_finder/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
use hakana_analyzer::config::Config;
use hakana_logger::Logger;
use hakana_code_info::code_location::FilePath;
use hakana_code_info::file_info::ParserError;
use hakana_str::{Interner, ThreadedInterner};
use hakana_workhorse::file::VirtualFileSystem;
use hakana_workhorse::scanner::add_builtins_to_scan;
use indicatif::{ProgressBar, ProgressStyle};
use oxidized::aast::Stmt_;
use oxidized::ast::Pos;
use oxidized::{aast, aast_visitor::{visit, AstParams, Node, Visitor}};
use rustc_hash::FxHashMap;
use serde::Serialize;
use std::sync::{Arc, Mutex};
use std::time::Instant;

#[derive(Debug, Serialize)]
pub struct ExecutableLines {
pub path: String,
pub executable_lines: Vec<String>,
}

pub fn scan_files(
scan_dirs: &Vec<String>,
cache_dir: Option<&String>,
config: &Arc<Config>,
threads: u8,
logger: Arc<Logger>,
) -> Result<Vec<ExecutableLines>, ()> {
logger.log_debug_sync(&format!("{:#?}", scan_dirs));

let mut files_to_scan = vec![];
let mut files_to_analyze = vec![];
let mut interner = Interner::default();
let existing_file_system = None;

get_filesystem(
&mut files_to_scan,
&mut interner,
&logger,
scan_dirs,
&existing_file_system,
config,
cache_dir,
&mut files_to_analyze,
);

let executable_lines = Arc::new(Mutex::new(vec![]));

if !files_to_scan.is_empty() {
let file_scanning_now = Instant::now();

let bar = if logger.show_progress() {
let pb = ProgressBar::new(files_to_scan.len() as u64);
let sty =
ProgressStyle::with_template("{bar:40.green/yellow} {pos:>7}/{len:7}").unwrap();
pb.set_style(sty);
Some(Arc::new(pb))
} else {
None
};

let files_processed: Arc<Mutex<u64>> = Arc::new(Mutex::new(0));

let mut group_size = threads as usize;
let mut path_groups = FxHashMap::default();
if files_to_scan.len() < 4 * group_size {
group_size = 1;
}

for (i, str_path) in files_to_scan.into_iter().enumerate() {
let group = i % group_size;
path_groups
.entry(group)
.or_insert_with(Vec::new)
.push(FilePath(interner.get(str_path.as_str()).unwrap()));
}

let interner = Arc::new(Mutex::new(interner));
let mut handles = vec![];

for (_, path_group) in path_groups {
let interner = interner.clone();
let bar = bar.clone();
let files_processed = files_processed.clone();
let logger = logger.clone();
let executable_lines = executable_lines.clone();
let root_dir = config.root_dir.clone();

let handle = std::thread::spawn(move || {
let new_interner = ThreadedInterner::new(interner);

for file_path in &path_group {
let res = scan_file(&new_interner, &root_dir, *file_path, &logger.clone());
let mut executable_lines = executable_lines.lock().unwrap();
if !res.executable_lines.is_empty() {
executable_lines.push(res);
}
let mut tally = files_processed.lock().unwrap();
*tally += 1;
update_progressbar(*tally, bar.clone());
}
});

handles.push(handle);
}

for handle in handles {
handle.join().unwrap();
}

if let Some(bar) = &bar {
bar.finish_and_clear();
}

if logger.can_log_timing() {
logger.log_sync(&format!(
"Scanning files took {:.2?}",
file_scanning_now.elapsed()
));
}
}

Ok(Arc::try_unwrap(executable_lines).unwrap().into_inner().unwrap())
}

fn get_filesystem(
files_to_scan: &mut Vec<String>,
interner: &mut Interner,
logger: &Logger,
scan_dirs: &Vec<String>,
existing_file_system: &Option<VirtualFileSystem>,
config: &Arc<Config>,
cache_dir: Option<&String>,
files_to_analyze: &mut Vec<String>,
) -> VirtualFileSystem {
let mut file_system = VirtualFileSystem::default();

add_builtins_to_scan(files_to_scan, interner, &mut file_system);

logger.log_sync("Looking for Hack files");

for scan_dir in scan_dirs {
logger.log_debug_sync(&format!(" - in {}", scan_dir));

files_to_scan.extend(file_system.find_files_in_dir(
scan_dir,
interner,
existing_file_system,
config,
cache_dir.is_some() || config.ast_diff,
files_to_analyze,
));
}

file_system
}


fn update_progressbar(percentage: u64, bar: Option<Arc<ProgressBar>>) {
if let Some(bar) = bar {
bar.set_position(percentage);
}
}

pub(crate) fn scan_file(
interner: &ThreadedInterner,
root_dir: &str,
file_path: FilePath,
logger: &Logger,
) -> ExecutableLines {
let interner = interner
.parent
.lock()
.unwrap();
let str_path = interner
.lookup(&file_path.0)
.to_string();

logger.log_debug_sync(&format!("scanning {}", str_path));
let aast = hakana_workhorse::get_aast_for_path(file_path, &str_path);
let aast = match aast {
Ok(aast) => aast,
Err(_) => panic!("invalid file: {}", str_path)
};
let mut checker = Scanner {};
let mut context = Vec::new();
match visit(&mut checker, &mut context, &aast.0) {
Pchelolo marked this conversation as resolved.
Show resolved Hide resolved
Ok(_) => ExecutableLines {
path: file_path.get_relative_path(&interner, root_dir),
executable_lines: context,
},
Err(_) => panic!("invalid file: {}", str_path)
}
}

struct Scanner {}

impl<'ast> Visitor<'ast> for Scanner {
type Params = AstParams<Vec<String>, ParserError>;

fn object(&mut self) -> &mut dyn Visitor<'ast, Params=Self::Params> {
self
}

fn visit_stmt(&mut self, c: &mut Vec<String>, p: &aast::Stmt<(), ()>) -> Result<(), ParserError> {
match &p.1 {
Stmt_::For(boxed) => {
push_start(&p.0, c); // The line where for loop is declared is coverable
boxed.1.recurse(c, self)
}
Stmt_::Foreach(boxed) => {
push_start(&p.0, c); // The line where foreach loop is declared is coverable
boxed.2.recurse(c, self)
}
Stmt_::Do(boxed) => {
push_pos(&boxed.1.1, c);
boxed.0.recurse(c, self)
}
Stmt_::While(boxed) => {
push_pos(&boxed.0.1, c);
boxed.1.recurse(c, self)
}
Stmt_::If(boxed) => {
push_pos(&boxed.0.1, c); // if expression
boxed.1.recurse(c, self)?;
boxed.2.recurse(c, self)
}
Stmt_::Switch(boxed) => {
// Skipping the switch statement, it's never covered by HHVM
for case_stmt in &boxed.1 {
push_pos(&case_stmt.0.1, c);
case_stmt.recurse(c, self)?;
}
boxed.2.recurse(c, self)
}
Stmt_::Block(boxed) => {
boxed.recurse(c, self)
}
Stmt_::Expr(boxed) => {
let start = boxed.1.to_raw_span().start.line();
let end = boxed.1.to_raw_span().end.line();
if start == end {
c.push(format!("{}-{}", start, end));
} else {
// Multi-line expressions seem to miss the first line in HHVM coverage
c.push(format!("{}-{}", start + 1, end));
}
Ok(())
}
_ => {
let result = p.recurse(c, self);
push_pos(&p.0, c);
result
}
}
}
}

fn push_start(p: &Pos, res: &mut Vec<String>) {
let start = p.to_raw_span().start.line();
res.push(format!("{}-{}", start, start));
}

fn push_pos(p: &Pos, res: &mut Vec<String>) {
let start = p.to_raw_span().start.line();
let end = p.to_raw_span().end.line();
if start != 0 && end != 0 {
res.push(format!("{}-{}", start, end));
}
}
Loading