From d24fd67810217f31fcc87fe54ae3a0d45571471b Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Wed, 14 Aug 2024 08:49:56 -0700 Subject: [PATCH] Recursively scan all files in the repo --- src/cli/lib.rs | 48 +++--- src/executable_code_finder/Cargo.toml | 7 + src/executable_code_finder/lib.rs | 224 +++++++++++++++++++++++--- 3 files changed, 231 insertions(+), 48 deletions(-) diff --git a/src/cli/lib.rs b/src/cli/lib.rs index 586ae2c1..b1decdf1 100644 --- a/src/cli/lib.rs +++ b/src/cli/lib.rs @@ -7,7 +7,7 @@ use hakana_reflection_info::analysis_result::{ }; use hakana_reflection_info::data_flow::graph::{GraphKind, WholeProgramKind}; use hakana_reflection_info::issue::IssueKind; -use hakana_str::{Interner, ThreadedInterner}; +use hakana_str::Interner; use indexmap::IndexMap; use rand::Rng; use rustc_hash::FxHashSet; @@ -17,9 +17,7 @@ use std::fs::{self, File}; use std::io::Write; use std::path::Path; use std::process::exit; -use std::sync::{Arc, Mutex}; -use hakana_reflection_info::code_location::FilePath; -use hakana_workhorse::file::VirtualFileSystem; +use std::sync::Arc; use test_runners::test_runner::TestRunner; pub mod test_runners; @@ -441,8 +439,8 @@ pub fn init( .subcommand( Command::new("find-executable") .about("Finds all executable lines of code") - .arg(arg!(--"file" ).required(true).help( - "THe file path to process", + .arg(arg!(--"root" ).required(false).help( + "The root directory that Hakana runs in. Defaults to the current directory", )) ) .get_matches(); @@ -662,7 +660,8 @@ pub fn init( Some(("find-executable", sub_matches)) => { do_find_executable( sub_matches, - root_dir, + &root_dir, + threads, logger, ); } @@ -736,30 +735,21 @@ fn do_fix( fn do_find_executable( sub_matches: &clap::ArgMatches, - root_dir: String, - _logger: Logger, + root_dir: &str, + threads: u8, + logger: Logger, ) { - let mut root_owned: String = root_dir.to_owned(); - let file = sub_matches - .value_of("file") - .unwrap(); - root_owned.push_str("/"); - root_owned.push_str(file); + let _output_file = sub_matches.value_of("output").map(|f| f.to_string()); + let _output_format = sub_matches.value_of("json-format").map(|f| f.to_string()); - println!("{}", root_owned); - - let interner = Arc::new(Mutex::new(Interner::default())); - let mut threaded_interner = ThreadedInterner::new(interner.clone()); - let interned_file_path = FilePath(threaded_interner.intern(root_owned.clone())); - let mut file_system = VirtualFileSystem::default(); - - file_system - .file_hashes_and_times - .insert(interned_file_path, (0, 0)); - - let aast = hakana_workhorse::get_aast_for_path(interned_file_path, root_owned.as_str()); - - executable_finder::collect_executable_lines(&aast.unwrap().0); + let config = config::Config::new(root_dir.to_string(), FxHashSet::default()); + let _ = executable_finder::scan_files( + &vec![root_dir.to_string()], + None, + &Arc::new(config), + threads, + Arc::new(logger), + ); } fn do_remove_unused_fixmes( diff --git a/src/executable_code_finder/Cargo.toml b/src/executable_code_finder/Cargo.toml index b20034a9..f202881d 100644 --- a/src/executable_code_finder/Cargo.toml +++ b/src/executable_code_finder/Cargo.toml @@ -4,7 +4,14 @@ version = "0.1.0" edition = "2021" [dependencies] +hakana-analyzer = { path = "../analyzer" } +hakana-logger = { path = "../logger" } +hakana-reflection-info = { path = "../code_info" } +hakana-str = { path = "../str" } +hakana-workhorse = { path = "../file_scanner_analyzer" } oxidized = { path = "../../third-party/hhvm/hphp/hack/src/oxidized" } +indicatif = "0.17.0-rc.11" +rustc-hash = "1.1.0" [lib] path = "lib.rs" \ No newline at end of file diff --git a/src/executable_code_finder/lib.rs b/src/executable_code_finder/lib.rs index 449f430d..9711d633 100644 --- a/src/executable_code_finder/lib.rs +++ b/src/executable_code_finder/lib.rs @@ -1,39 +1,225 @@ -use oxidized::{ - aast, - aast_visitor::{visit, AstParams, Node, Visitor}, -}; +use std::sync::{Arc, Mutex}; +use std::time::Instant; +use hakana_analyzer::config::Config; +use hakana_logger::Logger; +use hakana_reflection_info::code_location::FilePath; +use hakana_str::{Interner, ThreadedInterner}; +use hakana_workhorse::file::{VirtualFileSystem}; +use hakana_workhorse::scanner::{add_builtins_to_scan}; +use indicatif::{ProgressBar, ProgressStyle}; +use oxidized::{aast, aast_visitor::{visit, AstParams, Node, Visitor}}; +use rustc_hash::FxHashMap; +use hakana_reflection_info::file_info::ParserError; struct Context { } -struct Scanner { +pub fn scan_files( + scan_dirs: &Vec, + cache_dir: Option<&String>, + config: &Arc, + threads: u8, + logger: Arc, +) -> Result<(),()> { + logger.log_debug_sync(&format!("{:#?}", scan_dirs)); + + let mut files_to_scan = vec![]; + let mut files_to_analyze = vec![]; + let mut interner= Interner::default(); + let existing_file_system = None; + + get_filesystem( + &mut files_to_scan, + &mut interner, + &logger, + scan_dirs, + &existing_file_system, + config, + cache_dir, + &mut files_to_analyze, + ); + + let invalid_files = Arc::new(Mutex::new(vec![])); + + if !files_to_scan.is_empty() { + let file_scanning_now = Instant::now(); + + let bar = if logger.show_progress() { + let pb = ProgressBar::new(files_to_scan.len() as u64); + let sty = + ProgressStyle::with_template("{bar:40.green/yellow} {pos:>7}/{len:7}").unwrap(); + pb.set_style(sty); + Some(Arc::new(pb)) + } else { + None + }; + + let files_processed: Arc> = Arc::new(Mutex::new(0)); + + let mut group_size = threads as usize; + + let mut path_groups = FxHashMap::default(); + + if files_to_scan.len() < 4 * group_size { + group_size = 1; + } + + for (i, str_path) in files_to_scan.into_iter().enumerate() { + let group = i % group_size; + path_groups + .entry(group) + .or_insert_with(Vec::new) + .push(FilePath(interner.get(str_path.as_str()).unwrap())); + } + + let interner = Arc::new(Mutex::new(interner)); + let mut handles = vec![]; + + for (_, path_group) in path_groups { + let interner = interner.clone(); + let bar = bar.clone(); + let files_processed = files_processed.clone(); + let logger = logger.clone(); + let invalid_files = invalid_files.clone(); + + let handle = std::thread::spawn(move || { + let mut new_context = Context {}; + let new_interner = ThreadedInterner::new(interner); + for file_path in &path_group { + let str_path = new_interner + .parent + .lock() + .unwrap() + .lookup(&file_path.0) + .to_string(); + + println!("{}", str_path); + + match scan_file(&str_path, *file_path, &mut new_context, &logger.clone(), ) { + Err(_) => { + invalid_files.lock().unwrap().push(*file_path); + } + Ok(_) => {} + }; + + let mut tally = files_processed.lock().unwrap(); + *tally += 1; + + update_progressbar(*tally, bar.clone()); + } + + //resolved_names.lock().unwrap().extend(local_resolved_names); + + //let mut codebases = codebases.lock().unwrap(); + //codebases.push(new_codebase); + }); + + handles.push(handle); + } + + for handle in handles { + handle.join().unwrap(); + } + + if let Some(bar) = &bar { + bar.finish_and_clear(); + } + + if logger.can_log_timing() { + logger.log_sync(&format!( + "Scanning files took {:.2?}", + file_scanning_now.elapsed() + )); + } + } + + let _invalid_files = Arc::try_unwrap(invalid_files) + .unwrap() + .into_inner() + .unwrap(); + + Ok(()) } -impl<'ast> Visitor<'ast> for Scanner { - type Params = AstParams; +fn get_filesystem( + files_to_scan: &mut Vec, + interner: &mut Interner, + logger: &Logger, + scan_dirs: &Vec, + existing_file_system: &Option, + config: &Arc, + cache_dir: Option<&String>, + files_to_analyze: &mut Vec, +) -> VirtualFileSystem { + let mut file_system = VirtualFileSystem::default(); - fn object(&mut self) -> &mut dyn Visitor<'ast, Params = Self::Params> { - self + add_builtins_to_scan(files_to_scan, interner, &mut file_system); + + logger.log_sync("Looking for Hack files"); + + for scan_dir in scan_dirs { + logger.log_debug_sync(&format!(" - in {}", scan_dir)); + + files_to_scan.extend(file_system.find_files_in_dir( + scan_dir, + interner, + existing_file_system, + config, + cache_dir.is_some() || config.ast_diff, + files_to_analyze, + )); } - fn visit_stmt(&mut self, c: &mut Context, p: &aast::Stmt<(), ()>) -> Result<(), ()> { - let result = p.recurse(c, self); + file_system +} - println!("{}-{}", p.0.to_raw_span().start.line(),p.0.to_raw_span().end.line()); - result +fn update_progressbar(percentage: u64, bar: Option>) { + if let Some(bar) = bar { + bar.set_position(percentage); } } -pub fn collect_executable_lines( - program: &aast::Program<(), ()>, -) { - let mut checker = Scanner { +pub(crate) fn scan_file( + str_path: &str, + file_path: FilePath, + context: &mut Context, + logger: &Logger, +) -> Result<(), ParserError>{ + logger.log_debug_sync(&format!("scanning {}", str_path)); + + let aast = hakana_workhorse::get_aast_for_path(file_path, str_path); + + let aast = match aast { + Ok(aast) => aast, + Err(err) => { + return Err(err); + } }; - let mut context = Context { + let mut checker = Scanner { }; - visit(&mut checker, &mut context, program).unwrap(); + visit(&mut checker, context, &aast.0) +} + +struct Scanner { + } + +impl<'ast> Visitor<'ast> for Scanner { + type Params = AstParams; + + fn object(&mut self) -> &mut dyn Visitor<'ast, Params = Self::Params> { + self + } + + fn visit_stmt(&mut self, c: &mut Context, p: &aast::Stmt<(), ()>) -> Result<(), ParserError> { + let result = p.recurse(c, self); + + //println!("{}-{}", p.0.to_raw_span().start.line(),p.0.to_raw_span().end.line()); + + result + } +} \ No newline at end of file