diff --git a/Cargo.lock b/Cargo.lock
index 32f50e6..f572fbc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -448,6 +448,18 @@ version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed"
 
+[[package]]
+name = "fallible-iterator"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
+
+[[package]]
+name = "fallible-streaming-iterator"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
+
 [[package]]
 name = "fastrand"
 version = "1.7.0"
@@ -594,6 +606,15 @@ dependencies = [
  "ahash",
 ]
 
+[[package]]
+name = "hashlink"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d452c155cb93fecdfb02a73dd57b5d8e442c2063bd7aac72f1bc5e4263a43086"
+dependencies = [
+ "hashbrown",
+]
+
 [[package]]
 name = "heck"
 version = "0.3.3"
@@ -790,6 +811,17 @@ version = "0.2.126"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
 
+[[package]]
+name = "libsqlite3-sys"
+version = "0.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8664486da51de68fbb3331d37c2a0fff4b60e988f284670a6a0833a8e6406ad"
+dependencies = [
+ "cc",
+ "pkg-config",
+ "vcpkg",
+]
+
 [[package]]
 name = "linked-hash-map"
 version = "0.5.6"
@@ -867,7 +899,7 @@ dependencies = [
 
 [[package]]
 name = "monocle"
-version = "0.0.3"
+version = "0.0.4"
 dependencies = [
  "anyhow",
  "bgpkit-broker",
@@ -876,9 +908,13 @@ dependencies = [
  "clap 3.2.8",
  "config",
  "dirs",
+ "flate2",
  "ipnetwork",
  "itertools",
  "rayon",
+ "regex",
+ "reqwest",
+ "rusqlite",
  "serde",
  "serde_json",
  "tabled",
@@ -1267,6 +1303,20 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "rusqlite"
+version = "0.28.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01e213bc3ecb39ac32e81e51ebe31fd888a940515173e3a18a35f8c6e896422a"
+dependencies = [
+ "bitflags",
+ "fallible-iterator",
+ "fallible-streaming-iterator",
+ "hashlink",
+ "libsqlite3-sys",
+ "smallvec",
+]
+
 [[package]]
 name = "rust-ini"
 version = "0.18.0"
diff --git a/Cargo.toml b/Cargo.toml
index c75a866..010876c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "monocle"
-version = "0.0.3"
+version = "0.0.4"
 authors = ["Mingwei Zhang "]
 edition = "2021"
 readme = "README.md"
@@ -8,7 +8,7 @@
 license = "MIT"
 repository = "https://github.com/bgpkit/monocle"
 documentation = "https://docs.rs/monocle"
 description = """
-A commandline application to search, parse, and process BGP information stored in MRT files.
+A commandline application to search, parse, and process BGP information in public sources.
 """
 keywords = ["bgp", "bgpkit", "mrt"]
@@ -31,4 +31,11 @@ chrono = "0.4"
 anyhow = "1.0"
 tabled = "0.7"
 config = {version = "0.13.1", features = ["toml"]}
-dirs = "4"
\ No newline at end of file
+dirs = "4"
+rusqlite = { version = "0.28.0", features = ["bundled"] }
+reqwest = {version = "0.11", features = ["blocking"]}
+flate2 = "1"
+regex = "1.6.0"
+
+[features]
+scouter = []
\ No newline at end of file
diff --git a/README.md b/README.md
index 1020fc9..8e1d260 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 See through all BGP data with a monocle.
 
-![](https://spaces.bgpkit.org/assets/monocle/monocle-200px.jpg)
+![](https://spaces.bgpkit.org/assets/monocle/monocle-emoji.png)
 
 *Still in early prototype phase. You are warned.*
 
 
@@ -18,30 +18,36 @@ Subcommands:
 - `parse`: parse individual MRT files
 - `search`: search for matching messages from all available public MRT files
 - `time`: utility to convert time between unix timestamp and RFC3339 string
+- `whois`: search AS and organization information by ASN or name
 
 Top-level help menu:
 ```text
-monocle 0.0.1
+➜ ~ monocle
+monocle 0.0.4
 Mingwei Zhang
-A commandline application to search, parse, and process BGP information stored in MRT files.
+A commandline application to search, parse, and process BGP information in public sources.
 
 USAGE:
-    monocle
+    monocle [OPTIONS]
 
 OPTIONS:
-    -h, --help       Print help information
-    -V, --version    Print version information
+    -c, --config     configuration file path, by default $HOME/.monocle.toml is used
+        --debug      Print debug information
+    -h, --help       Print help information
+    -V, --version    Print version information
 
 SUBCOMMANDS:
-    help       Print this message or the help of the given subcommand(s)
-    parse      Parse individual MRT files given a file path, local or remote
-    scouter    Investigative toolbox
-    search     Search BGP messages from all available public MRT files
-    time       Time conversion utilities
+    help      Print this message or the help of the given subcommand(s)
+    parse     Parse individual MRT files given a file path, local or remote
+    search    Search BGP messages from all available public MRT files
+    time      Time conversion utilities
+    whois     ASN and organization lookup utility
 ```
 
 ### `monocle parse`
 
+Parse a single MRT file given a local path or a remote URL.
+
 ```text
 ➜ monocle git:(main) ✗ monocle parse --help
 monocle-parse 0.0.1
@@ -72,6 +78,9 @@
 
 ### `monocle search`
 
+Search for BGP messages across publicly available BGP route collectors and parse relevant
+MRT files in parallel. More filters can be used to search for messages that match your criteria.
+
 ```text
 ➜ monocle git:(main) ✗ monocle search --help
 monocle-search 0.0.1
@@ -101,6 +110,8 @@
 
 ### `monocle time`
 
+Convert between UNIX timestamp and RFC3339 time strings.
+
 ```text
 ➜ ~ monocle time --help
 monocle-time 0.0.3
@@ -144,6 +155,46 @@ Example runs:
 
 Input time must be either Unix timestamp or time string compliant with RFC3339
 ```
+
+### `monocle whois`
+
+Search AS/organization-level information by ASN or organization name.
+
+```text
+➜ ~ monocle whois --help
+monocle-whois 0.0.4
+ASN and organization lookup utility
+
+USAGE:
+    monocle whois [OPTIONS]
+
+ARGS:
"bgpkit") + +OPTIONS: + -a, --asn-only Search by ASN only + -h, --help Print help information + -n, --name-only Search AS and Org name only + -u, --update Refresh local as2org database + -V, --version Print version information +``` + +Example queries: +```text +➜ ~ monocle whois 400644 ++--------+------------+------------+--------------+-------------+----------+ +| asn | as_name | org_name | org_id | org_country | org_size | ++--------+------------+------------+--------------+-------------+----------+ +| 400644 | BGPKIT-LLC | BGPKIT LLC | BL-1057-ARIN | US | 1 | ++--------+------------+------------+--------------+-------------+----------+ + +➜ ~ monocle whois bgpkit ++--------+------------+------------+--------------+-------------+----------+ +| asn | as_name | org_name | org_id | org_country | org_size | ++--------+------------+------------+--------------+-------------+----------+ +| 400644 | BGPKIT-LLC | BGPKIT LLC | BL-1057-ARIN | US | 1 | ++--------+------------+------------+--------------+-------------+----------+ +``` + ## Built with ❤️ by BGPKIT Team BGPKIT is a small-team focuses on building the best open-source tooling for BGP data processing in Rust. We have over 10 years of diff --git a/src/as2org.rs b/src/as2org.rs new file mode 100644 index 0000000..9dae48a --- /dev/null +++ b/src/as2org.rs @@ -0,0 +1,430 @@ +/// AS2Org data handling utility. +/// +/// Data source: +/// The CAIDA AS Organizations Dataset, +/// http://www.caida.org/data/as-organizations +use std::fs::File; +use std::io::{BufRead, BufReader, Read}; + +use serde::{Serialize, Deserialize}; +use anyhow::{anyhow, Result}; +use flate2::read::GzDecoder; +use regex::Regex; +use rusqlite::Statement; +use tabled::Tabled; +use tracing::info; +use crate::MonocleDatabase; + + +/// Organization JSON format +/// +/// -------------------- +/// Organization fields +/// -------------------- +/// org_id : unique ID for the given organization +/// some will be created by the WHOIS entry and others will be +/// created by our scripts +/// changed : the changed date provided by its WHOIS entry +/// name : name could be selected from the AUT entry tied to the +/// organization, the AUT entry with the largest customer cone, +/// listed for the organization (if there existed an stand alone +/// organization), or a human maintained file. +/// country : some WHOIS provide as a individual field. 
+/// country : some WHOIS entries provide it as an individual field; in other cases
+///           we inferred it from the addresses
+/// source  : the RIR or NIR database which contained this entry
+#[derive(Debug, Serialize, Deserialize)]
+pub struct JsonOrg {
+    #[serde(alias="organizationId")]
+    org_id: String,
+
+    changed: Option<String>,
+
+    #[serde(default)]
+    name: String,
+
+    country: String,
+
+    /// The RIR or NIR database that contained this entry
+    source: String,
+
+    #[serde(alias="type")]
+    data_type: String
+}
+
+/// AS Json format
+///
+/// ----------
+/// AS fields
+/// ----------
+/// asn       : the AS number
+/// changed   : the changed date provided by its WHOIS entry
+/// name      : the name provided for the individual AS number
+/// org_id    : maps to an organization entry
+/// opaque_id : opaque identifier used by RIR extended delegation format
+/// source    : the RIR or NIR database which contained this entry
+#[derive(Debug, Serialize, Deserialize)]
+pub struct JsonAs {
+
+    asn: String,
+
+    changed: Option<String>,
+
+    #[serde(default)]
+    name: String,
+
+    #[serde(alias="opaqueId")]
+    opaque_id: Option<String>,
+
+    #[serde(alias="organizationId")]
+    org_id: String,
+
+    /// The RIR or NIR database that contained this entry
+    source: String,
+
+    #[serde(rename="type")]
+    data_type: String
+}
+
+#[derive(Debug)]
+pub enum DataEntry {
+    Org(JsonOrg),
+    As(JsonAs),
+}
+
+pub struct As2org {
+    db: MonocleDatabase,
+}
+
+#[derive(Debug)]
+pub enum SearchType {
+    AsnOnly,
+    NameOnly,
+    Guess,
+}
+
+impl Default for SearchType {
+    fn default() -> Self {
+        SearchType::Guess
+    }
+}
+
+#[derive(Debug, Tabled)]
+pub struct SearchResult {
+    asn: u32,
+    as_name: String,
+    org_name: String,
+    org_id: String,
+    org_country: String,
+    org_size: u32
+}
+
+fn stmt_to_results(stmt: &mut Statement) -> Result<Vec<SearchResult>> {
+    let res_iter = stmt.query_map([], |row| {
+        Ok(SearchResult {
+            asn: row.get(0)?,
+            as_name: row.get(1)?,
+            org_name: row.get(2)?,
+            org_id: row.get(3)?,
+            org_country: row.get(4)?,
+            org_size: row.get(5)?
+        })
+    })?;
+    Ok(
+        res_iter.filter_map(|x| x.ok()).collect()
+    )
+}
+
+impl As2org {
+
+    pub fn new(db_path: &Option<String>) -> Result<As2org> {
+        let mut db = MonocleDatabase::new(db_path)?;
+        As2org::initialize_db(&mut db);
+        Ok(As2org{ db })
+    }
+
+    pub fn is_db_empty(&self) -> bool {
+        let count: u32 = self.db.conn.query_row("select count(*) from as2org_as", [],
+            |row| row.get(0),
+        ).unwrap();
+        count == 0
+    }
+
+    fn initialize_db(db: &mut MonocleDatabase) {
+        db.conn.execute(r#"
+        create table if not exists as2org_as (
+            asn INTEGER PRIMARY KEY,
+            name TEXT,
+            org_id TEXT,
+            source TEXT
+        );
+        "#,[]).unwrap();
+        db.conn.execute(r#"
+        create table if not exists as2org_org (
+            org_id TEXT PRIMARY KEY,
+            name TEXT,
+            country TEXT,
+            source TEXT
+        );
+        "#,[]).unwrap();
+
+        // views
+
+        db.conn.execute(r#"
+        create view if not exists as2org_both as
+        select a.asn, a.name as 'as_name', b.name as 'org_name', b.org_id, b.country
+        from as2org_as as a join as2org_org as b on a.org_id = b.org_id
+        ;
+        "#,[]).unwrap();
+
+        db.conn.execute(r#"
+        create view if not exists as2org_count as
+        select org_id, org_name, count(*) as count
+        from as2org_both group by org_name
+        order by count desc;
+        "#,[]).unwrap();
+
+        db.conn.execute(r#"
+        create view if not exists as2org_all as
+        select a.*, b.count
+        from as2org_both as a join as2org_count as b on a.org_id = b.org_id;
+        "#,[]).unwrap();
+    }
+
+    fn insert_as(&self, as_entry: &JsonAs) -> Result<()> {
+        self.db.conn.execute( r#"
+        INSERT INTO as2org_as (asn, name, org_id, source)
+        VALUES (?1, ?2, ?3, ?4)
+        "#, (
+            as_entry.asn.parse::<u32>().unwrap(),
+            as_entry.name.as_str(),
+            as_entry.org_id.as_str(),
+            as_entry.source.as_str(),
+        )
+        )?;
+        Ok(())
+    }
+
+    fn insert_org(&self, org_entry: &JsonOrg) -> Result<()> {
+        self.db.conn.execute( r#"
+        INSERT INTO as2org_org (org_id, name, country, source)
+        VALUES (?1, ?2, ?3, ?4)
+        "#, (
+            org_entry.org_id.as_str(),
+            org_entry.name.as_str(),
+            org_entry.country.as_str(),
+            org_entry.source.as_str(),
+        )
+        )?;
+        Ok(())
+    }
+
+    pub fn clear_db(&self) {
+        self.db.conn.execute(r#"
+        DELETE FROM as2org_as
+        "#, []
+        ).unwrap();
+        self.db.conn.execute(r#"
+        DELETE FROM as2org_org
+        "#, []
+        ).unwrap();
+    }
+
+    /// parse as2org data and insert into monocle sqlite database
+    pub fn parse_insert_as2org(&self, url: Option<&str>) -> Result<()>{
+        self.clear_db();
+        let url = match url {
+            Some(u) => u.to_string(),
+            None => As2org::get_most_recent_data()
+        };
+        info!("start parsing as2org file at {}", url.as_str());
+        let entries = As2org::parse_as2org_file(url.as_str())?;
+        info!("parsing as2org file done. inserting to sqlite db now");
+        for entry in &entries {
+            match entry {
+                DataEntry::Org(e) => {
+                    self.insert_org(e)?;
+                }
+                DataEntry::As(e) => {
+                    self.insert_as(e)?;
+                }
+            }
+        }
+        info!("as2org data loading finished");
+        Ok(())
+    }
+
+    pub fn search(&self, query: &str, search_type: &SearchType) -> Result<Vec<SearchResult>>{
+        let res: Vec<SearchResult>;
+        match search_type {
+            SearchType::AsnOnly => {
+                let asn = query.parse::<u32>()?;
+                let mut stmt = self.db.conn.prepare(
+                    format!(
+                        "SELECT asn, as_name, org_name, org_id, country, count FROM as2org_all where asn='{}'", asn).as_str()
+                )?;
+                res = stmt_to_results(&mut stmt)?;
+            }
+            SearchType::NameOnly => {
+                let mut stmt = self.db.conn.prepare(
+                    format!(
+                        "SELECT asn, as_name, org_name, org_id, country, count FROM as2org_all where org_name like '%{}%' or as_name like '%{}%' order by count desc", query, query).as_str()
+                )?;
+                res = stmt_to_results(&mut stmt)?;
+            }
+            SearchType::Guess => {
+                match query.parse::<u32>() {
+                    Ok(asn) => {
+                        let mut stmt = self.db.conn.prepare(
+                            format!(
+                                "SELECT asn, as_name, org_name, org_id, country, count FROM as2org_all where asn='{}'", asn).as_str()
+                        )?;
+                        res = stmt_to_results(&mut stmt)?;
+                    }
+                    Err(_) => {
+                        let mut stmt = self.db.conn.prepare(
+                            format!(
+                                "SELECT asn, as_name, org_name, org_id, country, count FROM as2org_all where org_name like '%{}%' order by count desc", query).as_str()
+                        )?;
+                        res = stmt_to_results(&mut stmt)?;
+                    }
+                }
+            }
+        }
+        Ok(res)
+    }
+
+    /// parse remote AS2Org file into Vec of DataEntry
+    pub fn parse_as2org_file(path: &str) -> Result<Vec<DataEntry>> {
+        let mut res: Vec<DataEntry> = vec![];
+
+        let raw_reader: Box<dyn Read> = match path.starts_with("http") {
+            true => {
+                let response = reqwest::blocking::get(path)?;
+                Box::new(response)
+            }
+            false => {
+                Box::new(File::open(path)?)
+            }
+        };
+
+        let reader = BufReader::new(GzDecoder::new(raw_reader));
+        for line in reader.lines() {
+            let line = line?;
+            if line.contains(r#""type":"ASN""#) {
+                let data = serde_json::from_str::<JsonAs>(line.as_str());
+                match data {
+                    Ok(data) => {
+                        res.push(DataEntry::As(data));
+                    }
+                    Err(e) => {
+                        eprintln!("error parsing line:\n{}", line.as_str());
+                        return Err(anyhow!(e))
+                    }
+                }
+            } else {
+                let data = serde_json::from_str::<JsonOrg>(line.as_str());
+                match data {
+                    Ok(data) => {
+                        res.push(DataEntry::Org(data));
+                    }
+                    Err(e) => {
+                        eprintln!("error parsing line:\n{}", line.as_str());
+                        return Err(anyhow!(e))
+                    }
+                }
+            }
+        }
+        Ok(res)
+    }
+
+    pub fn get_most_recent_data() -> String {
+        let data_link: Regex = Regex::new(r#".*(........\.as-org2info\.jsonl\.gz).*"#).unwrap();
+        let content = reqwest::blocking::get("https://publicdata.caida.org/datasets/as-organizations/").unwrap().text().unwrap();
+        let res: Vec<String> = data_link.captures_iter(content.as_str()).filter_map(|cap| {
+            let link = cap[1].to_owned();
+            Some(link)
+        }).collect();
+        let file = res.last().unwrap().to_string();
+
+        format!("https://publicdata.caida.org/datasets/as-organizations/{}", file)
+    }
+}
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parsing_json_organization() {
+        let test_str1 = r#"{"changed":"20121010","country":"US","name":"99MAIN NETWORK SERVICES","organizationId":"9NS-ARIN","source":"ARIN","type":"Organization"}
+"#;
+        let test_str2 = r#"{"country":"JP","name":"Nagasaki Cable Media Inc.","organizationId":"@aut-10000-JPNIC","source":"JPNIC","type":"Organization"}
+"#;
+        assert!(serde_json::from_str::<JsonOrg>(test_str1).is_ok());
+        assert!(serde_json::from_str::<JsonOrg>(test_str2).is_ok());
+    }
+    #[test]
+    fn test_parsing_json_as() {
r#"{"asn":"400644","changed":"20220418","name":"BGPKIT-LLC","opaqueId":"059b5fb85e8a50e0f722f235be7457a0_ARIN","organizationId":"BL-1057-ARIN","source":"ARIN","type":"ASN"}"#; + assert!(serde_json::from_str::(test_str1).is_ok()); + } + + #[test] + fn test_creating_db() { + let as2org = As2org::new(&Some("./test.sqlite3".to_string())).unwrap(); + // approximately one minute insert time + let _res = as2org.parse_insert_as2org(Some("tests/test-as2org.jsonl.gz")); + + as2org.clear_db(); + } + + #[test] + fn test_search() { + let as2org = As2org::new(&Some("./test.sqlite3".to_string())).unwrap(); + as2org.clear_db(); + assert_eq!(as2org.is_db_empty(), true); + as2org.parse_insert_as2org(Some("tests/test-as2org.jsonl.gz")).unwrap(); + + let res = as2org.search("400644", &SearchType::AsnOnly); + assert!(res.is_ok()); + let data = res.unwrap(); + assert_eq!(data.len(), 1); + assert_eq!(data[0].asn, 400644); + + let res = as2org.search("0", &SearchType::AsnOnly); + assert!(res.is_ok()); + let data = res.unwrap(); + assert_eq!(data.len(), 0); + + let res = as2org.search("bgpkit", &SearchType::NameOnly); + assert!(res.is_ok()); + let data = res.unwrap(); + assert_eq!(data.len(), 1); + assert_eq!(data[0].asn, 400644); + + let res = as2org.search("400644", &SearchType::Guess); + assert!(res.is_ok()); + let data = res.unwrap(); + assert_eq!(data.len(), 1); + assert_eq!(data[0].asn, 400644); + + let res = as2org.search("bgpkit", &SearchType::Guess); + assert!(res.is_ok()); + let data = res.unwrap(); + assert_eq!(data.len(), 1); + assert_eq!(data[0].asn, 400644); + assert_eq!(data[0].as_name, "BGPKIT-LLC"); + assert_eq!(data[0].org_name, "BGPKIT LLC"); + assert_eq!(data[0].org_id, "BL-1057-ARIN"); + assert_eq!(data[0].org_country, "US"); + assert_eq!(data[0].org_size, 1); + + as2org.clear_db(); + } + + #[test] + fn test_crawling() { + println!("{}", As2org::get_most_recent_data()); + } +} \ No newline at end of file diff --git a/src/config.rs b/src/config.rs index 6e77bdb..b77777b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -3,31 +3,43 @@ use std::path::Path; use config::Config; pub struct MonocleConfig { - pub config: HashMap, + /// path to the directory to hold Monocle's data + pub data_dir: String, } -const EMPTY_CONFIG: &str = "# monocle configuration file\n"; +const EMPTY_CONFIG: &str = r#"### monocle configuration file + +### directory for cached data used by monocle +# data_dir="~/.monocle" +"#; impl MonocleConfig { - pub fn load(path: &Option) -> MonocleConfig { + /// function to create and initialize a new configuration + pub fn new(path: &Option) -> MonocleConfig { let mut builder = Config::builder(); + // by default use $HOME/.monocle.toml as the configuration file path + let home_dir = dirs::home_dir().unwrap().to_str().unwrap().to_owned(); + // config dir + let monocle_dir = format!("{}/.monocle", home_dir.as_str()); + // Add in toml configuration file match path { Some(p) => { - if Path::new(p.as_str()).exists(){ - builder = builder.add_source(config::File::with_name(p.as_str())); + let path = Path::new(p.as_str()); + if path.exists(){ + builder = builder.add_source(config::File::with_name(path.to_str().unwrap())); } else { std::fs::write(p.as_str(), EMPTY_CONFIG).expect("Unable to create config file"); } } None => { - // by default use $HOME/.monocle.toml as the configuration file path - let p = format!("{}/.monocle.toml", dirs::home_dir().unwrap().to_str().unwrap()); + std::fs::create_dir_all(monocle_dir.as_str()).unwrap(); + let p = format!("{}/monocle.toml", monocle_dir.as_str()); 
                 if Path::new(p.as_str()).exists(){
                     builder = builder.add_source(config::File::with_name(p.as_str()));
                 } else {
-                    std::fs::write(p.as_str(), EMPTY_CONFIG).expect("Unable to create config file");
+                    std::fs::write(p.as_str(), EMPTY_CONFIG).expect(format!("Unable to create config file {}", p.as_str()).as_str());
                 }
             }
         }
@@ -38,6 +50,20 @@
         let settings = builder.build()
             .unwrap();
         let config = settings.try_deserialize::<HashMap<String, String>>()
             .unwrap();
-        MonocleConfig{ config }
+
+        // check data directory config
+        let data_dir = match config.get("data_dir") {
+            Some(p) => {
+                let path = Path::new(p);
+                path.to_str().unwrap().to_string()
+            },
+            None => {
+                let dir = format!("{}/.monocle/", dirs::home_dir().unwrap().to_str().unwrap());
+                std::fs::create_dir_all(dir.as_str()).unwrap();
+                dir
+            }
+        };
+
+        MonocleConfig{ data_dir }
     }
 }
\ No newline at end of file
diff --git a/src/database.rs b/src/database.rs
new file mode 100644
index 0000000..963906c
--- /dev/null
+++ b/src/database.rs
@@ -0,0 +1,20 @@
+use rusqlite::Connection;
+use anyhow::Result;
+
+pub struct MonocleDatabase {
+    pub conn: Connection,
+}
+
+impl MonocleDatabase {
+    pub fn new(path: &Option<String>) -> Result<MonocleDatabase> {
+        let conn = match path {
+            Some(p) => {
+                Connection::open(p.as_str())?
+            }
+            None => {
+                Connection::open_in_memory()?
+            }
+        };
+        Ok(MonocleDatabase{conn})
+    }
+}
\ No newline at end of file
diff --git a/src/lib.rs b/src/lib.rs
index 697c601..ec33613 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,6 @@
 mod config;
+mod as2org;
+mod database;
 
 use std::net::IpAddr;
 use bgpkit_parser::BgpkitParser;
@@ -8,6 +10,8 @@ use chrono::{DateTime, NaiveDateTime, Utc};
 use tabled::{Table, Tabled};
 
 pub use crate::config::MonocleConfig;
+pub use crate::database::MonocleDatabase;
+pub use crate::as2org::*;
 
 pub fn parser_with_filters(
     file_path: &str,
diff --git a/src/monocle.rs b/src/monocle.rs
index c19c6a1..cbc1a40 100644
--- a/src/monocle.rs
+++ b/src/monocle.rs
@@ -4,7 +4,7 @@ use std::io::Write;
 use std::net::IpAddr;
 use std::path::PathBuf;
 
-use monocle::{MonocleConfig, parser_with_filters, string_to_time, time_to_table};
+use monocle::{As2org, MonocleConfig, parser_with_filters, SearchType, string_to_time, time_to_table};
 use rayon::prelude::*;
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
@@ -14,6 +14,7 @@ use tracing::{info, Level};
 use anyhow::{anyhow, Result};
 use bgpkit_parser::BgpElem;
 use chrono::DateTime;
+use tabled::Table;
 
 trait Validate{
     fn validate(&self) -> Result<()>;
@@ -27,6 +28,10 @@ struct Cli {
     #[clap(short, long)]
     config: Option<String>,
 
+    /// Print debug information
+    #[clap(long)]
+    debug: bool,
+
     #[clap(subcommand)]
     command: Commands,
 }
@@ -190,10 +195,6 @@ enum Commands {
 
     /// Search BGP messages from all available public MRT files.
     Search {
-        /// Print debug information
-        #[clap(long)]
-        debug: bool,
-
        /// Dry-run, do not download or parse.
        #[clap(long)]
        dry_run: bool,
@@ -210,12 +211,30 @@
         #[clap(flatten)]
         filters: SearchFilters,
     },
+    /// ASN and organization lookup utility.
+    Whois {
"bgpkit") + query: String, + + /// Search AS and Org name only + #[clap(short, long)] + name_only: bool, + + /// Search by ASN only + #[clap(short, long)] + asn_only: bool, + + /// Refresh local as2org database + #[clap(short, long)] + update: bool, + }, /// Time conversion utilities Time { /// Time stamp or time string to convert #[clap()] time: Option, }, + #[cfg(feature = "webp")] /// Investigative toolbox Scouter { /// Measure the power of your enemy @@ -240,7 +259,15 @@ fn elem_to_string(elem: &BgpElem, json: bool, pretty: bool) -> String { fn main() { let cli = Cli::parse(); - let _config = MonocleConfig::load(&cli.config); + let config = MonocleConfig::new(&cli.config); + + if cli.debug { + tracing_subscriber::fmt() + // filter spans/events with level TRACE or higher. + .with_max_level(Level::INFO) + .init(); + } + // You can check for the existence of subcommands, and if found use their // matches just as you would the top level cmd @@ -281,19 +308,12 @@ fn main() { } } }, - Commands::Search { debug, dry_run, json, pretty, filters } => { + Commands::Search { dry_run, json, pretty, filters } => { if let Err(e) = filters.validate() { eprintln!("{}", e.to_string()); return } - if debug { - tracing_subscriber::fmt() - // filter spans/events with level TRACE or higher. - .with_max_level(Level::INFO) - .init(); - } - let broker = bgpkit_broker::BgpkitBroker::new("https://api.broker.bgpkit.com/v2"); let ts_start = string_to_time(filters.start_ts.as_str()).unwrap().to_string(); let ts_end = string_to_time(filters.end_ts.as_str()).unwrap().to_string(); @@ -363,9 +383,40 @@ fn main() { // wait for the output thread to stop writer_thread.join().unwrap(); + } + Commands::Whois { query, name_only, asn_only ,update} => { + let data_dir = config.data_dir.as_str(); + let as2org = As2org::new(&Some(format!("{}/monocle-data.sqlite3", data_dir))).unwrap(); + + if update{ + // if update flag is set, clear existing as2org data and re-download later + as2org.clear_db(); + } + + if as2org.is_db_empty() { + println!("bootstrapping as2org data now... (it will take about one minute)"); + as2org.parse_insert_as2org(None).unwrap(); + println!("bootstrapping as2org data finished"); + } + + let search_type: SearchType = match (name_only, asn_only) { + (true, false) => { + SearchType::NameOnly + } + (false, true) => { + SearchType::AsnOnly + } + (false, false) => { + SearchType::Guess + } + (true, true) => { + eprintln!("name-only and asn-only cannot be both true"); + return + } + }; - /* - */ + let res = as2org.search(query.as_str(), &search_type).unwrap(); + println!("{}", Table::new(res).to_string()); } Commands::Time { time} => { match time_to_table(&time) { @@ -377,6 +428,7 @@ fn main() { } } } + #[cfg(feature = "webp")] Commands::Scouter { power: _ } => { diff --git a/tests/test-as2org.jsonl.gz b/tests/test-as2org.jsonl.gz new file mode 100644 index 0000000..6a77483 Binary files /dev/null and b/tests/test-as2org.jsonl.gz differ