diff --git a/Cargo.lock b/Cargo.lock index 498c419..5eb3989 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,7 +8,7 @@ version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ - "memchr", + "memchr", ] [[package]] @@ -130,6 +130,12 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + [[package]] name = "proc-macro2" version = "1.0.86" @@ -154,10 +160,10 @@ version = "1.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", ] [[package]] @@ -166,9 +172,9 @@ version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", + "aho-corasick", + "memchr", + "regex-syntax", ] [[package]] @@ -183,7 +189,8 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", - "regex", + "once_cell", + "regex", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index ac4b00a..11292f6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,4 +8,5 @@ edition = "2021" [dependencies] anyhow = "1.0.86" clap = { version = "4.5.8", features = ["derive"] } +once_cell = "1.19.0" regex = "1.10.5" diff --git a/src/cli.rs b/src/cli.rs index 475ad57..db56671 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,14 +1,19 @@ -use std::{fs, path}; +//! 
This module contains the command line interface for the tool -use anyhow::{Context, Result}; -use clap::Parser; +use { + anyhow::{Context, Result}, + clap::Parser, + std::{fs, path}, +}; +/// Command line arguments for the tool #[derive(Parser, Debug)] #[command( name = "rust test to DejaGnu", long_about = "A tool to convert rust tests into DejaGnu tests format" )] pub struct Arguments { + /// The rust source file to convert into `DejaGnu` format #[arg( short = 'f', long = "file", @@ -17,6 +22,7 @@ pub struct Arguments { )] pub source_file: path::PathBuf, + /// optional `stderr` file #[arg( short = 'e', long = "stderr", @@ -32,6 +38,7 @@ pub fn parse_arguments_and_read_file(args: &Arguments) -> Result<(String, Option let source_code = fs::read_to_string(&args.source_file) .with_context(|| format!("could not read sourcefile `{}`", args.source_file.display()))?; + // Read the stderr file if it exists let err_file = match &args.stderr_file { Some(stderr_file) => Some(fs::read_to_string(stderr_file).with_context(|| { @@ -65,6 +72,8 @@ mod tests { assert_eq!(args.stderr_file, Some(path::PathBuf::from("test.stderr"))); } + /// clap reports most development errors as `debug_assert!`s + /// See this for more details, [here](https://docs.rs/clap/4.5.15/clap/_derive/_tutorial/chapter_4/index.html) #[test] fn debug_args() { use clap::CommandFactory; diff --git a/src/errors.rs b/src/errors.rs index 05de780..7845b08 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,10 +1,21 @@ -use std::{cell::OnceCell, fmt, str::FromStr}; - -use regex::Regex; - -use self::WhichLine::*; +//! This module contains the logic for parsing rustc error messages. + +use { + self::WhichLine::*, + std::{fmt, str::FromStr}, +}; + +// https://docs.rs/once_cell/1.19.0/once_cell/#lazily-compiled-regex +#[macro_export] +macro_rules! regex { + ($re:literal $(,)?) 
=> {{ + static RE: once_cell::sync::OnceCell = once_cell::sync::OnceCell::new(); + RE.get_or_init(|| regex::Regex::new($re).unwrap()) + }}; +} -// https://rustc-dev-guide.rust-lang.org/tests/ui.html#error-levels +/// Represents the different kinds of Rustc compiler messages. +/// See [rustc dev guide](https://rustc-dev-guide.rust-lang.org/tests/ui.html#error-levels) #[derive(Copy, Clone, Debug, PartialEq)] pub enum RustcErrorKind { Help, @@ -48,6 +59,7 @@ impl fmt::Display for RustcErrorKind { } } +/// To store information from rustc source file #[derive(Debug)] pub struct Error { pub line_num: usize, @@ -60,11 +72,14 @@ pub struct Error { /// What kind of message we expect (e.g., warning, error, suggestion). /// `None` if not specified or unknown message kind. pub kind: Option, + ///Note: if we are loading this from rustc source file, this might be incomplete pub msg: String, pub error_code: Option, } impl fmt::Display for Error { + /// Formats the `Error` for display according to `DejaGnu` format + /// See `DejaGnu` documentation [here](https://gcc.gnu.org/onlinedocs/gccint/testsuites/directives-used-within-dejagnu-tests/syntax-and-descriptions-of-test-directives.html) fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use RustcErrorKind::*; @@ -98,6 +113,9 @@ impl fmt::Display for Error { } } +/// Represents the line in the rustc source code where an error occurred. +/// Luckily, rust compile test only stores error messages on and after the line where the error occurred. +/// But `DejaGnu` can process error messages on the previous line, the current line, or the next line. #[derive(PartialEq, Debug)] enum WhichLine { ThisLine, @@ -105,8 +123,10 @@ enum WhichLine { AdjustBackward(usize), } +/// The main function for loading errors from source file and from optional stderr file. 
pub fn load_error(text_file: &str, stderr_file: Option<&str>) -> Vec { let mut last_unfollow_error = None; + // For storing the errors let mut errors = Vec::new(); for (line_num, line) in text_file.lines().enumerate() { @@ -119,24 +139,31 @@ pub fn load_error(text_file: &str, stderr_file: Option<&str>) -> Vec { } } + // If stderr file is not provided, return the errors if stderr_file.is_none() { return errors; } // TODO: improve this code incrementally + // parsing error related information from `.stderr` file let error_code_stderr = parse_error_code(stderr_file.expect("stderr file is not found")); + // TODO: We need to load error messages from `.stderr` instead of the source file because sometimes the source file contains incomplete error messages + // finding the error code w.r.t line number and error message + // TODO: sometimes, the error message might not be the same but this doesn't matter as we are not comparing the row number for the message for error in errors.iter_mut() { for error_code in error_code_stderr.iter() { if error.line_num == error_code.line_number - && error.msg == error_code.error_message_detail + || error.msg == error_code.error_message_detail { error.error_code = Some(error_code.error_code.clone()); } } } + // return error detail with error codes errors } +/// To represent information from `stderr` file #[derive(Debug)] struct StderrResult { error_code: String, @@ -145,17 +172,15 @@ struct StderrResult { } fn is_error_code(s: &str) -> bool { - let re: OnceCell = OnceCell::new(); - let regex = re.get_or_init(|| Regex::new(r"^E\d{4}$").unwrap()); - regex.is_match(s) + regex!(r"^E\d{4}$").is_match(s) } +/// Parses error codes from the `stderr` file fn parse_error_code(stderr_content: &str) -> Vec { // Modified regex pattern with named capture groups - let re: OnceCell = OnceCell::new(); - let error_pattern = re.get_or_init(|| { - Regex::new(r"error\[(?PE\d{4})\]: (?P.+?)\n\s+-->.+:(?P\d+):").unwrap() - }); + let error_pattern = regex!( + 
r"error\[(?PE\d{4})\]: (?P.+?)\n\s+-->.+:(?P\d+):" + ); let mut results = Vec::new(); @@ -187,6 +212,7 @@ fn parse_error_code(stderr_content: &str) -> Vec { results } +/// Parses error details from a source line. fn parse_expected( last_nonfollow_error: Option, line_num: usize, @@ -197,11 +223,8 @@ fn parse_expected( // //~| // //~^ // //~^^^^^ - let re: OnceCell = OnceCell::new(); - let captures = re - .get_or_init(|| Regex::new(r"//(?:\[(?P[\w\-,]+)])?~(?P\||\^*)").unwrap()) - .captures(line)?; + let captures = regex!(r"//(?:\[(?P[\w\-,]+)])?~(?P\||\^*)").captures(line)?; let (follow, adjusts) = match &captures["adjust"] { "|" => (true, 0), @@ -227,6 +250,7 @@ fn parse_expected( let msg = msg.trim().to_owned(); + // If we find `//~|` or `//~^`, we need to adjust the line number. let mut relative_line_num = line_num as i32; let (which, line_num) = if follow { assert_eq!(adjusts, 0, "use either //~| or //~^, not both."); diff --git a/src/header.rs b/src/header.rs new file mode 100644 index 0000000..d2faf94 --- /dev/null +++ b/src/header.rs @@ -0,0 +1,96 @@ +//! This module contains the logic for parsing rust test headers +//! See [rustc dev guide](https://rustc-dev-guide.rust-lang.org/tests/headers.html#test-headers) + +#[derive(Debug)] +pub struct HeaderLine<'ln> { + pub line_number: usize, + /// The main part of the header directive, after removing the comment prefix + /// and the optional revision specifier. 
+ pub _directive: &'ln str, + /// DejaGnu formatted header line + pub dejagnu_header: String, +} + +pub fn parse_additional_options(code: &str) -> Vec { + let mut headers = Vec::new(); + + for (line_number, line) in code.lines().enumerate() { + let line = line.trim(); + if line.is_empty() || line.starts_with("fn") || line.starts_with("mod") { + continue; + } + if is_header_line(line) { + if let Some(header_info) = add_additional_options(line, line_number) { + headers.push(header_info); + } + } + } + headers +} + +pub fn is_header_line(line: &str) -> bool { + line.trim_start().starts_with("//@") +} + +fn add_additional_options(code: &str, line_number: usize) -> Option { + //TODO: If we know the file extension, then update this to + // let comment = if testfile.extension().is_some_and(|e| e == "rs") { "//@" } else { "#" }; + let comment = "//@"; + + if let Some((_header_revision, non_revisioned_directive_line)) = line_directive(comment, code) { + // The non_revisioned_directive_line is the directive without the "//@" prefix + let edition = parse_edition(non_revisioned_directive_line); + edition.as_ref()?; + Some(HeaderLine { + line_number: line_number + 1, // 1 based-indexed instead of zero based + _directive: "edition", + dejagnu_header: to_dejagnu_edition(edition.unwrap().as_str()), + }) + } else { + None + } +} + +fn line_directive<'line>( + comment: &str, + original_line: &'line str, +) -> Option<(Option<&'line str>, &'line str)> { + let after_comment = original_line + .trim_start() + .strip_prefix(comment)? 
+ .trim_start(); + + if let Some(after_open_bracket) = after_comment.strip_prefix('[') { + let Some((line_revision, directive)) = after_open_bracket.split_once(']') else { + panic!( + "malformed condition directive: expected `{comment}[foo]`, found `{original_line}`" + ) + }; + + Some((Some(line_revision), directive.trim_start())) + } else { + Some((None, after_comment)) + } +} + +fn parse_edition(line: &str) -> Option { + parse_name_value_directive(line, "edition") +} + +fn parse_name_value_directive(line: &str, directive: &str) -> Option { + let colon = directive.len(); + + if line.starts_with(directive) && line.as_bytes().get(colon) == Some(&b':') { + let value = line[(colon + 1)..].to_owned(); + Some(value) + } else { + None + } +} + +fn to_dejagnu_edition(edition: &str) -> String { + format!( + "// {{ dg-additional-options \"-frust-edition={}\" }}", + edition + ) +} diff --git a/src/known-directives.rs b/src/known-directives.rs new file mode 100644 index 0000000..3b258f3 --- /dev/null +++ b/src/known-directives.rs @@ -0,0 +1,233 @@ +// Copied from https://github.com/rust-lang/rust/blob/master/src/tools/compiletest/src/command-list.rs +pub const KNOWN_DIRECTIVE_NAMES: &[&str] = &[ + "assembly-output", + "aux-bin", + "aux-build", + "aux-codegen-backend", + "aux-crate", + "build-aux-docs", + "build-fail", + "build-pass", + "check-fail", + "check-pass", + "check-run-results", + "check-stdout", + "check-test-line-numbers-match", + "compare-output-lines-by-subset", + "compile-flags", + "doc-flags", + "dont-check-compiler-stderr", + "dont-check-compiler-stdout", + "dont-check-failure-status", + "edition", + "error-pattern", + "exec-env", + "failure-status", + "filecheck-flags", + "forbid-output", + "force-host", + "ignore-16bit", + "ignore-32bit", + "ignore-64bit", + "ignore-aarch64", + "ignore-aarch64-unknown-linux-gnu", + "ignore-android", + "ignore-apple", + "ignore-arm", + "ignore-avr", + "ignore-beta", + "ignore-cdb", + "ignore-compare-mode-next-solver", + 
"ignore-compare-mode-polonius", + "ignore-cross-compile", + "ignore-debug", + "ignore-eabi", + "ignore-emscripten", + "ignore-endian-big", + "ignore-freebsd", + "ignore-fuchsia", + "ignore-gdb", + "ignore-gdb-version", + "ignore-gnu", + "ignore-haiku", + "ignore-horizon", + "ignore-i686-pc-windows-gnu", + "ignore-i686-pc-windows-msvc", + "ignore-illumos", + "ignore-ios", + "ignore-linux", + "ignore-lldb", + "ignore-llvm-version", + "ignore-loongarch64", + "ignore-macabi", + "ignore-macos", + "ignore-mode-assembly", + "ignore-mode-codegen", + "ignore-mode-codegen-units", + "ignore-mode-coverage-map", + "ignore-mode-coverage-run", + "ignore-mode-crashes", + "ignore-mode-debuginfo", + "ignore-mode-incremental", + "ignore-mode-js-doc-test", + "ignore-mode-mir-opt", + "ignore-mode-pretty", + "ignore-mode-run-make", + "ignore-mode-run-pass-valgrind", + "ignore-mode-rustdoc", + "ignore-mode-rustdoc-json", + "ignore-mode-ui", + "ignore-mode-ui-fulldeps", + "ignore-msp430", + "ignore-msvc", + "ignore-musl", + "ignore-netbsd", + "ignore-nightly", + "ignore-none", + "ignore-nto", + "ignore-nvptx64", + "ignore-nvptx64-nvidia-cuda", + "ignore-openbsd", + "ignore-pass", + "ignore-remote", + "ignore-riscv64", + "ignore-s390x", + "ignore-sgx", + "ignore-spirv", + "ignore-stable", + "ignore-stage1", + "ignore-stage2", + "ignore-test", + "ignore-thumb", + "ignore-thumbv8m.base-none-eabi", + "ignore-thumbv8m.main-none-eabi", + "ignore-tvos", + "ignore-unix", + "ignore-unknown", + "ignore-uwp", + "ignore-visionos", + "ignore-vxworks", + "ignore-wasi", + "ignore-wasm", + "ignore-wasm32", + "ignore-wasm32-bare", + "ignore-wasm64", + "ignore-watchos", + "ignore-windows", + "ignore-windows-gnu", + "ignore-windows-msvc", + "ignore-x32", + "ignore-x86", + "ignore-x86_64", + "ignore-x86_64-apple-darwin", + "ignore-x86_64-unknown-linux-gnu", + "incremental", + "known-bug", + "llvm-cov-flags", + "min-cdb-version", + "min-gdb-version", + "min-lldb-version", + "min-llvm-version", + 
"min-system-llvm-version", + "needs-asm-support", + "needs-dlltool", + "needs-dynamic-linking", + "needs-force-clang-based-tests", + "needs-git-hash", + "needs-llvm-components", + "needs-profiler-support", + "needs-relocation-model-pic", + "needs-run-enabled", + "needs-rust-lld", + "needs-rust-lldb", + "needs-sanitizer-address", + "needs-sanitizer-cfi", + "needs-sanitizer-dataflow", + "needs-sanitizer-hwaddress", + "needs-sanitizer-kcfi", + "needs-sanitizer-leak", + "needs-sanitizer-memory", + "needs-sanitizer-memtag", + "needs-sanitizer-safestack", + "needs-sanitizer-shadow-call-stack", + "needs-sanitizer-support", + "needs-sanitizer-thread", + "needs-symlink", + "needs-threads", + "needs-unwind", + "needs-wasmtime", + "needs-xray", + "no-auto-check-cfg", + "no-prefer-dynamic", + "normalize-stderr-32bit", + "normalize-stderr-64bit", + "normalize-stderr-test", + "normalize-stdout-test", + "only-16bit", + "only-32bit", + "only-64bit", + "only-aarch64", + "only-aarch64-unknown-linux-gnu", + "only-apple", + "only-arm", + "only-avr", + "only-beta", + "only-bpf", + "only-cdb", + "only-gnu", + "only-i686-pc-windows-gnu", + "only-i686-pc-windows-msvc", + "only-ios", + "only-linux", + "only-loongarch64", + "only-loongarch64-unknown-linux-gnu", + "only-macos", + "only-mips", + "only-mips64", + "only-msp430", + "only-msvc", + "only-nightly", + "only-nvptx64", + "only-riscv64", + "only-sparc", + "only-sparc64", + "only-stable", + "only-thumb", + "only-tvos", + "only-unix", + "only-visionos", + "only-wasm32", + "only-wasm32-bare", + "only-wasm32-wasip1", + "only-watchos", + "only-windows", + "only-windows-gnu", + "only-windows-msvc", + "only-x86", + "only-x86_64", + "only-x86_64-fortanix-unknown-sgx", + "only-x86_64-pc-windows-gnu", + "only-x86_64-pc-windows-msvc", + "only-x86_64-unknown-linux-gnu", + "pp-exact", + "pretty-compare-only", + "pretty-expanded", + "pretty-mode", + "regex-error-pattern", + "remap-src-base", + "revisions", + "run-fail", + "run-flags", + "run-pass", 
+ "run-rustfix", + "rustc-env", + "rustfix-only-machine-applicable", + "should-fail", + "should-ice", + "stderr-per-bitwidth", + "test-mir-pass", + "unique-doc-out-dir", + "unset-exec-env", + "unset-rustc-env", + "unused-revision-names", +]; diff --git a/src/main.rs b/src/main.rs index 3fb2047..f57dd4b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,13 @@ -use anyhow::{Context, Result}; -use clap::Parser; +//! The main entry point of the program. + +use { + anyhow::{Context, Result}, + clap::Parser, +}; mod cli; mod errors; +mod header; mod transform; fn main() -> Result<()> { diff --git a/src/transform.rs b/src/transform.rs index 685613f..c8f712b 100644 --- a/src/transform.rs +++ b/src/transform.rs @@ -1,46 +1,66 @@ -use std::cell::OnceCell; +//! This module contains the code transformation logic. -use anyhow::Result; -use regex::Regex; +use { + crate::{ + errors, + header::{is_header_line, parse_additional_options}, + regex, + }, + anyhow::Result, +}; -use crate::errors; - -/// This function takes the rust code as input -/// and returns the code with DejaGnu directive +/// Transform code to `DejaGnu` format pub fn transform_code(code: &str, stderr_file: Option<&str>) -> Result { + // Load the rustc error messages, codes, lines and relative line numbers let errors = errors::load_error(code, stderr_file); + // For storing the transformed code let mut new_code = String::new(); + let additional_options = parse_additional_options(code); let mut line_num = 1; + // finding the respective line number and adding the error code for line in code.lines() { let mut new_line = line.to_string(); - // TODO: This is not the efficient way to find respective line number - for error in errors.iter() { - if (error.line_num as i32 - error.relative_line_num) != line_num { - continue; + + if is_header_line(line) { + for header in additional_options.iter() { + if header.line_number != line_num { + continue; + } + new_line = header.dejagnu_header.to_string(); + break; } - // In 
rustc test suites, the error directive is - // on the same line or the next line not on the previous line - // For the error on the next line - if error.relative_line_num != 0 { - new_line = format!("{}", error); - } else { - // For the error on the same line - let re: OnceCell = OnceCell::new(); + } else { + // TODO: This is not the efficient way to find respective line number + for error in errors.iter() { + // Checking the original line number + if (error.line_num as i32 - error.relative_line_num) != line_num as i32 { + continue; + } + // In rustc test suites, the error directive is + // on the same line or on the next line, but not on the previous line + // See this: https://rustc-dev-guide.rust-lang.org/tests/ui.html#error-annotations + // For the error on the next line + if error.relative_line_num != 0 { + // We simply add the error message, not to worry about the code + // The error was printed by our overloaded `Display` trait + new_line = format!("{}", error); + } else { + // For the error on the same line, we need to add error message at the end of the line + let captures = regex!(r"//(?:\[(?P[\w\-,]+)])?~(?P\||\^*)") + .captures(line) + .expect("Could not find the error directive"); - let captures = re - .get_or_init(|| { - Regex::new(r"//(?:\[(?P[\w\-,]+)])?~(?P\||\^*)").unwrap() - }) - .captures(line) - .expect("Could not find the error directive"); + // Get the part of comment before the sigil (e.g. `~^` or ~|) + let whole_match = captures.get(0).unwrap(); + // Get the existing source code before the error directive //~ ERROR or similar to this + let before_match = &line[..whole_match.start()]; - // Get the part of comment before the sigil (e.g. 
`~^` or ~|) - let whole_match = captures.get(0).unwrap(); - let before_match = &line[..whole_match.start()]; - new_line = format!("{}{}", before_match, error); + // The error was printed by our overloaded `Display` trait + new_line = format!("{}{}", before_match, error); + } + break; } - break; } new_code.push_str(&new_line); new_code.push('\n');