From 914905615b4efee30f4106cd0f4d57a3cd0ae523 Mon Sep 17 00:00:00 2001 From: RustyYato Date: Mon, 8 Apr 2024 12:11:18 -0700 Subject: [PATCH 1/4] remove error branch from LUT if it is unreachable --- logos-codegen/src/generator/fork.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/logos-codegen/src/generator/fork.rs b/logos-codegen/src/generator/fork.rs index a0631f93..db0e99e9 100644 --- a/logos-codegen/src/generator/fork.rs +++ b/logos-codegen/src/generator/fork.rs @@ -87,9 +87,18 @@ impl<'a> Generator<'a> { }) .collect::(); - let jumps = &jumps; + let may_error = table.iter().any(|&idx| idx == 0); + + let jumps = jumps.as_slice(); let table = table.iter().copied().map(|idx| &jumps[idx as usize]); + let jumps = if may_error { jumps } else { &jumps[1..] }; + let error_branch = if may_error { + Some(quote!(Jump::__ => #miss)) + } else { + None + }; + quote! { enum Jump { #(#jumps,)* @@ -105,7 +114,7 @@ impl<'a> Generator<'a> { match LUT[#byte as usize] { #branches - Jump::__ => #miss, + #error_branch } } } From 0dfcf0080c52c79ce5c5d1e4ce3edbf021390c28 Mon Sep 17 00:00:00 2001 From: RustyYato Date: Fri, 3 May 2024 12:30:15 -0700 Subject: [PATCH 2/4] add codegen tests for LUT --- .../tests/data/no_error_lut/fmt_output.rs | 195 ++++++++++++++++++ logos-cli/tests/data/no_error_lut/input.rs | 10 + logos-cli/tests/data/no_error_lut/output.rs | 1 + .../tests/data/{ => simple}/fmt_output.rs | 0 logos-cli/tests/data/{ => simple}/input.rs | 0 logos-cli/tests/data/{ => simple}/output.rs | 0 logos-cli/tests/tests.rs | 111 ++++++---- 7 files changed, 279 insertions(+), 38 deletions(-) create mode 100644 logos-cli/tests/data/no_error_lut/fmt_output.rs create mode 100644 logos-cli/tests/data/no_error_lut/input.rs create mode 100644 logos-cli/tests/data/no_error_lut/output.rs rename logos-cli/tests/data/{ => simple}/fmt_output.rs (100%) rename logos-cli/tests/data/{ => simple}/input.rs (100%) rename logos-cli/tests/data/{ => simple}/output.rs (100%) diff --git a/logos-cli/tests/data/no_error_lut/fmt_output.rs b/logos-cli/tests/data/no_error_lut/fmt_output.rs new file mode 100644 index 00000000..22a5032b --- /dev/null +++ b/logos-cli/tests/data/no_error_lut/fmt_output.rs @@ -0,0 +1,195 @@ +#[derive()] +enum Token { + Newline, + AnyUnicode, + Any, +} +impl<'s> ::logos::Logos<'s> for Token { + type Error = (); + type Extras = (); + type Source = [u8]; + fn lex(lex: &mut ::logos::Lexer<'s, Self>) { + use logos::internal::{CallbackResult, LexerInternal}; + type Lexer<'s> = ::logos::Lexer<'s, Token>; + fn _end<'s>(lex: &mut Lexer<'s>) { + lex.end() + } + fn _error<'s>(lex: &mut Lexer<'s>) { + lex.bump_unchecked(1); + lex.error(); + } + macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } + #[inline] + fn goto1_x<'s>(lex: &mut Lexer<'s>) { + lex.set(Ok(Token::Newline)); + } + #[inline] + fn goto11_ctx11_x<'s>(lex: &mut Lexer<'s>) { + lex.set(Ok(Token::Any)); + } + #[inline] + fn goto2_ctx11_x<'s>(lex: &mut Lexer<'s>) { + lex.set(Ok(Token::AnyUnicode)); + } + #[inline] + fn goto16_ctx11_x<'s>(lex: &mut Lexer<'s>) { + match lex.read::<&[u8; 2usize]>() { + Some([128u8..=159u8, 128u8..=191u8]) => { + lex.bump_unchecked(2usize); + goto2_ctx11_x(lex) + } + _ => goto11_ctx11_x(lex), + } + } + #[inline] + fn goto17_ctx11_x<'s>(lex: &mut Lexer<'s>) { + match lex.read::<&[u8; 3usize]>() { + Some([144u8..=191u8, 128u8..=191u8, 128u8..=191u8]) => { + lex.bump_unchecked(3usize); + goto2_ctx11_x(lex) + } + _ => goto11_ctx11_x(lex), + } + } + #[inline] + fn goto2_x<'s>(lex: &mut Lexer<'s>) { + lex.set(Ok(Token::AnyUnicode)); + } + #[inline] + fn goto13_ctx11_x<'s>(lex: &mut Lexer<'s>) { + match lex.read::<&[u8; 1usize]>() { + Some([128u8..=191u8]) => { + lex.bump_unchecked(1usize); + goto2_ctx11_x(lex) + } + _ => goto11_ctx11_x(lex), + } + } + #[inline] + fn goto18_ctx11_x<'s>(lex: &mut Lexer<'s>) { + match lex.read::<&[u8; 3usize]>() { + Some([128u8..=191u8, 128u8..=191u8, 128u8..=191u8]) => { + lex.bump_unchecked(3usize); + goto2_ctx11_x(lex) + } + _ => goto11_ctx11_x(lex), + } + } + #[inline] + fn goto15_ctx11_x<'s>(lex: &mut Lexer<'s>) { + match lex.read::<&[u8; 2usize]>() { + Some([128u8..=191u8, 128u8..=191u8]) => { + lex.bump_unchecked(2usize); + goto2_ctx11_x(lex) + } + _ => goto11_ctx11_x(lex), + } + } + #[inline] + fn goto14_ctx11_x<'s>(lex: &mut Lexer<'s>) { + match lex.read::<&[u8; 2usize]>() { + Some([160u8..=191u8, 128u8..=191u8]) => { + lex.bump_unchecked(2usize); + goto2_ctx11_x(lex) + } + _ => goto11_ctx11_x(lex), + } + } + #[inline] + fn goto19_ctx11_x<'s>(lex: &mut Lexer<'s>) { + match lex.read::<&[u8; 3usize]>() { + Some([128u8..=143u8, 128u8..=191u8, 128u8..=191u8]) => { + lex.bump_unchecked(3usize); + goto2_ctx11_x(lex) + } + _ => goto11_ctx11_x(lex), + } + } + #[inline] + fn goto11_x<'s>(lex: &mut Lexer<'s>) { + lex.set(Ok(Token::Any)); + } + #[inline] + fn goto20<'s>(lex: &mut Lexer<'s>) { + enum Jump { + J1, + J16, + J17, + J2, + J13, + J18, + J15, + J14, + J19, + J11, + } + const LUT: [Jump; 256] = { + use Jump::*; + [ + J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J1, J2, J2, J2, J2, J2, J2, J2, J2, J2, + J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, + J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, + J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, + J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, + J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, + J2, J2, J2, J2, J2, J2, J2, J2, J11, J11, J11, J11, J11, J11, J11, J11, J11, + J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, + J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, + J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, + J11, J11, J11, J11, J11, J11, J11, J11, J11, J13, J13, J13, J13, J13, J13, J13, + J13, J13, J13, J13, J13, J13, J13, J13, J13, J13, J13, J13, J13, J13, J13, J13, + J13, J13, J13, J13, J13, J13, J13, J14, J15, J15, J15, J15, J15, J15, J15, J15, + J15, J15, J15, J15, J16, J15, J15, J17, J18, J18, J18, J19, J11, J11, J11, J11, + J11, J11, J11, J11, J11, J11, J11, + ] + }; + let byte = match lex.read::() { + Some(byte) => byte, + None => return _end(lex), + }; + match LUT[byte as usize] { + Jump::J1 => { + lex.bump_unchecked(1usize); + goto1_x(lex) + } + Jump::J16 => { + lex.bump_unchecked(1usize); + goto16_ctx11_x(lex) + } + Jump::J17 => { + lex.bump_unchecked(1usize); + goto17_ctx11_x(lex) + } + Jump::J2 => { + lex.bump_unchecked(1usize); + goto2_x(lex) + } + Jump::J13 => { + lex.bump_unchecked(1usize); + goto13_ctx11_x(lex) + } + Jump::J18 => { + lex.bump_unchecked(1usize); + goto18_ctx11_x(lex) + } + Jump::J15 => { + lex.bump_unchecked(1usize); + goto15_ctx11_x(lex) + } + Jump::J14 => { + lex.bump_unchecked(1usize); + goto14_ctx11_x(lex) + } + Jump::J19 => { + lex.bump_unchecked(1usize); + goto19_ctx11_x(lex) + } + Jump::J11 => { + lex.bump_unchecked(1usize); + goto11_x(lex) + } + } + } + goto20(lex) + } +} diff --git a/logos-cli/tests/data/no_error_lut/input.rs b/logos-cli/tests/data/no_error_lut/input.rs new file mode 100644 index 00000000..84504272 --- /dev/null +++ b/logos-cli/tests/data/no_error_lut/input.rs @@ -0,0 +1,10 @@ +#[derive(Logos)] +#[logos(source = [u8])] +enum Token { + #[token("\n")] + Newline, + #[regex(".")] + AnyUnicode, + #[regex(b".", priority = 0)] + Any, +} diff --git a/logos-cli/tests/data/no_error_lut/output.rs b/logos-cli/tests/data/no_error_lut/output.rs new file mode 100644 index 00000000..3bec6730 --- /dev/null +++ b/logos-cli/tests/data/no_error_lut/output.rs @@ -0,0 +1 @@ +# [derive ()] enum Token { Newline , AnyUnicode , Any , }impl < 's > :: logos :: Logos < 's > for Token { type Error = () ; type Extras = () ; type Source = [u8] ; fn lex (lex : & mut :: logos :: Lexer < 's , Self >) { use :: logos :: internal :: { LexerInternal , CallbackResult } ; type Lexer < 's > = :: logos :: Lexer < 's , Token > ; fn _end < 's > (lex : & mut Lexer < 's >) { lex . end () } fn _error < 's > (lex : & mut Lexer < 's >) { lex . bump_unchecked (1) ; lex . error () ; } macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } # [inline] fn goto1_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Newline)) ; } # [inline] fn goto11_ctx11_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Any)) ; } # [inline] fn goto2_ctx11_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: AnyUnicode)) ; } # [inline] fn goto16_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([128u8 ..= 159u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto17_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([144u8 ..= 191u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto2_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: AnyUnicode)) ; } # [inline] fn goto13_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 1usize] > () { Some ([128u8 ..= 191u8]) => { lex . bump_unchecked (1usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto18_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([128u8 ..= 191u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto15_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto14_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([160u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto19_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([128u8 ..= 143u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto11_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Any)) ; } # [inline] fn goto20 < 's > (lex : & mut Lexer < 's >) { enum Jump { J1 , J16 , J17 , J2 , J13 , J18 , J15 , J14 , J19 , J11 , } const LUT : [Jump ; 256] = { use Jump :: * ; [J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J1 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J14 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J16 , J15 , J15 , J17 , J18 , J18 , J18 , J19 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11] } ; let byte = match lex . read :: < u8 > () { Some (byte) => byte , None => return _end (lex) , } ; match LUT [byte as usize] { Jump :: J1 => { lex . bump_unchecked (1usize) ; goto1_x (lex) } , Jump :: J16 => { lex . bump_unchecked (1usize) ; goto16_ctx11_x (lex) } , Jump :: J17 => { lex . bump_unchecked (1usize) ; goto17_ctx11_x (lex) } , Jump :: J2 => { lex . bump_unchecked (1usize) ; goto2_x (lex) } , Jump :: J13 => { lex . bump_unchecked (1usize) ; goto13_ctx11_x (lex) } , Jump :: J18 => { lex . bump_unchecked (1usize) ; goto18_ctx11_x (lex) } , Jump :: J15 => { lex . bump_unchecked (1usize) ; goto15_ctx11_x (lex) } , Jump :: J14 => { lex . bump_unchecked (1usize) ; goto14_ctx11_x (lex) } , Jump :: J19 => { lex . bump_unchecked (1usize) ; goto19_ctx11_x (lex) } , Jump :: J11 => { lex . bump_unchecked (1usize) ; goto11_x (lex) } , } } goto20 (lex) } } \ No newline at end of file diff --git a/logos-cli/tests/data/fmt_output.rs b/logos-cli/tests/data/simple/fmt_output.rs similarity index 100% rename from logos-cli/tests/data/fmt_output.rs rename to logos-cli/tests/data/simple/fmt_output.rs diff --git a/logos-cli/tests/data/input.rs b/logos-cli/tests/data/simple/input.rs similarity index 100% rename from logos-cli/tests/data/input.rs rename to logos-cli/tests/data/simple/input.rs diff --git a/logos-cli/tests/data/output.rs b/logos-cli/tests/data/simple/output.rs similarity index 100% rename from logos-cli/tests/data/output.rs rename to logos-cli/tests/data/simple/output.rs diff --git a/logos-cli/tests/tests.rs b/logos-cli/tests/tests.rs index f52c8ddb..71fbd33d 100644 --- a/logos-cli/tests/tests.rs +++ b/logos-cli/tests/tests.rs @@ -4,47 +4,80 @@ use assert_cmd::Command; use assert_fs::{assert::PathAssert, fixture::FileWriteStr, NamedTempFile}; use predicates::prelude::*; -const INPUT_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/input.rs"); -const OUTPUT_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/output.rs"); -const FMT_OUTPUT_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/fmt_output.rs"); +struct Fixture { + input: &'static str, + output: &'static str, + fmt_output: &'static str, +} + +const FIXTURES: &[Fixture] = &[ + Fixture { + input: concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/simple/input.rs"), + output: concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/simple/output.rs"), + fmt_output: concat!( + env!("CARGO_MANIFEST_DIR"), + "/tests/data/simple/fmt_output.rs" + ), + }, + Fixture { + input: concat!( + env!("CARGO_MANIFEST_DIR"), + "/tests/data/no_error_lut/input.rs" + ), + output: concat!( + env!("CARGO_MANIFEST_DIR"), + "/tests/data/no_error_lut/output.rs" + ), + fmt_output: concat!( + env!("CARGO_MANIFEST_DIR"), + "/tests/data/no_error_lut/fmt_output.rs" + ), + }, +]; #[test] fn test_codegen() { - let tempfile = NamedTempFile::new("output.gen.rs").unwrap(); + for fixture in FIXTURES.iter() { + let tempfile = NamedTempFile::new("output.gen.rs").unwrap(); - let mut cmd = Command::cargo_bin("logos-cli").unwrap(); - cmd.arg(INPUT_FILE) - .arg("--output") - .arg(tempfile.path()) - .assert() - .success(); + let mut cmd = Command::cargo_bin("logos-cli").unwrap(); + cmd.arg(fixture.input) + .arg("--output") + .arg(tempfile.path()) + .assert() + .success(); - tempfile.assert(normalize_newlines(OUTPUT_FILE)); + tempfile.assert(normalize_newlines(fixture.output)); + } } #[test] fn test_codegen_check() { - Command::cargo_bin("logos-cli") - .unwrap() - .arg(INPUT_FILE) - .arg("--check") - .arg("--output") - .arg(OUTPUT_FILE) - .assert() - .success(); + for fixture in FIXTURES.iter() { + Command::cargo_bin("logos-cli") + .unwrap() + .arg(fixture.input) + .arg("--check") + .arg("--output") + .arg(fixture.output) + .assert() + .success(); + } } #[test] fn test_codegen_check_format() { - Command::cargo_bin("logos-cli") - .unwrap() - .arg(INPUT_FILE) - .arg("--format") - .arg("--check") - .arg("--output") - .arg(FMT_OUTPUT_FILE) - .assert() - .success(); + for fixture in FIXTURES.iter() { + Command::cargo_bin("logos-cli") + .unwrap() + .arg(fixture.input) + .arg("--format") + .arg("--check") + .arg("--output") + .arg(fixture.fmt_output) + .assert() + .success(); + } } #[test] @@ -55,7 +88,7 @@ fn test_codegen_fail_check() { Command::cargo_bin("logos-cli") .unwrap() - .arg(INPUT_FILE) + .arg(FIXTURES[0].input) .arg("--check") .arg("--output") .arg(tempfile.path()) @@ -65,17 +98,19 @@ fn test_codegen_fail_check() { #[test] fn test_codegen_format() { - let tempfile = NamedTempFile::new("output.gen.rs").unwrap(); + for fixture in FIXTURES { + let tempfile = NamedTempFile::new("output.gen.rs").unwrap(); - let mut cmd = Command::cargo_bin("logos-cli").unwrap(); - cmd.arg(INPUT_FILE) - .arg("--format") - .arg("--output") - .arg(tempfile.path()) - .assert() - .success(); + let mut cmd = Command::cargo_bin("logos-cli").unwrap(); + cmd.arg(fixture.input) + .arg("--format") + .arg("--output") + .arg(tempfile.path()) + .assert() + .success(); - tempfile.assert(normalize_newlines(FMT_OUTPUT_FILE)); + tempfile.assert(normalize_newlines(fixture.fmt_output)); + } } fn normalize_newlines(s: impl AsRef) -> impl Predicate { From 4896861552a090bae2b5061723d1061d6b75114f Mon Sep 17 00:00:00 2001 From: RustyYato Date: Mon, 6 May 2024 11:59:30 -0700 Subject: [PATCH 3/4] add explicit codegen tests --- logos-codegen/Cargo.toml | 1 + logos-codegen/tests/codegen.rs | 31 +++++++++++++++++++ .../tests/data/no_error_lut/input.rs | 10 ++++++ .../tests/data/no_error_lut/output.rs | 1 + logos-codegen/tests/data/simple/input.rs | 5 +++ logos-codegen/tests/data/simple/output.rs | 1 + 6 files changed, 49 insertions(+) create mode 100644 logos-codegen/tests/codegen.rs create mode 100644 logos-codegen/tests/data/no_error_lut/input.rs create mode 100644 logos-codegen/tests/data/no_error_lut/output.rs create mode 100644 logos-codegen/tests/data/simple/input.rs create mode 100644 logos-codegen/tests/data/simple/output.rs diff --git a/logos-codegen/Cargo.toml b/logos-codegen/Cargo.toml index d7bc2c16..8b171c9f 100644 --- a/logos-codegen/Cargo.toml +++ b/logos-codegen/Cargo.toml @@ -9,6 +9,7 @@ syn = { version = "2.0.13", features = ["full"] } [dev-dependencies] pretty_assertions = "1.4.0" +rstest = "0.18.2" [features] # Enables debug messages diff --git a/logos-codegen/tests/codegen.rs b/logos-codegen/tests/codegen.rs new file mode 100644 index 00000000..5f53c0db --- /dev/null +++ b/logos-codegen/tests/codegen.rs @@ -0,0 +1,31 @@ +use std::{error::Error, io, path::PathBuf}; + +#[rstest::rstest] +#[case("simple")] +#[case("no_error_lut")] +pub fn test_codegen(#[case] fixture: &str) -> Result<(), Box> { + let mut fixture_dir = PathBuf::new(); + fixture_dir.push(env!("CARGO_MANIFEST_DIR")); + fixture_dir.push("tests"); + fixture_dir.push("data"); + fixture_dir.push(fixture); + + let input = fixture_dir.join("input.rs"); + fixture_dir.push("output.rs"); + let output_file = fixture_dir; + + let input = std::fs::read_to_string(input)?; + let output = std::fs::read_to_string(&output_file)?; + + let generated = logos_codegen::generate(input.parse()?); + let generated = generated.to_string(); + + if std::env::var("BLESS_CODEGEN").is_ok_and(|value| value == "1") { + std::fs::write(&output_file, &generated)?; + return Ok(()); + } + + assert_eq!(generated, output, "Codegen test failed: `{fixture}`, run tests again with env var `BLESS_CODEGEN=1` to bless these changes"); + + Ok(()) +} diff --git a/logos-codegen/tests/data/no_error_lut/input.rs b/logos-codegen/tests/data/no_error_lut/input.rs new file mode 100644 index 00000000..84504272 --- /dev/null +++ b/logos-codegen/tests/data/no_error_lut/input.rs @@ -0,0 +1,10 @@ +#[derive(Logos)] +#[logos(source = [u8])] +enum Token { + #[token("\n")] + Newline, + #[regex(".")] + AnyUnicode, + #[regex(b".", priority = 0)] + Any, +} diff --git a/logos-codegen/tests/data/no_error_lut/output.rs b/logos-codegen/tests/data/no_error_lut/output.rs new file mode 100644 index 00000000..4c0edd35 --- /dev/null +++ b/logos-codegen/tests/data/no_error_lut/output.rs @@ -0,0 +1 @@ +impl < 's > :: logos :: Logos < 's > for Token { type Error = () ; type Extras = () ; type Source = [u8] ; fn lex (lex : & mut :: logos :: Lexer < 's , Self >) { use :: logos :: internal :: { LexerInternal , CallbackResult } ; type Lexer < 's > = :: logos :: Lexer < 's , Token > ; fn _end < 's > (lex : & mut Lexer < 's >) { lex . end () } fn _error < 's > (lex : & mut Lexer < 's >) { lex . bump_unchecked (1) ; lex . error () ; } macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } # [inline] fn goto1_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Newline)) ; } # [inline] fn goto11_ctx11_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Any)) ; } # [inline] fn goto2_ctx11_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: AnyUnicode)) ; } # [inline] fn goto16_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([128u8 ..= 159u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto17_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([144u8 ..= 191u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto2_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: AnyUnicode)) ; } # [inline] fn goto13_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 1usize] > () { Some ([128u8 ..= 191u8]) => { lex . bump_unchecked (1usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto18_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([128u8 ..= 191u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto15_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto14_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([160u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto19_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([128u8 ..= 143u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto11_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Any)) ; } # [inline] fn goto20 < 's > (lex : & mut Lexer < 's >) { enum Jump { J1 , J16 , J17 , J2 , J13 , J18 , J15 , J14 , J19 , J11 , } const LUT : [Jump ; 256] = { use Jump :: * ; [J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J1 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J14 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J16 , J15 , J15 , J17 , J18 , J18 , J18 , J19 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11] } ; let byte = match lex . read :: < u8 > () { Some (byte) => byte , None => return _end (lex) , } ; match LUT [byte as usize] { Jump :: J1 => { lex . bump_unchecked (1usize) ; goto1_x (lex) } , Jump :: J16 => { lex . bump_unchecked (1usize) ; goto16_ctx11_x (lex) } , Jump :: J17 => { lex . bump_unchecked (1usize) ; goto17_ctx11_x (lex) } , Jump :: J2 => { lex . bump_unchecked (1usize) ; goto2_x (lex) } , Jump :: J13 => { lex . bump_unchecked (1usize) ; goto13_ctx11_x (lex) } , Jump :: J18 => { lex . bump_unchecked (1usize) ; goto18_ctx11_x (lex) } , Jump :: J15 => { lex . bump_unchecked (1usize) ; goto15_ctx11_x (lex) } , Jump :: J14 => { lex . bump_unchecked (1usize) ; goto14_ctx11_x (lex) } , Jump :: J19 => { lex . bump_unchecked (1usize) ; goto19_ctx11_x (lex) } , Jump :: J11 => { lex . bump_unchecked (1usize) ; goto11_x (lex) } , } } goto20 (lex) } } \ No newline at end of file diff --git a/logos-codegen/tests/data/simple/input.rs b/logos-codegen/tests/data/simple/input.rs new file mode 100644 index 00000000..d2f0517d --- /dev/null +++ b/logos-codegen/tests/data/simple/input.rs @@ -0,0 +1,5 @@ +#[derive(Logos, Debug, Clone, Copy, PartialEq)] +enum Token { + #[regex("a-z")] + Letter, +} diff --git a/logos-codegen/tests/data/simple/output.rs b/logos-codegen/tests/data/simple/output.rs new file mode 100644 index 00000000..e8a4dc3b --- /dev/null +++ b/logos-codegen/tests/data/simple/output.rs @@ -0,0 +1 @@ +impl < 's > :: logos :: Logos < 's > for Token { type Error = () ; type Extras = () ; type Source = str ; fn lex (lex : & mut :: logos :: Lexer < 's , Self >) { use :: logos :: internal :: { LexerInternal , CallbackResult } ; type Lexer < 's > = :: logos :: Lexer < 's , Token > ; fn _end < 's > (lex : & mut Lexer < 's >) { lex . end () } fn _error < 's > (lex : & mut Lexer < 's >) { lex . bump_unchecked (1) ; lex . error () ; } macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } # [inline] fn goto1_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Letter)) ; } # [inline] fn goto3_at1_with3 < 's > (lex : & mut Lexer < 's >) { match lex . read_at :: < & [u8 ; 2usize] > (1usize) { Some (b"-z") => { lex . bump_unchecked (3usize) ; goto1_x (lex) } , _ => _error (lex) , } } # [inline] fn goto4 < 's > (lex : & mut Lexer < 's >) { let arr = match lex . read :: < & [u8 ; 3usize] > () { Some (arr) => arr , None => return _end (lex) , } ; match arr [0] { b'a' => goto3_at1_with3 (lex) , _ => _error (lex) , } } goto4 (lex) } } \ No newline at end of file From d2e0ebfab2da7e60122e504e6bd4978b5f660b23 Mon Sep 17 00:00:00 2001 From: RustyYato Date: Mon, 6 May 2024 12:06:10 -0700 Subject: [PATCH 4/4] Revert "add codegen tests for LUT" This reverts commit 0dfcf0080c52c79ce5c5d1e4ce3edbf021390c28. --- .../tests/data/{simple => }/fmt_output.rs | 0 logos-cli/tests/data/{simple => }/input.rs | 0 .../tests/data/no_error_lut/fmt_output.rs | 195 ------------------ logos-cli/tests/data/no_error_lut/input.rs | 10 - logos-cli/tests/data/no_error_lut/output.rs | 1 - logos-cli/tests/data/{simple => }/output.rs | 0 logos-cli/tests/tests.rs | 111 ++++------ 7 files changed, 38 insertions(+), 279 deletions(-) rename logos-cli/tests/data/{simple => }/fmt_output.rs (100%) rename logos-cli/tests/data/{simple => }/input.rs (100%) delete mode 100644 logos-cli/tests/data/no_error_lut/fmt_output.rs delete mode 100644 logos-cli/tests/data/no_error_lut/input.rs delete mode 100644 logos-cli/tests/data/no_error_lut/output.rs rename logos-cli/tests/data/{simple => }/output.rs (100%) diff --git a/logos-cli/tests/data/simple/fmt_output.rs b/logos-cli/tests/data/fmt_output.rs similarity index 100% rename from logos-cli/tests/data/simple/fmt_output.rs rename to logos-cli/tests/data/fmt_output.rs diff --git a/logos-cli/tests/data/simple/input.rs b/logos-cli/tests/data/input.rs similarity index 100% rename from logos-cli/tests/data/simple/input.rs rename to logos-cli/tests/data/input.rs diff --git a/logos-cli/tests/data/no_error_lut/fmt_output.rs b/logos-cli/tests/data/no_error_lut/fmt_output.rs deleted file mode 100644 index 22a5032b..00000000 --- a/logos-cli/tests/data/no_error_lut/fmt_output.rs +++ /dev/null @@ -1,195 +0,0 @@ -#[derive()] -enum Token { - Newline, - AnyUnicode, - Any, -} -impl<'s> ::logos::Logos<'s> for Token { - type Error = (); - type Extras = (); - type Source = [u8]; - fn lex(lex: &mut ::logos::Lexer<'s, Self>) { - use logos::internal::{CallbackResult, LexerInternal}; - type Lexer<'s> = ::logos::Lexer<'s, Token>; - fn _end<'s>(lex: &mut Lexer<'s>) { - lex.end() - } - fn _error<'s>(lex: &mut Lexer<'s>) { - lex.bump_unchecked(1); - lex.error(); - } - macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } - #[inline] - fn goto1_x<'s>(lex: &mut Lexer<'s>) { - lex.set(Ok(Token::Newline)); - } - #[inline] - fn goto11_ctx11_x<'s>(lex: &mut Lexer<'s>) { - lex.set(Ok(Token::Any)); - } - #[inline] - fn goto2_ctx11_x<'s>(lex: &mut Lexer<'s>) { - lex.set(Ok(Token::AnyUnicode)); - } - #[inline] - fn goto16_ctx11_x<'s>(lex: &mut Lexer<'s>) { - match lex.read::<&[u8; 2usize]>() { - Some([128u8..=159u8, 128u8..=191u8]) => { - lex.bump_unchecked(2usize); - goto2_ctx11_x(lex) - } - _ => goto11_ctx11_x(lex), - } - } - #[inline] - fn goto17_ctx11_x<'s>(lex: &mut Lexer<'s>) { - match lex.read::<&[u8; 3usize]>() { - Some([144u8..=191u8, 128u8..=191u8, 128u8..=191u8]) => { - lex.bump_unchecked(3usize); - goto2_ctx11_x(lex) - } - _ => goto11_ctx11_x(lex), - } - } - #[inline] - fn goto2_x<'s>(lex: &mut Lexer<'s>) { - lex.set(Ok(Token::AnyUnicode)); - } - #[inline] - fn goto13_ctx11_x<'s>(lex: &mut Lexer<'s>) { - match lex.read::<&[u8; 1usize]>() { - Some([128u8..=191u8]) => { - lex.bump_unchecked(1usize); - goto2_ctx11_x(lex) - } - _ => goto11_ctx11_x(lex), - } - } - #[inline] - fn goto18_ctx11_x<'s>(lex: &mut Lexer<'s>) { - match lex.read::<&[u8; 3usize]>() { - Some([128u8..=191u8, 128u8..=191u8, 128u8..=191u8]) => { - lex.bump_unchecked(3usize); - goto2_ctx11_x(lex) - } - _ => goto11_ctx11_x(lex), - } - } - #[inline] - fn goto15_ctx11_x<'s>(lex: &mut Lexer<'s>) { - match lex.read::<&[u8; 2usize]>() { - Some([128u8..=191u8, 128u8..=191u8]) => { - lex.bump_unchecked(2usize); - goto2_ctx11_x(lex) - } - _ => goto11_ctx11_x(lex), - } - } - #[inline] - fn goto14_ctx11_x<'s>(lex: &mut Lexer<'s>) { - match lex.read::<&[u8; 2usize]>() { - Some([160u8..=191u8, 128u8..=191u8]) => { - lex.bump_unchecked(2usize); - goto2_ctx11_x(lex) - } - _ => goto11_ctx11_x(lex), - } - } - #[inline] - fn goto19_ctx11_x<'s>(lex: &mut Lexer<'s>) { - match lex.read::<&[u8; 3usize]>() { - Some([128u8..=143u8, 128u8..=191u8, 128u8..=191u8]) => { - lex.bump_unchecked(3usize); - goto2_ctx11_x(lex) - } - _ => goto11_ctx11_x(lex), - } - } - #[inline] - fn goto11_x<'s>(lex: &mut Lexer<'s>) { - lex.set(Ok(Token::Any)); - } - #[inline] - fn goto20<'s>(lex: &mut Lexer<'s>) { - enum Jump { - J1, - J16, - J17, - J2, - J13, - J18, - J15, - J14, - J19, - J11, - } - const LUT: [Jump; 256] = { - use Jump::*; - [ - J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J1, J2, J2, J2, J2, J2, J2, J2, J2, J2, - J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, - J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, - J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, - J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, - J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, J2, - J2, J2, J2, J2, J2, J2, J2, J2, J11, J11, J11, J11, J11, J11, J11, J11, J11, - J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, - J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, - J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, J11, - J11, J11, J11, J11, J11, J11, J11, J11, J11, J13, J13, J13, J13, J13, J13, J13, - J13, J13, J13, J13, J13, J13, J13, J13, J13, J13, J13, J13, J13, J13, J13, J13, - J13, J13, J13, J13, J13, J13, J13, J14, J15, J15, J15, J15, J15, J15, J15, J15, - J15, J15, J15, J15, J16, J15, J15, J17, J18, J18, J18, J19, J11, J11, J11, J11, - J11, J11, J11, J11, J11, J11, J11, - ] - }; - let byte = match lex.read::() { - Some(byte) => byte, - None => return _end(lex), - }; - match LUT[byte as usize] { - Jump::J1 => { - lex.bump_unchecked(1usize); - goto1_x(lex) - } - Jump::J16 => { - lex.bump_unchecked(1usize); - goto16_ctx11_x(lex) - } - Jump::J17 => { - lex.bump_unchecked(1usize); - goto17_ctx11_x(lex) - } - Jump::J2 => { - lex.bump_unchecked(1usize); - goto2_x(lex) - } - Jump::J13 => { - lex.bump_unchecked(1usize); - goto13_ctx11_x(lex) - } - Jump::J18 => { - lex.bump_unchecked(1usize); - goto18_ctx11_x(lex) - } - Jump::J15 => { - lex.bump_unchecked(1usize); - goto15_ctx11_x(lex) - } - Jump::J14 => { - lex.bump_unchecked(1usize); - goto14_ctx11_x(lex) - } - Jump::J19 => { - lex.bump_unchecked(1usize); - goto19_ctx11_x(lex) - } - Jump::J11 => { - lex.bump_unchecked(1usize); - goto11_x(lex) - } - } - } - goto20(lex) - } -} diff --git a/logos-cli/tests/data/no_error_lut/input.rs b/logos-cli/tests/data/no_error_lut/input.rs deleted file mode 100644 index 84504272..00000000 --- a/logos-cli/tests/data/no_error_lut/input.rs +++ /dev/null @@ -1,10 +0,0 @@ -#[derive(Logos)] -#[logos(source = [u8])] -enum Token { - #[token("\n")] - Newline, - #[regex(".")] - AnyUnicode, - #[regex(b".", priority = 0)] - Any, -} diff --git a/logos-cli/tests/data/no_error_lut/output.rs b/logos-cli/tests/data/no_error_lut/output.rs deleted file mode 100644 index 3bec6730..00000000 --- a/logos-cli/tests/data/no_error_lut/output.rs +++ /dev/null @@ -1 +0,0 @@ -# [derive ()] enum Token { Newline , AnyUnicode , Any , }impl < 's > :: logos :: Logos < 's > for Token { type Error = () ; type Extras = () ; type Source = [u8] ; fn lex (lex : & mut :: logos :: Lexer < 's , Self >) { use :: logos :: internal :: { LexerInternal , CallbackResult } ; type Lexer < 's > = :: logos :: Lexer < 's , Token > ; fn _end < 's > (lex : & mut Lexer < 's >) { lex . end () } fn _error < 's > (lex : & mut Lexer < 's >) { lex . bump_unchecked (1) ; lex . error () ; } macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } # [inline] fn goto1_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Newline)) ; } # [inline] fn goto11_ctx11_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Any)) ; } # [inline] fn goto2_ctx11_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: AnyUnicode)) ; } # [inline] fn goto16_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([128u8 ..= 159u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto17_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([144u8 ..= 191u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto2_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: AnyUnicode)) ; } # [inline] fn goto13_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 1usize] > () { Some ([128u8 ..= 191u8]) => { lex . bump_unchecked (1usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto18_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([128u8 ..= 191u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto15_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto14_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([160u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto19_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([128u8 ..= 143u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto11_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Any)) ; } # [inline] fn goto20 < 's > (lex : & mut Lexer < 's >) { enum Jump { J1 , J16 , J17 , J2 , J13 , J18 , J15 , J14 , J19 , J11 , } const LUT : [Jump ; 256] = { use Jump :: * ; [J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J1 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J14 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J16 , J15 , J15 , J17 , J18 , J18 , J18 , J19 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11] } ; let byte = match lex . read :: < u8 > () { Some (byte) => byte , None => return _end (lex) , } ; match LUT [byte as usize] { Jump :: J1 => { lex . bump_unchecked (1usize) ; goto1_x (lex) } , Jump :: J16 => { lex . bump_unchecked (1usize) ; goto16_ctx11_x (lex) } , Jump :: J17 => { lex . bump_unchecked (1usize) ; goto17_ctx11_x (lex) } , Jump :: J2 => { lex . bump_unchecked (1usize) ; goto2_x (lex) } , Jump :: J13 => { lex . bump_unchecked (1usize) ; goto13_ctx11_x (lex) } , Jump :: J18 => { lex . bump_unchecked (1usize) ; goto18_ctx11_x (lex) } , Jump :: J15 => { lex . bump_unchecked (1usize) ; goto15_ctx11_x (lex) } , Jump :: J14 => { lex . bump_unchecked (1usize) ; goto14_ctx11_x (lex) } , Jump :: J19 => { lex . bump_unchecked (1usize) ; goto19_ctx11_x (lex) } , Jump :: J11 => { lex . bump_unchecked (1usize) ; goto11_x (lex) } , } } goto20 (lex) } } \ No newline at end of file diff --git a/logos-cli/tests/data/simple/output.rs b/logos-cli/tests/data/output.rs similarity index 100% rename from logos-cli/tests/data/simple/output.rs rename to logos-cli/tests/data/output.rs diff --git a/logos-cli/tests/tests.rs b/logos-cli/tests/tests.rs index 71fbd33d..f52c8ddb 100644 --- a/logos-cli/tests/tests.rs +++ b/logos-cli/tests/tests.rs @@ -4,80 +4,47 @@ use assert_cmd::Command; use assert_fs::{assert::PathAssert, fixture::FileWriteStr, NamedTempFile}; use predicates::prelude::*; -struct Fixture { - input: &'static str, - output: &'static str, - fmt_output: &'static str, -} - -const FIXTURES: &[Fixture] = &[ - Fixture { - input: concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/simple/input.rs"), - output: concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/simple/output.rs"), - fmt_output: concat!( - env!("CARGO_MANIFEST_DIR"), - "/tests/data/simple/fmt_output.rs" - ), - }, - Fixture { - input: concat!( - env!("CARGO_MANIFEST_DIR"), - "/tests/data/no_error_lut/input.rs" - ), - output: concat!( - env!("CARGO_MANIFEST_DIR"), - "/tests/data/no_error_lut/output.rs" - ), - fmt_output: concat!( - env!("CARGO_MANIFEST_DIR"), - "/tests/data/no_error_lut/fmt_output.rs" - ), - }, -]; +const INPUT_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/input.rs"); +const OUTPUT_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/output.rs"); +const FMT_OUTPUT_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/fmt_output.rs"); #[test] fn test_codegen() { - for fixture in FIXTURES.iter() { - let tempfile = NamedTempFile::new("output.gen.rs").unwrap(); + let tempfile = NamedTempFile::new("output.gen.rs").unwrap(); - let mut cmd = Command::cargo_bin("logos-cli").unwrap(); - cmd.arg(fixture.input) - .arg("--output") - .arg(tempfile.path()) - .assert() - .success(); + let mut cmd = Command::cargo_bin("logos-cli").unwrap(); + cmd.arg(INPUT_FILE) + .arg("--output") + .arg(tempfile.path()) + .assert() + .success(); - tempfile.assert(normalize_newlines(fixture.output)); - } + tempfile.assert(normalize_newlines(OUTPUT_FILE)); } #[test] fn test_codegen_check() { - for fixture in FIXTURES.iter() { - Command::cargo_bin("logos-cli") - .unwrap() - .arg(fixture.input) - .arg("--check") - .arg("--output") - .arg(fixture.output) - .assert() - .success(); - } + Command::cargo_bin("logos-cli") + .unwrap() + .arg(INPUT_FILE) + .arg("--check") + .arg("--output") + .arg(OUTPUT_FILE) + .assert() + .success(); } #[test] fn test_codegen_check_format() { - for fixture in FIXTURES.iter() { - Command::cargo_bin("logos-cli") - .unwrap() - .arg(fixture.input) - .arg("--format") - .arg("--check") - .arg("--output") - .arg(fixture.fmt_output) - .assert() - .success(); - } + Command::cargo_bin("logos-cli") + .unwrap() + .arg(INPUT_FILE) + .arg("--format") + .arg("--check") + .arg("--output") + .arg(FMT_OUTPUT_FILE) + .assert() + .success(); } #[test] @@ -88,7 +55,7 @@ fn test_codegen_fail_check() { Command::cargo_bin("logos-cli") .unwrap() - .arg(FIXTURES[0].input) + .arg(INPUT_FILE) .arg("--check") .arg("--output") .arg(tempfile.path()) @@ -98,19 +65,17 @@ fn test_codegen_fail_check() { #[test] fn test_codegen_format() { - for fixture in FIXTURES { - let tempfile = NamedTempFile::new("output.gen.rs").unwrap(); + let tempfile = NamedTempFile::new("output.gen.rs").unwrap(); - let mut cmd = Command::cargo_bin("logos-cli").unwrap(); - cmd.arg(fixture.input) - .arg("--format") - .arg("--output") - .arg(tempfile.path()) - .assert() - .success(); + let mut cmd = Command::cargo_bin("logos-cli").unwrap(); + cmd.arg(INPUT_FILE) + .arg("--format") + .arg("--output") + .arg(tempfile.path()) + .assert() + .success(); - tempfile.assert(normalize_newlines(fixture.fmt_output)); - } + tempfile.assert(normalize_newlines(FMT_OUTPUT_FILE)); } fn normalize_newlines(s: impl AsRef) -> impl Predicate {