From bb588073ab286a9f1f8d839ab2cebe13011dd22c Mon Sep 17 00:00:00 2001
From: Jelle Zijlstra
Date: Tue, 17 Oct 2023 00:59:15 -0700
Subject: [PATCH] Fix parser bug where "type" was misinterpreted as a keyword
 inside a match (#3950)

Fixes #3790

Slightly hacky, but I think this is correct and it should also improve
performance somewhat.
---
 CHANGES.md                                   |  2 ++
 src/blib2to3/pgen2/parse.py                  | 19 ++++++++++++++++++-
 tests/data/cases/pattern_matching_complex.py |  4 ++++
 tests/data/cases/type_aliases.py             |  9 +++++++++
 4 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index 610a9de0e43..f89b1b9df0a 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -37,6 +37,8 @@
 
+- Fix bug where attributes named `type` were not accepted inside `match` statements
+  (#3950)
 - Add support for PEP 695 type aliases containing lambdas and other unusual expressions
   (#3949)
diff --git a/src/blib2to3/pgen2/parse.py b/src/blib2to3/pgen2/parse.py
index 299cc24a15f..ad51a3dad08 100644
--- a/src/blib2to3/pgen2/parse.py
+++ b/src/blib2to3/pgen2/parse.py
@@ -211,6 +211,7 @@ def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None:
         # See note in docstring above. TL;DR this is ignored.
         self.convert = convert or lam_sub
         self.is_backtracking = False
+        self.last_token: Optional[int] = None
 
     def setup(self, proxy: "TokenProxy", start: Optional[int] = None) -> None:
         """Prepare for parsing.
@@ -236,6 +237,7 @@ def setup(self, proxy: "TokenProxy", start: Optional[int] = None) -> None:
         self.rootnode: Optional[NL] = None
         self.used_names: Set[str] = set()
         self.proxy = proxy
+        self.last_token = None
 
     def addtoken(self, type: int, value: str, context: Context) -> bool:
         """Add a token; return True iff this is the end of the program."""
@@ -317,6 +319,7 @@ def _addtoken(self, ilabel: int, type: int, value: str, context: Context) -> boo
                         dfa, state, node = self.stack[-1]
                         states, first = dfa
                 # Done with this token
+                self.last_token = type
                 return False
 
             else:
@@ -343,9 +346,23 @@ def classify(self, type: int, value: str, context: Context) -> List[int]:
                 return [self.grammar.keywords[value]]
             elif value in self.grammar.soft_keywords:
                 assert type in self.grammar.tokens
+                # Current soft keywords (match, case, type) can only appear at the
+                # beginning of a statement. So as a shortcut, don't try to treat them
+                # like keywords in any other context.
+                # ('_' is also a soft keyword in the real grammar, but for our grammar
+                # it's just an expression, so we don't need to treat it specially.)
+                if self.last_token not in (
+                    None,
+                    token.INDENT,
+                    token.DEDENT,
+                    token.NEWLINE,
+                    token.SEMI,
+                    token.COLON,
+                ):
+                    return [self.grammar.tokens[type]]
                 return [
-                    self.grammar.soft_keywords[value],
                     self.grammar.tokens[type],
+                    self.grammar.soft_keywords[value],
                 ]
 
         ilabel = self.grammar.tokens.get(type)
diff --git a/tests/data/cases/pattern_matching_complex.py b/tests/data/cases/pattern_matching_complex.py
index b4355c7333a..10b4d26e289 100644
--- a/tests/data/cases/pattern_matching_complex.py
+++ b/tests/data/cases/pattern_matching_complex.py
@@ -143,3 +143,7 @@
         y = 1
     case []:
         y = 2
+# issue 3790
+match (X.type, Y):
+    case _:
+        pass
diff --git a/tests/data/cases/type_aliases.py b/tests/data/cases/type_aliases.py
index 9631bfd5ccc..7c2009e8202 100644
--- a/tests/data/cases/type_aliases.py
+++ b/tests/data/cases/type_aliases.py
@@ -5,6 +5,8 @@
 type Alias[T]=lambda: T
 type And[T]=T and T
 type IfElse[T]=T if T else T
+type One = int; type Another = str
+class X: type InClass = int
 
 type = aliased
 print(type(42))
@@ -16,6 +18,13 @@
 type Alias[T] = lambda: T
 type And[T] = T and T
 type IfElse[T] = T if T else T
+type One = int
+type Another = str
+
+
+class X:
+    type InClass = int
+
 
 type = aliased
 print(type(42))
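A note on the approach (not part of the patch itself): the classify() change
above short-circuits the soft-keyword reading of `type`, `match`, and `case`
whenever the previous token shows we cannot be at the start of a statement.
Below is a minimal standalone sketch of that check, using only the stdlib
`token` module; the names STATEMENT_BOUNDARY and could_be_soft_keyword are
invented here for illustration and are not part of blib2to3's API.

    from typing import Optional
    import token

    # Token types after which a new statement may begin; None stands for the
    # start of the file, before any token has been shifted (mirrors the
    # tuple tested against self.last_token in the patched classify()).
    STATEMENT_BOUNDARY = frozenset(
        {None, token.INDENT, token.DEDENT, token.NEWLINE, token.SEMI, token.COLON}
    )

    def could_be_soft_keyword(last_token: Optional[int]) -> bool:
        # Only bother trying the soft-keyword interpretation at a possible
        # statement boundary; everywhere else the name must be a plain NAME.
        return last_token in STATEMENT_BOUNDARY

    # "type" in "match (X.type, Y):" follows a DOT, so the keyword reading
    # is ruled out up front (this was the misparse in issue #3790):
    assert not could_be_soft_keyword(token.DOT)
    # A PEP 695 alias such as "type One = int" begins right after a NEWLINE
    # (or a SEMI, as in "type One = int; type Another = str"), so both
    # readings are still offered there:
    assert could_be_soft_keyword(token.NEWLINE)
    assert could_be_soft_keyword(token.SEMI)

Because the check consults only the single most recent token, classify() can
rule out the keyword reading without any grammar backtracking in the common
case, which is presumably where the performance improvement mentioned in the
commit message comes from.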