diff --git a/src/index.ts b/src/index.ts index ea048f8..62af82d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -129,15 +129,15 @@ /* Use as a command line script */ /* npm run start:dev -- test.py */ -import {Tokenizer} from "./tokenizer"; -import {Parser} from "./parser"; -import {Translator} from "./translator"; -import {Program} from "estree"; -import {Resolver} from "./resolver"; +import { Tokenizer } from "./tokenizer"; +import { Parser } from "./parser"; +import { Translator } from "./translator"; +import { Program } from "estree"; +import { Resolver } from "./resolver"; export function parsePythonToEstreeAst(code: string, - variant: number = 1, - doValidate: boolean = false) : Program { + variant: number = 1, + doValidate: boolean = false): Program { const script = code + '\n' const tokenizer = new Tokenizer(script) const tokens = tokenizer.scanEverything() @@ -152,7 +152,6 @@ export function parsePythonToEstreeAst(code: string, export * from './errors'; - // import {ParserErrors, ResolverErrors, TokenizerErrors} from "./errors"; // import fs from "fs"; // const BaseParserError = ParserErrors.BaseParserError; diff --git a/src/parser.ts b/src/parser.ts index 370b6d0..48f7372 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -39,10 +39,10 @@ IN THE SOFTWARE. **/ -import {SPECIAL_IDENTIFIER_TOKENS, Token} from "./tokenizer"; -import {TokenType} from "./tokens"; -import {ExprNS, StmtNS} from "./ast-types"; -import {ParserErrors} from "./errors"; +import { SPECIAL_IDENTIFIER_TOKENS, Token } from "./tokenizer"; +import { TokenType } from "./tokens"; +import { ExprNS, StmtNS } from "./ast-types"; +import { ParserErrors } from "./errors"; type Expr = ExprNS.Expr; type Stmt = StmtNS.Stmt; @@ -156,7 +156,7 @@ export class Parser { } else if (this.check(TokenType.NAME, ...PSEUD_NAMES, TokenType.NUMBER, TokenType.PASS, TokenType.BREAK, TokenType.CONTINUE, TokenType.RETURN, TokenType.FROM, TokenType.GLOBAL, TokenType.NONLOCAL, - TokenType.ASSERT, TokenType.LPAR, ...SPECIAL_IDENTIFIER_TOKENS)) { + TokenType.ASSERT, TokenType.LPAR, TokenType.STRING, ...SPECIAL_IDENTIFIER_TOKENS)) { return this.simple_stmt(); } const startToken = this.peek(); @@ -165,7 +165,7 @@ export class Parser { this.parse_invalid(startToken, endToken); } catch (e) { if (e instanceof ParserErrors.BaseParserError) { - throw(e) + throw (e) } } throw new ParserErrors.GenericUnexpectedSyntaxError(startToken.line, startToken.col, this.source, @@ -255,7 +255,7 @@ export class Parser { res = new StmtNS.NonLocal(startToken, startToken, this.advance()); } else if (this.match(TokenType.ASSERT)) { res = new StmtNS.Assert(startToken, startToken, this.test()); - } else if (this.check(TokenType.LPAR, TokenType.NUMBER, ...SPECIAL_IDENTIFIER_TOKENS)) { + } else if (this.check(TokenType.LPAR, TokenType.NUMBER, TokenType.STRING, ...SPECIAL_IDENTIFIER_TOKENS)) { res = new StmtNS.SimpleExpr(startToken, startToken, this.test()); } else { throw new Error("Unreachable code path"); diff --git a/src/tokenizer.ts b/src/tokenizer.ts index ca37c51..842dbbb 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -150,6 +150,9 @@ export class Tokenizer { private advance() { const res = this.source[this.current]; + if (this.peek() == '\n') { + this.line += 1; + } this.current += 1; this.col += 1; return res; @@ -178,10 +181,19 @@ export class Tokenizer { private addStringToken(type: TokenType) { const line = this.line const col = this.col; + // Remove starting and ending quotes when slicing + // Ensures that string is parsed properly const lexeme = this.source.slice(this.start + 1, this.current - 1); this.tokens.push(new Token(type, lexeme, line, col, this.current - lexeme.length)) } + private addMultiLineStringToken(type: TokenType) { + const line = this.line + const col = this.col; + // Remove three starting and ending quotes when slicing + const lexeme = this.source.slice(this.start + 3, this.current - 3); + this.tokens.push(new Token(type, lexeme, line, col, this.current - lexeme.length)) + } // Checks that the current character matches a pattern. If so the character is consumed, else nothing is consumed. private matches(pattern: string): boolean { if (this.isAtEnd()) { @@ -432,26 +444,45 @@ export class Tokenizer { break; // String case '"': - while (this.peek() != '"' && this.peek() != '\n' && !this.isAtEnd()) { - this.advance(); - } - if (this.peek() === '\n' || this.isAtEnd()) { - throw new TokenizerErrors.UnterminatedStringError(this.line, this.col, this.source, this.start, this.current); - } - // Consume closing " - this.advance(); - this.addStringToken(TokenType.STRING); - break; - case '\'': - while (this.peek() != '\'' && this.peek() != '\n' && !this.isAtEnd()) { + case "'": + let quote = c; + if (this.peek() == quote) { // handle multi-line string + this.advance(); // second quote found and consumed + if (this.peek() != quote) { // empty string "" + this.addStringToken(TokenType.STRING); + break; + } + this.advance(); // third quote consumed + while (this.peek() != quote && !this.isAtEnd()) { + this.advance(); // advance until ending quote found + } + if (this.isAtEnd()) { + throw new TokenizerErrors.UnterminatedStringError(this.line, + this.col, this.source, this.start, this.current); + } + this.advance(); // consume first ending quote + if (this.peek() != quote) { + throw new TokenizerErrors.UnterminatedStringError(this.line, + this.col, this.source, this.start, this.current); + } + this.advance(); // consume second ending quote + if (this.peek() != quote) { + throw new TokenizerErrors.UnterminatedStringError(this.line, + this.col, this.source, this.start, this.current); + } + this.advance(); // consume third ending quote + this.addMultiLineStringToken(TokenType.STRING); + } else { // other case, single-line string + while (this.peek() != quote && this.peek() != '\n' && !this.isAtEnd()) { + this.advance(); + } + if (this.peek() === '\n' || this.isAtEnd()) { + throw new TokenizerErrors.UnterminatedStringError(this.line, this.col, this.source, this.start, this.current); + } + // Consume Closing " this.advance(); + this.addStringToken(TokenType.STRING); } - if (this.peek() === '\n' || this.isAtEnd()) { - throw new TokenizerErrors.UnterminatedStringError(this.line, this.col, this.source, this.start, this.current); - } - // Consume closing ' - this.advance(); - this.addStringToken(TokenType.STRING); break; // Number... I wish JS had match statements :( case '0':