Fix issue #8 (#16)
* Fix issue #8.

* Cleaning up code

* Reduced nested conditionals

* Update tokenizer.ts

* Fixed typo

* Fixed typos
JJtan2002 authored Mar 17, 2024
1 parent 17c51ee commit 215754a
Showing 3 changed files with 63 additions and 33 deletions.
15 changes: 7 additions & 8 deletions src/index.ts
@@ -129,15 +129,15 @@
/* Use as a command line script */
/* npm run start:dev -- test.py */

import {Tokenizer} from "./tokenizer";
import {Parser} from "./parser";
import {Translator} from "./translator";
import {Program} from "estree";
import {Resolver} from "./resolver";
import { Tokenizer } from "./tokenizer";
import { Parser } from "./parser";
import { Translator } from "./translator";
import { Program } from "estree";
import { Resolver } from "./resolver";

export function parsePythonToEstreeAst(code: string,
variant: number = 1,
doValidate: boolean = false) : Program {
variant: number = 1,
doValidate: boolean = false): Program {
const script = code + '\n'
const tokenizer = new Tokenizer(script)
const tokens = tokenizer.scanEverything()
@@ -152,7 +152,6 @@ export function parsePythonToEstreeAst(code: string,

export * from './errors';


// import {ParserErrors, ResolverErrors, TokenizerErrors} from "./errors";
// import fs from "fs";
// const BaseParserError = ParserErrors.BaseParserError;
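For context, a minimal usage sketch of the entry point shown above. This is an illustration, not part of the commit: the import path and the sample program are assumptions; only the parsePythonToEstreeAst signature comes from the diff.

import { parsePythonToEstreeAst } from "./src/index"; // path assumed for illustration
import { Program } from "estree";

// A small Python program containing a string literal, which this commit
// teaches the tokenizer and parser to handle.
const source = 'x = "hello"\n';
const ast: Program = parsePythonToEstreeAst(source, 1, false);
console.log(ast.type); // an estree Program node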
14 changes: 7 additions & 7 deletions src/parser.ts
@@ -39,10 +39,10 @@
IN THE SOFTWARE.
**/

import {SPECIAL_IDENTIFIER_TOKENS, Token} from "./tokenizer";
import {TokenType} from "./tokens";
import {ExprNS, StmtNS} from "./ast-types";
import {ParserErrors} from "./errors";
import { SPECIAL_IDENTIFIER_TOKENS, Token } from "./tokenizer";
import { TokenType } from "./tokens";
import { ExprNS, StmtNS } from "./ast-types";
import { ParserErrors } from "./errors";

type Expr = ExprNS.Expr;
type Stmt = StmtNS.Stmt;
@@ -156,7 +156,7 @@ export class Parser {
} else if (this.check(TokenType.NAME, ...PSEUD_NAMES, TokenType.NUMBER,
TokenType.PASS, TokenType.BREAK, TokenType.CONTINUE,
TokenType.RETURN, TokenType.FROM, TokenType.GLOBAL, TokenType.NONLOCAL,
TokenType.ASSERT, TokenType.LPAR, ...SPECIAL_IDENTIFIER_TOKENS)) {
TokenType.ASSERT, TokenType.LPAR, TokenType.STRING, ...SPECIAL_IDENTIFIER_TOKENS)) {
return this.simple_stmt();
}
const startToken = this.peek();
@@ -165,7 +165,7 @@
this.parse_invalid(startToken, endToken);
} catch (e) {
if (e instanceof ParserErrors.BaseParserError) {
throw(e)
throw (e)
}
}
throw new ParserErrors.GenericUnexpectedSyntaxError(startToken.line, startToken.col, this.source,
@@ -255,7 +255,7 @@ export class Parser {
res = new StmtNS.NonLocal(startToken, startToken, this.advance());
} else if (this.match(TokenType.ASSERT)) {
res = new StmtNS.Assert(startToken, startToken, this.test());
} else if (this.check(TokenType.LPAR, TokenType.NUMBER, ...SPECIAL_IDENTIFIER_TOKENS)) {
} else if (this.check(TokenType.LPAR, TokenType.NUMBER, TokenType.STRING, ...SPECIAL_IDENTIFIER_TOKENS)) {
res = new StmtNS.SimpleExpr(startToken, startToken, this.test());
} else {
throw new Error("Unreachable code path");
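The TokenType.STRING entries added above mean a statement that begins with a string literal is now routed to simple_stmt() rather than falling through to the GenericUnexpectedSyntaxError path. A hedged sketch of the effect, reusing the public entry point from src/index.ts; the import path and sample input are illustrative assumptions:

import { parsePythonToEstreeAst } from "./src/index"; // path assumed for illustration

// A program whose first statement is a bare (docstring-style) string expression.
// With STRING in the check(...) lists above, this should parse as a simple
// statement instead of reaching the unexpected-syntax error path.
const source = '"a bare string expression"\nx = 1\n';
const ast = parsePythonToEstreeAst(source);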
67 changes: 49 additions & 18 deletions src/tokenizer.ts
@@ -150,6 +150,9 @@ export class Tokenizer {

private advance() {
const res = this.source[this.current];
if (this.peek() == '\n') {
this.line += 1;
}
this.current += 1;
this.col += 1;
return res;
@@ -178,10 +181,19 @@
private addStringToken(type: TokenType) {
const line = this.line
const col = this.col;
// Remove starting and ending quotes when slicing
// Ensures that string is parsed properly
const lexeme = this.source.slice(this.start + 1, this.current - 1);
this.tokens.push(new Token(type, lexeme, line, col, this.current - lexeme.length))
}

private addMultiLineStringToken(type: TokenType) {
const line = this.line
const col = this.col;
// Remove three starting and ending quotes when slicing
const lexeme = this.source.slice(this.start + 3, this.current - 3);
this.tokens.push(new Token(type, lexeme, line, col, this.current - lexeme.length))
}
// Checks that the current character matches a pattern. If so the character is consumed, else nothing is consumed.
private matches(pattern: string): boolean {
if (this.isAtEnd()) {
@@ -432,26 +444,45 @@
break;
// String
case '"':
while (this.peek() != '"' && this.peek() != '\n' && !this.isAtEnd()) {
this.advance();
}
if (this.peek() === '\n' || this.isAtEnd()) {
throw new TokenizerErrors.UnterminatedStringError(this.line, this.col, this.source, this.start, this.current);
}
// Consume closing "
this.advance();
this.addStringToken(TokenType.STRING);
break;
case '\'':
while (this.peek() != '\'' && this.peek() != '\n' && !this.isAtEnd()) {
case "'":
let quote = c;
if (this.peek() == quote) { // handle multi-line string
this.advance(); // second quote found and consumed
if (this.peek() != quote) { // empty string ""
this.addStringToken(TokenType.STRING);
break;
}
this.advance(); // third quote consumed
while (this.peek() != quote && !this.isAtEnd()) {
this.advance(); // advance until ending quote found
}
if (this.isAtEnd()) {
throw new TokenizerErrors.UnterminatedStringError(this.line,
this.col, this.source, this.start, this.current);
}
this.advance(); // consume first ending quote
if (this.peek() != quote) {
throw new TokenizerErrors.UnterminatedStringError(this.line,
this.col, this.source, this.start, this.current);
}
this.advance(); // consume second ending quote
if (this.peek() != quote) {
throw new TokenizerErrors.UnterminatedStringError(this.line,
this.col, this.source, this.start, this.current);
}
this.advance(); // consume third ending quote
this.addMultiLineStringToken(TokenType.STRING);
} else { // other case, single-line string
while (this.peek() != quote && this.peek() != '\n' && !this.isAtEnd()) {
this.advance();
}
if (this.peek() === '\n' || this.isAtEnd()) {
throw new TokenizerErrors.UnterminatedStringError(this.line, this.col, this.source, this.start, this.current);
}
// Consume Closing "
this.advance();
this.addStringToken(TokenType.STRING);
}
if (this.peek() === '\n' || this.isAtEnd()) {
throw new TokenizerErrors.UnterminatedStringError(this.line, this.col, this.source, this.start, this.current);
}
// Consume closing '
this.advance();
this.addStringToken(TokenType.STRING);
break;
// Number... I wish JS had match statements :(
case '0':
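A small sketch of what the new multi-line string path is meant to accept: a triple-quoted literal is scanned to a single STRING token, and addMultiLineStringToken strips the three opening and closing quotes from the lexeme. The import paths, and the assumption that Token exposes type and lexeme fields, are illustrative rather than taken from this diff:

import { Tokenizer } from "./src/tokenizer"; // paths assumed for illustration
import { TokenType } from "./src/tokens";

// A triple-quoted (multi-line) Python string literal followed by a newline.
const source = '"""first line\nsecond line"""\n';
const tokens = new Tokenizer(source).scanEverything();

// Assuming Token exposes its type and lexeme, the STRING token's lexeme should
// be the text between the triple quotes: 'first line\nsecond line'.
const str = tokens.find(t => t.type === TokenType.STRING);
console.log(str?.lexeme);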
