Skip to content

Commit

Permalink
Add --pyink-ipynb-unicode-escape option for unicode escaping
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 686918137
  • Loading branch information
AleksMat authored and copybara-github committed Oct 18, 2024
1 parent f5ea101 commit fd0cfac
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 33 deletions.
65 changes: 42 additions & 23 deletions patches/pyink.patch
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

COMPILED = Path(__file__).suffix in (".pyd", ".so")

@@ -338,6 +345,53 @@ def validate_regex(
@@ -338,6 +345,61 @@ def validate_regex(
),
)
@click.option(
Expand Down Expand Up @@ -67,6 +67,14 @@
+ ),
+)
+@click.option(
+ "--pyink-ipynb-unicode-escape",
+ is_flag=True,
+ help=(
+ "Enable serialization of Jupyter notebook content into a JSON form"
+ " where characters <, >, and & are unicode escaped."
+ ),
+)
+@click.option(
+ "--pyink-annotation-pragmas",
+ type=str,
+ multiple=True,
Expand All @@ -91,26 +99,28 @@
"--check",
is_flag=True,
help=(
@@ -530,6 +584,11 @@ def main( # noqa: C901
@@ -530,6 +592,12 @@ def main( # noqa: C901
preview: bool,
unstable: bool,
enable_unstable_feature: list[Preview],
+ pyink: bool,
+ pyink_indentation: str,
+ pyink_ipynb_indentation: str,
+ pyink_ipynb_unicode_escape: bool,
+ pyink_annotation_pragmas: list[str],
+ pyink_use_majority_quotes: bool,
quiet: bool,
verbose: bool,
required_version: Optional[str],
@@ -636,7 +695,15 @@ def main( # noqa: C901
@@ -636,7 +704,16 @@ def main( # noqa: C901
preview=preview,
unstable=unstable,
python_cell_magics=set(python_cell_magics),
- enabled_features=set(enable_unstable_feature),
+ is_pyink=pyink,
+ pyink_indentation=int(pyink_indentation),
+ pyink_ipynb_indentation=int(pyink_ipynb_indentation),
+ pyink_ipynb_unicode_escape=pyink_ipynb_unicode_escape,
+ pyink_annotation_pragmas=(
+ tuple(pyink_annotation_pragmas) or DEFAULT_ANNOTATION_PRAGMAS
+ ),
Expand All @@ -120,7 +130,7 @@
)

lines: list[tuple[int, int]] = []
@@ -1132,6 +1199,17 @@ def validate_metadata(nb: MutableMapping
@@ -1132,6 +1209,17 @@ def validate_metadata(nb: MutableMapping
if language is not None and language != "python":
raise NothingChanged from None

Expand All @@ -138,15 +148,15 @@

def format_ipynb_string(src_contents: str, *, fast: bool, mode: Mode) -> FileContent:
"""Format Jupyter notebook.
@@ -1143,7 +1221,6 @@ def format_ipynb_string(src_contents: st
@@ -1143,7 +1231,6 @@ def format_ipynb_string(src_contents: st
raise NothingChanged

trailing_newline = src_contents[-1] == "\n"
- modified = False
nb = json.loads(src_contents)
validate_metadata(nb)
for cell in nb["cells"]:
@@ -1155,14 +1232,15 @@ def format_ipynb_string(src_contents: st
@@ -1155,14 +1242,17 @@ def format_ipynb_string(src_contents: st
pass
else:
cell["source"] = dst.splitlines(keepends=True)
Expand All @@ -164,13 +174,15 @@
+ dst_contents = json.dumps(
+ nb, indent=mode.pyink_ipynb_indentation, ensure_ascii=False
+ )
+ if mode.pyink_ipynb_unicode_escape:
+ dst_contents = ink.unicode_escape_json(dst_contents)
+ if trailing_newline:
+ dst_contents = dst_contents + "\n"
+ return dst_contents


def format_str(
@@ -1223,6 +1301,8 @@ def _format_str_once(
@@ -1223,6 +1313,8 @@ def _format_str_once(
future_imports = get_future_imports(src_node)
versions = detect_target_versions(src_node, future_imports=future_imports)

Expand Down Expand Up @@ -973,7 +985,7 @@


@dataclass
@@ -237,12 +261,20 @@ class Mode:
@@ -237,12 +261,21 @@ class Mode:
target_versions: set[TargetVersion] = field(default_factory=set)
line_length: int = DEFAULT_LINE_LENGTH
string_normalization: bool = True
Expand All @@ -990,11 +1002,12 @@
+ is_pyink: bool = False
+ pyink_indentation: Literal[2, 4] = 4
+ pyink_ipynb_indentation: Literal[1, 2] = 1
+ pyink_ipynb_unicode_escape: bool = False
+ pyink_annotation_pragmas: tuple[str, ...] = DEFAULT_ANNOTATION_PRAGMAS
unstable: bool = False
enabled_features: set[Preview] = field(default_factory=set)

@@ -254,6 +286,9 @@ class Mode:
@@ -254,6 +287,9 @@ class Mode:
except those in UNSTABLE_FEATURES are enabled. Any features in
`self.enabled_features` are also enabled.
"""
Expand All @@ -1004,7 +1017,7 @@
if self.unstable:
return True
if feature in self.enabled_features:
@@ -285,12 +320,27 @@ class Mode:
@@ -285,12 +321,28 @@ class Mode:
version_str,
str(self.line_length),
str(int(self.string_normalization)),
Expand All @@ -1019,6 +1032,7 @@
+ str(int(self.is_pyink)),
+ str(self.pyink_indentation),
+ str(self.pyink_ipynb_indentation),
+ str(int(self.pyink_ipynb_unicode_escape)),
+ sha256(str(self.pyink_annotation_pragmas).encode()).hexdigest()[:8],
features_and_magics,
]
Expand Down Expand Up @@ -1439,18 +1453,23 @@
format_cell,
format_file_contents,
format_file_in_place,
@@ -27,8 +28,10 @@ pytest.importorskip("IPython", reason="I
@@ -27,8 +28,15 @@ pytest.importorskip("IPython", reason="I
pytest.importorskip("tokenize_rt", reason="tokenize-rt is an optional dependency")

JUPYTER_MODE = Mode(is_ipynb=True)
+PYINK_JUPYTER_MODE = Mode(is_ipynb=True, pyink_indentation=2, pyink_ipynb_indentation=2)
+PYINK_JUPYTER_MODE = Mode(
+ is_ipynb=True,
+ pyink_indentation=2,
+ pyink_ipynb_indentation=2,
+ pyink_ipynb_unicode_escape=True,
+)

EMPTY_CONFIG = DATA_DIR / "empty_pyproject.toml"
+PYINK_OVERRIDE_CONFIG = DATA_DIR / "pyink_configs" / "overrides.toml"

runner = CliRunner()

@@ -174,6 +177,22 @@ def test_cell_magic_with_magic() -> None
@@ -174,6 +182,22 @@ def test_cell_magic_with_magic() -> None


@pytest.mark.parametrize(
Expand All @@ -1473,7 +1492,7 @@
"mode, expected_output, expectation",
[
pytest.param(
@@ -224,6 +243,13 @@ def test_cell_magic_with_custom_python_m
@@ -224,6 +248,13 @@ def test_cell_magic_with_custom_python_m
format_cell(src, fast=True, mode=JUPYTER_MODE)


Expand All @@ -1487,7 +1506,7 @@
def test_cell_magic_nested() -> None:
src = "%%time\n%%time\n2+2"
result = format_cell(src, fast=True, mode=JUPYTER_MODE)
@@ -397,6 +423,45 @@ def test_entire_notebook_no_trailing_new
@@ -397,6 +428,45 @@ def test_entire_notebook_no_trailing_new
assert result == expected


Expand All @@ -1513,8 +1532,8 @@
+ ' "%%time\\n",\n'
+ ' "\\n",\n'
+ ' "a = 1\\n",\n'
+ ' "if a == 1:\\n",\n'
+ ' " print(\\"\\")"\n'
+ ' "if a \\u003c 1 or a \\u003e 1:\\n",\n'
+ ' " print(\\"\\u0026\\u003c\\u003e\\")"\n'
+ " ]\n"
+ " }\n"
+ " ],\n"
Expand All @@ -1533,7 +1552,7 @@
def test_entire_notebook_without_changes() -> None:
content = read_jupyter_notebook("jupyter", "notebook_without_changes")
with pytest.raises(NothingChanged):
@@ -448,6 +513,30 @@ def test_ipynb_diff_with_no_change() ->
@@ -448,6 +518,30 @@ def test_ipynb_diff_with_no_change() ->
assert expected in result.output


Expand All @@ -1546,18 +1565,18 @@
+ f"--config={PYINK_OVERRIDE_CONFIG}",
+ ],
+ )
+ expected = """00:00:cell_1
+ expected = """cell_1
+@@ -1,6 +1,5 @@
+- %%time
++%%time
+
+-a=1
+-if a ==1:
+- print("")
+-if a <1 or a>1:
+- print("&<>")
+-
++a = 1
++if a == 1:
++ print("")"""
++if a < 1 or a > 1:
++ print("&<>")"""
+ assert expected in result.output
+
+
Expand Down
12 changes: 12 additions & 0 deletions src/pyink/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,14 @@ def validate_regex(
" notebook."
),
)
@click.option(
"--pyink-ipynb-unicode-escape",
is_flag=True,
help=(
"Enable serialization of Jupyter notebook content into a JSON form"
" where characters <, >, and & are unicode escaped."
),
)
@click.option(
"--pyink-annotation-pragmas",
type=str,
Expand Down Expand Up @@ -587,6 +595,7 @@ def main( # noqa: C901
pyink: bool,
pyink_indentation: str,
pyink_ipynb_indentation: str,
pyink_ipynb_unicode_escape: bool,
pyink_annotation_pragmas: list[str],
pyink_use_majority_quotes: bool,
quiet: bool,
Expand Down Expand Up @@ -698,6 +707,7 @@ def main( # noqa: C901
is_pyink=pyink,
pyink_indentation=int(pyink_indentation),
pyink_ipynb_indentation=int(pyink_ipynb_indentation),
pyink_ipynb_unicode_escape=pyink_ipynb_unicode_escape,
pyink_annotation_pragmas=(
tuple(pyink_annotation_pragmas) or DEFAULT_ANNOTATION_PRAGMAS
),
Expand Down Expand Up @@ -1238,6 +1248,8 @@ def format_ipynb_string(src_contents: str, *, fast: bool, mode: Mode) -> FileCon
dst_contents = json.dumps(
nb, indent=mode.pyink_ipynb_indentation, ensure_ascii=False
)
if mode.pyink_ipynb_unicode_escape:
dst_contents = ink.unicode_escape_json(dst_contents)
if trailing_newline:
dst_contents = dst_contents + "\n"
return dst_contents
Expand Down
20 changes: 20 additions & 0 deletions src/pyink/ink.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,26 @@ def get_code_start(src: str) -> str:
return ""


# Translation table mapping each character that gets escaped (<, >, &) to its
# six-character JSON escape sequence (backslash, "u", four lowercase hex
# digits). Built once at import time so each call is a single str.translate
# pass.
_JSON_UNICODE_ESCAPES = str.maketrans(
    {char: f"\\u{ord(char):04x}" for char in "<>&"}
)


def unicode_escape_json(src: str) -> str:
    """Escapes problematic unicode characters in JSON string.

    This mimics the implementation in Colab backend and converts characters
    <, >, and & to their unicode representations. More info in
    go/unicode-escaping-in-colab.

    Every occurrence of the three characters in `src` is replaced, wherever
    it appears; all other characters are left untouched.

    Args:
      src: A serialized JSON string.

    Returns:
      A serialized JSON string with unicode escaped characters, e.g. "<"
      becomes the six characters "\\u003c".
    """
    return src.translate(_JSON_UNICODE_ESCAPES)


def convert_unchanged_lines(src_node: Node, lines: Collection[tuple[int, int]]):
"""Converts unchanged lines to STANDALONE_COMMENT.
Expand Down
2 changes: 2 additions & 0 deletions src/pyink/mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ class Mode:
is_pyink: bool = False
pyink_indentation: Literal[2, 4] = 4
pyink_ipynb_indentation: Literal[1, 2] = 1
pyink_ipynb_unicode_escape: bool = False
pyink_annotation_pragmas: tuple[str, ...] = DEFAULT_ANNOTATION_PRAGMAS
unstable: bool = False
enabled_features: set[Preview] = field(default_factory=set)
Expand Down Expand Up @@ -331,6 +332,7 @@ def get_cache_key(self) -> str:
str(int(self.is_pyink)),
str(self.pyink_indentation),
str(self.pyink_ipynb_indentation),
str(int(self.pyink_ipynb_unicode_escape)),
sha256(str(self.pyink_annotation_pragmas).encode()).hexdigest()[:8],
features_and_magics,
]
Expand Down
4 changes: 2 additions & 2 deletions tests/data/pyink_configs/example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
" %%time\n",
"\n",
"a=1\n",
"if a ==1:\n",
" print(\"\")\n"
"if a <1 or a>1:\n",
" print(\"&<>\")\n"
]
}
],
Expand Down
1 change: 1 addition & 0 deletions tests/data/pyink_configs/overrides.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Pyink option overrides that the notebook tests load via --config.
[tool.pyink]
pyink-indentation = 2
pyink-ipynb-indentation = 2
# Serialize notebook JSON with the characters <, >, and & unicode escaped.
pyink-ipynb-unicode-escape = true
pyink-annotation-pragmas = ["@param", "type: ignore"]
21 changes: 13 additions & 8 deletions tests/test_ipynb.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,12 @@
pytest.importorskip("tokenize_rt", reason="tokenize-rt is an optional dependency")

JUPYTER_MODE = Mode(is_ipynb=True)
PYINK_JUPYTER_MODE = Mode(is_ipynb=True, pyink_indentation=2, pyink_ipynb_indentation=2)
PYINK_JUPYTER_MODE = Mode(
is_ipynb=True,
pyink_indentation=2,
pyink_ipynb_indentation=2,
pyink_ipynb_unicode_escape=True,
)

EMPTY_CONFIG = DATA_DIR / "empty_pyproject.toml"
PYINK_OVERRIDE_CONFIG = DATA_DIR / "pyink_configs" / "overrides.toml"
Expand Down Expand Up @@ -445,8 +450,8 @@ def test_entire_notebook_with_pyink_overrides() -> None:
' "%%time\\n",\n'
' "\\n",\n'
' "a = 1\\n",\n'
' "if a == 1:\\n",\n'
' " print(\\"\\")"\n'
' "if a \\u003c 1 or a \\u003e 1:\\n",\n'
' " print(\\"\\u0026\\u003c\\u003e\\")"\n'
" ]\n"
" }\n"
" ],\n"
Expand Down Expand Up @@ -522,18 +527,18 @@ def test_ipynb_diff_with_pyink_overrides() -> None:
f"--config={PYINK_OVERRIDE_CONFIG}",
],
)
expected = """00:00:cell_1
expected = """cell_1
@@ -1,6 +1,5 @@
- %%time
+%%time
-a=1
-if a ==1:
- print("")
-if a <1 or a>1:
- print("&<>")
-
+a = 1
+if a == 1:
+ print("")"""
+if a < 1 or a > 1:
+ print("&<>")"""
assert expected in result.output


Expand Down

0 comments on commit fd0cfac

Please sign in to comment.