Skip to content

Commit

Permalink
Fix skipping Jupyter cells with unknown %% magic
Browse files Browse the repository at this point in the history
  • Loading branch information
AleksMat committed Sep 18, 2024
1 parent bbfdba3 commit 817e9d4
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 26 deletions.
27 changes: 1 addition & 26 deletions src/black/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
put_trailing_semicolon_back,
remove_trailing_semicolon,
unmask_cell,
validate_cell,
)
from black.linegen import LN, LineGenerator, transform_line
from black.lines import EmptyLineTracker, LinesBlock
Expand Down Expand Up @@ -1084,32 +1085,6 @@ def format_file_contents(
return dst_contents


def validate_cell(src: str, mode: Mode) -> None:
"""Check that cell does not already contain TransformerManager transformations,
or non-Python cell magics, which might cause tokenizer_rt to break because of
indentations.
If a cell contains ``!ls``, then it'll be transformed to
``get_ipython().system('ls')``. However, if the cell originally contained
``get_ipython().system('ls')``, then it would get transformed in the same way:
>>> TransformerManager().transform_cell("get_ipython().system('ls')")
"get_ipython().system('ls')\n"
>>> TransformerManager().transform_cell("!ls")
"get_ipython().system('ls')\n"
Due to the impossibility of safely roundtripping in such situations, cells
containing transformed magics will be ignored.
"""
if any(transformed_magic in src for transformed_magic in TRANSFORMED_MAGICS):
raise NothingChanged
if (
src[:2] == "%%"
and src.split()[0][2:] not in PYTHON_CELL_MAGICS | mode.python_cell_magics
):
raise NothingChanged


def format_cell(src: str, *, fast: bool, mode: Mode) -> str:
"""Format code in given cell of Jupyter notebook.
Expand Down
45 changes: 45 additions & 0 deletions src/black/handle_ipynb_magics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import ast
import collections
import dataclasses
import re
import secrets
import sys
from functools import lru_cache
Expand All @@ -14,6 +15,7 @@
else:
from typing_extensions import TypeGuard

from black.mode import Mode
from black.output import out
from black.report import NothingChanged

Expand Down Expand Up @@ -64,6 +66,34 @@ def jupyter_dependencies_are_installed(*, warn: bool) -> bool:
return installed


def validate_cell(src: str, mode: Mode) -> None:
"""Check that cell does not already contain TransformerManager transformations,
or non-Python cell magics, which might cause tokenizer_rt to break because of
indentations.
If a cell contains ``!ls``, then it'll be transformed to
``get_ipython().system('ls')``. However, if the cell originally contained
``get_ipython().system('ls')``, then it would get transformed in the same way:
>>> TransformerManager().transform_cell("get_ipython().system('ls')")
"get_ipython().system('ls')\n"
>>> TransformerManager().transform_cell("!ls")
"get_ipython().system('ls')\n"
Due to the impossibility of safely roundtripping in such situations, cells
containing transformed magics will be ignored.
"""
if any(transformed_magic in src for transformed_magic in TRANSFORMED_MAGICS):
raise NothingChanged

line = _get_code_start(src)
if line.startswith("%%") and (
line.split(maxsplit=1)[0][2:]
not in PYTHON_CELL_MAGICS | mode.python_cell_magics
):
raise NothingChanged


def remove_trailing_semicolon(src: str) -> tuple[str, bool]:
"""Remove trailing semicolon from Jupyter notebook cell.
Expand Down Expand Up @@ -276,6 +306,21 @@ def unmask_cell(src: str, replacements: list[Replacement]) -> str:
return src


def _get_code_start(src: str) -> str:
"""Provides the first line where the code starts.
Iterates over lines of code until it finds the first line that doesn't
contain only empty spaces and comments. It removes any empty spaces at the
start of the line and returns it. If such line doesn't exist, it returns an
empty string.
"""
for match in re.finditer(".+", src):
line = match.group(0).lstrip()
if line and not line.startswith("#"):
return line
return ""


def _is_ipython_magic(node: ast.expr) -> TypeGuard[ast.Attribute]:
"""Check if attribute is IPython magic.
Expand Down
16 changes: 16 additions & 0 deletions tests/test_ipynb.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,22 @@ def test_cell_magic_with_custom_python_magic(
assert result == expected_output


@pytest.mark.parametrize(
"src",
(
" %%custom_magic \nx=2",
"\n\n%%custom_magic\nx=2",
"# comment\n%%custom_magic\nx=2",
"\n \n # comment with %%time\n\t\n %%custom_magic # comment \nx=2",
),
)
def test_cell_magic_with_custom_python_magic_after_spaces_and_comments_noop(
src: str,
) -> None:
with pytest.raises(NothingChanged):
format_cell(src, fast=True, mode=JUPYTER_MODE)


def test_cell_magic_nested() -> None:
src = "%%time\n%%time\n2+2"
result = format_cell(src, fast=True, mode=JUPYTER_MODE)
Expand Down

0 comments on commit 817e9d4

Please sign in to comment.