diff --git a/CHANGELOG.md b/CHANGELOG.md index a9bec257a..f42aceddd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,13 @@ Semantic versioning in our case means: But, in the future we might change the configuration names / logic, change the client facing API, change code conventions signigicantly, etc. - +## 0.16.0 + +### Features + +- Forbid using open without specifying encoding. + + ## 0.15.3 WIP ### Bugfixes diff --git a/docs/conf.py b/docs/conf.py index 04b8a1fe7..dfd77d3d4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,7 +21,7 @@ # -- Project information ----------------------------------------------------- def _get_project_meta(): - with open('../pyproject.toml') as pyproject: + with open('../pyproject.toml', encoding='utf-8') as pyproject: file_contents = pyproject.read() return tomlkit.parse(file_contents)['tool']['poetry'] diff --git a/scripts/check_generic_visit.py b/scripts/check_generic_visit.py index 8c25a014b..d5aa54599 100644 --- a/scripts/check_generic_visit.py +++ b/scripts/check_generic_visit.py @@ -33,7 +33,7 @@ my_print('"self.generic_visit(node)" should be last statement here:') for fn, line in matches: - with open(fn, 'r') as fp: + with open(fn, 'r', encoding='utf-8') as fp: source = fp.read() lines = source.splitlines() highlighted = highlight( diff --git a/tests/fixtures/noqa/noqa.py b/tests/fixtures/noqa/noqa.py index f2c8ee77b..7861c9baa 100644 --- a/tests/fixtures/noqa/noqa.py +++ b/tests/fixtures/noqa/noqa.py @@ -381,7 +381,7 @@ def function_with_wrong_yield(): for literal in bad_concatenation: # noqa: WPS327, WPS328 continue -with open(bad_concatenation): # noqa: WPS328 +with open(bad_concatenation): # noqa: WPS328, WPS467 pass # noqa: WPS420 @@ -404,7 +404,7 @@ def some_other_function(): string_concat = 'a' + 'b' # noqa: WPS336 my_print(one == 'a' or one == 'b') # noqa: WPS514 -file_obj = open('filaname.py') # noqa: WPS515 +file_obj = open('filaname.py') # noqa: WPS515, WPS467 my_print(type(file_obj) == int) # noqa: WPS516 my_print(*[], **{'@': 1}) # noqa: WPS517, WPS445 @@ -549,7 +549,7 @@ def bad_default_values( for nodes[0] in (1, 2, 3): # noqa: WPS405 anti_wps428 = 1 -with open('some') as MyBadException.custom: # noqa: WPS406 +with open('some') as MyBadException.custom: # noqa: WPS406, WPS467 anti_wps428 = 1 diff --git a/tests/test_checker/test_noqa.py b/tests/test_checker/test_noqa.py index 9ef12b3c8..000ae3d4c 100644 --- a/tests/test_checker/test_noqa.py +++ b/tests/test_checker/test_noqa.py @@ -246,6 +246,7 @@ 'WPS464': 0, # logically unacceptable. 'WPS465': 1, 'WPS466': 0, # defined in version specific table. + 'WPS467': 3, 'WPS500': 1, 'WPS501': 1, diff --git a/tests/test_visitors/test_ast/test_attributes/test_open_encoding.py b/tests/test_visitors/test_ast/test_attributes/test_open_encoding.py new file mode 100644 index 000000000..d0d043c7e --- /dev/null +++ b/tests/test_visitors/test_ast/test_attributes/test_open_encoding.py @@ -0,0 +1,88 @@ +import pytest + +from wemake_python_styleguide.violations.best_practices import ( + UnspecifiedEncodingViolation, +) +from wemake_python_styleguide.visitors.ast.attributes import EncodingVisitor + +unspecified_encoding_with = """ +with open('filename.txt') as fd: + fd.read() +""" + +specified_encoding_with = """ +with open('filename.txt', encoding='ascii') as fd: + fd.read() +""" + +unspecified_encoding_assign = """ +file = open('filename.txt', 'r') +""" + +specified_encoding_assign = """ +file = open('filename.txt', 'r', encoding='utf8') +""" + +specified_encoding_with_none = """ +with open('filename.txt', encoding=None) as fd: + fd.read() +""" + +unspecified_encoding_with_multiple = """ +with open('filename.txt', 'w', -1) as fd: + fd.read() +""" + +specified_encoding_with_multiple = """ +with open('filename.txt', 'w', -1, None) as fd: + fd.read() +""" + +unspecified_encoding_assign_multiple = """ +file = open('filename.txt', 'w', -1) +""" + +specified_encoding_assign_multiple = """ +file = open('filename.txt', 'w', -1, None) +""" + + +@pytest.mark.parametrize('code', [ + unspecified_encoding_with, + unspecified_encoding_assign, + unspecified_encoding_with_multiple, + unspecified_encoding_assign_multiple, +]) +def test_unspecified_encoding( + assert_errors, + code, + default_options, + parse_ast_tree, +): + """Testing open encoding is unspecified.""" + tree = parse_ast_tree(code) + visitor = EncodingVisitor(default_options, tree=tree) + visitor.run() + + assert_errors(visitor, [UnspecifiedEncodingViolation]) + + +@pytest.mark.parametrize('code', [ + specified_encoding_with, + specified_encoding_assign, + specified_encoding_with_none, + specified_encoding_with_multiple, + specified_encoding_assign_multiple, +]) +def test_specified_encoding( + assert_errors, + code, + default_options, + parse_ast_tree, +): + """Testing open encoding is specified.""" + tree = parse_ast_tree(code) + visitor = EncodingVisitor(default_options, tree=tree) + visitor.run() + + assert_errors(visitor, []) diff --git a/wemake_python_styleguide/presets/types/tree.py b/wemake_python_styleguide/presets/types/tree.py index 90262ea94..e4b8f205f 100644 --- a/wemake_python_styleguide/presets/types/tree.py +++ b/wemake_python_styleguide/presets/types/tree.py @@ -46,6 +46,7 @@ loops.SyncForLoopVisitor, attributes.WrongAttributeVisitor, + attributes.EncodingVisitor, annotations.WrongAnnotationVisitor, functions.WrongFunctionCallVisitor, diff --git a/wemake_python_styleguide/violations/best_practices.py b/wemake_python_styleguide/violations/best_practices.py index 564fb3703..93ce739b4 100644 --- a/wemake_python_styleguide/violations/best_practices.py +++ b/wemake_python_styleguide/violations/best_practices.py @@ -83,6 +83,7 @@ EmptyCommentViolation BitwiseAndBooleanMixupViolation NewStyledDecoratorViolation + UnspecifiedEncodingViolation Best practices -------------- @@ -154,6 +155,7 @@ .. autoclass:: EmptyCommentViolation .. autoclass:: BitwiseAndBooleanMixupViolation .. autoclass:: NewStyledDecoratorViolation +.. autoclass:: UnspecifiedEncodingViolation """ @@ -2572,3 +2574,31 @@ def my_function(): ... error_template = 'Found new-styled decorator' code = 466 + + +@final +class UnspecifiedEncodingViolation(ASTViolation): + """ + Forbid using open without specifying encoding. + + Reasoning: + Inspired by https://www.python.org/dev/peps/pep-0597/ + Not specifying the encoding could be considered a bug. + Developers using macOS or Linux may forget that the + default encoding is not always UTF-8. + + Solution: + Specify the encoding in open. + + Example: + # Correct: + open('filename.txt', encoding='utf8') + + # Wrong: + with open('filename.txt') + + .. versionadded:: 0.16.0 + """ + + error_template = 'Found `open()` call without specified encoding' + code = 467 diff --git a/wemake_python_styleguide/visitors/ast/attributes.py b/wemake_python_styleguide/visitors/ast/attributes.py index b3c74fd85..30f9b2687 100644 --- a/wemake_python_styleguide/visitors/ast/attributes.py +++ b/wemake_python_styleguide/visitors/ast/attributes.py @@ -1,11 +1,13 @@ import ast -from typing import ClassVar, FrozenSet +from typing import ClassVar, FrozenSet, List from typing_extensions import final from wemake_python_styleguide.logic.naming import access +from wemake_python_styleguide.logic.tree.functions import given_function_called from wemake_python_styleguide.violations.best_practices import ( ProtectedAttributeViolation, + UnspecifiedEncodingViolation, ) from wemake_python_styleguide.violations.oop import ( DirectMagicAttributeAccessViolation, @@ -73,3 +75,27 @@ def _check_magic_attribute(self, node: ast.Attribute) -> None: self._ensure_attribute_type( node, DirectMagicAttributeAccessViolation, ) + + +@final +class EncodingVisitor(BaseNodeVisitor): + """Check if open function has the encoding parameter.""" + + def visit_Call(self, node: ast.Call) -> None: + """Visit calls and finds if it is an open function.""" + if given_function_called(node, {'open'}): + is_keyword = self._check_keywords(node.keywords) + is_positional = self._check_args(node.args) + + if not is_positional and not is_keyword: + self.add_violation(UnspecifiedEncodingViolation(node)) + + self.generic_visit(node) + + def _check_keywords(self, keywords: List[ast.keyword]) -> bool: + """Check if there is an encoding parameter.""" + return any(keyword.arg == 'encoding' for keyword in keywords) + + def _check_args(self, positionals: List[ast.expr]) -> bool: + """Check if there is an encoding parameter.""" + return len(positionals) > 3 and isinstance(positionals[3], ast.Constant)