Skip to content

Commit

Permalink
chore: Add ruff lint configuration
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Sep 16, 2024
1 parent f3dd678 commit 2103289
Show file tree
Hide file tree
Showing 12 changed files with 120 additions and 127 deletions.
18 changes: 17 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,23 @@ line-length = 119
target-version = "py39"

[tool.ruff.lint]
select = ["E", "C4", "F", "I", "W"]
select = ["ALL"]
ignore = [
"ANN", "C901", "COM812", "D203", "D212", "D415", "EM", "PERF203", "PLR091", "Q000",
"PLR2004", "PLW2901", "D1", "D205",
"PTH",
"FIX002", # todo
]

[tool.ruff.lint.flake8-builtins]
builtins-ignorelist = ["copyright"]

[tool.ruff.lint.per-file-ignores]
"docs/conf.py" = ["D100", "INP001"]
"tests/*" = [
"ARG001", "D", "FBT003", "INP001", "PLR2004", "S", "TRY003",
]
"spoonbill/cli.py" = ["ARG002"] # click

[tool.pytest.ini_options]
addopts = '--doctest-modules'
27 changes: 17 additions & 10 deletions spoonbill/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@


class FileAnalyzer:
"""Main utility for analyzing files
"""
Main utility for analyzing files.
:param workdir: Working directory
:param schema: Json schema file to use with data
:param root_tables: Path configuration which should become root tables
Expand Down Expand Up @@ -53,8 +55,10 @@ def __init__(
self.pkg_type = pkg_type
self.order = None

def analyze_file(self, filenames, *, with_preview=True):
"""Analyze provided file
def analyze_file(self, filenames, *, with_preview=True): # noqa: ARG002
"""
Analyze provided file.
:param filename: Input filename
:param with_preview: Generate preview during analysis
"""
Expand Down Expand Up @@ -89,7 +93,9 @@ def analyze_file(self, filenames, *, with_preview=True):
self.sort_tables()

def dump_to_file(self, filename):
"""Save analyzed information to file
"""
Save analyzed information to file.
:param filename: Output filename in working directory
"""
path = self.workdir / filename
Expand Down Expand Up @@ -122,10 +128,7 @@ def parse_schema(self, input_format, schema=None):
self.pkg_type = pkg_type

def sort_tables(self):
"""
Sort tables according to order of arrays in schema
:return:
"""
"""Sort tables according to order of arrays in schema."""
self.order = get_order(self.spec.schema["properties"].keys())
out_schema_tables = {
name: table for name, table in self.spec.tables.items() if name.split("_")[0] not in self.order
Expand All @@ -146,7 +149,9 @@ def sort_tables(self):


class FileFlattener:
"""Main utility for flattening files
"""
Main utility for flattening files.
:param workdir: Working directory
:param options: Flattening configuration
:param analyzer: Analyzed data object
Expand Down Expand Up @@ -196,7 +201,9 @@ def _flatten(self, filenames, writers):
yield count

def flatten_file(self, filename):
"""Flatten file
"""
Flatten file.
:param filename: Input filename in working directory
"""
workdir = self.workdir
Expand Down
6 changes: 3 additions & 3 deletions spoonbill/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""cli.py - Command line interface related routines"""
"""Command line interface-related routines."""

import logging
import pathlib
Expand All @@ -22,7 +22,7 @@


class CommaSeparated(click.ParamType):
"""Click option type to convert comma separated string into list"""
"""Click option type to convert comma-separated string into list."""

name = "comma"

Expand Down Expand Up @@ -188,7 +188,7 @@ def cli(
human,
language,
):
"""Spoonbill cli entry point"""
"""Spoonbill CLI entry point."""
if csv:
csv = pathlib.Path(csv).resolve()
if not csv.exists():
Expand Down
31 changes: 17 additions & 14 deletions spoonbill/flatten.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@

@dataclass
class TableFlattenConfig:
"""Table specific flattening configuration
"""
Table-specific flattening configuration.
:param split: Split child arrays to separate tables
:param pretty_headers: Use human friendly headers extracted from schema
Expand All @@ -36,7 +37,8 @@ class TableFlattenConfig:

@dataclass
class FlattenOptions:
"""Flattening configuration
"""
Flattening configuration.
:param selection: List of selected tables to extract from data
:param count: Include number of rows in child table in each parent table
Expand All @@ -54,18 +56,19 @@ def __post_init__(self):


class Flattener:
"""Data flattener
"""
In order to export data correctly, Flattener requires previously analyzed tables data.
In order to export data correctly Flattener requires previously analyzed tables data.
During the process flattener could add columns not based on schema analysis, such as
`itemsCount`.
During the process flattener could add columns not based on schema analysis, such as `itemsCount`.
In every generated row, depending on table type, flattener will always add a few autogenerated columns.
For root table:
* rowID
* id
* ocid
For child tables this list well be extended with `parentID` column.
- rowID
- id
- ocid
For child tables, this list is extended with `parentID` column.
:param options: Flattening options
:param tables: Analyzed tables data
Expand Down Expand Up @@ -139,7 +142,7 @@ def init_table_lookup(self, tables, table, target=None):
self._map_types(table, target=target)
self._map_cols(table, self.options.selection[name].split, target=target)

def init_count(self, table, options):
def init_count(self, table, options): # noqa: ARG002
if not table.splitted:
return
for array in table.arrays:
Expand Down Expand Up @@ -182,7 +185,7 @@ def init_options(self, tables):
if options.repeat:
self.init_repeat(table, options)

def init_only(self, table, only, split):
def init_only(self, table, only, split): # noqa: ARG002
paths = {c_id: c for c_id, c in table.types.items() if c_id not in table.combined_columns}
table.filter_columns(lambda col: col.id not in only)
paths.update({c_id: c.type for c_id, c in table.columns.items()})
Expand All @@ -192,12 +195,12 @@ def get_table(self, pointer):
return self._lookup_map.get(pointer) or self._types_map.get(pointer)

def flatten(self, releases):
"""Flatten releases
"""
Flatten releases.
:param releases: releases as iterable object
:return: Iterator over mapping between table name and list of rows for each release
"""

for counter, release in enumerate(releases):
to_flatten = deque([("", "", "", {}, release, {})])
rows = Rows(ocid=release["ocid"], buyer=release.get("buyer", {}), data=defaultdict(list))
Expand Down
4 changes: 2 additions & 2 deletions spoonbill/i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@


def translate(msg_id, lang=LOCALE):
"""Simple wrapper of python's gettext with ability to override desired language"""
"""Wrap Python's gettext with ability to override desired language."""
return translator(lang).gettext(msg_id)


Expand All @@ -34,7 +34,7 @@ def translator(lang):
try:
return gettext.translation("spoonbill", path, languages=[lang], fallback=None)
except FileNotFoundError as e:
warnings.warn(f"{e.strerror} {e.filename} in language {lang}")
warnings.warn(f"{e.strerror} {e.filename} in language {lang}", stacklevel=2)
return gettext.NullTranslations()


Expand Down
8 changes: 2 additions & 6 deletions spoonbill/rowdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ def as_dict(self):

@dataclass
class Row(MappingBase):
"""
Row data container
"""
"""Row data container."""

row_id: str
table_name: str
Expand All @@ -39,9 +37,7 @@ class Row(MappingBase):

@dataclass
class Rows(MappingBase):
"""
Flattened rows for each object
"""
"""Flattened rows for each object."""

ocid: str
buyer: Mapping[str, str]
Expand Down
24 changes: 5 additions & 19 deletions spoonbill/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,7 @@ def __post_init__(self):
if obj:
init = {}
for name, col in obj.items():
if not is_dataclass(col):
col = Column(**col)
init[name] = col
init[name] = col if is_dataclass(col) else Column(**col)
setattr(self, attr, init)
cols = DEFAULT_FIELDS_COMBINED
if self.is_root and not self.is_combined:
Expand All @@ -112,17 +110,11 @@ def _counter(self, split, cond):
return [header for header, col in cols.items() if cond(col)]

def missing_rows(self, *, split=True):
"""
Return the columns that are available in the schema, but not present in the analyzed data.
"""

"""Return the columns that are available in the schema, but not present in the analyzed data."""
return self._counter(split, lambda c: c.hits == 0)

def available_rows(self, *, split=True):
"""
Return the columns that are available in the analyzed data.
"""

"""Return the columns that are available in the analyzed data."""
return self._counter(split, lambda c: c.hits > 0)

def filter_columns(self, func):
Expand Down Expand Up @@ -189,10 +181,7 @@ def add_column(self, path, item_type, title, *, propagated=False, additional=Fal
self.types[path] = item_type

def is_array(self, path):
"""
Check whether the given path is in any table's arrays.
"""

"""Check whether the given path is in any table's arrays."""
for array in sorted(self.arrays, reverse=True):
if common_prefix(array, path) == array:
return array
Expand Down Expand Up @@ -234,10 +223,7 @@ def set_array(self, header, item):
return False

def inc(self):
"""
Increment the number of rows in the table.
"""

"""Increment the number of rows in the table."""
self.total_rows += 1
for col_name in DEFAULT_FIELDS_COMBINED:
self.inc_column(col_name, col_name)
Expand Down
23 changes: 9 additions & 14 deletions spoonbill/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@

class DataPreprocessor:
"""
Data analyzer
Processes the given schema and, based on this, extracts information from the iterable dataset.
Process the given schema and, based on this, extract information from the iterable dataset.
:param schema: The dataset's schema
:param root_tables: The paths which should become root tables
Expand Down Expand Up @@ -95,9 +93,7 @@ def guess_type(self, item):
return [PYTHON_TO_JSON_TYPE.get(type(item).__name__)]

def init_tables(self, tables, *, is_combined=False):
"""
Initialize the root tables with default fields.
"""
"""Initialize the root tables with default fields."""
for name, path in tables.items():
table = Table(name, path, is_root=True, is_combined=is_combined, parent="")
self.tables[name] = table
Expand All @@ -122,9 +118,7 @@ def prepare_tables(self):
self.init_tables(self.combined_tables, is_combined=True)

def parse_schema(self):
"""
Extract information from the schema.
"""
"""Extract information from the schema."""
self.load_schema()
proxy = add_paths_to_schema(self.schema)
to_analyze = deque([("", "", {}, proxy)])
Expand Down Expand Up @@ -178,7 +172,7 @@ def parse_schema(self):
def add_column(self, pointer, typeset):
self.current_table.add_column(pointer, typeset, pointer)

def _add_table(self, table, pointer):
def _add_table(self, table, pointer): # noqa: ARG002
self.tables[table.name] = table
self.current_table = table
self.get_table.cache_clear()
Expand Down Expand Up @@ -215,7 +209,7 @@ def get_table(self, path):
return None
return candidates[0]

def add_preview_row(self, rows, item_id, parent_key):
def add_preview_row(self, rows, item_id, parent_key): # noqa: ARG002
"""
Append a mostly-empty row to the previews.
Expand Down Expand Up @@ -260,7 +254,7 @@ def add_joinable_column(self, abs_pointer, pointer):
pointer, JOINABLE, pointer, additional=True, abs_path=abs_pointer, header=pointer
)

def handle_array_expanded(self, pointer, item, abs_path, key):
def handle_array_expanded(self, pointer, item, abs_path, key): # noqa: ARG002
splitted = len(item) >= self.table_threshold
if splitted:
self.current_table.split(pointer)
Expand All @@ -278,7 +272,7 @@ def drop(col):
for table in self.tables.values():
table.filter_columns(drop)

def process_items(self, releases, *, with_preview=True):
def process_items(self, releases, *, with_preview=True): # noqa: ARG002
"""
Analyze releases.
Expand Down Expand Up @@ -430,7 +424,8 @@ def restore(cls, path):

def extend_table_types(self, pointer, item):
"""
Check if path belong to table and expand its types
Check if a path belongs to a table and expand its types.
:param pointer: Path to an item
:param item: Item being analyzed
"""
Expand Down
Loading

0 comments on commit 2103289

Please sign in to comment.