Skip to content

Commit

Permalink
chore: Add ruff lint configuration
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Sep 16, 2024
1 parent f3dd678 commit 2103289
Show file tree
Hide file tree
Showing 12 changed files with 120 additions and 127 deletions.
18 changes: 17 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,23 @@ line-length = 119
target-version = "py39"

[tool.ruff.lint]
select = ["E", "C4", "F", "I", "W"]
select = ["ALL"]
ignore = [
"ANN", "C901", "COM812", "D203", "D212", "D415", "EM", "PERF203", "PLR091", "Q000",
"PLR2004", "PLW2901", "D1", "D205",
"PTH",
"FIX002", # todo
]

[tool.ruff.lint.flake8-builtins]
builtins-ignorelist = ["copyright"]

[tool.ruff.lint.per-file-ignores]
"docs/conf.py" = ["D100", "INP001"]
"tests/*" = [
"ARG001", "D", "FBT003", "INP001", "PLR2004", "S", "TRY003",
]
"spoonbill/cli.py" = ["ARG002"] # click

[tool.pytest.ini_options]
addopts = '--doctest-modules'
27 changes: 17 additions & 10 deletions spoonbill/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@


class FileAnalyzer:
"""Main utility for analyzing files
"""
Main utility for analyzing files.
:param workdir: Working directory
:param schema: Json schema file to use with data
:param root_tables: Path configuration which should become root tables
Expand Down Expand Up @@ -53,8 +55,10 @@ def __init__(
self.pkg_type = pkg_type
self.order = None

def analyze_file(self, filenames, *, with_preview=True):
"""Analyze provided file
def analyze_file(self, filenames, *, with_preview=True): # noqa: ARG002
"""
Analyze provided file.
:param filename: Input filename
:param with_preview: Generate preview during analysis
"""
Expand Down Expand Up @@ -89,7 +93,9 @@ def analyze_file(self, filenames, *, with_preview=True):
self.sort_tables()

def dump_to_file(self, filename):
"""Save analyzed information to file
"""
Save analyzed information to file.
:param filename: Output filename in working directory
"""
path = self.workdir / filename
Expand Down Expand Up @@ -122,10 +128,7 @@ def parse_schema(self, input_format, schema=None):
self.pkg_type = pkg_type

def sort_tables(self):
"""
Sort tables according to order of arrays in schema
:return:
"""
"""Sort tables according to order of arrays in schema."""
self.order = get_order(self.spec.schema["properties"].keys())
out_schema_tables = {
name: table for name, table in self.spec.tables.items() if name.split("_")[0] not in self.order
Expand All @@ -146,7 +149,9 @@ def sort_tables(self):


class FileFlattener:
"""Main utility for flattening files
"""
Main utility for flattening files.
:param workdir: Working directory
:param options: Flattening configuration
:param analyzer: Analyzed data object
Expand Down Expand Up @@ -196,7 +201,9 @@ def _flatten(self, filenames, writers):
yield count

def flatten_file(self, filename):
"""Flatten file
"""
Flatten file.
:param filename: Input filename in working directory
"""
workdir = self.workdir
Expand Down
6 changes: 3 additions & 3 deletions spoonbill/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""cli.py - Command line interface related routines"""
"""Command line interface-related routines."""

import logging
import pathlib
Expand All @@ -22,7 +22,7 @@


class CommaSeparated(click.ParamType):
"""Click option type to convert comma separated string into list"""
"""Click option type to convert comma-separated string into list."""

name = "comma"

Expand Down Expand Up @@ -188,7 +188,7 @@ def cli(
human,
language,
):
"""Spoonbill cli entry point"""
"""Spoonbill CLI entry point."""
if csv:
csv = pathlib.Path(csv).resolve()
if not csv.exists():
Expand Down
31 changes: 17 additions & 14 deletions spoonbill/flatten.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@

@dataclass
class TableFlattenConfig:
"""Table specific flattening configuration
"""
Table-specific flattening configuration.
:param split: Split child arrays to separate tables
:param pretty_headers: Use human friendly headers extracted from schema
Expand All @@ -36,7 +37,8 @@ class TableFlattenConfig:

@dataclass
class FlattenOptions:
"""Flattening configuration
"""
Flattening configuration.
:param selection: List of selected tables to extract from data
:param count: Include number of rows in child table in each parent table
Expand All @@ -54,18 +56,19 @@ def __post_init__(self):


class Flattener:
"""Data flattener
"""
In order to export data correctly, Flattener requires previously analyzed tables data.
In order to export data correctly Flattener requires previously analyzed tables data.
During the process flattener could add columns not based on schema analysis, such as
`itemsCount`.
During the process flattener could add columns not based on schema analysis, such as `itemsCount`.
In every generated row, depending on table type, flattener will always add a few autogenerated columns.
For root table:
* rowID
* id
* ocid
For child tables this list well be extended with `parentID` column.
- rowID
- id
- ocid
For child tables, this list is extended with `parentID` column.
:param options: Flattening options
:param tables: Analyzed tables data
Expand Down Expand Up @@ -139,7 +142,7 @@ def init_table_lookup(self, tables, table, target=None):
self._map_types(table, target=target)
self._map_cols(table, self.options.selection[name].split, target=target)

def init_count(self, table, options):
def init_count(self, table, options): # noqa: ARG002
if not table.splitted:
return
for array in table.arrays:
Expand Down Expand Up @@ -182,7 +185,7 @@ def init_options(self, tables):
if options.repeat:
self.init_repeat(table, options)

def init_only(self, table, only, split):
def init_only(self, table, only, split): # noqa: ARG002
paths = {c_id: c for c_id, c in table.types.items() if c_id not in table.combined_columns}
table.filter_columns(lambda col: col.id not in only)
paths.update({c_id: c.type for c_id, c in table.columns.items()})
Expand All @@ -192,12 +195,12 @@ def get_table(self, pointer):
return self._lookup_map.get(pointer) or self._types_map.get(pointer)

def flatten(self, releases):
"""Flatten releases
"""
Flatten releases.
:param releases: releases as iterable object
:return: Iterator over mapping between table name and list of rows for each release
"""

for counter, release in enumerate(releases):
to_flatten = deque([("", "", "", {}, release, {})])
rows = Rows(ocid=release["ocid"], buyer=release.get("buyer", {}), data=defaultdict(list))
Expand Down
4 changes: 2 additions & 2 deletions spoonbill/i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@


def translate(msg_id, lang=LOCALE):
"""Simple wrapper of python's gettext with ability to override desired language"""
"""Wrap Python's gettext with ability to override desired language."""
return translator(lang).gettext(msg_id)


Expand All @@ -34,7 +34,7 @@ def translator(lang):
try:
return gettext.translation("spoonbill", path, languages=[lang], fallback=None)
except FileNotFoundError as e:
warnings.warn(f"{e.strerror} {e.filename} in language {lang}")
warnings.warn(f"{e.strerror} {e.filename} in language {lang}", stacklevel=2)
return gettext.NullTranslations()


Expand Down
8 changes: 2 additions & 6 deletions spoonbill/rowdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ def as_dict(self):

@dataclass
class Row(MappingBase):
"""
Row data container
"""
"""Row data container."""

row_id: str
table_name: str
Expand All @@ -39,9 +37,7 @@ class Row(MappingBase):

@dataclass
class Rows(MappingBase):
"""
Flattened rows for each object
"""
"""Flattened rows for each object."""

ocid: str
buyer: Mapping[str, str]
Expand Down
24 changes: 5 additions & 19 deletions spoonbill/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,7 @@ def __post_init__(self):
if obj:
init = {}
for name, col in obj.items():
if not is_dataclass(col):
col = Column(**col)
init[name] = col
init[name] = col if is_dataclass(col) else Column(**col)
setattr(self, attr, init)
cols = DEFAULT_FIELDS_COMBINED
if self.is_root and not self.is_combined:
Expand All @@ -112,17 +110,11 @@ def _counter(self, split, cond):
return [header for header, col in cols.items() if cond(col)]

def missing_rows(self, *, split=True):
"""
Return the columns that are available in the schema, but not present in the analyzed data.
"""

"""Return the columns that are available in the schema, but not present in the analyzed data."""
return self._counter(split, lambda c: c.hits == 0)

def available_rows(self, *, split=True):
"""
Return the columns that are available in the analyzed data.
"""

"""Return the columns that are available in the analyzed data."""
return self._counter(split, lambda c: c.hits > 0)

def filter_columns(self, func):
Expand Down Expand Up @@ -189,10 +181,7 @@ def add_column(self, path, item_type, title, *, propagated=False, additional=Fal
self.types[path] = item_type

def is_array(self, path):
"""
Check whether the given path is in any table's arrays.
"""

"""Check whether the given path is in any table's arrays."""
for array in sorted(self.arrays, reverse=True):
if common_prefix(array, path) == array:
return array
Expand Down Expand Up @@ -234,10 +223,7 @@ def set_array(self, header, item):
return False

def inc(self):
"""
Increment the number of rows in the table.
"""

"""Increment the number of rows in the table."""
self.total_rows += 1
for col_name in DEFAULT_FIELDS_COMBINED:
self.inc_column(col_name, col_name)
Expand Down
23 changes: 9 additions & 14 deletions spoonbill/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@

class DataPreprocessor:
"""
Data analyzer
Processes the given schema and, based on this, extracts information from the iterable dataset.
Process the given schema and, based on this, extract information from the iterable dataset.
:param schema: The dataset's schema
:param root_tables: The paths which should become root tables
Expand Down Expand Up @@ -95,9 +93,7 @@ def guess_type(self, item):
return [PYTHON_TO_JSON_TYPE.get(type(item).__name__)]

def init_tables(self, tables, *, is_combined=False):
"""
Initialize the root tables with default fields.
"""
"""Initialize the root tables with default fields."""
for name, path in tables.items():
table = Table(name, path, is_root=True, is_combined=is_combined, parent="")
self.tables[name] = table
Expand All @@ -122,9 +118,7 @@ def prepare_tables(self):
self.init_tables(self.combined_tables, is_combined=True)

def parse_schema(self):
"""
Extract information from the schema.
"""
"""Extract information from the schema."""
self.load_schema()
proxy = add_paths_to_schema(self.schema)
to_analyze = deque([("", "", {}, proxy)])
Expand Down Expand Up @@ -178,7 +172,7 @@ def parse_schema(self):
def add_column(self, pointer, typeset):
self.current_table.add_column(pointer, typeset, pointer)

def _add_table(self, table, pointer):
def _add_table(self, table, pointer): # noqa: ARG002
self.tables[table.name] = table
self.current_table = table
self.get_table.cache_clear()
Expand Down Expand Up @@ -215,7 +209,7 @@ def get_table(self, path):
return None
return candidates[0]

def add_preview_row(self, rows, item_id, parent_key):
def add_preview_row(self, rows, item_id, parent_key): # noqa: ARG002
"""
Append a mostly-empty row to the previews.
Expand Down Expand Up @@ -260,7 +254,7 @@ def add_joinable_column(self, abs_pointer, pointer):
pointer, JOINABLE, pointer, additional=True, abs_path=abs_pointer, header=pointer
)

def handle_array_expanded(self, pointer, item, abs_path, key):
def handle_array_expanded(self, pointer, item, abs_path, key): # noqa: ARG002
splitted = len(item) >= self.table_threshold
if splitted:
self.current_table.split(pointer)
Expand All @@ -278,7 +272,7 @@ def drop(col):
for table in self.tables.values():
table.filter_columns(drop)

def process_items(self, releases, *, with_preview=True):
def process_items(self, releases, *, with_preview=True): # noqa: ARG002
"""
Analyze releases.
Expand Down Expand Up @@ -430,7 +424,8 @@ def restore(cls, path):

def extend_table_types(self, pointer, item):
"""
Check if path belong to table and expand its types
Check if a path belongs to a table and expand its types.
:param pointer: Path to an item
:param item: Item being analyzed
"""
Expand Down
Loading

0 comments on commit 2103289

Please sign in to comment.