From 19470b44775d6ec90ccfa74176c70e20c48ce38c Mon Sep 17 00:00:00 2001
From: Nezar Abdennur
Date: Thu, 20 Jun 2024 04:47:36 -0400
Subject: [PATCH] maint: RF101 Bugbear lint checks must be selected

---
 bioframe/core/arrops.py        | 10 ++++++++--
 bioframe/core/checks.py        |  4 ++--
 bioframe/core/construction.py  |  4 ++--
 bioframe/core/stringops.py     |  4 ++--
 bioframe/extras.py             |  6 +++---
 bioframe/io/bed.py             |  3 ++-
 bioframe/io/fileops.py         | 14 +++++++-------
 bioframe/io/resources.py       |  2 +-
 bioframe/sandbox/parquet_io.py |  4 ++--
 pyproject.toml                 |  1 +
 10 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/bioframe/core/arrops.py b/bioframe/core/arrops.py
index 1cc8190..b79abcb 100644
--- a/bioframe/core/arrops.py
+++ b/bioframe/core/arrops.py
@@ -4,6 +4,8 @@
 import numpy as np
 import pandas as pd
 
+INT64_MAX = np.iinfo(np.int64).max
+
 
 def natsort_key(s, _NS_REGEX=re.compile(r"(\d+)", re.U)):
     return tuple([int(x) if x.isdigit() else x for x in _NS_REGEX.split(s) if x])
@@ -210,6 +212,7 @@ def _overlap_intervals_legacy(starts1, ends1, starts2, ends2, closed=False, sort
                 "One of the inputs is provided as pandas.Series and its index "
                 "will be ignored.",
                 SyntaxWarning,
+                stacklevel=2,
             )
 
     starts1 = np.asarray(starts1)
@@ -308,6 +311,7 @@ def overlap_intervals(starts1, ends1, starts2, ends2, closed=False, sort=False):
                 "One of the inputs is provided as pandas.Series and its index "
                 "will be ignored.",
                 SyntaxWarning,
+                stacklevel=2,
             )
 
     starts1 = np.asarray(starts1)
@@ -442,6 +446,7 @@ def merge_intervals(starts, ends, min_dist=0):
                 "One of the inputs is provided as pandas.Series and its index "
                 "will be ignored.",
                 SyntaxWarning,
+                stacklevel=2,
             )
 
     starts = np.asarray(starts)
@@ -473,7 +478,7 @@
 def complement_intervals(
     starts,
     ends,
-    bounds=(0, np.iinfo(np.int64).max),
+    bounds=(0, INT64_MAX),
 ):
     _, merged_starts, merged_ends = merge_intervals(starts, ends, min_dist=0)
 
@@ -534,6 +539,7 @@ def _closest_intervals_nooverlap(
                 "One of the inputs is provided as pandas.Series "
                 "and its index will be ignored.",
                 SyntaxWarning,
+                stacklevel=2,
             )
 
     starts1 = np.asarray(starts1)
@@ -776,7 +782,7 @@ def stack_intervals(starts, ends):
     occupancy = np.zeros(2, dtype=bool)
     levels = -1 * np.ones(n, dtype=np.int64)
 
-    for border, border_id in zip(borders, border_ids):
+    for border_id in border_ids:
         interval_id = np.abs(border_id) - 1
         if border_id > 0:
             if occupancy.sum() == occupancy.shape[0]:
diff --git a/bioframe/core/checks.py b/bioframe/core/checks.py
index babe6b6..279cf20 100644
--- a/bioframe/core/checks.py
+++ b/bioframe/core/checks.py
@@ -310,9 +310,9 @@ def is_contained(
         # ek1 = end is the default value
         # sk1 = start is the default value
         assert (df_view_assigned[sk1] >= df_view_assigned[sk2 + "_"]).all()
-    except AssertionError:
+    except AssertionError as e:
         if raise_errors:
-            raise AssertionError("df not contained in view_df")
+            raise AssertionError("df not contained in view_df") from e
         else:
             return False
     return True
diff --git a/bioframe/core/construction.py b/bioframe/core/construction.py
index 969cbd5..be54ca6 100644
--- a/bioframe/core/construction.py
+++ b/bioframe/core/construction.py
@@ -165,8 +165,8 @@ def from_any(regions, fill_null=False, name_col="name", cols=None):
                 else:
                     ends.append(out_df[ek1].values[i])
             out_df[ek1] = ends
-        except Exception:
-            raise ValueError("could not fill ends with provided chromsizes")
+        except Exception as e:
+            raise ValueError("could not fill ends with provided chromsizes") from e
 
     return out_df
 
diff --git a/bioframe/core/stringops.py b/bioframe/core/stringops.py
index 28dac31..584152e 100644
--- a/bioframe/core/stringops.py
+++ b/bioframe/core/stringops.py
@@ -231,8 +231,8 @@ def parse_region(
     if chromsizes is not None:
         try:
             clen = chromsizes[chrom]
-        except KeyError:
-            raise ValueError(f"Unknown sequence label: {chrom}")
+        except KeyError as e:
+            raise ValueError(f"Unknown sequence label: {chrom}") from e
 
         if end is None:
             end = clen
diff --git a/bioframe/extras.py b/bioframe/extras.py
index 6dd25dc..98d3d2f 100644
--- a/bioframe/extras.py
+++ b/bioframe/extras.py
@@ -185,7 +185,7 @@ def digest(fasta_records, enzyme):
         import Bio.Restriction as biorst
         import Bio.Seq as bioseq
     except ImportError:
-        raise ImportError("Biopython is required to use digest")
+        raise ImportError("Biopython is required to use digest") from None
 
     # http://biopython.org/DIST/docs/cookbook/Restriction.html#mozTocId447698
     if not isinstance(fasta_records, dict):
@@ -196,8 +196,8 @@
     chroms = fasta_records.keys()
     try:
         cut_finder = getattr(biorst, enzyme).search
-    except AttributeError:
-        raise ValueError(f"Unknown enzyme name: {enzyme}")
+    except AttributeError as e:
+        raise ValueError(f"Unknown enzyme name: {enzyme}") from e
 
     def _each(chrom):
         seq = bioseq.Seq(str(fasta_records[chrom][:]))
diff --git a/bioframe/io/bed.py b/bioframe/io/bed.py
index 5e9d36d..79e7525 100644
--- a/bioframe/io/bed.py
+++ b/bioframe/io/bed.py
@@ -701,7 +701,8 @@ def to_bed(
             warnings.warn(
                 f"Standard column {col} contains null values. "
                 "These will be replaced with the uninformative value "
-                f"{BED_FIELD_FILLVALUES[col]}."
+                f"{BED_FIELD_FILLVALUES[col]}.",
+                stacklevel=2,
             )
             bed[col] = df[col].fillna(BED_FIELD_FILLVALUES[col])
         else:
diff --git a/bioframe/io/fileops.py b/bioframe/io/fileops.py
index 065f93e..883cdd8 100644
--- a/bioframe/io/fileops.py
+++ b/bioframe/io/fileops.py
@@ -70,7 +70,7 @@ def read_table(filepath_or, schema=None, schema_is_strict=False, **kwargs):
         kwargs.setdefault("names", SCHEMAS[schema])
     except (KeyError, TypeError):
         if isinstance(schema, str):
-            raise ValueError(f"TSV schema not found: '{schema}'")
+            raise ValueError(f"TSV schema not found: '{schema}'") from None
         kwargs.setdefault("names", schema)
     df = pd.read_csv(filepath_or, **kwargs)
     if schema_is_strict:
@@ -167,7 +167,7 @@ def read_tabix(fp, chrom=None, start=None, end=None):
     try:
         import pysam
     except ImportError:
-        raise ImportError("pysam is required to use `read_tabix`")
+        raise ImportError("pysam is required to use `read_tabix`") from None
 
     with closing(pysam.TabixFile(fp)) as f:
         names = list(f.header) or None
@@ -242,7 +242,7 @@ def read_alignments(fp, chrom=None, start=None, end=None):
     try:
         import pysam
     except ImportError:
-        raise ImportError("pysam is required to use `read_alignments`")
+        raise ImportError("pysam is required to use `read_alignments`") from None
 
     ext = os.path.splitext(fp)[1]
     if ext == '.sam':
@@ -343,7 +343,7 @@ def load_fasta(filepath_or, engine="pysam", **kwargs):
         try:
             import pysam
         except ImportError:
-            raise ImportError("pysam is required to use engine='pysam'")
+            raise ImportError("pysam is required to use engine='pysam'") from None
 
         if is_multifile:
             for onefile in filepath_or:
@@ -359,7 +359,7 @@ def load_fasta(filepath_or, engine="pysam", **kwargs):
        try:
            import pyfaidx
        except ImportError:
-           raise ImportError("pyfaidx is required to use engine='pyfaidx'")
+           raise ImportError("pyfaidx is required to use engine='pyfaidx'") from None
 
        if is_multifile:
            for onefile in filepath_or:
@@ -518,7 +518,7 @@ def to_bigwig(df, chromsizes, outpath, value_field=None, path_to_binary=None):
             "Pass it as 'path_to_binary' parameter to bioframe.to_bigwig or "
             "install it with, for example, conda install -y -c bioconda "
             "ucsc-bedgraphtobigwig "
-        )
+        ) from None
     elif path_to_binary.endswith("bedGraphToBigWig"):
         if not os.path.isfile(path_to_binary) and os.access(path_to_binary, os.X_OK):
             raise ValueError(
@@ -599,7 +599,7 @@ def to_bigbed(df, chromsizes, outpath, schema="bed6", path_to_binary=None):
             "Pass it as 'path_to_binary' parameter to bioframe.to_bigbed or "
             "install it with, for example, conda install -y -c bioconda "
             "ucsc-bedtobigbed "
-        )
+        ) from None
     elif path_to_binary.endswith("bedToBigBed"):
         if not os.path.isfile(path_to_binary) and os.access(path_to_binary, os.X_OK):
             raise ValueError(
diff --git a/bioframe/io/resources.py b/bioframe/io/resources.py
index d9d37c0..95f88b0 100644
--- a/bioframe/io/resources.py
+++ b/bioframe/io/resources.py
@@ -222,7 +222,7 @@ def fetch_centromeres(db: str, provider: str = "local") -> pd.DataFrame:
             ("centromeres", client.fetch_centromeres),
         ]
 
-        for schema, fetcher in fetchers:
+        for schema, fetcher in fetchers:  # noqa: B007
             try:
                 df = fetcher()
                 break
diff --git a/bioframe/sandbox/parquet_io.py b/bioframe/sandbox/parquet_io.py
index b9c830b..bfc721c 100644
--- a/bioframe/sandbox/parquet_io.py
+++ b/bioframe/sandbox/parquet_io.py
@@ -40,7 +40,7 @@ def to_parquet(
         import pyarrow as pa
         import pyarrow.parquet
     except ImportError:
-        raise ImportError("Saving to parquet requires the `pyarrow` package")
+        raise ImportError("Saving to parquet requires the `pyarrow` package") from None
 
     if isinstance(pieces, pd.DataFrame):
         pieces = (pieces,)
@@ -101,7 +101,7 @@ def read_parquet(filepath, columns=None, iterator=False, **kwargs):
         except ImportError:
             raise ImportError(
                 "Iterating over Parquet data requires the `pyarrow` package."
-            )
+            ) from None
 
         class ParquetFileIterator(ParquetFile):
             def __iter__(self):
diff --git a/pyproject.toml b/pyproject.toml
index a58f808..0ab16d0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -80,6 +80,7 @@ exclude = [
 
 [tool.ruff.lint]
 extend-select = [
+    "B",  # bugbear
     # "C",  # mccabe complexity
     # "D",  # pydocstyle
     "E",  # style errors
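
Note on the `stacklevel=2` additions: these satisfy Bugbear rule B028 (no
explicit `stacklevel` in `warnings.warn`). With the default `stacklevel=1`,
the warning is attributed to the `warnings.warn(...)` line inside bioframe;
`stacklevel=2` attributes it to the user's call site instead, which is where
the fix (e.g., not passing a `pandas.Series`) has to happen. A minimal sketch
of the effect (the function name is illustrative, not from this patch):

    import warnings

    def merge(starts):
        # stacklevel=2 reports the warning at the caller's line,
        # not at this warn() call inside the library.
        warnings.warn("input index will be ignored", SyntaxWarning, stacklevel=2)

    merge([0, 5])  # the SyntaxWarning is reported at this line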
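Note on `raise ... from e` vs. `raise ... from None`: both forms satisfy B904
(a `raise` inside an `except` clause should use `from` to make exception
chaining explicit). `from e` keeps the original exception attached as
`__cause__`, so users see both tracebacks; `from None` suppresses the implicit
"During handling of the above exception, another exception occurred" context,
which suits the optional-dependency `ImportError`s here, where the swallowed
exception adds no information. A sketch of the `from e` behavior, using only
the standard library:

    chromsizes = {"chr1": 248_956_422}

    try:
        clen = chromsizes["chrX"]
    except KeyError as e:
        # The traceback shows the KeyError as the direct cause.
        raise ValueError("Unknown sequence label: chrX") from e

Replacing `from e` with `from None` would report only the ValueError.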
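Note on the remaining fixes: hoisting `np.iinfo(np.int64).max` into the
module-level `INT64_MAX` removes a function call from the
`complement_intervals` default (Bugbear B008); defaults are evaluated once at
definition time, and a named constant makes that single evaluation explicit.
The loop edits address B007 (unused loop control variable): `stack_intervals`
drops the unused `border` from the `zip`, while `fetch_centromeres` keeps the
`schema, fetcher` unpacking for readability and silences the rule with `noqa`
instead of renaming. A sketch of the B008 pattern (the signature is
illustrative, not the bioframe API):

    import numpy as np

    INT64_MAX = np.iinfo(np.int64).max  # evaluated once, at import time

    def complement(starts, ends, bounds=(0, INT64_MAX)):
        # The default tuple is built once at definition time either way;
        # naming the constant makes that explicit and satisfies B008.
        return bounds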