From 19470b44775d6ec90ccfa74176c70e20c48ce38c Mon Sep 17 00:00:00 2001
From: Nezar Abdennur
Date: Thu, 20 Jun 2024 04:47:36 -0400
Subject: [PATCH] maint: RF101 Bugbear lint checks must be selected

---
 bioframe/core/arrops.py        | 10 ++++++++--
 bioframe/core/checks.py        |  4 ++--
 bioframe/core/construction.py  |  4 ++--
 bioframe/core/stringops.py     |  4 ++--
 bioframe/extras.py             |  6 +++---
 bioframe/io/bed.py             |  3 ++-
 bioframe/io/fileops.py         | 14 +++++++-------
 bioframe/io/resources.py       |  2 +-
 bioframe/sandbox/parquet_io.py |  4 ++--
 pyproject.toml                 |  1 +
 10 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/bioframe/core/arrops.py b/bioframe/core/arrops.py
index 1cc8190..b79abcb 100644
--- a/bioframe/core/arrops.py
+++ b/bioframe/core/arrops.py
@@ -4,6 +4,8 @@
 import numpy as np
 import pandas as pd
 
+INT64_MAX = np.iinfo(np.int64).max
+
 
 def natsort_key(s, _NS_REGEX=re.compile(r"(\d+)", re.U)):
     return tuple([int(x) if x.isdigit() else x for x in _NS_REGEX.split(s) if x])
@@ -210,6 +212,7 @@ def _overlap_intervals_legacy(starts1, ends1, starts2, ends2, closed=False, sort
                 "One of the inputs is provided as pandas.Series and its index "
                 "will be ignored.",
                 SyntaxWarning,
+                stacklevel=2,
             )
 
     starts1 = np.asarray(starts1)
@@ -308,6 +311,7 @@ def overlap_intervals(starts1, ends1, starts2, ends2, closed=False, sort=False):
                 "One of the inputs is provided as pandas.Series and its index "
                 "will be ignored.",
                 SyntaxWarning,
+                stacklevel=2,
             )
 
     starts1 = np.asarray(starts1)
@@ -442,6 +446,7 @@ def merge_intervals(starts, ends, min_dist=0):
                 "One of the inputs is provided as pandas.Series and its index "
                 "will be ignored.",
                 SyntaxWarning,
+                stacklevel=2,
             )
 
     starts = np.asarray(starts)
@@ -473,7 +478,7 @@
 def complement_intervals(
     starts,
     ends,
-    bounds=(0, np.iinfo(np.int64).max),
+    bounds=(0, INT64_MAX),
 ):
     _, merged_starts, merged_ends = merge_intervals(starts, ends, min_dist=0)
 
@@ -534,6 +539,7 @@ def _closest_intervals_nooverlap(
                 "One of the inputs is provided as pandas.Series "
                 "and its index will be ignored.",
                 SyntaxWarning,
+                stacklevel=2,
             )
 
     starts1 = np.asarray(starts1)
@@ -776,7 +782,7 @@ def stack_intervals(starts, ends):
     occupancy = np.zeros(2, dtype=bool)
     levels = -1 * np.ones(n, dtype=np.int64)
 
-    for border, border_id in zip(borders, border_ids):
+    for border_id in border_ids:
         interval_id = np.abs(border_id) - 1
         if border_id > 0:
             if occupancy.sum() == occupancy.shape[0]:
diff --git a/bioframe/core/checks.py b/bioframe/core/checks.py
index babe6b6..279cf20 100644
--- a/bioframe/core/checks.py
+++ b/bioframe/core/checks.py
@@ -310,9 +310,9 @@ def is_contained(
         # ek1 = end is the default value
         # sk1 = start is the default value
         assert (df_view_assigned[sk1] >= df_view_assigned[sk2 + "_"]).all()
-    except AssertionError:
+    except AssertionError as e:
         if raise_errors:
-            raise AssertionError("df not contained in view_df")
+            raise AssertionError("df not contained in view_df") from e
         else:
             return False
     return True
diff --git a/bioframe/core/construction.py b/bioframe/core/construction.py
index 969cbd5..be54ca6 100644
--- a/bioframe/core/construction.py
+++ b/bioframe/core/construction.py
@@ -165,8 +165,8 @@ def from_any(regions, fill_null=False, name_col="name", cols=None):
                 else:
                     ends.append(out_df[ek1].values[i])
             out_df[ek1] = ends
-        except Exception:
-            raise ValueError("could not fill ends with provided chromsizes")
+        except Exception as e:
+            raise ValueError("could not fill ends with provided chromsizes") from e
 
     return out_df
 
diff --git a/bioframe/core/stringops.py b/bioframe/core/stringops.py
index 28dac31..584152e 100644
--- a/bioframe/core/stringops.py
+++ b/bioframe/core/stringops.py
@@ -231,8 +231,8 @@ def parse_region(
     if chromsizes is not None:
         try:
             clen = chromsizes[chrom]
-        except KeyError:
-            raise ValueError(f"Unknown sequence label: {chrom}")
+        except KeyError as e:
+            raise ValueError(f"Unknown sequence label: {chrom}") from e
 
         if end is None:
             end = clen
diff --git a/bioframe/extras.py b/bioframe/extras.py
index 6dd25dc..98d3d2f 100644
--- a/bioframe/extras.py
+++ b/bioframe/extras.py
@@ -185,7 +185,7 @@ def digest(fasta_records, enzyme):
         import Bio.Restriction as biorst
         import Bio.Seq as bioseq
     except ImportError:
-        raise ImportError("Biopython is required to use digest")
+        raise ImportError("Biopython is required to use digest") from None
 
     # http://biopython.org/DIST/docs/cookbook/Restriction.html#mozTocId447698
     if not isinstance(fasta_records, dict):
@@ -196,8 +196,8 @@
     chroms = fasta_records.keys()
     try:
         cut_finder = getattr(biorst, enzyme).search
-    except AttributeError:
-        raise ValueError(f"Unknown enzyme name: {enzyme}")
+    except AttributeError as e:
+        raise ValueError(f"Unknown enzyme name: {enzyme}") from e
 
     def _each(chrom):
         seq = bioseq.Seq(str(fasta_records[chrom][:]))
diff --git a/bioframe/io/bed.py b/bioframe/io/bed.py
index 5e9d36d..79e7525 100644
--- a/bioframe/io/bed.py
+++ b/bioframe/io/bed.py
@@ -701,7 +701,8 @@ def to_bed(
             warnings.warn(
                 f"Standard column {col} contains null values. "
                 "These will be replaced with the uninformative value "
-                f"{BED_FIELD_FILLVALUES[col]}."
+                f"{BED_FIELD_FILLVALUES[col]}.",
+                stacklevel=2,
             )
             bed[col] = df[col].fillna(BED_FIELD_FILLVALUES[col])
         else:
diff --git a/bioframe/io/fileops.py b/bioframe/io/fileops.py
index 065f93e..883cdd8 100644
--- a/bioframe/io/fileops.py
+++ b/bioframe/io/fileops.py
@@ -70,7 +70,7 @@ def read_table(filepath_or, schema=None, schema_is_strict=False, **kwargs):
         kwargs.setdefault("names", SCHEMAS[schema])
     except (KeyError, TypeError):
         if isinstance(schema, str):
-            raise ValueError(f"TSV schema not found: '{schema}'")
+            raise ValueError(f"TSV schema not found: '{schema}'") from None
         kwargs.setdefault("names", schema)
     df = pd.read_csv(filepath_or, **kwargs)
     if schema_is_strict:
@@ -167,7 +167,7 @@ def read_tabix(fp, chrom=None, start=None, end=None):
     try:
         import pysam
     except ImportError:
-        raise ImportError("pysam is required to use `read_tabix`")
+        raise ImportError("pysam is required to use `read_tabix`") from None
 
     with closing(pysam.TabixFile(fp)) as f:
         names = list(f.header) or None
@@ -242,7 +242,7 @@ def read_alignments(fp, chrom=None, start=None, end=None):
     try:
         import pysam
     except ImportError:
-        raise ImportError("pysam is required to use `read_alignments`")
+        raise ImportError("pysam is required to use `read_alignments`") from None
 
     ext = os.path.splitext(fp)[1]
     if ext == '.sam':
@@ -343,7 +343,7 @@ def load_fasta(filepath_or, engine="pysam", **kwargs):
         try:
             import pysam
         except ImportError:
-            raise ImportError("pysam is required to use engine='pysam'")
+            raise ImportError("pysam is required to use engine='pysam'") from None
 
         if is_multifile:
             for onefile in filepath_or:
@@ -359,7 +359,7 @@ def load_fasta(filepath_or, engine="pysam", **kwargs):
        try:
            import pyfaidx
        except ImportError:
-           raise ImportError("pyfaidx is required to use engine='pyfaidx'")
+           raise ImportError("pyfaidx is required to use engine='pyfaidx'") from None
 
        if is_multifile:
            for onefile in filepath_or:
@@ -518,7 +518,7 @@ def to_bigwig(df, chromsizes, outpath, value_field=None, path_to_binary=None):
             "Pass it as 'path_to_binary' parameter to bioframe.to_bigwig or "
             "install it with, for example, conda install -y -c bioconda "
             "ucsc-bedgraphtobigwig "
-        )
+        ) from None
     elif path_to_binary.endswith("bedGraphToBigWig"):
         if not os.path.isfile(path_to_binary) and os.access(path_to_binary, os.X_OK):
             raise ValueError(
@@ -599,7 +599,7 @@ def to_bigbed(df, chromsizes, outpath, schema="bed6", path_to_binary=None):
             "Pass it as 'path_to_binary' parameter to bioframe.to_bigbed or "
             "install it with, for example, conda install -y -c bioconda "
             "ucsc-bedtobigbed "
-        )
+        ) from None
     elif path_to_binary.endswith("bedToBigBed"):
         if not os.path.isfile(path_to_binary) and os.access(path_to_binary, os.X_OK):
             raise ValueError(
diff --git a/bioframe/io/resources.py b/bioframe/io/resources.py
index d9d37c0..95f88b0 100644
--- a/bioframe/io/resources.py
+++ b/bioframe/io/resources.py
@@ -222,7 +222,7 @@ def fetch_centromeres(db: str, provider: str = "local") -> pd.DataFrame:
             ("centromeres", client.fetch_centromeres),
         ]
 
-        for schema, fetcher in fetchers:
+        for schema, fetcher in fetchers:  # noqa: B007
             try:
                 df = fetcher()
                 break
diff --git a/bioframe/sandbox/parquet_io.py b/bioframe/sandbox/parquet_io.py
index b9c830b..bfc721c 100644
--- a/bioframe/sandbox/parquet_io.py
+++ b/bioframe/sandbox/parquet_io.py
@@ -40,7 +40,7 @@ def to_parquet(
         import pyarrow as pa
         import pyarrow.parquet
     except ImportError:
-        raise ImportError("Saving to parquet requires the `pyarrow` package")
+        raise ImportError("Saving to parquet requires the `pyarrow` package") from None
 
     if isinstance(pieces, pd.DataFrame):
         pieces = (pieces,)
@@ -101,7 +101,7 @@ def read_parquet(filepath, columns=None, iterator=False, **kwargs):
         except ImportError:
             raise ImportError(
                 "Iterating over Parquet data requires the `pyarrow` package."
-            )
+            ) from None
 
         class ParquetFileIterator(ParquetFile):
             def __iter__(self):
diff --git a/pyproject.toml b/pyproject.toml
index a58f808..0ab16d0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -80,6 +80,7 @@ exclude = [
 
 [tool.ruff.lint]
 extend-select = [
+    "B",  # bugbear
     # "C",  # mccabe complexity
     # "D",  # pydocstyle
     "E",  # style errors
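
Note on the `stacklevel=2` additions: these satisfy Bugbear rule B028 (no
explicit `stacklevel` in `warnings.warn`). With the default `stacklevel=1`,
the warning is attributed to the `warnings.warn(...)` line inside bioframe;
`stacklevel=2` attributes it to the user's call site instead, which is where
the fix (e.g., not passing a `pandas.Series`) has to happen. A minimal sketch
of the effect (the function name is illustrative, not from this patch):

    import warnings

    def merge(starts):
        # stacklevel=2 reports the warning at the caller's line,
        # not at this warn() call inside the library.
        warnings.warn("input index will be ignored", SyntaxWarning, stacklevel=2)

    merge([0, 5])  # the SyntaxWarning is reported at this line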
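Note on `raise ... from e` vs. `raise ... from None`: both forms satisfy B904
(a `raise` inside an `except` clause should use `from` to make exception
chaining explicit). `from e` keeps the original exception attached as
`__cause__`, so users see both tracebacks; `from None` suppresses the implicit
"During handling of the above exception, another exception occurred" context,
which suits the optional-dependency `ImportError`s here, where the swallowed
exception adds no information. A sketch of the `from e` behavior, using only
the standard library:

    chromsizes = {"chr1": 248_956_422}

    try:
        clen = chromsizes["chrX"]
    except KeyError as e:
        # The traceback shows the KeyError as the direct cause.
        raise ValueError("Unknown sequence label: chrX") from e

Replacing `from e` with `from None` would report only the ValueError.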
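Note on the remaining fixes: hoisting `np.iinfo(np.int64).max` into the
module-level `INT64_MAX` removes a function call from the
`complement_intervals` default (Bugbear B008); defaults are evaluated once at
definition time, and a named constant makes that single evaluation explicit.
The loop edits address B007 (unused loop control variable): `stack_intervals`
drops the unused `border` from the `zip`, while `fetch_centromeres` keeps the
`schema, fetcher` unpacking for readability and silences the rule with `noqa`
instead of renaming. A sketch of the B008 pattern (the signature is
illustrative, not the bioframe API):

    import numpy as np

    INT64_MAX = np.iinfo(np.int64).max  # evaluated once, at import time

    def complement(starts, ends, bounds=(0, INT64_MAX)):
        # The default tuple is built once at definition time either way;
        # naming the constant makes that explicit and satisfies B008.
        return bounds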