From e37bf6fc8e2ae018184678c74dfcb9893cf0a767 Mon Sep 17 00:00:00 2001 From: Vasily Sitnik Date: Thu, 9 Nov 2023 17:28:15 +0000 Subject: [PATCH 1/3] adding logging to io.genomio.fasta.process --- .../ensembl/io/genomio/fasta/process.py | 12 +++++- .../ensembl/io/genomio/utils/__init__.py | 1 + .../ensembl/io/genomio/utils/logging.py | 39 +++++++++++++++++++ 3 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 src/python/ensembl/io/genomio/utils/logging.py diff --git a/src/python/ensembl/io/genomio/fasta/process.py b/src/python/ensembl/io/genomio/fasta/process.py index 10fa34171..fa845c0c7 100644 --- a/src/python/ensembl/io/genomio/fasta/process.py +++ b/src/python/ensembl/io/genomio/fasta/process.py @@ -16,6 +16,8 @@ __all__ = ["GFFParserError", "get_peptides_to_exclude", "prep_fasta_data"] +import logging + from pathlib import Path from os import PathLike from typing import List, Optional, Set @@ -23,6 +25,7 @@ from Bio import SeqIO from ensembl.io.genomio.utils.archive_utils import open_gz_file +from ensembl.io.genomio.utils.logging import setup_logging from ensembl.utils.argparse import ArgumentParser @@ -41,13 +44,14 @@ def get_peptides_to_exclude(genbank_path: PathLike, seqr_to_exclude: Set[str]) - with open_gz_file(genbank_path) as in_genbank: for record in SeqIO.parse(in_genbank, "genbank"): if record.id in seqr_to_exclude: - print(f"Skip sequence {record.id}") + logging.info(f"Skip sequence {record.id}") for feat in record.features: if feat.type == "CDS": if "protein_id" in feat.qualifiers: feat_id = feat.qualifiers["protein_id"] peptides_to_exclude.add(feat_id[0]) else: + logging.critical(f"Peptide without peptide ID ${feat}") raise GFFParserError(f"Peptide without peptide ID ${feat}") return peptides_to_exclude @@ -82,7 +86,7 @@ def prep_fasta_data( with open_gz_file(file_path) as in_fasta: for record in SeqIO.parse(in_fasta, "fasta"): if record.id in to_exclude: - print(f"Skip record ${record.id}") + logging.info(f"Skip record ${record.id}") else: records.append(record) with Path(fasta_outfile).open("w") as out_fasta: @@ -96,6 +100,10 @@ def main() -> None: parser.add_argument_src_path("--genbank_infile", help="Input GenBank GBFF file") parser.add_argument_dst_path("--fasta_outfile", required=True, help="Output FASTA file") parser.add_argument("--peptide_mode", action="store_true", help="Process proteins instead of DNA") + parser.add_argument("-v", "--verbose", action="store_true", help="Verbose level logging") + parser.add_argument("-d", "--debug", action="store_true", help="Debug level logging") args = parser.parse_args() + setup_logging(args) + prep_fasta_data(**vars(args)) diff --git a/src/python/ensembl/io/genomio/utils/__init__.py b/src/python/ensembl/io/genomio/utils/__init__.py index 99e82a3b7..c966048ae 100644 --- a/src/python/ensembl/io/genomio/utils/__init__.py +++ b/src/python/ensembl/io/genomio/utils/__init__.py @@ -16,3 +16,4 @@ from .archive_utils import * from .json_utils import * +from .logging import * diff --git a/src/python/ensembl/io/genomio/utils/logging.py b/src/python/ensembl/io/genomio/utils/logging.py new file mode 100644 index 000000000..d53cde21f --- /dev/null +++ b/src/python/ensembl/io/genomio/utils/logging.py @@ -0,0 +1,39 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utils to deal with logging.""" + +__all__ = ["setup_logging"] + +import argparse +import logging + +LOGGING_FORMAT = "%(asctime)s\t%(levelname)s\t%(message)s" +DATE_FORMAT = r"%Y-%m-%d_%H:%M:%S" + +def setup_logging(args: argparse.Namespace): + """Setup logging infrustucture.""" + """args: argparse.Namespace -- args with "debug" and "verbose" options.""" + log_level = None + if args.debug: + log_level = logging.DEBUG + elif args.verbose: + log_level = logging.INFO + + # reload(logging) + logging.basicConfig( + format=LOGGING_FORMAT, + datefmt=DATE_FORMAT, + level=log_level, + ) From 288283258d6031a46ae4158193ab5385e8c0924e Mon Sep 17 00:00:00 2001 From: Vasily Sitnik Date: Thu, 9 Nov 2023 22:51:21 +0000 Subject: [PATCH 2/3] generic utils to setup logging --- .../ensembl/io/genomio/utils/logging.py | 68 +++++++++++++++---- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/src/python/ensembl/io/genomio/utils/logging.py b/src/python/ensembl/io/genomio/utils/logging.py index d53cde21f..1a87732d0 100644 --- a/src/python/ensembl/io/genomio/utils/logging.py +++ b/src/python/ensembl/io/genomio/utils/logging.py @@ -18,22 +18,62 @@ import argparse import logging +from typing import Optional + +# default logging formats LOGGING_FORMAT = "%(asctime)s\t%(levelname)s\t%(message)s" DATE_FORMAT = r"%Y-%m-%d_%H:%M:%S" -def setup_logging(args: argparse.Namespace): +# helper functions +def _prepare_console_handler(*, debug: bool = False, verbose: bool = False) -> logging.StreamHandler: + """setup console handler with different logging levels""" + console_h = logging.StreamHandler() + # set default logging level + if debug: + console_h.setLevel(logging.DEBUG) + elif verbose: + console_h.setLevel(logging.INFO) + return console_h + + +def _prepare_file_handler(filename: Optional[str] = None, *, debug: bool = False) -> Optional[logging.FileHandler]: + """setup file handler with default loggin.INFO level""" + """retuns None if no file name defined""" + if filename is None: + return None + + file_h = logging.FileHandler(filename) + + file_h.setLevel(logging.INFO) + if debug: + file_h.setLevel(logging.DEBUG) + + return file_h + + +def setup_logging( + args: argparse.Namespace, + *, # no positional arguments allowed after this + name: Optional[str] = None, + ) -> logging.Logger: """Setup logging infrustucture.""" - """args: argparse.Namespace -- args with "debug" and "verbose" options.""" - log_level = None - if args.debug: - log_level = logging.DEBUG - elif args.verbose: - log_level = logging.INFO - - # reload(logging) - logging.basicConfig( - format=LOGGING_FORMAT, - datefmt=DATE_FORMAT, - level=log_level, - ) + """args: argparse.Namespace -- args with "debug", "verbose" and "logfile" options.""" + # get logger with a specified name (or the default one) + logger = logging.getLogger(name) + + # set up shared formatter + formatter = logging.Formatter(fmt = LOGGING_FORMAT, datefmt = DATE_FORMAT) + + # adding console handler + console_h = _prepare_console_handler(debug = args.debug, verbose = args.verbose) + console_h.setFormatter(formatter) + logger.addHandler(console_h) + + # adding logging to file + file_h = _prepare_file_handler(args.logfile) + if file_h: + file_h.setFormatter(formatter) + logger.addHandler(file_h) + + return logger From 19a0a26ade64189e55fee8cd82fc083cd76c5f57 Mon Sep 17 00:00:00 2001 From: Vasily Sitnik Date: Thu, 9 Nov 2023 22:52:58 +0000 Subject: [PATCH 3/3] adding custom logger with a failback to the "logging" module itself --- src/python/ensembl/io/genomio/fasta/process.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/python/ensembl/io/genomio/fasta/process.py b/src/python/ensembl/io/genomio/fasta/process.py index fa845c0c7..4151b016a 100644 --- a/src/python/ensembl/io/genomio/fasta/process.py +++ b/src/python/ensembl/io/genomio/fasta/process.py @@ -16,8 +16,6 @@ __all__ = ["GFFParserError", "get_peptides_to_exclude", "prep_fasta_data"] -import logging - from pathlib import Path from os import PathLike from typing import List, Optional, Set @@ -28,6 +26,10 @@ from ensembl.io.genomio.utils.logging import setup_logging from ensembl.utils.argparse import ArgumentParser +# logging references module, but can be overridden by a specific logger +import logging +logger = logging # by default + exclude_seq_regions: List[str] = [] @@ -44,14 +46,14 @@ def get_peptides_to_exclude(genbank_path: PathLike, seqr_to_exclude: Set[str]) - with open_gz_file(genbank_path) as in_genbank: for record in SeqIO.parse(in_genbank, "genbank"): if record.id in seqr_to_exclude: - logging.info(f"Skip sequence {record.id}") + logger.info(f"Skip sequence {record.id}") for feat in record.features: if feat.type == "CDS": if "protein_id" in feat.qualifiers: feat_id = feat.qualifiers["protein_id"] peptides_to_exclude.add(feat_id[0]) else: - logging.critical(f"Peptide without peptide ID ${feat}") + logger.critical(f"Peptide without peptide ID ${feat}") raise GFFParserError(f"Peptide without peptide ID ${feat}") return peptides_to_exclude @@ -86,7 +88,7 @@ def prep_fasta_data( with open_gz_file(file_path) as in_fasta: for record in SeqIO.parse(in_fasta, "fasta"): if record.id in to_exclude: - logging.info(f"Skip record ${record.id}") + logger.info(f"Skip record ${record.id}") else: records.append(record) with Path(fasta_outfile).open("w") as out_fasta: @@ -102,8 +104,9 @@ def main() -> None: parser.add_argument("--peptide_mode", action="store_true", help="Process proteins instead of DNA") parser.add_argument("-v", "--verbose", action="store_true", help="Verbose level logging") parser.add_argument("-d", "--debug", action="store_true", help="Debug level logging") + parser.add_argument("-l", "--logfile", required=False, type=str, help="file to log to") args = parser.parse_args() - setup_logging(args) + logger = setup_logging(args, name = __name__) prep_fasta_data(**vars(args))