diff --git a/src/python/ensembl/io/genomio/fasta/process.py b/src/python/ensembl/io/genomio/fasta/process.py index 10fa34171..4151b016a 100644 --- a/src/python/ensembl/io/genomio/fasta/process.py +++ b/src/python/ensembl/io/genomio/fasta/process.py @@ -23,8 +23,13 @@ from Bio import SeqIO from ensembl.io.genomio.utils.archive_utils import open_gz_file +from ensembl.io.genomio.utils.logging import setup_logging from ensembl.utils.argparse import ArgumentParser +# logging references module, but can be overridden by a specific logger +import logging +logger = logging # by default + exclude_seq_regions: List[str] = [] @@ -41,13 +46,14 @@ def get_peptides_to_exclude(genbank_path: PathLike, seqr_to_exclude: Set[str]) - with open_gz_file(genbank_path) as in_genbank: for record in SeqIO.parse(in_genbank, "genbank"): if record.id in seqr_to_exclude: - print(f"Skip sequence {record.id}") + logger.info(f"Skip sequence {record.id}") for feat in record.features: if feat.type == "CDS": if "protein_id" in feat.qualifiers: feat_id = feat.qualifiers["protein_id"] peptides_to_exclude.add(feat_id[0]) else: + logger.critical(f"Peptide without peptide ID ${feat}") raise GFFParserError(f"Peptide without peptide ID ${feat}") return peptides_to_exclude @@ -82,7 +88,7 @@ def prep_fasta_data( with open_gz_file(file_path) as in_fasta: for record in SeqIO.parse(in_fasta, "fasta"): if record.id in to_exclude: - print(f"Skip record ${record.id}") + logger.info(f"Skip record ${record.id}") else: records.append(record) with Path(fasta_outfile).open("w") as out_fasta: @@ -96,6 +102,11 @@ def main() -> None: parser.add_argument_src_path("--genbank_infile", help="Input GenBank GBFF file") parser.add_argument_dst_path("--fasta_outfile", required=True, help="Output FASTA file") parser.add_argument("--peptide_mode", action="store_true", help="Process proteins instead of DNA") + parser.add_argument("-v", "--verbose", action="store_true", help="Verbose level logging") + parser.add_argument("-d", 
"--debug", action="store_true", help="Debug level logging") + parser.add_argument("-l", "--logfile", required=False, type=str, help="file to log to") args = parser.parse_args() + logger = setup_logging(args, name = __name__) + prep_fasta_data(**vars(args)) diff --git a/src/python/ensembl/io/genomio/utils/__init__.py b/src/python/ensembl/io/genomio/utils/__init__.py index 99e82a3b7..c966048ae 100644 --- a/src/python/ensembl/io/genomio/utils/__init__.py +++ b/src/python/ensembl/io/genomio/utils/__init__.py @@ -16,3 +16,4 @@ from .archive_utils import * from .json_utils import * +from .logging import * diff --git a/src/python/ensembl/io/genomio/utils/logging.py b/src/python/ensembl/io/genomio/utils/logging.py new file mode 100644 index 000000000..1a87732d0 --- /dev/null +++ b/src/python/ensembl/io/genomio/utils/logging.py @@ -0,0 +1,79 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Utils to deal with logging."""

__all__ = ["setup_logging"]

import argparse
import logging
from typing import Optional


# Default logging formats
LOGGING_FORMAT = "%(asctime)s\t%(levelname)s\t%(message)s"
DATE_FORMAT = r"%Y-%m-%d_%H:%M:%S"


# Helper functions
def _prepare_console_handler(*, debug: bool = False, verbose: bool = False) -> logging.StreamHandler:
    """Create a stream handler whose level reflects the requested verbosity.

    Args:
        debug: Let records at DEBUG level and above through (takes precedence over `verbose`).
        verbose: Let records at INFO level and above through.

    Returns:
        The new handler; with neither flag set it is limited to WARNING and above.
    """
    console_h = logging.StreamHandler()
    if debug:
        level = logging.DEBUG
    elif verbose:
        level = logging.INFO
    else:
        # Explicit default instead of NOTSET, so the handler does not start emitting
        # everything once the owning logger is opened up for a more verbose handler.
        level = logging.WARNING
    console_h.setLevel(level)
    return console_h


def _prepare_file_handler(filename: Optional[str] = None, *, debug: bool = False) -> Optional[logging.FileHandler]:
    """Create a file handler at INFO level, or DEBUG level when `debug` is set.

    Args:
        filename: Path of the file to log to.
        debug: Let records at DEBUG level and above through instead of INFO and above.

    Returns:
        The new handler, or None if no file name is defined (None or empty string).
    """
    if not filename:
        return None

    file_h = logging.FileHandler(filename)
    file_h.setLevel(logging.DEBUG if debug else logging.INFO)
    return file_h


def setup_logging(
    args: argparse.Namespace,
    *,  # no positional arguments allowed after this
    name: Optional[str] = None,
) -> logging.Logger:
    """Set up the logging infrastructure from parsed command-line options.

    A stream handler is always attached; a file handler is attached as well when a log file
    is requested. Both share the module's default record and date formats. Every call attaches
    new handlers to the logger, so it is meant to be called once per logger.

    Args:
        args: Parsed arguments. The attributes `debug`, `verbose` and `logfile` are used;
            a missing attribute is treated as not set.
        name: Name of the logger to configure (the root logger if None).

    Returns:
        The configured logger.
    """
    debug = bool(getattr(args, "debug", False))
    verbose = bool(getattr(args, "verbose", False))
    logfile = getattr(args, "logfile", None)

    # Get logger with the specified name (or the root one)
    logger = logging.getLogger(name)

    # The same `debug` flag drives both handlers, so the log file can reach DEBUG level too
    handlers = [_prepare_console_handler(debug=debug, verbose=verbose)]
    file_h = _prepare_file_handler(logfile, debug=debug)
    if file_h is not None:
        handlers.append(file_h)

    # Shared formatter
    formatter = logging.Formatter(fmt=LOGGING_FORMAT, datefmt=DATE_FORMAT)
    for handler in handlers:
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    # A logger without an explicit level inherits WARNING from the root logger and drops
    # INFO/DEBUG records before any handler sees them. Open it up to the most verbose handler
    # and let each handler do its own filtering.
    logger.setLevel(min(handler.level for handler in handlers))

    return logger