Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Assembly.download module and Events.dump logging updated #203

Closed
wants to merge 9 commits into from
2 changes: 1 addition & 1 deletion pipelines/nextflow/modules/download/download_asm_data.nf
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,6 @@ process DOWNLOAD_ASM_DATA {

shell:
'''
assembly_download --accession !{meta.accession} --asm_download_dir ./
assembly_download --accession !{meta.accession} --asm_download_dir ./ -d
'''
}
3 changes: 2 additions & 1 deletion pipelines/nextflow/modules/events/dump_events.nf
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ process DUMP_EVENTS {
--user '${server.user}' \
--password '${server.password}' \
--database '${db.database}' \
--output_file "events.txt"
--output_file "events.txt" \
-d
"""
}
11 changes: 9 additions & 2 deletions src/python/ensembl/brc4/runnable/core_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
import argparse
import re
from typing import Dict, List, Any
import logging

import mysql.connector


class CoreServer:
"""Basic interface to a MySQL server with core databases.

Expand Down Expand Up @@ -55,7 +55,10 @@ def connect(self) -> None:
)

def set_database(self, db_name: str) -> None:
self._connector.database = db_name
try:
self._connector.database = db_name
except:
logging.critical(f"Unknown database: '{db_name}' - Not located on host !")
Comment on lines -58 to +61
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

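I don't think this is needed: if an exception is raised here, it should be allowed to propagate so that the script stops anyway. As written, the bare `except:` only logs a critical message and then lets execution continue without a valid database selected.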


def get_cursor(self):
return self._connector.cursor()
Expand Down Expand Up @@ -90,6 +93,10 @@ def get_cores(

dbs = self.get_all_cores()

# Check if there are databases returned from query to host
if not dbs:
logging.warning("No databases returned from query")

if prefix:
dbs = [db for db in dbs if db.startswith(f"{prefix}")]
if dbname_re:
Expand Down
28 changes: 20 additions & 8 deletions src/python/ensembl/io/genomio/assembly/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,13 +239,6 @@ def retrieve_assembly_data(
download_path = Path(download_dir)
download_dir = download_path / accession

# Configure logging
log_file = f"{accession}_download.log"
reload(logging)
logging.basicConfig(
filename=log_file, format="%(levelname)s:%(message)s", filemode="w", level=logging.DEBUG
)

# Set and create dedicated dir for download
if not download_dir.is_dir():
download_dir.mkdir(parents=True)
Expand Down Expand Up @@ -281,6 +274,25 @@ def main() -> None:
parser.add_argument_dst_path(
"--download_dir", default=Path.cwd(), help="Folder where the data will be downloaded"
)
parser.add_argument("-v", "--verbose", action="store_true", required=False,
help="Verbose level logging")
parser.add_argument("-d", "--debug", action="store_true", required=False,
help="Debug level logging")
args = parser.parse_args()

retrieve_assembly_data(**vars(args))
# Configure logging
date_format='%Y/%m/%d_%I:%M:%S(%p)'
logging_format='%(asctime)s - %(levelname)s - %(message)s'
reload(logging)
log_level = None
if args.debug:
log_level = logging.DEBUG
elif args.verbose:
log_level = logging.INFO

logging.basicConfig(
format=logging_format, datefmt=date_format,
filemode="w", level=log_level
)

retrieve_assembly_data(args.accession, args.download_dir)
Comment on lines +277 to +298
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be rewritten with the new argparse and init_logging from ensembl-py

29 changes: 25 additions & 4 deletions src/python/ensembl/io/genomio/events/dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@

from datetime import datetime
from pathlib import Path
from importlib import reload
from typing import List, Dict, Optional, Set, Tuple
import logging

from ensembl.brc4.runnable.core_server import CoreServer
from ensembl.utils.argparse import ArgumentParser
Expand Down Expand Up @@ -307,7 +309,7 @@ def get_history(self) -> List:

events = []
for session in sessions:
print(f"Mapping session {session['release']}")
logging.info(f"Mapping session {session['release']}")
pairs = self.get_pairs(session["id"])
session_events = self.make_events(pairs)
for event in session_events:
Expand All @@ -327,7 +329,7 @@ def print_events(self, events: List[StableIdEvent], output_file: Path) -> None:

"""
if not events:
print("No events to print")
logging.info("No events to print")
return
with output_file.open("w") as out_fh:
for event in events:
Expand Down Expand Up @@ -379,7 +381,7 @@ def get_pairs(self, session_id: int) -> List[Pair]:
for db in cursor:
pair = Pair(old_id=db[0], new_id=db[1])
pairs.append(pair)
print(f"{len(pairs)} stable id events")
logging.debug(f"{len(pairs)} stable id events")
return pairs

def make_events(self, pairs: List[Pair]) -> List:
Expand Down Expand Up @@ -423,7 +425,7 @@ def make_events(self, pairs: List[Pair]) -> List:
stats[name] += 1

for stat, value in stats.items():
print(f"\t{stat} = {value}")
logging.info(f"\t{stat} = {value}")

return events

Expand Down Expand Up @@ -525,8 +527,27 @@ def main() -> None:
)
parser.add_server_arguments(include_database=True)
parser.add_argument_dst_path("--output_file", required=True, help="Output file")
parser.add_argument("-v", "--verbose", action="store_true", required=False,
help="Verbose level logging")
parser.add_argument("-d", "--debug", action="store_true", required=False,
help="Debug level logging")
args = parser.parse_args()

# Configure logging
date_format='%Y/%m/%d_%I:%M:%S(%p)'
logging_format='%(asctime)s - %(levelname)s - %(message)s'
reload(logging)
log_level = None
if args.debug:
log_level = logging.DEBUG
elif args.verbose:
log_level = logging.INFO

logging.basicConfig(
format=logging_format, datefmt=date_format,
filemode="w", level=log_level
)

Comment on lines +530 to +550
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likewise, rewrite this with the new system (the new argparse and init_logging from ensembl-py).

# Start
factory = CoreServer(host=args.host, port=args.port, user=args.user, password=args.password)
factory.set_database(args.database)
Expand Down
35 changes: 28 additions & 7 deletions src/python/ensembl/io/genomio/events/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
from os import PathLike
from pathlib import Path
import re
from importlib import reload
from typing import Dict, Generator, List, Optional, Tuple
import logging

from sqlalchemy.orm import Session

Expand Down Expand Up @@ -64,7 +66,7 @@ def add_event(self, event: IdEvent) -> None:


class EventCollection:
""" "Collection of events with loader/writer in various formats."""
"""Collection of events with loader/writer in various formats."""

def __init__(self) -> None:
self.events: List[IdEvent] = []
Expand Down Expand Up @@ -143,7 +145,7 @@ def _parse_gene_diff_event(self, event_string: str) -> Generator[Tuple[str, str,
splitter = f"({event_sep})"
parts = re.split(splitter, event_string)
if len(parts) != 3:
print(f"Wrong partition: from '{event_string}' to '{parts}'")
logging.warning(f"Wrong partition: from '{event_string}' to '{parts}'")
return
[from_ids, sep, to_ids] = parts
event_name = event_symbol[sep]
Expand All @@ -165,7 +167,7 @@ def remap_to_ids(self, map_dict: Dict[str, str]):
elif event.to_id in map_dict:
event.to_id = map_dict[event.to_id]
else:
print(f"No map for to_id {event.to_id}")
logging.info(f"No map for to_id {event.to_id}")
no_map += 1

if no_map:
Expand All @@ -174,7 +176,7 @@ def remap_to_ids(self, map_dict: Dict[str, str]):
def write_events_to_file(self, output_file: PathLike) -> None:
"""Write the events to a file."""
with Path(output_file).open("w") as out_fh:
print(f"Write {len(self.events)} events to {output_file}")
logging.info(f"Write {len(self.events)} events to {output_file}")
for event in self.events:
out_fh.write(f"{event}\n")

Expand All @@ -194,7 +196,7 @@ def write_events_to_db(self, session: Session, update: bool = False) -> None:
# Then, add the mapping, and the events for this mapping
for release, mapping in mappings.items():
if update:
print(f"Adding mapping for release {release} ({len(mapping.events)} events)")
logging.info(f"Adding mapping for release {release} ({len(mapping.events)} events)")
map_session = MappingSession(new_release=mapping.release, created=mapping.release_date)
session.add(map_session)
session.flush()
Expand All @@ -217,9 +219,9 @@ def write_events_to_db(self, session: Session, update: bool = False) -> None:
session.add(id_event)
session.commit()
else:
print(f"Found mapping for release {release} ({len(mapping.events)} events)")
logging.info(f"Found mapping for release {release} ({len(mapping.events)} events)")
if not update:
print("Run your command again with '--update' to add them")
logging.info("Run your command again with '--update' to add mapping events.")


def main() -> None:
Expand All @@ -235,8 +237,27 @@ def main() -> None:
),
)
parser.add_argument("--update", action="store_true", help="Make changes to the database")
parser.add_argument("-v", "--verbose", action="store_true", required=False,
help="Verbose level logging")
parser.add_argument("-d", "--debug", action="store_true", required=False,
help="Debug level logging")
args = parser.parse_args()

# Configure logging
date_format='%Y/%m/%d_%I:%M:%S(%p)'
logging_format='%(asctime)s - %(levelname)s - %(message)s'
reload(logging)
log_level = None
if args.debug:
log_level = logging.DEBUG
elif args.verbose:
log_level = logging.INFO

logging.basicConfig(
format=logging_format, datefmt=date_format,
filemode="w", level=log_level
)

Comment on lines +240 to +260
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, rewrite this with the new argparse and init_logging from ensembl-py.

# Start
dbc = DBConnection(args.url)

Expand Down