Refactor Code For Better Extensibility #129

Draft · wants to merge 15 commits into base: realtime
4 changes: 2 additions & 2 deletions deployment/docker_trigger.py
@@ -4,10 +4,10 @@
from echoflow import echoflow_start
from echoflow.stages.echoflow_trigger import echoflow_trigger
from prefect import flow
-from prefect.task_runners import SequentialTaskRunner
+from prefect.task_runners import ThreadPoolTaskRunner
from typing import Any, Dict, Optional, Union

@flow(name="Docker-Trigger", task_runner=SequentialTaskRunner())
@flow(name="Docker-Trigger", task_runner=ThreadPoolTaskRunner(max_workers=1))
def docker_trigger(
dataset_config: Union[dict, str, Path],
pipeline_config: Union[dict, str, Path],
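Note: SequentialTaskRunner was removed in Prefect 3; ThreadPoolTaskRunner(max_workers=1) is the usual replacement, since a single worker thread still executes submitted tasks one at a time. A minimal sketch of the pattern (flow and task names here are illustrative, not from this repo):

from prefect import flow, task
from prefect.task_runners import ThreadPoolTaskRunner

@task
def step(n: int) -> int:
    return n * 2

# max_workers=1 serializes task execution, matching the old SequentialTaskRunner.
@flow(name="sequential-demo", task_runner=ThreadPoolTaskRunner(max_workers=1))
def demo() -> list[int]:
    futures = [step.submit(i) for i in range(3)]  # submitted to the pool...
    return [f.result() for f in futures]          # ...but run strictly in order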
3 changes: 2 additions & 1 deletion echodataflow/__init__.py
@@ -7,8 +7,9 @@
echodataflow_create_prefect_profile,
echodataflow_start, get_active_profile,
load_profile)
-from .utils.config_utils import extract_fs, glob_url, load_block
+from .utils.config_utils import extract_fs, glob_url
from .utils.file_utils import get_ed_list, get_last_run_output, get_zarr_list
+from .utils.filesystem_utils import load_block
from .docker_trigger import docker_trigger

try:
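Note: load_block has moved out of config_utils into the new filesystem_utils module, so any downstream code importing it directly needs the new path:

# Before this PR:
# from echodataflow.utils.config_utils import load_block
# After:
from echodataflow.utils.filesystem_utils import load_block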
4 changes: 2 additions & 2 deletions echodataflow/docker_trigger.py
@@ -1,10 +1,10 @@
from pathlib import Path
from echodataflow.stages.echodataflow_trigger import echodataflow_trigger
from prefect import flow
-from prefect.task_runners import SequentialTaskRunner
+from prefect.task_runners import ThreadPoolTaskRunner
from typing import Any, Dict, Optional, Union

@flow(name="docker-trigger-latest", task_runner=SequentialTaskRunner())
@flow(name="docker-trigger-latest", task_runner=ThreadPoolTaskRunner(max_workers=1))
def docker_trigger(
dataset_config: Union[dict, str, Path],
pipeline_config: Union[dict, str, Path],
4 changes: 3 additions & 1 deletion echodataflow/extensions/file_downloader.py
@@ -15,10 +15,12 @@
from prefect.client.schemas.objects import FlowRun, StateType
from prefect.states import Cancelled

-from echodataflow.utils.config_utils import glob_url, handle_storage_options
+from echodataflow.utils.config_utils import glob_url
from echodataflow.utils.file_utils import extract_fs, make_temp_folder
import shlex

+from echodataflow.utils.filesystem_utils import handle_storage_options


@task
def download_temp_file(file_url: str, storage_options: Dict[str, Any], dest_dir: str, delete_on_transfer: bool, replace: bool) -> str:
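Note: judging from its call sites in this PR, handle_storage_options consolidates the isinstance checks that were previously scattered across callers: it accepts None, an fsspec-style dict, a dict naming a stored Block, or a Block instance, and always returns a plain dict. A rough sketch of that contract (hypothetical body; the real helper lives in echodataflow/utils/filesystem_utils.py and may differ, and the post-refactor home of get_storage_options is assumed):

from typing import Any, Dict, Union
from prefect.blocks.core import Block
from echodataflow.utils.config_utils import get_storage_options  # assumed location
from echodataflow.utils.filesystem_utils import load_block

def handle_storage_options(storage_options: Union[Block, Dict[str, Any], None] = None) -> Dict[str, Any]:
    # Hypothetical reconstruction from this PR's call sites.
    if isinstance(storage_options, Block):
        return get_storage_options(storage_options=storage_options)
    if isinstance(storage_options, dict) and storage_options.get("block_name"):
        block = load_block(name=storage_options["block_name"],
                           stype=storage_options.get("type"))
        return get_storage_options(storage_options=block)
    return storage_options if isinstance(storage_options, dict) else {}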
8 changes: 4 additions & 4 deletions echodataflow/extensions/file_monitor.py
@@ -19,8 +19,8 @@

from echodataflow.models.datastore import StorageType
from echodataflow.models.run import EDFRun, FileDetails
-from echodataflow.utils.config_utils import glob_url, load_block
-from prefect.task_runners import SequentialTaskRunner
+from echodataflow.utils.config_utils import glob_url
+from echodataflow.utils.filesystem_utils import load_block

@task
def execute_flow(
@@ -112,10 +112,10 @@ def file_monitor(

new_run = datetime.now(tz=timezone.utc).isoformat()
edfrun: EDFRun = None
-try:
+try:
edfrun = load_block(
name=block_name,
-type=StorageType.EDFRUN,
+stype=StorageType.EDFRUN,
)
except Exception as e:
print(e)
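Note the keyword rename from type= to stype= in this load_block call: the new name stops shadowing Python's builtin type inside the helper. Usage under the new signature (the block name here is illustrative):

from echodataflow.models.datastore import StorageType
from echodataflow.utils.filesystem_utils import load_block

# 'stype' (storage type) replaces the old 'type' kwarg, which shadowed the builtin.
edfrun = load_block(name="edf-run-block", stype=StorageType.EDFRUN)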
8 changes: 3 additions & 5 deletions echodataflow/models/echodataflow_config.py
@@ -18,11 +18,9 @@
Email: [email protected]
Date: August 22, 2023
"""
-import json
from typing import Any, Dict, List, Optional

from prefect.blocks.core import Block
-from pydantic import SecretStr

from .datastore import StorageType

@@ -43,9 +41,9 @@ class EchodataflowPrefectConfig(Block):
class Config:
arbitrary_types_allowed = True

-prefect_account_id: str = None
-prefect_api_key: str = None
-prefect_workspace_id: str = None
+prefect_account_id: Optional[str] = None
+prefect_api_key: Optional[str] = None
+prefect_workspace_id: Optional[str] = None
profile_name: str = None


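Note: the Optional[str] annotations matter if the project is on Pydantic v2, which dropped v1's implicit-optional behavior — a plain str field with a None default no longer accepts None. A minimal standalone illustration (not this repo's Block subclass):

from typing import Optional
from pydantic import BaseModel

class Cfg(BaseModel):
    api_key: Optional[str] = None  # explicit Optional: None validates

print(Cfg(api_key=None))  # ok
# With `api_key: str = None`, Pydantic v2 raises a ValidationError for
# api_key=None, since v2 removed v1's implicit Optional conversion.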
7 changes: 3 additions & 4 deletions echodataflow/stages/echodataflow.py
@@ -47,9 +47,9 @@
)

import echopype as ep
-from echodataflow.utils.config_utils import load_block

from echodataflow.stages.echodataflow_trigger import echodataflow_trigger
+from echodataflow.utils.filesystem_utils import handle_storage_options


def check_internet_connection(host="8.8.8.8", port=53, timeout=5):
@@ -236,7 +236,7 @@ def echodataflow_start(

# Try loading the Prefect config block
try:
load_block(name="echodataflow-config", type=StorageType.ECHODATAFLOW)
handle_storage_options({'block_name':"echodataflow-config", 'type':StorageType.ECHODATAFLOW})
except ValueError:
print(
"\nNo Prefect Cloud Configuration found. Creating Prefect Local named 'echodataflow-local'. Please add your prefect cloud "
@@ -546,8 +546,7 @@ def load_credential_configuration(sync: bool = False):
current_config = asyncio.run(current_config)
if current_config is not None:
for base in current_config.blocks:
-block = load_block(base.name, base.type)
-block_dict = dict(block)
+block_dict = handle_storage_options(base)
block_dict["name"] = base.name
block_dict["active"] = base.active
block_dict["options"] = json.dumps(base.options)
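Note: echodataflow_start now probes for the stored Prefect config through handle_storage_options rather than load_block; the surrounding try/except still treats a ValueError as "no cloud config, create a local profile". The pattern, sketched with a hypothetical fallback:

from echodataflow.models.datastore import StorageType
from echodataflow.utils.filesystem_utils import handle_storage_options

try:
    # Raises ValueError when no such block exists.
    handle_storage_options({"block_name": "echodataflow-config",
                            "type": StorageType.ECHODATAFLOW})
except ValueError:
    setup_local_profile()  # hypothetical stand-in for the real fallback code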
126 changes: 22 additions & 104 deletions echodataflow/stages/echodataflow_trigger.py
@@ -17,28 +17,24 @@
import json
from pathlib import Path
from typing import Optional, Union
-from fastapi.encoders import jsonable_encoder

+from fastapi.encoders import jsonable_encoder
from prefect import flow
-from prefect.task_runners import SequentialTaskRunner
-from prefect.blocks.core import Block
-from prefect.variables import Variable
+from prefect.task_runners import ThreadPoolTaskRunner

from echodataflow.aspects.singleton_echodataflow import Singleton_Echodataflow
from echodataflow.models.datastore import Dataset
from echodataflow.models.pipeline import Recipe
from echodataflow.utils import log_util
-from echodataflow.utils.config_utils import (
-check_config,
-extract_config,
-get_storage_options,
-load_block,
-)
+from echodataflow.utils.config_utils import (check_config,
+parse_dynamic_parameters,
+parse_yaml_config)
+from echodataflow.utils.filesystem_utils import handle_storage_options

from .subflows.initialization_flow import init_flow


@flow(name="Echodataflow", task_runner=SequentialTaskRunner())
@flow(name="Echodataflow", task_runner=ThreadPoolTaskRunner(max_workers=1))
def echodataflow_trigger(
dataset_config: Union[dict, str, Path],
pipeline_config: Union[dict, str, Path],
@@ -82,48 +78,15 @@ def echodataflow_trigger(
print("Pipeline output:", pipeline_output)
"""

-if storage_options:
-# Check if storage_options is a Block (fsspec storage) and convert it to a dictionary
-if isinstance(storage_options, Block):
-storage_options = get_storage_options(storage_options=storage_options)
-elif isinstance(storage_options, dict) and storage_options.get("block_name"):
-block = load_block(
-name=storage_options.get("block_name"), type=storage_options.get("type")
-)
-storage_options = get_storage_options(block)
-else:
-storage_options = {}

-if isinstance(dataset_config, Path):
-dataset_config = str(dataset_config)
-if isinstance(logging_config, Path):
-logging_config = str(logging_config)
-if isinstance(pipeline_config, Path):
-pipeline_config = str(pipeline_config)
+storage_options = handle_storage_options(storage_options=storage_options)

+dataset_config_dict = parse_yaml_config(config=dataset_config, storage_options=storage_options)
+logging_config_dict = parse_yaml_config(config=logging_config, storage_options=storage_options)
+pipeline_config_dict = parse_yaml_config(config=pipeline_config, storage_options=storage_options)

if isinstance(json_data_path, Path):
json_data_path = str(json_data_path)

-if isinstance(dataset_config, str):
-if not dataset_config.endswith((".yaml", ".yml")):
-raise ValueError("Configuration file must be a YAML!")
-dataset_config_dict = extract_config(dataset_config, storage_options)
-elif isinstance(dataset_config, dict):
-dataset_config_dict = dataset_config
-
-if isinstance(pipeline_config, str):
-if not pipeline_config.endswith((".yaml", ".yml")):
-raise ValueError("Configuration file must be a YAML!")
-pipeline_config_dict = extract_config(pipeline_config, storage_options)
-elif isinstance(pipeline_config, dict):
-pipeline_config_dict = pipeline_config
-
-if isinstance(logging_config, str):
-if not logging_config.endswith((".yaml", ".yml")):
-raise ValueError("Configuration file must be a YAML!")
-logging_config_dict = extract_config(logging_config, storage_options)
-else:
-logging_config_dict = logging_config

log_util.log(
msg={
"msg": f"Dataset Configuration Loaded For This Run",
@@ -144,7 +107,6 @@
},
eflogging=dataset_config_dict.get("logging"),
)
-print(dataset_config_dict)

log_util.log(
msg={
@@ -172,62 +134,18 @@
check_config(dataset_config_dict, pipeline_config_dict)
pipeline = Recipe(**pipeline_config_dict)
dataset = Dataset(**dataset_config_dict)

if options.get("storage_options_override") and not options["storage_options_override"]:
storage_options = {}
if not storage_options:
if dataset.output.storage_options:
if not dataset.output.storage_options.anon:
block = load_block(
name=dataset.output.storage_options.block_name,
type=dataset.output.storage_options.type,
)
dataset.output.storage_options_dict = get_storage_options(block)
else:
dataset.output.storage_options_dict = {"anon": dataset.output.storage_options.anon}

if dataset.args.storage_options:
if not dataset.args.storage_options.anon:
block = load_block(
name=dataset.args.storage_options.block_name,
type=dataset.args.storage_options.type,
)
dataset.args.storage_options_dict = get_storage_options(block)
else:
dataset.args.storage_options_dict = {"anon": dataset.args.storage_options.anon}
if dataset.args.group:
if dataset.args.group.storage_options:
if not dataset.args.group.storage_options.anon:
block = load_block(
name=dataset.args.group.storage_options.block_name,
type=dataset.args.group.storage_options.type,
)
dataset.args.group.storage_options_dict = get_storage_options(block)
else:
dataset.args.group.storage_options_dict = {
"anon": dataset.args.group.storage_options.anon
}
else:

if options.get("storage_options_override", False):
dataset.output.storage_options_dict = storage_options
dataset.args.storage_options_dict = storage_options
dataset.args.group.storage_options_dict = storage_options
+else:
+dataset.output.storage_options_dict = handle_storage_options(storage_options=dataset.output.storage_options)
+dataset.args.storage_options_dict = handle_storage_options(storage_options=dataset.args.storage_options)
+dataset.args.group.storage_options_dict = handle_storage_options(storage_options=dataset.args.group.storage_options)

-edf = Singleton_Echodataflow(log_file=logging_config_dict, pipeline=pipeline, dataset=dataset)

print("\nInitiliazing Singleton Object")
Singleton_Echodataflow(log_file=logging_config_dict, pipeline=pipeline, dataset=dataset)

-if dataset.args.parameters and dataset.args.parameters.file_name and dataset.args.parameters.file_name == "VAR_RUN_NAME":
-var: Variable = Variable.get("run_name", default=None)
-if not var:
-raise ValueError("No variable found for name `run_name`")
-else:
-dataset.args.parameters.file_name = var.value

-# Change made to enable dynamic execution using an extension
-if options and options.get("file_name"):
-dataset.args.parameters.file_name = options.get("file_name")
+dataset = parse_dynamic_parameters(dataset, options=options)

if options and options.get("run_name"):
dataset.name = options.get("run_name")

print("\nReading Configurations")
return init_flow(config=dataset, pipeline=pipeline, json_data_path=json_data_path)
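Note: the deleted VAR_RUN_NAME, file_name, and run_name branches are folded into parse_dynamic_parameters. A hypothetical reconstruction that mirrors the removed logic, including its Prefect 2-style Variable.get (the real helper in config_utils may differ; Prefect 3's Variable.get returns the value directly rather than an object with .value):

from prefect.variables import Variable

def parse_dynamic_parameters(dataset, options: dict = None):
    # Hypothetical reconstruction of the removed inline branches.
    options = options or {}
    params = dataset.args.parameters
    if params and params.file_name == "VAR_RUN_NAME":
        var = Variable.get("run_name", default=None)
        if not var:
            raise ValueError("No variable found for name `run_name`")
        params.file_name = var.value
    if options.get("file_name"):  # dynamic execution via an extension
        params.file_name = options["file_name"]
    if options.get("run_name"):
        dataset.name = options["run_name"]
    return dataset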