diff --git a/CODEOWNERS b/CODEOWNERS index eec474fb..c328e105 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -7,4 +7,3 @@ # These owners will be the default owners for everything in the repo. # Unless a later match takes precedence, they will be requested for # review when someone opens a pull request. - diff --git a/ai/setup.py b/ai/setup.py index 9b73f924..e38f72d2 100644 --- a/ai/setup.py +++ b/ai/setup.py @@ -8,13 +8,7 @@ version="0.1", packages=find_packages("src"), package_dir={"": "src"}, - entry_points={ - "console_scripts": [ - "itwinai=itwinai.cli:app" - ] - }, + entry_points={"console_scripts": ["itwinai=itwinai.cli:app"]}, # Pip dependencies - install_requires=[ - 'jsonargparse[signatures]>=4.17.0' - ] + install_requires=["jsonargparse[signatures]>=4.17.0"], ) diff --git a/ai/src/itwinai/plmodels/__init__.py b/ai/src/itwinai/backend/__init__.py similarity index 100% rename from ai/src/itwinai/plmodels/__init__.py rename to ai/src/itwinai/backend/__init__.py diff --git a/ai/src/itwinai/backend/components.py b/ai/src/itwinai/backend/components.py new file mode 100644 index 00000000..2aa178e8 --- /dev/null +++ b/ai/src/itwinai/backend/components.py @@ -0,0 +1,63 @@ +from abc import ABCMeta, abstractmethod + + +class Executable(metaclass=ABCMeta): + @abstractmethod + def execute(self, args): + pass + + @abstractmethod + def setup(self, args): + pass + + +class Trainer(Executable): + @abstractmethod + def train(self, data): + pass + + +class DataGetter(Executable): + @abstractmethod + def load(self, args): + pass + + +class DataPreproc(Executable): + @abstractmethod + def preproc(self, args): + pass + + +class StatGetter(Executable): + @abstractmethod + def stats(self, args): + pass + + +class Evaluator(Executable): + @abstractmethod + def evaluate(self, args): + pass + + +class Saver(Executable): + @abstractmethod + def save(self, args): + pass + + +class Executor(Executable): + @abstractmethod + def execute(self, pipeline): + pass + + @abstractmethod + def setup(self, pipeline): + pass + + +class Logger(metaclass=ABCMeta): + @abstractmethod + def log(self): + pass diff --git a/ai/src/itwinai/backend/tensorflow/__init__.py b/ai/src/itwinai/backend/tensorflow/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ai/src/itwinai/backend/tensorflow/executor.py b/ai/src/itwinai/backend/tensorflow/executor.py new file mode 100644 index 00000000..09af0caf --- /dev/null +++ b/ai/src/itwinai/backend/tensorflow/executor.py @@ -0,0 +1,15 @@ +from ..components import Executor + + +class TensorflowExecutor(Executor): + def __init__(self, args): + self.args = args + + def execute(self, pipeline): + args = None + for executable in pipeline: + args = executable.execute(args) + + def setup(self, pipeline): + for executable in pipeline: + executable.setup(self.args) diff --git a/ai/src/itwinai/backend/tensorflow/loggers.py b/ai/src/itwinai/backend/tensorflow/loggers.py new file mode 100644 index 00000000..3271f23f --- /dev/null +++ b/ai/src/itwinai/backend/tensorflow/loggers.py @@ -0,0 +1,22 @@ +import wandb +import mlflow +import mlflow.keras + +from ..components import Logger + + +class WanDBLogger(Logger): + def __init__(self): + pass + + def log(self): + wandb.init(config={"bs": 12}) + + +class MLFlowLogger(Logger): + def __init__(self): + mlflow.set_tracking_uri("http://127.0.0.1:5000") + mlflow.set_experiment("test-experiment") + + def log(self): + mlflow.keras.autolog() diff --git a/ai/src/itwinai/backend/tensorflow/utils.py b/ai/src/itwinai/backend/tensorflow/utils.py new file 
mode 100644 index 00000000..96cc536d --- /dev/null +++ b/ai/src/itwinai/backend/tensorflow/utils.py @@ -0,0 +1,13 @@ +import keras +import json + + +def model_to_json(model: keras.Model, filepath: str): + with open(filepath, "w") as f: + json.dump(model.to_json(), f) + + +def model_from_json(filepath: str) -> keras.Model: + with open(filepath, "r") as f: + config = json.load(f) + return keras.models.model_from_json(config) diff --git a/ai/src/itwinai/backend/torch/__init__.py b/ai/src/itwinai/backend/torch/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ai/src/itwinai/backend/torch/executor.py b/ai/src/itwinai/backend/torch/executor.py new file mode 100644 index 00000000..191b88a2 --- /dev/null +++ b/ai/src/itwinai/backend/torch/executor.py @@ -0,0 +1,16 @@ +from ..components import Executor + + +class TorchExecutor(Executor): + def __init__(self): + pass + + def execute(self, pipeline): + args = None + for executable in pipeline: + args = executable.execute(args) + + def setup(self, pipeline): + args = None + for executable in pipeline: + args = executable.setup(args) diff --git a/ai/src/itwinai/backend/torch/loggers.py b/ai/src/itwinai/backend/torch/loggers.py new file mode 100644 index 00000000..e146a590 --- /dev/null +++ b/ai/src/itwinai/backend/torch/loggers.py @@ -0,0 +1,22 @@ +import wandb +import mlflow +import mlflow.keras + +from ..components import Logger + + +class WanDBLogger(Logger): + def __init__(self): + pass + + def log(self): + wandb.init(config={"bs": 12}) + + +class MLFlowLogger(Logger): + def __init__(self): + mlflow.set_tracking_uri("http://127.0.0.1:5000") + mlflow.set_experiment("test-experiment") + + def log(self): + mlflow.pytorch.autolog() diff --git a/ai/src/itwinai/backend/utils.py b/ai/src/itwinai/backend/utils.py new file mode 100644 index 00000000..deaab45a --- /dev/null +++ b/ai/src/itwinai/backend/utils.py @@ -0,0 +1,13 @@ +import yaml + + +# Parse (part of) YAML loaded in memory +def parse_pipe_config(yaml_file, parser): + with open(yaml_file, "r", encoding="utf-8") as f: + try: + config = yaml.safe_load(f) + except yaml.YAMLError as exc: + print(exc) + raise exc + + return parser.parse_object(config) diff --git a/ai/src/itwinai/cli.py b/ai/src/itwinai/cli.py index 051377fb..2efc35df 100644 --- a/ai/src/itwinai/cli.py +++ b/ai/src/itwinai/cli.py @@ -21,18 +21,9 @@ @app.command() def train( - train_dataset: str = typer.Option( - "unk", - help="Path to training dataset." - ), - config: str = typer.Option( - "unk", - help="Path to training configuration file." - ), - ml_logs: str = typer.Option( - "ml-logs/", - help="Path to logs storage." - ) + train_dataset: str = typer.Option("unk", help="Path to training dataset."), + config: str = typer.Option("unk", help="Path to training configuration file."), + ml_logs: str = typer.Option("ml-logs/", help="Path to logs storage."), ): """ Train a neural network defined as a Pytorch Lightning model. 
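The new `components.py` abstractions and the per-framework executors above are the pipeline backbone that the rest of this changeset builds on. As a rough sketch of how they are presumably meant to compose (the `ToyGetter`/`ToyTrainer` classes below are illustrative only, not part of this PR), a pipeline is an ordered list of `Executable` objects whose outputs are chained by an `Executor`:

```python
from itwinai.backend.components import DataGetter, Trainer
from itwinai.backend.torch.executor import TorchExecutor


class ToyGetter(DataGetter):
    # Hypothetical step: returns a hard-coded "dataset"
    def setup(self, args):
        return args

    def load(self, args):
        return [1, 2, 3]

    def execute(self, args):
        return self.load(args)


class ToyTrainer(Trainer):
    # Hypothetical step: consumes whatever the previous step returned
    def setup(self, args):
        return args

    def train(self, data):
        print(f"Training on {data}")

    def execute(self, args):
        return self.train(args)


pipeline = [ToyGetter(), ToyTrainer()]
executor = TorchExecutor()
executor.setup(pipeline)    # each step gets a chance to prepare
executor.execute(pipeline)  # each step's output feeds the next one
```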
@@ -43,22 +34,13 @@ def train( from lightning.pytorch.cli import LightningCLI from omegaconf import DictConfig, OmegaConf - from itwinai.utils import ( - load_yaml_with_deps, - check_server, - flatten_dict - ) - from itwinai.plmodels.base import ( - ItwinaiBasePlModule, - ItwinaiBasePlDataModule - ) + from itwinai.utils import load_yaml_with_deps, check_server, flatten_dict + from itwinai.plmodels.base import ItwinaiBasePlModule, ItwinaiBasePlDataModule # Replicate args under cli field, to be used in imported config files - cli_conf = dict(cli=dict( - train_dataset=train_dataset, - config=config, - ml_logs=ml_logs - )) + cli_conf = dict( + cli=dict(train_dataset=train_dataset, config=config, ml_logs=ml_logs) + ) cli_conf = OmegaConf.create(cli_conf) # os.makedirs(ml_logs, exist_ok=True) @@ -68,22 +50,22 @@ def train( train_config = OmegaConf.to_container(train_config, resolve=True) # Setup logger - if os.path.exists('checkpoints'): + if os.path.exists("checkpoints"): # Remove old checkpoints - shutil.rmtree('checkpoints') + shutil.rmtree("checkpoints") # Check if MLflow server is reachable if not check_server(ml_logs): raise RuntimeError("MLFlow server not reachable!") - log_conf = train_config['logger'] + log_conf = train_config["logger"] # mlflow.set_tracking_uri('file:' + ml_logs) mlflow.set_tracking_uri(ml_logs) - mlflow.set_experiment(log_conf['experiment_name']) + mlflow.set_experiment(log_conf["experiment_name"]) mlflow.pytorch.autolog( - log_every_n_epoch=log_conf['log_every_n_epoch'], - log_every_n_step=log_conf['log_every_n_steps'], - registered_model_name=log_conf['registered_model_name'] + log_every_n_epoch=log_conf["log_every_n_epoch"], + log_every_n_step=log_conf["log_every_n_steps"], + registered_model_name=log_conf["registered_model_name"], ) # Note: we use autolog and MlFlowLogger combined: # - MlFlow logger provides better flexibility @@ -91,16 +73,16 @@ def train( # Ref: https://github.com/Lightning-AI/lightning/discussions/11197 # Load training configuration - lightning_conf = train_config['train']['conf'] + lightning_conf = train_config["train"]["conf"] # lightning_conf = OmegaConf.to_container(lightning_conf, resolve=True) # Start Mlflow run - with mlflow.start_run(description=log_conf['description']): + with mlflow.start_run(description=log_conf["description"]): # Log hyperparameters config_params = copy.copy(train_config) - config_params['cli.train_dataset'] = train_dataset - config_params['cli.ml_logs'] = ml_logs - config_params['cli.config'] = config + config_params["cli.train_dataset"] = train_dataset + config_params["cli.ml_logs"] = ml_logs + config_params["cli.config"] = config mlflow.log_params(flatten_dict(config_params)) # Save config file used for this specific training run @@ -109,73 +91,66 @@ def train( # Update lightning MLFlow logger constructor args # Infer MlFlow conf from pre-configured mlflow client - lightning_conf['trainer']['logger']['init_args'].update(dict( - experiment_name=mlflow.get_experiment( - mlflow.active_run().info.experiment_id - ).name, - tracking_uri=mlflow.get_tracking_uri(), - log_model='all', - run_id=mlflow.active_run().info.run_id, - save_dir=None - )) + lightning_conf["trainer"]["logger"]["init_args"].update( + dict( + experiment_name=mlflow.get_experiment( + mlflow.active_run().info.experiment_id + ).name, + tracking_uri=mlflow.get_tracking_uri(), + log_model="all", + run_id=mlflow.active_run().info.run_id, + save_dir=None, + ) + ) # Append CSVLogger in front: # 
https://github.com/Lightning-AI/lightning/issues/16310#issuecomment-1404177131 csv_log_conf = dict( - class_path='lightning.pytorch.loggers.CSVLogger', - init_args=dict(save_dir='./.tmp') + class_path="lightning.pytorch.loggers.CSVLogger", + init_args=dict(save_dir="./.tmp"), ) - lightning_conf['trainer']['logger'] = [ + lightning_conf["trainer"]["logger"] = [ csv_log_conf, - lightning_conf['trainer']['logger'] + lightning_conf["trainer"]["logger"], ] # Reset argv before using Lightning CLI old_argv = sys.argv - sys.argv = ['some_script_placeholder.py'] + sys.argv = ["some_script_placeholder.py"] + print(lightning_conf) cli = LightningCLI( args=lightning_conf, model_class=ItwinaiBasePlModule, datamodule_class=ItwinaiBasePlDataModule, run=False, - save_config_kwargs={"overwrite": True, - "config_filename": "pl-training.yml"}, + save_config_kwargs={ + "overwrite": True, + "config_filename": "pl-training.yml", + }, subclass_mode_model=True, - subclass_mode_data=True + subclass_mode_data=True, ) print(cli.trainer.log_dir) sys.argv = old_argv # Train + validation, and test cli.trainer.fit(cli.model, datamodule=cli.datamodule) cli.trainer.test( - dataloaders=cli.datamodule, - datamodule=cli.datamodule, - ckpt_path='best' + dataloaders=cli.datamodule, datamodule=cli.datamodule, ckpt_path="best" ) # Save updated lightning conf as an mlflow artifact - mlflow.log_artifact( - os.path.join(cli.trainer.log_dir, "pl-training.yml") - ) + mlflow.log_artifact(os.path.join(cli.trainer.log_dir, "pl-training.yml")) @app.command() def predict( input_dataset: str = typer.Option( - "unk", - help="Path to dataset of unseen data to make predictions." - ), - config: str = typer.Option( - "unk", - help="Path to inference configuration file." + "unk", help="Path to dataset of unseen data to make predictions." ), + config: str = typer.Option("unk", help="Path to inference configuration file."), predictions_location: str = typer.Option( - "preds/", - help="Where to save predictions." - ), - ml_logs: str = typer.Option( - "ml-logs/", - help="Path to MLFLow logs." + "preds/", help="Where to save predictions." ), + ml_logs: str = typer.Option("ml-logs/", help="Path to MLFLow logs."), ): """ Apply a pre-trained neural network to a set of unseen data. 
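Both `train` and `predict` replicate their CLI arguments under a top-level `cli` field so that use-case configuration files can reference them through OmegaConf interpolation. A minimal sketch of that mechanism, assuming a made-up `train.data_path` key (only the `cli.*` names come from the code above):

```python
from omegaconf import OmegaConf

# A use-case config may refer to CLI args via interpolation
use_case_conf = OmegaConf.create({"train": {"data_path": "${cli.train_dataset}"}})

# What the CLI commands build before merging
cli_conf = OmegaConf.create(
    dict(cli=dict(train_dataset="data/mnist", config="cfg.yml", ml_logs="ml-logs/"))
)

merged = OmegaConf.merge(use_case_conf, cli_conf)
resolved = OmegaConf.to_container(merged, resolve=True)
print(resolved["train"]["data_path"])  # -> data/mnist
```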
@@ -189,18 +164,17 @@ def predict( from omegaconf import DictConfig, OmegaConf from itwinai.utils import load_yaml_with_deps, load_yaml - from itwinai.plmodels.base import ( - ItwinaiBasePlModule, - ItwinaiBasePlDataModule - ) + from itwinai.plmodels.base import ItwinaiBasePlModule, ItwinaiBasePlDataModule # Replicate args under cli field, to be used in imported config files - cli_conf = dict(cli=dict( - input_dataset=input_dataset, - config=config, - predictions_location=predictions_location, - ml_logs=ml_logs - )) + cli_conf = dict( + cli=dict( + input_dataset=input_dataset, + config=config, + predictions_location=predictions_location, + ml_logs=ml_logs, + ) + ) cli_conf = OmegaConf.create(cli_conf) os.makedirs(predictions_location, exist_ok=True) @@ -208,7 +182,7 @@ def predict( ml_conf = OmegaConf.merge(ml_conf, cli_conf) # print(OmegaConf.to_yaml(ml_conf)) ml_conf = OmegaConf.to_container(ml_conf, resolve=True) - ml_conf = ml_conf['inference'] + ml_conf = ml_conf["inference"] os.makedirs(predictions_location, exist_ok=True) @@ -217,7 +191,7 @@ def predict( # Check if run ID exists try: - mlflow.get_run(ml_conf['run_id']) + mlflow.get_run(ml_conf["run_id"]) # mlflow_client.get_run(ml_conf['run_id']) except MlflowException: logging.warning( @@ -226,55 +200,53 @@ def predict( f"'{ml_conf['experiment_name']}' instead." ) runs = mlflow.search_runs( - experiment_names=[ml_conf['experiment_name']], - + experiment_names=[ml_conf["experiment_name"]], ) - new_run_id = runs[runs.status == 'FINISHED'].iloc[0]['run_id'] - ml_conf['run_id'] = new_run_id + new_run_id = runs[runs.status == "FINISHED"].iloc[0]["run_id"] + ml_conf["run_id"] = new_run_id logging.warning(f"Using Run ID: '{new_run_id}'") # Download training configuration train_conf_path = mlflow.artifacts.download_artifacts( - run_id=ml_conf['run_id'], - artifact_path=ml_conf['train_config_artifact_path'], - dst_path='tmp/', - tracking_uri=mlflow.get_tracking_uri() + run_id=ml_conf["run_id"], + artifact_path=ml_conf["train_config_artifact_path"], + dst_path="tmp/", + tracking_uri=mlflow.get_tracking_uri(), ) # Download last ckpt ckpt_path = mlflow.artifacts.download_artifacts( - run_id=ml_conf['run_id'], - artifact_path=ml_conf['ckpt_path'], - dst_path='tmp/', - tracking_uri=mlflow.get_tracking_uri() + run_id=ml_conf["run_id"], + artifact_path=ml_conf["ckpt_path"], + dst_path="tmp/", + tracking_uri=mlflow.get_tracking_uri(), ) # Instantiate PL model lightning_conf = load_yaml(train_conf_path) - if ml_conf['conf'] is not None: + if ml_conf["conf"] is not None: # Override training configuration with the one # provided during inference. 
# Example: predictions dataset is different # from training dataset - lightning_conf.update(ml_conf['conf']) + lightning_conf.update(ml_conf["conf"]) # Reset argv before using Lightning CLI old_argv = sys.argv - sys.argv = ['some_script_placeholder.py'] + sys.argv = ["some_script_placeholder.py"] cli = LightningCLI( args=lightning_conf, model_class=ItwinaiBasePlModule, run=False, subclass_mode_model=True, subclass_mode_data=True, - save_config_callback=None + save_config_callback=None, ) sys.argv = old_argv # Load best model loaded_model = cli.model.load_from_checkpoint( - ckpt_path, - lightning_conf['model']['init_args'] + ckpt_path, lightning_conf["model"]["init_args"] ) # Load Data module @@ -284,39 +256,31 @@ def predict( # Predict predictions = trainer.predict( - loaded_model, - datamodule=loaded_data_module + loaded_model, datamodule=loaded_data_module ) # , ckpt_path='best') - pred_class_names = loaded_data_module.preds_to_names( - torch.cat(predictions) - ) + pred_class_names = loaded_data_module.preds_to_names(torch.cat(predictions)) # Save list of predictions as class names - preds_file = os.path.join(predictions_location, 'predictions.txt') - with open(preds_file, 'w') as preds_file: - preds_file.write('\n'.join(pred_class_names)) + preds_file = os.path.join(predictions_location, "predictions.txt") + with open(preds_file, "w") as preds_file: + preds_file.write("\n".join(pred_class_names)) @app.command() def mlflow_ui( - path: str = typer.Option( - "ml-logs/", - help="Path to logs storage." - ), + path: str = typer.Option("ml-logs/", help="Path to logs storage."), ): """ Visualize Mlflow logs. """ import subprocess + subprocess.run(f"mlflow ui --backend-store-uri {path}".split()) @app.command() def datasets( - use_case: str = typer.Option( - "./use-cases/mnist", - help="Path to use case files." - ), + use_case: str = typer.Option("./use-cases/mnist", help="Path to use case files."), ): """List datasets of an use case.""" from rich.console import Console @@ -325,46 +289,28 @@ def datasets( from pathlib import Path from omegaconf import OmegaConf - datasets_reg = load_yaml( - os.path.join(use_case, 'meta.yml') - ) + datasets_reg = load_yaml(os.path.join(use_case, "meta.yml")) # datasets_reg = OmegaConf.create(datasets_reg) - datasets_reg = OmegaConf.to_container( - OmegaConf.create(datasets_reg), - resolve=True - ) + datasets_reg = OmegaConf.to_container(OmegaConf.create(datasets_reg), resolve=True) rows = [] - columns = [ - "Name", - "Description", - "Location" - ] - for ds_name, ds_info in datasets_reg['datasets'].items(): - rows.append( - [ - ds_name, - ds_info['doc'], - ds_info['location'] - ] - ) + columns = ["Name", "Description", "Location"] + for ds_name, ds_info in datasets_reg["datasets"].items(): + rows.append([ds_name, ds_info["doc"], ds_info["location"]]) use_case_dir = os.path.basename(Path(use_case)) table = Table(title=f"Datasets registry for '{use_case_dir}' use case") for column in columns: table.add_column(column) for row in rows: - table.add_row(*row, style='bright_green') + table.add_row(*row, style="bright_green") console = Console() console.print(table) @app.command() def workflows( - use_case: str = typer.Option( - "./use-cases/mnist", - help="Path to use case files." 
- ), + use_case: str = typer.Option("./use-cases/mnist", help="Path to use case files."), ): """List workflows of an use case.""" from omegaconf import OmegaConf @@ -372,38 +318,32 @@ def workflows( from rich.table import Table from pathlib import Path from itwinai.utils import load_yaml_with_deps + use_case_dir = os.path.basename(Path(use_case)) - wf_files = filter(lambda itm: itm.endswith( - "-workflow.yml"), os.listdir(use_case)) - columns = [ - "Step", - "Description", - "Command", - "Env location", - "Env file" - ] + wf_files = filter(lambda itm: itm.endswith("-workflow.yml"), os.listdir(use_case)) + columns = ["Step", "Description", "Command", "Env location", "Env file"] for workflow_file in wf_files: - wf = load_yaml_with_deps( - os.path.join(use_case, workflow_file) - ) - wf_name = workflow_file.split('.')[0] + wf = load_yaml_with_deps(os.path.join(use_case, workflow_file)) + wf_name = workflow_file.split(".")[0] rows = [] for step in wf.steps: step = OmegaConf.to_container(step, resolve=True) step_name, step_data = list(step.items())[0] - rows.append([ - step_name, - step_data['doc'], - step_data['command'], - step_data['env']['prefix'], - step_data['env']['file'], - ]) + rows.append( + [ + step_name, + step_data["doc"], + step_data["command"], + step_data["env"]["prefix"], + step_data["env"]["file"], + ] + ) table = Table(title=f"'{wf_name}' for '{use_case_dir}' use case") for column in columns: table.add_column(column) for row in rows: - table.add_row(*row, style='bright_green') + table.add_row(*row, style="bright_green") console = Console() console.print(table) diff --git a/ai/src/itwinai/models/cnn.py b/ai/src/itwinai/models/cnn.py deleted file mode 100644 index 67e9da20..00000000 --- a/ai/src/itwinai/models/cnn.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -Convolutional Neural Networks. -""" - -from torch import nn -import torch.nn.functional as F - - -class LeNetv5(nn.Module): - - def __init__(self): - super(LeNetv5, self).__init__() - self.conv1 = nn.Conv2d(1, 6, 5, padding=2) - self.conv2 = nn.Conv2d(6, 16, 5) - self.fc1 = nn.Linear(16 * 5 * 5, 120) - self.fc2 = nn.Linear(120, 84) - self.fc3 = nn.Linear(84, 10) - - def forward(self, x): - """ - One forward pass through the network. 
- - Args: - x: input - """ - x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2)) - x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2)) - x = x.view(-1, self.num_flat_features(x)) - x = F.relu(self.fc1(x)) - x = F.relu(self.fc2(x)) - x = self.fc3(x) - return x diff --git a/ai/src/itwinai/models/tensorflow/__init__.py b/ai/src/itwinai/models/tensorflow/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ai/src/itwinai/models/tensorflow/cyclones_vgg.py b/ai/src/itwinai/models/tensorflow/cyclones_vgg.py new file mode 100644 index 00000000..c829ac82 --- /dev/null +++ b/ai/src/itwinai/models/tensorflow/cyclones_vgg.py @@ -0,0 +1,681 @@ +import tensorflow as tf + + +def custom_VGG_V1(patch_size, channels, activation, regularizer): + model = tf.keras.Sequential() + + model.add( + tf.keras.layers.Conv2D( + input_shape=(patch_size, patch_size, channels[0]), + filters=64, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=64, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=64, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))) + + model.add( + tf.keras.layers.Conv2D( + filters=128, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=128, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))) + + model.add( + tf.keras.layers.Conv2D( + filters=256, + kernel_size=(2, 2), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=256, + kernel_size=(2, 2), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))) + + model.add( + tf.keras.layers.Conv2D( + filters=512, + kernel_size=(2, 2), + padding="valid", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=512, + kernel_size=(2, 2), + padding="valid", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=512, + kernel_size=(2, 2), + padding="valid", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2))) + + model.add(tf.keras.layers.Flatten()) + model.add( + tf.keras.layers.Dense( + units=512, activation=activation, kernel_regularizer=regularizer + ) + ) + model.add( + tf.keras.layers.Dense( + units=256, activation=activation, kernel_regularizer=regularizer + ) + ) + model.add( + tf.keras.layers.Dense( + units=128, activation=activation, kernel_regularizer=regularizer + ) + ) + model.add( + tf.keras.layers.Dense( + units=64, activation=activation, kernel_regularizer=regularizer + ) + ) + model.add(tf.keras.layers.Dense(channels[1])) + + return model + + +def custom_VGG_V2(patch_size, channels, activation, regularizer): + model = tf.keras.Sequential() + + model.add(tf.keras.layers.Input(shape=(patch_size, patch_size, channels[0]))) + + model.add( + tf.keras.layers.Conv2D( + filters=32, + 
kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=32, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=32, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))) + + model.add( + tf.keras.layers.Conv2D( + filters=64, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=64, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=64, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))) + + model.add( + tf.keras.layers.Conv2D( + filters=128, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=128, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=128, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))) + + model.add( + tf.keras.layers.Conv2D( + filters=256, + kernel_size=(2, 2), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=256, + kernel_size=(2, 2), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=256, + kernel_size=(2, 2), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + + model.add( + tf.keras.layers.Conv2D( + filters=512, + kernel_size=(2, 2), + padding="valid", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=512, + kernel_size=(2, 2), + padding="valid", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=512, + kernel_size=(2, 2), + padding="valid", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=512, + kernel_size=(2, 2), + padding="valid", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + + model.add(tf.keras.layers.Flatten()) + + model.add( + tf.keras.layers.Dense( + units=1024, activation=activation, kernel_regularizer=regularizer + ) + ) + model.add( + tf.keras.layers.Dense( + units=512, activation=activation, kernel_regularizer=regularizer + ) + ) + model.add( + tf.keras.layers.Dense( + units=256, activation=activation, kernel_regularizer=regularizer + ) + ) + model.add( + tf.keras.layers.Dense( + units=128, activation=activation, kernel_regularizer=regularizer + ) + ) + + model.add(tf.keras.layers.Dense(channels[1])) + + return model + + +def custom_VGG_V3(patch_size, channels, activation, regularizer): + model = tf.keras.Sequential() + + model.add(tf.keras.layers.Input(shape=(patch_size, patch_size, channels[0]))) + + model.add( + tf.keras.layers.Conv2D( + 
filters=32, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=32, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=32, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))) + + model.add( + tf.keras.layers.Conv2D( + filters=64, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=64, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=64, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))) + + model.add( + tf.keras.layers.Conv2D( + filters=128, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=128, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=128, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))) + + model.add( + tf.keras.layers.Conv2D( + filters=256, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=256, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=256, + kernel_size=(3, 3), + padding="same", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + + model.add( + tf.keras.layers.Conv2D( + filters=512, + kernel_size=(2, 2), + padding="valid", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=512, + kernel_size=(2, 2), + padding="valid", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + + model.add( + tf.keras.layers.Conv2D( + filters=1024, + kernel_size=(2, 2), + padding="valid", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + model.add( + tf.keras.layers.Conv2D( + filters=1024, + kernel_size=(2, 2), + padding="valid", + activation=activation, + kernel_regularizer=regularizer, + ) + ) + + model.add(tf.keras.layers.Flatten()) + + model.add( + tf.keras.layers.Dense( + units=1024, activation=activation, kernel_regularizer=regularizer + ) + ) + model.add( + tf.keras.layers.Dense( + units=512, activation=activation, kernel_regularizer=regularizer + ) + ) + model.add( + tf.keras.layers.Dense( + units=512, activation=activation, kernel_regularizer=regularizer + ) + ) + model.add( + tf.keras.layers.Dense( + units=256, activation=activation, kernel_regularizer=regularizer + ) + ) + + model.add(tf.keras.layers.Dense(channels[1])) + + return model + + +""" +def VGG_V4(patch_size, label_no_cyclone, channels, activation, regularizer): + model = tf.keras.Sequential() + + model.add(tf.keras.layers.Input(shape=(patch_size, patch_size, channels[0]))) + + 
model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=(3,3), padding="same", activation=activation, kernel_regularizer=regularizer)) + model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=(3,3), padding="same", activation=activation, kernel_regularizer=regularizer)) + model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=(3,3), padding="same", activation=activation, kernel_regularizer=regularizer)) + + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2,2), strides=(2,2))) + + model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding="same", activation=activation, kernel_regularizer=regularizer)) + model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding="same", activation=activation, kernel_regularizer=regularizer)) + model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding="same", activation=activation, kernel_regularizer=regularizer)) + + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2,2), strides=(2,2))) + + model.add(tf.keras.layers.Conv2D(filters=128, kernel_size=(3,3), padding="same", activation=activation, kernel_regularizer=regularizer)) + model.add(tf.keras.layers.Conv2D(filters=128, kernel_size=(3,3), padding="same", activation=activation, kernel_regularizer=regularizer)) + model.add(tf.keras.layers.Conv2D(filters=128, kernel_size=(3,3), padding="same", activation=activation, kernel_regularizer=regularizer)) + + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2,2), strides=(2,2))) + + model.add(tf.keras.layers.Conv2D(filters=256, kernel_size=(2,2), padding="same", activation=activation, kernel_regularizer=regularizer)) + model.add(tf.keras.layers.Conv2D(filters=256, kernel_size=(2,2), padding="same", activation=activation, kernel_regularizer=regularizer)) + model.add(tf.keras.layers.Conv2D(filters=256, kernel_size=(2,2), padding="same", activation=activation, kernel_regularizer=regularizer)) + + model.add(tf.keras.layers.Conv2D(filters=512, kernel_size=(2,2), padding="valid", activation=activation, kernel_regularizer=regularizer)) + model.add(tf.keras.layers.Conv2D(filters=512, kernel_size=(2,2), padding="valid", activation=activation, kernel_regularizer=regularizer)) + model.add(tf.keras.layers.Conv2D(filters=512, kernel_size=(2,2), padding="valid", activation=activation, kernel_regularizer=regularizer)) + model.add(tf.keras.layers.Conv2D(filters=512, kernel_size=(2,2), padding="valid", activation=activation, kernel_regularizer=regularizer)) + + model.add(tf.keras.layers.Flatten()) + + model.add(tf.keras.layers.Dense(units=1024, activation=activation, kernel_regularizer=regularizer)) + model.add(tf.keras.layers.Dense(units=512, activation=activation, kernel_regularizer=regularizer)) + model.add(tf.keras.layers.Dense(units=256, activation=activation, kernel_regularizer=regularizer)) + model.add(tf.keras.layers.Dense(units=128, activation=activation, kernel_regularizer=regularizer)) + + model.add(tf.keras.layers.Dense(channels[1])) + model.add(PositionDiscretizationLayer(label_no_cyclone=label_no_cyclone, patch_size=patch_size)) + + return model +""" + + +def ModelV5(patch_size, channels, last_activation, kernel_size=3): + # kernel initializer + initializer = tf.random_normal_initializer(0.0, 0.02) + + # input layer + inputs = tf.keras.layers.Input(shape=(patch_size, patch_size, channels[0])) + + conv_blocks = [ + ConvBlock( + filters=32, + initializer=initializer, + kernel_size=kernel_size, + strides=2, + apply_batchnorm=True, + apply_dropout=False, + apply_gaussian_noise=True, + ), + ConvBlock( + filters=64, + 
initializer=initializer, + kernel_size=kernel_size, + strides=2, + apply_batchnorm=False, + apply_dropout=False, + apply_gaussian_noise=False, + ), + ConvBlock( + filters=128, + initializer=initializer, + kernel_size=3, + strides=2, + apply_batchnorm=False, + apply_dropout=True, + apply_gaussian_noise=False, + ), + ConvBlock( + filters=256, + initializer=initializer, + kernel_size=3, + strides=2, + apply_batchnorm=False, + apply_dropout=False, + apply_gaussian_noise=True, + ), + ConvBlock( + filters=512, + initializer=initializer, + kernel_size=3, + strides=2, + apply_batchnorm=False, + apply_dropout=False, + apply_gaussian_noise=False, + ), + ConvBlock( + filters=1024, + initializer=initializer, + kernel_size=3, + strides=2, + apply_batchnorm=True, + apply_dropout=True, + apply_gaussian_noise=False, + ), + ] + x = inputs + for block in conv_blocks: + x = block(x) + + x = tf.keras.layers.Flatten()(x) + x = tf.keras.layers.Dense( + units=1024, activation="relu", kernel_initializer=initializer + )(x) + x = tf.keras.layers.Dense( + units=512, activation="relu", kernel_initializer=initializer + )(x) + x = tf.keras.layers.Dense( + units=256, activation="relu", kernel_initializer=initializer + )(x) + x = tf.keras.layers.Dense( + units=128, activation="relu", kernel_initializer=initializer + )(x) + + outputs = tf.keras.layers.Dense( + channels[1], activation=last_activation, kernel_initializer=initializer + )(x) + + return tf.keras.Model(inputs=inputs, outputs=outputs, name="model_V5") + + +""" +from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense +from tensorflow.keras.models import Sequential +def custom_VGG_V1(patch_size, channels, activation, regularizer): + model = Sequential() + #model.add(Conv2D(input_shape=(40,40,len(variables_list)), filters=64, kernel_size=(3,3), padding="same", activation=activation, kernel_regularizer=regularizer)) + model.add(Conv2D(input_shape=(patch_size,patch_size,channels[0]), filters=64, kernel_size=(3,3), padding="same", activation=activation, kernel_regularizer=regularizer)) + model.add(Conv2D(filters=64, kernel_size=(3,3), padding="same", activation=activation, kernel_regularizer=regularizer)) + model.add(Conv2D(filters=64, kernel_size=(3,3), padding="same", activation=activation, kernel_regularizer=regularizer)) + model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2))) + model.add(Conv2D(filters=128, kernel_size=(3,3), padding="same", activation=activation, kernel_regularizer=regularizer)) + model.add(Conv2D(filters=128, kernel_size=(3,3), padding="same", activation=activation, kernel_regularizer=regularizer)) + model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2))) + model.add(Conv2D(filters=256, kernel_size=(2,2), padding="same", activation=activation, kernel_regularizer=regularizer)) + model.add(Conv2D(filters=256, kernel_size=(2,2), padding="same", activation=activation, kernel_regularizer=regularizer)) + #model.add(Conv2D(filters=256, kernel_size=(2,2), padding="same", activation=activation, kernel_regularizer=regularizer)) + model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2))) + model.add(Conv2D(filters=512, kernel_size=(2,2), padding="valid", activation=activation, kernel_regularizer=regularizer)) + model.add(Conv2D(filters=512, kernel_size=(2,2), padding="valid", activation=activation, kernel_regularizer=regularizer)) + model.add(Conv2D(filters=512, kernel_size=(2,2), padding="valid", activation=activation, kernel_regularizer=regularizer)) + model.add(MaxPooling2D(pool_size=(2,2))) #,strides=(2,2))) + # 
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu")) + # model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu")) + # model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu")) + # model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2))) + model.add(Flatten()) + model.add(Dense(units=512, activation=activation, kernel_regularizer=regularizer)) + model.add(Dense(units=256, activation=activation, kernel_regularizer=regularizer)) + model.add(Dense(units=128, activation=activation, kernel_regularizer=regularizer)) + model.add(Dense(units=64, activation=activation, kernel_regularizer=regularizer)) + model.add(Dense(channels[1])) + + return model +""" diff --git a/ai/src/itwinai/models/tensorflow/mnist.py b/ai/src/itwinai/models/tensorflow/mnist.py new file mode 100644 index 00000000..99575e7e --- /dev/null +++ b/ai/src/itwinai/models/tensorflow/mnist.py @@ -0,0 +1,24 @@ +import keras + +from typing import List + + +class MNIST_Model(keras.Model): + def __init__(self, input_shape: List[int], output_shape: int): + super().__init__() + + self.model = keras.Sequential( + [ + keras.Input(shape=input_shape), + keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), + keras.layers.MaxPooling2D(pool_size=(2, 2)), + keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), + keras.layers.MaxPooling2D(pool_size=(2, 2)), + keras.layers.Flatten(), + keras.layers.Dropout(0.5), + keras.layers.Dense(output_shape, activation="softmax"), + ] + ) + + def call(self, inputs): + return self.model(inputs) diff --git a/ai/src/itwinai/models/torch/__init__.py b/ai/src/itwinai/models/torch/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ai/src/itwinai/models/torch/mnist.py b/ai/src/itwinai/models/torch/mnist.py new file mode 100644 index 00000000..d68fa8d8 --- /dev/null +++ b/ai/src/itwinai/models/torch/mnist.py @@ -0,0 +1,75 @@ +import torch +import torch.nn as nn +import lightning as L + +from torch.nn import functional as F + + +class MNISTModel(L.LightningModule): + """ + Simple PL model for MNIST. 
+ Adapted from + https://lightning.ai/docs/pytorch/stable/notebooks/lightning_examples/mnist-hello-world.html + """ + + def __init__( + self, + hidden_size: int = 64, + ): + super().__init__() + + # Automatically save constructor args as hyperparameters + self.save_hyperparameters() + + # Set our init args as class attributes + self.hidden_size = hidden_size + + # Hardcode some dataset specific attributes + self.num_classes = 10 + self.dims = (1, 28, 28) + channels, width, height = self.dims + + # Define PyTorch model + self.model = nn.Sequential( + nn.Flatten(), + nn.Linear(channels * width * height, hidden_size), + nn.ReLU(), + nn.Dropout(0.1), + nn.Linear(hidden_size, hidden_size), + nn.ReLU(), + nn.Dropout(0.1), + nn.Linear(hidden_size, self.num_classes), + ) + + def forward(self, x): + x = self.model(x) + return F.log_softmax(x, dim=1) + + def training_step(self, batch, batch_idx): + x, y = batch + logits = self(x) + loss = F.nll_loss(logits, y) + + # Log metrics with autolog + self.log("train_loss", loss, on_step=True, on_epoch=True) + + return loss + + def validation_step(self, batch, batch_idx): + x, y = batch + logits = self(x) + loss = F.nll_loss(logits, y) + preds = torch.argmax(logits, dim=1) + self.log("val_loss", loss, prog_bar=True, on_step=True, on_epoch=True) + + def test_step(self, batch, batch_idx): + x, y = batch + logits = self(x) + loss = F.nll_loss(logits, y) + self.log("test_loss", loss) + + def predict_step(self, batch, batch_idx, dataloader_idx=0): + x, _ = batch + logits = self(x) + preds = torch.argmax(logits, dim=1) + return preds diff --git a/ai/src/itwinai/plmodels/base.py b/ai/src/itwinai/plmodels/base.py deleted file mode 100644 index fe88963b..00000000 --- a/ai/src/itwinai/plmodels/base.py +++ /dev/null @@ -1,26 +0,0 @@ -""" -Base classes for pytorch lightning for itwinai. -""" - -from typing import Any, List -import lightning as L - - -class ItwinaiBasePlModule(L.LightningModule): - """ - Base class for pytorch lightning models for itwinai. - """ - pass - - -class ItwinaiBasePlDataModule(L.LightningDataModule): - """ - Base class for pytorch lightning models for itwinai. - """ - - def preds_to_names( - self, - preds: Any - ) -> List[str]: - """Convert predictions to class names.""" - raise NotImplementedError diff --git a/ai/src/itwinai/plmodels/mnist.py b/ai/src/itwinai/plmodels/mnist.py deleted file mode 100644 index ebd4c379..00000000 --- a/ai/src/itwinai/plmodels/mnist.py +++ /dev/null @@ -1,246 +0,0 @@ -""" -Pytorch Lightning models for MNIST dataset. -""" - -from typing import Callable, List, Optional, Union -from lightning.pytorch.utilities.types import EVAL_DATALOADERS -import torch -from torch import nn -from torch.nn import functional as F -from torch.utils.data import DataLoader, random_split -from torchmetrics import Accuracy -from torchvision import transforms -from torchvision.datasets import MNIST - -from .base import ItwinaiBasePlModule, ItwinaiBasePlDataModule - - -class MNISTDataModule(ItwinaiBasePlDataModule): - """Pytorch Lightning data module for MNIST dataset - - Args: - data_dir (str): path to dataset directory - batch_size (int, optional): batch size. Defaults to 32. - train_prop (float, optional): proportion of examples in the train - split, after dataset is split into train and validation. - Defaults to 0.7. - transform (Optional[Callable], optional): transformations to apply - to the loaded images. Defaults to None. 
- """ - - def __init__( - self, - data_dir: str, - batch_size: int = 32, - train_prop: float = 0.7, - transform: Optional[Callable] = None, - ) -> None: - super().__init__() - self.data_dir = data_dir - self.batch_size = batch_size - self.train_prop = train_prop - if transform is None: - self.transform = transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)), - ] - ) - else: - self.transform = transform - - def setup(self, stage=None): - # Assign train/val datasets for use in dataloaders - if stage == "fit" or stage is None: - mnist_full = MNIST( - self.data_dir, train=True, - transform=self.transform, - download=False - ) - n_train_samples = int(self.train_prop * len(mnist_full)) - n_val_samples = len(mnist_full) - n_train_samples - self.mnist_train, self.mnist_val = random_split( - mnist_full, [n_train_samples, n_val_samples] - ) - - # Assign test dataset for use in dataloader(s) - if stage == "test" or stage is None: - self.mnist_test = MNIST( - self.data_dir, - train=False, - download=False, - transform=self.transform - ) - - if stage == "predict": - self.mnist_predict = MNIST( - self.data_dir, - train=False, - download=False, - transform=self.transform - ) - - def train_dataloader(self): - return DataLoader( - self.mnist_train, - batch_size=self.batch_size, - num_workers=4, - ) - - def val_dataloader(self): - return DataLoader( - self.mnist_val, - batch_size=self.batch_size, - num_workers=4, - ) - - def test_dataloader(self): - return DataLoader( - self.mnist_test, - batch_size=self.batch_size, - num_workers=4, - ) - - def predict_dataloader(self) -> EVAL_DATALOADERS: - return DataLoader( - self.mnist_predict, - batch_size=self.batch_size, - num_workers=4, - ) - - def preds_to_names( - self, - preds: Union[torch.Tensor, List[torch.Tensor]] - ) -> List[str]: - """Convert predictions to class names.""" - # Convert prediction to label: in this case is easy, as the label - # is an integer. - if not isinstance(preds, list): - preds = [preds] - - names = [] - for p in preds: - p += 1 - names.extend([str(el) for el in p.tolist()]) - return names - - -class LitMNIST(ItwinaiBasePlModule): - """ - Simple PL model for MNIST. 
- Adapted from - https://lightning.ai/docs/pytorch/stable/notebooks/lightning_examples/mnist-hello-world.html - """ - - def __init__( - self, - hidden_size: int = 64, - ): - super().__init__() - - # Automatically save constructor args as hyperparameters - self.save_hyperparameters() - - # Set our init args as class attributes - self.hidden_size = hidden_size - - # Hardcode some dataset specific attributes - self.num_classes = 10 - self.dims = (1, 28, 28) - channels, width, height = self.dims - self.transform = transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)), - ] - ) - - # Define PyTorch model - self.model = nn.Sequential( - nn.Flatten(), - nn.Linear(channels * width * height, hidden_size), - nn.ReLU(), - nn.Dropout(0.1), - nn.Linear(hidden_size, hidden_size), - nn.ReLU(), - nn.Dropout(0.1), - nn.Linear(hidden_size, self.num_classes), - ) - - self.val_accuracy = Accuracy(task="multiclass", num_classes=10) - self.test_accuracy = Accuracy(task="multiclass", num_classes=10) - - def forward(self, x): - x = self.model(x) - return F.log_softmax(x, dim=1) - - def training_step(self, batch, batch_idx): - x, y = batch - logits = self(x) - loss = F.nll_loss(logits, y) - - # Log metrics with autolog - self.log( - "train_loss", - loss, - on_step=True, - on_epoch=True - ) - - # Good alternative - # # Log with generic logger - # self.logger.log_metrics( - # metrics=dict(train_loss=loss.item()), - # step=self.global_step - # ) - - return loss - - def validation_step(self, batch, batch_idx): - x, y = batch - logits = self(x) - loss = F.nll_loss(logits, y) - preds = torch.argmax(logits, dim=1) - self.val_accuracy.update(preds, y) - - self.log( - "val_loss", - loss, - prog_bar=True, - on_step=True, - on_epoch=True - ) - self.log( - "val_acc", - self.val_accuracy, - prog_bar=True, - on_step=True, - on_epoch=True - ) - - # good alternative - # # Log with generic logger - # self.logger.log_metrics( - # metrics=dict(val_loss=loss.item()), - # step=self.global_step - # ) - # self.logger.log_metrics( - # metrics=dict(val_acc=acc.item()), - # step=self.global_step - # ) - - def test_step(self, batch, batch_idx): - x, y = batch - logits = self(x) - loss = F.nll_loss(logits, y) - preds = torch.argmax(logits, dim=1) - self.test_accuracy.update(preds, y) - - self.log("test_loss", loss) - self.log("test_acc", self.test_accuracy) - - def predict_step(self, batch, batch_idx, dataloader_idx=0): - x, _ = batch - logits = self(x) - preds = torch.argmax(logits, dim=1) - return preds diff --git a/ai/src/itwinai/utils.py b/ai/src/itwinai/utils.py index 844dc001..b8736adf 100644 --- a/ai/src/itwinai/utils.py +++ b/ai/src/itwinai/utils.py @@ -69,14 +69,9 @@ def load_yaml_with_deps(path: str) -> DictConfig: yaml_conf = load_yaml(path) use_case_dir = os.path.dirname(path) deps = [] - if yaml_conf.get('conf-dependencies'): - for dependency in yaml_conf['conf-dependencies']: - deps.append(load_yaml( - os.path.join( - use_case_dir, - dependency - )) - ) + if yaml_conf.get("conf-dependencies"): + for dependency in yaml_conf["conf-dependencies"]: + deps.append(load_yaml(os.path.join(use_case_dir, dependency))) return OmegaConf.merge(yaml_conf, *deps) @@ -99,9 +94,7 @@ def dynamically_import_class(name: str): def flatten_dict( - d: MutableMapping, - parent_key: str = '', - sep: str = '.' + d: MutableMapping, parent_key: str = "", sep: str = "." 
) -> MutableMapping: """Flatten dictionary diff --git a/ai/tests/test_cli.py b/ai/tests/test_cli.py index de668466..bad96692 100644 --- a/ai/tests/test_cli.py +++ b/ai/tests/test_cli.py @@ -9,19 +9,13 @@ def test_datasets_viz(): """ Test visualization of use case's dataset registry. """ - USE_CASE = 'use-cases/mnist/' - subprocess.run( - f'itwinai datasets --use-case {USE_CASE}'.split(), - check=True - ) + USE_CASE = "use-cases/mnist/" + subprocess.run(f"itwinai datasets --use-case {USE_CASE}".split(), check=True) def test_workflows_viz(): """ Test visualization of use case's workflows. """ - USE_CASE = './use-cases/mnist/' - subprocess.run( - f'itwinai workflows --use-case {USE_CASE}'.split(), - check=True - ) + USE_CASE = "./use-cases/mnist/" + subprocess.run(f"itwinai workflows --use-case {USE_CASE}".split(), check=True) diff --git a/ai/tests/test_utils.py b/ai/tests/test_utils.py index 3ce7681d..bbeb61fa 100644 --- a/ai/tests/test_utils.py +++ b/ai/tests/test_utils.py @@ -9,16 +9,10 @@ def test_flatten_dict(): """ Test flatten dict function. """ - dict1 = dict( - a=1, - b=dict( - b1=2, - b2=3 - ) - ) + dict1 = dict(a=1, b=dict(b1=2, b2=3)) flattened = flatten_dict(dict1) - assert flattened.get('a') == 1 - assert flattened.get('b.b1') == 2 - assert flattened.get('b.b2') == 3 + assert flattened.get("a") == 1 + assert flattened.get("b.b1") == 2 + assert flattened.get("b.b2") == 3 assert len(flattened) == 3 diff --git a/other/argparse/argparsee.py b/other/argparse/argparsee.py deleted file mode 100644 index f1e780a3..00000000 --- a/other/argparse/argparsee.py +++ /dev/null @@ -1,31 +0,0 @@ -from jsonargparse import ArgumentParser -import yaml - -from itwinai.plmodels.base import ItwinaiBasePlModule - -# Ref: -# https://jsonargparse.readthedocs.io/en/stable/#class-type-and-sub-classes - -parser = ArgumentParser() - -parser.add_argument('--car', type=str) -parser.add_argument('--number', type=int) -parser.add_subclass_arguments(ItwinaiBasePlModule, 'model') - - -# Parse (part of) YAML loaded in memory -with open('config.yml', "r", encoding="utf-8") as yaml_file: - try: - train_config = yaml.safe_load(yaml_file) - except yaml.YAMLError as exc: - print(exc) - raise exc -cfg = parser.parse_object(train_config) - -# # Parse whole YAML from file -# cfg = parser.parse_path('config.yml') - -print(cfg.model.as_dict()) - -cfg = parser.instantiate_classes(cfg) -print(cfg.model) diff --git a/other/argparse/config.yml b/other/argparse/config.yml deleted file mode 100644 index 468a59a4..00000000 --- a/other/argparse/config.yml +++ /dev/null @@ -1,10 +0,0 @@ -model: - class_path: itwinai.plmodels.mnist.LitMNIST - init_args: - data_dir: ../data/mnist/preproc-images-train - learning_rate: 0.001 - batch_size: 32 - hidden_size: 64 - -car: my car -number: 3 diff --git a/pyproject.toml b/pyproject.toml index f9efd3d5..d1c98dad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,4 +1,4 @@ [tool.pytest.ini_options] markers = [ "integration: integration tests (deselect with '-m \"not slow\"')", -] \ No newline at end of file +] diff --git a/run-workflow.py b/run-workflow.py index 5b6aa48b..251df816 100644 --- a/run-workflow.py +++ b/run-workflow.py @@ -59,30 +59,23 @@ def load_yaml_with_deps(path: str) -> DictConfig: yaml_conf = load_yaml(path) use_case_dir = os.path.dirname(path) deps = [] - if yaml_conf.get('conf-dependencies'): - for dependency in yaml_conf['conf-dependencies']: - deps.append(load_yaml( - os.path.join( - use_case_dir, - dependency - )) - ) + if yaml_conf.get("conf-dependencies"): + for 
dependency in yaml_conf["conf-dependencies"]: + deps.append(load_yaml(os.path.join(use_case_dir, dependency))) return OmegaConf.merge(yaml_conf, *deps) if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Run a simple DT workflow.') + parser = argparse.ArgumentParser(description="Run a simple DT workflow.") parser.add_argument( - '-f', '--workflow-file', + "-f", + "--workflow-file", type=str, - help='Path to file describing DT a workflow.', - required=True - ) - parser.add_argument( - '--cwl', - action=argparse.BooleanOptionalAction + help="Path to file describing DT a workflow.", + required=True, ) + parser.add_argument("--cwl", action=argparse.BooleanOptionalAction) args = parser.parse_args() # 1. Parse workflow file @@ -92,55 +85,63 @@ def load_yaml_with_deps(path: str) -> DictConfig: # 2. Deploy steps (i.e., create micromamba envs), if not already there - for step in workflow.get('steps'): + for step in workflow.get("steps"): step_name, step_data = next(iter(step.items())) - if not os.path.exists(step_data['env']['prefix']): + if not os.path.exists(step_data["env"]["prefix"]): print(f'Deploying "{step_name}" step...') # Install python env from micromamba env definition file subprocess.run( - (f"micromamba env create -p {step_data['env']['prefix']} " - f"--file {step_data['env']['file']} -y").split(), - check=True + ( + f"micromamba env create -p {step_data['env']['prefix']} " + f"--file {step_data['env']['file']} -y" + ).split(), + check=True, ) # Install local python project from source, if present - if step_data['env'].get('source') is not None: + if step_data["env"].get("source") is not None: subprocess.run( - (f"micromamba run -p {step_data['env']['prefix']} " - "python -m pip install " # --no-deps - f"-e {step_data['env']['source']}").split(), - check=True + ( + f"micromamba run -p {step_data['env']['prefix']} " + "python -m pip install " # --no-deps + f"-e {step_data['env']['source']}" + ).split(), + check=True, ) # 3. Run the by invoking the CWL execution tool' - # invoke workflow with CWL + # invoke workflow with CWL if args.cwl: - print('Invoked workflow with CWL.') + print("Invoked workflow with CWL.") # raise NotImplementedError('CWL workflow definition need to be updated') print( - (f"cwltool --leave-tmpdir " - f"--outdir={workflow['root'] + '/data'} " - f"{workflow.get('workflowFileCWL')} " - f"{args.workflow_file}") + ( + f"cwltool --leave-tmpdir " + f"--outdir={workflow['root'] + '/data'} " + f"{workflow.get('workflowFileCWL')} " + f"{args.workflow_file}" + ) ) subprocess.run( - (f"cwltool --leave-tmpdir " - f"--outdir={workflow['root'] + '/data'} " - f"{workflow.get('workflowFileCWL')} " - f"{args.workflow_file}"), + ( + f"cwltool --leave-tmpdir " + f"--outdir={workflow['root'] + '/data'} " + f"{workflow.get('workflowFileCWL')} " + f"{args.workflow_file}" + ), shell=True, check=True, ) # invoke workflow step-by-step with 'micromamba run ...' 
else: - for step in workflow.get('steps'): + for step in workflow.get("steps"): step_name, step_data = next(iter(step.items())) # Args - args_str = '' - if step_data['args'] is not None: - args_str = " ".join([ - f"--{k} {v}" for k, v in step_data['args'].items() - ]) + args_str = "" + if step_data["args"] is not None: + args_str = " ".join( + [f"--{k} {v}" for k, v in step_data["args"].items()] + ) print(f'Running "{step_name}" step...') print( @@ -148,7 +149,9 @@ def load_yaml_with_deps(path: str) -> DictConfig: f"{step_data['command']} {args_str} \n\n" ) subprocess.run( - (f"micromamba run -p {step_data['env']['prefix']} " - f"{step_data['command']} {args_str} ").split(), + ( + f"micromamba run -p {step_data['env']['prefix']} " + f"{step_data['command']} {args_str} " + ).split(), check=True, ) diff --git a/tests/use-cases/test_mnist.py b/tests/use-cases/test_mnist.py index 75661ec0..d31aa21a 100644 --- a/tests/use-cases/test_mnist.py +++ b/tests/use-cases/test_mnist.py @@ -26,7 +26,7 @@ def test_training_workflow(): subprocess.run(cmd.split(), check=True) # CWL - subprocess.run(cmd.split() + ['--cwl'], check=True) + subprocess.run(cmd.split() + ["--cwl"], check=True) @pytest.mark.integration @@ -41,4 +41,4 @@ def test_inference_workflow(): subprocess.run(cmd.split(), check=True) # CWL - subprocess.run(cmd.split() + ['--cwl'], check=True) + subprocess.run(cmd.split() + ["--cwl"], check=True) diff --git a/use-cases/cyclones/.gitignore b/use-cases/cyclones/.gitignore new file mode 100644 index 00000000..5a0ea349 --- /dev/null +++ b/use-cases/cyclones/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +.vscode +data +experiments +.DS_Store \ No newline at end of file diff --git a/use-cases/cyclones/dataloader.py b/use-cases/cyclones/dataloader.py new file mode 100644 index 00000000..645fe8b1 --- /dev/null +++ b/use-cases/cyclones/dataloader.py @@ -0,0 +1,218 @@ +import gdown +import numpy as np +import logging + +from os import listdir, makedirs +from os.path import join, exists +from itwinai.backend.components import DataGetter, DataPreproc +from typing import List, Optional + +from lib.macros import ( + PatchType, + Network, + Losses, + RegularizationStrength, + Activation, + LabelNoCyclone, + AugmentationType, +) +from lib.tfrecords.functions import read_tfrecord_as_tensor +from lib.scaling import save_tf_minmax +from lib.tfrecords.dataset import eFlowsTFRecordDataset +from lib.transform import ( + coo_left_right, + coo_up_down, + coo_rot180, + msk_left_right, + msk_up_down, + msk_rot180, +) + + +class TensorflowDataGetter(DataGetter): + def __init__( + self, + patch_type: PatchType, + shuffle: bool, + split_ratio: List[float], + batch_size: int, + augment: bool, + epochs: int, + target_scale: bool, + label_no_cyclone: LabelNoCyclone, + aug_type: AugmentationType, + experiment: dict, + shuffle_buffer: int = None, + ): + self.batch_size = batch_size + self.split_ratio = split_ratio + self.epochs = epochs + self.target_scale = target_scale + self.label_no_cyclone = label_no_cyclone.value + self.shuffle_buffer = shuffle_buffer + self.aug_type = aug_type.value + self.patch_type = patch_type.value + self.augment = augment + self.shuffle = shuffle + self.drv_vars, self.coo_vars = ( + experiment["DRV_VARS_1"], + experiment["COO_VARS_1"], + ) + self.msk_var = ( + None if experiment["MSK_VAR_1"] == "None" else experiment["MSK_VAR_1"] + ) + self.channels = [len(self.drv_vars), len(self.coo_vars)] + + # Shuffle + if shuffle: + np.random.shuffle(self.cyclone_files) + np.random.shuffle(self.adj_files) +
np.random.shuffle(self.random_files) + + # Patches types + if self.augment: + if self.msk_var: + self.aug_fns = { + "left_right": msk_left_right, + "up_down": msk_up_down, + "rot180": msk_rot180, + } + else: + self.aug_fns = { + "left_right": coo_left_right, + "up_down": coo_up_down, + "rot180": coo_rot180, + } + else: + self.aug_fns = {} + + def split_files(self, files, ratio): + n = len(files) + return ( + files[0 : int(ratio[0] * n)], + files[int(ratio[0] * n) : int((ratio[0] + ratio[1]) * n)], + ) + + def load(self): + # divide into train, valid and test dataset files + train_c_fs, valid_c_fs = self.split_files( + files=self.cyclone_files, ratio=self.split_ratio + ) + train_a_fs, valid_a_fs = self.split_files( + files=self.adj_files, ratio=self.split_ratio + ) + train_r_fs, valid_r_fs = self.split_files( + files=self.random_files, ratio=self.split_ratio + ) + + # merge all the files together + train_files = train_c_fs + train_a_fs + train_r_fs + valid_files = valid_c_fs + valid_a_fs + valid_r_fs + + # compute scaler on training data + Xt, _ = read_tfrecord_as_tensor( + filenames=train_files, + shape=self.shape, + drv_vars=self.drv_vars, + coo_vars=self.coo_vars, + msk_var=self.msk_var, + ) + X_scaler = save_tf_minmax(Xt.numpy(), outfile=self.scaler_file) + scalers = [X_scaler, None] + Xt = None + + # instantiate training, validation and test sets + # Contains: (dataset, n_count) + train_dataset = eFlowsTFRecordDataset( + cyc_fnames=train_c_fs, + adj_fnames=train_a_fs, + rnd_fnames=train_r_fs, + batch_size=self.batch_size, + epochs=self.epochs, + scalers=scalers, + target_scale=self.target_scale, + shape=self.shape, + label_no_cyclone=self.label_no_cyclone, + drv_vars=self.drv_vars, + coo_vars=self.coo_vars, + msk_var=self.msk_var, + shuffle_buffer=self.shuffle_buffer, + aug_fns=self.aug_fns, + patch_type=self.patch_type, + aug_type=self.aug_type, + ) + valid_dataset = eFlowsTFRecordDataset( + cyc_fnames=valid_c_fs, + adj_fnames=valid_a_fs, + rnd_fnames=valid_r_fs, + batch_size=self.batch_size, + epochs=self.epochs, + scalers=scalers, + target_scale=self.target_scale, + shape=self.shape, + label_no_cyclone=self.label_no_cyclone, + drv_vars=self.drv_vars, + coo_vars=self.coo_vars, + msk_var=self.msk_var, + shuffle_buffer=self.shuffle_buffer, + aug_fns=self.aug_fns, + patch_type=self.patch_type, + aug_type=self.aug_type, + ) + return train_dataset, valid_dataset + + def execute(self, args): + train, test = self.load() + logging.debug(f"Train, valid and test datasets loaded.") + return [train, test] + + def setup(self, args): + self.shape = args["shape"] + root_dir = args["root_dir"] + + # Download data + url = "https://drive.google.com/drive/folders/15DEq33MmtRvIpe2bNCg44lnfvEiHcPaf" + gdown.download_folder(url=url, quiet=False, output=join(root_dir, "data")) + + # Scalar fields + self.root_dir = root_dir + self.dataset_dir = join(root_dir, "data", "tfrecords", "trainval/") + self.scaler_file = join(args["scaler_dir"], "minmax.tfrecord") + + # get records filenames + self.cyclone_files = sorted( + [ + join(self.dataset_dir, f) + for f in listdir(self.dataset_dir) + if f.endswith(".tfrecord") and f.startswith(PatchType.CYCLONE.value) + ] + ) + if self.patch_type == PatchType.NEAREST.value: + self.adj_files = sorted( + [ + join(self.dataset_dir, f) + for f in listdir(self.dataset_dir) + if f.endswith(".tfrecord") and f.startswith(PatchType.NEAREST.value) + ] + ) + elif self.patch_type == PatchType.ALLADJACENT.value: + self.adj_files = sorted( + [ + join(self.dataset_dir, f) + for f in 
listdir(self.dataset_dir) + if f.endswith(".tfrecord") + and f.startswith(PatchType.ALLADJACENT.value) + ] + ) + self.random_files = sorted( + [ + join(self.dataset_dir, f) + for f in listdir(self.dataset_dir) + if f.endswith(".tfrecord") and f.startswith(PatchType.RANDOM.value) + ] + ) + + args["epochs"] = self.epochs + args["batch_size"] = self.batch_size + args["channels"] = self.channels + return args diff --git a/use-cases/cyclones/env-files/tensorflow-env.yml b/use-cases/cyclones/env-files/tensorflow-env.yml new file mode 100644 index 00000000..920a1750 --- /dev/null +++ b/use-cases/cyclones/env-files/tensorflow-env.yml @@ -0,0 +1,13 @@ +name: tensorflow-env +channels: + - tensorflow + - conda-forge +dependencies: + - tensorflow + - python=3.9.12 + - pip: + - tfx + - scipy==1.9.3 + - scikit-learn + - gdown + - git+https://github.com/User3574/C_T6.git#egg=itwinai&subdirectory=ai \ No newline at end of file diff --git a/use-cases/cyclones/executor.py b/use-cases/cyclones/executor.py new file mode 100644 index 00000000..9a61f3fa --- /dev/null +++ b/use-cases/cyclones/executor.py @@ -0,0 +1,64 @@ +import logging + +from os.path import join, exists +from os import listdir, makedirs +from itwinai.backend.components import Executor +from datetime import datetime +from lib.macros import PATCH_SIZE as patch_size, SHAPE as shape + + +class CycloneExecutor(Executor): + def __init__(self, run_name: str): + self.run_name = run_name + + def execute(self, pipeline): + args = None + for executable in pipeline: + args = executable.execute(args) + + def setup(self, args): + pipeline, root_dir = args[0], args[1] + print(pipeline, root_dir) + + # Paths, Folders + FORMATTED_DATETIME = str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) + MODEL_BACKUP_DIR = join(root_dir, "models/") + EXPERIMENTS_DIR = join(root_dir, "experiments") + RUN_DIR = join(EXPERIMENTS_DIR, self.run_name + "_" + FORMATTED_DATETIME) + SCALER_DIR = join(RUN_DIR, "scalers") + TENSORBOARD_DIR = join(RUN_DIR, "tensorboard") + CHECKPOINTS_DIR = join(RUN_DIR, "checkpoints") + + # Files + LOG_FILE = join(RUN_DIR, "run.log") + + # Create folders + makedirs(EXPERIMENTS_DIR, exist_ok=True) + makedirs(RUN_DIR, exist_ok=True) + makedirs(SCALER_DIR, exist_ok=True) + makedirs(TENSORBOARD_DIR, exist_ok=True) + makedirs(CHECKPOINTS_DIR, exist_ok=True) + + self.args = { + "root_dir": root_dir, + "experiment_dir": EXPERIMENTS_DIR, + "run_dir": RUN_DIR, + "scaler_dir": SCALER_DIR, + "tensorboard_dir": TENSORBOARD_DIR, + "checkpoints_dir": CHECKPOINTS_DIR, + "backup_dir": MODEL_BACKUP_DIR, + "log_file": LOG_FILE, + "shape": shape, + "patch_size": patch_size, + } + + # initialize logger + logging.basicConfig( + format="[%(asctime)s] %(levelname)s : %(message)s", + level=logging.DEBUG, + filename=LOG_FILE, + datefmt="%Y-%m-%d %H:%M:%S", + ) + + for executable in pipeline: + self.args = executable.setup(self.args) diff --git a/use-cases/cyclones/pipeline.yaml b/use-cases/cyclones/pipeline.yaml new file mode 100644 index 00000000..521f87b4 --- /dev/null +++ b/use-cases/cyclones/pipeline.yaml @@ -0,0 +1,31 @@ +executor: + class_path: executor.CycloneExecutor + init_args: + run_name: 'default' + +getter: + class_path: dataloader.TensorflowDataGetter + init_args: + patch_type: NEAREST + shuffle: False + split_ratio: [0.75, 0.25] + batch_size: 16 + augment: True + epochs: 1 + target_scale: False + label_no_cyclone: NONE + aug_type: ONLY_TCS + experiment: { + 'DRV_VARS_1': ['fg10', 'msl', 't_500', 't_300'], + 'COO_VARS_1': ['patch_cyclone'], + 'MSK_VAR_1': None 
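    # The keys above are read by dataloader.TensorflowDataGetter: DRV_VARS_1 and
    # COO_VARS_1 select the driver/coordinate variables and MSK_VAR_1 the optional
    # mask variable (the literal string "None" disables it). The enum-style
    # init_args (patch_type, label_no_cyclone, aug_type, and the trainer's enum
    # arguments below) are presumably resolved to the corresponding lib.macros
    # Enum members by jsonargparse when train.py instantiates the pipeline.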
+ } + +trainer: + class_path: trainer.TensorflowTrainer + init_args: + network: VGG_V1 + activation: LINEAR + regularization_strength: NONE + learning_rate: 0.0001 + loss: MAE diff --git a/use-cases/cyclones/train.py b/use-cases/cyclones/train.py new file mode 100644 index 00000000..8ee26dab --- /dev/null +++ b/use-cases/cyclones/train.py @@ -0,0 +1,33 @@ +import argparse + +from jsonargparse import ArgumentParser +from trainer import TensorflowTrainer +from dataloader import TensorflowDataGetter +from executor import CycloneExecutor +from itwinai.backend.utils import parse_pipe_config + +if __name__ == "__main__": + # Create CLI Parser + parser = argparse.ArgumentParser() + parser.add_argument("-p", "--pipeline", type=str) + parser.add_argument("-r", "--root_dir", type=str) + args = parser.parse_args() + + print(args.root_dir) + + # Create parser for the pipeline (ordered) + pipe_parser = ArgumentParser() + pipe_parser.add_subclass_arguments(CycloneExecutor, "executor") + pipe_parser.add_subclass_arguments(TensorflowDataGetter, "getter") + pipe_parser.add_subclass_arguments(TensorflowTrainer, "trainer") + + # Parse, Instantiate pipe + parsed = parse_pipe_config(args.pipeline, pipe_parser) + steps = pipe_parser.instantiate_classes(parsed) + # Extract Executor, Steps in Pipe + executor = steps.executor + pipe = [getattr(steps, arg) for arg in list(vars(steps))[1:]] + + # Run pipe + executor.setup([pipe, args.root_dir]) + executor.execute(pipe) diff --git a/use-cases/cyclones/trainer.py b/use-cases/cyclones/trainer.py new file mode 100644 index 00000000..7b4ce200 --- /dev/null +++ b/use-cases/cyclones/trainer.py @@ -0,0 +1,151 @@ +import tensorflow.keras as keras +import logging +import pandas as pd + +from os import listdir, makedirs +from datetime import datetime +from os.path import join, exists +from lib.strategy import get_mirrored_strategy +from lib.utils import Timer, saveparams, get_network_config, load_model +from itwinai.backend.components import Trainer +from lib.callbacks import ProcessBenchmark +from lib.macros import ( + PatchType, + Network, + Losses, + RegularizationStrength, + Activation, + LabelNoCyclone, + AugmentationType, +) + + +class TensorflowTrainer(Trainer): + def __init__( + self, + network: Network, + activation: Activation, + regularization_strength: RegularizationStrength, + learning_rate: float, + loss: Losses, + kernel_size: int = None, + model_backup: str = None, + cores: int = None, + ): + # Configurable + self.cores = cores + self.model_backup = model_backup + self.network = network.value + self.activation = activation.value + self.kernel_size = kernel_size + self.regularization_strength, self.regularizer = regularization_strength.value + self.loss_name, self.loss = loss.value + + # Optimizers, Losses + self.optimizer = keras.optimizers.Adam(learning_rate=learning_rate) + + def train(self, data): + (train_dataset, n_train), (valid_dataset, n_valid) = data[0], data[1] + + # set mirrored strategy + mirrored_strategy, n_devices = get_mirrored_strategy(cores=self.cores) + logging.debug(f"Mirrored strategy created with {n_devices} devices") + + # distribute datasets among MirroredStrategy's replicas + dist_train_dataset = mirrored_strategy.experimental_distribute_dataset( + train_dataset + ) + dist_valid_dataset = mirrored_strategy.experimental_distribute_dataset( + valid_dataset + ) + + # Inside the strategy load the model, data generators and train + with mirrored_strategy.scope(): + if not self.model_backup: + model = get_network_config( + 
network=self.network, + patch_size=self.patch_size, + activation=self.activation, + regularizer=self.regularizer, + kernel_size=self.kernel_size, + channels=self.channels, + ) + logging.debug(f"New model created") + else: + model = load_model(model_fpath=self.best_model_name) + logging.debug(f"Model loaded from backup at {self.best_model_name}") + + metrics = [keras.metrics.MeanAbsoluteError(name="mae")] + model.compile(loss=self.loss, optimizer=self.optimizer, metrics=metrics) + logging.debug(f"Model compiled") + + # print model summary to check if model's architecture is correct + print(model.summary()) + + # compute the steps per epoch for train and valid + steps_per_epoch = n_train // self.batch_size + validation_steps = n_valid // self.batch_size + + # train the model + model.fit( + dist_train_dataset, + validation_data=dist_valid_dataset, + steps_per_epoch=steps_per_epoch, + validation_steps=validation_steps, + epochs=self.epochs, + callbacks=self.callbacks, + ) + logging.debug(f"Model trained") + + # save the best model + model.save(self.last_model_name) + logging.debug(f"Saved training history") + + def execute(self, data): + self.train(data) + + def setup(self, args): + self.experiment_dir = args["experiment_dir"] + self.run_dir = args["run_dir"] + self.epochs = args["epochs"] + self.batch_size = args["batch_size"] + self.patch_size = args["patch_size"] + self.channels = args["channels"] + + # Paths + CHECKPOINTS_DIR = join(self.run_dir, "checkpoints") + + # files and csvs definition + CHECKPOINTS_FILEPATH = join(CHECKPOINTS_DIR, "model_{epoch:02d}.h5") + LOSS_METRICS_HISTORY_CSV = join(self.run_dir, "loss_metrics_history.csv") + BENCHMARK_HISTORY_CSV = join(self.run_dir, "benchmark_history.csv") + + self.callbacks = [ + keras.callbacks.EarlyStopping( + monitor="val_loss", + patience=100, + min_delta=0.0001, + restore_best_weights=True, + verbose=1, + mode="min", + ), + keras.callbacks.CSVLogger(LOSS_METRICS_HISTORY_CSV), + ProcessBenchmark(BENCHMARK_HISTORY_CSV), + keras.callbacks.ModelCheckpoint( + filepath=CHECKPOINTS_FILEPATH, + save_best_only=True, + monitor="val_loss", + mode="min", + save_weights_only=False, + verbose=1, + ), + ] + + # Check if model backup exists + if self.model_backup is not None and not exists(self.model_backup): + raise FileNotFoundError("Model backup file not found") + if self.model_backup: + self.best_model_name = join(self.model_backup, "best_model.h5") + self.last_model = join(self.run_dir, "last_model.h5") + + return args diff --git a/use-cases/cyclones/workflows/meta.yml b/use-cases/cyclones/workflows/meta.yml new file mode 100644 index 00000000..e6f54a3d --- /dev/null +++ b/use-cases/cyclones/workflows/meta.yml @@ -0,0 +1,4 @@ +# Configuration file for use case metadata +# Load with OmegaConf + +root: ./use-cases/cyclones \ No newline at end of file diff --git a/use-cases/cyclones/workflows/workflow-train.yml b/use-cases/cyclones/workflows/workflow-train.yml new file mode 100644 index 00000000..f27c6892 --- /dev/null +++ b/use-cases/cyclones/workflows/workflow-train.yml @@ -0,0 +1,16 @@ +# Workflow definition in classical style +# Load with OmegaConf +# Other configuration files to merge with this file via OmegaConf +conf-dependencies: + - meta.yml + +steps: + - Training: + doc: Run Tensorflow Cyclone training + command: python ${root}/train.py + env: + file: ${root}/env-files/tensorflow-env.yml + prefix: ${root}/.venv-tensorflow + args: + pipeline: ${root}/pipeline.yaml + root_dir: ${root} \ No newline at end of file diff --git 
a/use-cases/mnist/README.md b/use-cases/mnist/README.md deleted file mode 100644 index e0d97b11..00000000 --- a/use-cases/mnist/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# MNIST: toy example for DT workflows - -Read the latest version of the docs [here](https://intertwin-eu.github.io/T6.5-AI-and-ML/docs/use-cases/mnist.html). diff --git a/use-cases/mnist/env-files/preproc-env.yml b/use-cases/mnist/env-files/preproc-env.yml deleted file mode 100644 index 79decc54..00000000 --- a/use-cases/mnist/env-files/preproc-env.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: preproc-env -channels: - - pytorch - - conda-forge -dependencies: - - python=3.9.12 - - torchvision - - pytorch::cpuonly diff --git a/use-cases/mnist/inference-workflow.yml b/use-cases/mnist/inference-workflow.yml deleted file mode 100644 index 24382ee0..00000000 --- a/use-cases/mnist/inference-workflow.yml +++ /dev/null @@ -1,49 +0,0 @@ -# Load with OmegaConf - -# Other configuration files to merge with this file via OmegaConf -conf-dependencies: - - meta.yml - -steps: - - preprocessing: - doc: Download and split MNIST dataset into train and test sets - command: python ${root}/mnist-preproc.py - env: - file: ${root}/env-files/preproc-env.yml - prefix: ${root}/.venv-preproc - args: - output: ${datasets.preproc-images.location} - stage: test - - run-mlflow-server: - doc: Run MLFlow server on localhost - command: python ${root}/mlflow-server.py - env: - file: ${ai-root}/env-files/pytorch-lock.yml - prefix: ${ai-root}/.venv-pytorch - source: ${ai-root} - args: - path: ${datasets.mlflow-backend-store-uri.location} - port: ${mlflow.port} - - ml-inference: - doc: Apply a pre-trained neural network on unseen data and store them - command: itwinai predict - env: - file: ${ai-root}/env-files/pytorch-lock.yml - prefix: ${ai-root}/.venv-pytorch - source: ${ai-root} - args: - config: ${root}/mnist-ai-inference.yml - input-dataset: ${datasets.preproc-images.location} - predictions-location: ${datasets.ml-predictions.location} - ml-logs: ${datasets.ml-logs.location} - - stop-mlflow-server: - doc: Stop MLFlow server on localhost, if running - command: python ${root}/mlflow-server.py - env: - file: ${ai-root}/env-files/pytorch-lock.yml - prefix: ${ai-root}/.venv-pytorch - source: ${ai-root} - args: - mode: kill - port: ${mlflow.port} - diff --git a/use-cases/mnist/meta.yml b/use-cases/mnist/meta.yml deleted file mode 100644 index 1707cf92..00000000 --- a/use-cases/mnist/meta.yml +++ /dev/null @@ -1,26 +0,0 @@ -# Configuration file for use case metadata -# Load with OmegaConf - -# Use case root location. End without path '/' char! -root: ./use-cases/mnist - -# AI folder location. End without path '/' char! -ai-root: ./ai - -# Datasets registry -datasets: - preproc-images: - doc: Preprocessed MNIST images - location: ${root}/data/preproc-images - ml-logs: - doc: MLflow tracking URI for local logging - location: http://127.0.0.1:${mlflow.port} - mlflow-backend-store-uri: - doc: MLFlow server storage location - location: ${root}/data/ml-logs - ml-predictions: - doc: predictions on unseen data - location: ${root}/data/ml-predictions - -mlflow: - port: 5000 \ No newline at end of file diff --git a/use-cases/mnist/mlflow-server.py b/use-cases/mnist/mlflow-server.py deleted file mode 100644 index 0421efa1..00000000 --- a/use-cases/mnist/mlflow-server.py +++ /dev/null @@ -1,47 +0,0 @@ -import argparse -import sys -import subprocess - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Manage MLFLow server." 
- ) - parser.add_argument( - "-p", "--port", - type=int, - help="MLFlow server port.", - default=5000 - ) - parser.add_argument( - "-m", "--mode", - help="Start or kill MlFlow server.", - type=str, - default='run', - choices=('run', 'kill') - ) - parser.add_argument( - "--path", - type=str, - help="MLFlow server storage path (backend-store-uri).", - default=None - ) - args = parser.parse_args() - - if args.mode == 'kill': - # Kill server - print(f"Killing MLFlow server on localhost port {args.port}") - subprocess.run( - f"kill -9 $(lsof -t -i:{args.port})", - shell=True, - check=True, - stderr=subprocess.DEVNULL - ) - sys.exit() - - # Start server - print("Starting MLFlow server") - subprocess.Popen( - ('mlflow server --backend-store-uri ' - f'file:{args.path}').split(), - stderr=subprocess.DEVNULL - ) diff --git a/use-cases/mnist/mnist-ai-inference.yml b/use-cases/mnist/mnist-ai-inference.yml deleted file mode 100644 index da2fae31..00000000 --- a/use-cases/mnist/mnist-ai-inference.yml +++ /dev/null @@ -1,15 +0,0 @@ -# Inference configuration -inference: - type: lightning - experiment_name: MNIST classification lite - # Run ID in MLFlow server: pre-trained model - run_id: unk - ckpt_path: model/checkpoints/best-checkpoint/best-checkpoint.ckpt - train_config_artifact_path: pl-training.yml - conf: - # Lightning data module configuration - data: - class_path: itwinai.plmodels.mnist.MNISTDataModule - init_args: - data_dir: ${cli.input_dataset} - batch_size: 32 \ No newline at end of file diff --git a/use-cases/mnist/mnist-ai-train.yml b/use-cases/mnist/mnist-ai-train.yml deleted file mode 100644 index a2317fcc..00000000 --- a/use-cases/mnist/mnist-ai-train.yml +++ /dev/null @@ -1,119 +0,0 @@ -# Configuration file of AI workflows for MNIST use case -# Load with OmegaConf - -# Pytorch lightning config for training -train: - type: lightning - # Follows lightning config file format: - # https://pytorch-lightning.readthedocs.io/en/1.6.5/common/lightning_cli.html#multiple-models-and-or-datasets - conf: - seed_everything: 4231162351 - - # Lightning Trainer configuration - trainer: - accelerator: auto - strategy: auto - devices: auto - num_nodes: 1 - precision: 32-true - - # MLFlow logger (initial) configuration. 
- # Do not modify this field - logger: - class_path: lightning.pytorch.loggers.MLFlowLogger - init_args: - experiment_name: ${logger.experiment_name} - run_name: null - tracking_uri: null - tags: null - save_dir: ./mlruns - log_model: false - prefix: '' - artifact_location: null - run_id: null - - # Callbacks - callbacks: - - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping - init_args: - monitor: val_loss - patience: 2 - - class_path: lightning.pytorch.callbacks.lr_monitor.LearningRateMonitor - init_args: - logging_interval: step - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - dirpath: checkpoints - filename: best-checkpoint - save_top_k: 1 - verbose: true - monitor: val_loss - mode: min - - fast_dev_run: false - max_epochs: 1 - min_epochs: null - max_steps: -1 - min_steps: null - max_time: null - limit_train_batches: null - limit_val_batches: null - limit_test_batches: null - limit_predict_batches: null - overfit_batches: 0.0 - val_check_interval: null - check_val_every_n_epoch: 1 - num_sanity_val_steps: null - log_every_n_steps: null - enable_checkpointing: null - enable_progress_bar: null - enable_model_summary: null - accumulate_grad_batches: 1 - gradient_clip_val: null - gradient_clip_algorithm: null - deterministic: null - benchmark: null - inference_mode: true - use_distributed_sampler: true - profiler: null - detect_anomaly: false - barebones: false - plugins: null - sync_batchnorm: false - reload_dataloaders_every_n_epochs: 0 - default_root_dir: null - - # Lightning Model configuration - model: - class_path: itwinai.plmodels.mnist.LitMNIST - init_args: - hidden_size: 64 - - # Lightning data module configuration - data: - class_path: itwinai.plmodels.mnist.MNISTDataModule - init_args: - data_dir: ${cli.train_dataset} - batch_size: 32 - - # Torch Optimizer configuration - optimizer: - class_path: torch.optim.AdamW - init_args: - lr: 0.001 - - # Torch LR scheduler configuration - lr_scheduler: - class_path: torch.optim.lr_scheduler.ExponentialLR - init_args: - gamma: 0.1 - -# Mlflow -logger: - experiment_name: MNIST classification lite - description: A MLP classifier for MNIST dataset. - log_every_n_epoch: 1 - log_every_n_steps: 1 - # Name used in Models Registry. If given, it is automatically - # registered in the Models Registry. - registered_model_name: MNIST-clf-lite diff --git a/use-cases/mnist/mnist-preproc.cwl b/use-cases/mnist/mnist-preproc.cwl deleted file mode 100644 index 11f7af3a..00000000 --- a/use-cases/mnist/mnist-preproc.cwl +++ /dev/null @@ -1,69 +0,0 @@ -cwlVersion: v1.2 # Specifies the version of the Common Workflow Language (CWL) being used -class: CommandLineTool - -baseCommand: [conda, run] -# The command to be executed by the tool. It runs the 'conda' command with 'run' subcommand, -# then sets the path to the virtual environment to be used, and finally runs the 'mnist-preproc.py' -# script using the 'python' interpreter. - -requirements: - EnvVarRequirement: - envDef: - FILE_READ_BUFFER_SIZE: "10" -# The following requirement sets the environment variable 'FILE_READ_BUFFER_SIZE' to a value of 10. -# This requirement defines an environment variable requirement, specifying the value of the environment -# variable to be set. - -stdout: ./logs/mnist-preproc-stdout.txt -# Specifies that the standard output of the command will be redirected to the file 'mnist-preproc-stdout.txt' -# located in the 'logs' directory. 
- -inputs: - preprocessEnvironment: - type: Directory - inputBinding: - position: 1 - prefix: -p - # Specifies that the 'preprocessEnvironment' input is a directory. - # The 'inputBinding' section provides additional information on how this input should be passed - # to the command line tool. 'position' specifies the position of the argument in the command line, - # and 'prefix' specifies the prefix to be used for the argument. - - preprocessScript: - type: File - inputBinding: - position: 2 - prefix: python - # Specifies that the 'preprocessScript' input is a file. - # 'position' and 'prefix' are used to pass this input to the command line tool. - - rawDatasetPath: - type: Directory? - inputBinding: - position: 3 - prefix: --input - # Specifies that the 'rawDatasetPath' input is an optional directory. - # 'position' and 'prefix' are used to pass this input to the command line tool. - - preprocessOutput: - type: string? - inputBinding: - position: 4 - prefix: --output - # Specifies that the 'preprocessOutput' input is an optional string. - # 'position' and 'prefix' are used to pass this input to the command line tool. - -outputs: - preprocessingStdout: - type: stdout - # Specifies that the 'preprocessingStdout' output is the standard output of the command. - - preprocessedDatasetPath: - type: Directory - outputBinding: - glob: "use-cases/mnist/data/preproc-images" - # Specifies that the 'preprocessedDatasetPath' output is a directory. - # 'glob' specifies the glob pattern to find the output directory. - - - diff --git a/use-cases/mnist/mnist-preproc.py b/use-cases/mnist/mnist-preproc.py deleted file mode 100644 index ead73ef5..00000000 --- a/use-cases/mnist/mnist-preproc.py +++ /dev/null @@ -1,42 +0,0 @@ -import argparse -from torchvision.datasets import MNIST - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Preprocessing of MNIST dataset.") - parser.add_argument( - "-o", "--output", - type=str, - help="Path where to store preprocessed datasets.", - default=None - ) - # Syntactic sugar - parser.add_argument( - "-s", "--stage", - type=str, - help="Kind of dataset split to use.", - default='train', - choices=('train', 'test') - ) - args = parser.parse_args() - - if args.stage == 'train': - # Download and store training dataset - MNIST(args.output, train=True, download=True) - if args.stage == 'test': - # Download and store test dataset - MNIST(args.output, train=False, download=True) - - print( - """ - ****************************** - * Called MNIST preprocessing * - ****************************** - - - Download dataset - - Split the dataset in training and inference - - Preprocess it - - Store it to local filesystem - - """ - ) diff --git a/use-cases/mnist/tensorflow/dataloader.py b/use-cases/mnist/tensorflow/dataloader.py new file mode 100644 index 00000000..74096cc5 --- /dev/null +++ b/use-cases/mnist/tensorflow/dataloader.py @@ -0,0 +1,37 @@ +import tensorflow.keras as keras + +from itwinai.backend.components import DataGetter, DataPreproc + + +class TensorflowDataGetter(DataGetter): + def __init__(self): + pass + + def load(self): + return keras.datasets.mnist.load_data() + + def execute(self, args): + train, test = self.load() + return [train, test] + + def setup(self, args): + pass + + +class TensorflowDataPreproc(DataPreproc): + def __init__(self, classes: int): + self.classes = classes + + def preproc(self, datasets): + preprocessed = [] + for dataset in datasets: + x, y = dataset + y = keras.utils.to_categorical(y, self.classes) + 
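            # keras.utils.to_categorical one-hot encodes the integer labels: with
            # classes=10 a label of 3 becomes [0, 0, 0, 1, 0, 0, 0, 0, 0, 0], so y
            # changes shape from (n,) to (n, 10), matching the CategoricalCrossentropy
            # loss configured in pipeline.yaml.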
preprocessed.append((x, y)) + return preprocessed + + def execute(self, datasets): + return self.preproc(datasets) + + def setup(self, args): + pass diff --git a/use-cases/mnist/tensorflow/env-files/tensorflow-env.yml b/use-cases/mnist/tensorflow/env-files/tensorflow-env.yml new file mode 100644 index 00000000..15fe252e --- /dev/null +++ b/use-cases/mnist/tensorflow/env-files/tensorflow-env.yml @@ -0,0 +1,13 @@ +name: tensorflow-env +channels: + - tensorflow + - conda-forge +dependencies: + - tensorflow + - python=3.9.12 + - pip: + - tfx + - scipy==1.9.3 + - git+https://github.com/User3574/C_T6.git#egg=itwinai&subdirectory=ai + - wandb + - mlflow \ No newline at end of file diff --git a/use-cases/mnist/tensorflow/pipeline.yaml b/use-cases/mnist/tensorflow/pipeline.yaml new file mode 100644 index 00000000..bc9f611b --- /dev/null +++ b/use-cases/mnist/tensorflow/pipeline.yaml @@ -0,0 +1,31 @@ +getter: + class_path: dataloader.TensorflowDataGetter + +preproc: + class_path: dataloader.TensorflowDataPreproc + init_args: + classes: 10 + +trainer: + class_path: trainer.TensorflowTrainer + init_args: + epochs: 10 + loss: { + class_name: "CategoricalCrossentropy", + config: { + from_logits: True + } + } + optimizer: { + class_name: "Adam", + config: { + learning_rate: 0.001 + } + } + model: + class_path: itwinai.models.tensorflow.mnist.MNIST_Model + init_args: + input_shape: [ 28, 28, 1 ] + output_shape: 10 + logger: + class_path: itwinai.backend.tensorflow.loggers.WanDBLogger diff --git a/use-cases/mnist/tensorflow/train.py b/use-cases/mnist/tensorflow/train.py new file mode 100644 index 00000000..018a0144 --- /dev/null +++ b/use-cases/mnist/tensorflow/train.py @@ -0,0 +1,31 @@ +import argparse + +from trainer import TensorflowTrainer +from dataloader import TensorflowDataGetter, TensorflowDataPreproc +from itwinai.backend.tensorflow.executor import TensorflowExecutor +from itwinai.backend.utils import parse_pipe_config +from jsonargparse import ArgumentParser + + +if __name__ == "__main__": + # Create CLI Parser + parser = argparse.ArgumentParser() + parser.add_argument("-p", "--pipeline", type=str) + args = parser.parse_args() + + # Create parser for the pipeline (ordered) + pipe_parser = ArgumentParser() + pipe_parser.add_subclass_arguments(TensorflowDataGetter, "getter") + pipe_parser.add_subclass_arguments(TensorflowDataPreproc, "preproc") + pipe_parser.add_subclass_arguments(TensorflowTrainer, "trainer") + + # Parse, Instantiate pipe + parsed = parse_pipe_config(args.pipeline, pipe_parser) + pipe = pipe_parser.instantiate_classes(parsed) + # Make pipe as a list + pipe = [getattr(pipe, arg) for arg in vars(pipe)] + + # Execute pipe + executor = TensorflowExecutor(args={}) + executor.setup(pipe) + executor.execute(pipe) diff --git a/use-cases/mnist/tensorflow/trainer.py b/use-cases/mnist/tensorflow/trainer.py new file mode 100644 index 00000000..f6c63fcb --- /dev/null +++ b/use-cases/mnist/tensorflow/trainer.py @@ -0,0 +1,33 @@ +import tensorflow.keras as keras + +from itwinai.backend.components import Trainer +from itwinai.backend.components import Logger + + +class TensorflowTrainer(Trainer): + def __init__( + self, + epochs: int, + loss: dict, + optimizer: dict, + model: keras.Model, + logger: Logger, + ): + # Configurable + self.loss = keras.losses.get(loss) + self.optimizer = keras.optimizers.get(optimizer) + self.epochs = epochs + self.model = model + self.logger = logger + + def train(self, data): + x, y = data + self.model.compile(optimizer=self.optimizer, loss=self.loss) + 
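        # self.loss and self.optimizer were deserialized in __init__ from the dicts in
        # pipeline.yaml via keras.losses.get / keras.optimizers.get, e.g.
        #   keras.optimizers.get({"class_name": "Adam", "config": {"learning_rate": 0.001}})
        # yields a configured Adam instance, so compile() above receives concrete objects.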
self.logger.log() + self.model.fit(x, y, epochs=self.epochs) + + def execute(self, data): + self.train(data[0]) + + def setup(self, args): + pass diff --git a/use-cases/mnist/tensorflow/workflows/meta.yml b/use-cases/mnist/tensorflow/workflows/meta.yml new file mode 100644 index 00000000..ad332fdb --- /dev/null +++ b/use-cases/mnist/tensorflow/workflows/meta.yml @@ -0,0 +1,4 @@ +# Configuration file for use case metadata +# Load with OmegaConf + +root: ./use-cases/mnist/tensorflow \ No newline at end of file diff --git a/use-cases/mnist/tensorflow/workflows/workflow-train.yml b/use-cases/mnist/tensorflow/workflows/workflow-train.yml new file mode 100644 index 00000000..63f5e773 --- /dev/null +++ b/use-cases/mnist/tensorflow/workflows/workflow-train.yml @@ -0,0 +1,15 @@ +# Workflow definition in classical style +# Load with OmegaConf +# Other configuration files to merge with this file via OmegaConf +conf-dependencies: + - meta.yml + +steps: + - Training: + doc: Run Tensorflow training sub-pipe for training + command: python ${root}/train.py + env: + file: ${root}/env-files/tensorflow-env.yml + prefix: ${root}/.venv-tensorflow + args: + pipeline: ${root}/pipeline.yaml \ No newline at end of file diff --git a/use-cases/mnist/torch/dataloader.py b/use-cases/mnist/torch/dataloader.py new file mode 100644 index 00000000..1e0b7b9a --- /dev/null +++ b/use-cases/mnist/torch/dataloader.py @@ -0,0 +1,57 @@ +import lightning as L + +from torchvision.datasets import MNIST +from torch.utils.data import DataLoader, random_split +from torchvision import transforms + + +class MNISTDataModule(L.LightningModule): + def __init__( + self, + path: str, + batch_size: int, + train_prop: float, + ) -> None: + super().__init__() + self.path = path + self.batch_size = batch_size + self.train_prop = train_prop + self.transform = transforms.Compose( + [ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)), + ] + ) + + def setup(self, stage=None): + if stage == "fit": + mnist_full = MNIST( + self.path, train=True, download=True, transform=self.transform + ) + n_train_samples = int(self.train_prop * len(mnist_full)) + n_val_samples = len(mnist_full) - n_train_samples + self.mnist_train, self.mnist_val = random_split( + mnist_full, [n_train_samples, n_val_samples] + ) + + if stage == "test": + self.mnist_test = MNIST( + self.path, train=False, download=True, transform=self.transform + ) + + if stage == "predict": + self.mnist_predict = MNIST( + self.path, train=False, download=True, transform=self.transform + ) + + def train_dataloader(self): + return DataLoader(self.mnist_train, batch_size=self.batch_size, num_workers=4) + + def val_dataloader(self): + return DataLoader(self.mnist_val, batch_size=self.batch_size, num_workers=4) + + def test_dataloader(self): + return DataLoader(self.mnist_test, batch_size=self.batch_size, num_workers=4) + + def predict_dataloader(self): + return DataLoader(self.mnist_predict, batch_size=self.batch_size, num_workers=4) diff --git a/use-cases/mnist/torch/env-files/lightning-env.yml b/use-cases/mnist/torch/env-files/lightning-env.yml new file mode 100644 index 00000000..65383eb0 --- /dev/null +++ b/use-cases/mnist/torch/env-files/lightning-env.yml @@ -0,0 +1,16 @@ +name: lightning-env +channels: + - pytorch + - conda-forge +dependencies: + - python=3.9.12 + - torchvision + - pytorch::cpuonly + - pip: + - pyyaml + - lightning + - marshmallow-dataclass + - omegaconf + - wandb + - mlflow + - git+https://github.com/User3574/C_T6.git#egg=itwinai&subdirectory=ai diff --git 
a/use-cases/mnist/torch/pipeline.yaml b/use-cases/mnist/torch/pipeline.yaml new file mode 100644 index 00000000..24a1e1f6 --- /dev/null +++ b/use-cases/mnist/torch/pipeline.yaml @@ -0,0 +1,86 @@ +trainer: + class_path: trainer.TorchTrainer + init_args: + # Pytorch lightning config for training + train: + seed_everything: 4231162351 + trainer: + accelerator: auto + accumulate_grad_batches: 1 + barebones: false + benchmark: null + callbacks: + - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping + init_args: + monitor: val_loss + patience: 2 + - class_path: lightning.pytorch.callbacks.lr_monitor.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + dirpath: checkpoints + filename: best-checkpoint + mode: min + monitor: val_loss + save_top_k: 1 + verbose: true + check_val_every_n_epoch: 1 + default_root_dir: null + detect_anomaly: false + deterministic: null + devices: auto + enable_checkpointing: null + enable_model_summary: null + enable_progress_bar: null + fast_dev_run: false + gradient_clip_algorithm: null + gradient_clip_val: null + inference_mode: true + limit_predict_batches: null + limit_test_batches: null + limit_train_batches: null + limit_val_batches: null + log_every_n_steps: null + logger: null + max_epochs: 1 + max_steps: -1 + max_time: null + min_epochs: null + min_steps: null + num_nodes: 1 + num_sanity_val_steps: null + overfit_batches: 0.0 + plugins: null + profiler: null + reload_dataloaders_every_n_epochs: 0 + strategy: auto + sync_batchnorm: false + use_distributed_sampler: true + val_check_interval: null + + # Lightning Model configuration + model: + class_path: itwinai.models.torch.mnist.MNISTModel + init_args: + hidden_size: 64 + + # Lightning data module configuration + data: + class_path: dataloader.MNISTDataModule + init_args: + batch_size: 32 + path: ./use-cases/mnist/torch + train_prop: 0.8 + + # Torch Optimizer configuration + optimizer: + class_path: torch.optim.AdamW + init_args: + lr: 0.001 + + # Torch LR scheduler configuration + lr_scheduler: + class_path: torch.optim.lr_scheduler.ExponentialLR + init_args: + gamma: 0.1 \ No newline at end of file diff --git a/use-cases/mnist/torch/train.py b/use-cases/mnist/torch/train.py new file mode 100644 index 00000000..6013ee7c --- /dev/null +++ b/use-cases/mnist/torch/train.py @@ -0,0 +1,28 @@ +import argparse + +from trainer import TorchTrainer +from itwinai.backend.torch.executor import TorchExecutor +from itwinai.backend.utils import parse_pipe_config +from jsonargparse import ArgumentParser + + +if __name__ == "__main__": + # Create CLI Parser + parser = argparse.ArgumentParser() + parser.add_argument("-p", "--pipeline", type=str) + args = parser.parse_args() + + # Create parser for the pipeline (ordered) + pipe_parser = ArgumentParser() + pipe_parser.add_subclass_arguments(TorchTrainer, "trainer") + + # Parse, Instantiate pipe + parsed = parse_pipe_config(args.pipeline, pipe_parser) + pipe = pipe_parser.instantiate_classes(parsed) + # Make pipe as a list + pipe = [getattr(pipe, arg) for arg in vars(pipe)] + + # Execute pipe + executor = TorchExecutor() + executor.setup(pipe) + executor.execute(pipe) diff --git a/use-cases/mnist/torch/trainer.py b/use-cases/mnist/torch/trainer.py new file mode 100644 index 00000000..5681c6a0 --- /dev/null +++ b/use-cases/mnist/torch/trainer.py @@ -0,0 +1,39 @@ +import os + +from itwinai.backend.components import Trainer +from itwinai.models.torch.mnist import MNISTModel +from 
dataloader import MNISTDataModule +from lightning.pytorch.cli import LightningCLI +from omegaconf import DictConfig, OmegaConf +from utils import load_yaml_with_deps_from_dict + + +class TorchTrainer(Trainer): + def __init__(self, train: dict): + # Convert from YAML + train_config: DictConfig = load_yaml_with_deps_from_dict( + train, os.path.dirname(__file__) + ) + train_config = OmegaConf.to_container(train_config, resolve=True) + self.conf = train_config + + def train(self, data): + cli = LightningCLI( + args=self.conf, + model_class=MNISTModel, + datamodule_class=MNISTDataModule, + run=False, + save_config_kwargs={ + "overwrite": True, + "config_filename": "pl-training.yml", + }, + subclass_mode_model=True, + subclass_mode_data=True, + ) + cli.trainer.fit(cli.model, datamodule=cli.datamodule) + + def execute(self, data): + self.train(data) + + def setup(self, args): + pass diff --git a/use-cases/mnist/torch/utils.py b/use-cases/mnist/torch/utils.py new file mode 100644 index 00000000..c4a1d854 --- /dev/null +++ b/use-cases/mnist/torch/utils.py @@ -0,0 +1,128 @@ +""" +Utilities for itwinai package. +""" +import os +import yaml + +from collections.abc import MutableMapping +from typing import Dict +from omegaconf import OmegaConf +from omegaconf.dictconfig import DictConfig + + +def check_server(uri: str) -> bool: + """Check if an HTTP server is reachable + + Args: + uri (str): Server URL + + Returns: + bool: True if reachable. + """ + import requests + from requests import ConnectionError + + success = True + try: + _ = requests.get(uri) + except ConnectionError: + success = False + return success + + +def load_yaml(path: str) -> Dict: + """Load YAML file as dict. + + Args: + path (str): path to YAML file. + + Raises: + exc: yaml.YAMLError for loading/parsing errors. + + Returns: + Dict: nested dict representation of parsed YAML file. + """ + with open(path, "r", encoding="utf-8") as yaml_file: + try: + loaded_config = yaml.safe_load(yaml_file) + except yaml.YAMLError as exc: + print(exc) + raise exc + return loaded_config + + +def load_yaml_with_deps_from_file(path: str) -> DictConfig: + """ + Load YAML file with OmegaConf and merge it with its dependencies + specified in the `conf-dependencies` field. + Assume that the dependencies live in the same folder of the + YAML file which is importing them. + + Args: + path (str): path to YAML file. + + Raises: + exc: yaml.YAMLError for loading/parsing errors. + + Returns: + DictConfig: nested representation of parsed YAML file. + """ + yaml_conf = load_yaml(path) + use_case_dir = os.path.dirname(path) + deps = [] + if yaml_conf.get("conf-dependencies"): + for dependency in yaml_conf["conf-dependencies"]: + deps.append(load_yaml(os.path.join(use_case_dir, dependency))) + + return OmegaConf.merge(yaml_conf, *deps) + + +def load_yaml_with_deps_from_dict(dict_conf, use_case_dir) -> DictConfig: + deps = [] + + if dict_conf.get("conf-dependencies"): + for dependency in dict_conf["conf-dependencies"]: + deps.append(load_yaml(os.path.join(use_case_dir, dependency))) + + return OmegaConf.merge(dict_conf, *deps) + + +def dynamically_import_class(name: str): + """ + Dynamically import class by module path. + Adapted from https://stackoverflow.com/a/547867 + + Args: + name (str): path to the class (e.g., mypackage.mymodule.MyClass) + + Returns: + __class__: class object. 
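    Example (illustrative only):
        >>> klass = dynamically_import_class("itwinai.models.torch.mnist.MNISTModel")
        >>> model = klass(hidden_size=64)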
+ """ + module, class_name = name.rsplit(".", 1) + mod = __import__(module, fromlist=[class_name]) + klass = getattr(mod, class_name) + return klass + + +def flatten_dict( + d: MutableMapping, parent_key: str = "", sep: str = "." +) -> MutableMapping: + """Flatten dictionary + + Args: + d (MutableMapping): nested dictionary to flatten + parent_key (str, optional): prefix for all keys. Defaults to ''. + sep (str, optional): separator for nested key concatenation. + Defaults to '.'. + + Returns: + MutableMapping: flattened dictionary with new keys. + """ + items = [] + for k, v in d.items(): + new_key = parent_key + sep + k if parent_key else k + if isinstance(v, MutableMapping): + items.extend(flatten_dict(v, new_key, sep=sep).items()) + else: + items.append((new_key, v)) + return dict(items) diff --git a/use-cases/mnist/torch/workflows/meta.yml b/use-cases/mnist/torch/workflows/meta.yml new file mode 100644 index 00000000..d0de7ac9 --- /dev/null +++ b/use-cases/mnist/torch/workflows/meta.yml @@ -0,0 +1,4 @@ +# Configuration file for use case metadata +# Load with OmegaConf + +root: ./use-cases/mnist/torch \ No newline at end of file diff --git a/use-cases/mnist/torch/workflows/workflow-train.yml b/use-cases/mnist/torch/workflows/workflow-train.yml new file mode 100644 index 00000000..c357542d --- /dev/null +++ b/use-cases/mnist/torch/workflows/workflow-train.yml @@ -0,0 +1,15 @@ +# Workflow definition in classical style +# Load with OmegaConf +# Other configuration files to merge with this file via OmegaConf +conf-dependencies: + - meta.yml + +steps: + - Training: + doc: Run PyTorchLightning training sub-pipe for training + command: python ${root}/train.py + env: + file: ${root}/env-files/lightning-env.yml + prefix: ${root}/.venv-lightning + args: + pipeline: ${root}/pipeline.yaml \ No newline at end of file diff --git a/use-cases/mnist/training-workflow.yml b/use-cases/mnist/training-workflow.yml deleted file mode 100644 index ca60c1e7..00000000 --- a/use-cases/mnist/training-workflow.yml +++ /dev/null @@ -1,76 +0,0 @@ -# Workflow definition in classical style - -# Load with OmegaConf - -# Other configuration files to merge with this file via OmegaConf -conf-dependencies: - - meta.yml - -steps: - - preprocessing: - doc: Download and split MNIST dataset into train and test sets - command: python ${root}/mnist-preproc.py - env: - file: ${root}/env-files/preproc-env.yml - prefix: ${root}/.venv-preproc - args: - output: ${datasets.preproc-images.location} - stage: train - - run-mlflow-server: - doc: Run MLFlow server on localhost - command: python ${root}/mlflow-server.py - env: - file: ${ai-root}/env-files/pytorch-lock.yml - prefix: ${ai-root}/.venv-pytorch - source: ${ai-root} - args: - path: ${datasets.mlflow-backend-store-uri.location} - port: ${mlflow.port} - - ml-training: - doc: Train a neural network to classify MNIST images - command: itwinai train - env: - file: ${ai-root}/env-files/pytorch-env-gpu.yml - prefix: ${ai-root}/.venv-pytorch - source: ${ai-root} - args: - train-dataset: ${datasets.preproc-images.location} - ml-logs: ${datasets.ml-logs.location} - config: ${root}/mnist-ai-train.yml - - stop-mlflow-server: - doc: Stop MLFlow server on localhost, if running - command: python ${root}/mlflow-server.py - env: - file: ${ai-root}/env-files/pytorch-lock.yml - prefix: ${ai-root}/.venv-pytorch - source: ${ai-root} - args: - mode: kill - port: ${mlflow.port} - -# #Workflow definition in CWL style - -workflowFileCWL: use-cases/mnist/workflow.cwl - -preprocessEnvironment: - class: 
Directory - path: .venv-preproc - -preprocessScript: - class: File - path: mnist-preproc.py - -# OmegaConf referencing does not work in CWL definition -#preprocessOutput: ${datasets.preproc-images.location} -preprocessOutput: use-cases/mnist/data/preproc-images - -#training step -trainingConfig: - class: File - path: mnist-ai-train.yml - -trainingEnvironment: - class: Directory - path: ../../ai/.venv-pytorch - -trainingCommand: train \ No newline at end of file diff --git a/use-cases/mnist/workflow.cwl b/use-cases/mnist/workflow.cwl deleted file mode 100644 index 79202558..00000000 --- a/use-cases/mnist/workflow.cwl +++ /dev/null @@ -1,56 +0,0 @@ -cwlVersion: v1.2 -class: Workflow - -inputs: - preprocessEnvironment: # Input directory for the preprocess environment - type: Directory - preprocessScript: # Input file for the preprocess script - type: File - preprocessInput: # Optional input directory for preprocess - type: Directory? - preprocessOutput: # Optional output string for preprocess - type: string? - - trainingConfig: # Input file for the training configuration - type: File - trainingEnvironment: # Input directory for the training environment - type: Directory - trainingCommand: # Input string for the training command - type: string - -outputs: - preprocessingStdout: # Output file for the preprocessing stdout - type: File - outputSource: preprocess/preprocessingStdout - - outputCheckpoint: # Output directory for the trained model checkpoint - type: Directory - outputSource: training/outputCheckpoint - - preprocessedDatasetPath: # Output directory for the preprocessed dataset - type: Directory - outputSource: preprocess/preprocessedDatasetPath - - mlLogs: # Output directory for the machine learning logs - type: Directory - outputSource: training/mlLogs - -steps: - preprocess: # Step for preprocessing - run: mnist-preproc.cwl - in: - preprocessEnvironment: preprocessEnvironment - preprocessScript: preprocessScript - rawDatasetPath: preprocessInput - preprocessOutput: preprocessOutput - out: [preprocessingStdout, preprocessedDatasetPath] - - training: # Step for training - run: ../../ai/training.cwl - in: - preprocessedDatasetPath: preprocess/preprocessedDatasetPath - trainingConfig: trainingConfig - trainingEnvironment: trainingEnvironment - trainingCommand: trainingCommand - preprocessingFlag: preprocess/preprocessingStdout - out: [outputCheckpoint, mlLogs]