diff --git a/apps/python/register/README.md b/apps/python/register/README.md index 784e42b..28dd0c3 100644 --- a/apps/python/register/README.md +++ b/apps/python/register/README.md @@ -64,7 +64,7 @@ python3 apps/python/register/register.py \ # Dockers -* (Optional): Prepare a postgreSQL from `apps/python/postgresql` running `docker compose up` +* (Optional): Prepare a postgreSQL from `infrastructure/dev/postgresql` running `docker compose up postgresql pgadmin` 1. Build register `./build.sh` @@ -76,11 +76,26 @@ docker run -it --network host pocket_dataset_register \ /code/register.py \ --tasks arc_challenge,hellaswag,truthfulqa_mc2,mmlu_abstract_algebra,mmlu_anatomy,mmlu_astronomy,mmlu_business_ethics,mmlu_clinical_knowledge,mmlu_college_biology,mmlu_college_chemistry,mmlu_college_computer_science,mmlu_college_mathematics,mmlu_college_medicine,mmlu_college_physics,mmlu_computer_security,mmlu_conceptual_physics,mmlu_econometrics,mmlu_electrical_engineering,mmlu_elementary_mathematics,mmlu_formal_logic,mmlu_global_facts,mmlu_high_school_biology,mmlu_high_school_chemistry,mmlu_high_school_computer_science,mmlu_high_school_european_history,mmlu_high_school_geography,mmlu_high_school_government_and_politics,mmlu_high_school_macroeconomics,mmlu_high_school_mathematics,mmlu_high_school_microeconomics,mmlu_high_school_physics,mmlu_high_school_psychology,mmlu_high_school_statistics,mmlu_high_school_us_history,mmlu_high_school_world_history,mmlu_human_aging,mmlu_human_sexuality,mmlu_international_law,mmlu_jurisprudence,mmlu_logical_fallacies,mmlu_machine_learning,mmlu_management,mmlu_marketing,mmlu_medical_genetics,mmlu_miscellaneous,mmlu_moral_disputes,mmlu_moral_scenarios,mmlu_nutrition,mmlu_philosophy,mmlu_prehistory,mmlu_professional_accounting,mmlu_professional_law,mmlu_professional_medicine,mmlu_professional_psychology,mmlu_public_relations,mmlu_security_studies,mmlu_sociology,mmlu_us_foreign_policy,mmlu_virology,mmlu_world_religions,winogrande,gsm8k \ --dbname 
lm-evaluation-harness \ ---user root \ ---password root \ +--user admin \ +--password admin \ --host localhost \ --port 5432 \ --verbosity DEBUG ``` **Note**:If you have already downloaded HF datasets, mount them adding `-v path/to/huggingface/directory:/root/.cache/huggingface/` to avoid re-download. + + +### Accessing the DB with PG Admin + +To explore the generated database, the PG Admin is available in the docker compose (`infrastructure/dev/docker-compose.yaml`). +To access the service just go to `127.0.0.1:5050` and use the credentials `admin@local.dev:admin`. +Then in the PG Admin page click on `Add New Server` and fill the data: +General tab: +- Name: `pokt-ml-datasets` +Connection tab: +- Host Name: `postgres_container` +- Port: `5432` +- Maintenance database: `lm-evaluation-harness` +- Username: `admin` +- Password: `admin` \ No newline at end of file diff --git a/apps/python/register/register.py b/apps/python/register/register.py index 12ed343..a344de6 100644 --- a/apps/python/register/register.py +++ b/apps/python/register/register.py @@ -1,197 +1,8 @@ -################################ -# lm-eval-harness (evaulator.py) -################################ -import argparse -import json -import logging -import os -import re -import sys -from functools import partial -from pathlib import Path -from typing import Union +import utils.lmeh as lmeh -import numpy as np - -from lm_eval import evaluator, utils -from lm_eval.evaluator import request_caching_arg_to_dict -from lm_eval.logging_utils import WandbLogger -from lm_eval.tasks import TaskManager, include_path, initialize_tasks -from lm_eval.utils import make_table, simple_parse_args_string - -################################ -# Custom modules -################################ -from utils.uploader import get_ConfigurableTask -from utils.sql import create_dataset_table, register_task, create_task_table, checked_task -import psycopg2 -from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT - -def 
parse_eval_args() -> argparse.Namespace: - parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) - parser.add_argument( - "--tasks", - "-t", - default=None, - metavar="task1,task2", - help="To get full list of tasks, use the command lm-eval --tasks list", - ) - parser.add_argument( - "--dbname", - type=str, - default="postgres", - help="Name of the database", - ) - parser.add_argument( - "--user", - type=str, - default="postgres", - help="Name of the user", - ) - parser.add_argument( - "--password", - type=str, - default="password", - help="Password for the user", - ) - parser.add_argument( - "--host", - type=str, - default="localhost", - help="Host name", - ) - parser.add_argument( - "--port", - type=str, - default="5432", - help="Port number", - ) - parser.add_argument( - "--include_path", - type=str, - default=None, - metavar="DIR", - help="Additional path to include if there are external tasks to include.", - ) - parser.add_argument( - "--verbosity", - "-v", - type=str.upper, - default="INFO", - metavar="CRITICAL|ERROR|WARNING|INFO|DEBUG", - help="Controls the reported logging error level. 
Set to DEBUG when testing + adding new task configurations for comprehensive log output.", - ) - return parser.parse_args() - -def cli_register_task(args: Union[argparse.Namespace, None] = None) -> None: - if not args: - # we allow for args to be passed externally, else we parse them ourselves - args = parse_eval_args() - - eval_logger = utils.eval_logger - eval_logger.setLevel(getattr(logging, f"{args.verbosity}")) - eval_logger.info(f"Verbosity set to {args.verbosity}") - - initialize_tasks(args.verbosity) - task_manager = TaskManager(args.verbosity, include_path=args.include_path) - - if args.include_path is not None: - eval_logger.info(f"Including path: {args.include_path}") - include_path(args.include_path) - - if args.tasks is None: - eval_logger.error("Need to specify task to evaluate.") - sys.exit() - elif args.tasks == "list": - eval_logger.info( - "Available Tasks:\n - {}".format("\n - ".join(task_manager.all_tasks)) - ) - sys.exit() - else: - if os.path.isdir(args.tasks): - import glob - - task_names = [] - yaml_path = os.path.join(args.tasks, "*.yaml") - for yaml_file in glob.glob(yaml_path): - config = utils.load_yaml_config(yaml_file) - task_names.append(config) - else: - task_list = args.tasks.split(",") - task_names = task_manager.match_tasks(task_list) - for task in [task for task in task_list if task not in task_names]: - if os.path.isfile(task): - config = utils.load_yaml_config(task) - task_names.append(config) - task_missing = [ - task for task in task_list if task not in task_names and "*" not in task - ] # we don't want errors if a wildcard ("*") task name was used - - if task_missing: - missing = ", ".join(task_missing) - eval_logger.error( - f"Tasks were not found: {missing}\n" - f"{utils.SPACING}Try `lm-eval --tasks list` for list of available tasks", - ) - raise ValueError( - f"Tasks not found: {missing}. Try `lm-eval --tasks list` for list of available tasks, or '--verbosity DEBUG' to troubleshoot task registration issues." 
- ) - - task_dict = get_ConfigurableTask( - tasks=task_names, - num_fewshot=None, - check_integrity=False, - gen_kwargs=None, - task_manager= None, - verbosity= "INFO", - predict_only= False, - ) - - # check and connect to the database - try: - conn = psycopg2.connect( - dbname=args.dbname, - user=args.user, - password=args.password, - host=args.host, - port=args.port - ) - eval_logger.info("Connected to the database") - # Obtain a DB Cursor - cursor = conn.cursor() - except Exception as e: - eval_logger.error("Unable to connect to the database") - exit(-1) - - create_task_table(connection=conn) +if __name__ == "__main__": - for t in task_dict: - task_name_i = t - dataset_path = task_dict[t].config.dataset_path - dataset_name = task_dict[t].config.dataset_name - table_name = dataset_path + "--" + dataset_name if dataset_name else dataset_path - data = task_dict[t].dataset - # check if the task is already registered - if not checked_task(task_name_i, connection= conn): - # Register task - try: - # Create dataset table - create_dataset_table(table_name = table_name, - data = data, - connection = conn) - # Regist task/dataset pair - register_task(task_name = task_name_i, - dataset_table_name = table_name, - connection = conn) - except Exception as e: - eval_logger.error(f"Error: {e}") - conn.rollback() - cursor.close() - conn.close() - exit(-1) - eval_logger.info(f"Task {task_name_i} registered successfully") - else: - eval_logger.info(f"Task {task_name_i} already registered") + # This is the task to upload datasets -if __name__ == "__main__": - cli_register_task() \ No newline at end of file + # TODO: read configuration from file and pass it to the function (mutiple tests support requirement) + lmeh.cli_register_task() \ No newline at end of file diff --git a/apps/python/register/requirements.txt b/apps/python/register/requirements.txt index c16fa3d..6b1cf98 100644 --- a/apps/python/register/requirements.txt +++ b/apps/python/register/requirements.txt @@ -1,2 +1,2 @@ 
-lm_eval[dev] @ git+https://github.com/EleutherAI/lm-evaluation-harness@7d9922c80114218eaf43975b7655bb48cda84f50 +lm_eval[dev]==0.4.2 psycopg2==2.9.9 \ No newline at end of file diff --git a/apps/python/register/utils/lmeh.py b/apps/python/register/utils/lmeh.py new file mode 100644 index 0000000..0db7d51 --- /dev/null +++ b/apps/python/register/utils/lmeh.py @@ -0,0 +1,291 @@ +import argparse +import logging +import os +import sys +from typing import Union +from lm_eval import utils +from lm_eval.tasks import TaskManager +from lm_eval.utils import eval_logger, positional_deprecated, simple_parse_args_string +from lm_eval.tasks import TaskManager, get_task_dict +from lm_eval.evaluator_utils import run_task_tests +from lm_eval.__main__ import parse_eval_args +from typing import List, Optional, Union + + +from utils.sql import create_dataset_table, register_task, create_task_table, checked_task +import psycopg2 + + +def setup_parser() -> argparse.Namespace: + ''' + Argument parsing for LM-Evaluation-Harness dataset uploading. 
+ ''' + parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument( + "--tasks", + "-t", + default=None, + type=str, + metavar="task1,task2", + help="To get full list of tasks, use the command lm-eval --tasks list", + ) + parser.add_argument( + "--dbname", + type=str, + default="lm-evaluation-harness", + help="Name of the database", + ) + parser.add_argument( + "--user", + type=str, + default="admin", + help="Name of the user", + ) + parser.add_argument( + "--password", + type=str, + default="admin", + help="Password for the user", + ) + parser.add_argument( + "--host", + type=str, + default="localhost", + help="Host name", + ) + parser.add_argument( + "--port", + type=str, + default="5432", + help="Port number", + ) + parser.add_argument( + "--include_path", + type=str, + default=None, + metavar="DIR", + help="Additional path to include if there are external tasks to include.", + ) + parser.add_argument( + "--verbosity", + "-v", + type=str.upper, + default="INFO", + metavar="CRITICAL|ERROR|WARNING|INFO|DEBUG", + help="Controls the reported logging error level. 
+ ''' + if not args: + # we allow for args to be passed externally, else we parse them ourselves + parser = setup_parser() + args = parse_eval_args(parser) + + eval_logger = utils.eval_logger + eval_logger.setLevel(getattr(logging, f"{args.verbosity}")) + eval_logger.info(f"Verbosity set to {args.verbosity}") + + if args.include_path is not None: + eval_logger.info(f"Including path: {args.include_path}") + task_manager = TaskManager(args.verbosity, include_path=args.include_path) + + if args.tasks is None: + eval_logger.error("Need to specify task to evaluate.") + sys.exit() + elif args.tasks == "list": + eval_logger.info( + "Available Tasks:\n - {}".format("\n - ".join(task_manager.all_tasks)) + ) + sys.exit() + else: + if os.path.isdir(args.tasks): + import glob + + task_names = [] + yaml_path = os.path.join(args.tasks, "*.yaml") + for yaml_file in glob.glob(yaml_path): + config = utils.load_yaml_config(yaml_file) + task_names.append(config) + else: + task_list = args.tasks.split(",") + task_names = task_manager.match_tasks(task_list) + for task in [task for task in task_list if task not in task_names]: + if os.path.isfile(task): + config = utils.load_yaml_config(task) + task_names.append(config) + task_missing = [ + task for task in task_list if task not in task_names and "*" not in task + ] # we don't want errors if a wildcard ("*") task name was used + + if task_missing: + missing = ", ".join(task_missing) + eval_logger.error( + f"Tasks were not found: {missing}\n" + f"{utils.SPACING}Try `lm-eval --tasks list` for list of available tasks", + ) + raise ValueError( + f"Tasks not found: {missing}. Try `lm-eval --tasks list` for list of available tasks, or '--verbosity DEBUG' to troubleshoot task registration issues." 
+ ) + + task_dict = get_ConfigurableTask( + tasks=task_names, + num_fewshot=None, + check_integrity=False, + gen_kwargs=None, + task_manager= None, + verbosity= "INFO", + predict_only= False, + ) + + # check and connect to the database + try: + conn = psycopg2.connect( + dbname=args.dbname, + user=args.user, + password=args.password, + host=args.host, + port=args.port + ) + eval_logger.info("Connected to the database") + # Obtain a DB Cursor + cursor = conn.cursor() + except Exception as e: + eval_logger.error("Unable to connect to the database") + exit(-1) + + create_task_table(connection=conn) + + for t in task_dict: + task_name_i = t + dataset_path = task_dict[t].config.dataset_path + dataset_name = task_dict[t].config.dataset_name + table_name = dataset_path + "--" + dataset_name if dataset_name else dataset_path + data = task_dict[t].dataset + # check if the task is already registered + if not checked_task(task_name_i, connection= conn): + # Register task + try: + # Create dataset table + create_dataset_table(table_name = table_name, + data = data, + connection = conn) + # Regist task/dataset pair + register_task(task_name = task_name_i, + dataset_table_name = table_name, + connection = conn) + except Exception as e: + eval_logger.error(f"Error: {e}") + conn.rollback() + cursor.close() + conn.close() + exit(-1) + eval_logger.info(f"Task {task_name_i} registered successfully") + else: + eval_logger.info(f"Task {task_name_i} already registered") + + +# cutted def simple_evaluate(..) from lm-eval-harness to generate config task +@positional_deprecated +def get_ConfigurableTask( + tasks: Optional[List[Union[str, dict, object]]] = None, + num_fewshot: Optional[int] = None, + check_integrity: bool = False, + gen_kwargs: Optional[str] = None, + task_manager: Optional[TaskManager] = None, + verbosity: str = "INFO", + predict_only: bool = False, + +): + """Instantiate and evaluate a model on a list of tasks. 
+ + :param tasks: list[Union[str, dict, Task]] + List of task names or Task objects. Task objects will be taken to have name task.EVAL_HARNESS_NAME if defined and type(task).__name__ otherwise. + :param num_fewshot: int + Number of examples in few-shot context + :param check_integrity: bool + Whether to run the relevant part of the test suite for the tasks + :param gen_kwargs: str + String arguments for model generation + Ignored for all tasks with loglikelihood output_type + :param predict_only: bool + If true only model outputs will be generated and returned. Metrics will not be evaluated + + :return + Task dictionary + """ + eval_logger.setLevel(getattr(logging, f"{verbosity}")) + + seed_message = [] + + if seed_message: + eval_logger.info(" | ".join(seed_message)) + + if tasks is None: + tasks = [] + if len(tasks) == 0: + raise ValueError( + "No tasks specified, or no tasks found. Please verify the task names." + ) + + if gen_kwargs is not None: + gen_kwargs = simple_parse_args_string(gen_kwargs) + eval_logger.warning( + "generation_kwargs specified through cli, these settings will update set parameters in yaml tasks. " + "Ensure 'do_sample=True' for non-greedy decoding!" + ) + if gen_kwargs == "": + gen_kwargs = None + + if task_manager is None: + task_manager = TaskManager(verbosity) + + task_dict = get_task_dict(tasks, task_manager) + for task_name in task_dict.keys(): + task_obj = task_dict[task_name] + if isinstance(task_obj, tuple): + _, task_obj = task_obj + if task_obj is None: + continue + + if task_obj.get_config("output_type") == "generate_until": + if gen_kwargs is not None: + task_obj.set_config( + key="generation_kwargs", value=gen_kwargs, update=True + ) + + if predict_only: + log_samples = True + eval_logger.info( + f"Processing {task_name} in output-only mode. Metrics will not be calculated!" + ) + # we have to change the class properties post-hoc. This is pretty hacky. 
+ task_obj.override_metric(metric_name="bypass") + + # override tasks' fewshot values to the provided num_fewshot arg value + # except if tasks have it set to 0 manually in their configs--then we should never overwrite that + if num_fewshot is not None: + if (default_num_fewshot := task_obj.get_config("num_fewshot")) == 0: + eval_logger.info( + f"num_fewshot has been set to 0 for {task_name} in its config. Manual configuration will be ignored." + ) + else: + eval_logger.warning( + f"Overwriting default num_fewshot of {task_name} from {default_num_fewshot} to {num_fewshot}" + ) + task_obj.set_config(key="num_fewshot", value=num_fewshot) + else: + # if num_fewshot not provided, and the task does not define a default one, default to 0 + if (default_num_fewshot := task_obj.get_config("num_fewshot")) is None: + task_obj.set_config(key="num_fewshot", value=0) + + if check_integrity: + run_task_tests(task_list=tasks) + + return task_dict diff --git a/apps/python/register/utils/sql.py b/apps/python/register/utils/sql.py index 539f511..96bb4f6 100644 --- a/apps/python/register/utils/sql.py +++ b/apps/python/register/utils/sql.py @@ -5,7 +5,6 @@ import decimal import datasets -from datasets import DatasetDict # Define the columns for the table _ID_NAME = "__id" diff --git a/apps/python/sampler/Dockerfile b/apps/python/sampler/Dockerfile new file mode 100644 index 0000000..55f4c01 --- /dev/null +++ b/apps/python/sampler/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.11.8-slim-bookworm + +RUN apt-get update -y && \ + apt-get install -y libpq-dev gcc git + +RUN mkdir -p /code +COPY ./requirements_dev.txt /code/requirements_dev.txt +RUN pip install --no-cache-dir -r /code/requirements_dev.txt + + + +# Mirror folder without unnecesary files +COPY ./sampler.py /code/sampler.py +COPY ./utils /code/utils + +ENTRYPOINT ["python3"] \ No newline at end of file diff --git a/apps/python/sampler/README.md b/apps/python/sampler/README.md index 248a27d..b1867f7 100644 --- 
a/apps/python/sampler/README.md +++ b/apps/python/sampler/README.md @@ -1,3 +1,31 @@ +# pocket_lm_eval +Files that follow the structure of `lm-eval-harness`. The intention is to avoid maintaining a fork. + +## pocket_lm_eval - task +* **[New]** `PocketNetworkTaskManager`: A class based on `TaskManager`, that is used to inject `pocket_args` into the `task.config.metadata`. + +## pocket_lm_eval - api +* **[New]** `PocketNetworkConfigurableTask`: A class based on `ConfigurableTask`, that retrieves samples from the sql database, based on `blacklist` id's & `uri` previously defined in `pocket_args`. In `PocketNetworkConfigurableTask.download` validations regarding `training_split`, `validation_split`, `test_split` and `fewshot_split` are followed as pointed out in the `lm-eval-harness` documentation. + +# generator + +* **[New]** A function `get_ConfigurableTask` to return only the random samples based on the configuration split and the blacklist. + +# Docker Run + +```bash +docker run -it --network host pocket_sampler \ +/code/sampler.py \ +--pocket_args '{"hellaswag": {"address": "random", "blacklist": [49908, 59949], "qty": 15}}' \ +--dbname lm-evaluation-harness \ +--user admin \ +--password admin \ +--host localhost \ +--port 5432 \ +--verbosity DEBUG +``` + + ### Create a copy of config.json and set the proper values for your environment ### Run CONFIG_PATH=/your/path/to/config.json python worker/main.py diff --git a/apps/python/sampler/build.sh b/apps/python/sampler/build.sh new file mode 100755 index 0000000..1a4cf6a --- /dev/null +++ b/apps/python/sampler/build.sh @@ -0,0 +1,6 @@ +DEFAULT_IMAGE_NAME="pocket_sampler" + +# Build sampler +docker build . 
--progress=plain --tag $DEFAULT_IMAGE_NAME:latest +# Broadcast image name and tag +echo "$DEFAULT_IMAGE_NAME" \ No newline at end of file diff --git a/apps/python/sampler/requirements_dev.txt b/apps/python/sampler/requirements_dev.txt new file mode 100644 index 0000000..711b3dc --- /dev/null +++ b/apps/python/sampler/requirements_dev.txt @@ -0,0 +1,3 @@ +lm_eval[dev]==0.4.2 +psycopg2==2.9.9 +SQLAlchemy==2.0.29 \ No newline at end of file diff --git a/apps/python/sampler/sampler.py b/apps/python/sampler/sampler.py new file mode 100644 index 0000000..fbc529e --- /dev/null +++ b/apps/python/sampler/sampler.py @@ -0,0 +1,6 @@ +import utils.lmeh as lmeh + +if __name__ == "__main__": + + # This is the task to generate samples request + lmeh.cli_register_task() \ No newline at end of file diff --git a/apps/python/sampler/utils/__init__.py b/apps/python/sampler/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/python/register/utils/uploader.py b/apps/python/sampler/utils/generator.py similarity index 80% rename from apps/python/register/utils/uploader.py rename to apps/python/sampler/utils/generator.py index 689501c..e4ab321 100644 --- a/apps/python/register/utils/uploader.py +++ b/apps/python/sampler/utils/generator.py @@ -1,8 +1,9 @@ import itertools import logging import random +import time from collections import defaultdict -from typing import TYPE_CHECKING, List, Optional, Union +from typing import TYPE_CHECKING, Dict, List, Optional, Union import numpy as np import torch @@ -21,6 +22,7 @@ ) from lm_eval.logging_utils import add_env_info, get_git_commit_hash from lm_eval.tasks import TaskManager, get_task_dict +from utils.pocket_lm_eval.tasks import PocketNetworkTaskManager from lm_eval.utils import eval_logger, positional_deprecated, simple_parse_args_string @@ -29,7 +31,7 @@ from lm_eval.tasks import Task -# cutted def simple_evaluate(..) 
from lm-eval-harness to generate config task commit:7d9922c80114218eaf43975b7655bb48cda84f50 +# adapted from evaluator.py # def simple_evaluate(..) from lm-eval-harness to generate config task @positional_deprecated def get_ConfigurableTask( tasks: Optional[List[Union[str, dict, object]]] = None, @@ -39,6 +41,7 @@ def get_ConfigurableTask( task_manager: Optional[TaskManager] = None, verbosity: str = "INFO", predict_only: bool = False, + pocket_args: Optional[Dict] = None, ): """Instantiate and evaluate a model on a list of tasks. @@ -59,6 +62,7 @@ def get_ConfigurableTask( Task dictionary """ eval_logger.setLevel(getattr(logging, f"{verbosity}")) + start_date = time.time() seed_message = [] @@ -82,12 +86,8 @@ def get_ConfigurableTask( gen_kwargs = None if task_manager is None: - task_manager = TaskManager(verbosity) + task_manager = PocketNetworkTaskManager(verbosity, pocket_args=pocket_args) - eval_logger.info( - "get_task_dict has been updated to accept an optional argument, `task_manager`" - "Read more here:https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/interface.md#external-library-usage" - ) task_dict = get_task_dict(tasks, task_manager) for task_name in task_dict.keys(): task_obj = task_dict[task_name] @@ -110,6 +110,8 @@ def get_ConfigurableTask( # we have to change the class properties post-hoc. This is pretty hacky. 
task_obj.override_metric(metric_name="bypass") + # override tasks' fewshot values to the provided num_fewshot arg value + # except if tasks have it set to 0 manually in their configs--then we should never overwrite that if num_fewshot is not None: if (default_num_fewshot := task_obj.get_config("num_fewshot")) == 0: eval_logger.info( @@ -120,8 +122,14 @@ def get_ConfigurableTask( f"Overwriting default num_fewshot of {task_name} from {default_num_fewshot} to {num_fewshot}" ) task_obj.set_config(key="num_fewshot", value=num_fewshot) + else: + # if num_fewshot not provided, and the task does not define a default one, default to 0 + if (default_num_fewshot := task_obj.get_config("num_fewshot")) is None: + task_obj.set_config(key="num_fewshot", value=0) if check_integrity: run_task_tests(task_list=tasks) - return task_dict \ No newline at end of file + return task_dict + +# adapted from evaluator.py # def evaluate(..) from lm-eval-harness to generate config task \ No newline at end of file diff --git a/apps/python/sampler/utils/lmeh.py b/apps/python/sampler/utils/lmeh.py new file mode 100644 index 0000000..07d6250 --- /dev/null +++ b/apps/python/sampler/utils/lmeh.py @@ -0,0 +1,172 @@ +################################ +# lm-eval-harness (evaulator.py) +################################ +import argparse +import json +import logging +import os +import re +import sys +from functools import partial +from pathlib import Path +from typing import Union + +import numpy as np + +from lm_eval import evaluator, utils +from lm_eval.evaluator import request_caching_arg_to_dict +from lm_eval.logging_utils import WandbLogger +from lm_eval.tasks import TaskManager +from lm_eval.utils import make_table, simple_parse_args_string + +from lm_eval.__main__ import parse_eval_args + +# Custom modules +from utils.generator import get_ConfigurableTask + + +def setup_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) + 
parser.add_argument( + "--pocket_args", + type=str, + default='{"hellaswag": {"address": "random", "blacklist": [0, 157, 900, 1200], "qty": 5}, "gsmk8": {"address": "random", "blacklist": [0, 157, 900, 1200], "qty": 5}}', + help="json string of pocket arguments", + ) + parser.add_argument( + "--dbname", + type=str, + default="lm-evaluation-harness", + help="Name of the database", + ) + parser.add_argument( + "--user", + type=str, + default="admin", + help="Name of the user", + ) + parser.add_argument( + "--password", + type=str, + default="admin", + help="Password for the user", + ) + parser.add_argument( + "--host", + type=str, + default="localhost", + help="Host name", + ) + parser.add_argument( + "--port", + type=str, + default="5432", + help="Port number", + ) + parser.add_argument( + "--include_path", + type=str, + default=None, + metavar="DIR", + help="Additional path to include if there are external tasks to include.", + ) + parser.add_argument( + "--verbosity", + "-v", + type=str.upper, + default="INFO", + metavar="CRITICAL|ERROR|WARNING|INFO|DEBUG", + help="Controls the reported logging error level. 
Set to DEBUG when testing + adding new task configurations for comprehensive log output.", + ) + return parser + +def cli_register_task(args: Union[argparse.Namespace, None] = None) -> None: + if not args: + # we allow for args to be passed externally, else we parse them ourselves + parser = setup_parser() + args = parse_eval_args(parser) + + eval_logger = utils.eval_logger + eval_logger.setLevel(getattr(logging, f"{args.verbosity}")) + eval_logger.info(f"Verbosity set to {args.verbosity}") + ############################################################ + # START: POCKET NETWORK CODE + ############################################################ + POSTGRES_DB_USER = args.user + POSTGRES_DB_PASS = args.password + POSTGRES_DB_HOST = args.host + POSTGRES_DB_PORT = args.port + POSTGRES_DB_NAME = args.dbname + + postgres_uri = f"postgresql://{POSTGRES_DB_USER}:{POSTGRES_DB_PASS}@{POSTGRES_DB_HOST}:{POSTGRES_DB_PORT}/{POSTGRES_DB_NAME}" + + # Generate pocket_args from string + pocket_args = json.loads(args.pocket_args) + # join keys from pocket_args, adding "," between them + tasks = ",".join(pocket_args.keys()) + #then add the uri to the pocket_args to be used during ConfigurableTask init + for k in pocket_args.keys(): + pocket_args[k]['uri'] = postgres_uri + ############################################################ + # END: POCKET NETWORK CODE + ############################################################ + + if args.include_path is not None: + eval_logger.info(f"Including path: {args.include_path}") + task_manager = TaskManager(args.verbosity, include_path=args.include_path) + + if tasks is None: + eval_logger.error("Need to specify task to evaluate.") + sys.exit() + elif tasks == "list": + eval_logger.info( + "Available Tasks:\n - {}".format("\n - ".join(task_manager.all_tasks)) + ) + sys.exit() + else: + if os.path.isdir(tasks): + import glob + + task_names = [] + yaml_path = os.path.join(tasks, "*.yaml") + for yaml_file in glob.glob(yaml_path): + config = 
utils.load_yaml_config(yaml_file) + task_names.append(config) + else: + task_list = tasks.split(",") + task_names = task_manager.match_tasks(task_list) + for task in [task for task in task_list if task not in task_names]: + if os.path.isfile(task): + config = utils.load_yaml_config(task) + task_names.append(config) + task_missing = [ + task for task in task_list if task not in task_names and "*" not in task + ] # we don't want errors if a wildcard ("*") task name was used + + if task_missing: + missing = ", ".join(task_missing) + eval_logger.error( + f"Tasks were not found: {missing}\n" + f"{utils.SPACING}Try `lm-eval --tasks list` for list of available tasks", + ) + raise ValueError( + f"Tasks not found: {missing}. Try `lm-eval --tasks list` for list of available tasks, or '--verbosity DEBUG' to troubleshoot task registration issues." + ) + + task_dict = get_ConfigurableTask( + tasks=task_names, + num_fewshot=None, + check_integrity=False, + gen_kwargs=None, + task_manager= None, + verbosity= "INFO", + predict_only= False, + pocket_args=pocket_args, + ) + + print(task_dict) + exit() + + +if __name__ == "__main__": + cli_register_task() \ No newline at end of file diff --git a/apps/python/sampler/utils/pocket_lm_eval/__init__.py b/apps/python/sampler/utils/pocket_lm_eval/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/python/sampler/utils/pocket_lm_eval/api/task.py b/apps/python/sampler/utils/pocket_lm_eval/api/task.py new file mode 100644 index 0000000..c356694 --- /dev/null +++ b/apps/python/sampler/utils/pocket_lm_eval/api/task.py @@ -0,0 +1,290 @@ +from typing import ( + Any, + Dict, + Iterable, + Iterator, + List, + Literal, + Mapping, + Optional, + Tuple, + Union, +) +import logging +import numpy as np +import random +import datasets +from lm_eval.api import samplers +from lm_eval.api.registry import ( + AGGREGATION_REGISTRY, + DEFAULT_METRIC_REGISTRY, + get_aggregation, + get_metric, + get_metric_aggregation, + is_higher_better, +) 
+from lm_eval.api.task import ConfigurableTask, TaskConfig, ALL_OUTPUT_TYPES +from lm_eval.filters import build_filter_ensemble +from lm_eval.prompts import get_prompt + +eval_logger = logging.getLogger("lm-eval") + +import psycopg2 +from urllib.parse import urlparse + +def get_max_min_ids(uri:str, table_name:str): + """ + This function connects to a PostgreSQL database and retrieves the min and max ids for each split + + Args: + uri: The URI of the PostgreSQL database + table_name: The name of the table in the database + + Returns: + A dictionary with the min and max ids for each split + Example: + { + 'train': {'min': 0, 'max': 100}, + 'validation': {'min': 101, 'max': 200}, + 'test': {'min': 201, 'max': 300} + } + """ + + try: + # Parse the URI to extract connection parameters + uri_parts = urlparse(uri) + dbname = uri_parts.path[1:] + username = uri_parts.username + password = uri_parts.password + host = uri_parts.hostname + port = uri_parts.port + + # Connect to your PostgreSQL database + conn = psycopg2.connect( + dbname=dbname, + user=username, + password=password, + host=host, + port=port + ) + + # Create a cursor object using the cursor() method + cursor = conn.cursor() + + # Construct the SQL query + sql_query = """ + SELECT + "__split", + MIN("__id") AS min_id, + MAX("__id") AS max_id + FROM + {} + GROUP BY + "__split"; + """.format(table_name) + + # Execute the SQL query + cursor.execute(sql_query) + + # Fetch all rows from the result + rows = cursor.fetchall() + _split_ranges = {} + # Print the result + for row in rows: + _split_ranges[row[0]] = {'min':row[1], 'max': row[2]} + + except (Exception, psycopg2.Error) as error: + print("Error while connecting to PostgreSQL", error) + + finally: + # Close the cursor and database connection + if conn: + cursor.close() + conn.close() + + return _split_ranges + +def get_split_from_ids(_split_ranges : dict, __ids:List[int]): + """ + This functions take a list of ids, and detect to which range they belong to + + 
Args: + _split_ranges: A dictionary with the min and max ids for each split + Example: + { + 'train': {'min': 0, 'max': 100}, + 'validation': {'min': 101, 'max': 200}, + 'test': {'min': 201, 'max': 300} + } + + __ids: A list of ids + Example + [202, 203, 204, 205] + + Returns: + The split range to which the ids belong to + Example: + 'test' + """ + split_ranges = {} + for k,v in _split_ranges.items(): + split_ranges[k] = set(range(v['min'],v['max']+1)) + + split_range = [] + for _id in __ids: + for k,v in split_ranges.items(): + if _id in v: + split_range.append(k) + break + # all ids should belong to a split range + if len(split_range) != len(__ids): + raise ValueError("Some ids do not belong to any split range") + + # all ids should belong to a unique split range + if len(set(split_range)) != 1: + raise ValueError("Some ids belong to more than one split. Please check that ids belong to only one split (test or validation).") + + + return list(set(split_range))[0] + + + + +class PocketNetworkConfigurableTask(ConfigurableTask): + + def check_split_exist(self, split: str, _split_ranges: dict): + """ + This function checks if a self.config.split exists in the keys of _split_ranges + """ + if split not in _split_ranges.keys(): + raise ValueError( + f"'{split}' split not found in _split_ranges: {_split_ranges.keys()}" + ) + + def add_string_ids_range(self, split: str, id_list_str: str, _split_ranges: dict): + """ + This function adds a range of ids to the id_list_str + + Args: + split: The split for which the range of ids should be added (this is one of self.config._split) + id_list_str: A string of ids separated by commas + _split_ranges: A dictionary with the min and max ids for each split + + Returns: + id_list_str: A string of ids separated by commas (to be used in a SQL query) + """ + min_range = _split_ranges[split]['min'] + max_range = _split_ranges[split]['max']+1 + eval_logger.debug(f"adding split \'{split}\' to id_list_str from ranges: 
{min_range}-{max_range}") + id_list_str += ', '.join(str(id) for id in range(min_range, max_range)) + return id_list_str + + def generate_random_numbers(self, table_name:str, _split:str, qty:int, min:int, max:int, blacklist: List[int] = []) -> List[int]: + """ + This function generates a list of random numbers within a range, excluding some blacklisted numbers + """ + # check that the quantity of numbers to generate is less than the range + if qty > (max - min + 1): + raise ValueError("Quantity of numbers to generate is greater than the range") + # Generate a list of random numbers within the range [min, max] excluding the blacklist + ints = set(range(min, max+1)) + if blacklist is not None: + original_len = len(ints) + # Remove the blacklisted numbers + ints = ints - set(blacklist) + # Check that the blacklist numbers were removed + if len(ints) == original_len: + raise ValueError("Blacklist numbers corresponding to '{}' table & '{}' split were not founded in the range [min, max] generated: [{}-{}]".format(table_name, _split, min, max)) + + choices = list(np.random.choice(list(ints), qty, replace=False)) + + return choices + + def get_SQL_where_clause(self, indexes, _split: str, _split_ranges: dict): + """ + This function constructs a WHERE clause for a SQL query. 
Apply the logic detailed in + + """ + + id_list_str = '' + if self.config.test_split: + eval_logger.debug("in self.config.test_split") + self.check_split_exist(self.config.test_split, _split_ranges) + if _split != self.config.test_split: + raise ValueError( + f"_split '{_split}' not equal to test_split '{self.config.test_split}'" + ) + + id_list_str += ', '.join(str(id) for id in indexes)+', ' + + eval_logger.debug(f"Test split:\n {id_list_str}") + if self.config.validation_split: + self.check_split_exist(self.config.validation_split, _split_ranges) + id_list_str = self.add_string_ids_range(self.config.validation_split, id_list_str, _split_ranges) + + if self.config.training_split: + self.check_split_exist(self.config.training_split, _split_ranges) + id_list_str = self.add_string_ids_range(self.config.training_split, id_list_str, _split_ranges) + + if self.config.fewshot_split: + self.check_split_exist(self.config.fewshot_split, _split_ranges) + id_list_str = self.add_string_ids_range(self.config.fewshot_split, id_list_str, _split_ranges) + + elif self.config.validation_split: + eval_logger.debug(f"in self.config.validation_split") + self.check_split_exist(self.config.validation_split, _split_ranges) + if _split != self.config.validation_split: + raise ValueError( + f"_split '{_split}' not equal to validation_split '{self.config.validation_split}'" + ) + id_list_str += ', '.join(str(id) for id in indexes)+', ' + eval_logger.debug(f"Validation split:\n {id_list_str}") + if self.config.training_split: + self.check_split_exist(self.config.training_split, _split_ranges) + id_list_str = self.add_string_ids_range(self.config.training_split, id_list_str, _split_ranges) + + if self.config.fewshot_split: + self.check_split_exist(self.config.fewshot_split, _split_ranges) + id_list_str = self.add_string_ids_range(self.config.fewshot_split, id_list_str, _split_ranges) + else: + # error + raise ValueError("Neither test_split nor validation_split in config, cannot proceed, 
please check get_SQL_where_clause") + + where_clause = f"__id IN ({id_list_str})" + + return where_clause + + def download(self, dataset_kwargs: Optional[Dict[str, Any]] = None) -> None: + + blacklist = self._config.metadata['pocket_args']['blacklist'] + qty = self._config.metadata['pocket_args']['qty'] + uri = self._config.metadata['pocket_args']['uri'] + table_name = self.DATASET_PATH + "--" + self.DATASET_NAME if self.DATASET_NAME else self.DATASET_PATH + _split_ranges = get_max_min_ids(table_name=table_name, uri=uri) + eval_logger.debug(f"Split ranges:\n{ _split_ranges}") + + # Its necesarry to detect wich is the split used to test to take the range, and then get random indexes + if self.config.test_split: + _split = self.config.test_split + # validate that the split exists in the _split_ranges + self.check_split_exist(_split, _split_ranges) + elif self.config.validation_split: + _split = self.config.validation_split + # validate that the split exists in the _split_ranges + self.check_split_exist(_split, _split_ranges) + else: + raise ValueError(f"Neither {self.config.test_split} nor {self.config.validation_split} in splits were found in '_split_ranges'. 
Available splits are {_split_ranges.keys()}") + + _range = _split_ranges[_split] + indexes = self.generate_random_numbers(table_name, _split, qty, _range['min'], _range['max'], blacklist) + + where_clause = self.get_SQL_where_clause(indexes, _split, _split_ranges) + # Construct the full SQL query + sql_query = f"SELECT * FROM {table_name} WHERE {where_clause};" + + ds = datasets.Dataset.from_sql(sql_query, con = uri) + dataset = datasets.DatasetDict() + for split in ds.unique("__split"): + eval_logger.debug(f"Split: {split}") + dataset[split] = ds.filter(lambda x: x["__split"] == split) + self.dataset = dataset.remove_columns(["__id", "__split"]) diff --git a/apps/python/sampler/utils/pocket_lm_eval/tasks/__init__.py b/apps/python/sampler/utils/pocket_lm_eval/tasks/__init__.py new file mode 100644 index 0000000..766d06c --- /dev/null +++ b/apps/python/sampler/utils/pocket_lm_eval/tasks/__init__.py @@ -0,0 +1,177 @@ +import collections +import logging +import os +from functools import partial +from typing import Dict, List, Mapping, Optional, Union + +from lm_eval import utils +from lm_eval.api.task import Task +from lm_eval.tasks import TaskManager +from utils.pocket_lm_eval.api.task import PocketNetworkConfigurableTask + + +class PocketNetworkTaskManager(TaskManager): + """PocketNetworkTaskManager indexes all tasks from the default `lm_eval/tasks/` + and an optional directory if provided. 
+ + """ + + def __init__(self, verbosity="INFO", include_path: Optional[str] = None, pocket_args: Optional[Dict] = None) -> None: + self.verbosity = verbosity + self.include_path = include_path + self.pocket_args = pocket_args + self.logger = utils.eval_logger + self.logger.setLevel(getattr(logging, f"{verbosity}")) + + self._task_index = self.initialize_tasks(include_path=include_path) + self._all_tasks = sorted(list(self._task_index.keys())) + + self.task_group_map = collections.defaultdict(list) + + def _load_individual_task_or_group( + self, + name_or_config: Optional[Union[str, dict]] = None, + parent_name: Optional[str] = None, + update_config: Optional[dict] = None, + yaml_path: Optional[str] = None, + ) -> Mapping: + def load_task(config, task, group=None, yaml_path=None): + if "include" in config: + if yaml_path is None: + raise ValueError + config.update( + utils.load_yaml_config( + yaml_path, + yaml_config={"include": config.pop("include")}, + mode="full", + ) + ) + if self._config_is_python_task(config): + task_object = config["class"]() + else: + config = self._process_alias(config, group=group) + task_object = PocketNetworkConfigurableTask(config=config) + if group is not None: + task_object = (group, task_object) + return {task: task_object} + if isinstance(name_or_config, str): + if update_config is not None: + # Process name_or_config as a dict instead + name_or_config = {"task": name_or_config, **update_config} + elif self._name_is_task(name_or_config): + task_config = self._get_config(name_or_config) + ############################################################ + # START: POCKET NETWORK CODE + ############################################################ + if 'metadata' in task_config.keys(): + task_config['metadata'].update({'pocket_args':self.pocket_args[task_config['task']]}) + else: + task_config['metadata'] = {'pocket_args':self.pocket_args[task_config['task']]} + ############################################################ + # END: POCKET 
NETWORK CODE + ############################################################ + return load_task(task_config, task=name_or_config, group=parent_name) + else: + group_name = name_or_config + subtask_list = self._get_tasklist(name_or_config) + if subtask_list == -1: + group_config = self._get_config(name_or_config) + subtask_list = group_config["task"] + + # This checks if we're at the root. + if parent_name is None: + group_config = self._get_config(name_or_config) + if set(group_config.keys()) > {"task", "group"}: + update_config = { + k: v + for k, v in group_config.items() + if k not in ["task", "group"] + } + yaml_path = self._get_yaml_path(group_name) + + if (update_config is not None) and ("group_alias" in update_config): + group_name = update_config["group_alias"] + update_config.pop("group_alias") + + if isinstance(name_or_config, dict): + if update_config is not None: + name_or_config = { + **name_or_config, + **update_config, + } + + if self._config_is_task(name_or_config): + name = name_or_config["task"] + # If the name is registered as a group + # if self._name_is_task(name) is False: + if self._name_is_group(name): + group_name = name + update_config = { + k: v for k, v in name_or_config.items() if k != "task" + } + subtask_list = self._get_tasklist(name) + if subtask_list == -1: + subtask_list = self._get_config(name)["task"] + else: + if self._name_is_registered(name): + base_task_config = self._get_config(name) + + # Check if this is a duplicate. 
+ if parent_name is not None: + name_or_config["group"] = parent_name + num_duplicate = len( + list( + filter( + lambda x: x.startswith(name), + self.task_group_map[parent_name], + ) + ) + ) + if num_duplicate > 0: + name = f"{name}-{num_duplicate}" + self.task_group_map[parent_name].append(name) + + task_config = { + **base_task_config, + **name_or_config, + } + else: + task_config = name_or_config + ############################################################ + # START: POCKET NETWORK CODE + ############################################################ + if 'metadata' in task_config.keys(): + task_config['metadata'].update({'pocket_args':self.pocket_args[task_config['task']]}) + else: + task_config['metadata'] = {'pocket_args':self.pocket_args[task_config['task']]} + ############################################################ + # END: POCKET NETWORK CODE + ############################################################ + return load_task( + task_config, task=name, group=parent_name, yaml_path=yaml_path + ) + else: + group_name = name_or_config["group"] + subtask_list = name_or_config["task"] + if set(name_or_config.keys()) > {"task", "group"}: + update_config = { + k: v + for k, v in name_or_config.items() + if k not in ["task", "group"] + } + + all_subtasks = {} + if parent_name is not None: + all_subtasks = {group_name: (parent_name, None)} + + fn = partial( + self._load_individual_task_or_group, + parent_name=group_name, + update_config=update_config, + yaml_path=yaml_path, + ) + all_subtasks = { + **all_subtasks, + **dict(collections.ChainMap(*map(fn, subtask_list))), + } + return all_subtasks \ No newline at end of file diff --git a/infrastructure/dev/.env b/infrastructure/dev/.env index 011cf16..7b4c79c 100644 --- a/infrastructure/dev/.env +++ b/infrastructure/dev/.env @@ -4,6 +4,7 @@ TEMPORAL_UI_VERSION=2.22.3 POSTGRESQL_VERSION=14.11-bookworm POSTGRES_PASSWORD=admin POSTGRES_USER=admin +POSTGRES_DB=lm-evaluation-harness PGADMIN_VERSION=8.4 
PGADMIN_DEFAULT_EMAIL=admin@local.dev
PGADMIN_DEFAULT_PASSWORD=admin
diff --git a/infrastructure/dev/docker-compose.yaml b/infrastructure/dev/docker-compose.yaml
index 22c8046..30f8acc 100644
--- a/infrastructure/dev/docker-compose.yaml
+++ b/infrastructure/dev/docker-compose.yaml
@@ -10,6 +10,7 @@ services:
     environment:
       POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
       POSTGRES_USER: ${POSTGRES_USER}
+      POSTGRES_DB: ${POSTGRES_DB}
     networks:
       - temporal
       - postgresql
diff --git a/infrastructure/dev/postgresql/.gitignore b/infrastructure/dev/postgresql/.gitignore
new file mode 100644
index 0000000..6320cd2
--- /dev/null
+++ b/infrastructure/dev/postgresql/.gitignore
@@ -0,0 +1 @@
+data
\ No newline at end of file
diff --git a/infrastructure/dev/postgresql/servers.json b/infrastructure/dev/postgresql/servers.json
index c274061..377cc94 100644
--- a/infrastructure/dev/postgresql/servers.json
+++ b/infrastructure/dev/postgresql/servers.json
@@ -5,7 +5,7 @@
     "Group": "Servers",
     "Host": "postgresql",
     "Port": 5432,
-    "MaintenanceDB": "postgres",
+    "MaintenanceDB": "lm-evaluation-harness",
     "Username": "admin",
     "PassFile": "/pgadmin4/pgpass",
     "SSLMode": "prefer"