Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

3 sampler interface between postgresql and manager #16

Merged
merged 14 commits into from
Apr 8, 2024
Merged
21 changes: 18 additions & 3 deletions apps/python/register/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ python3 apps/python/register/register.py \

# Dockers

* (Optional): Prepare a postgreSQL from `apps/python/postgresql` running `docker compose up`
* (Optional): Prepare a postgreSQL from `infrastructure/dev/postgresql` running `docker compose up postgresql pgadmin`

1. Build register
`./build.sh`
Expand All @@ -76,11 +76,26 @@ docker run -it --network host pocket_dataset_register \
/code/register.py \
--tasks arc_challenge,hellaswag,truthfulqa_mc2,mmlu_abstract_algebra,mmlu_anatomy,mmlu_astronomy,mmlu_business_ethics,mmlu_clinical_knowledge,mmlu_college_biology,mmlu_college_chemistry,mmlu_college_computer_science,mmlu_college_mathematics,mmlu_college_medicine,mmlu_college_physics,mmlu_computer_security,mmlu_conceptual_physics,mmlu_econometrics,mmlu_electrical_engineering,mmlu_elementary_mathematics,mmlu_formal_logic,mmlu_global_facts,mmlu_high_school_biology,mmlu_high_school_chemistry,mmlu_high_school_computer_science,mmlu_high_school_european_history,mmlu_high_school_geography,mmlu_high_school_government_and_politics,mmlu_high_school_macroeconomics,mmlu_high_school_mathematics,mmlu_high_school_microeconomics,mmlu_high_school_physics,mmlu_high_school_psychology,mmlu_high_school_statistics,mmlu_high_school_us_history,mmlu_high_school_world_history,mmlu_human_aging,mmlu_human_sexuality,mmlu_international_law,mmlu_jurisprudence,mmlu_logical_fallacies,mmlu_machine_learning,mmlu_management,mmlu_marketing,mmlu_medical_genetics,mmlu_miscellaneous,mmlu_moral_disputes,mmlu_moral_scenarios,mmlu_nutrition,mmlu_philosophy,mmlu_prehistory,mmlu_professional_accounting,mmlu_professional_law,mmlu_professional_medicine,mmlu_professional_psychology,mmlu_public_relations,mmlu_security_studies,mmlu_sociology,mmlu_us_foreign_policy,mmlu_virology,mmlu_world_religions,winogrande,gsm8k \
--dbname lm-evaluation-harness \
--user root \
--password root \
--user admin \
--password admin \
--host localhost \
--port 5432 \
--verbosity DEBUG
```

**Note**: If you have already downloaded HF datasets, mount them by adding `-v path/to/huggingface/directory:/root/.cache/huggingface/` to avoid re-downloading.


### Accessing the DB with PG Admin

To explore the generated database, the PG Admin is available in the docker compose (`infrastructure/dev/docker-compose.yaml`).
To access the service, go to `127.0.0.1:5050` and log in with the email `[email protected]` and password `admin`.
Then in the PG Admin page click on `Add New Server` and fill the data:
General tab:
- Name: `pokt-ml-datasets`
Connection tab:
- Host Name: `postgres_container`
- Port: `5432`
- Maintenance database: `lm-evaluation-harness`
- Username: `admin`
- Password: `admin`
199 changes: 5 additions & 194 deletions apps/python/register/register.py
Original file line number Diff line number Diff line change
@@ -1,197 +1,8 @@
################################
# lm-eval-harness (evaluator.py)
################################
import argparse
import json
import logging
import os
import re
import sys
from functools import partial
from pathlib import Path
from typing import Union
import utils.lmeh as lmeh

import numpy as np

from lm_eval import evaluator, utils
from lm_eval.evaluator import request_caching_arg_to_dict
from lm_eval.logging_utils import WandbLogger
from lm_eval.tasks import TaskManager, include_path, initialize_tasks
from lm_eval.utils import make_table, simple_parse_args_string

################################
# Custom modules
################################
from utils.uploader import get_ConfigurableTask
from utils.sql import create_dataset_table, register_task, create_task_table, checked_task
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT

def parse_eval_args() -> argparse.Namespace:
    """Parse the command-line arguments for the dataset register CLI.

    Returns:
        argparse.Namespace carrying the task selection, the PostgreSQL
        connection settings, an optional extra task-config path, and the
        logging verbosity.
    """
    arg_parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    arg_parser.add_argument(
        "--tasks",
        "-t",
        default=None,
        metavar="task1,task2",
        help="To get full list of tasks, use the command lm-eval --tasks list",
    )
    # The five PostgreSQL connection flags share the same shape (string-typed
    # with a default), so they are registered data-driven.
    for flag, default, help_text in (
        ("--dbname", "postgres", "Name of the database"),
        ("--user", "postgres", "Name of the user"),
        ("--password", "password", "Password for the user"),
        ("--host", "localhost", "Host name"),
        ("--port", "5432", "Port number"),
    ):
        arg_parser.add_argument(flag, type=str, default=default, help=help_text)
    arg_parser.add_argument(
        "--include_path",
        type=str,
        default=None,
        metavar="DIR",
        help="Additional path to include if there are external tasks to include.",
    )
    arg_parser.add_argument(
        "--verbosity",
        "-v",
        type=str.upper,  # normalizes e.g. "debug" -> "DEBUG"
        default="INFO",
        metavar="CRITICAL|ERROR|WARNING|INFO|DEBUG",
        help="Controls the reported logging error level. Set to DEBUG when testing + adding new task configurations for comprehensive log output.",
    )
    return arg_parser.parse_args()

def cli_register_task(args: Union[argparse.Namespace, None] = None) -> None:
    """Register lm-eval-harness tasks and their datasets in PostgreSQL.

    Resolves the requested task names, instantiates their ConfigurableTask
    objects (which downloads the backing datasets), and, for each task not
    already present in the DB, creates a dataset table and records the
    task/dataset pair.

    Args:
        args: Pre-parsed CLI namespace; when None the arguments are read
            from sys.argv via parse_eval_args().

    Raises:
        ValueError: when one or more requested task names cannot be found.
    """
    if not args:
        # we allow for args to be passed externally, else we parse them ourselves
        args = parse_eval_args()

    eval_logger = utils.eval_logger
    eval_logger.setLevel(getattr(logging, f"{args.verbosity}"))
    eval_logger.info(f"Verbosity set to {args.verbosity}")

    initialize_tasks(args.verbosity)
    task_manager = TaskManager(args.verbosity, include_path=args.include_path)

    if args.include_path is not None:
        eval_logger.info(f"Including path: {args.include_path}")
        include_path(args.include_path)

    if args.tasks is None:
        eval_logger.error("Need to specify task to evaluate.")
        sys.exit()
    elif args.tasks == "list":
        eval_logger.info(
            "Available Tasks:\n - {}".format("\n - ".join(task_manager.all_tasks))
        )
        sys.exit()
    else:
        if os.path.isdir(args.tasks):
            # A directory was given: load every YAML task config inside it.
            import glob

            task_names = []
            yaml_path = os.path.join(args.tasks, "*.yaml")
            for yaml_file in glob.glob(yaml_path):
                config = utils.load_yaml_config(yaml_file)
                task_names.append(config)
        else:
            task_list = args.tasks.split(",")
            task_names = task_manager.match_tasks(task_list)
            # Entries that are not registered task names may still be paths
            # to standalone YAML config files.
            for task in [task for task in task_list if task not in task_names]:
                if os.path.isfile(task):
                    config = utils.load_yaml_config(task)
                    task_names.append(config)
            task_missing = [
                task for task in task_list if task not in task_names and "*" not in task
            ]  # we don't want errors if a wildcard ("*") task name was used

            if task_missing:
                missing = ", ".join(task_missing)
                eval_logger.error(
                    f"Tasks were not found: {missing}\n"
                    f"{utils.SPACING}Try `lm-eval --tasks list` for list of available tasks",
                )
                raise ValueError(
                    f"Tasks not found: {missing}. Try `lm-eval --tasks list` for list of available tasks, or '--verbosity DEBUG' to troubleshoot task registration issues."
                )

    task_dict = get_ConfigurableTask(
        tasks=task_names,
        num_fewshot=None,
        check_integrity=False,
        gen_kwargs=None,
        task_manager=None,
        verbosity="INFO",
        predict_only=False,
    )

    # check and connect to the database
    try:
        conn = psycopg2.connect(
            dbname=args.dbname,
            user=args.user,
            password=args.password,
            host=args.host,
            port=args.port,
        )
        eval_logger.info("Connected to the database")
        # Obtain a DB Cursor
        cursor = conn.cursor()
    except Exception as e:
        # Include the driver error so connection failures are diagnosable
        # (the original logged a generic message and discarded `e`).
        eval_logger.error(f"Unable to connect to the database: {e}")
        sys.exit(1)

    create_task_table(connection=conn)

    for t in task_dict:
        task_name_i = t
        dataset_path = task_dict[t].config.dataset_path
        dataset_name = task_dict[t].config.dataset_name
        # Table name is "<dataset_path>--<dataset_name>", or just the path
        # when the dataset has no named config.
        table_name = dataset_path + "--" + dataset_name if dataset_name else dataset_path
        data = task_dict[t].dataset
        # check if the task is already registered
        if not checked_task(task_name_i, connection=conn):
            # Register task
            try:
                # Create dataset table
                create_dataset_table(table_name=table_name,
                                     data=data,
                                     connection=conn)
                # Register task/dataset pair
                register_task(task_name=task_name_i,
                              dataset_table_name=table_name,
                              connection=conn)
            except Exception as e:
                eval_logger.error(f"Error: {e}")
                conn.rollback()
                cursor.close()
                conn.close()
                sys.exit(1)
            eval_logger.info(f"Task {task_name_i} registered successfully")
        else:
            eval_logger.info(f"Task {task_name_i} already registered")

    # Release DB resources on the success path too (the original leaked them).
    cursor.close()
    conn.close()

# Script entry point: delegate to the lmeh helper module's registration CLI.
if __name__ == "__main__":
    # TODO: read configuration from file and pass it to the function (multiple tests support requirement)
    lmeh.cli_register_task()
2 changes: 1 addition & 1 deletion apps/python/register/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
lm_eval[dev] @ git+https://github.com/EleutherAI/lm-evaluation-harness@7d9922c80114218eaf43975b7655bb48cda84f50
lm_eval[dev]==0.4.2
psycopg2==2.9.9
Loading
Loading