Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

61 evaluator finish code for lmeh #65

Merged
merged 4 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/go/manager/records/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ func (record *NodeRecord) AppendTask(framework string, task string, date time.Ti

newTask := SignatureTaskRecord{
TaskData: baseTaskData,
LastSignature: "83332a7f32e4188bb276a18ff78620acfd3c6edbd68002b746bda990ed30d56c",
LastSignature: "",
Signatures: make([]SignatureSample, bufferLen),
CircBuffer: types.CircularBuffer{
CircBufferLen: bufferLen,
Expand Down
51 changes: 22 additions & 29 deletions apps/python/evaluator/activities/lmeh/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from packages.python.lmeh.pocket_lm_eval.models.pocket_network import EvaluatorLM
from packages.python.lmeh.utils.common import get_task_manager
from packages.python.lmeh.utils import generator as lmeh_generator
from packages.python.lmeh.utils import task_config as open_llm_config
from packages.python.protocol.protocol import PocketNetworkEvaluationTaskRequest
from packages.python.lmeh.utils.mongodb import MongoOperator
from packages.python.common.auto_heartbeater import auto_heartbeater
Expand Down Expand Up @@ -41,6 +42,10 @@ async def lmeh_evaluate(args: PocketNetworkEvaluationTaskRequest) -> bool:

if args.llm_args is None:
args.llm_args = {}
eval_logger.info(
"Starting activity lmeh_evaluate",
task_id = str(args.task_id),
)

mongo_client = config["mongo_client"]
mongo_operator = MongoOperator(client=mongo_client)
Expand Down Expand Up @@ -117,6 +122,8 @@ async def lmeh_evaluate(args: PocketNetworkEvaluationTaskRequest) -> bool:

# generate configurable tasks
try:
open_llm_cfg = open_llm_config.get_task_config(task_names[0])
open_llm_metrics = open_llm_cfg["metric"]
task_dict = lmeh_generator.get_configurable_task(
tasks=[task_name],
num_fewshot=args.num_fewshot,
Expand Down Expand Up @@ -151,7 +158,7 @@ async def lmeh_evaluate(args: PocketNetworkEvaluationTaskRequest) -> bool:
try:
# it is loading data from sql to a dataset
await task_dict[task_name].load_from_sql()
eval_logger.info("Task loaded successfully:", task_dict=task_dict)
eval_logger.debug("Task loaded successfully:", task_dict=task_dict)
except ApplicationError as e:
raise e
except Exception as error:
Expand All @@ -168,31 +175,17 @@ async def lmeh_evaluate(args: PocketNetworkEvaluationTaskRequest) -> bool:
eval_logger.debug("Generating LM")
lm = EvaluatorLM(**args.llm_args)
eval_logger.debug("LM generated successfully.")
results = await lmeh_generator.evaluate(
lm=lm,
task_dict=task_dict,
task_id=args.task_id,
mongo_client=mongo_client,
eval_logger=eval_logger,
bootstrap_iters=args.bootstrap_iters,
)
eval_logger.info("Evaluation completed successfully.")

if lm.rank == 0:
# add info about the model and few shot config
results["config"] = {
"model": args.requester_args.address,
"model_args": args.llm_args,
"bootstrap_iters": args.bootstrap_iters,
"gen_kwargs": args.gen_kwargs,
}

# todo: resolve code below
# results["git_hash"] = get_git_commit_hash()
# results["date"] = start_date
# add_env_info(results) # additional environment info to results

# assign evaluation's result to a general result under task name, because we iterate over all the tasks
r[task_name] = results

return True
try:
result = await lmeh_generator.evaluate(
lm=lm,
task_dict=task_dict,
task_id=args.task_id,
mongo_client=mongo_client,
selected_metrics=open_llm_metrics,
eval_logger=eval_logger,
)
eval_logger.info("Evaluation completed successfully.")
except ApplicationError as e:
raise e

return result
34 changes: 20 additions & 14 deletions apps/python/evaluator/activities/signatures/tokenizer_evaluate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
from hashlib import sha256
from datetime import datetime

from app.app import get_app_config, get_app_logger
from bson import ObjectId
Expand Down Expand Up @@ -45,30 +46,34 @@ async def tokenizer_evaluate(args: PocketNetworkEvaluationTaskRequest) -> bool:
mongo_client = config["mongo_client"]
mongo_operator = MongoOperator(client=mongo_client)

# Retrieve Task request.
task_data = await mongo_operator.get_task(args.task_id)

# Retrieve all responses
responses = await mongo_operator.retrieve_responses(args.task_id)
if len(responses) != 1:
eval_logger.error(f"Found {len(responses)} responses, only 1 is expected.")
raise ApplicationError(
f"Task ID {args.task_id}: Found {len(responses)} responses, only 1 is expected.",
args.task_id,
str(args.task_id),
type="ResponseError",
non_retryable=False,
)

# Create the result, empty for now
result = PocketNetworkMongoDBResultSignature(task_id=args.task_id, num_samples=0, signatures=[])
result = PocketNetworkMongoDBResultSignature(task_id=args.task_id,
status=responses[0]["response"]["error_code"],
num_samples=0,
result_height=responses[0]["response"]["height"],
result_time=datetime.today().isoformat(),
signatures=[])



# Get tokenizer jsons
tokenizer_decoded = False
try:
tokenizer_jsons = json.loads(responses[0]["response"]["response"])
tokenizer_decoded = True
except Exception as e:
eval_logger.debug(f"Exeption:", Exeption=str(e))
eval_logger.debug("Exeption:", Exeption=str(e))

tokenizer_ok = False
if tokenizer_decoded:
Expand All @@ -94,33 +99,34 @@ async def tokenizer_evaluate(args: PocketNetworkEvaluationTaskRequest) -> bool:
tokenizer_ok = True
except Exception as e:
# This is not an error, just a failure to retrieve the tokenizer from the response
eval_logger.info(f"Cannot load tokenizer from response.")
eval_logger.debug(f"Exeption:", Exeption=str(e))
eval_logger.info("Cannot load tokenizer from response.")
eval_logger.debug("Exeption:", Exeption=str(e))
tokenizer_ok = False

tokenizer_new = False
if tokenizer_ok:
# check if the tokenizer exists in db
tokenizer_db = await mongo_operator.get_tokenizer_entry(tokenizer_mongo_new.hash)
if tokenizer_db == None:
if tokenizer_db is None:
eval_logger.debug("Tokenizer does not exists.")
# the tokenizer is not tracked, we need to create an entry
tokenizer_new = True
try:
async with mongo_client.start_transaction() as session:
await mongo_client.db["tokenizers"].insert_many(
[tokenizer_mongo_new],
[tokenizer_mongo_new.model_dump(by_alias=True)],
ordered=False,
session=session,
)
eval_logger.debug("Saved new tokenizer to DB.")
except Exception as e:
eval_logger.error("Failed to save Tokenizer to MongoDB.")
eval_logger.error(f"Exeption:", Exeption=str(e))
eval_logger.error("Exeption:", Exeption=str(e))
raise ApplicationError("Failed to save tokenizer to MongoDB.", non_retryable=True)

# Update the result with valid data
result.num_samples = 1 # Always one
result.status = 0 # OK
result.signatures = [
SignatureSample(signature=str(tokenizer_mongo_new.hash), id=0) # This task has a single sample id
]
Expand All @@ -136,11 +142,11 @@ async def tokenizer_evaluate(args: PocketNetworkEvaluationTaskRequest) -> bool:
eval_logger.debug("Saved result to DB.")
except Exception as e:
eval_logger.error("Failed to save Result to MongoDB.")
eval_logger.error(f"Exeption:", Exeption=str(e))
eval_logger.error("Exeption:", Exeption=str(e))
raise ApplicationError("Failed to save result to MongoDB.", non_retryable=True)

eval_logger.info(
f"Status:", tokenizer_decoded=tokenizer_decoded, tokenizer_is_valid=tokenizer_ok, tokenizer_is_new=tokenizer_new
"Status:", tokenizer_decoded=tokenizer_decoded, tokenizer_is_valid=tokenizer_ok, tokenizer_is_new=tokenizer_new
)

return True
3 changes: 3 additions & 0 deletions apps/python/sampler/activities/lmeh/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from app.app import get_app_logger, get_app_config
from packages.python.protocol.protocol import PocketNetworkTaskRequest
from packages.python.lmeh.utils import generator as lmeh_generator
from packages.python.lmeh.utils import task_config as open_llm_config
from packages.python.lmeh.pocket_lm_eval.models.pocket_network import PocketNetworkLM
from activities.utils import auto_heartbeater
from packages.python.lmeh.utils import sql as lmeh_sql
Expand Down Expand Up @@ -57,6 +58,8 @@ async def lmeh_sample(args: PocketNetworkTaskRequest) -> bool:

# generate configurable tasks
try:
open_llm_cfg = open_llm_config.get_task_config(task_names[0])
args.num_fewshot = open_llm_cfg["num_fewshot"]
task_dict = lmeh_generator.get_configurable_task(
tasks=[task_name],
num_fewshot=args.num_fewshot,
Expand Down
2 changes: 1 addition & 1 deletion docker-compose/morse-poc/apps_configs/evaluator.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"postgres_uri": "postgresql://admin:admin@postgresql:5432/pocket-ml-testbench",
"mongodb_uri": "mongodb://mongodb:27017/pocket-ml-testbench",
"log_level": "DEBUG",
"log_level": "INFO",
"temporal": {
"host": "temporal",
"port": 7233,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
db = db.getSiblingDB('pocket-ml-testbench');

db.createCollection('tokenizers');
db.tokenizers.createIndex({hash: 1});
db.tokenizers.createIndex({hash: 1}, {unique: true});

db.createCollection('tasks');
db.tasks.createIndex({
Expand Down
Loading
Loading