-
Notifications
You must be signed in to change notification settings - Fork 17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Plugin support #263
base: main
Are you sure you want to change the base?
Plugin support #263
Changes from 13 commits
bf1d9fb
38bb8a7
5719230
8e8ee3e
32d7d93
28a38b8
bb3275f
3bc3822
06d4b7d
9174f00
7afa73f
3e45b25
5634f74
b798981
da9abbf
6b11f54
b84c16f
8d840cf
9725b36
5a658c3
cac5484
aab165b
265e42e
d9b2e83
bfb653f
9f83cd5
f656eee
5af7113
b22c2f2
c88fe2e
d66e0fe
c814ccb
a6d3efc
46cabe9
764e181
d9e8c1f
897a7da
d44023b
12022de
3cf27df
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES | ||
# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
name = "nccl_test_epilogue" | ||
|
||
[[Tests]] | ||
id = "Tests.1" | ||
test_name = "nccl_test_all_gather" | ||
time_limit = "00:20:00" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES | ||
# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
name = "nccl_test_prologue" | ||
|
||
[[Tests]] | ||
id = "Tests.1" | ||
test_name = "nccl_test_all_reduce" | ||
time_limit = "00:20:00" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES | ||
# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
name = "nccl_test_all_gather" | ||
description = "all_gather" | ||
test_template_name = "NcclTest" | ||
|
||
[cmd_args] | ||
"subtest_name" = "all_gather_perf_mpi" | ||
"ngpus" = "1" | ||
"minbytes" = "128" | ||
"maxbytes" = "4G" | ||
"iters" = "100" | ||
"warmup_iters" = "50" | ||
|
||
[extra_cmd_args] | ||
"--stepfactor" = "2" | ||
|
||
[extra_env_vars] | ||
"NCCL_TEST_SPLIT_MASK" = "0x7" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES | ||
# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
name = "nccl_test_all_reduce" | ||
description = "all_reduce" | ||
test_template_name = "NcclTest" | ||
|
||
[cmd_args] | ||
"subtest_name" = "all_reduce_perf_mpi" | ||
"ngpus" = "1" | ||
"minbytes" = "128" | ||
"maxbytes" = "16G" | ||
"iters" = "100" | ||
"warmup_iters" = "50" | ||
|
||
[extra_cmd_args] | ||
"--stepfactor" = "2" |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,6 +54,8 @@ class _TestScenarioTOML(BaseModel): | |
name: str | ||
job_status_check: bool = True | ||
tests: list[_TestRunTOML] = Field(alias="Tests", min_length=1) | ||
prologue: str = "" | ||
epilogue: str = "" | ||
|
||
@model_validator(mode="after") | ||
def check_no_self_dependency(self): | ||
|
@@ -99,9 +101,10 @@ class TestScenarioParser: | |
|
||
__test__ = False | ||
|
||
def __init__(self, file_path: Path, test_mapping: Dict[str, Test]) -> None: | ||
def __init__(self, file_path: Path, test_mapping: Dict[str, Test], plugin_mapping: Dict[str, TestScenario]) -> None: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Generally speaking, I do not agree that a Plugin is equal to a TestScenario: it doesn't support dependencies and [...]. To make it right, we should have another Pydantic model to hold the Plugin definition, and TestScenario would inherit from it. This will provide automatic verification and keep objects typed. This is not critical for this PR, but please keep it in mind in case of further development of this functionality. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should discuss. cc: @srinivas212, @srivatsankrishnan. |
||
self.file_path = file_path | ||
self.test_mapping = test_mapping | ||
self.plugin_mapping = plugin_mapping | ||
|
||
def parse(self) -> TestScenario: | ||
""" | ||
|
@@ -136,8 +139,14 @@ def _parse_data(self, data: Dict[str, Any]) -> TestScenario: | |
total_weight = sum(tr.weight for tr in ts_model.tests) | ||
normalized_weight = 0 if total_weight == 0 else 100 / total_weight | ||
|
||
prologue_name = data.get("prologue", "") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here you should use the actual object:
prologue, epilogue = None, None
if ts_model.prologue:
prologue = self.plugin_mapping.get(ts_model.prologue)
if ts_model.epilogue:
epilogue = self.plugin_mapping.get(ts_model.epilogue) |
||
epilogue_name = data.get("epilogue", "") | ||
|
||
prologue = self.plugin_mapping.get(prologue_name, None) if prologue_name else None | ||
epilogue = self.plugin_mapping.get(epilogue_name, None) if epilogue_name else None | ||
|
||
testruns_by_id: dict[str, TestRun] = { | ||
tr.id: self._create_section_test_run(tr, normalized_weight) for tr in ts_model.tests | ||
tr.id: self._create_section_test_run(tr, normalized_weight, prologue, epilogue) for tr in ts_model.tests | ||
} | ||
|
||
tests_data: dict[str, _TestRunTOML] = {tr.id: tr for tr in ts_model.tests} | ||
|
@@ -153,13 +162,21 @@ def _parse_data(self, data: Dict[str, Any]) -> TestScenario: | |
job_status_check=ts_model.job_status_check, | ||
) | ||
|
||
def _create_section_test_run(self, test_info: _TestRunTOML, normalized_weight: float) -> TestRun: | ||
def _create_section_test_run( | ||
self, | ||
test_info: _TestRunTOML, | ||
normalized_weight: float, | ||
prologue: Optional[TestScenario], | ||
TaekyungHeo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
epilogue: Optional[TestScenario], | ||
) -> TestRun: | ||
""" | ||
Create a section-specific Test object by copying from the test mapping. | ||
|
||
Args: | ||
test_info (Dict[str, Any]): Information of the test. | ||
normalized_weight (float): Normalized weight for the test. | ||
prologue (Optional[TestScenario]): TestScenario object representing the prologue sequence. | ||
epilogue (Optional[TestScenario]): TestScenario object representing the epilogue sequence. | ||
|
||
Returns: | ||
Test: Copied and updated Test object for the section. | ||
|
@@ -192,5 +209,7 @@ def _create_section_test_run(self, test_info: _TestRunTOML, normalized_weight: f | |
sol=test_info.sol, | ||
weight=test_info.weight * normalized_weight, | ||
ideal_perf=test_info.ideal_perf, | ||
prologue=prologue if prologue is not None else TestScenario(name="default_prologue", test_runs=[]), | ||
epilogue=epilogue if epilogue is not None else TestScenario(name="default_epilogue", test_runs=[]), | ||
) | ||
return tr |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be optional, and I suggest you add an automatic check that this field is not an empty string; see how it is done for the `tests` field.