Merge branch 'master' into fix/new_start_and_stop_wb_buttons
Showing 30 changed files with 514 additions and 319 deletions.
54 changes: 54 additions & 0 deletions
...rces/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability.py
@@ -0,0 +1,54 @@
from kfp import compiler, dsl, kubernetes
from kfp.dsl import PipelineTask

# Runtime: Pytorch with ROCm and Python 3.9 (UBI 9)
common_base_image = (
    "quay.io/modh/runtime-images@sha256:76d2a5d9ca0bb35c4d9962a7ee6798672afd3a5c7d226d87ef65f952e7cb93cf"
)


def add_gpu_toleration(task: PipelineTask, accelerator_type: str, accelerator_limit: int):
    print(f"Adding GPU tolerations: {accelerator_type}({accelerator_limit})")
    task.set_accelerator_type(accelerator=accelerator_type)
    task.set_accelerator_limit(accelerator_limit)
    kubernetes.add_toleration(task, key=accelerator_type, operator="Exists", effect="NoSchedule")


@dsl.component(
    base_image=common_base_image
)
def verify_gpu_availability(gpu_toleration: bool):
    import torch

    cuda_available = torch.cuda.is_available()
    device_count = torch.cuda.device_count()
    print("------------------------------")
    print("GPU availability")
    print("------------------------------")
    print(f"cuda available: {cuda_available}")
    print(f"device count: {device_count}")
    if gpu_toleration:
        assert torch.cuda.is_available()
        assert torch.cuda.device_count() > 0
        t = torch.tensor([5, 5, 5], dtype=torch.int64, device='cuda')
    else:
        assert not torch.cuda.is_available()
        assert torch.cuda.device_count() == 0
        t = torch.tensor([5, 5, 5], dtype=torch.int64)
    print(f"tensor: {t}")
    print("GPU availability test: PASS")


@dsl.pipeline(
    name="pytorch-amd-gpu-availability",
    description="Verifies pipeline tasks run on GPU nodes only when tolerations are added",
)
def pytorch_amd_gpu_availability():
    verify_gpu_availability(gpu_toleration=False).set_caching_options(False)

    task_with_toleration = verify_gpu_availability(gpu_toleration=True).set_caching_options(False)
    add_gpu_toleration(task_with_toleration, "amd.com/gpu", 1)


if __name__ == "__main__":
    compiler.Compiler().compile(pytorch_amd_gpu_availability, package_path=__file__.replace(".py", "_compiled.yaml"))
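
A note on the torch.cuda calls above: ROCm builds of PyTorch surface the HIP backend through the torch.cuda API, which is why this AMD sample can assert torch.cuda.is_available(). As a minimal sketch (not part of this commit), the two builds can be told apart at runtime via torch.version.hip and torch.version.cuda:

# Illustrative only: distinguish ROCm from CUDA builds of PyTorch.
# On ROCm wheels torch.version.hip is a version string; on CUDA wheels
# torch.version.cuda is set instead. Both are None on CPU-only builds.
import torch

def describe_gpu_backend() -> str:
    if torch.version.hip is not None:
        return f"ROCm/HIP build: {torch.version.hip}"
    if torch.version.cuda is not None:
        return f"CUDA build: {torch.version.cuda}"
    return "CPU-only build"

print(describe_gpu_backend())
print(f"visible devices: {torch.cuda.device_count()}")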
139 changes: 139 additions & 0 deletions
...pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability_compiled.yaml
@@ -0,0 +1,139 @@
# PIPELINE DEFINITION
# Name: pytorch-amd-gpu-availability
# Description: Verifies pipeline tasks run on GPU nodes only when tolerations are added
components:
  comp-verify-gpu-availability:
    executorLabel: exec-verify-gpu-availability
    inputDefinitions:
      parameters:
        gpu_toleration:
          parameterType: BOOLEAN
  comp-verify-gpu-availability-2:
    executorLabel: exec-verify-gpu-availability-2
    inputDefinitions:
      parameters:
        gpu_toleration:
          parameterType: BOOLEAN
deploymentSpec:
  executors:
    exec-verify-gpu-availability:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - verify_gpu_availability
        command:
        - sh
        - -c
        - "\nif ! [ -x \"$(command -v pip)\" ]; then\n    python3 -m ensurepip ||\
          \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
          \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\
          \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\
          $0\" \"$@\"\n"
        - sh
        - -ec
        - 'program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"

          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"

          '
        - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
          \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n    import torch\n\
          \n    cuda_available = torch.cuda.is_available()\n    device_count = torch.cuda.device_count()\n\
          \    print(\"------------------------------\")\n    print(\"GPU availability\"\
          )\n    print(\"------------------------------\")\n    print(f\"cuda available:\
          \ {cuda_available}\")\n    print(f\"device count: {device_count}\")\n  \
          \  if gpu_toleration:\n        assert torch.cuda.is_available()\n      \
          \  assert torch.cuda.device_count() > 0\n        t = torch.tensor([5, 5,\
          \ 5], dtype=torch.int64, device='cuda')\n    else:\n        assert not torch.cuda.is_available()\n\
          \        assert torch.cuda.device_count() == 0\n        t = torch.tensor([5,\
          \ 5, 5], dtype=torch.int64)\n    print(f\"tensor: {t}\")\n    print(\"GPU\
          \ availability test: PASS\")\n\n"
        image: quay.io/modh/runtime-images@sha256:76d2a5d9ca0bb35c4d9962a7ee6798672afd3a5c7d226d87ef65f952e7cb93cf
    exec-verify-gpu-availability-2:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - verify_gpu_availability
        command:
        - sh
        - -c
        - "\nif ! [ -x \"$(command -v pip)\" ]; then\n    python3 -m ensurepip ||\
          \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
          \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\
          \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\
          $0\" \"$@\"\n"
        - sh
        - -ec
        - 'program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"

          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"

          '
        - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
          \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n    import torch\n\
          \n    cuda_available = torch.cuda.is_available()\n    device_count = torch.cuda.device_count()\n\
          \    print(\"------------------------------\")\n    print(\"GPU availability\"\
          )\n    print(\"------------------------------\")\n    print(f\"cuda available:\
          \ {cuda_available}\")\n    print(f\"device count: {device_count}\")\n  \
          \  if gpu_toleration:\n        assert torch.cuda.is_available()\n      \
          \  assert torch.cuda.device_count() > 0\n        t = torch.tensor([5, 5,\
          \ 5], dtype=torch.int64, device='cuda')\n    else:\n        assert not torch.cuda.is_available()\n\
          \        assert torch.cuda.device_count() == 0\n        t = torch.tensor([5,\
          \ 5, 5], dtype=torch.int64)\n    print(f\"tensor: {t}\")\n    print(\"GPU\
          \ availability test: PASS\")\n\n"
        image: quay.io/modh/runtime-images@sha256:76d2a5d9ca0bb35c4d9962a7ee6798672afd3a5c7d226d87ef65f952e7cb93cf
        resources:
          accelerator:
            count: '1'
            type: amd.com/gpu
pipelineInfo:
  description: Verifies pipeline tasks run on GPU nodes only when tolerations are
    added
  name: pytorch-amd-gpu-availability
root:
  dag:
    tasks:
      verify-gpu-availability:
        cachingOptions: {}
        componentRef:
          name: comp-verify-gpu-availability
        inputs:
          parameters:
            gpu_toleration:
              runtimeValue:
                constant: false
        taskInfo:
          name: verify-gpu-availability
      verify-gpu-availability-2:
        cachingOptions: {}
        componentRef:
          name: comp-verify-gpu-availability-2
        inputs:
          parameters:
            gpu_toleration:
              runtimeValue:
                constant: true
        taskInfo:
          name: verify-gpu-availability-2
schemaVersion: 2.1.0
sdkVersion: kfp-2.9.0
---
platforms:
  kubernetes:
    deploymentSpec:
      executors:
        exec-verify-gpu-availability-2:
          tolerations:
          - effect: NoSchedule
            key: amd.com/gpu
            operator: Exists
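
The compiled IR above is a two-document YAML: the pipeline spec, then a platform spec whose platforms.kubernetes section carries the toleration for the second executor only. As a hedged sketch of how the file might be run (the endpoint host below is a placeholder, not part of this commit), it can be submitted with the KFP SDK:

# Hypothetical submission sketch; the host URL is a placeholder.
from kfp.client import Client

client = Client(host="https://my-kfp-endpoint.example.com")
result = client.create_run_from_pipeline_package(
    "pytorch_amd_gpu_availability_compiled.yaml",
    arguments={},  # this pipeline takes no parameters
)
print(f"started run: {result.run_id}")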
54 changes: 54 additions & 0 deletions
...s/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability.py
@@ -0,0 +1,54 @@
from kfp import compiler, dsl, kubernetes
from kfp.dsl import PipelineTask

# Runtime: Pytorch with CUDA and Python 3.9 (UBI 9)
common_base_image = (
    "quay.io/modh/runtime-images@sha256:cee154f6db15de27929362f91baa128fc4f79b9c1930ab0f27561174d39aadfa"
)


def add_gpu_toleration(task: PipelineTask, accelerator_type: str, accelerator_limit: int):
    print(f"Adding GPU tolerations: {accelerator_type}({accelerator_limit})")
    task.set_accelerator_type(accelerator=accelerator_type)
    task.set_accelerator_limit(accelerator_limit)
    kubernetes.add_toleration(task, key=accelerator_type, operator="Exists", effect="NoSchedule")


@dsl.component(
    base_image=common_base_image
)
def verify_gpu_availability(gpu_toleration: bool):
    import torch

    cuda_available = torch.cuda.is_available()
    device_count = torch.cuda.device_count()
    print("------------------------------")
    print("GPU availability")
    print("------------------------------")
    print(f"cuda available: {cuda_available}")
    print(f"device count: {device_count}")
    if gpu_toleration:
        assert torch.cuda.is_available()
        assert torch.cuda.device_count() > 0
        t = torch.tensor([5, 5, 5], dtype=torch.int64, device='cuda')
    else:
        assert not torch.cuda.is_available()
        assert torch.cuda.device_count() == 0
        t = torch.tensor([5, 5, 5], dtype=torch.int64)
    print(f"tensor: {t}")
    print("GPU availability test: PASS")


@dsl.pipeline(
    name="pytorch-nvidia-gpu-availability",
    description="Verifies pipeline tasks run on GPU nodes only when tolerations are added",
)
def pytorch_nvidia_gpu_availability():
    verify_gpu_availability(gpu_toleration=False).set_caching_options(False)

    task_with_toleration = verify_gpu_availability(gpu_toleration=True).set_caching_options(False)
    add_gpu_toleration(task_with_toleration, "nvidia.com/gpu", 1)


if __name__ == "__main__":
    compiler.Compiler().compile(pytorch_nvidia_gpu_availability, package_path=__file__.replace(".py", "_compiled.yaml"))
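
The add_gpu_toleration helper above only tolerates the GPU taint; actual placement on GPU nodes comes from the accelerator limit, which requests the extended resource. On clusters that also label their GPU node pool, a variant could pin the task explicitly with kfp-kubernetes' add_node_selector. A sketch under that assumption, with an illustrative label key and value:

# Hedged variant (not in this commit): toleration plus an explicit node
# selector. The "gpu-node-pool" label below is illustrative, not a known default.
from kfp import kubernetes
from kfp.dsl import PipelineTask

def add_gpu_toleration_and_selector(task: PipelineTask, accelerator_type: str, accelerator_limit: int):
    task.set_accelerator_type(accelerator=accelerator_type)
    task.set_accelerator_limit(accelerator_limit)
    kubernetes.add_toleration(task, key=accelerator_type, operator="Exists", effect="NoSchedule")
    # Restrict scheduling to nodes carrying the (hypothetical) GPU pool label.
    kubernetes.add_node_selector(task, label_key="gpu-node-pool", label_value="true")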
139 changes: 139 additions & 0 deletions
...eline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability_compiled.yaml
@@ -0,0 +1,139 @@
# PIPELINE DEFINITION
# Name: pytorch-nvidia-gpu-availability
# Description: Verifies pipeline tasks run on GPU nodes only when tolerations are added
components:
  comp-verify-gpu-availability:
    executorLabel: exec-verify-gpu-availability
    inputDefinitions:
      parameters:
        gpu_toleration:
          parameterType: BOOLEAN
  comp-verify-gpu-availability-2:
    executorLabel: exec-verify-gpu-availability-2
    inputDefinitions:
      parameters:
        gpu_toleration:
          parameterType: BOOLEAN
deploymentSpec:
  executors:
    exec-verify-gpu-availability:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - verify_gpu_availability
        command:
        - sh
        - -c
        - "\nif ! [ -x \"$(command -v pip)\" ]; then\n    python3 -m ensurepip ||\
          \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
          \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\
          \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\
          $0\" \"$@\"\n"
        - sh
        - -ec
        - 'program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"

          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"

          '
        - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
          \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n    import torch\n\
          \n    cuda_available = torch.cuda.is_available()\n    device_count = torch.cuda.device_count()\n\
          \    print(\"------------------------------\")\n    print(\"GPU availability\"\
          )\n    print(\"------------------------------\")\n    print(f\"cuda available:\
          \ {cuda_available}\")\n    print(f\"device count: {device_count}\")\n  \
          \  if gpu_toleration:\n        assert torch.cuda.is_available()\n      \
          \  assert torch.cuda.device_count() > 0\n        t = torch.tensor([5, 5,\
          \ 5], dtype=torch.int64, device='cuda')\n    else:\n        assert not torch.cuda.is_available()\n\
          \        assert torch.cuda.device_count() == 0\n        t = torch.tensor([5,\
          \ 5, 5], dtype=torch.int64)\n    print(f\"tensor: {t}\")\n    print(\"GPU\
          \ availability test: PASS\")\n\n"
        image: quay.io/modh/runtime-images@sha256:cee154f6db15de27929362f91baa128fc4f79b9c1930ab0f27561174d39aadfa
    exec-verify-gpu-availability-2:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - verify_gpu_availability
        command:
        - sh
        - -c
        - "\nif ! [ -x \"$(command -v pip)\" ]; then\n    python3 -m ensurepip ||\
          \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
          \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\
          \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\
          $0\" \"$@\"\n"
        - sh
        - -ec
        - 'program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"

          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"

          '
        - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
          \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n    import torch\n\
          \n    cuda_available = torch.cuda.is_available()\n    device_count = torch.cuda.device_count()\n\
          \    print(\"------------------------------\")\n    print(\"GPU availability\"\
          )\n    print(\"------------------------------\")\n    print(f\"cuda available:\
          \ {cuda_available}\")\n    print(f\"device count: {device_count}\")\n  \
          \  if gpu_toleration:\n        assert torch.cuda.is_available()\n      \
          \  assert torch.cuda.device_count() > 0\n        t = torch.tensor([5, 5,\
          \ 5], dtype=torch.int64, device='cuda')\n    else:\n        assert not torch.cuda.is_available()\n\
          \        assert torch.cuda.device_count() == 0\n        t = torch.tensor([5,\
          \ 5, 5], dtype=torch.int64)\n    print(f\"tensor: {t}\")\n    print(\"GPU\
          \ availability test: PASS\")\n\n"
        image: quay.io/modh/runtime-images@sha256:cee154f6db15de27929362f91baa128fc4f79b9c1930ab0f27561174d39aadfa
        resources:
          accelerator:
            count: '1'
            type: nvidia.com/gpu
pipelineInfo:
  description: Verifies pipeline tasks run on GPU nodes only when tolerations are
    added
  name: pytorch-nvidia-gpu-availability
root:
  dag:
    tasks:
      verify-gpu-availability:
        cachingOptions: {}
        componentRef:
          name: comp-verify-gpu-availability
        inputs:
          parameters:
            gpu_toleration:
              runtimeValue:
                constant: false
        taskInfo:
          name: verify-gpu-availability
      verify-gpu-availability-2:
        cachingOptions: {}
        componentRef:
          name: comp-verify-gpu-availability-2
        inputs:
          parameters:
            gpu_toleration:
              runtimeValue:
                constant: true
        taskInfo:
          name: verify-gpu-availability-2
schemaVersion: 2.1.0
sdkVersion: kfp-2.9.0
---
platforms:
  kubernetes:
    deploymentSpec:
      executors:
        exec-verify-gpu-availability-2:
          tolerations:
          - effect: NoSchedule
            key: nvidia.com/gpu
            operator: Exists
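
Since both compiled files follow the same shape, a quick sanity check can confirm that the toleration lands only on the executor of the toleration-enabled task. A short illustrative script, assuming PyYAML is available:

# Illustrative check: the platform document should attach the toleration
# only to exec-verify-gpu-availability-2, never to the plain task's executor.
import yaml

with open("pytorch_nvidia_gpu_availability_compiled.yaml") as f:
    pipeline_doc, platform_doc = list(yaml.safe_load_all(f))

executors = platform_doc["platforms"]["kubernetes"]["deploymentSpec"]["executors"]
assert "exec-verify-gpu-availability" not in executors  # plain task: no toleration
assert executors["exec-verify-gpu-availability-2"]["tolerations"] == [
    {"effect": "NoSchedule", "key": "nvidia.com/gpu", "operator": "Exists"}
]
print("toleration present only on the GPU-tolerating task")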