diff --git a/.github/workflows/tests_unit.yml b/.github/workflows/tests_unit.yml index 28262cf16..5233b8fe6 100644 --- a/.github/workflows/tests_unit.yml +++ b/.github/workflows/tests_unit.yml @@ -16,6 +16,12 @@ on: # Allow manual triggers workflow_dispatch: +env: + XDG_CACHE_HOME: /home/runner/work/milabench/cache + XDG_DATA_HOME: /home/runner/work/milabench/data + XDG_CONFIG_HOME: /home/runner/work/milabench/config + XDG_STATE_HOME: /home/runner/work/milabench/state + jobs: tests: @@ -27,6 +33,15 @@ jobs: cancel-in-progress: true steps: + - uses: easimon/maximize-build-space@master + with: + remove-dotnet: 'true' + remove-codeql: 'true' + remove-haskell: 'true' + remove-android: 'true' + build-mount-path: /home/runner/work/milabench/ + root-reserve-mb: 20000 + - uses: actions/checkout@v3 - uses: actions/setup-python@v5 @@ -35,24 +50,25 @@ jobs: - name: dependencies run: | + cd /home/runner/work/milabench/milabench + pip install virtualenv + virtualenv ./env + source ./env/bin/activate + # pip install -U pip pip install poetry - poetry env use python3.10 - source $(poetry env info -p)/bin/activate + poetry export --dev -f requirements.txt --output requirements-dev.txt # # poetry does not work when installing those !? # pip install antlr4-python3-runtime==4.9.3 pip install -e . pip install -e benchmate - # - # - # - poetry install --with dev + pip install coverage pytest-regressions pytest-cov pytest - name: Simple Template run: | - source $(poetry env info -p)/bin/activate + source ./env/bin/activate milabench new --name simplebench --template simple cd benchmarks/simplebench make tests @@ -61,7 +77,7 @@ jobs: - name: Voir Template run: | - source $(poetry env info -p)/bin/activate + source ./env/bin/activate milabench new --name voirbench --template voir cd benchmarks/voirbench make tests @@ -74,10 +90,10 @@ jobs: - name: tests env: - MILABENCH_HF_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN}} + HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN}} run: | - source $(poetry env info -p)/bin/activate - coverage run --source=milabench -m pytest --ignore=tests/integration tests/ + source ./env/bin/activate + coverage run --source=milabench -m pytest --ignore=tests/integration tests/ -vv -x coverage report -m coverage xml diff --git a/.gitignore b/.gitignore index 1bc7f879c..265dda6de 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,7 @@ dependencies/ benchmarks/recursiongfn/gflownet benchmarks/recursiongfn/logs/ benchmarks/llm/tune/ +benchmarks/vjepa/jepa scripts/inventory.yaml output/ diff --git a/.pin/constraints-cuda-gnn.txt b/.pin/constraints-cuda-gnn.txt index ce1c65e30..cacbdfeae 100644 --- a/.pin/constraints-cuda-gnn.txt +++ b/.pin/constraints-cuda-gnn.txt @@ -104,9 +104,9 @@ mpmath==1.3.0 # via # botorch # gpytorch # sympy -msgpack==1.0.8 +msgpack==1.1.0 # via blosc2 -multidict==6.0.5 +multidict==6.1.0 # via # aiohttp # yarl @@ -236,7 +236,7 @@ requests==2.32.3 # via # torch-geometric # wandb -rich==13.8.0 +rich==13.8.1 # via voir scikit-learn==1.5.1 # via @@ -310,6 +310,7 @@ typeguard==2.13.3 # linear-operator typing-extensions==4.12.2 # via + # multidict # reactivex # tables # torch @@ -330,7 +331,7 @@ wandb==0.17.9 # via -r benchmarks/recursiongfn/requirements.in werkzeug==3.0.4 # via tensorboard -yarl==1.11.0 +yarl==1.11.1 # via aiohttp # The following packages are considered to be unsafe in a requirements file: diff --git a/.pin/constraints-cuda-torch.txt b/.pin/constraints-cuda-torch.txt index d0b1aa709..2717ed4ef 100644 --- a/.pin/constraints-cuda-torch.txt +++
b/.pin/constraints-cuda-torch.txt @@ -2,36 +2,43 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --output-file=.pin/constraints-cuda-torch.txt .pin/tmp-constraints.txt benchmarks/brax/requirements.in benchmarks/diffusion/requirements.in benchmarks/dinov2/requirements.in benchmarks/flops/requirements.in benchmarks/huggingface/requirements.in benchmarks/lightning/requirements.in benchmarks/llama/requirements.in benchmarks/llava/requirements.in benchmarks/llm/requirements.in benchmarks/super-slomo/requirements.in benchmarks/timm/requirements.in benchmarks/torchatari/requirements.in benchmarks/torchvision/requirements.in benchmarks/torchvision_ddp/requirements.in constraints/extra/torch.cuda.txt +# pip-compile --output-file=.pin/constraints-cuda-torch.txt .pin/tmp-constraints.txt benchmarks/brax/requirements.in benchmarks/diffusion/requirements.in benchmarks/dinov2/requirements.in benchmarks/flops/requirements.in benchmarks/geo_gnn/requirements-pre.in benchmarks/geo_gnn/requirements.in benchmarks/huggingface/requirements.in benchmarks/lightning/requirements.in benchmarks/llama/requirements.in benchmarks/llava/requirements.in benchmarks/llm/requirements.in benchmarks/purejaxrl/requirements.in benchmarks/recursiongfn/requirements.in benchmarks/rlhf/requirements.in benchmarks/timm/requirements.in benchmarks/torchatari/requirements.in benchmarks/torchvision/requirements.in benchmarks/torchvision_ddp/requirements.in benchmarks/vjepa/requirements.in constraints/extra/torch.cuda.txt # --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com absl-py==2.1.0 # via # brax # chex + # distrax # dm-env # ml-collections # mujoco # mujoco-mjx # optax # orbax-checkpoint + # rlax # tensorboard + # tensorflow-probability accelerate==0.34.2 # via # -r benchmarks/diffusion/requirements.in # -r benchmarks/llava/requirements.in # -r benchmarks/llm/requirements.in + # -r benchmarks/rlhf/requirements.in # diffusers + # trl aiohappyeyeballs==2.4.0 # via aiohttp aiohttp==3.10.5 # via # datasets # fsspec + # torch-geometric aiosignal==1.3.1 # via aiohttp antlr4-python3-runtime==4.9.3 @@ -42,59 +49,114 @@ argklass==1.4.4 # via # -r benchmarks/diffusion/requirements.in # -r benchmarks/llm/requirements.in + # -r benchmarks/purejaxrl/requirements.in +astroid==3.2.4 + # via pylint asttokens==2.4.1 # via giving async-timeout==4.0.3 # via aiohttp attrs==24.2.0 # via aiohttp +beartype==0.18.5 + # via -r benchmarks/vjepa/requirements.in +black==24.8.0 + # via navix blinker==1.8.2 # via flask blobfile==3.0.0 # via # -r benchmarks/llm/requirements.txt # torchtune +blosc2==2.7.1 + # via tables +botorch==0.11.3 + # via -r benchmarks/recursiongfn/requirements.in +braceexpand==0.1.7 + # via + # -r benchmarks/vjepa/requirements.in + # webdataset brax==0.10.5 - # via -r benchmarks/brax/requirements.in + # via + # -r benchmarks/brax/requirements.in + # -r benchmarks/purejaxrl/requirements.in cantilever==0.1.0 # via -r benchmarks/torchatari/requirements.in certifi==2024.8.30 - # via requests + # via + # requests + # sentry-sdk charset-normalizer==3.3.2 # via requests chex==0.1.86 - # via optax + # via + # distrax + # evosax + # flashbax + # gymnax + # optax + # rlax click==8.1.7 - # via flask + # via + # black + # flask + # wandb cloudpickle==3.0.0 # via # gym 
# gymnasium # submitit + # tensorflow-probability codefind==0.1.7 # via ptera contextlib2==21.6.0 # via ml-collections -datasets==2.21.0 +contourpy==1.3.0 + # via matplotlib +cvxopt==1.3.2 + # via -r benchmarks/recursiongfn/requirements.in +cycler==0.12.1 + # via matplotlib +datasets==3.0.0 # via # -r benchmarks/diffusion/requirements.in # -r benchmarks/llama/requirements.in # -r benchmarks/llava/requirements.in + # -r benchmarks/rlhf/requirements.in # torchtune + # trl +decorator==5.1.1 + # via tensorflow-probability +decord==0.6.0 + # via -r benchmarks/vjepa/requirements.in diffusers[torch]==0.30.2 # via -r benchmarks/diffusion/requirements.in dill==0.3.8 # via # datasets # multiprocess + # pylint +distrax==0.1.5 + # via + # -r benchmarks/purejaxrl/requirements.in + # rlax dm-env==1.6 # via # brax # envpool + # rlax dm-tree==0.1.8 - # via dm-env + # via + # dm-env + # tensorflow-probability +docker-pycreds==0.4.0 + # via wandb docstring-parser==0.16 # via tyro +dotmap==1.3.30 + # via evosax +einops==0.8.0 + # via -r benchmarks/vjepa/requirements.in envpool==0.8.4 # via -r benchmarks/torchatari/requirements.in etils[epath,epy]==1.9.4 @@ -104,7 +166,11 @@ etils[epath,epy]==1.9.4 # mujoco-mjx # optax # orbax-checkpoint -executing==1.2.0 +evosax==0.1.6 + # via -r benchmarks/purejaxrl/requirements.in +exceptiongroup==1.2.2 + # via pytest +executing==2.1.0 # via varname fairscale==0.4.13 # via @@ -126,6 +192,10 @@ fire==0.6.0 # via # -r benchmarks/llama/requirements.in # -r benchmarks/llm/requirements.txt +flake8==7.1.1 + # via navix +flashbax==0.1.2 + # via -r benchmarks/purejaxrl/requirements.in flask==3.0.3 # via # brax @@ -133,7 +203,15 @@ flask==3.0.3 flask-cors==5.0.0 # via brax flax==0.9.0 - # via brax + # via + # -r benchmarks/purejaxrl/requirements.in + # brax + # evosax + # flashbax + # gymnax + # navix +fonttools==4.53.1 + # via matplotlib frozenlist==1.4.1 # via # aiohttp @@ -146,45 +224,66 @@ fsspec[http]==2024.6.1 # lightning # pytorch-lightning # torch + # torch-geometric fvcore==0.1.5.post20221221 # via -r benchmarks/dinov2/requirements.in -giving==0.4.2 +gast==0.6.0 + # via tensorflow-probability +gitdb==4.0.11 + # via gitpython +gitpython==3.1.43 + # via + # -r benchmarks/recursiongfn/requirements.in + # wandb +giving==0.4.3 # via # ptera # voir glfw==2.7.0 # via mujoco +gpytorch==1.12 + # via + # -r benchmarks/recursiongfn/requirements.in + # botorch grpcio==1.66.1 # via # brax # tensorboard -gym==0.23.1 +gym==0.26.2 # via # -r benchmarks/torchatari/requirements.in # brax # envpool + # gymnax gym-notices==0.0.8 # via gym gymnasium==0.29.1 - # via envpool + # via + # envpool + # gymnax +gymnax==0.0.8 + # via + # -c .pin/../constraints/cuda.txt + # -r benchmarks/purejaxrl/requirements.in hjson==3.1.0 # via argklass -huggingface-hub==0.24.6 +huggingface-hub==0.24.7 # via # -r benchmarks/timm/requirements.in # accelerate # datasets # diffusers + # timm # tokenizers # torchtune # transformers humanize==4.10.0 # via orbax-checkpoint -idna==3.8 +idna==3.10 # via # requests # yarl -importlib-metadata==8.4.0 +importlib-metadata==8.5.0 # via diffusers importlib-resources==6.4.5 # via @@ -192,23 +291,33 @@ importlib-resources==6.4.5 # cantilever # etils # torchcompat +iniconfig==2.0.0 + # via pytest iopath==0.1.10 # via # -r benchmarks/dinov2/requirements.in # fvcore +isort==5.13.2 + # via pylint itsdangerous==2.2.0 # via flask jax[cuda12]==0.4.31 # via # -r benchmarks/brax/requirements.in + # -r benchmarks/purejaxrl/requirements.in # -r constraints/extra/torch.cuda.txt # brax # chex + 
# distrax + # evosax + # flashbax # flax + # gymnax # jaxopt # mujoco-mjx # optax # orbax-checkpoint + # rlax jax-cuda12-pjrt==0.4.31 # via jax-cuda12-plugin jax-cuda12-plugin[with-cuda]==0.4.31 @@ -217,18 +326,30 @@ jaxlib==0.4.31 # via # brax # chex + # distrax + # evosax + # flashbax + # gymnax # jax # jaxopt # mujoco-mjx # optax # orbax-checkpoint + # rlax jaxopt==0.8.3 # via brax +jaxtyping==0.2.34 + # via linear-operator jinja2==3.1.4 # via # brax # flask # torch + # torch-geometric +joblib==1.4.2 + # via scikit-learn +kiwisolver==1.4.7 + # via matplotlib lightning==2.4.0 # via -r benchmarks/lightning/requirements.in lightning-utilities==0.11.7 @@ -236,6 +357,10 @@ lightning-utilities==0.11.7 # lightning # pytorch-lightning # torchmetrics +linear-operator==0.5.2 + # via + # botorch + # gpytorch lxml==5.3.0 # via blobfile markdown==3.7 @@ -246,19 +371,32 @@ markupsafe==2.1.5 # via # jinja2 # werkzeug +matplotlib==3.9.2 + # via + # evosax + # gymnax + # seaborn +mccabe==0.7.0 + # via + # flake8 + # pylint mdurl==0.1.2 # via markdown-it-py ml-collections==0.1.1 # via brax -ml-dtypes==0.4.0 +ml-dtypes==0.5.0 # via # jax # jaxlib # tensorstore mpmath==1.3.0 - # via sympy -msgpack==1.0.8 # via + # botorch + # gpytorch + # sympy +msgpack==1.1.0 + # via + # blosc2 # flax # orbax-checkpoint mujoco==3.2.2 @@ -267,52 +405,88 @@ mujoco==3.2.2 # mujoco-mjx mujoco-mjx==3.2.2 # via brax -multidict==6.0.5 +multidict==6.1.0 # via # aiohttp # yarl +multipledispatch==1.0.0 + # via botorch multiprocess==0.70.16 # via datasets +mypy-extensions==1.0.0 + # via black +navix==0.7.0 + # via -r benchmarks/purejaxrl/requirements.in +ndindex==1.8 + # via blosc2 nest-asyncio==1.6.0 # via orbax-checkpoint networkx==3.3 - # via torch + # via + # -r benchmarks/recursiongfn/requirements.in + # torch +numexpr==2.10.1 + # via + # blosc2 + # tables numpy==1.26.4 # via + # -r benchmarks/geo_gnn/requirements.in # -r benchmarks/llava/requirements.in - # -r benchmarks/super-slomo/requirements.in + # -r benchmarks/purejaxrl/requirements.in # -r benchmarks/torchatari/requirements.in + # -r benchmarks/vjepa/requirements.in # accelerate + # blosc2 + # botorch # brax # chex + # contourpy # datasets + # decord # diffusers + # distrax # dm-env # envpool + # evosax # fairscale + # flashbax # fvcore # gym # gymnasium # jax # jaxlib # jaxopt + # matplotlib # ml-dtypes # mujoco + # navix + # numexpr # opencv-python # opt-einsum # optax # orbax-checkpoint # pandas # pyarrow + # pyro-ppl + # rdkit + # rlax + # scikit-learn # scipy + # seaborn + # tables # tensorboard # tensorboardx + # tensorflow-probability # tensorstore + # torch-geometric # torchmetrics # torchtune # torchvision # transformers # trimesh + # trl + # webdataset # xformers nvidia-cublas-cu12==12.1.3.1 # via @@ -367,19 +541,23 @@ nvidia-nvtx-cu12==12.1.105 omegaconf==2.3.0 # via # -r benchmarks/dinov2/requirements.in + # -r benchmarks/recursiongfn/requirements.in # torchtune # voir opencv-python==4.10.0.84 - # via -r benchmarks/super-slomo/requirements.in + # via -r benchmarks/vjepa/requirements.in opt-einsum==3.3.0 - # via jax + # via + # jax + # pyro-ppl optax==0.2.3 # via + # -r benchmarks/purejaxrl/requirements.in # brax # flax optree==0.12.1 # via envpool -orbax-checkpoint==0.6.1 +orbax-checkpoint==0.6.3 # via # brax # flax @@ -388,18 +566,30 @@ ovld==0.3.9 packaging==24.1 # via # accelerate + # black # datasets # envpool # huggingface-hub # lightning # lightning-utilities + # matplotlib + # pytest # pytorch-lightning + # setuptools-scm + # tables # tensorboard # 
tensorboardx # torchmetrics # transformers pandas==2.2.2 - # via datasets + # via + # -r benchmarks/geo_gnn/requirements.in + # -r benchmarks/recursiongfn/requirements.in + # -r benchmarks/vjepa/requirements.in + # datasets + # seaborn +pathspec==0.12.1 + # via black pillow==10.4.0 # via # -r benchmarks/huggingface/requirements.in @@ -407,55 +597,104 @@ pillow==10.4.0 # brax # diffusers # fvcore + # matplotlib + # navix + # rdkit # torchvision +platformdirs==4.3.3 + # via + # black + # pylint + # wandb +pluggy==1.5.0 + # via pytest portalocker==2.10.1 # via iopath -protobuf==5.28.0 +protobuf==5.28.1 # via # orbax-checkpoint # tensorboard # tensorboardx + # wandb psutil==5.9.8 # via # accelerate + # torch-geometric # voir + # wandb ptera==1.4.1 # via voir +py-cpuinfo==9.0.0 + # via + # blosc2 + # tables pyarrow==17.0.0 - # via datasets + # via + # -r benchmarks/recursiongfn/requirements.in + # datasets +pycodestyle==2.12.1 + # via flake8 pycryptodomex==3.20.0 # via blobfile +pyflakes==3.2.0 + # via flake8 pygments==2.18.0 # via rich +pylint==3.2.7 + # via navix pyopengl==3.1.7 # via mujoco +pyparsing==3.1.4 + # via + # matplotlib + # torch-geometric +pyro-api==0.1.2 + # via pyro-ppl +pyro-ppl==1.9.1 + # via + # -r benchmarks/recursiongfn/requirements.in + # botorch +pytest==8.3.3 + # via navix python-dateutil==2.9.0.post0 - # via pandas + # via + # matplotlib + # pandas pytinyrenderer==0.0.14 # via brax pytorch-lightning==2.4.0 # via lightning -pytz==2024.1 +pytz==2024.2 # via pandas pyyaml==6.0.2 # via # -r benchmarks/llm/requirements.in # -r benchmarks/timm/requirements.in + # -r benchmarks/vjepa/requirements.in # accelerate # datasets + # evosax # flax # fvcore + # gymnax # huggingface-hub # lightning # ml-collections # omegaconf # orbax-checkpoint # pytorch-lightning + # timm # transformers + # wandb + # webdataset # yacs +rdkit==2024.3.5 + # via + # -r benchmarks/geo_gnn/requirements.in + # -r benchmarks/recursiongfn/requirements.in reactivex==4.0.4 # via giving -regex==2024.7.24 +regex==2024.9.11 # via # diffusers # tiktoken @@ -466,64 +705,111 @@ requests==2.32.3 # diffusers # huggingface-hub # tiktoken + # torch-geometric # transformers -rich==13.8.0 + # wandb +rich==13.8.1 # via # flax # tyro # voir +rlax==0.1.6 + # via navix safetensors==0.4.5 # via # -r benchmarks/timm/requirements.in # accelerate # diffusers + # timm # torchtune # transformers +scikit-learn==1.5.2 + # via gpytorch scipy==1.14.1 # via # -r benchmarks/dinov2/requirements.in + # -r benchmarks/recursiongfn/requirements.in + # botorch # brax + # gpytorch # jax # jaxlib # jaxopt + # linear-operator # mujoco-mjx + # scikit-learn + # torch-cluster + # torch-sparse +seaborn==0.13.2 + # via gymnax sentencepiece==0.2.0 # via # -r benchmarks/llama/requirements.in # torchtune +sentry-sdk==2.14.0 + # via wandb +setproctitle==1.3.3 + # via wandb +setuptools-scm==8.1.0 + # via navix shtab==1.7.1 # via tyro six==1.16.0 # via # asttokens + # docker-pycreds # fire # ml-collections # python-dateutil # tensorboard + # tensorflow-probability +smmap==5.0.1 + # via gitdb submitit==1.5.1 - # via -r benchmarks/dinov2/requirements.in + # via + # -r benchmarks/dinov2/requirements.in + # -r benchmarks/vjepa/requirements.in sympy==1.13.2 # via torch +tables==3.10.1 + # via -r benchmarks/recursiongfn/requirements.in tabulate==0.9.0 # via fvcore tensorboard==2.17.1 - # via -r benchmarks/torchatari/requirements.in + # via + # -r benchmarks/recursiongfn/requirements.in + # -r benchmarks/torchatari/requirements.in tensorboard-data-server==0.7.2 # 
via tensorboard tensorboardx==2.6.2.2 # via brax +tensorflow-probability==0.24.0 + # via distrax tensorstore==0.1.65 # via + # flashbax # flax # orbax-checkpoint termcolor==2.4.0 # via # fire # fvcore +threadpoolctl==3.5.0 + # via scikit-learn tiktoken==0.7.0 # via torchtune +timm==1.0.9 + # via -r benchmarks/vjepa/requirements.in tokenizers==0.19.1 # via transformers +tomli==2.0.1 + # via + # black + # pylint + # pytest + # setuptools-scm +tomlkit==0.13.2 + # via pylint toolz==0.12.1 # via chex torch==2.4.0+cu121 @@ -531,25 +817,50 @@ torch==2.4.0+cu121 # -r benchmarks/brax/requirements.in # -r benchmarks/dinov2/requirements.in # -r benchmarks/flops/requirements.in + # -r benchmarks/geo_gnn/requirements-pre.in # -r benchmarks/huggingface/requirements.in # -r benchmarks/lightning/requirements.in # -r benchmarks/llama/requirements.in # -r benchmarks/llava/requirements.in # -r benchmarks/llm/requirements.in # -r benchmarks/llm/requirements.txt - # -r benchmarks/super-slomo/requirements.in + # -r benchmarks/purejaxrl/requirements.in + # -r benchmarks/recursiongfn/requirements.in + # -r benchmarks/rlhf/requirements.in # -r benchmarks/timm/requirements.in # -r benchmarks/torchatari/requirements.in # -r benchmarks/torchvision/requirements.in # -r benchmarks/torchvision_ddp/requirements.in + # -r benchmarks/vjepa/requirements.in # accelerate + # botorch # diffusers # fairscale # lightning + # linear-operator + # pyro-ppl # pytorch-lightning + # timm # torchmetrics # torchvision + # trl # xformers +torch-cluster==1.6.3+pt24cu121 + # via + # -r benchmarks/geo_gnn/requirements.in + # -r benchmarks/recursiongfn/requirements.in +torch-geometric==2.6.0 + # via + # -r benchmarks/geo_gnn/requirements.in + # -r benchmarks/recursiongfn/requirements.in +torch-scatter==2.1.2+pt24cu121 + # via + # -r benchmarks/geo_gnn/requirements.in + # -r benchmarks/recursiongfn/requirements.in +torch-sparse==0.6.18+pt24cu121 + # via + # -r benchmarks/geo_gnn/requirements.in + # -r benchmarks/recursiongfn/requirements.in torchao==0.3.1+cu121 # via torchtune torchcompat==1.1.4 @@ -560,7 +871,7 @@ torchcompat==1.1.4 # -r benchmarks/torchatari/requirements.in # -r benchmarks/torchvision/requirements.in # -r benchmarks/torchvision_ddp/requirements.in -torchmetrics==1.4.1 +torchmetrics==1.4.2 # via # -r benchmarks/dinov2/requirements.in # lightning @@ -573,15 +884,15 @@ torchvision==0.19.0+cu121 # -r benchmarks/dinov2/requirements.in # -r benchmarks/flops/requirements.in # -r benchmarks/lightning/requirements.in - # -r benchmarks/super-slomo/requirements.in # -r benchmarks/timm/requirements.in # -r benchmarks/torchvision/requirements.in # -r benchmarks/torchvision_ddp/requirements.in + # -r benchmarks/vjepa/requirements.in + # timm tqdm==4.66.5 # via # -r benchmarks/diffusion/requirements.in # -r benchmarks/flops/requirements.in - # -r benchmarks/super-slomo/requirements.in # -r benchmarks/torchvision/requirements.in # -r benchmarks/torchvision_ddp/requirements.in # datasets @@ -589,7 +900,9 @@ tqdm==4.66.5 # huggingface-hub # iopath # lightning + # pyro-ppl # pytorch-lightning + # torch-geometric # torchtune # transformers transformers==4.44.2 @@ -599,42 +912,60 @@ transformers==4.44.2 # -r benchmarks/llama/requirements.in # -r benchmarks/llava/requirements.in # -r benchmarks/llm/requirements.in + # -r benchmarks/rlhf/requirements.in + # trl trimesh==4.4.9 # via # brax # mujoco-mjx triton==3.0.0 # via torch +trl==0.10.1 + # via -r benchmarks/rlhf/requirements.in +typeguard==2.13.3 + # via + # jaxtyping + # linear-operator 
types-protobuf==5.27.0.20240907 # via envpool typing-extensions==4.12.2 # via + # astroid + # black # brax # chex # envpool # etils + # flashbax # flax # gymnasium # huggingface-hub # iopath # lightning # lightning-utilities + # multidict + # navix # optree # orbax-checkpoint # pytorch-lightning # reactivex # submitit + # tables # torch # tyro tyro==0.8.10 - # via -r benchmarks/torchatari/requirements.in + # via + # -r benchmarks/torchatari/requirements.in + # navix + # trl tzdata==2024.1 # via pandas -urllib3==2.2.2 +urllib3==2.2.3 # via # blobfile # requests -varname==0.10.0 + # sentry-sdk +varname==0.13.3 # via giving voir==0.2.19 # via @@ -643,16 +974,26 @@ voir==0.2.19 # -r benchmarks/diffusion/requirements.in # -r benchmarks/dinov2/requirements.in # -r benchmarks/flops/requirements.in + # -r benchmarks/geo_gnn/requirements.in # -r benchmarks/huggingface/requirements.in # -r benchmarks/lightning/requirements.in # -r benchmarks/llama/requirements.in # -r benchmarks/llava/requirements.in # -r benchmarks/llm/requirements.in - # -r benchmarks/super-slomo/requirements.in + # -r benchmarks/purejaxrl/requirements.in + # -r benchmarks/recursiongfn/requirements.in + # -r benchmarks/rlhf/requirements.in # -r benchmarks/timm/requirements.in # -r benchmarks/torchatari/requirements.in # -r benchmarks/torchvision/requirements.in # -r benchmarks/torchvision_ddp/requirements.in + # -r benchmarks/vjepa/requirements.in +wandb==0.18.0 + # via + # -r benchmarks/recursiongfn/requirements.in + # navix +webdataset==0.2.100 + # via -r benchmarks/vjepa/requirements.in werkzeug==3.0.4 # via # flask @@ -665,9 +1006,9 @@ xxhash==3.5.0 # via datasets yacs==0.1.8 # via fvcore -yarl==1.11.0 +yarl==1.11.1 # via aiohttp -zipp==3.20.1 +zipp==3.20.2 # via # etils # importlib-metadata diff --git a/benchmarks/brax/requirements.cuda.txt b/benchmarks/brax/requirements.cuda.txt index 5666c0798..aa883171c 100644 --- a/benchmarks/brax/requirements.cuda.txt +++ b/benchmarks/brax/requirements.cuda.txt @@ -7,6 +7,7 @@ --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com absl-py==2.1.0 @@ -72,7 +73,7 @@ etils[epath,epy]==1.9.4 # mujoco-mjx # optax # orbax-checkpoint -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname @@ -99,7 +100,7 @@ fsspec==2024.6.1 # -c .pin/../.pin/constraints-cuda-torch.txt # etils # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera @@ -112,7 +113,7 @@ grpcio==1.66.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # brax -gym==0.23.1 +gym==0.26.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # brax @@ -189,7 +190,7 @@ ml-collections==0.1.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # brax -ml-dtypes==0.4.0 +ml-dtypes==0.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jax @@ -199,7 +200,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -msgpack==1.0.8 +msgpack==1.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # flax @@ -323,7 +324,7 @@ optax==0.2.3 # -c .pin/../.pin/constraints-cuda-torch.txt # brax # flax -orbax-checkpoint==0.6.1 +orbax-checkpoint==0.6.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # brax @@ -340,7 +341,7 @@ pillow==10.4.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # brax -protobuf==5.28.0 
+protobuf==5.28.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # orbax-checkpoint @@ -376,7 +377,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -rich==13.8.0 +rich==13.8.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # flax @@ -435,7 +436,7 @@ typing-extensions==4.12.2 # orbax-checkpoint # reactivex # torch -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving @@ -452,7 +453,7 @@ xformers==0.0.27.post2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # -r .pin/../constraints/extra/torch.cuda.txt -zipp==3.20.1 +zipp==3.20.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # etils diff --git a/benchmarks/diffusion/main.py b/benchmarks/diffusion/main.py index 4b060c05d..0bcb67d50 100755 --- a/benchmarks/diffusion/main.py +++ b/benchmarks/diffusion/main.py @@ -229,6 +229,12 @@ def batch_size(x): return observer, bench_monitor def main(): + # --- + import resource + _, hard = resource.getrlimit(resource.RLIMIT_NOFILE) + resource.setrlimit(resource.RLIMIT_NOFILE, (hard, hard)) + # --- + from benchmate.metrics import StopProgram observer, monitor = prepare_voir() diff --git a/benchmarks/diffusion/requirements.cuda.txt b/benchmarks/diffusion/requirements.cuda.txt index ae6aad547..6a062a7a0 100644 --- a/benchmarks/diffusion/requirements.cuda.txt +++ b/benchmarks/diffusion/requirements.cuda.txt @@ -7,6 +7,7 @@ --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com accelerate==0.34.2 @@ -59,7 +60,7 @@ codefind==0.1.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera -datasets==2.21.0 +datasets==3.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/diffusion/requirements.in @@ -72,7 +73,7 @@ dill==0.3.8 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # multiprocess -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname @@ -96,7 +97,7 @@ fsspec[http]==2024.6.1 # datasets # huggingface-hub # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera @@ -105,7 +106,7 @@ hjson==3.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # argklass -huggingface-hub==0.24.6 +huggingface-hub==0.24.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # accelerate @@ -113,12 +114,12 @@ huggingface-hub==0.24.6 # diffusers # tokenizers # transformers -idna==3.8 +idna==3.10 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests # yarl -importlib-metadata==8.4.0 +importlib-metadata==8.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # diffusers @@ -158,7 +159,7 @@ mdurl==0.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # markdown-it-py -ml-dtypes==0.4.0 +ml-dtypes==0.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jax @@ -167,7 +168,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -multidict==6.0.5 +multidict==6.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp @@ -314,7 +315,7 @@ python-dateutil==2.9.0.post0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pandas -pytz==2024.1 +pytz==2024.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pandas @@ -330,7 +331,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -regex==2024.7.24 +regex==2024.9.11 # via # -c
.pin/../.pin/constraints-cuda-torch.txt # diffusers @@ -342,7 +343,7 @@ requests==2.32.3 # diffusers # huggingface-hub # transformers -rich==13.8.0 +rich==13.8.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir @@ -400,17 +401,18 @@ typing-extensions==4.12.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub + # multidict # reactivex # torch tzdata==2024.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pandas -urllib3==2.2.2 +urllib3==2.2.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving @@ -427,11 +429,11 @@ xxhash==3.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets -yarl==1.11.0 +yarl==1.11.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp -zipp==3.20.1 +zipp==3.20.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # importlib-metadata diff --git a/benchmarks/dinov2/requirements.cuda.txt b/benchmarks/dinov2/requirements.cuda.txt index 2b9a2ad5a..aef36dbf3 100644 --- a/benchmarks/dinov2/requirements.cuda.txt +++ b/benchmarks/dinov2/requirements.cuda.txt @@ -7,6 +7,7 @@ --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com antlr4-python3-runtime==4.9.3 @@ -25,7 +26,7 @@ codefind==0.1.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname @@ -42,7 +43,7 @@ fvcore==0.1.5.post20221221 # via # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/dinov2/requirements.in -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera @@ -88,7 +89,7 @@ mdurl==0.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # markdown-it-py -ml-dtypes==0.4.0 +ml-dtypes==0.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jax @@ -231,7 +232,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -rich==13.8.0 +rich==13.8.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir @@ -268,7 +269,7 @@ torch==2.4.0+cu121 # torchmetrics # torchvision # xformers -torchmetrics==1.4.1 +torchmetrics==1.4.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/dinov2/requirements.in @@ -293,7 +294,7 @@ typing-extensions==4.12.2 # reactivex # submitit # torch -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving diff --git a/benchmarks/flops/requirements.cuda.txt b/benchmarks/flops/requirements.cuda.txt index da9d3cc6c..afb7ff130 100644 --- a/benchmarks/flops/requirements.cuda.txt +++ b/benchmarks/flops/requirements.cuda.txt @@ -7,6 +7,7 @@ --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com antlr4-python3-runtime==4.9.3 @@ -21,7 +22,7 @@ codefind==0.1.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname @@ -34,7 +35,7 @@ fsspec==2024.6.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera 
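Note on the `resource` block added to `benchmarks/diffusion/main.py` above: `resource.getrlimit` returns a `(soft, hard)` pair, and the intent of that hunk is to raise the soft open-file limit up to the hard ceiling, a common workaround for "Too many open files" when many DataLoader workers hold file descriptors. A minimal standalone sketch of the idiom (Unix-only, since the `resource` module is not available on Windows):

```python
import resource

# getrlimit returns (soft, hard); raise the soft limit up to the hard ceiling.
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
resource.setrlimit(resource.RLIMIT_NOFILE, (hard, hard))
print(resource.getrlimit(resource.RLIMIT_NOFILE))  # soft now equals hard
```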
@@ -75,7 +76,7 @@ mdurl==0.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # markdown-it-py -ml-dtypes==0.4.0 +ml-dtypes==0.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jax @@ -203,7 +204,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -rich==13.8.0 +rich==13.8.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir @@ -248,7 +249,7 @@ typing-extensions==4.12.2 # -c .pin/../.pin/constraints-cuda-torch.txt # reactivex # torch -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving diff --git a/benchmarks/geo_gnn/dev.yaml b/benchmarks/geo_gnn/dev.yaml index 7fadaea5f..6f261c895 100644 --- a/benchmarks/geo_gnn/dev.yaml +++ b/benchmarks/geo_gnn/dev.yaml @@ -1,4 +1,4 @@ -dimenet: +pna: inherits: _defaults definition: . install-variant: cuda @@ -6,11 +6,11 @@ dimenet: plan: method: per_gpu argv: - --model: 'DimeNet' - --num-samples: 10000 - --use3d: True + --model: 'PNA' + --num-samples: 100000 + --batch-size: 4096 -pna: +dimenet: inherits: _defaults definition: . install-variant: cuda @@ -18,5 +18,7 @@ pna: plan: method: per_gpu argv: - --model: 'PNA' - --num-samples: 10000 \ No newline at end of file + --model: 'DimeNet' + --num-samples: 10000 + --use3d: True + --batch-size: 512 \ No newline at end of file diff --git a/benchmarks/geo_gnn/main.py b/benchmarks/geo_gnn/main.py index 714707f65..71e1c8827 100644 --- a/benchmarks/geo_gnn/main.py +++ b/benchmarks/geo_gnn/main.py @@ -9,6 +9,7 @@ from pcqm4m_subset import PCQM4Mv2Subset from torch_geometric.datasets import QM9 from torch_geometric.loader import DataLoader +from torch_geometric.nn import global_max_pool from benchmate.observer import BenchObserver @@ -102,26 +103,25 @@ def main(): args = parser().parse_args() def batch_size(x): - shape = x.y.shape - return shape[0] + # assert len(x.batch.unique()) == int(x.batch[-1] - x.batch[0] + 1) + return int(x.batch[-1] - x.batch[0] + 1) observer = BenchObserver(batch_size_fn=batch_size) - # train_dataset = PCQM4Mv2Subset(args.num_samples, args.root) - train_dataset = QM9(args.root) + train_dataset = PCQM4Mv2Subset(args.num_samples, args.root) sample = next(iter(train_dataset)) - info = models[args.model](args, - sample=sample, - degree=lambda: train_degree(train_dataset), + info = models[args.model]( + args, + sample=sample, + degree=lambda: train_degree(train_dataset), ) TRAIN_mean, TRAIN_std = ( mean(train_dataset).item(), std(train_dataset).item(), ) - print("Train mean: {}\tTrain std: {}".format(TRAIN_mean, TRAIN_std)) DataLoaderClass = DataLoader dataloader_kwargs = {} @@ -131,7 +131,7 @@ def batch_size(x): batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, - **dataloader_kwargs + **dataloader_kwargs, ) device = accelerator.fetch_device(0) @@ -148,33 +148,26 @@ def batch_size(x): lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs) num_batches = len(train_loader) - for epoch in range(1, args.epochs + 1): - model.train() + loader = observer.loader(train_loader) - for step, batch in enumerate(observer.iterate(train_loader)): - # QM9 => DataBatch(x=[290, 11], edge_index=[2, 602], edge_attr=[602, 4], y=[16, 19], pos=[290, 3], z=[290], smiles=[16], name=[16], idx=[16], batch=[290], ptr=[17]) - # PCQM4Mv2Subset => DataBatch(x=[229, 9], edge_index=[2, 476], edge_attr=[476, 3], y=[16], pos=[229, 3], smiles=[16], batch=[229], ptr=[17]) + model.train() # No eval ever. 
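Context for the new `batch_size` helper in the `benchmarks/geo_gnn/main.py` hunk above: a PyTorch Geometric `DataBatch` concatenates the nodes of every graph in the batch, and `batch.batch` maps each node to the index of the graph it belongs to, so counting rows of a per-node tensor reports node counts rather than graph counts. The spread of graph indices gives the true number of graphs. A toy sketch (hypothetical two-graph batch; assumes `torch_geometric` is installed):

```python
import torch
from torch_geometric.data import Batch, Data

# Two toy graphs with 3 and 2 nodes; Batch.from_data_list concatenates their
# nodes and records, per node, the index of the graph it came from.
g1 = Data(x=torch.randn(3, 4), edge_index=torch.tensor([[0, 1], [1, 2]]))
g2 = Data(x=torch.randn(2, 4), edge_index=torch.tensor([[0], [1]]))
batch = Batch.from_data_list([g1, g2])

print(batch.batch)                                # tensor([0, 0, 0, 1, 1])
print(int(batch.batch[-1] - batch.batch[0] + 1))  # 2 graphs, not 5 nodes
```

This counts graphs directly from `batch.batch`, independent of how `y` happens to be shaped for a given dataset.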
+ for epoch in range(1, args.epochs + 1): + for step, batch in enumerate(loader): batch = batch.to(device) - + if args.use3d: - - if hasattr(batch, "z"): - z = batch.z - else: - z = batch.batch - - molecule_repr = model(z=z, pos=batch.pos, batch=batch.batch) + molecule_repr = model(z=batch.z, pos=batch.pos, batch=batch.batch) else: - molecule_repr = model(x=batch.x, batch=batch.batch, edge_index=batch.edge_index, batch_size=batch_size(batch)) + molecule_repr = model( + x=batch.x.type(torch.float), + batch=batch.batch, + edge_index=batch.edge_index, + batch_size=batch_size(batch), + ) + molecule_repr = global_max_pool(molecule_repr, batch.batch) pred = molecule_repr.squeeze() - # Dimenet : pred: torch.Size([ 16, 19]) - # PNA : pred: torch.Size([292, 19]) <= (with x=batch.x) WTF !? 292 = batch.x.shape[0] - # batch : torch.Size([ 16, 19]) - # print(molecule_repr.shape) - # print(batch.y.shape) - B = pred.size()[0] y = batch.y.view(B, -1) # normalize @@ -192,7 +185,8 @@ def batch_size(x): lr_scheduler.step() - print("Epoch: {}\nLoss: {}".format(epoch)) + if loader.is_done(): + break if __name__ == "__main__": diff --git a/benchmarks/geo_gnn/pcqm4m_subset.py b/benchmarks/geo_gnn/pcqm4m_subset.py index 615aea2bb..2d6e0e2bd 100644 --- a/benchmarks/geo_gnn/pcqm4m_subset.py +++ b/benchmarks/geo_gnn/pcqm4m_subset.py @@ -35,6 +35,7 @@ def __init__( "smiles": str, "pos": dict(dtype=torch.float32, size=(-1, 3)), "y": float, + "z": dict(dtype=torch.long, size=(-1,)), } self.from_smiles = from_smiles or _from_smiles @@ -49,12 +50,10 @@ def raw_file_names(self): ] def download(self): - print(self.raw_paths) if all(os.path.exists(path) for path in self.raw_paths): return # Download 2d graphs - print(self.raw_dir) super().download() # Download 3D coordinates @@ -78,6 +77,9 @@ def process(self) -> None: data.pos = torch.tensor( extra.GetConformer().GetPositions(), dtype=torch.float ) + data.z = torch.tensor( + [atom.GetAtomicNum() for atom in extra.GetAtoms()], dtype=torch.long + ) data_list.append(data) if ( @@ -104,4 +106,5 @@ def std(self): def serialize(self, data: BaseData) -> Dict[str, Any]: rval = super().serialize(data) rval["pos"] = data.pos + rval["z"] = data.z return rval diff --git a/benchmarks/geo_gnn/prepare.py b/benchmarks/geo_gnn/prepare.py index 2b352f8ce..b3ac374b0 100755 --- a/benchmarks/geo_gnn/prepare.py +++ b/benchmarks/geo_gnn/prepare.py @@ -12,7 +12,7 @@ def parser(): "--num-samples", type=int, help="Number of samples to process in the dataset", - default=10000, + default=100000, ) parser.add_argument( "--root", @@ -26,7 +26,4 @@ def parser(): if __name__ == "__main__": args, _ = parser().parse_known_args() - # TODO: Handle argument for the number of samples - train_dataset = QM9(args.root) - # dataset = PCQM4Mv2Subset(args.num_samples, root=args.root) - + dataset = PCQM4Mv2Subset(args.num_samples, root=args.root) diff --git a/benchmarks/geo_gnn/requirements-pre.cuda.txt b/benchmarks/geo_gnn/requirements-pre.cuda.txt index 396cdd441..0ec4d88dd 100644 --- a/benchmarks/geo_gnn/requirements-pre.cuda.txt +++ b/benchmarks/geo_gnn/requirements-pre.cuda.txt @@ -2,104 +2,161 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --output-file=benchmarks/geo_gnn/requirements-pre.cuda.txt .pin/tmp-constraints-cuda-geo_gnn.txt benchmarks/geo_gnn/requirements-pre.in +# pip-compile --output-file=benchmarks/geo_gnn/requirements-pre.cuda.txt .pin/tmp-constraints-cuda-dimenet.txt benchmarks/geo_gnn/requirements-pre.in # --extra-index-url 
https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 ---find-links https://data.pyg.org/whl/torch-2.3.0+cu121.html +--find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com filelock==3.16.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch # triton -fsspec==2024.9.0 +fsspec==2024.6.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch +jax[cuda12]==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r .pin/../constraints/extra/torch.cuda.txt +jax-cuda12-pjrt==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin +jax-cuda12-plugin[with-cuda]==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax +jaxlib==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax jinja2==3.1.4 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch markupsafe==2.1.5 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # jinja2 +ml-dtypes==0.5.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax + # jaxlib mpmath==1.3.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # sympy networkx==3.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch +numpy==1.26.4 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax + # jaxlib + # ml-dtypes + # opt-einsum + # scipy + # xformers nvidia-cublas-cu12==12.1.3.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # nvidia-cudnn-cu12 # nvidia-cusolver-cu12 # torch nvidia-cuda-cupti-cu12==12.1.105 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # torch +nvidia-cuda-nvcc-cu12==12.6.68 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin nvidia-cuda-nvrtc-cu12==12.1.105 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch nvidia-cuda-runtime-cu12==12.1.105 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # torch -nvidia-cudnn-cu12==8.9.2.26 +nvidia-cudnn-cu12==9.1.0.70 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # torch nvidia-cufft-cu12==11.0.2.54 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # torch nvidia-curand-cu12==10.3.2.106 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch nvidia-cusolver-cu12==11.4.5.107 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # torch nvidia-cusparse-cu12==12.1.0.106 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # nvidia-cusolver-cu12 # torch nvidia-nccl-cu12==2.20.5 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # torch nvidia-nvjitlink-cu12==12.6.68 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt 
+ # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # nvidia-cusolver-cu12 # nvidia-cusparse-cu12 nvidia-nvtx-cu12==12.1.105 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch +opt-einsum==3.3.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax +scipy==1.14.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax + # jaxlib sympy==1.13.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch -torch==2.3.1+cu121 +torch==2.4.0+cu121 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt - # -r .pin/../constraints/extra/gnn.cuda.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.in -triton==2.3.1 + # xformers +triton==3.0.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch typing-extensions==4.12.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch +xformers==0.0.27.post2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r .pin/../constraints/extra/torch.cuda.txt diff --git a/benchmarks/geo_gnn/requirements.cuda.txt b/benchmarks/geo_gnn/requirements.cuda.txt index 5bf4a0707..88e329e6d 100644 --- a/benchmarks/geo_gnn/requirements.cuda.txt +++ b/benchmarks/geo_gnn/requirements.cuda.txt @@ -2,339 +2,385 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --output-file=benchmarks/geo_gnn/requirements.cuda.txt .pin/tmp-constraints-cuda-geo_gnn.txt benchmarks/geo_gnn/requirements-pre.cuda.txt benchmarks/geo_gnn/requirements.in +# pip-compile --output-file=benchmarks/geo_gnn/requirements.cuda.txt .pin/tmp-constraints-cuda-dimenet.txt benchmarks/geo_gnn/requirements-pre.cuda.txt benchmarks/geo_gnn/requirements.in # --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 ---find-links https://data.pyg.org/whl/torch-2.3.0+cu121.html +--find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com aiohappyeyeballs==2.4.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp aiohttp==3.10.5 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch-geometric aiosignal==1.3.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp antlr4-python3-runtime==4.9.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # omegaconf asttokens==2.4.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # giving async-timeout==4.0.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp attrs==24.2.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp certifi==2024.8.30 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # requests charset-normalizer==3.3.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # requests codefind==0.1.7 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # ptera -executing==1.2.0 
+executing==2.1.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # varname filelock==3.16.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt # torch # triton frozenlist==1.4.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp # aiosignal -fsspec==2024.9.0 +fsspec==2024.6.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt # torch # torch-geometric -giving==0.4.2 +giving==0.4.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # ptera # voir -idna==3.8 +idna==3.10 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # requests # yarl +jax[cuda12]==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r .pin/../constraints/extra/torch.cuda.txt + # -r benchmarks/geo_gnn/requirements-pre.cuda.txt +jax-cuda12-pjrt==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # jax-cuda12-plugin +jax-cuda12-plugin[with-cuda]==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # jax +jaxlib==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # jax jinja2==3.1.4 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt # torch # torch-geometric -joblib==1.4.2 - # via - # -c .pin/../.pin/constraints-cuda-gnn.txt - # scikit-learn markdown-it-py==3.0.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # rich markupsafe==2.1.5 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt # jinja2 mdurl==0.1.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # markdown-it-py +ml-dtypes==0.5.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # jax + # jaxlib mpmath==1.3.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt # sympy -multidict==6.0.5 +multidict==6.1.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp # yarl networkx==3.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt # torch numpy==1.26.4 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.cuda.txt # -r benchmarks/geo_gnn/requirements.in + # jax + # jaxlib + # ml-dtypes + # opt-einsum # pandas # rdkit - # scikit-learn # scipy # torch-geometric + # xformers nvidia-cublas-cu12==12.1.3.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # jax-cuda12-plugin # nvidia-cudnn-cu12 # nvidia-cusolver-cu12 # torch nvidia-cuda-cupti-cu12==12.1.105 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # 
-c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # jax-cuda12-plugin # torch +nvidia-cuda-nvcc-cu12==12.6.68 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # jax-cuda12-plugin nvidia-cuda-nvrtc-cu12==12.1.105 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt # torch nvidia-cuda-runtime-cu12==12.1.105 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # jax-cuda12-plugin # torch -nvidia-cudnn-cu12==8.9.2.26 +nvidia-cudnn-cu12==9.1.0.70 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # jax-cuda12-plugin # torch nvidia-cufft-cu12==11.0.2.54 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # jax-cuda12-plugin # torch nvidia-curand-cu12==10.3.2.106 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt # torch nvidia-cusolver-cu12==11.4.5.107 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # jax-cuda12-plugin # torch nvidia-cusparse-cu12==12.1.0.106 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # jax-cuda12-plugin # nvidia-cusolver-cu12 # torch nvidia-ml-py==12.560.30 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # voir nvidia-nccl-cu12==2.20.5 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # jax-cuda12-plugin # torch nvidia-nvjitlink-cu12==12.6.68 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # jax-cuda12-plugin # nvidia-cusolver-cu12 # nvidia-cusparse-cu12 nvidia-nvtx-cu12==12.1.105 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt # torch omegaconf==2.3.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # voir +opt-einsum==3.3.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # jax ovld==0.3.9 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # voir pandas==2.2.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements.in pillow==10.4.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # rdkit psutil==5.9.8 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch-geometric # voir ptera==1.4.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # voir pygments==2.18.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # rich 
pyparsing==3.1.4 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch-geometric python-dateutil==2.9.0.post0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # pandas -pytz==2024.1 +pytz==2024.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # pandas pyyaml==6.0.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # omegaconf rdkit==2024.3.5 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements.in reactivex==4.0.4 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # giving requests==2.32.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch-geometric -rich==13.8.0 +rich==13.8.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # voir -scikit-learn==1.5.1 - # via - # -c .pin/../.pin/constraints-cuda-gnn.txt - # torch-geometric scipy==1.14.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt - # scikit-learn + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # jax + # jaxlib # torch-cluster - # torch-geometric # torch-sparse six==1.16.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # asttokens # python-dateutil sympy==1.13.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt # torch -threadpoolctl==3.5.0 - # via - # -c .pin/../.pin/constraints-cuda-gnn.txt - # scikit-learn -torch==2.3.1+cu121 +torch==2.4.0+cu121 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt - # -r .pin/../constraints/extra/gnn.cuda.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt -torch-cluster==1.6.3+pt23cu121 + # xformers +torch-cluster==1.6.3+pt24cu121 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements.in -torch-geometric==2.5.3 +torch-geometric==2.6.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements.in -torch-scatter==2.1.2+pt23cu121 +torch-scatter==2.1.2+pt24cu121 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements.in -torch-sparse==0.6.18+pt23cu121 +torch-sparse==0.6.18+pt24cu121 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements.in tqdm==4.66.5 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch-geometric -triton==2.3.1 +triton==3.0.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt # torch typing-extensions==4.12.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/geo_gnn/requirements-pre.cuda.txt + # multidict # reactivex # torch tzdata==2024.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # pandas -urllib3==2.2.2 +urllib3==2.2.3 # via - # -c 
.pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # requests -varname==0.10.0 +varname==0.13.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # giving voir==0.2.19 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -c .pin/../constraints/cuda.txt # -r benchmarks/geo_gnn/requirements.in -yarl==1.11.0 +xformers==0.0.27.post2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r .pin/../constraints/extra/torch.cuda.txt + # -r benchmarks/geo_gnn/requirements-pre.cuda.txt +yarl==1.11.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp diff --git a/benchmarks/huggingface/requirements.cuda.txt b/benchmarks/huggingface/requirements.cuda.txt index 22dd9bd40..d4bcacca7 100644 --- a/benchmarks/huggingface/requirements.cuda.txt +++ b/benchmarks/huggingface/requirements.cuda.txt @@ -7,6 +7,7 @@ --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com antlr4-python3-runtime==4.9.3 @@ -29,7 +30,7 @@ codefind==0.1.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname @@ -45,17 +46,17 @@ fsspec==2024.6.1 # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera # voir -huggingface-hub==0.24.6 +huggingface-hub==0.24.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # tokenizers # transformers -idna==3.8 +idna==3.10 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -91,7 +92,7 @@ mdurl==0.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # markdown-it-py -ml-dtypes==0.4.0 +ml-dtypes==0.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jax @@ -226,7 +227,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -regex==2024.7.24 +regex==2024.9.11 # via # -c .pin/../.pin/constraints-cuda-torch.txt # transformers @@ -235,7 +236,7 @@ requests==2.32.3 # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub # transformers -rich==13.8.0 +rich==13.8.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir @@ -284,11 +285,11 @@ typing-extensions==4.12.2 # huggingface-hub # reactivex # torch -urllib3==2.2.2 +urllib3==2.2.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving diff --git a/benchmarks/huggingface/tunableop_results0.csv b/benchmarks/huggingface/tunableop_results0.csv deleted file mode 100644 index 6a38d561a..000000000 --- a/benchmarks/huggingface/tunableop_results0.csv +++ /dev/null @@ -1,17 +0,0 @@ -Validator,PT_VERSION,2.4.0 -Validator,ROCBLAS_VERSION,4.0.0-88df9726-dirty -Validator,HIPBLASLT_VERSION,0.6.0-592518e7 -Validator,ROCM_VERSION,6.0.0.0-91-08e5094 -Validator,GCN_ARCH_NAME,gfx942:sramecc+:xnack- -GemmTunableOp_float_NT,nt_768_3072_16384,Gemm_Rocblas_69720,0.751226 -GemmTunableOp_float_NT,nt_3072_768_16384,Gemm_Rocblas_69733,0.684042 -GemmTunableOp_float_NT,nt_768_768_16384,Gemm_Hipblaslt_NT_28806,0.264226 -GemmTunableOp_float_NT,nt_768_30522_16384,Gemm_Hipblaslt_NT_27808,5.73919 
-GemmTunableOp_float_NN,nn_768_16384_3072,Gemm_Hipblaslt_NN_33293,0.701076 -GemmTunableOp_float_NN,nn_768_16384_768,Gemm_Hipblaslt_NN_33685,0.209309 -GemmTunableOp_float_NN,nn_3072_16384_768,Gemm_Hipblaslt_NN_33225,0.69655 -GemmTunableOp_float_NN,nn_768_16384_30522,Gemm_Hipblaslt_NN_33924,5.81957 -GemmTunableOp_float_TN,tn_30522_16384_768,Default,6.06459 -GemmTunableOp_float_TN,tn_768_16384_3072,Gemm_Hipblaslt_TN_34830,0.584625 -GemmTunableOp_float_TN,tn_3072_16384_768,Gemm_Rocblas_69037,0.742789 -GemmTunableOp_float_TN,tn_768_16384_768,Gemm_Rocblas_69047,0.211827 diff --git a/benchmarks/lightning/requirements.cuda.txt b/benchmarks/lightning/requirements.cuda.txt index 2cb0780c3..d6823c252 100644 --- a/benchmarks/lightning/requirements.cuda.txt +++ b/benchmarks/lightning/requirements.cuda.txt @@ -7,6 +7,7 @@ --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com aiohappyeyeballs==2.4.0 @@ -41,7 +42,7 @@ codefind==0.1.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname @@ -61,12 +62,12 @@ fsspec[http]==2024.6.1 # lightning # pytorch-lightning # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera # voir -idna==3.8 +idna==3.10 # via # -c .pin/../.pin/constraints-cuda-torch.txt # yarl @@ -116,7 +117,7 @@ mdurl==0.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # markdown-it-py -ml-dtypes==0.4.0 +ml-dtypes==0.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jax @@ -125,7 +126,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -multidict==6.0.5 +multidict==6.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp @@ -263,7 +264,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -rich==13.8.0 +rich==13.8.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir @@ -294,7 +295,7 @@ torchcompat==1.1.4 # -c .pin/../.pin/constraints-cuda-torch.txt # -c .pin/../constraints/cuda.txt # -r benchmarks/lightning/requirements.in -torchmetrics==1.4.1 +torchmetrics==1.4.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # lightning @@ -317,10 +318,11 @@ typing-extensions==4.12.2 # -c .pin/../.pin/constraints-cuda-torch.txt # lightning # lightning-utilities + # multidict # pytorch-lightning # reactivex # torch -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving @@ -333,7 +335,7 @@ xformers==0.0.27.post2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # -r .pin/../constraints/extra/torch.cuda.txt -yarl==1.11.0 +yarl==1.11.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp diff --git a/benchmarks/llama/requirements.cuda.txt b/benchmarks/llama/requirements.cuda.txt index a9a5f3e7a..7d972b40f 100644 --- a/benchmarks/llama/requirements.cuda.txt +++ b/benchmarks/llama/requirements.cuda.txt @@ -7,6 +7,7 @@ --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com aiohappyeyeballs==2.4.0 @@ -50,7 +51,7 @@ codefind==0.1.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera 
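Note: benchmarks/huggingface/tunableop_results0.csv, deleted above, is a PyTorch TunableOp artifact. When GEMM autotuning is enabled, PyTorch records the best kernel it finds for each matmul shape in a per-GPU CSV (GPU ordinal 0 gives the "0" suffix). The entries are specific to one machine and driver stack (note the gfx942 / rocBLAS / hipBLASLt validator rows), so the file is a local tuning cache rather than source, and removing it from version control makes sense. A minimal sketch of how such a file is produced, assuming a PyTorch build with TunableOp support; the shapes are illustrative:

    import os

    # TunableOp is typically configured via environment variables that are
    # read when torch initializes, so set them before the import.
    os.environ["PYTORCH_TUNABLEOP_ENABLED"] = "1"   # use tuned GEMM kernels
    os.environ["PYTORCH_TUNABLEOP_TUNING"] = "1"    # allow tuning of new shapes
    os.environ["PYTORCH_TUNABLEOP_FILENAME"] = "tunableop_results.csv"

    import torch

    a = torch.randn(768, 16384, device="cuda")
    b = torch.randn(16384, 3072, device="cuda")
    c = a @ b  # the first matmul of a given shape triggers tuning; the winning
               # kernel is appended to the CSV and reused on later runs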
-datasets==2.21.0 +datasets==3.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/llama/requirements.in @@ -59,7 +60,7 @@ dill==0.3.8 # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # multiprocess -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname @@ -90,18 +91,18 @@ fsspec[http]==2024.6.1 # datasets # huggingface-hub # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera # voir -huggingface-hub==0.24.6 +huggingface-hub==0.24.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # tokenizers # transformers -idna==3.8 +idna==3.10 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -138,7 +139,7 @@ mdurl==0.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # markdown-it-py -ml-dtypes==0.4.0 +ml-dtypes==0.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jax @@ -147,7 +148,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -multidict==6.0.5 +multidict==6.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp @@ -285,7 +286,7 @@ python-dateutil==2.9.0.post0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pandas -pytz==2024.1 +pytz==2024.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pandas @@ -300,7 +301,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -regex==2024.7.24 +regex==2024.9.11 # via # -c .pin/../.pin/constraints-cuda-torch.txt # transformers @@ -310,7 +311,7 @@ requests==2.32.3 # datasets # huggingface-hub # transformers -rich==13.8.0 +rich==13.8.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir @@ -369,17 +370,18 @@ typing-extensions==4.12.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub + # multidict # reactivex # torch tzdata==2024.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pandas -urllib3==2.2.2 +urllib3==2.2.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving @@ -396,7 +398,7 @@ xxhash==3.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets -yarl==1.11.0 +yarl==1.11.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp diff --git a/benchmarks/llava/main.py b/benchmarks/llava/main.py index 94aec57b6..879baca01 100755 --- a/benchmarks/llava/main.py +++ b/benchmarks/llava/main.py @@ -103,6 +103,11 @@ def batch_size_fn(batch): inputs = processor( text=prompt, images=image, return_tensors="pt", padding=True ) + + labels = inputs["input_ids"].clone() + labels[labels == processor.tokenizer.pad_token_id] = -100 + inputs["labels"] = labels + inputs = { k: v.to( accelerator.device, @@ -111,8 +116,6 @@ def batch_size_fn(batch): for k, v in inputs.items() } - inputs["labels"] = inputs["input_ids"] - outputs = model(**inputs) loss = outputs.loss diff --git a/benchmarks/llava/requirements.cuda.txt b/benchmarks/llava/requirements.cuda.txt index bb2638766..02cc24fbc 100644 --- a/benchmarks/llava/requirements.cuda.txt +++ b/benchmarks/llava/requirements.cuda.txt @@ -7,6 +7,7 @@ --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com accelerate==0.34.2 @@ -54,7 +55,7 @@ codefind==0.1.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera -datasets==2.21.0 +datasets==3.0.0 
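Note: the benchmarks/llava/main.py hunk above is a correctness fix to the loss, not just a reorder. Labels used to be the raw input_ids, so with padded batches the model was also trained to predict pad tokens; the new code clones input_ids and writes -100 into the padded positions, which is the ignore_index Hugging Face models use for cross-entropy. Building labels before the device-transfer comprehension also means they are moved to the accelerator together with the other tensors. The same masking pattern in isolation (tensor values are illustrative):

    import torch
    import torch.nn.functional as F

    pad_token_id = 0
    input_ids = torch.tensor([[11, 12, 13, pad_token_id, pad_token_id]])

    labels = input_ids.clone()
    labels[labels == pad_token_id] = -100  # masked positions contribute no loss

    logits = torch.randn(1, 5, 32)  # stand-in model output: (batch, seq, vocab)
    loss = F.cross_entropy(
        logits.view(-1, logits.size(-1)),  # (batch*seq, vocab)
        labels.view(-1),                   # (batch*seq,)
        ignore_index=-100,
    )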
# via # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/llava/requirements.in @@ -63,7 +64,7 @@ dill==0.3.8 # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # multiprocess -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname @@ -86,19 +87,19 @@ fsspec[http]==2024.6.1 # datasets # huggingface-hub # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera # voir -huggingface-hub==0.24.6 +huggingface-hub==0.24.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # accelerate # datasets # tokenizers # transformers -idna==3.8 +idna==3.10 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -135,7 +136,7 @@ mdurl==0.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # markdown-it-py -ml-dtypes==0.4.0 +ml-dtypes==0.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jax @@ -144,7 +145,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -multidict==6.0.5 +multidict==6.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp @@ -289,7 +290,7 @@ python-dateutil==2.9.0.post0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pandas -pytz==2024.1 +pytz==2024.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pandas @@ -305,7 +306,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -regex==2024.7.24 +regex==2024.9.11 # via # -c .pin/../.pin/constraints-cuda-torch.txt # transformers @@ -315,7 +316,7 @@ requests==2.32.3 # datasets # huggingface-hub # transformers -rich==13.8.0 +rich==13.8.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir @@ -366,17 +367,18 @@ typing-extensions==4.12.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub + # multidict # reactivex # torch tzdata==2024.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pandas -urllib3==2.2.2 +urllib3==2.2.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving @@ -393,7 +395,7 @@ xxhash==3.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets -yarl==1.11.0 +yarl==1.11.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp diff --git a/benchmarks/llm/requirements.cuda.txt b/benchmarks/llm/requirements.cuda.txt index 976e4eafd..0e1e0010a 100644 --- a/benchmarks/llm/requirements.cuda.txt +++ b/benchmarks/llm/requirements.cuda.txt @@ -7,6 +7,7 @@ --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com accelerate==0.34.2 @@ -63,7 +64,7 @@ codefind==0.1.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera -datasets==2.21.0 +datasets==3.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchtune @@ -72,7 +73,7 @@ dill==0.3.8 # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # multiprocess -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname @@ -105,7 +106,7 @@ fsspec[http]==2024.6.1 # datasets # huggingface-hub # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera @@ -114,7 +115,7 @@ hjson==3.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # argklass -huggingface-hub==0.24.6 +huggingface-hub==0.24.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # accelerate @@ -122,7 +123,7 
@@ huggingface-hub==0.24.6 # tokenizers # torchtune # transformers -idna==3.8 +idna==3.10 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -167,7 +168,7 @@ mdurl==0.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # markdown-it-py -ml-dtypes==0.4.0 +ml-dtypes==0.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jax @@ -176,7 +177,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -multidict==6.0.5 +multidict==6.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp @@ -323,7 +324,7 @@ python-dateutil==2.9.0.post0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pandas -pytz==2024.1 +pytz==2024.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pandas @@ -340,7 +341,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -regex==2024.7.24 +regex==2024.9.11 # via # -c .pin/../.pin/constraints-cuda-torch.txt # tiktoken @@ -352,7 +353,7 @@ requests==2.32.3 # huggingface-hub # tiktoken # transformers -rich==13.8.0 +rich==13.8.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir @@ -428,18 +429,19 @@ typing-extensions==4.12.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub + # multidict # reactivex # torch tzdata==2024.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pandas -urllib3==2.2.2 +urllib3==2.2.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # blobfile # requests -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving @@ -456,7 +458,7 @@ xxhash==3.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets -yarl==1.11.0 +yarl==1.11.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp diff --git a/benchmarks/llm/tune b/benchmarks/llm/tune deleted file mode 160000 index a83eeff00..000000000 --- a/benchmarks/llm/tune +++ /dev/null @@ -1 +0,0 @@ -Subproject commit a83eeff0079a73ee04a11e8fc2573ed8f671b231 diff --git a/benchmarks/purejaxrl/dqn.py b/benchmarks/purejaxrl/dqn.py index 16fa55f52..17c839147 100644 --- a/benchmarks/purejaxrl/dqn.py +++ b/benchmarks/purejaxrl/dqn.py @@ -11,8 +11,8 @@ import optax import flax.linen as nn from flax.training.train_state import TrainState -from gymnax.wrappers.purerl import FlattenObservationWrapper, LogWrapper import gymnax +from gymnax.wrappers.purerl import FlattenObservationWrapper, LogWrapper import flashbax as fbx from benchmate.metrics import give_push diff --git a/benchmarks/purejaxrl/requirements.cuda.txt b/benchmarks/purejaxrl/requirements.cuda.txt index aa28e8cfe..a59468762 100644 --- a/benchmarks/purejaxrl/requirements.cuda.txt +++ b/benchmarks/purejaxrl/requirements.cuda.txt @@ -6,10 +6,13 @@ # --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 +--find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com absl-py==2.1.0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # brax # chex # distrax @@ -22,27 +25,45 @@ absl-py==2.1.0 # rlax # tensorflow-probability antlr4-python3-runtime==4.9.3 - # via omegaconf + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # omegaconf argklass==1.4.4 - # via -r benchmarks/purejaxrl/requirements.in + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/purejaxrl/requirements.in astroid==3.2.4 - # via pylint + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # pylint asttokens==2.4.1 - # via giving + # via + # -c 
.pin/../.pin/constraints-cuda-torch.txt + # giving black==24.8.0 - # via navix + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # navix blinker==1.8.2 - # via flask + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # flask brax==0.10.5 - # via -r benchmarks/purejaxrl/requirements.in + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/purejaxrl/requirements.in certifi==2024.8.30 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # requests # sentry-sdk charset-normalizer==3.3.2 - # via requests + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests chex==0.1.86 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # distrax # evosax # flashbax @@ -51,75 +72,116 @@ chex==0.1.86 # rlax click==8.1.7 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # black # flask # wandb cloudpickle==3.0.0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # gym # gymnasium # tensorflow-probability codefind==0.1.7 - # via ptera + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # ptera contextlib2==21.6.0 - # via ml-collections + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # ml-collections contourpy==1.3.0 - # via matplotlib + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # matplotlib cycler==0.12.1 - # via matplotlib + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # matplotlib decorator==5.1.1 - # via tensorflow-probability + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # tensorflow-probability dill==0.3.8 - # via pylint + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # pylint distrax==0.1.5 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/purejaxrl/requirements.in # rlax dm-env==1.6 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # brax # rlax dm-tree==0.1.8 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # dm-env # tensorflow-probability docker-pycreds==0.4.0 - # via wandb + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # wandb docstring-parser==0.16 - # via tyro + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # tyro dotmap==1.3.30 - # via evosax + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # evosax etils[epath,epy]==1.9.4 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # brax # mujoco # mujoco-mjx # optax # orbax-checkpoint evosax==0.1.6 - # via -r benchmarks/purejaxrl/requirements.in + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/purejaxrl/requirements.in exceptiongroup==1.2.2 - # via pytest -executing==1.2.0 - # via varname + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # pytest +executing==2.1.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # varname farama-notifications==0.0.4 - # via gymnasium + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # gymnasium filelock==3.16.0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # torch # triton flake8==7.1.1 - # via navix + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # navix flashbax==0.1.2 - # via -r benchmarks/purejaxrl/requirements.in + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/purejaxrl/requirements.in flask==3.0.3 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # brax # flask-cors flask-cors==5.0.0 - # via brax + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # brax flax==0.9.0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/purejaxrl/requirements.in # brax # evosax @@ -127,53 +189,90 @@ flax==0.9.0 # gymnax # navix fonttools==4.53.1 - # via matplotlib 
-fsspec==2024.9.0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # matplotlib +fsspec==2024.6.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt # etils # torch gast==0.6.0 - # via tensorflow-probability + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # tensorflow-probability gitdb==4.0.11 - # via gitpython + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # gitpython gitpython==3.1.43 - # via wandb -giving==0.4.2 # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # wandb +giving==0.4.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt # ptera # voir glfw==2.7.0 - # via mujoco + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # mujoco grpcio==1.66.1 - # via brax + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # brax gym==0.26.2 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # brax # gymnax gym-notices==0.0.8 - # via gym + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # gym gymnasium==0.29.1 - # via gymnax + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # gymnax gymnax==0.0.8 - # via -r benchmarks/purejaxrl/requirements.in + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -c .pin/../constraints/cuda.txt + # -r benchmarks/purejaxrl/requirements.in hjson==3.1.0 - # via argklass + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # argklass humanize==4.10.0 - # via orbax-checkpoint -idna==3.8 - # via requests + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # orbax-checkpoint +idna==3.10 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests importlib-resources==6.4.5 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # argklass # etils iniconfig==2.0.0 - # via pytest + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # pytest isort==5.13.2 - # via pylint + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # pylint itsdangerous==2.2.0 - # via flask -jax==0.4.31 # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # flask +jax[cuda12]==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r .pin/../constraints/extra/torch.cuda.txt # -r benchmarks/purejaxrl/requirements.in # brax # chex @@ -187,8 +286,17 @@ jax==0.4.31 # optax # orbax-checkpoint # rlax +jax-cuda12-pjrt==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin +jax-cuda12-plugin[with-cuda]==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax jaxlib==0.4.31 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # brax # chex # distrax @@ -202,60 +310,90 @@ jaxlib==0.4.31 # orbax-checkpoint # rlax jaxopt==0.8.3 - # via brax + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # brax jinja2==3.1.4 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # brax # flask # torch kiwisolver==1.4.7 - # via matplotlib + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # matplotlib markdown-it-py==3.0.0 - # via rich + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # rich markupsafe==2.1.5 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # jinja2 # werkzeug matplotlib==3.9.2 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # evosax # gymnax # seaborn mccabe==0.7.0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # flake8 # pylint mdurl==0.1.2 - # via markdown-it-py + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # markdown-it-py ml-collections==0.1.1 - # via brax -ml-dtypes==0.4.0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # brax +ml-dtypes==0.5.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt # jax # 
jaxlib # tensorstore mpmath==1.3.0 - # via sympy -msgpack==1.0.8 # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # sympy +msgpack==1.1.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt # flax # orbax-checkpoint mujoco==3.2.2 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # brax # mujoco-mjx mujoco-mjx==3.2.2 - # via brax + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # brax mypy-extensions==1.0.0 - # via black + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # black navix==0.7.0 - # via -r benchmarks/purejaxrl/requirements.in + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/purejaxrl/requirements.in nest-asyncio==1.6.0 - # via orbax-checkpoint + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # orbax-checkpoint networkx==3.3 - # via torch -numpy==2.1.1 # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +numpy==1.26.4 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/purejaxrl/requirements.in # brax # chex @@ -284,112 +422,190 @@ numpy==2.1.1 # tensorflow-probability # tensorstore # trimesh + # xformers nvidia-cublas-cu12==12.1.3.1 # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # nvidia-cudnn-cu12 # nvidia-cusolver-cu12 # torch nvidia-cuda-cupti-cu12==12.1.105 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin + # torch +nvidia-cuda-nvcc-cu12==12.6.68 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin nvidia-cuda-nvrtc-cu12==12.1.105 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch nvidia-cuda-runtime-cu12==12.1.105 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin + # torch nvidia-cudnn-cu12==9.1.0.70 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin + # torch nvidia-cufft-cu12==11.0.2.54 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin + # torch nvidia-curand-cu12==10.3.2.106 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch nvidia-cusolver-cu12==11.4.5.107 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin + # torch nvidia-cusparse-cu12==12.1.0.106 # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # nvidia-cusolver-cu12 # torch nvidia-ml-py==12.560.30 - # via voir + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir nvidia-nccl-cu12==2.20.5 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin + # torch nvidia-nvjitlink-cu12==12.6.68 # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # nvidia-cusolver-cu12 # nvidia-cusparse-cu12 nvidia-nvtx-cu12==12.1.105 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch omegaconf==2.3.0 - # via voir + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir opt-einsum==3.3.0 - # via jax + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax optax==0.2.3 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/purejaxrl/requirements.in # brax # flax -orbax-checkpoint==0.6.1 +orbax-checkpoint==0.6.3 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # brax # flax ovld==0.3.9 - # via voir + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir packaging==24.1 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # black # matplotlib # pytest # setuptools-scm # tensorboardx pandas==2.2.2 - 
# via seaborn + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # seaborn pathspec==0.12.1 - # via black + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # black pillow==10.4.0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # brax # matplotlib # navix -platformdirs==4.3.2 +platformdirs==4.3.3 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # black # pylint # wandb pluggy==1.5.0 - # via pytest -protobuf==5.28.0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # pytest +protobuf==5.28.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt # orbax-checkpoint # tensorboardx # wandb psutil==5.9.8 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # voir # wandb ptera==1.4.1 - # via voir + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir pycodestyle==2.12.1 - # via flake8 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # flake8 pyflakes==3.2.0 - # via flake8 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # flake8 pygments==2.18.0 - # via rich + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # rich pylint==3.2.7 - # via navix + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # navix pyopengl==3.1.7 - # via mujoco + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # mujoco pyparsing==3.1.4 - # via matplotlib -pytest==8.3.2 - # via navix + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # matplotlib +pytest==8.3.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # navix python-dateutil==2.9.0.post0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # matplotlib # pandas pytinyrenderer==0.0.14 - # via brax -pytz==2024.1 - # via pandas + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # brax +pytz==2024.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # pandas pyyaml==6.0.2 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # evosax # flax # gymnax @@ -398,73 +614,113 @@ pyyaml==6.0.2 # orbax-checkpoint # wandb reactivex==4.0.4 - # via giving + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving requests==2.32.3 - # via wandb -rich==13.8.0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # wandb +rich==13.8.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt # flax # tyro # voir rlax==0.1.6 - # via navix + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # navix scipy==1.14.1 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # brax # jax # jaxlib # jaxopt # mujoco-mjx seaborn==0.13.2 - # via gymnax + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # gymnax sentry-sdk==2.14.0 - # via wandb + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # wandb setproctitle==1.3.3 - # via wandb + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # wandb setuptools-scm==8.1.0 - # via navix + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # navix shtab==1.7.1 - # via tyro + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # tyro six==1.16.0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # asttokens # docker-pycreds # ml-collections # python-dateutil # tensorflow-probability smmap==5.0.1 - # via gitdb + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # gitdb sympy==1.13.2 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch tensorboardx==2.6.2.2 - # via brax + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # brax tensorflow-probability==0.24.0 - # via distrax + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # distrax tensorstore==0.1.65 # via + # -c 
.pin/../.pin/constraints-cuda-torch.txt # flashbax # flax # orbax-checkpoint tomli==2.0.1 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # black # pylint # pytest # setuptools-scm tomlkit==0.13.2 - # via pylint + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # pylint toolz==0.12.1 - # via chex -torch==2.4.1+cu121 - # via -r benchmarks/purejaxrl/requirements.in + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # chex +torch==2.4.0+cu121 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/purejaxrl/requirements.in + # xformers trimesh==4.4.9 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # brax # mujoco-mjx triton==3.0.0 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch typing-extensions==4.12.2 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # astroid # black # brax @@ -479,25 +735,43 @@ typing-extensions==4.12.2 # torch # tyro tyro==0.8.10 - # via navix + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # navix tzdata==2024.1 - # via pandas -urllib3==2.2.2 # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # pandas +urllib3==2.2.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt # requests # sentry-sdk -varname==0.10.0 - # via giving +varname==0.13.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving voir==0.2.19 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # -c .pin/../constraints/cuda.txt # -r benchmarks/purejaxrl/requirements.in -wandb==0.17.9 - # via navix +wandb==0.18.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # navix werkzeug==3.0.4 - # via flask -zipp==3.20.1 - # via etils + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # flask +xformers==0.0.27.post2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r .pin/../constraints/extra/torch.cuda.txt +zipp==3.20.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # etils # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/benchmarks/recursiongfn/requirements.cuda.txt b/benchmarks/recursiongfn/requirements.cuda.txt index b586dd4c3..89c02624f 100644 --- a/benchmarks/recursiongfn/requirements.cuda.txt +++ b/benchmarks/recursiongfn/requirements.cuda.txt @@ -2,198 +2,223 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --output-file=benchmarks/recursiongfn/requirements.cuda.txt .pin/tmp-constraints-cuda-recursiongfn_gnn.txt benchmarks/recursiongfn/requirements.in +# pip-compile --output-file=benchmarks/recursiongfn/requirements.cuda.txt .pin/tmp-constraints-cuda-recursiongfn.txt benchmarks/recursiongfn/requirements.in # --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 ---find-links https://data.pyg.org/whl/torch-2.3.0+cu121.html +--find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com absl-py==2.1.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard aiohappyeyeballs==2.4.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp aiohttp==3.10.5 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch-geometric aiosignal==1.3.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp 
antlr4-python3-runtime==4.9.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # omegaconf asttokens==2.4.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # giving async-timeout==4.0.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp attrs==24.2.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp blosc2==2.7.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # tables botorch==0.11.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in certifi==2024.8.30 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # requests # sentry-sdk charset-normalizer==3.3.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # requests click==8.1.7 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # wandb codefind==0.1.7 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # ptera cvxopt==1.3.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in docker-pycreds==0.4.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # wandb -executing==1.2.0 +executing==2.1.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # varname filelock==3.16.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch # triton frozenlist==1.4.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp # aiosignal -fsspec==2024.9.0 +fsspec==2024.6.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch # torch-geometric gitdb==4.0.11 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # gitpython gitpython==3.1.43 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in # wandb -giving==0.4.2 +giving==0.4.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # ptera # voir gpytorch==1.12 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in # botorch grpcio==1.66.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard -idna==3.8 +idna==3.10 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # requests # yarl +jax[cuda12]==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r .pin/../constraints/extra/torch.cuda.txt +jax-cuda12-pjrt==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin +jax-cuda12-plugin[with-cuda]==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax +jaxlib==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax jaxtyping==0.2.34 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c 
.pin/../.pin/constraints-cuda-torch.txt # linear-operator jinja2==3.1.4 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch # torch-geometric joblib==1.4.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # scikit-learn linear-operator==0.5.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # botorch # gpytorch markdown==3.7 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard markdown-it-py==3.0.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # rich markupsafe==2.1.5 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # jinja2 # werkzeug mdurl==0.1.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # markdown-it-py +ml-dtypes==0.5.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax + # jaxlib mpmath==1.3.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # botorch # gpytorch # sympy -msgpack==1.0.8 +msgpack==1.1.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # blosc2 -multidict==6.0.5 +multidict==6.1.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp # yarl multipledispatch==1.0.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # botorch ndindex==1.8 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # blosc2 networkx==3.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in # torch numexpr==2.10.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # blosc2 # tables numpy==1.26.4 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # blosc2 # botorch + # jax + # jaxlib + # ml-dtypes # numexpr # opt-einsum # pandas @@ -205,291 +230,311 @@ numpy==1.26.4 # tables # tensorboard # torch-geometric + # xformers nvidia-cublas-cu12==12.1.3.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # nvidia-cudnn-cu12 # nvidia-cusolver-cu12 # torch nvidia-cuda-cupti-cu12==12.1.105 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # torch +nvidia-cuda-nvcc-cu12==12.6.68 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin nvidia-cuda-nvrtc-cu12==12.1.105 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch nvidia-cuda-runtime-cu12==12.1.105 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # torch -nvidia-cudnn-cu12==8.9.2.26 +nvidia-cudnn-cu12==9.1.0.70 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # torch nvidia-cufft-cu12==11.0.2.54 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # torch nvidia-curand-cu12==10.3.2.106 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c 
.pin/../.pin/constraints-cuda-torch.txt # torch nvidia-cusolver-cu12==11.4.5.107 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # torch nvidia-cusparse-cu12==12.1.0.106 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # nvidia-cusolver-cu12 # torch nvidia-ml-py==12.560.30 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # voir nvidia-nccl-cu12==2.20.5 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # torch nvidia-nvjitlink-cu12==12.6.68 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin # nvidia-cusolver-cu12 # nvidia-cusparse-cu12 nvidia-nvtx-cu12==12.1.105 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch omegaconf==2.3.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in # voir opt-einsum==3.3.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax # pyro-ppl ovld==0.3.9 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # voir packaging==24.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # tables # tensorboard pandas==2.2.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in pillow==10.4.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # rdkit -platformdirs==4.3.2 +platformdirs==4.3.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # wandb -protobuf==5.28.0 +protobuf==5.28.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard # wandb psutil==5.9.8 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch-geometric # voir # wandb ptera==1.4.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # voir py-cpuinfo==9.0.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # blosc2 # tables pyarrow==17.0.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in pygments==2.18.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # rich pyparsing==3.1.4 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch-geometric pyro-api==0.1.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # pyro-ppl pyro-ppl==1.9.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in # botorch python-dateutil==2.9.0.post0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # pandas -pytz==2024.1 +pytz==2024.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # pandas pyyaml==6.0.2 # via - # -c 
.pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # omegaconf # wandb rdkit==2024.3.5 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in reactivex==4.0.4 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # giving requests==2.32.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch-geometric # wandb -rich==13.8.0 +rich==13.8.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # voir -scikit-learn==1.5.1 +scikit-learn==1.5.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # gpytorch - # torch-geometric scipy==1.14.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in # botorch # gpytorch + # jax + # jaxlib # linear-operator # scikit-learn # torch-cluster - # torch-geometric # torch-sparse sentry-sdk==2.14.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # wandb setproctitle==1.3.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # wandb six==1.16.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # asttokens # docker-pycreds # python-dateutil # tensorboard smmap==5.0.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # gitdb sympy==1.13.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch tables==3.10.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in tensorboard==2.17.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in tensorboard-data-server==0.7.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard threadpoolctl==3.5.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # scikit-learn -torch==2.3.1+cu121 +torch==2.4.0+cu121 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt - # -r .pin/../constraints/extra/gnn.cuda.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in # botorch # linear-operator # pyro-ppl -torch-cluster==1.6.3+pt23cu121 + # xformers +torch-cluster==1.6.3+pt24cu121 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in -torch-geometric==2.5.3 +torch-geometric==2.6.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in -torch-scatter==2.1.2+pt23cu121 +torch-scatter==2.1.2+pt24cu121 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in -torch-sparse==0.6.18+pt23cu121 +torch-sparse==0.6.18+pt24cu121 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in tqdm==4.66.5 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c 
.pin/../.pin/constraints-cuda-torch.txt # pyro-ppl # torch-geometric -triton==2.3.1 +triton==3.0.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # torch typeguard==2.13.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # jaxtyping # linear-operator typing-extensions==4.12.2 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt + # multidict # reactivex # tables # torch tzdata==2024.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # pandas -urllib3==2.2.2 +urllib3==2.2.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # requests # sentry-sdk -varname==0.10.0 +varname==0.13.3 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # giving voir==0.2.19 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -c .pin/../constraints/cuda.txt # -r benchmarks/recursiongfn/requirements.in -wandb==0.17.9 +wandb==0.18.0 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/recursiongfn/requirements.in werkzeug==3.0.4 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard -yarl==1.11.0 +xformers==0.0.27.post2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r .pin/../constraints/extra/torch.cuda.txt +yarl==1.11.1 # via - # -c .pin/../.pin/constraints-cuda-gnn.txt + # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp # The following packages are considered to be unsafe in a requirements file: diff --git a/benchmarks/rwkv/README.md b/benchmarks/retired/rwkv/README.md similarity index 100% rename from benchmarks/rwkv/README.md rename to benchmarks/retired/rwkv/README.md diff --git a/benchmarks/rwkv/benchfile.py b/benchmarks/retired/rwkv/benchfile.py similarity index 100% rename from benchmarks/rwkv/benchfile.py rename to benchmarks/retired/rwkv/benchfile.py diff --git a/benchmarks/rwkv/prepare.py b/benchmarks/retired/rwkv/prepare.py similarity index 100% rename from benchmarks/rwkv/prepare.py rename to benchmarks/retired/rwkv/prepare.py diff --git a/benchmarks/rwkv/requirements.cuda.txt b/benchmarks/retired/rwkv/requirements.cuda.txt similarity index 100% rename from benchmarks/rwkv/requirements.cuda.txt rename to benchmarks/retired/rwkv/requirements.cuda.txt diff --git a/benchmarks/rwkv/requirements.hpu.txt b/benchmarks/retired/rwkv/requirements.hpu.txt similarity index 100% rename from benchmarks/rwkv/requirements.hpu.txt rename to benchmarks/retired/rwkv/requirements.hpu.txt diff --git a/benchmarks/rwkv/requirements.in b/benchmarks/retired/rwkv/requirements.in similarity index 100% rename from benchmarks/rwkv/requirements.in rename to benchmarks/retired/rwkv/requirements.in diff --git a/benchmarks/rwkv/requirements.rocm.txt b/benchmarks/retired/rwkv/requirements.rocm.txt similarity index 100% rename from benchmarks/rwkv/requirements.rocm.txt rename to benchmarks/retired/rwkv/requirements.rocm.txt diff --git a/benchmarks/rwkv/requirements.xpu.txt b/benchmarks/retired/rwkv/requirements.xpu.txt similarity index 100% rename from benchmarks/rwkv/requirements.xpu.txt rename to benchmarks/retired/rwkv/requirements.xpu.txt diff --git a/benchmarks/rwkv/rwkv-v4neo/20B_tokenizer.json b/benchmarks/retired/rwkv/rwkv-v4neo/20B_tokenizer.json similarity index 
100% rename from benchmarks/rwkv/rwkv-v4neo/20B_tokenizer.json rename to benchmarks/retired/rwkv/rwkv-v4neo/20B_tokenizer.json diff --git a/benchmarks/rwkv/rwkv-v4neo/LICENSE b/benchmarks/retired/rwkv/rwkv-v4neo/LICENSE similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/LICENSE rename to benchmarks/retired/rwkv/rwkv-v4neo/LICENSE diff --git a/benchmarks/rwkv/rwkv-v4neo/ORIGIN.md b/benchmarks/retired/rwkv/rwkv-v4neo/ORIGIN.md similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/ORIGIN.md rename to benchmarks/retired/rwkv/rwkv-v4neo/ORIGIN.md diff --git a/benchmarks/rwkv/rwkv-v4neo/chat.py b/benchmarks/retired/rwkv/rwkv-v4neo/chat.py similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/chat.py rename to benchmarks/retired/rwkv/rwkv-v4neo/chat.py diff --git a/benchmarks/rwkv/rwkv-v4neo/cuda/wkv_cuda.cu b/benchmarks/retired/rwkv/rwkv-v4neo/cuda/wkv_cuda.cu similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/cuda/wkv_cuda.cu rename to benchmarks/retired/rwkv/rwkv-v4neo/cuda/wkv_cuda.cu diff --git a/benchmarks/rwkv/rwkv-v4neo/cuda/wkv_cuda_bf16.cu b/benchmarks/retired/rwkv/rwkv-v4neo/cuda/wkv_cuda_bf16.cu similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/cuda/wkv_cuda_bf16.cu rename to benchmarks/retired/rwkv/rwkv-v4neo/cuda/wkv_cuda_bf16.cu diff --git a/benchmarks/rwkv/rwkv-v4neo/cuda/wkv_op.cpp b/benchmarks/retired/rwkv/rwkv-v4neo/cuda/wkv_op.cpp similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/cuda/wkv_op.cpp rename to benchmarks/retired/rwkv/rwkv-v4neo/cuda/wkv_op.cpp diff --git a/benchmarks/rwkv/rwkv-v4neo/cuda/wkv_op_bf16.cpp b/benchmarks/retired/rwkv/rwkv-v4neo/cuda/wkv_op_bf16.cpp similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/cuda/wkv_op_bf16.cpp rename to benchmarks/retired/rwkv/rwkv-v4neo/cuda/wkv_op_bf16.cpp diff --git a/benchmarks/rwkv/rwkv-v4neo/img_demoAE.py b/benchmarks/retired/rwkv/rwkv-v4neo/img_demoAE.py similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/img_demoAE.py rename to benchmarks/retired/rwkv/rwkv-v4neo/img_demoAE.py diff --git a/benchmarks/rwkv/rwkv-v4neo/run.py b/benchmarks/retired/rwkv/rwkv-v4neo/run.py similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/run.py rename to benchmarks/retired/rwkv/rwkv-v4neo/run.py diff --git a/benchmarks/rwkv/rwkv-v4neo/src/__init__.py b/benchmarks/retired/rwkv/rwkv-v4neo/src/__init__.py similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/src/__init__.py rename to benchmarks/retired/rwkv/rwkv-v4neo/src/__init__.py diff --git a/benchmarks/rwkv/rwkv-v4neo/src/binidx.py b/benchmarks/retired/rwkv/rwkv-v4neo/src/binidx.py similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/src/binidx.py rename to benchmarks/retired/rwkv/rwkv-v4neo/src/binidx.py diff --git a/benchmarks/rwkv/rwkv-v4neo/src/dataset.py b/benchmarks/retired/rwkv/rwkv-v4neo/src/dataset.py similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/src/dataset.py rename to benchmarks/retired/rwkv/rwkv-v4neo/src/dataset.py diff --git a/benchmarks/rwkv/rwkv-v4neo/src/model.py b/benchmarks/retired/rwkv/rwkv-v4neo/src/model.py similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/src/model.py rename to benchmarks/retired/rwkv/rwkv-v4neo/src/model.py diff --git a/benchmarks/rwkv/rwkv-v4neo/src/model_img.py b/benchmarks/retired/rwkv/rwkv-v4neo/src/model_img.py similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/src/model_img.py rename to benchmarks/retired/rwkv/rwkv-v4neo/src/model_img.py diff --git a/benchmarks/rwkv/rwkv-v4neo/src/model_run.py 
b/benchmarks/retired/rwkv/rwkv-v4neo/src/model_run.py similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/src/model_run.py rename to benchmarks/retired/rwkv/rwkv-v4neo/src/model_run.py diff --git a/benchmarks/rwkv/rwkv-v4neo/src/trainer.py b/benchmarks/retired/rwkv/rwkv-v4neo/src/trainer.py similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/src/trainer.py rename to benchmarks/retired/rwkv/rwkv-v4neo/src/trainer.py diff --git a/benchmarks/rwkv/rwkv-v4neo/src/utils.py b/benchmarks/retired/rwkv/rwkv-v4neo/src/utils.py similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/src/utils.py rename to benchmarks/retired/rwkv/rwkv-v4neo/src/utils.py diff --git a/benchmarks/rwkv/rwkv-v4neo/train.py b/benchmarks/retired/rwkv/rwkv-v4neo/train.py similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/train.py rename to benchmarks/retired/rwkv/rwkv-v4neo/train.py diff --git a/benchmarks/rwkv/rwkv-v4neo/verify.py b/benchmarks/retired/rwkv/rwkv-v4neo/verify.py similarity index 100% rename from benchmarks/rwkv/rwkv-v4neo/verify.py rename to benchmarks/retired/rwkv/rwkv-v4neo/verify.py diff --git a/benchmarks/rwkv/voirfile.py b/benchmarks/retired/rwkv/voirfile.py similarity index 100% rename from benchmarks/rwkv/voirfile.py rename to benchmarks/retired/rwkv/voirfile.py diff --git a/benchmarks/super-slomo/README.md b/benchmarks/retired/super-slomo/README.md similarity index 100% rename from benchmarks/super-slomo/README.md rename to benchmarks/retired/super-slomo/README.md diff --git a/benchmarks/super-slomo/benchfile.py b/benchmarks/retired/super-slomo/benchfile.py similarity index 100% rename from benchmarks/super-slomo/benchfile.py rename to benchmarks/retired/super-slomo/benchfile.py diff --git a/benchmarks/super-slomo/prepare.py b/benchmarks/retired/super-slomo/prepare.py similarity index 100% rename from benchmarks/super-slomo/prepare.py rename to benchmarks/retired/super-slomo/prepare.py diff --git a/benchmarks/super-slomo/requirements.cuda.txt b/benchmarks/retired/super-slomo/requirements.cuda.txt similarity index 99% rename from benchmarks/super-slomo/requirements.cuda.txt rename to benchmarks/retired/super-slomo/requirements.cuda.txt index b2a5dc620..66ce02581 100644 --- a/benchmarks/super-slomo/requirements.cuda.txt +++ b/benchmarks/retired/super-slomo/requirements.cuda.txt @@ -205,7 +205,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -rich==13.8.0 +rich==13.8.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir diff --git a/benchmarks/super-slomo/requirements.hpu.txt b/benchmarks/retired/super-slomo/requirements.hpu.txt similarity index 100% rename from benchmarks/super-slomo/requirements.hpu.txt rename to benchmarks/retired/super-slomo/requirements.hpu.txt diff --git a/benchmarks/super-slomo/requirements.in b/benchmarks/retired/super-slomo/requirements.in similarity index 100% rename from benchmarks/super-slomo/requirements.in rename to benchmarks/retired/super-slomo/requirements.in diff --git a/benchmarks/super-slomo/requirements.rocm.txt b/benchmarks/retired/super-slomo/requirements.rocm.txt similarity index 100% rename from benchmarks/super-slomo/requirements.rocm.txt rename to benchmarks/retired/super-slomo/requirements.rocm.txt diff --git a/benchmarks/super-slomo/requirements.xpu.txt b/benchmarks/retired/super-slomo/requirements.xpu.txt similarity index 100% rename from benchmarks/super-slomo/requirements.xpu.txt rename to benchmarks/retired/super-slomo/requirements.xpu.txt diff --git 
a/benchmarks/super-slomo/slomo/LICENSE b/benchmarks/retired/super-slomo/slomo/LICENSE similarity index 100% rename from benchmarks/super-slomo/slomo/LICENSE rename to benchmarks/retired/super-slomo/slomo/LICENSE diff --git a/benchmarks/super-slomo/slomo/ORIGIN.md b/benchmarks/retired/super-slomo/slomo/ORIGIN.md similarity index 100% rename from benchmarks/super-slomo/slomo/ORIGIN.md rename to benchmarks/retired/super-slomo/slomo/ORIGIN.md diff --git a/benchmarks/super-slomo/slomo/README.md b/benchmarks/retired/super-slomo/slomo/README.md similarity index 100% rename from benchmarks/super-slomo/slomo/README.md rename to benchmarks/retired/super-slomo/slomo/README.md diff --git a/benchmarks/super-slomo/slomo/data/adobe240fps/test_list.txt b/benchmarks/retired/super-slomo/slomo/data/adobe240fps/test_list.txt similarity index 100% rename from benchmarks/super-slomo/slomo/data/adobe240fps/test_list.txt rename to benchmarks/retired/super-slomo/slomo/data/adobe240fps/test_list.txt diff --git a/benchmarks/super-slomo/slomo/data/adobe240fps/train_list.txt b/benchmarks/retired/super-slomo/slomo/data/adobe240fps/train_list.txt similarity index 100% rename from benchmarks/super-slomo/slomo/data/adobe240fps/train_list.txt rename to benchmarks/retired/super-slomo/slomo/data/adobe240fps/train_list.txt diff --git a/benchmarks/super-slomo/slomo/data/create_dataset.py b/benchmarks/retired/super-slomo/slomo/data/create_dataset.py similarity index 100% rename from benchmarks/super-slomo/slomo/data/create_dataset.py rename to benchmarks/retired/super-slomo/slomo/data/create_dataset.py diff --git a/benchmarks/super-slomo/slomo/dataloader.py b/benchmarks/retired/super-slomo/slomo/dataloader.py similarity index 100% rename from benchmarks/super-slomo/slomo/dataloader.py rename to benchmarks/retired/super-slomo/slomo/dataloader.py diff --git a/benchmarks/super-slomo/slomo/eval.py b/benchmarks/retired/super-slomo/slomo/eval.py similarity index 100% rename from benchmarks/super-slomo/slomo/eval.py rename to benchmarks/retired/super-slomo/slomo/eval.py diff --git a/benchmarks/super-slomo/slomo/model.py b/benchmarks/retired/super-slomo/slomo/model.py similarity index 100% rename from benchmarks/super-slomo/slomo/model.py rename to benchmarks/retired/super-slomo/slomo/model.py diff --git a/benchmarks/super-slomo/slomo/synth.py b/benchmarks/retired/super-slomo/slomo/synth.py similarity index 100% rename from benchmarks/super-slomo/slomo/synth.py rename to benchmarks/retired/super-slomo/slomo/synth.py diff --git a/benchmarks/super-slomo/slomo/train.py b/benchmarks/retired/super-slomo/slomo/train.py similarity index 100% rename from benchmarks/super-slomo/slomo/train.py rename to benchmarks/retired/super-slomo/slomo/train.py diff --git a/benchmarks/super-slomo/voirfile.py b/benchmarks/retired/super-slomo/voirfile.py similarity index 100% rename from benchmarks/super-slomo/voirfile.py rename to benchmarks/retired/super-slomo/voirfile.py diff --git a/benchmarks/rlhf/main.py b/benchmarks/rlhf/main.py index 5ddf459e4..0be12d282 100755 --- a/benchmarks/rlhf/main.py +++ b/benchmarks/rlhf/main.py @@ -17,8 +17,9 @@ class PPOv2TrainerIntrumented(PPOv2Trainer): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, config: PPOv2Config, *args, **kwargs): + config.report_to = [] + super().__init__(config, *args, **kwargs) def batch_size_fn(batch): x, y = batch['input_ids'].shape @@ -45,6 +46,7 @@ def save_model(self, *args, **kwargs): def main(): + parser = HfArgumentParser((PPOv2Config, 
ModelConfig)) config, model_config = parser.parse_args_into_dataclasses() # remove output_dir if exists diff --git a/benchmarks/rlhf/requirements.cuda.txt b/benchmarks/rlhf/requirements.cuda.txt index 764afb978..12a24c6c4 100644 --- a/benchmarks/rlhf/requirements.cuda.txt +++ b/benchmarks/rlhf/requirements.cuda.txt @@ -2,53 +2,81 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --output-file=benchmarks/rlhf/requirements.cuda.txt .pin/tmp-constraints-cuda-rlhf.txt benchmarks/rlhf/requirements.in +# pip-compile --output-file=benchmarks/rlhf/requirements.cuda.txt .pin/tmp-constraints-cuda-rlhf-gpus.txt benchmarks/rlhf/requirements.in # --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com accelerate==0.34.2 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/rlhf/requirements.in # trl aiohappyeyeballs==2.4.0 - # via aiohttp + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # aiohttp aiohttp==3.10.5 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # fsspec aiosignal==1.3.1 - # via aiohttp + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # aiohttp antlr4-python3-runtime==4.9.3 - # via omegaconf + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # omegaconf asttokens==2.4.1 - # via giving + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving async-timeout==4.0.3 - # via aiohttp + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # aiohttp attrs==24.2.0 - # via aiohttp + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # aiohttp certifi==2024.8.30 - # via requests + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests charset-normalizer==3.3.2 - # via requests + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests codefind==0.1.7 - # via ptera -datasets==2.21.0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # ptera +datasets==3.0.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/rlhf/requirements.in # trl dill==0.3.8 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # multiprocess docstring-parser==0.16 - # via tyro -executing==1.2.0 - # via varname + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # tyro +executing==2.1.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # varname filelock==3.16.0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # huggingface-hub # torch @@ -56,59 +84,89 @@ filelock==3.16.0 # triton frozenlist==1.4.1 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp # aiosignal fsspec[http]==2024.6.1 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # huggingface-hub # torch -giving==0.4.2 +giving==0.4.3 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # ptera # voir -huggingface-hub==0.24.6 +huggingface-hub==0.24.7 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # accelerate # datasets # tokenizers # transformers -idna==3.8 +idna==3.10 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # requests # yarl jax[cuda12]==0.4.31 - # via -r .pin/../constraints/extra/torch.cuda.txt + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r .pin/../constraints/extra/torch.cuda.txt jax-cuda12-pjrt==0.4.31 - # via jax-cuda12-plugin + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # 
jax-cuda12-plugin jax-cuda12-plugin[with-cuda]==0.4.31 - # via jax + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax jaxlib==0.4.31 - # via jax + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax jinja2==3.1.4 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch markdown-it-py==3.0.0 - # via rich + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # rich markupsafe==2.1.5 - # via jinja2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jinja2 mdurl==0.1.2 - # via markdown-it-py -ml-dtypes==0.4.0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # markdown-it-py +ml-dtypes==0.5.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt # jax # jaxlib mpmath==1.3.0 - # via sympy + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # sympy multidict==6.1.0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp # yarl multiprocess==0.70.16 - # via datasets + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # datasets networkx==3.3 - # via torch -numpy==2.1.1 # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +numpy==1.26.4 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt # accelerate # datasets # jax @@ -123,161 +181,239 @@ numpy==2.1.1 # xformers nvidia-cublas-cu12==12.1.3.1 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # jax-cuda12-plugin # nvidia-cudnn-cu12 # nvidia-cusolver-cu12 # torch nvidia-cuda-cupti-cu12==12.1.105 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # jax-cuda12-plugin # torch nvidia-cuda-nvcc-cu12==12.6.68 - # via jax-cuda12-plugin + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin nvidia-cuda-nvrtc-cu12==12.1.105 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch nvidia-cuda-runtime-cu12==12.1.105 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # jax-cuda12-plugin # torch nvidia-cudnn-cu12==9.1.0.70 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # jax-cuda12-plugin # torch nvidia-cufft-cu12==11.0.2.54 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # jax-cuda12-plugin # torch nvidia-curand-cu12==10.3.2.106 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch nvidia-cusolver-cu12==11.4.5.107 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # jax-cuda12-plugin # torch nvidia-cusparse-cu12==12.1.0.106 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # jax-cuda12-plugin # nvidia-cusolver-cu12 # torch nvidia-ml-py==12.560.30 - # via voir + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir nvidia-nccl-cu12==2.20.5 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # jax-cuda12-plugin # torch nvidia-nvjitlink-cu12==12.6.68 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # jax-cuda12-plugin # nvidia-cusolver-cu12 # nvidia-cusparse-cu12 nvidia-nvtx-cu12==12.1.105 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch omegaconf==2.3.0 - # via voir + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir opt-einsum==3.3.0 - # via jax + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax ovld==0.3.9 - # via voir + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir packaging==24.1 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # accelerate # datasets # huggingface-hub # transformers pandas==2.2.2 - # via datasets + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # datasets psutil==5.9.8 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # accelerate # voir ptera==1.4.1 - # via voir + # 
via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir pyarrow==17.0.0 - # via datasets + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # datasets pygments==2.18.0 - # via rich + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # rich python-dateutil==2.9.0.post0 - # via pandas -pytz==2024.1 - # via pandas + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # pandas +pytz==2024.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # pandas pyyaml==6.0.2 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # accelerate # datasets # huggingface-hub # omegaconf # transformers reactivex==4.0.4 - # via giving -regex==2024.7.24 - # via transformers + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving +regex==2024.9.11 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # transformers requests==2.32.3 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # huggingface-hub # transformers rich==13.8.1 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # tyro # voir safetensors==0.4.5 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # accelerate # transformers scipy==1.14.1 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # jax # jaxlib shtab==1.7.1 - # via tyro + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # tyro six==1.16.0 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # asttokens # python-dateutil sympy==1.13.2 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch tokenizers==0.19.1 - # via transformers + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # transformers torch==2.4.0+cu121 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/rlhf/requirements.in # accelerate # trl # xformers tqdm==4.66.5 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # huggingface-hub # transformers transformers==4.44.2 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/rlhf/requirements.in # trl triton==3.0.0 - # via torch + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch trl==0.10.1 - # via -r benchmarks/rlhf/requirements.in + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/rlhf/requirements.in typing-extensions==4.12.2 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub # multidict # reactivex # torch # tyro tyro==0.8.10 - # via trl + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # trl tzdata==2024.1 - # via pandas -urllib3==2.2.2 - # via requests -varname==0.10.0 - # via giving + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # pandas +urllib3==2.2.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests +varname==0.13.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving voir==0.2.19 # via + # -c .pin/../.pin/constraints-cuda-torch.txt # -c .pin/../constraints/cuda.txt # -r benchmarks/rlhf/requirements.in xformers==0.0.27.post2 - # via -r .pin/../constraints/extra/torch.cuda.txt + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r .pin/../constraints/extra/torch.cuda.txt xxhash==3.5.0 - # via datasets + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # datasets yarl==1.11.1 - # via aiohttp + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # aiohttp diff --git a/benchmarks/timm/requirements.cuda.txt b/benchmarks/timm/requirements.cuda.txt index 98586aca3..4554f91ec 100644 --- a/benchmarks/timm/requirements.cuda.txt +++ b/benchmarks/timm/requirements.cuda.txt @@ -7,6 +7,7 @@ --extra-index-url https://pypi.ngc.nvidia.com 
--extra-index-url https://download.pytorch.org/whl/cu121 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com antlr4-python3-runtime==4.9.3 @@ -29,7 +30,7 @@ codefind==0.1.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname @@ -44,16 +45,16 @@ fsspec==2024.6.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera # voir -huggingface-hub==0.24.6 +huggingface-hub==0.24.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/timm/requirements.in -idna==3.8 +idna==3.10 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -89,7 +90,7 @@ mdurl==0.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # markdown-it-py -ml-dtypes==0.4.0 +ml-dtypes==0.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jax @@ -227,7 +228,7 @@ requests==2.32.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub -rich==13.8.0 +rich==13.8.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir @@ -272,11 +273,11 @@ typing-extensions==4.12.2 # huggingface-hub # reactivex # torch -urllib3==2.2.2 +urllib3==2.2.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving diff --git a/benchmarks/torchatari/main.py b/benchmarks/torchatari/main.py index 62c9b3a07..bf5b7ef65 100755 --- a/benchmarks/torchatari/main.py +++ b/benchmarks/torchatari/main.py @@ -97,7 +97,12 @@ def reset(self, **kwargs): return observations def step(self, action): - observations, rewards, dones, infos = super().step(action) + # gym 0.26+ step() returns (obs, rewards, terminated, truncated, infos) + data = super().step(action) + + # FIXME: make sure this is valid + observations, rewards, terminated, truncated, infos = data + self.episode_returns += infos["reward"] self.episode_lengths += 1 self.returned_episode_returns[:] = self.episode_returns @@ -109,7 +114,7 @@ def step(self, action): return ( observations, rewards, - dones, + terminated, infos, ) @@ -211,7 +216,10 @@ def main(): # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = torch.Tensor(envs.reset()).to(device) + state, _ = envs.reset() + + # gym 0.26+ reset() returns (obs, info) + next_obs = torch.Tensor(state).to(device) next_done = torch.zeros(args.num_envs).to(device) iterations = range(1, args.num_iterations + 1) diff --git a/benchmarks/torchatari/requirements.cuda.txt b/benchmarks/torchatari/requirements.cuda.txt index b1a6d380b..2b0aa99d6 100644 --- a/benchmarks/torchatari/requirements.cuda.txt +++ b/benchmarks/torchatari/requirements.cuda.txt @@ -7,6 +7,7 @@ --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com absl-py==2.1.0 @@ -55,7 +56,7 @@ envpool==0.8.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/torchatari/requirements.in -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname @@ -72,7 +73,7 @@ fsspec==2024.6.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -giving==0.4.2 +giving==0.4.3 # via # -c 
.pin/../.pin/constraints-cuda-torch.txt # ptera @@ -81,7 +82,7 @@ grpcio==1.66.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard -gym==0.23.1 +gym==0.26.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/torchatari/requirements.in @@ -136,7 +137,7 @@ mdurl==0.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # markdown-it-py -ml-dtypes==0.4.0 +ml-dtypes==0.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jax @@ -254,7 +255,7 @@ packaging==24.1 # -c .pin/../.pin/constraints-cuda-torch.txt # envpool # tensorboard -protobuf==5.28.0 +protobuf==5.28.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard @@ -278,7 +279,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -rich==13.8.0 +rich==13.8.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # tyro @@ -340,7 +341,7 @@ tyro==0.8.10 # via # -c .pin/../.pin/constraints-cuda-torch.txt # -r benchmarks/torchatari/requirements.in -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving diff --git a/benchmarks/torchatari/requirements.in b/benchmarks/torchatari/requirements.in index c264f5563..59ca0358f 100644 --- a/benchmarks/torchatari/requirements.in +++ b/benchmarks/torchatari/requirements.in @@ -1,5 +1,5 @@ envpool -gym==0.23.1 +gym>=0.23.1 numpy torch tyro diff --git a/benchmarks/torchvision/requirements.cuda.txt b/benchmarks/torchvision/requirements.cuda.txt index dc0a16404..6b1a837f0 100644 --- a/benchmarks/torchvision/requirements.cuda.txt +++ b/benchmarks/torchvision/requirements.cuda.txt @@ -7,6 +7,7 @@ --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com antlr4-python3-runtime==4.9.3 @@ -21,7 +22,7 @@ codefind==0.1.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname @@ -34,7 +35,7 @@ fsspec==2024.6.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera @@ -75,7 +76,7 @@ mdurl==0.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # markdown-it-py -ml-dtypes==0.4.0 +ml-dtypes==0.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jax @@ -203,7 +204,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -rich==13.8.0 +rich==13.8.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir @@ -248,7 +249,7 @@ typing-extensions==4.12.2 # -c .pin/../.pin/constraints-cuda-torch.txt # reactivex # torch -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving diff --git a/benchmarks/torchvision_ddp/requirements.cuda.txt b/benchmarks/torchvision_ddp/requirements.cuda.txt index f2ec62699..28c6198b2 100644 --- a/benchmarks/torchvision_ddp/requirements.cuda.txt +++ b/benchmarks/torchvision_ddp/requirements.cuda.txt @@ -7,6 +7,7 @@ --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html --trusted-host pypi.ngc.nvidia.com antlr4-python3-runtime==4.9.3 @@ -21,7 +22,7 @@ codefind==0.1.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera 
-executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname @@ -34,7 +35,7 @@ fsspec==2024.6.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera @@ -75,7 +76,7 @@ mdurl==0.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # markdown-it-py -ml-dtypes==0.4.0 +ml-dtypes==0.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jax @@ -203,7 +204,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -rich==13.8.0 +rich==13.8.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir @@ -248,7 +249,7 @@ typing-extensions==4.12.2 # -c .pin/../.pin/constraints-cuda-torch.txt # reactivex # torch -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving diff --git a/benchmarks/vjepa/Makefile b/benchmarks/vjepa/Makefile new file mode 100644 index 000000000..b701efd5e --- /dev/null +++ b/benchmarks/vjepa/Makefile @@ -0,0 +1,31 @@ +# Use global base if possible +ifndef MILABENCH_BASE + MILABENCH_BASE="base" +endif + +export MILABENCH_BASE + +BENCH_NAME=vjepa +MILABENCH_CONFIG=dev.yaml +MILABENCH_ARGS=--config $(MILABENCH_CONFIG) --base $(MILABENCH_BASE) + +all: install prepare single gpus nodes + +install: + milabench install $(MILABENCH_ARGS) --force + +prepare: + milabench prepare $(MILABENCH_ARGS) + +tests: install prepare + milabench run $(MILABENCH_ARGS) + +single: + milabench run $(MILABENCH_ARGS) --select $(BENCH_NAME)-single + +gpus: + milabench run $(MILABENCH_ARGS) --select $(BENCH_NAME)-gpus + +nodes: + milabench run $(MILABENCH_ARGS) --select $(BENCH_NAME)-nodes diff --git a/benchmarks/vjepa/README.md b/benchmarks/vjepa/README.md new file mode 100644 index 000000000..686d7a80e --- /dev/null +++ b/benchmarks/vjepa/README.md @@ -0,0 +1,4 @@ + +# Vjepa + +Benchmark based on Meta AI's V-JEPA (Video Joint-Embedding Predictive Architecture): self-supervised pretraining of a ViT-Huge video encoder/predictor on masked clips. `benchfile.py` clones the upstream code from https://github.com/facebookresearch/jepa and `prepare.py` generates a fake video dataset, so no download is required. diff --git a/benchmarks/vjepa/benchfile.py b/benchmarks/vjepa/benchfile.py new file mode 100644 index 000000000..d25b47b53 --- /dev/null +++ b/benchmarks/vjepa/benchfile.py @@ -0,0 +1,46 @@ +from milabench.pack import Package + + + +BRANCH = "3081b0ad7b9651373ccef40c1d46b62f46cb7146" +URL = "https://github.com/facebookresearch/jepa.git" + + +class Vjepa(Package): + # Requirements file installed by install(). It can be empty or absent. + base_requirements = "requirements.in" + + # The preparation script called by prepare(). It must be executable, + # but it can be any type of script. It can be empty or absent. + prepare_script = "prepare.py" + + # The main script called by run(). It must be a Python file. It has to + # be present. + main_script = "main.py" + + # You can remove the functions below if you don't need to modify them. + + def make_env(self): + # Return a dict of environment variables for prepare_script and + # main_script. 
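+        # For illustration only (hypothetical override, not used by this
+        # benchmark): extra variables could be merged into the inherited dict:
+        #     env = super().make_env()
+        #     env["OMP_NUM_THREADS"] = "8"
+        #     return env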
+ return super().make_env() + + async def install(self): + vjepa = self.dirs.code / "jepa" + if not vjepa.exists(): + vjepa.clone_subtree(URL, BRANCH) + + await super().install() # super() call installs the requirements + + async def prepare(self): + await super().prepare() # super() call executes prepare_script + + def build_run_plan(self): + from milabench.commands import TorchrunAllNodes + + # self.config is not the right config for this + plan = super().build_run_plan() + + return TorchrunAllNodes(plan).use_stdout() + +__pack__ = Vjepa diff --git a/benchmarks/vjepa/config/vith16.yaml b/benchmarks/vjepa/config/vith16.yaml new file mode 100644 index 000000000..d1d5461a5 --- /dev/null +++ b/benchmarks/vjepa/config/vith16.yaml @@ -0,0 +1,88 @@ +app: vjepa +nodes: 16 +tasks_per_node: 8 +data: + dataset_type: VideoDataset + datasets: + - /your_path_to_kinetics710_csv_file_index.csv + decode_one_clip: true + batch_size: 24 + num_clips: 1 + num_frames: 16 + tubelet_size: 2 + sampling_rate: 4 + crop_size: 224 + patch_size: 16 + pin_mem: true + num_workers: 12 + filter_short_videos: false + clip_duration: null +data_aug: + auto_augment: false + motion_shift: false + random_resize_aspect_ratio: + - 0.75 + - 1.35 + random_resize_scale: + - 0.3 + - 1.0 + reprob: 0.0 +logging: + folder: /your_absolute_file_path_for_saving_logs_and_checkpoints/ + write_tag: jepa +loss: + loss_exp: 1.0 + reg_coeff: 0.0 +mask: + - aspect_ratio: + - 0.75 + - 1.5 + num_blocks: 8 + spatial_scale: + - 0.15 + - 0.15 + temporal_scale: + - 1.0 + - 1.0 + max_temporal_keep: 1.0 + max_keep: null + - aspect_ratio: + - 0.75 + - 1.5 + num_blocks: 2 + spatial_scale: + - 0.7 + - 0.7 + temporal_scale: + - 1.0 + - 1.0 + max_temporal_keep: 1.0 + max_keep: null +meta: + load_checkpoint: false + read_checkpoint: null + seed: 234 + eval_freq: 100 + use_sdpa: true + dtype: bfloat16 +model: + model_name: vit_huge + pred_depth: 12 + pred_embed_dim: 384 + uniform_power: true + use_mask_tokens: true + zero_init_mask_tokens: true +optimization: + ipe: 300 + ipe_scale: 1.25 + clip_grad: 10.0 + weight_decay: 0.04 + final_weight_decay: 0.4 + epochs: 300 + warmup: 40 + start_lr: 0.0002 + lr: 0.000625 + final_lr: 1.0e-06 + ema: + - 0.998 + - 1.0 \ No newline at end of file diff --git a/benchmarks/vjepa/dev.yaml b/benchmarks/vjepa/dev.yaml new file mode 100644 index 000000000..7c374e3f8 --- /dev/null +++ b/benchmarks/vjepa/dev.yaml @@ -0,0 +1,32 @@ + + +_vjepa: + inherits: _defaults + definition: . 
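+  # Local dev config: the three vjepa-* entries below inherit this block,
+  # and the argv flags are forwarded to main.py. The canonical copy of
+  # this configuration lives in config/base.yaml.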
+ install-variant: unpinned + install_group: torch + + argv: + --dataset: "{milabench_data}/FakeVideo/video_metainfo.csv" + --output: "{milabench_extra}" + +vjepa-single: + inherits: _vjepa + plan: + method: per_gpu + +vjepa-gpus: + inherits: _vjepa + plan: + method: njobs + n: 1 + +vjepa-nodes: + inherits: _vjepa + plan: + method: njobs + n: 1 + + num_machines: 2 + requires_capabilities: + - "len(nodes) >= ${num_machines}" \ No newline at end of file diff --git a/benchmarks/vjepa/main.py b/benchmarks/vjepa/main.py new file mode 100644 index 000000000..74ca606f7 --- /dev/null +++ b/benchmarks/vjepa/main.py @@ -0,0 +1,656 @@ +#!/usr/bin/env python + +import os +import copy +import time +import sys + +current_dir = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(os.path.join(current_dir, 'jepa')) + + +import numpy as np +import torch +import torch.multiprocessing as mp +import torch.nn.functional as F +from torch.nn.parallel import DistributedDataParallel +import torchcompat.core as acc + +from src.datasets.data_manager import init_data +from src.masks.random_tube import MaskCollator as TubeMaskCollator +from src.masks.multiblock3d import MaskCollator as MB3DMaskCollator +from src.masks.utils import apply_masks +from src.utils.distributed import init_distributed, AllReduce +from src.utils.logging import ( + CSVLogger, + gpu_timer, + get_logger, + grad_logger, + adamw_logger, + AverageMeter) +from src.utils.tensors import repeat_interleave_batch + +from app.vjepa.utils import ( + load_checkpoint, + init_video_model, + init_opt, +) +from app.vjepa.transforms import make_transforms + + +# -- +log_timings = True +log_freq = 10 +checkpoint_freq = 1 +# -- + +_GLOBAL_SEED = 0 +np.random.seed(_GLOBAL_SEED) +torch.manual_seed(_GLOBAL_SEED) +torch.backends.cudnn.benchmark = True + + +logger = get_logger(__name__) + + +def _main(args, resume_preempt=False): + # ----------------------------------------------------------------------- # + # PASSED IN PARAMS FROM CONFIG FILE + # ----------------------------------------------------------------------- # + + # -- META + cfgs_meta = args.get('meta') + load_model = cfgs_meta.get('load_checkpoint') or resume_preempt + r_file = cfgs_meta.get('read_checkpoint', None) + seed = cfgs_meta.get('seed', _GLOBAL_SEED) + save_every_freq = cfgs_meta.get('save_every_freq', -1) + skip_batches = cfgs_meta.get('skip_batches', -1) + use_sdpa = cfgs_meta.get('use_sdpa', False) + which_dtype = cfgs_meta.get('dtype') + logger.info(f'{which_dtype=}') + if which_dtype.lower() == 'bfloat16': + dtype = torch.bfloat16 + mixed_precision = True + elif which_dtype.lower() == 'float16': + dtype = torch.float16 + mixed_precision = True + else: + dtype = torch.float32 + mixed_precision = False + + # -- MASK + cfgs_mask = args.get('mask') + + # -- MODEL + cfgs_model = args.get('model') + model_name = cfgs_model.get('model_name') + pred_depth = cfgs_model.get('pred_depth') + pred_embed_dim = cfgs_model.get('pred_embed_dim') + uniform_power = cfgs_model.get('uniform_power', True) + use_mask_tokens = cfgs_model.get('use_mask_tokens', True) + zero_init_mask_tokens = cfgs_model.get('zero_init_mask_tokens', True) + + # -- DATA + cfgs_data = args.get('data') + dataset_type = cfgs_data.get('dataset_type', 'videodataset') + mask_type = cfgs_data.get('mask_type', 'multiblock3d') + dataset_paths = cfgs_data.get('datasets', []) + datasets_weights = cfgs_data.get('datasets_weights', None) + if datasets_weights is not None: + assert len(datasets_weights) == len(dataset_paths), 'Must have one 
sampling weight specified for each dataset' + batch_size = cfgs_data.get('batch_size') + num_clips = cfgs_data.get('num_clips') + num_frames = cfgs_data.get('num_frames') + tubelet_size = cfgs_data.get('tubelet_size') + sampling_rate = cfgs_data.get('sampling_rate') + duration = cfgs_data.get('clip_duration', None) + crop_size = cfgs_data.get('crop_size', 224) + patch_size = cfgs_data.get('patch_size') + pin_mem = cfgs_data.get('pin_mem', False) + num_workers = cfgs_data.get('num_workers', 1) + filter_short_videos = cfgs_data.get('filter_short_videos', False) + decode_one_clip = cfgs_data.get('decode_one_clip', True) + log_resource_util_data = cfgs_data.get('log_resource_utilization', False) + + # -- DATA AUGS + cfgs_data_aug = args.get('data_aug') + ar_range = cfgs_data_aug.get('random_resize_aspect_ratio', [3/4, 4/3]) + rr_scale = cfgs_data_aug.get('random_resize_scale', [0.3, 1.0]) + motion_shift = cfgs_data_aug.get('motion_shift', False) + reprob = cfgs_data_aug.get('reprob', 0.) + use_aa = cfgs_data_aug.get('auto_augment', False) + + # -- LOSS + cfgs_loss = args.get('loss') + loss_exp = cfgs_loss.get('loss_exp') + reg_coeff = cfgs_loss.get('reg_coeff') + + # -- OPTIMIZATION + cfgs_opt = args.get('optimization') + ipe = cfgs_opt.get('ipe', None) + ipe_scale = cfgs_opt.get('ipe_scale', 1.0) + clip_grad = cfgs_opt.get('clip_grad', None) + wd = float(cfgs_opt.get('weight_decay')) + final_wd = float(cfgs_opt.get('final_weight_decay')) + num_epochs = cfgs_opt.get('epochs') + warmup = cfgs_opt.get('warmup') + start_lr = cfgs_opt.get('start_lr') + lr = cfgs_opt.get('lr') + final_lr = cfgs_opt.get('final_lr') + ema = cfgs_opt.get('ema') + betas = cfgs_opt.get('betas', (0.9, 0.999)) + eps = cfgs_opt.get('eps', 1.e-8) + + # -- LOGGING + cfgs_logging = args.get('logging') + folder = cfgs_logging.get('folder') + tag = cfgs_logging.get('write_tag') + + # ----------------------------------------------------------------------- # + # ----------------------------------------------------------------------- # + + np.random.seed(seed) + torch.manual_seed(seed) + torch.backends.cudnn.benchmark = True + try: + mp.set_start_method('spawn') + except Exception: + pass + + # -- init torch distributed backend + world_size, rank = init_distributed() + logger.info(f'Initialized (rank/world-size) {rank}/{world_size}') + + device = acc.fetch_device(int(os.getenv("LOCAL_RANK", 0))) + acc.set_device(device) + + # -- log/checkpointing paths + log_file = os.path.join(folder, f'{tag}_r{rank}.csv') + latest_file = f'{tag}-latest.pth.tar' + latest_path = os.path.join(folder, latest_file) + load_path = None + if load_model: + load_path = os.path.join(folder, r_file) if r_file is not None else latest_path + if not os.path.exists(load_path): + load_path = None + load_model = False + + # -- make csv_logger + csv_logger = CSVLogger( + log_file, + ('%d', 'epoch'), + ('%d', 'itr'), + ('%.5f', 'loss'), + ('%.5f', 'loss-jepa'), + ('%.5f', 'reg-loss'), + ('%.5f', 'enc-grad-norm'), + ('%.5f', 'pred-grad-norm'), + ('%d', 'gpu-time(ms)'), + ('%d', 'wall-time(ms)'), + ) + + # -- init model + encoder, predictor = init_video_model( + uniform_power=uniform_power, + use_mask_tokens=use_mask_tokens, + num_mask_tokens=len(cfgs_mask), + zero_init_mask_tokens=zero_init_mask_tokens, + device=device, + patch_size=patch_size, + num_frames=num_frames, + tubelet_size=tubelet_size, + model_name=model_name, + crop_size=crop_size, + pred_depth=pred_depth, + pred_embed_dim=pred_embed_dim, + use_sdpa=use_sdpa, + ) + target_encoder = 
copy.deepcopy(encoder) + + # -- make data transforms + if mask_type == 'multiblock3d': + logger.info('Initializing basic multi-block mask') + mask_collator = MB3DMaskCollator( + crop_size=crop_size, + num_frames=num_frames, + patch_size=patch_size, + tubelet_size=tubelet_size, + cfgs_mask=cfgs_mask) + else: + logger.info('Initializing random tube mask') + mask_collator = TubeMaskCollator( + crop_size=crop_size, + num_frames=num_frames, + patch_size=patch_size, + tubelet_size=tubelet_size, + cfgs_mask=cfgs_mask) + transform = make_transforms( + random_horizontal_flip=True, + random_resize_aspect_ratio=ar_range, + random_resize_scale=rr_scale, + reprob=reprob, + auto_augment=use_aa, + motion_shift=motion_shift, + crop_size=crop_size) + + # -- init data-loaders/samplers + (unsupervised_loader, + unsupervised_sampler) = init_data( + data=dataset_type, + root_path=dataset_paths, + batch_size=batch_size, + training=True, + clip_len=num_frames, + frame_sample_rate=sampling_rate, + filter_short_videos=filter_short_videos, + decode_one_clip=decode_one_clip, + duration=duration, + num_clips=num_clips, + transform=transform, + datasets_weights=datasets_weights, + collator=mask_collator, + num_workers=num_workers, + world_size=world_size, + pin_mem=pin_mem, + rank=rank, + log_dir=folder if log_resource_util_data else None) + try: + _dlen = len(unsupervised_loader) + except Exception: # Different interface for webdataset + _dlen = unsupervised_loader.num_batches + if ipe is None: + ipe = _dlen + logger.info(f'iterations per epoch/dataset length: {ipe}/{_dlen}') + + # Wrap the loader so milabench can meter batch throughput + def get_batch_size(batch): + # Tuple[[[Tensor]], [Tensor], [Tensor]] + udata, _, _ = batch + + # torch.Size([24, 3, 16, 224, 224]) + return udata[0][0].shape[0] + + from benchmate.observer import BenchObserver + observer = BenchObserver( + earlystop=65, + batch_size_fn=get_batch_size, + raise_stop_program=True, + stdout=True, + ) + unsupervised_loader = observer.iterate(unsupervised_loader) + + # -- init optimizer and scheduler + optimizer, scaler, scheduler, wd_scheduler = init_opt( + encoder=encoder, + predictor=predictor, + wd=wd, + final_wd=final_wd, + start_lr=start_lr, + ref_lr=lr, + final_lr=final_lr, + iterations_per_epoch=ipe, + warmup=warmup, + num_epochs=num_epochs, + ipe_scale=ipe_scale, + mixed_precision=mixed_precision, + betas=betas, + eps=eps) + + if os.getenv("RANK", "-1") != "-1": + encoder = DistributedDataParallel(encoder, static_graph=True) + predictor = DistributedDataParallel(predictor, static_graph=True) + target_encoder = DistributedDataParallel(target_encoder) + + for p in target_encoder.parameters(): + p.requires_grad = False + + # -- momentum schedule + momentum_scheduler = (ema[0] + i*(ema[1]-ema[0])/(ipe*num_epochs*ipe_scale) + for i in range(int(ipe*num_epochs*ipe_scale)+1)) + + start_epoch = 0 + # -- load training checkpoint + if load_model or os.path.exists(latest_path): + ( + encoder, + predictor, + target_encoder, + optimizer, + scaler, + start_epoch, + ) = load_checkpoint( + r_path=load_path, + encoder=encoder, + predictor=predictor, + target_encoder=target_encoder, + opt=optimizer, + scaler=scaler) + for _ in range(start_epoch * ipe): + scheduler.step() + wd_scheduler.step() + next(momentum_scheduler) + mask_collator.step() + + def save_checkpoint(epoch, path): + if rank != 0: + return + save_dict = { + 'encoder': encoder.state_dict(), + 'predictor': predictor.state_dict(), + 'opt': optimizer.state_dict(), + 'scaler': None if scaler is None else scaler.state_dict(), + 'target_encoder': 
target_encoder.state_dict(), + 'epoch': epoch, + 'loss': loss_meter.avg, + 'batch_size': batch_size, + 'world_size': world_size, + 'lr': lr, + } + try: + torch.save(save_dict, path) + except Exception as e: + logger.info(f'Encountered exception when saving checkpoint: {e}') + + logger.info('Initializing loader...') + loader = iter(unsupervised_loader) + + if skip_batches > 0: + logger.info(f'Skip {skip_batches} batches') + unsupervised_sampler.set_epoch(start_epoch) + for itr in range(skip_batches): + if itr % 10 == 0: + logger.info(f'Skip {itr}/{skip_batches} batches') + try: + udata = next(loader) + except Exception: + loader = iter(unsupervised_loader) + udata = next(loader) + + next_count = 0 + + # -- TRAINING LOOP + for epoch in range(start_epoch, num_epochs): + logger.info('Epoch %d' % (epoch + 1)) + + # -- update distributed-data-loader epoch + unsupervised_sampler.set_epoch(epoch) + + loss_meter = AverageMeter() + input_var_meter = AverageMeter() + input_var_min_meter = AverageMeter() + jepa_loss_meter = AverageMeter() + reg_loss_meter = AverageMeter() + mask_meters = [AverageMeter() for _ in range(len(cfgs_mask))] + gpu_time_meter = AverageMeter() + wall_time_meter = AverageMeter() + + for itr in range(ipe): + itr_start_time = time.time() + + try: + udata, masks_enc, masks_pred = next(loader) + next_count += 1 + except StopIteration: + logger.info('Exhausted data loaders after %d. Refreshing...', next_count) + next_count = 0 + loader = iter(unsupervised_loader) + udata, masks_enc, masks_pred = next(loader) + assert len(masks_enc) == len(masks_pred), \ + 'Currently require num encoder masks = num predictor masks' + + def load_clips(): + # -- unsupervised video clips + # Put each clip on the GPU and concatenate along batch + # dimension + clips = torch.cat([u.to(device, non_blocking=True) for u in udata[0]], dim=0) + + # Put each mask-enc/mask-pred pair on the GPU and reuse the + # same mask pair for each clip + _masks_enc, _masks_pred = [], [] + for _me, _mp in zip(masks_enc, masks_pred): + _me = _me.to(device, non_blocking=True) + _mp = _mp.to(device, non_blocking=True) + _me = repeat_interleave_batch(_me, batch_size, repeat=num_clips) + _mp = repeat_interleave_batch(_mp, batch_size, repeat=num_clips) + _masks_enc.append(_me) + _masks_pred.append(_mp) + + return (clips, _masks_enc, _masks_pred) + clips, masks_enc, masks_pred = load_clips() + + for _i, m in enumerate(mask_meters): + m.update(masks_enc[_i][0].size(-1)) + + def train_step(): + _new_lr = scheduler.step() + _new_wd = wd_scheduler.step() + # -- + + def forward_target(c): + """ + Returns list of tensors of shape [B, N, D], one for each + mask-pred. + """ + with torch.no_grad(): + h = target_encoder(c) + h = F.layer_norm(h, (h.size(-1),)) # normalize over feature-dim [B, N, D] + # -- create targets (masked regions of h) + h = apply_masks(h, masks_pred, concat=False) + return h + + def forward_context(c, h): + """ + Returns list of tensors of shape [B, N, D], one for each + mask-pred. + """ + z = encoder(c, masks_enc) + z = predictor(z, h, masks_enc, masks_pred) + return z + + def loss_fn(z, h): + loss = 0. + # Compute loss and accumulate for each mask-enc/mask-pred pair + for zi, hi in zip(z, h): + loss += torch.mean(torch.abs(zi - hi)**loss_exp) / loss_exp + loss /= len(masks_pred) + return loss + + def reg_fn(z): + return sum([torch.sqrt(zi.var(dim=1) + 0.0001) for zi in z]) / len(z) + + # Step 1. Forward + loss_jepa, loss_reg = 0., 0. 
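+                # Overview of the update below (descriptive comment only):
+                # Step 1 embeds clips with the frozen EMA target encoder (h)
+                # and with the trainable encoder + predictor (z), then
+                # averages mean(|z - h| ** loss_exp) / loss_exp over the
+                # mask pairs. Step 2 backpropagates (through the GradScaler
+                # under mixed precision) and Step 3 tracks the online encoder
+                # with an exponential moving average:
+                #     param_k <- m * param_k + (1 - m) * param_q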
+ with acc.amp.autocast(dtype=dtype, enabled=mixed_precision): + h = forward_target(clips) + z = forward_context(clips, h) + loss_jepa = loss_fn(z, h) # jepa prediction loss + pstd_z = reg_fn(z) # predictor variance across patches + loss_reg += torch.mean(F.relu(1.-pstd_z)) + loss = loss_jepa + reg_coeff * loss_reg + + # Step 2. Backward & step + _enc_norm, _pred_norm = 0., 0. + if mixed_precision: + scaler.scale(loss).backward() + scaler.unscale_(optimizer) + else: + loss.backward() + if (epoch > warmup) and (clip_grad is not None): + _enc_norm = torch.nn.utils.clip_grad_norm_(encoder.parameters(), clip_grad) + _pred_norm = torch.nn.utils.clip_grad_norm_(predictor.parameters(), clip_grad) + if mixed_precision: + scaler.step(optimizer) + scaler.update() + else: + optimizer.step() + grad_stats = grad_logger(encoder.named_parameters()) + grad_stats.global_norm = float(_enc_norm) + grad_stats_pred = grad_logger(predictor.named_parameters()) + grad_stats_pred.global_norm = float(_pred_norm) + optimizer.zero_grad() + optim_stats = adamw_logger(optimizer) + + # Step 3. momentum update of target encoder + m = next(momentum_scheduler) + with torch.no_grad(): + for param_q, param_k in zip(encoder.parameters(), target_encoder.parameters()): + param_k.data.mul_(m).add_((1.-m) * param_q.detach().data) + + return ( + float(loss), + float(loss_jepa), + float(loss_reg), + _new_lr, + _new_wd, + grad_stats, + grad_stats_pred, + optim_stats, + ) + (loss, loss_jepa, loss_reg, _new_lr, _new_wd, grad_stats, grad_stats_pred, optim_stats,), gpu_etime_ms = gpu_timer(train_step) + iter_elapsed_time_ms = (time.time() - itr_start_time) * 1000. + loss_meter.update(loss) + input_var = float(AllReduce.apply(clips.view(clips.shape[0], -1).var(dim=1).mean(dim=0))) + input_var_min = float(AllReduce.apply(torch.min(clips.view(clips.shape[0], -1).var(dim=1)))) + input_var_meter.update(input_var) + input_var_min_meter.update(input_var_min) + jepa_loss_meter.update(loss_jepa) + reg_loss_meter.update(loss_reg) + gpu_time_meter.update(gpu_etime_ms) + wall_time_meter.update(iter_elapsed_time_ms) + + observer.record_loss(loss) + + # -- Logging + def log_stats(): + csv_logger.log( + epoch + 1, + itr, + loss, + loss_jepa, + loss_reg, + grad_stats.global_norm, + grad_stats_pred.global_norm, + gpu_etime_ms, + iter_elapsed_time_ms) + if (itr % log_freq == 0) or np.isnan(loss) or np.isinf(loss): + logger.info( + '[%d, %5d] loss: %.3f | p%.3f r%.3f | ' + 'input_var: %.3f %.3f | ' + 'masks: %s ' + '[wd: %.2e] [lr: %.2e] ' + '[mem: %.2e] ' + '[gpu: %.1f ms]' + '[wall: %.1f ms]' + % (epoch + 1, itr, + loss_meter.avg, + jepa_loss_meter.avg, + reg_loss_meter.avg, + input_var_meter.avg, + input_var_min_meter.avg, + '[' + ', '.join(['%.1f' % m.avg for m in mask_meters]) + ']', + _new_wd, + _new_lr, + acc.max_memory_allocated() / 1024.0**2, + gpu_time_meter.avg, + wall_time_meter.avg)) + + if optim_stats is not None: + logger.info( + '[%d, %5d] first moment: %.2e [%.2e %.2e] second moment: %.2e [%.2e %.2e]' + % (epoch + 1, itr, + optim_stats.get('exp_avg').avg, + optim_stats.get('exp_avg').min, + optim_stats.get('exp_avg').max, + optim_stats.get('exp_avg_sq').avg, + optim_stats.get('exp_avg_sq').min, + optim_stats.get('exp_avg_sq').max)) + + if grad_stats is not None: + logger.info( + '[%d, %5d] enc_grad_stats: f/l[%.2e %.2e] mn/mx(%.2e, %.2e) %.2e' + % (epoch + 1, itr, + grad_stats.first_layer, + grad_stats.last_layer, + grad_stats.min, + grad_stats.max, + grad_stats.global_norm)) + + if grad_stats_pred is not None: + logger.info( + '[%d, %5d] 
pred_grad_stats: f/l[%.2e %.2e] mn/mx(%.2e, %.2e) %.2e' + % (epoch + 1, itr, + grad_stats_pred.first_layer, + grad_stats_pred.last_layer, + grad_stats_pred.min, + grad_stats_pred.max, + grad_stats_pred.global_norm)) + log_stats() + assert not np.isnan(loss), 'loss is nan' + + # -- Save Checkpoint + logger.info('avg. loss %.3f' % loss_meter.avg) + # -- Save Last + if epoch % checkpoint_freq == 0 or epoch == (num_epochs - 1): + save_checkpoint(epoch + 1, latest_path) + if save_every_freq > 0 and epoch % save_every_freq == 0: + save_every_file = f'{tag}-e{epoch}.pth.tar' + save_every_path = os.path.join(folder, save_every_file) + save_checkpoint(epoch + 1, save_every_path) + + + +def main(): + from argparse import ArgumentParser + import torchcompat.core as acc + from benchmate.monitor import bench_monitor + from voir.phase import StopProgram + import yaml + + parser = ArgumentParser() + parser.add_argument("--dataset", help="path to the csv that lists all videos", type=str) + parser.add_argument("--output", help="path to an output directory", type=str) + parser.add_argument("--batch_size", type=int, default=24) + parser.add_argument("--num_frames", type=int, default=16) + parser.add_argument("--num_workers", type=int, default=12) + args = parser.parse_args() + + # relying on environment variables is annoying in multinode setups + # mlbench = json.loads(os.environ["MILABENCH_CONFIG"]) + + configfile = os.path.join(os.path.dirname(__file__), 'config', 'vith16.yaml') + params = None + with open(configfile, 'r') as y_file: + params = yaml.load(y_file, Loader=yaml.FullLoader) + logger.info('loaded params...') + + params["data"]["datasets"] = [args.dataset] + params["data"]["batch_size"] = args.batch_size + params["data"]["num_frames"] = args.num_frames + params["data"]["num_workers"] = args.num_workers + + params["logging"]["folder"] = args.output + + gpu_per_nodes = int(os.getenv("LOCAL_WORLD_SIZE", 1)) + total_gpu = int(os.getenv("WORLD_SIZE", 1)) + nnodes = total_gpu // gpu_per_nodes + + params["nodes"] = nnodes + params["tasks_per_node"] = gpu_per_nodes + + if os.getenv("RANK", "-1") != "-1": + acc.init_process_group() + + try: + with bench_monitor(): + _main(params) + except StopProgram: + pass + + finally: + if os.getenv("RANK", "-1") != "-1": + acc.destroy_process_group() + + sys.exit(0) + +if __name__ == "__main__": + main() diff --git a/benchmarks/vjepa/prepare.py b/benchmarks/vjepa/prepare.py new file mode 100755 index 000000000..28abf23fb --- /dev/null +++ b/benchmarks/vjepa/prepare.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python + +import os +import cv2 +import numpy as np + +def generate_random_video(output_file, width=640, height=480, num_frames=300, fps=30): + """ + Generates a .mp4 video file with random content. 
+ + :param output_file: Path and name of the output video file + :param width: Width of the video (in pixels) + :param height: Height of the video (in pixels) + :param num_frames: Number of frames in the video + :param fps: Frames per second (frame rate) of the video + """ + fourcc = cv2.VideoWriter_fourcc(*'mp4v') # Use MP4 encoding + video_writer = cv2.VideoWriter(output_file, fourcc, fps, (width, height)) + + for _ in range(num_frames): + frame = np.random.randint(0, 256, (height, width, 3), dtype=np.uint8) + video_writer.write(frame) + + video_writer.release() + + +if __name__ == "__main__": + import sys + import csv + import os + import tqdm + import multiprocessing + + sys.path.append(os.path.dirname(__file__) + "/jepa/") + data_directory = os.environ["MILABENCH_DIR_DATA"] + dest = os.path.join(data_directory, "FakeVideo") + os.makedirs(dest, exist_ok=True) + + csv_file = os.path.join(dest, "video_metainfo.csv") + + num_videos = 1000 # Change this to generate more or fewer videos + num_frames = 300 + + # Make the generation faster for the CI + if overrides := os.getenv("MILABENCH_TESTING_PREPARE"): + num1, num2 = overrides.split(",") + num_videos = int(num1) + num_frames = int(num2) + + def gen_video(i): + output_file = os.path.join(dest, f"{i + 1}.mp4") + if not os.path.exists(output_file): + generate_random_video(output_file=output_file, width=640, height=480, num_frames=num_frames, fps=30) + + n_worker = min(multiprocessing.cpu_count(), 16) + + with multiprocessing.Pool(n_worker) as pool: + for _ in tqdm.tqdm(pool.imap_unordered(gen_video, range(num_videos)), total=num_videos): + pass + + with open(csv_file, mode='w', newline='') as index_file: + # The index CSV is space-delimited: <video path> <label> + writer = csv.writer(index_file, delimiter=" ") + for video in tqdm.tqdm(os.listdir(dest)): + if video.endswith(".mp4"): + writer.writerow([os.path.join(dest, video), 0]) + + print(f"Generated {num_videos} videos and created {csv_file}") 
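+
+    # Usage sketch (assumed workflow): the CI can shrink the dataset before
+    # `milabench prepare --config dev.yaml --base base` runs this script, e.g.
+    #     MILABENCH_TESTING_PREPARE=10,30 milabench prepare --config dev.yaml --base base
+    # generates 10 videos of 30 frames instead of 1000 videos of 300 frames.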
diff --git a/benchmarks/vjepa/requirements.cuda.txt b/benchmarks/vjepa/requirements.cuda.txt new file mode 100644 index 000000000..c6e6ebb0e --- /dev/null +++ b/benchmarks/vjepa/requirements.cuda.txt @@ -0,0 +1,356 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=benchmarks/vjepa/requirements.cuda.txt .pin/tmp-constraints-cuda-vjepa-gpus.txt benchmarks/vjepa/requirements.in +# +--extra-index-url https://pypi.ngc.nvidia.com +--extra-index-url https://download.pytorch.org/whl/cu121 +--find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html +--trusted-host pypi.ngc.nvidia.com + +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # omegaconf +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving +beartype==0.18.5 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/vjepa/requirements.in +braceexpand==0.1.7 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/vjepa/requirements.in + # webdataset +certifi==2024.8.30 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests +charset-normalizer==3.3.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests +cloudpickle==3.0.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # submitit +codefind==0.1.7 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # ptera +decord==0.6.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/vjepa/requirements.in +einops==0.8.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/vjepa/requirements.in +executing==2.1.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # varname +filelock==3.16.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # huggingface-hub + # torch + # triton +fsspec==2024.6.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # huggingface-hub + # torch +giving==0.4.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # ptera + # voir +huggingface-hub==0.24.7 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # timm +idna==3.10 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests +jax[cuda12]==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r .pin/../constraints/extra/torch.cuda.txt +jax-cuda12-pjrt==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin +jax-cuda12-plugin[with-cuda]==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax +jaxlib==0.4.31 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax +jinja2==3.1.4 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # rich +markupsafe==2.1.5 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jinja2 +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # markdown-it-py +ml-dtypes==0.5.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax + # jaxlib +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # sympy +networkx==3.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +numpy==1.26.4 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/vjepa/requirements.in + # decord + # jax + # jaxlib + # ml-dtypes + # opencv-python + # opt-einsum + # pandas + # scipy + # torchvision + # webdataset + # 
xformers +nvidia-cublas-cu12==12.1.3.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin + # torch +nvidia-cuda-nvcc-cu12==12.6.68 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin +nvidia-cuda-nvrtc-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +nvidia-cuda-runtime-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin + # torch +nvidia-cudnn-cu12==9.1.0.70 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin + # torch +nvidia-cufft-cu12==11.0.2.54 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin + # torch +nvidia-curand-cu12==10.3.2.106 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +nvidia-cusolver-cu12==11.4.5.107 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin + # torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin + # nvidia-cusolver-cu12 + # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +nvidia-nccl-cu12==2.20.5 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin + # torch +nvidia-nvjitlink-cu12==12.6.68 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax-cuda12-plugin + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +opencv-python==4.10.0.84 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/vjepa/requirements.in +opt-einsum==3.3.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax +ovld==0.3.9 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +packaging==24.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # huggingface-hub +pandas==2.2.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/vjepa/requirements.in +pillow==10.4.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torchvision +psutil==5.9.8 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +pygments==2.18.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # rich +python-dateutil==2.9.0.post0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # pandas +pytz==2024.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # pandas +pyyaml==6.0.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/vjepa/requirements.in + # huggingface-hub + # omegaconf + # timm + # webdataset +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving +requests==2.32.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # huggingface-hub +rich==13.8.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +safetensors==0.4.5 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # timm +scipy==1.14.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jax + # jaxlib +six==1.16.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # asttokens + # python-dateutil +submitit==1.5.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/vjepa/requirements.in 
+sympy==1.13.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +timm==1.0.9 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/vjepa/requirements.in +torch==2.4.0+cu121 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/vjepa/requirements.in + # timm + # torchvision + # xformers +torchvision==0.19.0+cu121 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/vjepa/requirements.in + # timm +tqdm==4.66.5 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # huggingface-hub +triton==3.0.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +typing-extensions==4.12.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # huggingface-hub + # reactivex + # submitit + # torch +tzdata==2024.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # pandas +urllib3==2.2.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests +varname==0.13.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving +voir==0.2.19 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -c .pin/../constraints/cuda.txt + # -r benchmarks/vjepa/requirements.in +webdataset==0.2.100 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/vjepa/requirements.in +xformers==0.0.27.post2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r .pin/../constraints/extra/torch.cuda.txt diff --git a/benchmarks/vjepa/requirements.in b/benchmarks/vjepa/requirements.in new file mode 100644 index 000000000..248a1c98b --- /dev/null +++ b/benchmarks/vjepa/requirements.in @@ -0,0 +1,14 @@ +voir>=0.2.19,<0.3 +torch>=2 +torchvision +pyyaml +numpy +opencv-python +submitit +braceexpand +webdataset +timm +decord +pandas +einops +beartype \ No newline at end of file diff --git a/benchmarks/vjepa/voirfile.py b/benchmarks/vjepa/voirfile.py new file mode 100644 index 000000000..d93f886cd --- /dev/null +++ b/benchmarks/vjepa/voirfile.py @@ -0,0 +1,38 @@ +from dataclasses import dataclass + +from voir import configurable +from voir.instruments import dash, early_stop, log, rate +from benchmate.monitor import monitor_monogpu + +@dataclass +class Config: + """voir configuration""" + + # Whether to display the dash or not + dash: bool = False + + # How often to log the rates + interval: str = "1s" + + # Number of rates to skip before logging + skip: int = 5 + + # Number of rates to log before stopping + stop: int = 20 + + # Number of seconds between each gpu poll + gpu_poll: int = 3 + + +@configurable +def instrument_main(ov, options: Config): + yield ov.phases.init + + if options.dash: + ov.require(dash) + + ov.require( + log("value", "progress", "rate", "units", "loss", "gpudata", context="task"), + early_stop(n=options.stop, key="rate", task="train"), + monitor_monogpu(poll_interval=options.gpu_poll), + ) diff --git a/benchmate/benchmate/monitor.py b/benchmate/benchmate/monitor.py index dd8202ba1..5d2624201 100644 --- a/benchmate/benchmate/monitor.py +++ b/benchmate/benchmate/monitor.py @@ -44,9 +44,13 @@ def _smuggle_monitor(poll_interval=10, worker_init=None, **monitors): data_file = SmuggleWriter(sys.stdout) def mblog(data): nonlocal data_file - if data_file is not None: - print(json.dumps(data), file=data_file) + if data_file is not None: + try: + print(json.dumps(data), file=data_file) + except ValueError: + print("Bench is likely ending; ignoring ValueError") + def get(): t = time.time() entries = [] diff --git a/config/base.yaml b/config/base.yaml index 89f113deb..28a72afb7 100644 --- 
a/config/base.yaml +++ b/config/base.yaml @@ -1,4 +1,5 @@ _defaults: + enabled: false max_duration: 600 voir: options: @@ -32,6 +33,8 @@ _torchvision_ddp: definition: ../benchmarks/torchvision_ddp group: torchvision install_group: torch + tags: + - multigpu plan: method: njobs n: 1 @@ -52,6 +55,8 @@ _flops: tags: - diagnostic - flops + - monogpu + - nobatch argv: --number: 10 @@ -67,6 +72,8 @@ llama: - nlp - llm - inference + - monogpu + - nobatch voir: options: @@ -192,6 +199,7 @@ resnet50: - classification - convnet - resnet + - monogpu argv: --model: resnet50 @@ -207,6 +215,7 @@ resnet50-noio: - convnet - resnet - noio + - monogpu argv: --model: resnet50 @@ -220,6 +229,7 @@ resnet152-ddp-gpus: - classification - convnet - resnet + - multigpu argv: --model: resnet152 @@ -234,6 +244,7 @@ _convnext_large-base: - classification - convnet - precision-showcase + - monogpu argv: --model: convnext_large --batch-size: 128 @@ -269,6 +280,7 @@ regnet_y_128gf: - convnet - resnet - lstm + - monogpu argv: --model: regnet_y_128gf --batch-size: 64 @@ -282,6 +294,7 @@ _bert-base: - huggingface - precision-showcase - noio + - monogpu argv: --model: "Bert" --batch-size: 32 @@ -317,6 +330,7 @@ t5: - transformer - huggingface - noio + - monogpu argv: --model: "T5" --batch-size: 16 @@ -329,6 +343,7 @@ reformer: - transformer - huggingface - noio + - monogpu argv: --model: "Reformer" --batch-size: 64 @@ -339,6 +354,7 @@ whisper: - audio - huggingface - noio + - monogpu argv: --model: "Whisper" --batch-size: 64 @@ -349,34 +365,19 @@ focalnet: - vision - classification - convnet + - monogpu plan: method: per_gpu argv: --model: focalnet_base_lrf -super-slomo: - inherits: _defaults - tags: - - vision - - video-interpolation - - unet - - convnet - definition: ../benchmarks/super-slomo - group: super-slomo - install_group: torch - plan: - method: per_gpu - argv: - --train_batch_size: 64 - --dataset_root: "{milabench_data}/FakeImageNet" - --loader: pytorch - --num_workers: "auto({n_worker}, 8)" - brax: inherits: _defaults tags: - rl - jax + - multigpu + - gym definition: ../benchmarks/brax group: brax install_group: torch @@ -408,11 +409,15 @@ _diffusion: diffusion-single: inherits: _diffusion num_machines: 1 + tags: + - monogpu plan: method: per_gpu diffusion-gpus: inherits: _diffusion + tags: + - multigpu plan: method: njobs n: 1 @@ -445,12 +450,16 @@ _lightning: lightning: inherits: _lightning + tags: + - monogpu num_machines: 1 plan: method: per_gpu lightning-gpus: inherits: _lightning + tags: + - multigpu num_machines: 1 plan: method: njobs @@ -476,7 +485,8 @@ dinov2-giant-single: inherits: _dinov2 plan: method: per_gpu - + tags: + - monogpu argv: --config-file: "{benchmark_folder}/src/dinov2/configs/train/vitg14.yaml" # THOSE NEED TO BE LAST @@ -487,6 +497,8 @@ dinov2-giant-single: dinov2-giant-gpus: inherits: _dinov2 + tags: + - multigpu plan: method: njobs n: 1 @@ -499,7 +511,6 @@ dinov2-giant-gpus: train.num_workers=10: true dinov2-giant-nodes: - enabled: false plan: method: njobs n: 1 @@ -535,6 +546,8 @@ _llm: llm-lora-single: inherits: _llm + tags: + - monogpu plan: method: per_gpu argv: @@ -556,7 +569,8 @@ llm-lora-ddp-gpus: plan: method: njobs n: 1 - + tags: + - multigpu argv: "{milabench_code}/recipes/lora_finetune_distributed.py": true --config: "{milabench_code}/configs/llama3_8B_lora_single_device.yaml" @@ -600,6 +614,8 @@ llm-lora-ddp-nodes: llm-lora-mp-gpus: inherits: _llm + tags: + - multigpu plan: method: njobs n: 1 @@ -620,6 +636,8 @@ llm-lora-mp-gpus: llm-full-mp-gpus: inherits: _llm + tags: + 
- multigpu plan: method: njobs n: 1 @@ -667,6 +685,11 @@ llm-full-mp-nodes: _purejaxrl: inherits: _defaults + install_group: torch + tags: + - monogpu + - gym + - rl definition: ../benchmarks/purejaxrl plan: method: per_gpu @@ -694,17 +717,17 @@ ppo: _geo_gnn: inherits: _defaults tags: + - monogpu - graph - # FIXME: torch cluster is laging behind pytorch - # we are forced to use torch==2.3 instead of torch==2.4 - install_group: gnn - group: geo_gnn + install_group: torch definition: ../benchmarks/geo_gnn plan: method: per_gpu dimenet: inherits: _geo_gnn + tags: + - monogpu argv: --model: 'DimeNet' --num-samples: 10000 @@ -713,10 +736,10 @@ dimenet: recursiongfn: inherits: _defaults definition: ../benchmarks/recursiongfn - install_group: gnn - group: recursiongfn_gnn + install_group: torch tags: - graph + - monogpu plan: method: per_gpu @@ -735,6 +758,8 @@ torchatari: method: per_gpu tags: - rl + - monogpu + - gym argv: --num-minibatches: 16 --update-epochs: 4 @@ -743,34 +768,36 @@ torchatari: --total-timesteps: 1000000 --env-id: Breakout-v5 - -llava-single: +_llava: inherits: _defaults definition: ../benchmarks/llava install_group: torch plan: method: per_gpu - tags: - llm + - monogpu argv: --batch_size: 1 --num_workers: 4 --gradient_accumulation_steps: 1 -llava-gpus: - # This OOM - enabled: false +llava-single: + inherits: _llava + plan: + method: per_gpu + argv: + --batch_size: 1 + --num_workers: 4 + --gradient_accumulation_steps: 1 - inherits: _defaults - definition: ../benchmarks/llava - install_group: torch +llava-gpus: + inherits: _llava + tags: + - multigpu plan: method: njobs n: 1 - - tags: - - llm argv: --batch_size: 1 --num_workers: 4 @@ -780,11 +807,12 @@ llava-gpus: _rlhf: inherits: _defaults definition: ../benchmarks/rlhf - install-variant: unpinned install_group: torch plan: method: per_gpu tags: + - monogpu + - rl - rlhf - llm argv: @@ -802,6 +830,35 @@ rlhf-single: rlhf-gpus: inherits: _rlhf + tags: + - multigpu + plan: + method: njobs + n: 1 + +_vjepa: + inherits: _defaults + install_group: torch + definition: ../benchmarks/vjepa + tags: + - video + argv: + --batch_size: 24 + --num_workers: "auto({n_worker}, 12)" + --dataset: "{milabench_data}/FakeVideo/video_metainfo.csv" + --output: "{milabench_extra}" + +vjepa-single: + inherits: _vjepa + tags: + - monogpu + plan: + method: per_gpu + +vjepa-gpus: + inherits: _vjepa + tags: + - multigpu plan: method: njobs n: 1 diff --git a/config/fast.yaml b/config/fast.yaml new file mode 100644 index 000000000..44932e5bc --- /dev/null +++ b/config/fast.yaml @@ -0,0 +1,33 @@ +# +# Configuration for fast testing, single node +# +# - Flops: check that the GPUs are performing according to spec +# - Multi GPU: checks that GPUs can talk to each other +# + +include: + - base.yaml + +fp16: + enabled: true + weight: 1.0 + +bf16: + enabled: true + weight: 1.0 + +tf32: + enabled: true + weight: 1.0 + +fp32: + enabled: true + weight: 1.0 + +lightning-gpus: + enabled: True + weight: 1.0 + +llm-lora-ddp-nodes: + enabled: True + weight: 1.0 \ No newline at end of file diff --git a/config/high.yaml b/config/high.yaml new file mode 100644 index 000000000..4eb43816c --- /dev/null +++ b/config/high.yaml @@ -0,0 +1,9 @@ +# +# Configuration for high fidelity +# +# - Prefer original dataset over generated/fake datasets +# - Prefer pretrained model when possible +# + +include: + - standard.yaml \ No newline at end of file diff --git a/config/medium.yaml b/config/medium.yaml new file mode 100644 index 000000000..d52f4e47f --- /dev/null +++ 
b/config/medium.yaml @@ -0,0 +1,9 @@ +# +# Configuration for medium fidelity +# +# - Prefer generated/fake datasets for big datasets +# - Prefer pretrained model when weights are small +# + +include: + - standard.yaml \ No newline at end of file diff --git a/config/retired.yaml b/config/retired.yaml index a73abf0b2..11ef99299 100644 --- a/config/retired.yaml +++ b/config/retired.yaml @@ -378,3 +378,24 @@ rwkv: dlrm: enabled: true weight: 1.0 + +super-slomo: + enabled: false + weight: 1.0 + inherits: _defaults + tags: + - vision + - video-interpolation + - unet + - convnet + - monogpu + definition: ../benchmarks/super-slomo + group: super-slomo + install_group: torch + plan: + method: per_gpu + argv: + --train_batch_size: 64 + --dataset_root: "{milabench_data}/FakeImageNet" + --loader: pytorch + --num_workers: "auto({n_worker}, 8)" \ No newline at end of file diff --git a/config/scaling.yaml b/config/scaling.yaml index 0a9907e5a..09f3f9ae5 100644 --- a/config/scaling.yaml +++ b/config/scaling.yaml @@ -442,3 +442,36 @@ whisper: 128: 71634.375 MiB 144: 80412.75 MiB optimized: 128 + + +llava-single: + arg: --batch_size + optimized: 1 + +llava-gpus: + arg: --batch_size + optimized: 1 + +rlhf-single: + arg: --per_device_train_batch_size + optimized: 64 + +rlhf-gpus: + arg: --per_device_train_batch_size + optimized: 64 + +vjepa-single: + arg: --batch_size + optimized: 24 + +vjepa-gpus: + arg: --batch_size + optimized: 24 + +ppo: + arg: --num_minibatches + optimized: 32 + +dqn: + arg: --buffer_batch_size + optimized: 128 \ No newline at end of file diff --git a/config/standard.yaml b/config/standard.yaml index 9c3d2424b..588e35e9a 100644 --- a/config/standard.yaml +++ b/config/standard.yaml @@ -69,10 +69,6 @@ focalnet: enabled: true weight: 2.0 -super-slomo: - enabled: true - weight: 1.0 - fp16: enabled: true weight: 0.0 @@ -93,20 +89,42 @@ brax: enabled: true weight: 1.0 +# Diffusion +diffusion-single: + enabled: true + weight: 1.0 + diffusion-gpus: enabled: True weight: 1.0 +diffusion-nodes: + enabled: true + weight: 1.0 + +# lightning +lightning: + enabled: true + weight: 1.0 + lightning-gpus: enabled: True weight: 1.0 +# dinov2 +dinov2-giant-single: + enabled: True + weight: 1.0 + dinov2-giant-gpus: enabled: True weight: 1.0 -# LLM +dinov2-giant-nodes: + enabled: false + weight: 1.0 +# LLM llm-lora-single: enabled: True weight: 1.0 @@ -131,6 +149,60 @@ llm-full-mp-nodes: enabled: True weight: 1.0 +resnet152-ddp-gpus: + enabled: true + weight: 0.0 + +# purejaxrl +dqn: + enabled: true + weight: 1.0 + +ppo: + enabled: true + weight: 1.0 + +# Geo +dimenet: + enabled: true + weight: 1.0 + +recursiongfn: + enabled: true + weight: 1.0 + +# torchatari +torchatari: + enabled: True + weight: 1.0 + +# llava +llava-single: + enabled: true + weight: 1.0 + +llava-gpus: # This OOM + enabled: false + weight: 1.0 + +# rlhf +rlhf-single: + enabled: True + weight: 1.0 + +rlhf-gpus: + enabled: true + weight: 1.0 + +# vjepa +vjepa-single: + enabled: true + weight: 1.0 + +vjepa-gpus: + enabled: true + weight: 1.0 + ################## # Disabled tests # ################## diff --git a/constraints/cuda.txt b/constraints/cuda.txt index 90219e078..eb6bbcedf 100644 --- a/constraints/cuda.txt +++ b/constraints/cuda.txt @@ -3,4 +3,5 @@ # # voir >= 0.2.19 -torchcompat >= 1.0.0 \ No newline at end of file +torchcompat >= 1.0.0 +gymnax >= 0.0.8 diff --git a/constraints/extra/gnn.cuda.txt b/constraints/extra/gnn.cuda.txt deleted file mode 100644 index e5decec56..000000000 --- a/constraints/extra/gnn.cuda.txt 
+++ /dev/null @@ -1,4 +0,0 @@ ---find-links https://data.pyg.org/whl/torch-2.3.0+cu121.html - -torch>=2.3.0,<2.4.0 - diff --git a/constraints/extra/gnn.hpu.txt b/constraints/extra/gnn.hpu.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/constraints/extra/gnn.rocm.txt b/constraints/extra/gnn.rocm.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/constraints/extra/gnn.xpu.txt b/constraints/extra/gnn.xpu.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/constraints/extra/torch.cuda.txt b/constraints/extra/torch.cuda.txt index 09e3393a2..942277d5c 100644 --- a/constraints/extra/torch.cuda.txt +++ b/constraints/extra/torch.cuda.txt @@ -7,4 +7,8 @@ jax[cuda12] # --extra-index-url https://download.pytorch.org/whl/cu121 # --find-links https://download.pytorch.org/whl/xformers/ -xformers==0.0.27.post2 \ No newline at end of file +xformers==0.0.27.post2 + + +# Torch geometric +--find-links https://data.pyg.org/whl/torch-2.4.0+cu121.html diff --git a/constraints/hpu.txt b/constraints/hpu.txt index 6313b8786..23a110bd2 100644 --- a/constraints/hpu.txt +++ b/constraints/hpu.txt @@ -5,4 +5,4 @@ # voir >= 0.2.19 torchcompat >= 1.0.0 - +gymnax >= 0.0.8 \ No newline at end of file diff --git a/constraints/rocm.txt b/constraints/rocm.txt index 559a3f68d..b86ce00d3 100644 --- a/constraints/rocm.txt +++ b/constraints/rocm.txt @@ -3,4 +3,5 @@ # # voir >= 0.2.19 -torchcompat >= 1.0.0 \ No newline at end of file +torchcompat >= 1.0.0 +gymnax >= 0.0.8 diff --git a/constraints/xpu.txt b/constraints/xpu.txt index 5aa7739a2..2fd966c1e 100644 --- a/constraints/xpu.txt +++ b/constraints/xpu.txt @@ -15,4 +15,5 @@ intel-extension-for-openxla # # voir >= 0.2.19 -torchcompat >= 1.0.0 \ No newline at end of file +torchcompat >= 1.0.0 +gymnax >= 0.0.8 diff --git a/milabench/_version.py b/milabench/_version.py index 1d8d51c47..4b49d0506 100644 --- a/milabench/_version.py +++ b/milabench/_version.py @@ -1,5 +1,5 @@ """This file is generated, do not modify""" -__tag__ = "v0.1.0-83-ge5505ee0" -__commit__ = "e5505ee0c6e0fe547af149b4ca87d0d7538cdd58" -__date__ = "2024-09-05 17:19:59 -0400" +__tag__ = "v0.1.0-113-g9a5dfe3e" +__commit__ = "9a5dfe3ef36e6baab6584faa3fa939e63ba2aed5" +__date__ = "2024-09-16 09:08:28 -0400" diff --git a/milabench/commands/__init__.py b/milabench/commands/__init__.py index 3de44337a..e97ac4e58 100644 --- a/milabench/commands/__init__.py +++ b/milabench/commands/__init__.py @@ -674,8 +674,12 @@ def make_base_executor(cls, executor, *args, **kwargs): main_host = node_address(main) # add them as option so we could tweak them if necessary main_port = option("torchrun.port", int, default=29400) - backend = option("torchrun.backend", str, default="c10d") + backend = option("torchrun.backend", str, default="static") + filters = option("torchrun.local_ranks_filter", str, default="0") + if backend == "c10d": + print("Warning: c10d can select the wrong node for RANK=0") + main_addr = f"{main_host}:{main_port}" config = executor.pack.config @@ -685,12 +689,27 @@ def make_base_executor(cls, executor, *args, **kwargs): f"--nnodes={len(nodes)}", f"--rdzv-backend={backend}", f"--rdzv-endpoint={main_addr}", - # f"--master-addr={main_host}", - # f"--master-port={main_port}", + f"--master-addr={main_host}", + f"--master-port={main_port}", + f"--local-ranks-filter={filters}", *args, **kwargs ) + def make_new_node_executor(self, rank, node, base): + """Make a new environment and create a new executor for the node""" + executor: TorchrunAllGPU = 
super().make_new_node_executor(rank, node, base) + + # Specify the node rank so rank 0 is consistently on the local node + new_args = list(executor.wrapper_argv) + [ + f"--node-rank={rank}", + f"--local-addr={node['ip']}", + f"--rdzv-conf=rank={rank}", + ] + executor.wrapper_argv = new_args + + return executor + def __init__(self, executor: Command, *args, **kwargs) -> None: base_exec = TorchrunAllNodes.make_base_executor( TorchrunAllGPU, diff --git a/milabench/pack.py b/milabench/pack.py index cbe3b2d92..1cdde0939 100644 --- a/milabench/pack.py +++ b/milabench/pack.py @@ -349,6 +349,7 @@ def make_env(self): # building an image, but it is overall nicer for development to use # the default cache). env["XDG_CACHE_HOME"] = str(self.dirs.cache) + return env def full_env(self, env={}): diff --git a/milabench/report.py b/milabench/report.py index 7b6ccb5ed..aebcaf093 100644 --- a/milabench/report.py +++ b/milabench/report.py @@ -304,7 +304,9 @@ def print_meta(out, meta): if k == "accelerators": gpus = v["gpus"] n = len(gpus) - _, gpu = gpus.popitem() + gpu = {} + if n > 0: + _, gpu = gpus.popitem() stats = { "n": n, "product": gpu.get("product", "NA"), @@ -325,7 +327,9 @@ def short_meta(out, meta): if k == "accelerators": gpus = v["gpus"] n = len(gpus) - _, gpu = gpus.popitem() + gpu = {} + if n > 0: + _, gpu = gpus.popitem() stats["product"] = gpu.get("product", "NA") stats["n_gpu"] = n stats["memory"] = str(gpu.get("memory", {}).get("total", 0)) @@ -486,21 +490,32 @@ def pandas_to_string(df, formatters=_formatters): columns = df.columns.tolist() - sep = " | " - lines = [] + # Compute column size col_size = defaultdict(int) - for index, row in df.iterrows(): - line = [f"{index:<30}"] + col_size["bench"] = max(col_size["bench"], len(index)) for col, val in zip(columns, row): fmt = formatters.get(col) - if fmt is not None: val = fmt(val) col_size[col] = max(col_size[col], len(val)) + + # Generate report + sep = " | " + lines = [] + for index, row in df.iterrows(): + size = col_size["bench"] + line = [f"{index:<{size}}"] + + for col, val in zip(columns, row): + fmt = formatters.get(col) + if fmt is not None: + val = fmt(val) else: val = str(val) + size = col_size[col] + val = f"{val:>{size}}" line.append(val) lines.append(sep.join(line)) @@ -509,7 +524,8 @@ def fmtcol(col): size = col_size[col] return f"{col:>{size}}" - header = sep.join([f"{'bench':<30}"] + [fmtcol(col) for col in columns]) + size = col_size["bench"] + header = sep.join([f"{'bench':<{size}}"] + [fmtcol(col) for col in columns]) return "\n".join([header] + lines) diff --git a/milabench/summary.py b/milabench/summary.py index de9ced414..de3583f28 100644 --- a/milabench/summary.py +++ b/milabench/summary.py @@ -236,7 +236,8 @@ def _summarize(group, query=tuple([])) -> Summary: "name": config["name"], "group": config["group"], "n": len(agg["success"]), - "ngpu": sum(agg["ngpu"]) / len(agg["ngpu"]), + # In case of failure it is possible ngpu is 0 or 1 + "ngpu": max(agg["ngpu"]), "successes": sum(agg["success"]), "failures": sum(not x for x in agg["success"]), "train_rate": _metrics(agg["train_rate"]), diff --git a/milabench/system.py b/milabench/system.py index d29f4cd27..c237baf2c 100644 --- a/milabench/system.py +++ b/milabench/system.py @@ -1,11 +1,11 @@ import contextvars +import ipaddress import os import socket -from dataclasses import dataclass, field -import sys import subprocess +import sys from contextlib import contextmanager -import ipaddress +from dataclasses import dataclass, field import psutil import yaml @@ -193,11 
+193,11 @@ class Torchrun: @dataclass class Options: - sizer: SizerOptions = SizerOptions() - cpu: CPUOptions = CPUOptions() - dataset: DatasetConfig = DatasetConfig() - dirs: Dirs = Dirs() - torchrun: Torchrun = Torchrun() + sizer: SizerOptions = field(default_factory=SizerOptions) + cpu: CPUOptions = field(default_factory=CPUOptions) + dataset: DatasetConfig = field(default_factory=DatasetConfig) + dirs: Dirs = field(default_factory=Dirs) + torchrun: Torchrun = field(default_factory=Torchrun) @dataclass @@ -231,18 +231,19 @@ def default_device(): @dataclass class SystemConfig: """This is meant to be an exhaustive list of all the environment overrides""" + arch: str = defaultfield("gpu.arch", str, default_device()) sshkey: str = defaultfield("ssh", str, "~/.ssh/id_rsa") docker_image: str = None nodes: list[Nodes] = field(default_factory=list) - gpu: GPUConfig = GPUConfig() - options: Options = Options() + gpu: GPUConfig = field(default_factory=GPUConfig) + options: Options = field(default_factory=Options) base: str = defaultfield("base", str, None) config: str = defaultfield("config", str, None) dash: bool = defaultfield("dash", bool, 1) noterm: bool = defaultfield("noterm", bool, 0) - github: Github = Github() + github: Github = field(default_factory=Github) def check_node_config(nodes): diff --git a/milabench/utils.py b/milabench/utils.py index 8495d117e..a046d3868 100644 --- a/milabench/utils.py +++ b/milabench/utils.py @@ -239,7 +239,8 @@ def select_nodes(nodes, n): else: ranked.append(node) - return ranked[: max(1, min(n, len(ranked)))] + selected = ranked[: max(1, min(n, len(ranked)))] + return selected def enumerate_rank(nodes): diff --git a/scripts/article/run_cuda.sh b/scripts/article/run_cuda.sh index df0cf2f92..b7b31eed3 100644 --- a/scripts/article/run_cuda.sh +++ b/scripts/article/run_cuda.sh @@ -8,6 +8,8 @@ export MILABENCH_BASE="$MILABENCH_WORDIR/results" export MILABENCH_VENV="$MILABENCH_WORDIR/env" export BENCHMARK_VENV="$MILABENCH_WORDIR/results/venv/torch" +export MILABENCH_SIZER_SAVE="$MILABENCH_WORDIR/scaling.yaml" + if [ -z "${MILABENCH_PREPARE}" ]; then export MILABENCH_PREPARE=0 @@ -40,13 +42,14 @@ install_prepare() { pip install -e $MILABENCH_SOURCE + milabench slurm_system > $MILABENCH_WORDIR/system.yaml # # Install milabench's benchmarks in their venv # - pip install torch - milabench pin --variant cuda --from-scratch $ARGS + # pip install torch + # milabench pin --variant cuda --from-scratch $ARGS milabench install --system $MILABENCH_WORDIR/system.yaml $ARGS which pip @@ -81,9 +84,10 @@ if [ "$MILABENCH_PREPARE" -eq 0 ]; then . 
$MILABENCH_WORDIR/env/bin/activate - # milabench pin --variant cuda --from-scratch + + # pip install torch + # milabench pin --variant cuda --from-scratch $ARGS # milabench install --system $MILABENCH_WORDIR/system.yaml --force $ARGS - # milabench prepare --system $MILABENCH_WORDIR/system.yaml $ARGS # # Run the benchmarks diff --git a/tests/test_command_reg/test_command_reg_one_node.txt index 05a286f8a..3a511bb65 100644 --- a/tests/test_command_reg/test_command_reg_one_node.txt +++ b/tests/test_command_reg/test_command_reg_one_node.txt @@ -16,7 +16,7 @@ export MILABENCH_DIR_RUNS=$BASE/runs export MILABENCH_DIR_EXTRA=$BASE/extra/llm export MILABENCH_DIR_CACHE=$BASE/cache export OMP_NUM_THREADS=0 -export MILABENCH_CONFIG='{"system": {"arch": "cuda", "sshkey": null, "nodes": [{"ip": "127.0.0.1", "main": true, "name": "0", "sshport": 22, "user": "username", "hostname": "127.0.0.1"}], "self": {"ip": "127.0.0.1", "main": true, "name": "0", "sshport": 22, "user": "username", "hostname": "127.0.0.1"}}, "dirs": {"base": "$BASE", "venv": "$BASE/venv/torch", "data": "$BASE/data", "runs": "$BASE/runs", "extra": "$BASE/extra/llm", "cache": "$BASE/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "dev", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "$SRC/milabench/config", "config_file": "$SRC/milabench/config/standard.yaml", "definition": "$SRC/milabench/benchmarks/llama", "tags": ["inference", "llm", "nlp"], "plan": {"method": "per_gpu"}, "weight": 1.0, "name": "llama", "tag": ["llama"]}' +export MILABENCH_CONFIG='{"system": {"arch": "cuda", "sshkey": null, "nodes": [{"ip": "127.0.0.1", "main": true, "name": "0", "sshport": 22, "user": "username", "hostname": "127.0.0.1"}], "self": {"ip": "127.0.0.1", "main": true, "name": "0", "sshport": 22, "user": "username", "hostname": "127.0.0.1"}}, "dirs": {"base": "$BASE", "venv": "$BASE/venv/torch", "data": "$BASE/data", "runs": "$BASE/runs", "extra": "$BASE/extra/llm", "cache": "$BASE/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "dev", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "$SRC/milabench/config", "config_file": "$SRC/milabench/config/standard.yaml", "definition": "$SRC/milabench/benchmarks/llama", "tags": ["inference", "llm", "monogpu", "nlp", "nobatch"], "plan": {"method": "per_gpu"}, "weight": 1.0, "name": "llama", "tag": ["llama"]}' echo "---" echo "llama" @@ -326,21 +326,6 @@ time ( wait ) -echo "---" -echo "super-slomo" -echo "===========" -time ( - CUDA_VISIBLE_DEVICES=0 python $SRC/milabench/benchmarks/super-slomo/slomo/train.py --train_batch_size 64 --dataset_root $BASE/data/FakeImageNet --loader pytorch --num_workers 8 & - CUDA_VISIBLE_DEVICES=1 python $SRC/milabench/benchmarks/super-slomo/slomo/train.py --train_batch_size 64 --dataset_root $BASE/data/FakeImageNet --loader pytorch --num_workers 8 & - CUDA_VISIBLE_DEVICES=2 python $SRC/milabench/benchmarks/super-slomo/slomo/train.py --train_batch_size 64 --dataset_root $BASE/data/FakeImageNet --loader pytorch --num_workers 8 & - CUDA_VISIBLE_DEVICES=3 python $SRC/milabench/benchmarks/super-slomo/slomo/train.py 
--train_batch_size 64 --dataset_root $BASE/data/FakeImageNet --loader pytorch --num_workers 8 & - CUDA_VISIBLE_DEVICES=4 python $SRC/milabench/benchmarks/super-slomo/slomo/train.py --train_batch_size 64 --dataset_root $BASE/data/FakeImageNet --loader pytorch --num_workers 8 & - CUDA_VISIBLE_DEVICES=5 python $SRC/milabench/benchmarks/super-slomo/slomo/train.py --train_batch_size 64 --dataset_root $BASE/data/FakeImageNet --loader pytorch --num_workers 8 & - CUDA_VISIBLE_DEVICES=6 python $SRC/milabench/benchmarks/super-slomo/slomo/train.py --train_batch_size 64 --dataset_root $BASE/data/FakeImageNet --loader pytorch --num_workers 8 & - CUDA_VISIBLE_DEVICES=7 python $SRC/milabench/benchmarks/super-slomo/slomo/train.py --train_batch_size 64 --dataset_root $BASE/data/FakeImageNet --loader pytorch --num_workers 8 & - wait -) - echo "---" echo "brax" echo "====" @@ -399,7 +384,7 @@ echo "---" echo "lightning-gpus" echo "==============" time ( - $BASE/venv/torch/bin/benchrun --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --nproc-per-node=8 --no-python -- python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 256 & + $BASE/venv/torch/bin/benchrun --nnodes=1 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --nproc-per-node=8 --no-python -- python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 256 & wait ) @@ -422,7 +407,7 @@ echo "---" echo "dinov2-giant-gpus" echo "=================" time ( - $BASE/venv/torch/bin/benchrun --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --nproc-per-node=8 --no-python -- python $SRC/milabench/benchmarks/dinov2/main.py --output-dir $BASE/extra/dinov2-giant-gpus/output --no-resume --config-file $SRC/milabench/benchmarks/dinov2/src/dinov2/configs/train/vitg14.yaml train.dataset_path=ImageNet:split=TRAIN:root=$BASE/data/FakeImageNet:extra=$BASE/data/FakeImageNet train.batch_size_per_gpu=32 train.saveckp_freq=100 train.num_workers=10 & + $BASE/venv/torch/bin/benchrun --nnodes=1 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --nproc-per-node=8 --no-python -- python $SRC/milabench/benchmarks/dinov2/main.py --output-dir $BASE/extra/dinov2-giant-gpus/output --no-resume --config-file $SRC/milabench/benchmarks/dinov2/src/dinov2/configs/train/vitg14.yaml train.dataset_path=ImageNet:split=TRAIN:root=$BASE/data/FakeImageNet:extra=$BASE/data/FakeImageNet train.batch_size_per_gpu=32 train.saveckp_freq=100 train.num_workers=10 & wait ) @@ -445,7 +430,7 @@ echo "---" echo "llm-lora-ddp-gpus" echo "=================" time ( - $BASE/venv/torch/bin/tune run --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/lora_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_8B_lora_single_device.yaml epochs=1 output_dir=$BASE/extra/llm-lora-ddp-gpus/output tokenizer.path=$BASE/data/llama3_8B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_8B/original checkpointer.output_dir=$BASE/data/llama3_8B/ metric_logger.log_dir=$BASE/extra/llm-lora-ddp-gpus/metrics repo_id="meta-llama/Meta-Llama-3.1-8B" batch_size=8 gradient_accumulation_steps=8 & + $BASE/venv/torch/bin/tune run --nnodes=1 --rdzv-backend=static 
--rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/lora_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_8B_lora_single_device.yaml epochs=1 output_dir=$BASE/extra/llm-lora-ddp-gpus/output tokenizer.path=$BASE/data/llama3_8B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_8B/original checkpointer.output_dir=$BASE/data/llama3_8B/ metric_logger.log_dir=$BASE/extra/llm-lora-ddp-gpus/metrics repo_id="meta-llama/Meta-Llama-3.1-8B" batch_size=8 gradient_accumulation_steps=8 & wait ) @@ -453,7 +438,7 @@ echo "---" echo "llm-lora-ddp-nodes" echo "==================" time ( - $BASE/venv/torch/bin/tune run --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/lora_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_8B_lora_single_device.yaml epochs=1 output_dir=$BASE/extra/llm-lora-ddp-nodes/output tokenizer.path=$BASE/data/llama3_8B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_8B/original checkpointer.output_dir=$BASE/data/llama3_8B/ metric_logger.log_dir=$BASE/extra/llm-lora-ddp-nodes/metrics repo_id="meta-llama/Meta-Llama-3.1-8B" batch_size=8 gradient_accumulation_steps=8 & + $BASE/venv/torch/bin/tune run --nnodes=1 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/lora_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_8B_lora_single_device.yaml epochs=1 output_dir=$BASE/extra/llm-lora-ddp-nodes/output tokenizer.path=$BASE/data/llama3_8B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_8B/original checkpointer.output_dir=$BASE/data/llama3_8B/ metric_logger.log_dir=$BASE/extra/llm-lora-ddp-nodes/metrics repo_id="meta-llama/Meta-Llama-3.1-8B" batch_size=8 gradient_accumulation_steps=8 & wait ) @@ -461,7 +446,7 @@ echo "---" echo "llm-lora-mp-gpus" echo "================" time ( - $BASE/venv/torch/bin/tune run --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/lora_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_70B_lora.yaml epochs=1 output_dir=$BASE/extra/llm-lora-mp-gpus/output tokenizer.path=$BASE/data/llama3_70B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_70B checkpointer.output_dir=$BASE/data/llama3_70B/ safetensors=true metric_logger.log_dir=$BASE/extra/llm-lora-mp-gpus/metrics repo_id="meta-llama/Meta-Llama-3.1-70B" batch_size=8 gradient_accumulation_steps=1 & + $BASE/venv/torch/bin/tune run --nnodes=1 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/lora_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_70B_lora.yaml epochs=1 output_dir=$BASE/extra/llm-lora-mp-gpus/output tokenizer.path=$BASE/data/llama3_70B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_70B checkpointer.output_dir=$BASE/data/llama3_70B/ safetensors=true metric_logger.log_dir=$BASE/extra/llm-lora-mp-gpus/metrics repo_id="meta-llama/Meta-Llama-3.1-70B" batch_size=8 gradient_accumulation_steps=1 & wait ) @@ -469,7 +454,7 @@ echo "---" echo "llm-full-mp-gpus" echo 
"================" time ( - $BASE/venv/torch/bin/tune run --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/full_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_70B_full.yaml epochs=1 output_dir=$BASE/extra/llm-full-mp-gpus/output tokenizer.path=$BASE/data/llama3_70B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_70B checkpointer.output_dir=$BASE/data/llama3_70B/ metric_logger.log_dir=$BASE/extra/llm-full-mp-gpus/metrics repo_id="meta-llama/Meta-Llama-3.1-70B" safetensors=true batch_size=2 gradient_accumulation_steps=1 & + $BASE/venv/torch/bin/tune run --nnodes=1 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/full_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_70B_full.yaml epochs=1 output_dir=$BASE/extra/llm-full-mp-gpus/output tokenizer.path=$BASE/data/llama3_70B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_70B checkpointer.output_dir=$BASE/data/llama3_70B/ metric_logger.log_dir=$BASE/extra/llm-full-mp-gpus/metrics repo_id="meta-llama/Meta-Llama-3.1-70B" safetensors=true batch_size=2 gradient_accumulation_steps=1 & wait ) @@ -477,7 +462,7 @@ echo "---" echo "llm-full-mp-nodes" echo "=================" time ( - $BASE/venv/torch/bin/tune run --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/full_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_70B_full.yaml epochs=1 output_dir=$BASE/extra/llm-full-mp-nodes/output tokenizer.path=$BASE/data/llama3_70B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_70B checkpointer.output_dir=$BASE/data/llama3_70B/ metric_logger.log_dir=$BASE/extra/llm-full-mp-nodes/metrics repo_id="meta-llama/Meta-Llama-3.1-70B" safetensors=true batch_size=2 gradient_accumulation_steps=1 & + $BASE/venv/torch/bin/tune run --nnodes=1 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/full_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_70B_full.yaml epochs=1 output_dir=$BASE/extra/llm-full-mp-nodes/output tokenizer.path=$BASE/data/llama3_70B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_70B checkpointer.output_dir=$BASE/data/llama3_70B/ metric_logger.log_dir=$BASE/extra/llm-full-mp-nodes/metrics repo_id="meta-llama/Meta-Llama-3.1-70B" safetensors=true batch_size=2 gradient_accumulation_steps=1 & wait ) @@ -571,21 +556,6 @@ time ( wait ) -echo "---" -echo "rlhf_" -echo "=====" -time ( - CUDA_VISIBLE_DEVICES=0 $SRC/milabench/benchmarks/rlhf/main.py --output_dir $BASE/extra/rlhf_/output --model_name_or_path EleutherAI/pythia-1b-deduped --per_device_train_batch_size 64 --logging_strategy no --log_level critical --bf16 & - CUDA_VISIBLE_DEVICES=1 $SRC/milabench/benchmarks/rlhf/main.py --output_dir $BASE/extra/rlhf_/output --model_name_or_path EleutherAI/pythia-1b-deduped --per_device_train_batch_size 64 --logging_strategy no --log_level critical --bf16 & - CUDA_VISIBLE_DEVICES=2 $SRC/milabench/benchmarks/rlhf/main.py --output_dir $BASE/extra/rlhf_/output --model_name_or_path EleutherAI/pythia-1b-deduped --per_device_train_batch_size 64 --logging_strategy no 
--log_level critical --bf16 & - CUDA_VISIBLE_DEVICES=3 $SRC/milabench/benchmarks/rlhf/main.py --output_dir $BASE/extra/rlhf_/output --model_name_or_path EleutherAI/pythia-1b-deduped --per_device_train_batch_size 64 --logging_strategy no --log_level critical --bf16 & - CUDA_VISIBLE_DEVICES=4 $SRC/milabench/benchmarks/rlhf/main.py --output_dir $BASE/extra/rlhf_/output --model_name_or_path EleutherAI/pythia-1b-deduped --per_device_train_batch_size 64 --logging_strategy no --log_level critical --bf16 & - CUDA_VISIBLE_DEVICES=5 $SRC/milabench/benchmarks/rlhf/main.py --output_dir $BASE/extra/rlhf_/output --model_name_or_path EleutherAI/pythia-1b-deduped --per_device_train_batch_size 64 --logging_strategy no --log_level critical --bf16 & - CUDA_VISIBLE_DEVICES=6 $SRC/milabench/benchmarks/rlhf/main.py --output_dir $BASE/extra/rlhf_/output --model_name_or_path EleutherAI/pythia-1b-deduped --per_device_train_batch_size 64 --logging_strategy no --log_level critical --bf16 & - CUDA_VISIBLE_DEVICES=7 $SRC/milabench/benchmarks/rlhf/main.py --output_dir $BASE/extra/rlhf_/output --model_name_or_path EleutherAI/pythia-1b-deduped --per_device_train_batch_size 64 --logging_strategy no --log_level critical --bf16 & - wait -) - echo "---" echo "rlhf-single" echo "===========" @@ -609,3 +579,26 @@ time ( wait ) +echo "---" +echo "vjepa-single" +echo "============" +time ( + CUDA_VISIBLE_DEVICES=0 python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-single & + CUDA_VISIBLE_DEVICES=1 python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-single & + CUDA_VISIBLE_DEVICES=2 python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-single & + CUDA_VISIBLE_DEVICES=3 python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-single & + CUDA_VISIBLE_DEVICES=4 python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-single & + CUDA_VISIBLE_DEVICES=5 python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-single & + CUDA_VISIBLE_DEVICES=6 python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-single & + CUDA_VISIBLE_DEVICES=7 python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-single & + wait +) + +echo "---" +echo "vjepa-gpus" +echo "==========" +time ( + $BASE/venv/torch/bin/benchrun --nnodes=1 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --nproc-per-node=8 --no-python -- python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-gpus & + wait +) + diff --git a/tests/test_command_reg/test_command_reg_two_nodes.txt b/tests/test_command_reg/test_command_reg_two_nodes.txt index c84460dea..3004505de 100644 --- 
a/tests/test_command_reg/test_command_reg_two_nodes.txt +++ b/tests/test_command_reg/test_command_reg_two_nodes.txt @@ -16,7 +16,7 @@ export MILABENCH_DIR_RUNS=$BASE/runs export MILABENCH_DIR_EXTRA=$BASE/extra/llm export MILABENCH_DIR_CACHE=$BASE/cache export OMP_NUM_THREADS=0 -export MILABENCH_CONFIG='{"system": {"arch": "cuda", "sshkey": null, "nodes": [{"ip": "127.0.0.1", "main": true, "name": "0", "sshport": 22, "user": "username", "hostname": "127.0.0.1"}, {"ip": "192.168.0.11", "main": false, "name": "1", "sshport": 22, "user": "username", "hostname": "192.168.0.11"}], "self": {"ip": "127.0.0.1", "main": true, "name": "0", "sshport": 22, "user": "username", "hostname": "127.0.0.1"}}, "dirs": {"base": "$BASE", "venv": "$BASE/venv/torch", "data": "$BASE/data", "runs": "$BASE/runs", "extra": "$BASE/extra/llm", "cache": "$BASE/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "dev", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "$SRC/milabench/config", "config_file": "$SRC/milabench/config/standard.yaml", "definition": "$SRC/milabench/benchmarks/llama", "tags": ["inference", "llm", "nlp"], "plan": {"method": "per_gpu"}, "weight": 1.0, "name": "llama", "tag": ["llama"]}' +export MILABENCH_CONFIG='{"system": {"arch": "cuda", "sshkey": null, "nodes": [{"ip": "127.0.0.1", "main": true, "name": "0", "sshport": 22, "user": "username", "hostname": "127.0.0.1"}, {"ip": "192.168.0.11", "main": false, "name": "1", "sshport": 22, "user": "username", "hostname": "192.168.0.11"}], "self": {"ip": "127.0.0.1", "main": true, "name": "0", "sshport": 22, "user": "username", "hostname": "127.0.0.1"}}, "dirs": {"base": "$BASE", "venv": "$BASE/venv/torch", "data": "$BASE/data", "runs": "$BASE/runs", "extra": "$BASE/extra/llm", "cache": "$BASE/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "dev", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "$SRC/milabench/config", "config_file": "$SRC/milabench/config/standard.yaml", "definition": "$SRC/milabench/benchmarks/llama", "tags": ["inference", "llm", "monogpu", "nlp", "nobatch"], "plan": {"method": "per_gpu"}, "weight": 1.0, "name": "llama", "tag": ["llama"]}' echo "---" echo "llama" @@ -326,21 +326,6 @@ time ( wait ) -echo "---" -echo "super-slomo" -echo "===========" -time ( - CUDA_VISIBLE_DEVICES=0 python $SRC/milabench/benchmarks/super-slomo/slomo/train.py --train_batch_size 64 --dataset_root $BASE/data/FakeImageNet --loader pytorch --num_workers 8 & - CUDA_VISIBLE_DEVICES=1 python $SRC/milabench/benchmarks/super-slomo/slomo/train.py --train_batch_size 64 --dataset_root $BASE/data/FakeImageNet --loader pytorch --num_workers 8 & - CUDA_VISIBLE_DEVICES=2 python $SRC/milabench/benchmarks/super-slomo/slomo/train.py --train_batch_size 64 --dataset_root $BASE/data/FakeImageNet --loader pytorch --num_workers 8 & - CUDA_VISIBLE_DEVICES=3 python $SRC/milabench/benchmarks/super-slomo/slomo/train.py --train_batch_size 64 --dataset_root $BASE/data/FakeImageNet --loader pytorch --num_workers 8 & - CUDA_VISIBLE_DEVICES=4 python $SRC/milabench/benchmarks/super-slomo/slomo/train.py --train_batch_size 64 --dataset_root $BASE/data/FakeImageNet --loader pytorch 
--num_workers 8 & - CUDA_VISIBLE_DEVICES=5 python $SRC/milabench/benchmarks/super-slomo/slomo/train.py --train_batch_size 64 --dataset_root $BASE/data/FakeImageNet --loader pytorch --num_workers 8 & - CUDA_VISIBLE_DEVICES=6 python $SRC/milabench/benchmarks/super-slomo/slomo/train.py --train_batch_size 64 --dataset_root $BASE/data/FakeImageNet --loader pytorch --num_workers 8 & - CUDA_VISIBLE_DEVICES=7 python $SRC/milabench/benchmarks/super-slomo/slomo/train.py --train_batch_size 64 --dataset_root $BASE/data/FakeImageNet --loader pytorch --num_workers 8 & - wait -) - echo "---" echo "brax" echo "====" @@ -400,7 +385,7 @@ echo "---" echo "lightning-gpus" echo "==============" time ( - $BASE/venv/torch/bin/benchrun --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --nproc-per-node=8 --no-python -- python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 256 & + $BASE/venv/torch/bin/benchrun --nnodes=1 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --nproc-per-node=8 --no-python -- python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 256 & wait ) @@ -423,7 +408,7 @@ echo "---" echo "dinov2-giant-gpus" echo "=================" time ( - $BASE/venv/torch/bin/benchrun --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --nproc-per-node=8 --no-python -- python $SRC/milabench/benchmarks/dinov2/main.py --output-dir $BASE/extra/dinov2-giant-gpus/output --no-resume --config-file $SRC/milabench/benchmarks/dinov2/src/dinov2/configs/train/vitg14.yaml train.dataset_path=ImageNet:split=TRAIN:root=$BASE/data/FakeImageNet:extra=$BASE/data/FakeImageNet train.batch_size_per_gpu=32 train.saveckp_freq=100 train.num_workers=10 & + $BASE/venv/torch/bin/benchrun --nnodes=1 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --nproc-per-node=8 --no-python -- python $SRC/milabench/benchmarks/dinov2/main.py --output-dir $BASE/extra/dinov2-giant-gpus/output --no-resume --config-file $SRC/milabench/benchmarks/dinov2/src/dinov2/configs/train/vitg14.yaml train.dataset_path=ImageNet:split=TRAIN:root=$BASE/data/FakeImageNet:extra=$BASE/data/FakeImageNet train.batch_size_per_gpu=32 train.saveckp_freq=100 train.num_workers=10 & wait ) @@ -446,7 +431,7 @@ echo "---" echo "llm-lora-ddp-gpus" echo "=================" time ( - $BASE/venv/torch/bin/tune run --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/lora_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_8B_lora_single_device.yaml epochs=1 output_dir=$BASE/extra/llm-lora-ddp-gpus/output tokenizer.path=$BASE/data/llama3_8B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_8B/original checkpointer.output_dir=$BASE/data/llama3_8B/ metric_logger.log_dir=$BASE/extra/llm-lora-ddp-gpus/metrics repo_id="meta-llama/Meta-Llama-3.1-8B" batch_size=8 gradient_accumulation_steps=8 & + $BASE/venv/torch/bin/tune run --nnodes=1 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/lora_finetune_distributed.py --config 
$SRC/milabench/benchmarks/llm/configs/llama3_8B_lora_single_device.yaml epochs=1 output_dir=$BASE/extra/llm-lora-ddp-gpus/output tokenizer.path=$BASE/data/llama3_8B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_8B/original checkpointer.output_dir=$BASE/data/llama3_8B/ metric_logger.log_dir=$BASE/extra/llm-lora-ddp-gpus/metrics repo_id="meta-llama/Meta-Llama-3.1-8B" batch_size=8 gradient_accumulation_steps=8 & wait ) @@ -454,8 +439,8 @@ echo "---" echo "llm-lora-ddp-nodes" echo "==================" time ( - $BASE/venv/torch/bin/tune run --nnodes=2 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/lora_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_8B_lora_single_device.yaml epochs=1 output_dir=$BASE/extra/llm-lora-ddp-nodes/output tokenizer.path=$BASE/data/llama3_8B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_8B/original checkpointer.output_dir=$BASE/data/llama3_8B/ metric_logger.log_dir=$BASE/extra/llm-lora-ddp-nodes/metrics repo_id="meta-llama/Meta-Llama-3.1-8B" batch_size=8 gradient_accumulation_steps=8 & - ssh -oCheckHostIP=no -oStrictHostKeyChecking=no -oPasswordAuthentication=no -oPasswordAuthentication=no -p 22 username@192.168.0.11 $BASE/venv/torch/bin/tune run --nnodes=2 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/lora_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_8B_lora_single_device.yaml epochs=1 output_dir=$BASE/extra/llm-lora-ddp-nodes/output tokenizer.path=$BASE/data/llama3_8B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_8B/original checkpointer.output_dir=$BASE/data/llama3_8B/ metric_logger.log_dir=$BASE/extra/llm-lora-ddp-nodes/metrics repo_id="meta-llama/Meta-Llama-3.1-8B" batch_size=8 gradient_accumulation_steps=8 & + $BASE/venv/torch/bin/tune run --nnodes=2 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --node-rank=0 --local-addr=127.0.0.1 --rdzv-conf=rank=0 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/lora_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_8B_lora_single_device.yaml epochs=1 output_dir=$BASE/extra/llm-lora-ddp-nodes/output tokenizer.path=$BASE/data/llama3_8B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_8B/original checkpointer.output_dir=$BASE/data/llama3_8B/ metric_logger.log_dir=$BASE/extra/llm-lora-ddp-nodes/metrics repo_id="meta-llama/Meta-Llama-3.1-8B" batch_size=8 gradient_accumulation_steps=8 & + ssh -oCheckHostIP=no -oStrictHostKeyChecking=no -oPasswordAuthentication=no -oPasswordAuthentication=no -p 22 username@192.168.0.11 $BASE/venv/torch/bin/tune run --nnodes=2 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --node-rank=1 --local-addr=192.168.0.11 --rdzv-conf=rank=1 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/lora_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_8B_lora_single_device.yaml epochs=1 output_dir=$BASE/extra/llm-lora-ddp-nodes/output tokenizer.path=$BASE/data/llama3_8B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_8B/original checkpointer.output_dir=$BASE/data/llama3_8B/ metric_logger.log_dir=$BASE/extra/llm-lora-ddp-nodes/metrics repo_id="meta-llama/Meta-Llama-3.1-8B" batch_size=8 
gradient_accumulation_steps=8 & wait ) @@ -463,7 +448,7 @@ echo "---" echo "llm-lora-mp-gpus" echo "================" time ( - $BASE/venv/torch/bin/tune run --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/lora_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_70B_lora.yaml epochs=1 output_dir=$BASE/extra/llm-lora-mp-gpus/output tokenizer.path=$BASE/data/llama3_70B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_70B checkpointer.output_dir=$BASE/data/llama3_70B/ safetensors=true metric_logger.log_dir=$BASE/extra/llm-lora-mp-gpus/metrics repo_id="meta-llama/Meta-Llama-3.1-70B" batch_size=8 gradient_accumulation_steps=1 & + $BASE/venv/torch/bin/tune run --nnodes=1 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/lora_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_70B_lora.yaml epochs=1 output_dir=$BASE/extra/llm-lora-mp-gpus/output tokenizer.path=$BASE/data/llama3_70B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_70B checkpointer.output_dir=$BASE/data/llama3_70B/ safetensors=true metric_logger.log_dir=$BASE/extra/llm-lora-mp-gpus/metrics repo_id="meta-llama/Meta-Llama-3.1-70B" batch_size=8 gradient_accumulation_steps=1 & wait ) @@ -471,7 +456,7 @@ echo "---" echo "llm-full-mp-gpus" echo "================" time ( - $BASE/venv/torch/bin/tune run --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/full_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_70B_full.yaml epochs=1 output_dir=$BASE/extra/llm-full-mp-gpus/output tokenizer.path=$BASE/data/llama3_70B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_70B checkpointer.output_dir=$BASE/data/llama3_70B/ metric_logger.log_dir=$BASE/extra/llm-full-mp-gpus/metrics repo_id="meta-llama/Meta-Llama-3.1-70B" safetensors=true batch_size=2 gradient_accumulation_steps=1 & + $BASE/venv/torch/bin/tune run --nnodes=1 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/full_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_70B_full.yaml epochs=1 output_dir=$BASE/extra/llm-full-mp-gpus/output tokenizer.path=$BASE/data/llama3_70B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_70B checkpointer.output_dir=$BASE/data/llama3_70B/ metric_logger.log_dir=$BASE/extra/llm-full-mp-gpus/metrics repo_id="meta-llama/Meta-Llama-3.1-70B" safetensors=true batch_size=2 gradient_accumulation_steps=1 & wait ) @@ -479,8 +464,8 @@ echo "---" echo "llm-full-mp-nodes" echo "=================" time ( - $BASE/venv/torch/bin/tune run --nnodes=2 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/full_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_70B_full.yaml epochs=1 output_dir=$BASE/extra/llm-full-mp-nodes/output tokenizer.path=$BASE/data/llama3_70B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_70B checkpointer.output_dir=$BASE/data/llama3_70B/ metric_logger.log_dir=$BASE/extra/llm-full-mp-nodes/metrics repo_id="meta-llama/Meta-Llama-3.1-70B" safetensors=true 
batch_size=2 gradient_accumulation_steps=1 & - ssh -oCheckHostIP=no -oStrictHostKeyChecking=no -oPasswordAuthentication=no -oPasswordAuthentication=no -p 22 username@192.168.0.11 $BASE/venv/torch/bin/tune run --nnodes=2 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/full_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_70B_full.yaml epochs=1 output_dir=$BASE/extra/llm-full-mp-nodes/output tokenizer.path=$BASE/data/llama3_70B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_70B checkpointer.output_dir=$BASE/data/llama3_70B/ metric_logger.log_dir=$BASE/extra/llm-full-mp-nodes/metrics repo_id="meta-llama/Meta-Llama-3.1-70B" safetensors=true batch_size=2 gradient_accumulation_steps=1 & + $BASE/venv/torch/bin/tune run --nnodes=2 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --node-rank=0 --local-addr=127.0.0.1 --rdzv-conf=rank=0 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/full_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_70B_full.yaml epochs=1 output_dir=$BASE/extra/llm-full-mp-nodes/output tokenizer.path=$BASE/data/llama3_70B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_70B checkpointer.output_dir=$BASE/data/llama3_70B/ metric_logger.log_dir=$BASE/extra/llm-full-mp-nodes/metrics repo_id="meta-llama/Meta-Llama-3.1-70B" safetensors=true batch_size=2 gradient_accumulation_steps=1 & + ssh -oCheckHostIP=no -oStrictHostKeyChecking=no -oPasswordAuthentication=no -oPasswordAuthentication=no -p 22 username@192.168.0.11 $BASE/venv/torch/bin/tune run --nnodes=2 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --node-rank=1 --local-addr=192.168.0.11 --rdzv-conf=rank=1 --nproc-per-node=8 -- $SRC/milabench/benchmarks/llm/recipes/full_finetune_distributed.py --config $SRC/milabench/benchmarks/llm/configs/llama3_70B_full.yaml epochs=1 output_dir=$BASE/extra/llm-full-mp-nodes/output tokenizer.path=$BASE/data/llama3_70B/original/tokenizer.model checkpointer.checkpoint_dir=$BASE/data/llama3_70B checkpointer.output_dir=$BASE/data/llama3_70B/ metric_logger.log_dir=$BASE/extra/llm-full-mp-nodes/metrics repo_id="meta-llama/Meta-Llama-3.1-70B" safetensors=true batch_size=2 gradient_accumulation_steps=1 & wait ) @@ -574,21 +559,6 @@ time ( wait ) -echo "---" -echo "rlhf_" -echo "=====" -time ( - CUDA_VISIBLE_DEVICES=0 $SRC/milabench/benchmarks/rlhf/main.py --output_dir $BASE/extra/rlhf_/output --model_name_or_path EleutherAI/pythia-1b-deduped --per_device_train_batch_size 64 --logging_strategy no --log_level critical --bf16 & - CUDA_VISIBLE_DEVICES=1 $SRC/milabench/benchmarks/rlhf/main.py --output_dir $BASE/extra/rlhf_/output --model_name_or_path EleutherAI/pythia-1b-deduped --per_device_train_batch_size 64 --logging_strategy no --log_level critical --bf16 & - CUDA_VISIBLE_DEVICES=2 $SRC/milabench/benchmarks/rlhf/main.py --output_dir $BASE/extra/rlhf_/output --model_name_or_path EleutherAI/pythia-1b-deduped --per_device_train_batch_size 64 --logging_strategy no --log_level critical --bf16 & - CUDA_VISIBLE_DEVICES=3 $SRC/milabench/benchmarks/rlhf/main.py --output_dir $BASE/extra/rlhf_/output --model_name_or_path EleutherAI/pythia-1b-deduped --per_device_train_batch_size 64 --logging_strategy no --log_level critical --bf16 & - CUDA_VISIBLE_DEVICES=4 $SRC/milabench/benchmarks/rlhf/main.py 
--output_dir $BASE/extra/rlhf_/output --model_name_or_path EleutherAI/pythia-1b-deduped --per_device_train_batch_size 64 --logging_strategy no --log_level critical --bf16 & - CUDA_VISIBLE_DEVICES=5 $SRC/milabench/benchmarks/rlhf/main.py --output_dir $BASE/extra/rlhf_/output --model_name_or_path EleutherAI/pythia-1b-deduped --per_device_train_batch_size 64 --logging_strategy no --log_level critical --bf16 & - CUDA_VISIBLE_DEVICES=6 $SRC/milabench/benchmarks/rlhf/main.py --output_dir $BASE/extra/rlhf_/output --model_name_or_path EleutherAI/pythia-1b-deduped --per_device_train_batch_size 64 --logging_strategy no --log_level critical --bf16 & - CUDA_VISIBLE_DEVICES=7 $SRC/milabench/benchmarks/rlhf/main.py --output_dir $BASE/extra/rlhf_/output --model_name_or_path EleutherAI/pythia-1b-deduped --per_device_train_batch_size 64 --logging_strategy no --log_level critical --bf16 & - wait -) - echo "---" echo "rlhf-single" echo "===========" @@ -612,3 +582,26 @@ time ( wait ) +echo "---" +echo "vjepa-single" +echo "============" +time ( + CUDA_VISIBLE_DEVICES=0 python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-single & + CUDA_VISIBLE_DEVICES=1 python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-single & + CUDA_VISIBLE_DEVICES=2 python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-single & + CUDA_VISIBLE_DEVICES=3 python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-single & + CUDA_VISIBLE_DEVICES=4 python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-single & + CUDA_VISIBLE_DEVICES=5 python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-single & + CUDA_VISIBLE_DEVICES=6 python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-single & + CUDA_VISIBLE_DEVICES=7 python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-single & + wait +) + +echo "---" +echo "vjepa-gpus" +echo "==========" +time ( + $BASE/venv/torch/bin/benchrun --nnodes=1 --rdzv-backend=static --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --local-ranks-filter=0 --nproc-per-node=8 --no-python -- python $SRC/milabench/benchmarks/vjepa/main.py --batch_size 24 --num_workers 12 --dataset $BASE/data/FakeVideo/video_metainfo.csv --output $BASE/extra/vjepa-gpus & + wait +) + diff --git a/tests/test_mock.py b/tests/test_mock.py index 8e3944568..2e41a8388 100644 --- a/tests/test_mock.py +++ b/tests/test_mock.py @@ -14,7 +14,12 @@ "llm-lora-ddp-nodes", "llm-lora-mp-gpus", "llm-full-mp-gpus", - "llm-full-mp-nodes" + "llm-full-mp-nodes", +} + + +OVERSIZED_INSTALL_BENCHMARKS = { + } def run_cli(*args, expected_code=0, msg=None): @@ -88,6 +93,9 @@ def test_milabench(monkeypatch, bench, module_tmp_dir, standard_config): monkeypatch.setenv("MILABENCH_GPU_ARCH", "cuda") + if bench in OVERSIZED_INSTALL_BENCHMARKS: + 
return + with filecount_inc(module_tmp_dir, "install"): run_cli("install", *args, "--select", bench) @@ -111,6 +119,12 @@ def test_milabench(monkeypatch, bench, module_tmp_dir, standard_config): run_cli("run", *args, "--no-report", "--select", bench, "--run-name", str(bench)) + import shutil + import tempfile + shutil.rmtree(tempfile.gettempdir(), ignore_errors=True) + # shutil.rmtree(module_tmp_dir) + + ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) def cleanpath(out, tmppath): import subprocess diff --git a/tests/test_summary/test_report.txt b/tests/test_summary/test_report.txt index fe0105469..b9f6ce02a 100644 --- a/tests/test_summary/test_report.txt +++ b/tests/test_summary/test_report.txt @@ -5,8 +5,8 @@ Benchmark results Breakdown --------- -bench | fail | n | ngpu | perf | sem% | std% | peak_memory | score | weight -benchio | 0 | 4 | 0 | 7979.82 | 2.9% | 17.2% | nan | 7979.82 | 2.00 +bench | fail | n | ngpu | perf | sem% | std% | peak_memory | score | weight +benchio | 0 | 4 | 0 | 7979.82 | 2.9% | 17.2% | nan | 7979.82 | 2.00 Scores ------ diff --git a/tests/test_summary/test_report_folder_does_average.txt b/tests/test_summary/test_report_folder_does_average.txt index 50a4accd0..9fda7a9c2 100644 --- a/tests/test_summary/test_report_folder_does_average.txt +++ b/tests/test_summary/test_report_folder_does_average.txt @@ -5,8 +5,8 @@ Benchmark results Breakdown --------- -bench | fail | n | ngpu | perf | sem% | std% | peak_memory | score | weight -benchio | 0 | 6 | 0 | 7878.45 | 2.5% | 18.0% | 24456 | 7878.45 | 2.00 +bench | fail | n | ngpu | perf | sem% | std% | peak_memory | score | weight +benchio | 0 | 6 | 0 | 7878.45 | 2.5% | 18.0% | 24456 | 7878.45 | 2.00 Scores ------
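Why the milabench/system.py hunk above swaps bare dataclass defaults for field(default_factory=...): on Python 3.11+ a non-frozen dataclass instance is unhashable, so using one as a field default raises ValueError at class-definition time, and on older versions that accepted it the single default object was silently shared by every instance of the outer class. A minimal runnable sketch with toy classes (not milabench's real fields):

from dataclasses import dataclass, field

@dataclass
class Sizer:
    # per-instance list; a bare `multiple: list = []` default would be rejected
    multiple: list = field(default_factory=list)

# The pattern the patch removes (raises ValueError on Python 3.11+; on
# versions that accept it, one Sizer object is shared by every Options):
#     sizer: Sizer = Sizer()

@dataclass
class Options:
    sizer: Sizer = field(default_factory=Sizer)  # fresh Sizer per Options()

a, b = Options(), Options()
a.sizer.multiple.append(8)
assert b.sizer.multiple == []  # no state shared between instances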