fix: Do not change layerdrop and padding in a multi-GPU setup, as it is not required
alexandrainst · Sep 26, 2024 · deff165 · deff165
1 parent 5100bf5
commit deff165
Show file tree

Hide file tree

Showing 12 changed files with 10 additions and 25 deletions.
diff --git a/config/asr_finetuning.yaml b/config/asr_finetuning.yaml
@@ -29,7 +29,6 @@ streaming: true
 cache_dir: null
 
 # Can be `longest`, `max_length` or `do_not_pad`
-# NOTE: This is automatically set to `max_length` in a multi-gpu setting
 padding: longest
 
 # This is a list of the sampling probability of each dataset, where null means that

diff --git a/config/model/test-wav2vec2.yaml b/config/model/test-wav2vec2.yaml
@@ -19,7 +19,7 @@ mask_time_prob: 0.5
 mask_time_length: 10
 mask_feature_prob: 0.5
 mask_feature_length: 64
-layerdrop: 0.1  # NOTE: This will automatically be set to 0 in a multi-gpu setting
+layerdrop: 0.1
 ctc_loss_reduction: sum
 
 # Decoder hyperparameters

diff --git a/config/model/test-whisper.yaml b/config/model/test-whisper.yaml
@@ -16,4 +16,4 @@ mask_time_prob: 0.5
 mask_time_length: 10
 mask_feature_prob: 0.5
 mask_feature_length: 64
-layerdrop: 0.1  # NOTE: This will automatically be set to 0 in a multi-gpu setting
+layerdrop: 0.1
diff --git a/config/model/wav2vec2-large.yaml b/config/model/wav2vec2-large.yaml
@@ -19,7 +19,7 @@ mask_time_prob: 0.5
 mask_time_length: 10
 mask_feature_prob: 0.5
 mask_feature_length: 64
-layerdrop: 0.1  # NOTE: This will automatically be set to 0 in a multi-gpu setting
+layerdrop: 0.1
 ctc_loss_reduction: sum
 
 # Decoder hyperparameters

diff --git a/config/model/wav2vec2-medium.yaml b/config/model/wav2vec2-medium.yaml
@@ -19,7 +19,7 @@ mask_time_prob: 0.5
 mask_time_length: 10
 mask_feature_prob: 0.5
 mask_feature_length: 64
-layerdrop: 0.1  # NOTE: This will automatically be set to 0 in a multi-gpu setting
+layerdrop: 0.1
 ctc_loss_reduction: sum
 
 # Decoder hyperparameters

diff --git a/config/model/wav2vec2-small.yaml b/config/model/wav2vec2-small.yaml
@@ -19,7 +19,7 @@ mask_time_prob: 0.5
 mask_time_length: 10
 mask_feature_prob: 0.5
 mask_feature_length: 64
-layerdrop: 0.1  # NOTE: This will automatically be set to 0 in a multi-gpu setting
+layerdrop: 0.1
 ctc_loss_reduction: sum
 
 # Decoder hyperparameters

diff --git a/config/model/whisper-large.yaml b/config/model/whisper-large.yaml
@@ -16,4 +16,4 @@ mask_time_prob: 0.5
 mask_time_length: 10
 mask_feature_prob: 0.5
 mask_feature_length: 64
-layerdrop: 0.1  # NOTE: This will automatically be set to 0 in a multi-gpu setting
+layerdrop: 0.1
diff --git a/config/model/whisper-medium.yaml b/config/model/whisper-medium.yaml
@@ -16,4 +16,4 @@ mask_time_prob: 0.5
 mask_time_length: 10
 mask_feature_prob: 0.5
 mask_feature_length: 64
-layerdrop: 0.1  # NOTE: This will automatically be set to 0 in a multi-gpu setting
+layerdrop: 0.1
diff --git a/config/model/whisper-small.yaml b/config/model/whisper-small.yaml
@@ -16,4 +16,4 @@ mask_time_prob: 0.5
 mask_time_length: 10
 mask_feature_prob: 0.5
 mask_feature_length: 64
-layerdrop: 0.1  # NOTE: This will automatically be set to 0 in a multi-gpu setting
+layerdrop: 0.1
diff --git a/config/model/whisper-xsmall.yaml b/config/model/whisper-xsmall.yaml
@@ -16,4 +16,4 @@ mask_time_prob: 0.5
 mask_time_length: 10
 mask_feature_prob: 0.5
 mask_feature_length: 64
-layerdrop: 0.1  # NOTE: This will automatically be set to 0 in a multi-gpu setting
+layerdrop: 0.1
diff --git a/config/model/whisper-xxsmall.yaml b/config/model/whisper-xxsmall.yaml
@@ -16,4 +16,4 @@ mask_time_prob: 0.5
 mask_time_length: 10
 mask_feature_prob: 0.5
 mask_feature_length: 64
-layerdrop: 0.1  # NOTE: This will automatically be set to 0 in a multi-gpu setting
+layerdrop: 0.1
diff --git a/src/scripts/finetune_asr_model.py b/src/scripts/finetune_asr_model.py
@@ -44,20 +44,6 @@ def main(config: DictConfig) -> None:
                     "GPU training"
                 )
             config.gradient_checkpointing = False
-        # if "layerdrop" in config.model and config.model.layerdrop != 0.0:
-        #     if is_main_process:
-        #         logger.info(
-        #             "Forcing `layerdrop` to be 0.0 as this is required in a multi-GPU "
-        #             "training"
-        #         )
-        #     config.model.layerdrop = 0.0
-        # if config.padding != "max_length":
-        #     if is_main_process:
-        #         logger.info(
-        #             "Forcing `padding` to be 'max_length' as this is required in a "
-        #             "multi-GPU training"
-        #         )
-        #     config.padding = "max_length"
 
     finetune(config=config)