Merge pull request #64 from lissyx/lang-folder

Update to DeepSpeech 0.6.0a7 and move language-specifics to a language folder
common-voice · Sep 25, 2019 · 9f87200 · 9f87200
2 parents a3ed592 + e3fcfbb
commit 9f87200
Show file tree

Hide file tree

Showing 13 changed files with 40 additions and 32 deletions.
diff --git a/DeepSpeech/CONTRIBUTING.md b/DeepSpeech/CONTRIBUTING.md
@@ -54,7 +54,7 @@ will be copied from that place.
 Training successful on:
  - 64GB RAM
  - 2x RTX 2080 Ti
- - Debian Sid, kernel 4.19, driver 418.56
+ - Debian Sid, kernel 5.2, driver 430.50
  - With ~250h of audio, one training epoch takes ~15min, and validation takes ~50s
 
 ## Run the image:
@@ -63,7 +63,7 @@ The `mount` option is really important: this is where intermediate files, traini
 well as final model files will be produced.
 
 ```
-$ docker run --runtime=nvidia --mount type=bind,src=PATH/TO/HOST/DIRECTORY,dst=/mnt <docker-image-id>
+$ docker run --tty --runtime=nvidia --mount type=bind,src=PATH/TO/HOST/DIRECTORY,dst=/mnt <docker-image-id>
 ```
 
 Training parameters can be changed at runtime as well using environment variables.
diff --git a/DeepSpeech/Dockerfile.train b/DeepSpeech/Dockerfile.train
@@ -1,18 +1,18 @@
 FROM nvidia/cuda:10.0-cudnn7-runtime-ubuntu18.04
 
 ARG ds_repo=mozilla/DeepSpeech
-ARG ds_branch=6e53fd8fe0f1c7469dcf946a2eeed4d7aace6afd
-ARG ds_sha1=6e53fd8fe0f1c7469dcf946a2eeed4d7aace6afd
+ARG ds_branch=513c8e9ab7e2abc9b3fc62e7f1514b83156e8f97
+ARG ds_sha1=513c8e9ab7e2abc9b3fc62e7f1514b83156e8f97
 ARG kenlm_repo=kpu/kenlm
 ARG kenlm_branch=2ad7cb56924cd3c6811c604973f592cb5ef604eb
 
-ARG model_languag=fr
+ARG model_language=fr
 
 ARG batch_size=68
 ARG n_hidden=2048
-ARG epochs=30
-ARG learning_rate=0.00025
-ARG dropout=0.15
+ARG epochs=60
+ARG learning_rate=0.0001
+ARG dropout=0.2
 ARG lm_alpha=0.75
 ARG lm_beta=1.85
 ARG early_stop=1
@@ -72,13 +72,17 @@ RUN apt-get -qq update && apt-get -qq install -y --no-install-recommends \
     unzip \
     pixz \
     sox \
+    sudo \
     libsox-fmt-all \
     locales locales-all \
     xz-utils
 
 RUN groupadd -g 999 trainer && \
     adduser --system --uid 999 --group trainer
 
+RUN echo "trainer ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/trainer && \
+    chmod 0440 /etc/sudoers.d/trainer
+
 # Below that point, nothing requires being root
 USER trainer
 
@@ -101,7 +105,7 @@ RUN pip install `python util/taskcluster.py --decoder`
 RUN TASKCLUSTER_SCHEME="https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.%(branch_name)s.%(arch_string)s/artifacts/public/%(artifact_name)s" python util/taskcluster.py \
 	--target="$(pwd)" \
 	--artifact="convert_graphdef_memmapped_format" \
-	--branch="r1.13" && chmod +x convert_graphdef_memmapped_format
+	--branch="r1.14" && chmod +x convert_graphdef_memmapped_format
 
 WORKDIR $HOMEDIR
 
@@ -116,8 +120,10 @@ RUN git clone https://github.com/$kenlm_repo.git && cd kenlm && git checkout $ke
 ENV PATH="$HOMEDIR/kenlm/build/bin/:$PATH"
 
 # Copy now so that docker build can leverage caches
-COPY --chown=trainer:trainer *.sh $HOMEDIR/
+COPY --chown=trainer:trainer checks.sh generate_alphabet.sh package.sh run.sh $HOMEDIR/
+
+COPY --chown=trainer:trainer ${MODEL_LANGUAGE}/*.sh $HOMEDIR/${MODEL_LANGUAGE}/
 
-COPY --chown=trainer:trainer lingua_libre_skiplist.txt $HOMEDIR/
+COPY --chown=trainer:trainer ${MODEL_LANGUAGE}/lingua_libre_skiplist.txt $HOMEDIR/${MODEL_LANGUAGE}/
 
 ENTRYPOINT "$HOMEDIR/run.sh"
diff --git a/DeepSpeech/checks.sh b/DeepSpeech/checks.sh
@@ -2,6 +2,11 @@
 
 set -xe
 
+sudo id
+
+# Workaround libnvidia-ml.so: https://github.com/NVIDIA/nvidia-docker/issues/854#issuecomment-451464721
+sudo /sbin/ldconfig
+
 nvidia-smi
 
 for dir in $(find /mnt/ -maxdepth 1 -type d);

diff --git a/DeepSpeech/fr/.run.sh.swp b/DeepSpeech/fr/.run.sh.swp
diff --git a/DeepSpeech/build_lm.sh → DeepSpeech/fr/build_lm.sh b/DeepSpeech/build_lm.sh → DeepSpeech/fr/build_lm.sh
diff --git a/DeepSpeech/import_cvfr.sh → DeepSpeech/fr/import_cvfr.sh b/DeepSpeech/import_cvfr.sh → DeepSpeech/fr/import_cvfr.sh
diff --git a/DeepSpeech/import_lingualibre.sh → DeepSpeech/fr/import_lingualibre.sh b/DeepSpeech/import_lingualibre.sh → DeepSpeech/fr/import_lingualibre.sh
@@ -13,7 +13,7 @@ pushd $HOME/ds/
 			--iso639-3 fra                                  \
 			--english-name French                           \
 			${IMPORT_AS_ENGLISH}                            \
-			--bogus-records $HOME/lingua_libre_skiplist.txt \
+			--bogus-records $HOME/${MODEL_LANGUAGE}/lingua_libre_skiplist.txt \
 			/mnt/extracted/data/lingualibre
 	fi;
 popd
diff --git a/DeepSpeech/import_trainingspeech.sh → DeepSpeech/fr/import_trainingspeech.sh b/DeepSpeech/import_trainingspeech.sh → DeepSpeech/fr/import_trainingspeech.sh
diff --git a/DeepSpeech/lingua_libre_skiplist.txt → DeepSpeech/fr/lingua_libre_skiplist.txt b/DeepSpeech/lingua_libre_skiplist.txt → DeepSpeech/fr/lingua_libre_skiplist.txt
diff --git a/DeepSpeech/fr/run.sh b/DeepSpeech/fr/run.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+set -xe
+
+${MODEL_LANGUAGE}/import_cvfr.sh
+
+${MODEL_LANGUAGE}/import_lingualibre.sh
+
+${MODEL_LANGUAGE}/import_trainingspeech.sh
+
+generate_alphabet.sh
+
+${MODEL_LANGUAGE}/build_lm.sh
+
+${MODEL_LANGUAGE}/train.sh
diff --git a/DeepSpeech/train_fr.sh → DeepSpeech/fr/train.sh b/DeepSpeech/train_fr.sh → DeepSpeech/fr/train.sh
@@ -23,6 +23,7 @@ pushd $HOME/ds/
 		fi;
 
 		python -u DeepSpeech.py \
+			--show_progressbar \
 			--alphabet_config_path /mnt/models/alphabet.txt \
 			--lm_binary_path /mnt/lm/lm.binary \
 			--lm_trie_path /mnt/lm/trie \
@@ -40,9 +41,6 @@ pushd $HOME/ds/
 			--lm_alpha ${LM_ALPHA} \
 			--lm_beta ${LM_BETA} \
 			${EARLY_STOP_FLAG} \
-			--display_step 0 \
-			--validation_step 1 \
-			--checkpoint_step 1 \
 			--checkpoint_dir /mnt/checkpoints/ \
 			--export_dir /mnt/models/ \
 			--export_language "fra"
@@ -58,7 +56,6 @@ pushd $HOME/ds/
 			--checkpoint_dir /mnt/checkpoints/ \
 			--export_dir /mnt/models/ \
 			--export_tflite \
-			--nouse_seq_length \
 			--export_language "fra"
 	fi;
 

diff --git a/DeepSpeech/run.sh b/DeepSpeech/run.sh
@@ -11,6 +11,6 @@ checks.sh
 export TMP=/mnt/tmp
 export TEMP=/mnt/tmp
 
-source run_${MODEL_LANGUAGE}.sh
+${MODEL_LANGUAGE}/run.sh
 
 package.sh
diff --git a/DeepSpeech/run_fr.sh b/DeepSpeech/run_fr.sh