diff --git a/scripts/bundle-html-dataset.sh b/scripts/bundle-html-dataset.sh
index f878c60143..90e9ac0081 100755
--- a/scripts/bundle-html-dataset.sh
+++ b/scripts/bundle-html-dataset.sh
@@ -19,8 +19,13 @@ psql -h $PGADDRESS -U cortex -t -o "$DTPATH/$CORPUSNAME-no_problem-tasks.txt" -c
psql -h $PGADDRESS -U cortex -t -o "$DTPATH/$CORPUSNAME-warning-tasks.txt" -c "SELECT entry FROM tasks WHERE corpus_id=$CORPUSID and service_id=$SERVICEID and status=-2"
psql -h $PGADDRESS -U cortex -t -o "$DTPATH/$CORPUSNAME-error-tasks.txt" -c "SELECT entry FROM tasks WHERE corpus_id=$CORPUSID and service_id=$SERVICEID and status=-3"
+# Specific to the 08.2019 dataset: split the warning task list into 500000-line chunks; only the first two chunks (xaa, xab) are renamed into task lists
+split -l 500000 "$DTPATH/$CORPUSNAME-warning-tasks.txt" "$DTPATH/x"
+mv "$DTPATH/xaa" "$DTPATH/$CORPUSNAME-warning_1-tasks.txt"
+mv "$DTPATH/xab" "$DTPATH/$CORPUSNAME-warning_2-tasks.txt"
+
# For each severity, prepare a dataset archive of HTML files
-severitylist="no_problem warning error"
+severitylist="no_problem warning_1 warning_2 error"
for severity in $severitylist; do
mkdir $DTPATH/$severity