Intel-bigdata · Martin-Jia · Oct 15, 2019 · Oct 15, 2019 · Oct 15, 2019
diff --git a/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java b/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java
@@ -497,9 +497,13 @@ public int run(String[] args) throws Exception {
     int tputPlotInterval = DEFAULT_TPUT_PLOT_INTERVAL;
     long tputSampleUnit = DEFAULT_TPUT_SAMPLE_UNIT;
     float threshold = 0.5f;
+    int analyzeNumReduceTasks = 1;
 
     String version="TestFDSIO.0.0.4 Enhanced Version";
-    String usage = "Usage: TestFDSIOEnh -read | -write | -skipAnalyze | -clean [-nrFiles N] [-fileSize MB] [-resFile resultFileName] [-bufferSize Bytes] [-tputFile AggregatedThroughputCSVFileName] [-sampleInterval Miliseconds] [-plotInterval Miliseconds] [-sampleUnit g|m|k|b] [-sumThreshold 0.0~1.0] [-tputReportEach] [-tputReportTotal]";
+    String usage = "Usage: TestFDSIOEnh -read | -write | -skipAnalyze | -clean [-nrFiles N] [-fileSize MB] " +
+                   "[-resFile resultFileName] [-bufferSize Bytes] [-tputFile AggregatedThroughputCSVFileName] " +
+                   "[-sampleInterval Miliseconds] [-plotInterval Miliseconds] [-sampleUnit g|m|k|b] " +
+                   "[-sumThreshold 0.0~1.0] [-tputReportEach] [-tputReportTotal] [-analyzeNumReduceTasks N]"; // N = reducer count passed to the analysis job
 
     System.out.println(version);
     if (args.length == 0) {
@@ -561,6 +565,8 @@ else if (unit.equalsIgnoreCase("g"))
             else {
                 LOG.warn("Illegal format of parameter \"sampleUnit\", Ignored.");
             }
+        } else if (args[i].equals("-analyzeNumReduceTasks")) {
+            analyzeNumReduceTasks = Integer.parseInt(args[++i]);
         }
     }
 
@@ -613,7 +619,7 @@ else if (unit.equalsIgnoreCase("g"))
                     tputFileName, tputReportEach, tputReportTotal);*/
             runAnalyse(fs, fsConfig, testType, execTime, resFileName, nrFiles, fileSize*MEGA, 
                     tStart, tputPlotInterval, tputSampleUnit,(int)(mapSlots*threshold),
-                    tputFileName, tputReportEach, tputReportTotal);
+                    tputFileName, tputReportEach, tputReportTotal, analyzeNumReduceTasks);
         }
     } catch(Exception e) {
         System.err.print(StringUtils.stringifyException(e));
@@ -838,7 +844,8 @@ protected static void runAnalyse(FileSystem fs, Configuration fsConfig,
 								         long fileSize, long tStart,
 								         int plotInterval, long sampleUnit,
 								         int threshold, String tputResFileName,
-								         boolean tputReportEach, boolean tputReportTotal) throws IOException {
+								         boolean tputReportEach, boolean tputReportTotal,
+                                         int analyzeNumReduceTasks) throws IOException {
 		 long t1 = System.currentTimeMillis();
 		 Path reduceFile;
 		 if (testType == TEST_TYPE_WRITE)
@@ -942,7 +949,7 @@ protected static void runAnalyse(FileSystem fs, Configuration fsConfig,
 			 job.setReducerClass(_Reducer.class);
 			 job.setOutputKeyClass(Text.class);
 			 job.setOutputValueClass(Text.class);
-//			 job.setNumReduceTasks(1);
+			 job.setNumReduceTasks(analyzeNumReduceTasks);
 			 org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, reduceFile);
 			 org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, DfsioeConfig.getInstance().getReportDir(fsConfig));
 			 job.waitForCompletion(true);

diff --git a/bin/functions/hibench_prop_env_mapping.py b/bin/functions/hibench_prop_env_mapping.py
@@ -163,6 +163,7 @@
     RD_FILE_SIZE="hibench.dfsioe.read.file_size",
     WT_NUM_OF_FILES="hibench.dfsioe.write.number_of_files",
     WT_FILE_SIZE="hibench.dfsioe.write.file_size",
+    NUM_REDUCE_TASKS_ANALYZE="hibench.dfsioe.analyze.num_reduce_tasks",
     MAP_JAVA_OPTS="hibench.dfsioe.map.java_opts",
     RED_JAVA_OPTS="hibench.dfsioe.red.java_opts",
     # For NWeight

diff --git a/bin/workloads/micro/dfsioe/hadoop/run_read.sh b/bin/workloads/micro/dfsioe/hadoop/run_read.sh
@@ -29,7 +29,7 @@ rmr_hdfs $INPUT_HDFS/io_read || true
 rmr_hdfs $INPUT_HDFS/_* || true
 
 SIZE=`dir_size $INPUT_HDFS`
-OPTION="-read -nrFiles ${RD_NUM_OF_FILES} -fileSize ${RD_FILE_SIZE} -bufferSize 131072 -plotInteval 1000 -sampleUnit m -sampleInteval 200 -sumThreshold 0.5 -tputReportTotal -Dtest.build.data=${INPUT_HDFS}"
+OPTION="-read -nrFiles ${RD_NUM_OF_FILES} -fileSize ${RD_FILE_SIZE} -bufferSize 131072 -plotInteval 1000 -sampleUnit m -sampleInteval 200 -sumThreshold 0.5 -tputReportTotal -Dtest.build.data=${INPUT_HDFS} -analyzeNumReduceTasks ${NUM_REDUCE_TASKS_ANALYZE:-1}" # default to 1 reducer so the flag always has a value when the property is unset
 
 OLD_HADOOP_OPTS=${HADOOP_OPTS:-}
 export HADOOP_OPTS="${HADOOP_OPTS:-} -Dtest.build.data=${INPUT_HDFS} "

diff --git a/bin/workloads/micro/dfsioe/hadoop/run_write.sh b/bin/workloads/micro/dfsioe/hadoop/run_write.sh
@@ -29,7 +29,7 @@ rmr_hdfs ${OUTPUT_HDFS} || true
 
 # pre-running
 SIZE=`dir_size $INPUT_HDFS`
-OPTION="-write -nrFiles ${WT_NUM_OF_FILES} -fileSize ${WT_FILE_SIZE} -bufferSize 4096 -plotInteval 1000 -sampleUnit m -sampleInteval 200 -sumThreshold 0.5 -tputReportTotal -Dtest.build.data=${INPUT_HDFS}"
+OPTION="-write -nrFiles ${WT_NUM_OF_FILES} -fileSize ${WT_FILE_SIZE} -bufferSize 4096 -plotInteval 1000 -sampleUnit m -sampleInteval 200 -sumThreshold 0.5 -tputReportTotal -Dtest.build.data=${INPUT_HDFS} -analyzeNumReduceTasks ${NUM_REDUCE_TASKS_ANALYZE:-1}" # default to 1 reducer so the flag always has a value when the property is unset
 
 OLD_HADOOP_OPTS=${HADOOP_OPTS:-}
 export HADOOP_OPTS="${HADOOP_OPTS:-} -Dtest.build.data=${INPUT_HDFS} "