diff --git a/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java b/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java
index b5a0e52c4..9c42a0ef1 100644
--- a/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java
+++ b/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java
@@ -497,9 +497,13 @@ public int run(String[] args) throws Exception {
     int tputPlotInterval = DEFAULT_TPUT_PLOT_INTERVAL;
     long tputSampleUnit = DEFAULT_TPUT_SAMPLE_UNIT;
     float threshold = 0.5f;
+    int analyzeNumReduceTasks = 1;
     String version="TestFDSIO.0.0.4 Enhanced Version";
-    String usage = "Usage: TestFDSIOEnh -read | -write | -skipAnalyze | -clean [-nrFiles N] [-fileSize MB] [-resFile resultFileName] [-bufferSize Bytes] [-tputFile AggregatedThroughputCSVFileName] [-sampleInterval Miliseconds] [-plotInterval Miliseconds] [-sampleUnit g|m|k|b] [-sumThreshold 0.0~1.0] [-tputReportEach] [-tputReportTotal]";
+    String usage = "Usage: TestFDSIOEnh -read | -write | -skipAnalyze | -clean [-nrFiles N] [-fileSize MB] " +
+                   "[-resFile resultFileName] [-bufferSize Bytes] [-tputFile AggregatedThroughputCSVFileName] " +
+                   "[-sampleInterval Miliseconds] [-plotInterval Miliseconds] [-sampleUnit g|m|k|b] " +
+                   "[-sumThreshold 0.0~1.0] [-tputReportEach] [-tputReportTotal] [-analyzeNumReduceTasks N]";
 
     System.out.println(version);
 
     if (args.length == 0) {
@@ -561,6 +565,8 @@ else if (unit.equalsIgnoreCase("g"))
         else {
           LOG.warn("Illegal format of parameter \"sampleUnit\", Ignored.");
         }
+      } else if (args[i].equals("-analyzeNumReduceTasks")) {
+        analyzeNumReduceTasks = Integer.parseInt(args[++i]);
       }
     }
 
@@ -613,7 +619,7 @@ else if (unit.equalsIgnoreCase("g"))
                  tputFileName, tputReportEach, tputReportTotal);*/
       runAnalyse(fs, fsConfig, testType, execTime, resFileName, nrFiles, fileSize*MEGA, tStart, tputPlotInterval, tputSampleUnit,(int)(mapSlots*threshold),
-                 tputFileName, tputReportEach, tputReportTotal);
+                 tputFileName, tputReportEach, tputReportTotal, analyzeNumReduceTasks);
     }
   } catch(Exception e) {
     System.err.print(StringUtils.stringifyException(e));
   }
@@ -838,7 +844,8 @@ protected static void runAnalyse(FileSystem fs, Configuration fsConfig,
                                    long fileSize, long tStart,
                                    int plotInterval, long sampleUnit,
                                    int threshold, String tputResFileName,
-                                   boolean tputReportEach, boolean tputReportTotal) throws IOException {
+                                   boolean tputReportEach, boolean tputReportTotal,
+                                   int analyzeNumReduceTasks) throws IOException {
     long t1 = System.currentTimeMillis();
     Path reduceFile;
     if (testType == TEST_TYPE_WRITE)
@@ -942,7 +949,7 @@ protected static void runAnalyse(FileSystem fs, Configuration fsConfig,
             job.setReducerClass(_Reducer.class);
             job.setOutputKeyClass(Text.class);
             job.setOutputValueClass(Text.class);
-//            job.setNumReduceTasks(1);
+            job.setNumReduceTasks(analyzeNumReduceTasks);
             org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, reduceFile);
             org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, DfsioeConfig.getInstance().getReportDir(fsConfig));
             job.waitForCompletion(true);
diff --git a/bin/functions/hibench_prop_env_mapping.py b/bin/functions/hibench_prop_env_mapping.py
index a3b91d064..235181c5b 100644
--- a/bin/functions/hibench_prop_env_mapping.py
+++ b/bin/functions/hibench_prop_env_mapping.py
@@ -163,6 +163,7 @@
     RD_FILE_SIZE="hibench.dfsioe.read.file_size",
     WT_NUM_OF_FILES="hibench.dfsioe.write.number_of_files",
     WT_FILE_SIZE="hibench.dfsioe.write.file_size",
+    NUM_REDUCE_TASKS_ANALYZE="hibench.dfsioe.analyze.num_reduce_tasks",
     MAP_JAVA_OPTS="hibench.dfsioe.map.java_opts",
     RED_JAVA_OPTS="hibench.dfsioe.red.java_opts",
     # For NWeight
diff --git a/bin/workloads/micro/dfsioe/hadoop/run_read.sh b/bin/workloads/micro/dfsioe/hadoop/run_read.sh
index c4b7245b8..568ec5749 100755
--- a/bin/workloads/micro/dfsioe/hadoop/run_read.sh
+++ b/bin/workloads/micro/dfsioe/hadoop/run_read.sh
@@ -29,7 +29,7 @@
 rmr_hdfs $INPUT_HDFS/io_read || true
 rmr_hdfs $INPUT_HDFS/_* || true
 SIZE=`dir_size $INPUT_HDFS`
-OPTION="-read -nrFiles ${RD_NUM_OF_FILES} -fileSize ${RD_FILE_SIZE} -bufferSize 131072 -plotInteval 1000 -sampleUnit m -sampleInteval 200 -sumThreshold 0.5 -tputReportTotal -Dtest.build.data=${INPUT_HDFS}"
+OPTION="-read -nrFiles ${RD_NUM_OF_FILES} -fileSize ${RD_FILE_SIZE} -bufferSize 131072 -plotInteval 1000 -sampleUnit m -sampleInteval 200 -sumThreshold 0.5 -tputReportTotal -Dtest.build.data=${INPUT_HDFS} -analyzeNumReduceTasks ${NUM_REDUCE_TASKS_ANALYZE}"
 
 OLD_HADOOP_OPTS=${HADOOP_OPTS:-}
 export HADOOP_OPTS="${HADOOP_OPTS:-} -Dtest.build.data=${INPUT_HDFS} "
diff --git a/bin/workloads/micro/dfsioe/hadoop/run_write.sh b/bin/workloads/micro/dfsioe/hadoop/run_write.sh
index 39c356f33..878bfd694 100755
--- a/bin/workloads/micro/dfsioe/hadoop/run_write.sh
+++ b/bin/workloads/micro/dfsioe/hadoop/run_write.sh
@@ -29,7 +29,7 @@
 rmr_hdfs ${OUTPUT_HDFS} || true
 
 # pre-running
 SIZE=`dir_size $INPUT_HDFS`
-OPTION="-write -nrFiles ${WT_NUM_OF_FILES} -fileSize ${WT_FILE_SIZE} -bufferSize 4096 -plotInteval 1000 -sampleUnit m -sampleInteval 200 -sumThreshold 0.5 -tputReportTotal -Dtest.build.data=${INPUT_HDFS}"
+OPTION="-write -nrFiles ${WT_NUM_OF_FILES} -fileSize ${WT_FILE_SIZE} -bufferSize 4096 -plotInteval 1000 -sampleUnit m -sampleInteval 200 -sumThreshold 0.5 -tputReportTotal -Dtest.build.data=${INPUT_HDFS} -analyzeNumReduceTasks ${NUM_REDUCE_TASKS_ANALYZE}"
 OLD_HADOOP_OPTS=${HADOOP_OPTS:-}
 export HADOOP_OPTS="${HADOOP_OPTS:-} -Dtest.build.data=${INPUT_HDFS} "