From e5f3592b9f75b7c72bdee3e446135f0303d1275a Mon Sep 17 00:00:00 2001
From: martinJia
Date: Tue, 15 Oct 2019 13:43:45 +0800
Subject: [PATCH 1/3] TestDFSIOEnh.java: make the number of reduce tasks of
 the analysis job configurable

---
 .../org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java b/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java
index b5a0e52c4..42a8958f9 100644
--- a/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java
+++ b/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java
@@ -497,9 +497,13 @@ public int run(String[] args) throws Exception {
     int tputPlotInterval = DEFAULT_TPUT_PLOT_INTERVAL;
     long tputSampleUnit = DEFAULT_TPUT_SAMPLE_UNIT;
     float threshold = 0.5f;
+    int analyzeNumReduceTasks = 1;
 
     String version="TestFDSIO.0.0.4 Enhanced Version";
-    String usage = "Usage: TestFDSIOEnh -read | -write | -skipAnalyze | -clean [-nrFiles N] [-fileSize MB] [-resFile resultFileName] [-bufferSize Bytes] [-tputFile AggregatedThroughputCSVFileName] [-sampleInterval Miliseconds] [-plotInterval Miliseconds] [-sampleUnit g|m|k|b] [-sumThreshold 0.0~1.0] [-tputReportEach] [-tputReportTotal]";
+    String usage = "Usage: TestFDSIOEnh -read | -write | -skipAnalyze | -clean [-nrFiles N] [-fileSize MB]" +
+        " [-resFile resultFileName] [-bufferSize Bytes] [-tputFile AggregatedThroughputCSVFileName] " +
+        "[-sampleInterval Miliseconds] [-plotInterval Miliseconds] [-sampleUnit g|m|k|b] [-sumThreshold 0.0~1.0]" +
+        " [-tputReportEach] [-tputReportTotal] [-analyzeNumReduceTasks]";
 
     System.out.println(version);
     if (args.length == 0) {
@@ -561,6 +565,8 @@ else if (unit.equalsIgnoreCase("g"))
         else {
           LOG.warn("Illegal format of parameter \"sampleUnit\", Ignored.");
         }
+      } else if (args[i].equals("-analyzeNumReduceTasks")) {
+        analyzeNumReduceTasks = Integer.parseInt(args[++i]);
       }
     }
 
@@ -613,7 +619,7 @@ else if (unit.equalsIgnoreCase("g"))
                  tputFileName, tputReportEach, tputReportTotal);*/
       runAnalyse(fs, fsConfig, testType, execTime, resFileName, nrFiles, fileSize*MEGA,
                  tStart, tputPlotInterval, tputSampleUnit,(int)(mapSlots*threshold),
-                 tputFileName, tputReportEach, tputReportTotal);
+                 tputFileName, tputReportEach, tputReportTotal, analyzeNumReduceTasks);
       }
     } catch(Exception e) {
       System.err.print(StringUtils.stringifyException(e));
@@ -838,7 +844,8 @@ protected static void runAnalyse(FileSystem fs, Configuration fsConfig,
                                   long fileSize, long tStart,
                                   int plotInterval, long sampleUnit, int threshold,
                                   String tputResFileName,
-                                  boolean tputReportEach, boolean tputReportTotal) throws IOException {
+                                  boolean tputReportEach, boolean tputReportTotal,
+                                  int analyzeNumReduceTasks) throws IOException {
     long t1 = System.currentTimeMillis();
     Path reduceFile;
     if (testType == TEST_TYPE_WRITE)
@@ -942,7 +949,7 @@ protected static void runAnalyse(FileSystem fs, Configuration fsConfig,
     job.setReducerClass(_Reducer.class);
     job.setOutputKeyClass(Text.class);
     job.setOutputValueClass(Text.class);
-//    job.setNumReduceTasks(1);
+    job.setNumReduceTasks(analyzeNumReduceTasks);
     org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, reduceFile);
     org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, DfsioeConfig.getInstance().getReportDir(fsConfig));
     job.waitForCompletion(true);
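This first patch makes one subtle behavioral change worth calling out: job.setNumReduceTasks was previously commented out, so the analyze job inherited the cluster's default reducer count, whereas the job now always sets the count explicitly, defaulting to 1 when -analyzeNumReduceTasks is not given. A minimal sketch of a direct invocation with the new flag (the jar path and the file counts/sizes are illustrative assumptions, not taken from the patch):

    # Hypothetical direct run; adjust the jar path to your build of HiBench's
    # autogen module. -analyzeNumReduceTasks is the flag this patch adds.
    hadoop jar autogen/target/autogen-*.jar org.apache.hadoop.fs.dfsioe.TestDFSIOEnh \
        -read -nrFiles 64 -fileSize 128 \
        -analyzeNumReduceTasks 4

Note also that the updated usage string lists [-analyzeNumReduceTasks] without showing its numeric argument, even though the parser reads one.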
From bfcb6e0546b542ccff2be3280f478c30b9f98680 Mon Sep 17 00:00:00 2001
From: martinJia
Date: Tue, 15 Oct 2019 14:22:23 +0800
Subject: [PATCH 2/3] add a configuration property to set the number of reduce
 tasks of the analyze procedure

---
 .../java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java | 8 ++++----
 bin/functions/hibench_prop_env_mapping.py              | 1 +
 bin/workloads/micro/dfsioe/hadoop/run_read.sh          | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java b/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java
index 42a8958f9..9c42a0ef1 100644
--- a/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java
+++ b/autogen/src/main/java/org/apache/hadoop/fs/dfsioe/TestDFSIOEnh.java
@@ -500,10 +500,10 @@ public int run(String[] args) throws Exception {
     int analyzeNumReduceTasks = 1;
 
     String version="TestFDSIO.0.0.4 Enhanced Version";
-    String usage = "Usage: TestFDSIOEnh -read | -write | -skipAnalyze | -clean [-nrFiles N] [-fileSize MB]" +
-        " [-resFile resultFileName] [-bufferSize Bytes] [-tputFile AggregatedThroughputCSVFileName] " +
-        "[-sampleInterval Miliseconds] [-plotInterval Miliseconds] [-sampleUnit g|m|k|b] [-sumThreshold 0.0~1.0]" +
-        " [-tputReportEach] [-tputReportTotal] [-analyzeNumReduceTasks]";
+    String usage = "Usage: TestFDSIOEnh -read | -write | -skipAnalyze | -clean [-nrFiles N] [-fileSize MB] " +
+        "[-resFile resultFileName] [-bufferSize Bytes] [-tputFile AggregatedThroughputCSVFileName] " +
+        "[-sampleInterval Miliseconds] [-plotInterval Miliseconds] [-sampleUnit g|m|k|b] " +
+        "[-sumThreshold 0.0~1.0] [-tputReportEach] [-tputReportTotal] [-analyzeNumReduceTasks]";
 
     System.out.println(version);
     if (args.length == 0) {
diff --git a/bin/functions/hibench_prop_env_mapping.py b/bin/functions/hibench_prop_env_mapping.py
index a3b91d064..235181c5b 100644
--- a/bin/functions/hibench_prop_env_mapping.py
+++ b/bin/functions/hibench_prop_env_mapping.py
@@ -163,6 +163,7 @@
     RD_FILE_SIZE="hibench.dfsioe.read.file_size",
     WT_NUM_OF_FILES="hibench.dfsioe.write.number_of_files",
     WT_FILE_SIZE="hibench.dfsioe.write.file_size",
+    NUM_REDUCE_TASKS_ANALYZE="hibench.dfsioe.analyze.num_reduce_tasks",
     MAP_JAVA_OPTS="hibench.dfsioe.map.java_opts",
     RED_JAVA_OPTS="hibench.dfsioe.red.java_opts",
     # For NWeight
diff --git a/bin/workloads/micro/dfsioe/hadoop/run_read.sh b/bin/workloads/micro/dfsioe/hadoop/run_read.sh
index c4b7245b8..568ec5749 100755
--- a/bin/workloads/micro/dfsioe/hadoop/run_read.sh
+++ b/bin/workloads/micro/dfsioe/hadoop/run_read.sh
@@ -29,7 +29,7 @@
 rmr_hdfs $INPUT_HDFS/io_read || true
 rmr_hdfs $INPUT_HDFS/_* || true
 SIZE=`dir_size $INPUT_HDFS`
-OPTION="-read -nrFiles ${RD_NUM_OF_FILES} -fileSize ${RD_FILE_SIZE} -bufferSize 131072 -plotInteval 1000 -sampleUnit m -sampleInteval 200 -sumThreshold 0.5 -tputReportTotal -Dtest.build.data=${INPUT_HDFS}"
+OPTION="-read -nrFiles ${RD_NUM_OF_FILES} -fileSize ${RD_FILE_SIZE} -bufferSize 131072 -plotInteval 1000 -sampleUnit m -sampleInteval 200 -sumThreshold 0.5 -tputReportTotal -Dtest.build.data=${INPUT_HDFS} -analyzeNumReduceTasks ${NUM_REDUCE_TASKS_ANALYZE}"
 
 OLD_HADOOP_OPTS=${HADOOP_OPTS:-}
 export HADOOP_OPTS="${HADOOP_OPTS:-} -Dtest.build.data=${INPUT_HDFS} "
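Patch 2 completes the plumbing on the HiBench side: hibench_prop_env_mapping.py maps the new property hibench.dfsioe.analyze.num_reduce_tasks onto the NUM_REDUCE_TASKS_ANALYZE shell variable, which run_read.sh then forwards as -analyzeNumReduceTasks. A minimal sketch of driving it through the framework (the value 4 is illustrative; appending to conf/hibench.conf assumes the standard HiBench layout, and the property could equally go in a workload-level conf file):

    # Illustrative: define the property, then run the dfsioe read workload.
    echo "hibench.dfsioe.analyze.num_reduce_tasks    4" >> conf/hibench.conf
    bin/workloads/micro/dfsioe/hadoop/run_read.sh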
From ba83dff23bc9b4fa59817d52530755ed3022bb14 Mon Sep 17 00:00:00 2001
From: martinJia
Date: Tue, 15 Oct 2019 14:46:35 +0800
Subject: [PATCH 3/3] add configuration to run_write.sh

---
 bin/workloads/micro/dfsioe/hadoop/run_write.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/workloads/micro/dfsioe/hadoop/run_write.sh b/bin/workloads/micro/dfsioe/hadoop/run_write.sh
index 39c356f33..878bfd694 100755
--- a/bin/workloads/micro/dfsioe/hadoop/run_write.sh
+++ b/bin/workloads/micro/dfsioe/hadoop/run_write.sh
@@ -29,7 +29,7 @@
 rmr_hdfs ${OUTPUT_HDFS} || true
 # pre-running
 SIZE=`dir_size $INPUT_HDFS`
-OPTION="-write -nrFiles ${WT_NUM_OF_FILES} -fileSize ${WT_FILE_SIZE} -bufferSize 4096 -plotInteval 1000 -sampleUnit m -sampleInteval 200 -sumThreshold 0.5 -tputReportTotal -Dtest.build.data=${INPUT_HDFS}"
+OPTION="-write -nrFiles ${WT_NUM_OF_FILES} -fileSize ${WT_FILE_SIZE} -bufferSize 4096 -plotInteval 1000 -sampleUnit m -sampleInteval 200 -sumThreshold 0.5 -tputReportTotal -Dtest.build.data=${INPUT_HDFS} -analyzeNumReduceTasks ${NUM_REDUCE_TASKS_ANALYZE}"
 
 OLD_HADOOP_OPTS=${HADOOP_OPTS:-}
 export HADOOP_OPTS="${HADOOP_OPTS:-} -Dtest.build.data=${INPUT_HDFS} "
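One caveat: after this series, both run scripts append -analyzeNumReduceTasks ${NUM_REDUCE_TASKS_ANALYZE} unconditionally, so if the property is never defined the variable expands to nothing and the flag is left without a value for Integer.parseInt to consume. A hypothetical hardening of the scripts (not part of the patch) would supply a shell-level default matching the Java-side default of 1:

    # Hypothetical: fall back to a single reducer when
    # hibench.dfsioe.analyze.num_reduce_tasks is not configured.
    OPTION="${OPTION} -analyzeNumReduceTasks ${NUM_REDUCE_TASKS_ANALYZE:-1}"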