#!/usr/bin/env bash
#/***************************************************************************
# *   Copyright (C) 2009 by Andy Pavlo, Brown University                    *
# *   http://www.cs.brown.edu/~pavlo/                                       *
# *                                                                         *
# *   Permission is hereby granted, free of charge, to any person obtaining *
# *   a copy of this software and associated documentation files (the       *
# *   "Software"), to deal in the Software without restriction, including   *
# *   without limitation the rights to use, copy, modify, merge, publish,   *
# *   distribute, sublicense, and/or sell copies of the Software, and to    *
# *   permit persons to whom the Software is furnished to do so, subject to *
# *   the following conditions:                                             *
# *                                                                         *
# *   The above copyright notice and this permission notice shall be        *
# *   included in all copies or substantial portions of the Software.       *
# *                                                                         *
# *   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       *
# *   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    *
# *   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*
# *   IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR     *
# *   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
# *   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
# *   OTHER DEALINGS IN THE SOFTWARE.                                       *
# ***************************************************************************/

## =====================================================
## ABOUT THIS FILE
## =====================================================
## Contains only variable and array assignments; no commands are run.
## The arrays below are a bash feature (not POSIX sh), hence the bash
## shebang.
##
## Variables read here but not defined here:
##   HADOOP_CONF_DIR           - prefix of SLAVES_CONF_DIR
##   ORIG_SORTGREP_DATA_DIR    - value copied into SORTGREP_INPUT_DIR
##   ORIG_BENCHMARK2_DATA_DIR  - value copied into BENCHMARKS2_INPUT_DIR
## If any of them is unset, the derived value silently expands with an
## empty string in its place.
## NOTE(review): presumably this file is sourced by the benchmark driver
## after those variables are set -- confirm against the caller.

## =====================================================
## DATA OPTIONS
## =====================================================
COMBINE_RESULTS=1   ## Combine Final Results into Single Output File
COMPRESS_DATA=0     ## Compress Input Data and Intermediate Results
TUPLE_DATA=0        ## Use TupleWritable for multi-column values
SEQUENCE_FILE=0     ## Use SequenceFile Input Formats
SPLIT_DATA=0        ## Split input files into separate, smaller subsets
INPUT_LIMIT=0       ## How many lines to limit for Input Files when loading

## =====================================================
## RUN TIME OPTIONS
## =====================================================

##
## Number of Trials Per Cluster Configuration
##
NUM_OF_TRIALS=3

##
## Debug Output
## This flag is used in both the DataLoader and the Benchmarks
##
DEBUG=0

##
## Start the daemons for each new configuration
##
START_HDFS_MASTER=1
START_JOB_TRACKER=1

##
## Load/Unload Options
##
REFORMAT_HDFS=1        # Reformat HDFS before each run
LOAD_INPUT_DATA=1      # Load input data before executing
CLEANUP_INPUT_DATA=0   # Remove temporary files after loading input data (compression)
UNLOAD_INPUT_DATA=0    # Delete input data at shutdown
UNLOAD_OUTPUT_DATA=0   # Delete output data at shutdown

##
## Cache Control
##
FLUSH_CACHE_BEFORE_LOAD=1
FLUSH_CACHE_BEFORE_TRIAL=0
FLUSH_CACHE_BEFORE_BENCHMARK=1

##
## Directory of slave files to use
## This directory should contain the following files:
##    slaves-001 (1 host)
##    slaves-010 (10 hosts)
##    slaves-025 (25 hosts)
##    slaves-050 (50 hosts)
##    slaves-100 (100 hosts)
##    slaves-200 (200 hosts)
##
SLAVES_CONF_DIR="$HADOOP_CONF_DIR/slaves-000"

##
## What benchmarks to actually execute
## Each entry is also the name prefix of the per-set variables defined
## further down (<SET>_BENCHMARK_TYPES, <SET>_DATA_SETS, ...).
##
BENCHMARK_SETS=(
   "SORTGREP"
   "BENCHMARKS2"
)

## =====================================================
## SORTGREP BENCHMARK CONFIGURATION
## =====================================================

## ----------------------------
## Benchmarks
## ----------------------------
SORTGREP_BENCHMARK_TYPES=(
   "Grep"
)

## ----------------------------
## Data Sets
## One <SET>_<DATA SET>_CLUSTER_CONFIGURATIONS array exists per entry.
## ----------------------------
SORTGREP_DATA_SETS=(
   "535MB"
   "1TB"
)

## ----------------------------
## 535MB Data Set Slave Configurations
## ----------------------------
SORTGREP_535MB_CLUSTER_CONFIGURATIONS=(
   "slaves-001"
   "slaves-010"
   "slaves-025"
   "slaves-050"
   "slaves-100"
)

## ----------------------------
## 1TB Data Set Slave Configurations
## ----------------------------
SORTGREP_1TB_CLUSTER_CONFIGURATIONS=(
   "slaves-025"
   "slaves-050"
   "slaves-100"
)

## ----------------------------
## Input Data
## ----------------------------
SORTGREP_INPUT_FILES=( ) # LEAVE BLANK
SORTGREP_INPUT_DIR="$ORIG_SORTGREP_DATA_DIR"

## =====================================================
## BENCHMARKS2 CONFIGURATION
## =====================================================

## ----------------------------
## Benchmarks
## ----------------------------
BENCHMARKS2_BENCHMARK_TYPES=(
   "Benchmark1"
   "Benchmark2"
   "Benchmark2a"
   "Benchmark3"
   "Benchmark4"
)

## ----------------------------
## Data Sets
## ----------------------------
BENCHMARKS2_DATA_SETS=(
   "FULL"
)

## ----------------------------
## Cluster Configurations
## ----------------------------
BENCHMARKS2_FULL_CLUSTER_CONFIGURATIONS=(
   "slaves-001"
   "slaves-010"
   "slaves-025"
   "slaves-050"
   "slaves-100"
)

## ----------------------------
## Input Data
## ----------------------------
BENCHMARKS2_INPUT_FILES=(
   "Rankings.dat"
   "UserVisits.dat"
   "docs"
)
BENCHMARKS2_INPUT_DIR="$ORIG_BENCHMARK2_DATA_DIR"