List of usage examples for org.apache.hadoop.conf Configuration get
public String get(String name)
Parameter: name - the property name.
Returns: the value of the name property, or null
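A minimal stand-alone sketch of the call, before the project examples below; the property names, the fallback value, and the class name ConfigurationGetExample are illustrative only and not taken from any of the listed sources:

import org.apache.hadoop.conf.Configuration;

public class ConfigurationGetExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("example.input.dir", "/tmp/input");

        // get(name) returns the stored value, or null when the property is not set
        String inputDir = conf.get("example.input.dir");   // "/tmp/input"
        String missing = conf.get("example.missing.key");  // null

        // guard against null before using the result, or use the two-argument
        // overload get(name, defaultValue) to supply a fallback
        String separator = conf.get("example.separator", "\t");

        System.out.println(inputDir + " | " + missing + " | '" + separator + "'");
    }
}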
if no such property exists. From source file: at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java
License:Apache License
public static void main(String[] args) throws Exception {
    // Defaults
    int numBspTask = 1; // CPU + GPU tasks
    int numGpuBspTask = 1; // GPU tasks
    int blockSize = BLOCK_SIZE;
    int gridSize = GRID_SIZE;
    int maxIteration = 3; // 150;
    int matrixRank = 3;
    int skipCount = 1;
    double alpha = ALPHA;
    int userCount = 0;
    int itemCount = 0;
    int percentNonZeroValues = 0;
    int GPUPercentage = 20;
    boolean useTestExampleInput = true;
    boolean isDebugging = true;
    String inputFile = "";
    String separator = "\\t";

    Configuration conf = new HamaConfiguration();
    FileSystem fs = FileSystem.get(conf);

    // Set numBspTask to maxTasks
    // BSPJobClient jobClient = new BSPJobClient(conf);
    // ClusterStatus cluster = jobClient.getClusterStatus(true);
    // numBspTask = cluster.getMaxTasks();

    if (args.length > 0) {
        if (args.length >= 14) {
            numBspTask = Integer.parseInt(args[0]);
            numGpuBspTask = Integer.parseInt(args[1]);
            blockSize = Integer.parseInt(args[2]);
            gridSize = Integer.parseInt(args[3]);
            maxIteration = Integer.parseInt(args[4]);
            matrixRank = Integer.parseInt(args[5]);
            skipCount = Integer.parseInt(args[6]);
            alpha = Double.parseDouble(args[7]);
            userCount = Integer.parseInt(args[8]);
            itemCount = Integer.parseInt(args[9]);
            percentNonZeroValues = Integer.parseInt(args[10]);
            GPUPercentage = Integer.parseInt(args[11]);
            useTestExampleInput = Boolean.parseBoolean(args[12]);
            isDebugging = Boolean.parseBoolean(args[13]);
            // optional parameters
            if (args.length > 14) {
                inputFile = args[14];
            }
            if (args.length > 15) {
                separator = args[15];
            }
        } else {
            System.out.println("Wrong argument size!");
            System.out.println(" Argument1=numBspTask");
            System.out.println(" Argument2=numGpuBspTask");
            System.out.println(" Argument3=blockSize");
            System.out.println(" Argument4=gridSize");
            System.out.println(" Argument5=maxIterations | Number of maximal iterations (" + maxIteration + ")");
            System.out.println(" Argument6=matrixRank | matrixRank (" + matrixRank + ")");
            System.out.println(" Argument7=skipCount | skipCount (" + skipCount + ")");
            System.out.println(" Argument8=alpha | alpha (" + alpha + ")");
            System.out.println(" Argument9=userCount | userCount (" + userCount + ")");
            System.out.println(" Argument10=itemCount | itemCount (" + itemCount + ")");
            System.out.println(" Argument11=percentNonZeroValues | percentNonZeroValues (" + percentNonZeroValues + ")");
            System.out.println(" Argument12=GPUPercentage (percentage of input)");
            System.out.println(" Argument13=testExample | Use testExample input (true|false=default)");
            System.out.println(" Argument14=debug | Enable debugging (true|false=default)");
            System.out.println(" Argument15=inputFile (optional) | MovieLens inputFile");
            System.out.println(" Argument16=separator (optional) | default '" + separator + "' ");
            return;
        }
    }

    // Check if inputFile exists
    if ((!inputFile.isEmpty()) && (!new File(inputFile).exists())) {
        System.out.println("Error: inputFile: " + inputFile + " does not exist!");
        return;
    }

    // Check parameters
    if ((inputFile.isEmpty()) && (!useTestExampleInput) && (userCount <= 0) && (itemCount <= 0)
            && (percentNonZeroValues <= 0)) {
        System.out.println("Invalid parameter: userCount: " + userCount + " itemCount: " + itemCount
                + " percentNonZeroValues: " + percentNonZeroValues);
        return;
    }

    // Check if blockSize < matrixRank when using GPU
    if ((numGpuBspTask > 0) && (blockSize < matrixRank)) {
        System.out.println("Error: BlockSize < matrixRank");
        return;
    }

    // Check GPUPercentage
    if ((GPUPercentage < 0) || (GPUPercentage > 100)) {
        System.out.println("Error: GPUPercentage must be between 0 and 100 percent");
        return;
    }

    // Set config variables
    conf.setBoolean(CONF_DEBUG, isDebugging);
    conf.setBoolean("hama.pipes.logging", isDebugging);
    // Set CPU tasks
    conf.setInt("bsp.peers.num", numBspTask);
    // Set GPU tasks
    conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
    // Set GPU blockSize and gridSize
    conf.set(CONF_BLOCKSIZE, "" + blockSize);
    conf.set(CONF_GRIDSIZE, "" + gridSize);
    conf.setInt(OnlineCF.CONF_ITERATION_COUNT, maxIteration);
    conf.setInt(OnlineCF.CONF_MATRIX_RANK, matrixRank);
    conf.setInt(OnlineCF.CONF_SKIP_COUNT, skipCount);

    // Debug output
    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("BlockSize: " + conf.get(CONF_BLOCKSIZE));
    LOG.info("GridSize: " + conf.get(CONF_GRIDSIZE));
    LOG.info("GPUPercentage: " + GPUPercentage);
    LOG.info("isDebugging: " + isDebugging);
    LOG.info("useTestExampleInput: " + useTestExampleInput);
    LOG.info("inputPath: " + CONF_INPUT_DIR);
    LOG.info("outputPath: " + CONF_OUTPUT_DIR);
    LOG.info("maxIteration: " + maxIteration);
    LOG.info("matrixRank: " + matrixRank);
    LOG.info("skipCount: " + skipCount);
    LOG.info("alpha: " + alpha);
    LOG.info("userCount: " + userCount);
    LOG.info("itemCount: " + itemCount);
    LOG.info("percentNonZeroValues: " + percentNonZeroValues);
    if (!inputFile.isEmpty()) {
        LOG.info("inputFile: " + inputFile);
        LOG.info("separator: " + separator);
    }

    // prepare Input
    int maxTestPrefs = 10;
    Path preferencesIn = new Path(CONF_INPUT_DIR, "preferences_in.seq");
    List<Preference<Long, Long>> testPrefs = null;
    if (useTestExampleInput) {
        testPrefs = prepareTestInputData(conf, fs, CONF_INPUT_DIR, preferencesIn);
    } else if (inputFile.isEmpty()) {
        testPrefs = generateRandomInputData(conf, fs, CONF_INPUT_DIR, numBspTask, numGpuBspTask, userCount,
                itemCount, percentNonZeroValues, GPUPercentage, maxTestPrefs);
    } else if (!inputFile.isEmpty()) {
        // parse inputFile and return first entries for testing
        testPrefs = convertInputData(conf, fs, CONF_INPUT_DIR, preferencesIn, inputFile, separator, maxTestPrefs);
    }

    // Generate Job config
    BSPJob job = createOnlineCFTrainHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

    // Execute Job
    long startTime = System.currentTimeMillis();
    if (job.waitForCompletion(true)) {
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

        // Load Job results for testing
        OnlineCF recommender = new OnlineCF();
        recommender.load(CONF_OUTPUT_DIR.toString(), false);

        // Test results
        int error = 0;
        double totalError = 0;
        for (Preference<Long, Long> test : testPrefs) {
            double expected = test.getValue().get();
            double estimated = recommender.estimatePreference(test.getUserId(), test.getItemId());

            if (testPrefs.size() <= 20) {
                LOG.info("(" + test.getUserId() + ", " + test.getItemId() + ", " + expected + "): " + estimated
                        + " error: " + Math.abs(expected - estimated));
            }
            totalError += Math.abs(expected - estimated);
            error += (Math.abs(expected - estimated) < 0.5) ? 1 : 0;
        }
        LOG.info("totalError: " + totalError);
        LOG.info("assertEquals(expected: " + (testPrefs.size() * 0.75) + " == " + error + " actual) with delta: 1");

        if (isDebugging) {
            printOutput(conf, fs, ".log", new IntWritable(), new PipesVectorWritable());
        }
    }
}
From source file:at.illecker.hama.hybrid.examples.testglobalgpusync.TestGlobalGpuSyncHybridBSP.java
License:Apache License
public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {
    // Defaults
    int numBspTask = 1;
    int numGpuBspTask = 1;
    int blockSize = BLOCK_SIZE;
    int gridSize = GRID_SIZE;
    boolean isDebugging = false;

    Configuration conf = new HamaConfiguration();

    if (args.length > 0) {
        if (args.length == 5) {
            numBspTask = Integer.parseInt(args[0]);
            numGpuBspTask = Integer.parseInt(args[1]);
            blockSize = Integer.parseInt(args[2]);
            gridSize = Integer.parseInt(args[3]);
            isDebugging = Boolean.parseBoolean(args[4]);
        } else {
            System.out.println("Wrong argument size!");
            System.out.println(" Argument1=numBspTask");
            System.out.println(" Argument2=numGpuBspTask");
            System.out.println(" Argument3=blockSize");
            System.out.println(" Argument4=gridSize");
            System.out.println(" Argument5=debug | Enable debugging (true|false=default)");
            return;
        }
    }

    // Set config variables
    conf.setBoolean("hama.pipes.logging", isDebugging);
    // Set CPU tasks
    conf.setInt("bsp.peers.num", numBspTask);
    // Set GPU tasks
    conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
    // Set GPU blockSize and gridSize
    conf.set(CONF_BLOCK_SIZE, "" + blockSize);
    conf.set(CONF_GRID_SIZE, "" + gridSize);
    conf.set(CONF_TMP_DIR, TMP_DIR.toString());

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("BlockSize: " + conf.get(CONF_BLOCK_SIZE));
    LOG.info("GridSize: " + conf.get(CONF_GRID_SIZE));
    LOG.info("TempDir: " + conf.get(CONF_TMP_DIR));
    LOG.info("isDebugging: " + conf.getBoolean("hama.pipes.logging", false));

    BSPJob job = createTestGlobalGpuSyncHybridBSPConf(conf);

    long startTime = System.currentTimeMillis();
    if (job.waitForCompletion(true)) {
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
        printOutput(job, FileSystem.get(conf), new Path(conf.get(CONF_TMP_DIR)));
    }
}
From source file:at.illecker.hama.hybrid.examples.testrootbeer.TestRootbeerHybridBSP.java
License:Apache License
public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {
    Configuration conf = new HamaConfiguration();

    if (args.length > 0) {
        if (args.length == 1) {
            conf.setInt("bsp.peers.num", Integer.parseInt(args[0]));
        } else {
            System.out.println("Wrong argument size!");
            System.out.println(" Argument1=numBspTask");
            return;
        }
    } else {
        // BSPJobClient jobClient = new BSPJobClient(conf);
        // ClusterStatus cluster = jobClient.getClusterStatus(true);
        // job.setNumBspTask(cluster.getMaxTasks());
        conf.setInt("bsp.peers.num", 2); // (1 CPU and 1 GPU task)
    }
    // Enable one GPU task
    conf.setInt("bsp.peers.gpu.num", 1);
    conf.setBoolean("hama.pipes.logging", false);

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumBspGpuTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("inputPath: " + CONF_INPUT_DIR);
    LOG.info("outputPath: " + CONF_OUTPUT_DIR);
    LOG.info("blockSize: " + CONF_BLOCK_SIZE);
    LOG.info("gridSize: " + CONF_GRID_SIZE);
    LOG.info("totalInputs: " + CONF_BLOCK_SIZE * CONF_GRID_SIZE);

    prepareInput(conf, CONF_INPUT_DIR, CONF_BLOCK_SIZE * CONF_GRID_SIZE, 100);

    BSPJob job = createTestRootbeerHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

    long startTime = System.currentTimeMillis();
    if (job.waitForCompletion(true)) {
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
        printOutput(job, FileOutputFormat.getOutputPath(job));
    }
}
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.cpu.MatrixMultiplicationBSPCpu.java
License:Apache License
@Override
public void setup(
        BSPPeer<IntWritable, PipesVectorWritable, IntWritable, PipesVectorWritable, MatrixRowMessage> peer)
        throws IOException {

    Configuration conf = peer.getConfiguration();
    m_isDebuggingEnabled = conf.getBoolean(CONF_DEBUG, false);

    // Choose one as a master, who sorts the matrix rows at the end
    // m_masterTask = peer.getPeerName(peer.getNumPeers() / 2);
    // TODO
    // task must be 0 otherwise write out does NOT work!
    m_masterTask = peer.getPeerName(0);

    // Init logging
    if (m_isDebuggingEnabled) {
        try {
            FileSystem fs = FileSystem.get(conf);
            m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) conf))
                    + "/BSP_" + peer.getTaskId() + ".log"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // Receive transposed Matrix B
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf),
            new Path(conf.get(CONF_MATRIX_MULT_B_PATH)), conf);

    IntWritable bKey = new IntWritable();
    PipesVectorWritable bVector = new PipesVectorWritable();
    // for each col of matrix B (cause by transposed B)
    while (reader.next(bKey, bVector)) {
        m_bColumns.add(new KeyValuePair<Integer, DoubleVector>(bKey.get(), bVector.getVector()));
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("setup,read,transposedMatrixB,key=" + bKey.get() + ",value="
                    + bVector.getVector().toString() + "\n");
        }
    }
    reader.close();
}
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.gpu.MatrixMultiplicationBSPGpu.java
License:Apache License
@Override
public void setup(BSPPeer<IntWritable, VectorWritable, IntWritable, VectorWritable, NullWritable> peer)
        throws IOException {

    Configuration conf = peer.getConfiguration();
    m_isDebuggingEnabled = conf.getBoolean(CONF_DEBUG, false);

    // Choose one as a master, who sorts the matrix rows at the end
    // m_masterTask = peer.getPeerName(peer.getNumPeers() / 2);
    // TODO
    // task must be 0 otherwise write out does NOT work!
    m_masterTask = peer.getPeerName(0);

    this.m_blockSize = Integer.parseInt(peer.getConfiguration().get(CONF_BLOCKSIZE));
    this.m_gridSize = Integer.parseInt(peer.getConfiguration().get(CONF_GRIDSIZE));

    // Init logging
    if (m_isDebuggingEnabled) {
        try {
            FileSystem fs = FileSystem.get(conf);
            m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) conf))
                    + "/BSP_" + peer.getTaskId() + ".log"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // Load matrixB
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf),
            new Path(conf.get(CONF_MATRIX_MULT_B_PATH)), conf);

    List<DoubleVector> matrixB = new ArrayList<DoubleVector>();
    IntWritable bKey = new IntWritable();
    VectorWritable bVector = new VectorWritable();
    // for each row of matrix B
    while (reader.next(bKey, bVector)) {
        matrixB.add(bVector.getVector());
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("bsp,setup,MatrixB (" + bKey.get() + "," + bVector.getVector() + ")\n");
        }
    }
    reader.close();

    // Convert matrixB to double array for GPU kernels
    m_matrixBArr = toArray(matrixB);

    if (m_isDebuggingEnabled) {
        for (int i = 0; i < m_matrixBArr.length; i++) {
            m_logger.writeChars("bsp,setup,MatrixBArr (" + i + "," + Arrays.toString(m_matrixBArr[i]) + ")\n");
        }
    }

    // threadSliceSize defines how much multipliers
    // of column B has to be multiplied with column A
    m_threadSliceSize = divup(m_matrixBArr.length, m_blockSize);

    // blockSliceSize defines the column slice amount
    // columns of B per blockIters
    m_blockSliceSize = divup(m_matrixBArr[0].length, m_gridSize);

    if (m_isDebuggingEnabled) {
        m_logger.writeChars("bsp,setup,blockSize=" + m_blockSize + ",gridSize=" + m_gridSize
                + ",threadSliceSize=" + m_threadSliceSize + ",blockSliceSize=" + m_blockSliceSize + "\n");
    }
}
From source file:awshamondsidefunctions.AWSDiamondReducer.java
License:Apache License
@Override
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    String userName = HadoopUser.getHadoopUser();
    Configuration conf = context.getConfiguration();
    String outPut = conf.get(DiamondMapReduce.OUTPUT);
    FileSystem fs = FileSystem.get(conf);

    // merge all output files into one output file on HDFS
    FileUtil.copyMerge(fs, new Path("/user/" + userName + "/Hamond/output"), fs,
            new Path("/user/" + userName + "/Hamond/" + new Path(outPut).getName()), false, conf, null);
}
From source file:azkaban.jobtype.HadoopJobUtils.java
License:Apache License
/**
 * <pre>
 * Constructs a javaOpts string based on the Props and the key given; will return
 * String.format("-D%s=%s", key, value)
 * </pre>
 *
 * @param conf
 * @param key
 * @return will return String.format("-D%s=%s", key, value). Throws RuntimeException if the property is not
 *         present
 */
public static String javaOptStringFromHadoopConfiguration(Configuration conf, String key) {
    String value = conf.get(key);
    if (value == null) {
        throw new RuntimeException(
                String.format("Cannot find property [%s], in Hadoop configuration: [%s]", key, value));
    }
    return String.format("-D%s=%s", key, value);
}
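A hypothetical caller of the helper above, shown only to illustrate the returned "-Dkey=value" string; the configuration key, the class name JavaOptsExample, and the jvmArgs list are assumptions for this sketch, not part of Azkaban's code:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;

import azkaban.jobtype.HadoopJobUtils;

public class JavaOptsExample {
    public static void main(String[] args) {
        // "mapreduce.job.queuename" is just an example key; any key present in the
        // Configuration works, and a missing key makes the helper throw a RuntimeException.
        Configuration conf = new Configuration();
        conf.set("mapreduce.job.queuename", "default");

        String queueOpt = HadoopJobUtils.javaOptStringFromHadoopConfiguration(conf, "mapreduce.job.queuename");
        // queueOpt is now "-Dmapreduce.job.queuename=default"

        List<String> jvmArgs = new ArrayList<String>();
        jvmArgs.add(queueOpt);
        System.out.println(jvmArgs);
    }
}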
From source file:bdss.cmu.edu.Sort.java
License:Apache License
/**
 * The main driver for the sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *         job tracker.
 */
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = conf.get(REDUCES_PER_HOST);
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    job = new Job(conf);
    job.setJobName("sorter");
    job.setJarByClass(Sort.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);

    job.setNumReduceTasks(num_reduces);

    job.setInputFormatClass(inputFormatClass);
    job.setOutputFormatClass(outputFormatClass);

    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(job, otherArgs.get(0));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        job.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(job)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile);
        InputSampler.<K, V>writePartitionFile(job, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, conf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job) + " with "
            + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:be.ugent.intec.halvade.utils.HalvadeConf.java
License:Open Source License
public static String getJava(Configuration conf) { return conf.get(java); }
From source file:be.ugent.intec.halvade.utils.HalvadeConf.java
License:Open Source License
public static boolean getFilterDBSnp(Configuration conf) {
    String s = conf.get(filterDBSnp);
    if (s.equalsIgnoreCase("true"))
        return true;
    else
        return false;
}
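For comparison, a minimal sketch of the same lookup written with Configuration.getBoolean, which parses the stored string and supplies a default when the property is absent (the conf.get(...) plus equalsIgnoreCase pattern above would throw a NullPointerException on a missing key); the key string and class name below are placeholders, since the real HalvadeConf constant is not shown in this excerpt:

import org.apache.hadoop.conf.Configuration;

public class FilterDBSnpSketch {
    // Placeholder key; the actual HalvadeConf property name is not visible here.
    private static final String FILTER_DBSNP = "example.filter.dbsnp";

    public static boolean getFilterDBSnp(Configuration conf) {
        // getBoolean returns the parsed value, or the given default when the key is missing
        return conf.getBoolean(FILTER_DBSNP, false);
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setBoolean(FILTER_DBSNP, true);
        System.out.println(getFilterDBSnp(conf)); // true
    }
}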