List of usage examples for org.apache.hadoop.conf.Configuration setInt
public void setInt(String name, int value)
Sets the value of the name property to an int.
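Before the project examples below, a minimal standalone sketch of the basic pattern (not taken from any of the source files): setInt stores the value under the property name, and getInt reads it back, falling back to its second argument only when the property is unset.

import org.apache.hadoop.conf.Configuration;

public class SetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setInt("dfs.replication", 1); // store an int property
        int replication = conf.getInt("dfs.replication", 3); // reads 1; 3 is only the default
        System.out.println("dfs.replication = " + replication);
    }
}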
From source file:ColumnStorageBasicTest.java
License:Open Source License
public void createAllSingleProject(FileSystem fs) throws Exception {
    Configuration conf = new Configuration();
    conf.setInt("dfs.replication", 1);

    FormatDataFile[] fd = new FormatDataFile[7];
    for (int i = 0; i < 7; i++) {
        fd[i] = new FormatDataFile(conf);
    }

    FieldMap fieldMap = new FieldMap();
    fieldMap.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));
    Head head = new Head();
    head.setFieldMap(fieldMap);
    fd[0].create(byteFileName, head);

    fieldMap = new FieldMap();
    fieldMap.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Short, (short) 1));
    head = new Head();
    head.setFieldMap(fieldMap);
    fd[1].create(shortFileName, head);

    fieldMap = new FieldMap();
    fieldMap.addField(new Field(ConstVar.FieldType_Int, ConstVar.Sizeof_Int, (short) 2));
    head = new Head();
    head.setFieldMap(fieldMap);
    fd[2].create(intFileName, head);

    fieldMap = new FieldMap();
    fieldMap.addField(new Field(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, (short) 3));
    head = new Head();
    head.setFieldMap(fieldMap);
    fd[3].create(longFileName, head);

    fieldMap = new FieldMap();
    fieldMap.addField(new Field(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, (short) 4));
    head = new Head();
    head.setFieldMap(fieldMap);
    fd[4].create(floatFileName, head);

    fieldMap = new FieldMap();
    fieldMap.addField(new Field(ConstVar.FieldType_Double, ConstVar.Sizeof_Double, (short) 5));
    head = new Head();
    head.setFieldMap(fieldMap);
    fd[5].create(doubleFileName, head);

    fieldMap = new FieldMap();
    fieldMap.addField(new Field(ConstVar.FieldType_String, 0, (short) 6));
    head = new Head();
    head.setFieldMap(fieldMap);
    fd[6].create(stringFileName, head);

    long begin = System.currentTimeMillis();

    int count = 10;
    for (int i = 0; i < count; i++) {
        Record record = new Record(1);
        record.addValue(new FieldValue((byte) i, (short) 0));
        fd[0].addRecord(record);

        record = new Record(1);
        record.addValue(new FieldValue((short) i, (short) 1));
        fd[1].addRecord(record);

        record = new Record(1);
        record.addValue(new FieldValue((int) i, (short) 2));
        fd[2].addRecord(record);

        record = new Record(1);
        record.addValue(new FieldValue((long) i, (short) 3));
        fd[3].addRecord(record);

        record = new Record(1);
        record.addValue(new FieldValue((float) i, (short) 4));
        fd[4].addRecord(record);

        record = new Record(1);
        record.addValue(new FieldValue((double) i, (short) 5));
        fd[5].addRecord(record);

        record = new Record(1);
        record.addValue(new FieldValue("hello konten" + i, (short) 6));
        fd[6].addRecord(record);
    }

    for (int i = 0; i < 7; i++) {
        fd[i].close();
    }

    /*
    createProjectByte(fs);
    createProjectShort(fs);
    createProjectInt(fs);
    createProjectLong(fs);
    createProjectFloat(fs);
    createProjectDouble(fs);
    createProjectString(fs);
    */

    long end = System.currentTimeMillis();
    System.out.println("createAllProject delay:" + (end - begin) / 1000);
}
From source file:ColumnStorageBasicTest.java
License:Open Source License
public void createProjectByte(FileSystem fs) throws Exception {
    FieldMap fieldMap = new FieldMap();
    fieldMap.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));

    Head head = new Head();
    head.setFieldMap(fieldMap);

    Configuration conf = new Configuration();
    conf.setInt("dfs.replication", 1);

    FormatDataFile fd = new FormatDataFile(conf);
    // Assumed: the file must be created before records are added, mirroring
    // createAllSingleProject above; this call is missing in the scraped original.
    fd.create(byteFileName, head);

    int count = 10;
    for (int i = 0; i < count; i++) {
        Record record = new Record(1);
        record.addValue(new FieldValue((byte) i, (short) 0));
        fd.addRecord(record);
    }
    fd.close();
}
From source file:ExtractTopPersonalizedPageRankNodes.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("top n").create(TOP));
    options.addOption(OptionBuilder.withArgName("src").hasArg().withDescription("source node").create(SRC));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(TOP)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = "abc"; // cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(TOP));

    // LOG.info("Tool name: " + ExtractTopPersonalizedPageRankNodes.class.getSimpleName());
    // LOG.info(" - input: " + inputPath);
    // LOG.info(" - output: " + outputPath);
    // LOG.info(" - top: " + n);

    Configuration conf = getConf();
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.setInt(TOP_PG, n);

    Job job = Job.getInstance(conf);
    job.setJobName(ExtractTopPersonalizedPageRankNodes.class.getName() + ":" + inputPath);
    job.setJarByClass(ExtractTopPersonalizedPageRankNodes.class);

    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(PairOfIntFloat.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(FloatWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setPartitionerClass(MyPartitioner.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    FileSystem fileSystem = FileSystem.get(conf);
    Path path = new Path(outputPath + "/part-r-00000");
    // MapFile.Reader reader = new MapFile.Reader(new Path(outputPath + "/part-r-00000"), conf);
    // InputStream fis = new FileInputStream(outputPath + "/part-r-00000");
    BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(path)));

    String s;
    float key; // = new FloatWritable();
    int value; // = new IntWritable();
    while ((s = br.readLine()) != null) {
        String[] sources = s.split("\\s+");
        key = Float.parseFloat(sources[0]);
        value = Integer.parseInt(sources[1]);
        if (key == 0.0f) {
            System.out.print("\n" + "Source: " + value + "\n");
        } else {
            System.out.print(String.format("%.5f %d", key, value) + "\n");
        }
    }
    // reader.close();
    br.close();

    // while (!SysOut.isEmpty()) {
    //     System.out.print(SysOut.poll());
    // }

    return 0;
}
From source file:BuildPageRankRecords.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
        OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));

    LOG.info("Tool name: " + BuildPageRankRecords.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numNodes: " + n);

    Configuration conf = getConf();
    conf.setInt(NODE_CNT_FIELD, n);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildPageRankRecords.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(BuildPageRankRecords.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}
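The two drivers above push per-job parameters (the top-n cutoff, the node count) to their tasks with setInt; on the task side the value comes back through getInt on the task's own Configuration. A hypothetical sketch of that counterpart follows — the field and mapper body are illustrative, not the project's actual MyMapper:

// Hypothetical mapper sketch: reads back an int the driver stored with conf.setInt(NODE_CNT_FIELD, n).
public static class MyMapper extends Mapper<IntWritable, PageRankNode, IntWritable, PageRankNode> {
    private int numNodes;

    @Override
    protected void setup(Context context) {
        // The second argument is the default returned if the driver never set the property.
        numNodes = context.getConfiguration().getInt(NODE_CNT_FIELD, 0);
    }
}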
From source file:andromache.config.CassandraConfigHelper.java
License:Apache License
/**
 * The number of rows to request with each get range slices request.
 * Too big and you can get timeouts when it takes Cassandra too long
 * to fetch all the data; too small and performance will be eaten up
 * by the overhead of each request.
 *
 * @param conf      job configuration you are about to run
 * @param batchsize number of rows to request each time
 */
public static void setRangeBatchSize(Configuration conf, int batchsize) {
    conf.setInt(RANGE_BATCH_SIZE_CONFIG, batchsize);
}
From source file:andromache.config.CassandraConfigHelper.java
License:Apache License
/**
 * Set the size of the input split.
 * This affects the number of maps created; if the number is too small,
 * the overhead of each map will take up the bulk of the job time.
 *
 * @param conf      job configuration you are about to run
 * @param splitsize size of the input split
 */
public static void setInputSplitSize(Configuration conf, int splitsize) {
    conf.setInt(INPUT_SPLIT_SIZE_CONFIG, splitsize);
}
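A short, hypothetical driver snippet showing how these two helpers might be called together before submitting a job; the batch and split values are illustrative, not recommendations:

// Hypothetical usage: tune Cassandra input reading, then build the job from the configuration.
Configuration conf = new Configuration();
CassandraConfigHelper.setRangeBatchSize(conf, 1000);      // rows per get-range-slices request
CassandraConfigHelper.setInputSplitSize(conf, 64 * 1024); // rows per input split
Job job = Job.getInstance(conf);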
From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java
License:Apache License
public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {
    Configuration conf = new HamaConfiguration();

    if (args.length > 0) {
        if (args.length == 1) {
            conf.setInt("bsp.peers.num", Integer.parseInt(args[0]));
        } else {
            System.out.println("Wrong argument size!");
            System.out.println(" Argument1=numBspTask");
            return;
        }
    } else {
        // BSPJobClient jobClient = new BSPJobClient(conf);
        // ClusterStatus cluster = jobClient.getClusterStatus(true);
        // job.setNumBspTask(cluster.getMaxTasks());
        conf.setInt("bsp.peers.num", 2); // 1 CPU and 1 GPU
    }

    // Enable one GPU task
    conf.setInt("bsp.peers.gpu.num", 1);

    conf.setBoolean("hama.pipes.logging", true);

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumBspGpuTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("inputPath: " + CONF_INPUT_DIR);
    LOG.info("outputPath: " + CONF_OUTPUT_DIR);

    Path example = new Path(CONF_INPUT_DIR.getParent(), "example.seq");
    conf.set(CONF_EXAMPLE_PATH, example.toString());
    LOG.info("exampleFile: " + example.toString());

    prepareInput(conf, CONF_INPUT_DIR, example, CONF_N);

    BSPJob job = createHelloHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

    long startTime = System.currentTimeMillis();
    if (job.waitForCompletion(true)) {
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

        // Print input files
        // printOutput(job, CONF_INPUT_DIR);
        // printOutput(job, example);

        // Print output
        printOutput(job, FileOutputFormat.getOutputPath(job));
    }
}
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
public static void main(String[] args) throws Exception {
    // Defaults
    int numBspTask = 1;
    int numGpuBspTask = 1;
    int blockSize = BLOCK_SIZE;
    int gridSize = GRID_SIZE;
    long n = 10; // input vectors
    int k = 3; // start vectors
    int vectorDimension = 2;
    int maxIteration = 10;
    boolean useTestExampleInput = false;
    boolean isDebugging = false;
    boolean timeMeasurement = false;
    int GPUPercentage = 80;

    Configuration conf = new HamaConfiguration();
    FileSystem fs = FileSystem.get(conf);

    // Set numBspTask to maxTasks
    // BSPJobClient jobClient = new BSPJobClient(conf);
    // ClusterStatus cluster = jobClient.getClusterStatus(true);
    // numBspTask = cluster.getMaxTasks();

    if (args.length > 0) {
        if (args.length == 12) {
            numBspTask = Integer.parseInt(args[0]);
            numGpuBspTask = Integer.parseInt(args[1]);
            blockSize = Integer.parseInt(args[2]);
            gridSize = Integer.parseInt(args[3]);
            n = Long.parseLong(args[4]);
            k = Integer.parseInt(args[5]);
            vectorDimension = Integer.parseInt(args[6]);
            maxIteration = Integer.parseInt(args[7]);
            useTestExampleInput = Boolean.parseBoolean(args[8]);
            GPUPercentage = Integer.parseInt(args[9]);
            isDebugging = Boolean.parseBoolean(args[10]);
            timeMeasurement = Boolean.parseBoolean(args[11]);
        } else {
            System.out.println("Wrong argument size!");
            System.out.println(" Argument1=numBspTask");
            System.out.println(" Argument2=numGpuBspTask");
            System.out.println(" Argument3=blockSize");
            System.out.println(" Argument4=gridSize");
            System.out.println(" Argument5=n | Number of input vectors (" + n + ")");
            System.out.println(" Argument6=k | Number of start vectors (" + k + ")");
            System.out.println(" Argument7=vectorDimension | Dimension of each vector (" + vectorDimension + ")");
            System.out.println(" Argument8=maxIterations | Number of maximal iterations (" + maxIteration + ")");
            System.out.println(" Argument9=testExample | Use testExample input (true|false=default)");
            System.out.println(" Argument10=GPUPercentage (percentage of input)");
            System.out.println(" Argument11=isDebugging (true|false=default)");
            System.out.println(" Argument12=timeMeasurement (true|false=default)");
            return;
        }
    }

    // Set config variables
    conf.setBoolean(CONF_DEBUG, isDebugging);
    conf.setBoolean("hama.pipes.logging", false);
    conf.setBoolean(CONF_TIME, timeMeasurement);

    // Set CPU tasks
    conf.setInt("bsp.peers.num", numBspTask);
    // Set GPU tasks
    conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
    // Set GPU blockSize and gridSize
    conf.set(CONF_BLOCKSIZE, "" + blockSize);
    conf.set(CONF_GRIDSIZE, "" + gridSize);
    // Set maxIterations for KMeans
    conf.setInt(CONF_MAX_ITERATIONS, maxIteration);
    // Set n for KMeans
    conf.setLong(CONF_N, n);
    // Set GPU workload
    conf.setInt(CONF_GPU_PERCENTAGE, GPUPercentage);

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("GPUPercentage: " + conf.get(CONF_GPU_PERCENTAGE));
    LOG.info("BlockSize: " + conf.get(CONF_BLOCKSIZE));
    LOG.info("GridSize: " + conf.get(CONF_GRIDSIZE));
    LOG.info("isDebugging: " + conf.get(CONF_DEBUG));
    LOG.info("timeMeasurement: " + conf.get(CONF_TIME));
    LOG.info("useTestExampleInput: " + useTestExampleInput);
    LOG.info("inputPath: " + CONF_INPUT_DIR);
    LOG.info("centersPath: " + CONF_CENTER_DIR);
    LOG.info("outputPath: " + CONF_OUTPUT_DIR);
    LOG.info("n: " + n);
    LOG.info("k: " + k);
    LOG.info("vectorDimension: " + vectorDimension);
    LOG.info("maxIteration: " + maxIteration);

    Path centerIn = new Path(CONF_CENTER_DIR, "center_in.seq");
    Path centerOut = new Path(CONF_CENTER_DIR, "center_out.seq");
    conf.set(CONF_CENTER_IN_PATH, centerIn.toString());
    conf.set(CONF_CENTER_OUT_PATH, centerOut.toString());

    // prepare Input
    if (useTestExampleInput) {
        // prepareTestInput(conf, fs, input, centerIn);
        prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k,
            vectorDimension, null, GPUPercentage);
    } else {
        prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k,
            vectorDimension, new Random(3337L), GPUPercentage);
    }

    BSPJob job = createKMeansHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

    long startTime = System.currentTimeMillis();
    if (job.waitForCompletion(true)) {
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

        if (isDebugging) {
            printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
            printOutput(conf, fs, ".log", new IntWritable(), new PipesVectorWritable());
        }

        if (k < 50) {
            printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
        }
    }
}
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBSP.java
License:Apache License
public static void main(String[] args) throws Exception {
    // Defaults
    int numRowsA = 4; // 1024;
    int numColsA = 4; // 1024;
    int numRowsB = 4; // 1024;
    int numColsB = 4; // 1024;
    boolean isDebugging = true;

    Configuration conf = new HamaConfiguration();
    BSPJobClient jobClient = new BSPJobClient(conf);
    ClusterStatus cluster = jobClient.getClusterStatus(true);

    if (args.length > 0) {
        if (args.length == 6) {
            conf.setInt("bsp.peers.num", Integer.parseInt(args[0]));
            numRowsA = Integer.parseInt(args[1]);
            numColsA = Integer.parseInt(args[2]);
            numRowsB = Integer.parseInt(args[3]);
            numColsB = Integer.parseInt(args[4]);
            isDebugging = Boolean.parseBoolean(args[5]);
        } else {
            System.out.println("Wrong argument size!");
            System.out.println(" Argument1=numBspTask");
            System.out.println(" Argument2=numRowsA | Number of rows of the first input matrix");
            System.out.println(" Argument3=numColsA | Number of columns of the first input matrix");
            System.out.println(" Argument4=numRowsB | Number of rows of the second input matrix");
            System.out.println(" Argument5=numColsB | Number of columns of the second input matrix");
            System.out.println(" Argument6=debug | Enable debugging (true|false)");
            return;
        }
    } else {
        conf.setInt("bsp.peers.num", 1); // cluster.getMaxTasks());
        // Enable one GPU task
        conf.setInt("bsp.peers.gpu.num", 1);
    }

    conf.setBoolean("hama.pipes.logging", isDebugging);
    conf.setBoolean(CONF_DEBUG, isDebugging);
    conf.set(CONF_BLOCKSIZE, "" + BLOCK_SIZE);
    conf.set(CONF_GRIDSIZE, "" + GRID_SIZE);

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("numRowsA: " + numRowsA);
    LOG.info("numColsA: " + numColsA);
    LOG.info("numRowsB: " + numRowsB);
    LOG.info("numColsB: " + numColsB);
    LOG.info("isDebugging: " + isDebugging);
    LOG.info("outputPath: " + OUTPUT_DIR);

    if (numColsA != numRowsB) {
        throw new Exception("Cols of MatrixA != rows of MatrixB! (" + numColsA + "!=" + numRowsB + ")");
    }

    // Create random DistributedRowMatrix
    // use constant seeds to get reproducible results
    // Matrix A
    DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L),
        MATRIX_A_PATH, false);
    // Matrix B is stored transposed
    DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB, numColsB, new Random(1337L),
        MATRIX_B_PATH, true);

    // Load DistributedRowMatrix a and b
    DistributedRowMatrix a = new DistributedRowMatrix(MATRIX_A_PATH, OUTPUT_DIR, numRowsA, numColsA);
    a.setConf(conf);
    DistributedRowMatrix b = new DistributedRowMatrix(MATRIX_B_PATH, OUTPUT_DIR, numRowsB, numColsB);
    b.setConf(conf);

    // MatrixMultiplication
    long startTime = System.currentTimeMillis();
    DistributedRowMatrix c = a.multiplyBSP(b, MATRIX_C_PATH);
    LOG.info("MatrixMultiplicationHybrid using Hama finished in "
        + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Verification
    // Overwrite matrix B, NOT transposed for verification
    DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB, numColsB, new Random(1337L),
        MATRIX_B_PATH, false);
    b = new DistributedRowMatrix(MATRIX_B_PATH, OUTPUT_DIR, numRowsB, numColsB);
    b.setConf(conf);

    DistributedRowMatrix d = a.multiplyJava(b, MATRIX_D_PATH);
    if (c.verify(d)) {
        System.out.println("Verify PASSED!");
    } else {
        System.out.println("Verify FAILED!");
    }

    if (isDebugging) {
        System.out.println("Matrix A:");
        a.printDistributedRowMatrix();
        System.out.println("Matrix B:");
        b.printDistributedRowMatrix();
        System.out.println("Matrix C:");
        c.printDistributedRowMatrix();
        System.out.println("Matrix D:");
        d.printDistributedRowMatrix();
        printOutput(conf);
    }
}
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.MatrixMultiplicationHybridBSP.java
License:Apache License
public static void main(String[] args) throws Exception {
    // Defaults
    int numBspTask = 1;
    int numGpuBspTask = 1;
    int numRowsA = 4; // 1024;
    int numColsA = 4; // 1024;
    int numRowsB = 4; // 1024;
    int numColsB = 4; // 1024;
    int tileWidth = 32; // 2 * 32 = 1024 threads matches the blocksize
    int GPUPercentage = 100;
    boolean isDebugging = true;

    Configuration conf = new HamaConfiguration();

    if (args.length > 0) {
        if (args.length == 9) {
            numBspTask = Integer.parseInt(args[0]);
            numGpuBspTask = Integer.parseInt(args[1]);
            numRowsA = Integer.parseInt(args[2]);
            numColsA = Integer.parseInt(args[3]);
            numRowsB = Integer.parseInt(args[4]);
            numColsB = Integer.parseInt(args[5]);
            tileWidth = Integer.parseInt(args[6]);
            GPUPercentage = Integer.parseInt(args[7]);
            isDebugging = Boolean.parseBoolean(args[8]);
        } else {
            System.out.println("Wrong argument size!");
            System.out.println(" Argument1=numBspTask");
            System.out.println(" Argument2=numGpuBspTask");
            System.out.println(" Argument3=numRowsA | Number of rows of the first input matrix");
            System.out.println(" Argument4=numColsA | Number of columns of the first input matrix");
            System.out.println(" Argument5=numRowsB | Number of rows of the second input matrix");
            System.out.println(" Argument6=numColsB | Number of columns of the second input matrix");
            System.out.println(" Argument7=tileWidth | TileWidth denotes the size of a submatrix");
            System.out.println(" Argument8=GPUPercentage (percentage of input)");
            System.out.println(" Argument9=debug | Enable debugging (true|false)");
            return;
        }
    }

    // Set config variables
    conf.setBoolean("hama.pipes.logging", false);
    // Set CPU tasks
    conf.setInt("bsp.peers.num", numBspTask);
    // Set GPU tasks
    conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
    // Set GPU workload
    // conf.setInt(CONF_GPU_PERCENTAGE, GPUPercentage);

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    // LOG.info("GPUPercentage: " + conf.get(CONF_GPU_PERCENTAGE));
    LOG.info("numRowsA: " + numRowsA);
    LOG.info("numColsA: " + numColsA);
    LOG.info("numRowsB: " + numRowsB);
    LOG.info("numColsB: " + numColsB);
    LOG.info("isDebugging: " + isDebugging);
    LOG.info("inputPath: " + CONF_INPUT_DIR);
    LOG.info("outputPath: " + CONF_OUTPUT_DIR);

    if (numColsA != numRowsB) {
        throw new Exception("Cols of MatrixA != rows of MatrixB! (" + numColsA + "!=" + numRowsB + ")");
    }

    // Create random DistributedRowMatrix
    // use constant seeds to get reproducible results
    // Matrix A
    DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L),
        MATRIX_A_SPLITS_PATH, false, numBspTask, numGpuBspTask, GPUPercentage);
    // Matrix B is stored in transposed order
    List<Path> transposedMatrixBPaths = DistributedRowMatrix.createRandomDistributedRowMatrix(conf,
        numRowsB, numColsB, new Random(1337L), MATRIX_B_TRANSPOSED_PATH, true);

    // Execute MatrixMultiplication BSP Job
    long startTime = System.currentTimeMillis();

    BSPJob job = MatrixMultiplicationHybridBSP.createMatrixMultiplicationHybridBSPConf(conf,
        MATRIX_A_SPLITS_PATH, transposedMatrixBPaths.get(0), MATRIX_C_PATH, tileWidth, isDebugging);

    // Multiply Matrix
    DistributedRowMatrix matrixC = null;
    if (job.waitForCompletion(true)) {
        // Rename result file to output path
        Path matrixCOutPath = new Path(MATRIX_C_PATH + "/part0.seq");
        FileSystem fs = MATRIX_C_PATH.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(MATRIX_C_PATH);
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), matrixCOutPath);
                break;
            }
        }
        // Read resulting Matrix from HDFS
        matrixC = new DistributedRowMatrix(matrixCOutPath, MATRIX_C_PATH, numRowsA, numColsB);
        matrixC.setConf(conf);
    }

    LOG.info("MatrixMultiplicationHybrid using Hama finished in "
        + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Create matrix A in one file for verification
    List<Path> matrixAPaths = DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA,
        numColsA, new Random(42L), MATRIX_A_PATH, false);
    DistributedRowMatrix matrixA = new DistributedRowMatrix(matrixAPaths.get(0), CONF_INPUT_DIR, numRowsA,
        numColsA);
    matrixA.setConf(conf);

    // Create matrix B, NOT transposed for verification
    List<Path> matrixBPaths = DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB,
        numColsB, new Random(1337L), MATRIX_B_PATH, false);
    DistributedRowMatrix matrixB = new DistributedRowMatrix(matrixBPaths.get(0), CONF_INPUT_DIR, numRowsB,
        numColsB);
    matrixB.setConf(conf);

    // Verification
    DistributedRowMatrix matrixD = matrixA.multiplyJava(matrixB, MATRIX_D_PATH);
    if (matrixC.verify(matrixD)) {
        System.out.println("Verify PASSED!");
    } else {
        System.out.println("Verify FAILED!");
    }

    if (isDebugging) {
        System.out.println("\nMatrix A:");
        matrixA.printDistributedRowMatrix();
        System.out.println("\nMatrix B:");
        matrixB.printDistributedRowMatrix();

        System.out.println("\nTransposedMatrix B:");
        // Load DistributedRowMatrix transposedMatrixB
        DistributedRowMatrix transposedMatrixB = new DistributedRowMatrix(transposedMatrixBPaths.get(0),
            CONF_INPUT_DIR, numColsB, numRowsB);
        transposedMatrixB.setConf(conf);
        transposedMatrixB.printDistributedRowMatrix();

        System.out.println("\nMatrix C:");
        matrixC.printDistributedRowMatrix();
        System.out.println("\nMatrix D:");
        matrixD.printDistributedRowMatrix();

        // Print out log files
        printOutput(conf);
    }
}