List of usage examples for org.apache.hadoop.conf.Configuration#setInt
public void setInt(String name, int value)
Sets the value of the name property to an int.
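Before the longer examples below, a minimal self-contained sketch of the call; the property key "my.app.num.threads" and its default are hypothetical names chosen only for illustration:

import org.apache.hadoop.conf.Configuration;

public class SetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store an int under an illustrative (hypothetical) property name.
        conf.setInt("my.app.num.threads", 8);
        // Read it back; the second argument is the default returned when the key is absent.
        int numThreads = conf.getInt("my.app.num.threads", 1);
        System.out.println("my.app.num.threads = " + numThreads);
    }
}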
From source file:edu.iu.kmeans.regroupallgather.KMeansLauncher.java
License:Apache License
private Job configureKMeansJob(int numOfDataPoints, int numCentroids, int vectorSize, int numPointFiles,
    int numMapTasks, int numThreads, int numIterations, Path dataDir, Path cenDir, Path outDir,
    Configuration configuration) throws IOException, URISyntaxException {
  Job job = Job.getInstance(configuration, "kmeans_job");
  FileInputFormat.setInputPaths(job, dataDir);
  FileOutputFormat.setOutputPath(job, outDir);
  job.setInputFormatClass(MultiFileInputFormat.class);
  job.setJarByClass(KMeansLauncher.class);
  job.setMapperClass(KMeansCollectiveMapper.class);
  org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
  jobConf.set("mapreduce.framework.name", "map-collective");
  jobConf.setNumMapTasks(numMapTasks);
  jobConf.setInt("mapreduce.job.max.split.locations", 10000);
  job.setNumReduceTasks(0);
  Configuration jobConfig = job.getConfiguration();
  jobConfig.setInt(Constants.POINTS_PER_FILE, numOfDataPoints / numPointFiles);
  jobConfig.setInt(Constants.NUM_CENTROIDS, numCentroids);
  jobConfig.setInt(Constants.VECTOR_SIZE, vectorSize);
  jobConfig.setInt(Constants.NUM_MAPPERS, numMapTasks);
  jobConfig.setInt(Constants.NUM_THREADS, numThreads);
  jobConfig.setInt(Constants.NUM_ITERATIONS, numIterations);
  jobConfig.set(Constants.CEN_DIR, cenDir.toString());
  return job;
}
From source file:edu.iu.kmeans.sgxsimu.KMeansLauncher.java
License:Apache License
/** Launches all the tasks in order. */
@Override
public int run(String[] args) throws Exception {
  /* Put shared libraries into the distributed cache */
  Configuration conf = this.getConf();
  Initialize init = new Initialize(conf, args);

  // load args
  init.loadSysArgs();
  init.loadDistributedLibs();

  // load app args
  conf.setInt(HarpDAALConstants.FILE_DIM, Integer.parseInt(args[init.getSysArgNum()]));
  conf.setInt(HarpDAALConstants.FEATURE_DIM, Integer.parseInt(args[init.getSysArgNum() + 1]));
  conf.setInt(HarpDAALConstants.NUM_CENTROIDS, Integer.parseInt(args[init.getSysArgNum() + 2]));
  conf.setInt(Constants.ENCLAVE_TOTAL, Integer.parseInt(args[init.getSysArgNum() + 3]));
  conf.setInt(Constants.ENCLAVE_PER_THD, Integer.parseInt(args[init.getSysArgNum() + 4]));
  conf.setInt(Constants.ENCLAVE_TASK, Integer.parseInt(args[init.getSysArgNum() + 5]));
  conf.setBoolean(Constants.ENABLE_SIMU, Boolean.parseBoolean(args[init.getSysArgNum() + 6]));

  // config job
  System.out.println("Starting Job");
  long perJobSubmitTime = System.currentTimeMillis();
  System.out.println("Start Job#" + " "
      + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
  Job kmeansJob = init.createJob("kmeansJob", KMeansLauncher.class, KMeansCollectiveMapper.class);

  // initialize centroids data
  JobConf thisjobConf = (JobConf) kmeansJob.getConfiguration();
  FileSystem fs = FileSystem.get(conf);
  int nFeatures = Integer.parseInt(args[init.getSysArgNum() + 1]);
  int numCentroids = Integer.parseInt(args[init.getSysArgNum() + 2]);
  Path workPath = init.getWorkPath();
  Path cenDir = new Path(workPath, "centroids");
  fs.mkdirs(cenDir);
  if (fs.exists(cenDir)) {
    fs.delete(cenDir, true);
  }
  Path initCenDir = new Path(cenDir, "init_centroids");
  DataGenerator.generateDenseDataSingle(numCentroids, nFeatures, 1000, 0, " ", initCenDir, fs);
  thisjobConf.set(HarpDAALConstants.CEN_DIR, cenDir.toString());
  thisjobConf.set(HarpDAALConstants.CENTROID_FILE_NAME, "init_centroids");

  // generate data if required
  boolean generateData = Boolean.parseBoolean(args[init.getSysArgNum() + 7]);
  if (generateData) {
    Path inputPath = init.getInputPath();
    int total_points = Integer.parseInt(args[init.getSysArgNum() + 8]);
    int total_files = Integer.parseInt(args[init.getSysArgNum() + 9]);
    String tmpDirPathName = args[init.getSysArgNum() + 10];
    DataGenerator.generateDenseDataMulti(total_points, nFeatures, total_files, 2, 1, ",", inputPath,
        tmpDirPathName, fs);
  }

  // finish job
  boolean jobSuccess = kmeansJob.waitForCompletion(true);
  System.out.println("End Job#" + " "
      + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
  System.out.println("| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime)
      + " miliseconds |");
  if (!jobSuccess) {
    kmeansJob.killJob();
    System.out.println("kmeansJob failed");
  }
  return 0;
}
From source file:edu.iu.lda.LDALauncher.java
License:Apache License
private Job configureLDAJob(Path docDir, int numTopics, double alpha, double beta, int numIterations,
    int minBound, int maxBound, int numMapTasks, int numThreadsPerWorker, double scheduleRatio, int mem,
    boolean printModel, Path modelDir, Path outputDir, Configuration configuration, int jobID)
    throws IOException, URISyntaxException {
  configuration.setInt(Constants.NUM_TOPICS, numTopics);
  configuration.setDouble(Constants.ALPHA, alpha);
  configuration.setDouble(Constants.BETA, beta);
  configuration.setInt(Constants.NUM_ITERATIONS, numIterations);
  configuration.setInt(Constants.MIN_BOUND, minBound);
  configuration.setInt(Constants.MAX_BOUND, maxBound);
  configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
  configuration.setDouble(Constants.SCHEDULE_RATIO, scheduleRatio);
  System.out.println("Model Dir Path: " + modelDir.toString());
  configuration.set(Constants.MODEL_DIR, modelDir.toString());
  configuration.setBoolean(Constants.PRINT_MODEL, printModel);
  Job job = Job.getInstance(configuration, "lda_job_" + jobID);
  JobConf jobConf = (JobConf) job.getConfiguration();
  jobConf.set("mapreduce.framework.name", "map-collective");
  // mapreduce.map.collective.memory.mb
  // 125000
  jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
  // mapreduce.map.collective.java.opts
  // -Xmx120000m -Xms120000m
  int xmx = (mem - 5000) > (mem * 0.9) ? (mem - 5000) : (int) Math.ceil(mem * 0.9);
  int xmn = (int) Math.ceil(0.25 * xmx);
  jobConf.set("mapreduce.map.collective.java.opts",
      "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
  jobConf.setNumMapTasks(numMapTasks);
  jobConf.setInt("mapreduce.job.max.split.locations", 10000);
  FileInputFormat.setInputPaths(job, docDir);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setInputFormatClass(MultiFileInputFormat.class);
  job.setJarByClass(LDALauncher.class);
  job.setMapperClass(LDAMPCollectiveMapper.class);
  job.setNumReduceTasks(0);
  return job;
}
From source file:edu.iu.mds.MDSMapReduce.java
License:Apache License
private Job prepareBCCalcJob(String inputDir, String xFile, String outputDirPath, int iterationCount,
    int jobCount, Configuration configuration, int numPoints, int xWidth, int numMapTasks,
    int partitionPerWorker)
    throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
  Job job = new Job(configuration, "map-collective-mds-bc" + jobCount);
  Configuration jobConfig = job.getConfiguration();
  Path outputDir = new Path(outputDirPath);
  FileInputFormat.setInputPaths(job, inputDir);
  FileOutputFormat.setOutputPath(job, outputDir);
  jobConfig.setInt(MDSConstants.ITERATION, iterationCount);
  jobConfig.setInt(MDSConstants.NUMPOINTS, numPoints);
  jobConfig.setInt(MDSConstants.XWIDTH, xWidth);
  jobConfig.set(MDSConstants.XFILE, xFile);
  jobConfig.setInt(MDSConstants.NUM_MAPS, numMapTasks);
  jobConfig.setInt(MDSConstants.PARTITION_PER_WORKER, partitionPerWorker);
  // input class to file-based class
  job.setInputFormatClass(MultiFileInputFormat.class);
  // job.setInputFormatClass(DataFileInputFormat.class);
  // job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setJarByClass(MDSMapReduce.class);
  job.setMapperClass(MDSAllgatherMultiThreadMapper.class);
  // When using MultiFileInputFormat, remember to set the number of map tasks
  org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
  jobConf.set("mapreduce.framework.name", "map-collective");
  jobConf.setNumMapTasks(numMapTasks);
  job.setNumReduceTasks(0);
  return job;
}
From source file:edu.iu.pagerank.PRJobLauncher.java
License:Apache License
private Job prepareJob(String inputDir, int totalVtx, int iterationCount, int jobCount, int numMapTasks,
    int partiitonPerWorker, String outputDirPath, Configuration configuration)
    throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
  Job job = new Job(configuration, "harp-pagerank-" + jobCount);
  Configuration jobConfig = job.getConfiguration();
  Path outputDir = new Path(outputDirPath);
  FileInputFormat.setInputPaths(job, inputDir);
  FileOutputFormat.setOutputPath(job, outputDir);
  jobConfig.setInt(PRConstants.ITERATION, iterationCount);
  jobConfig.setInt(PRConstants.TOTAL_VTX, totalVtx);
  jobConfig.setInt(PRConstants.NUM_MAPS, numMapTasks);
  jobConfig.setInt(PRConstants.PARTITION_PER_WORKER, partiitonPerWorker);
  job.setInputFormatClass(MultiFileInputFormat.class);
  job.setJarByClass(PRJobLauncher.class);
  job.setMapperClass(PRMultiThreadMapper.class);
  org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
  jobConf.set("mapreduce.framework.name", "map-collective");
  jobConf.setNumMapTasks(numMapTasks);
  job.setNumReduceTasks(0);
  return job;
}
From source file:edu.iu.sgd.SGDLauncher.java
License:Apache License
private Job configureSGDJob(Path inputDir, int r, double lambda, double epsilon, int numIterations,
    int trainRatio, int numMapTasks, int numThreadsPerWorker, double scheduleRatio, int mem, Path modelDir,
    Path outputDir, String testFilePath, Configuration configuration, int jobID)
    throws IOException, URISyntaxException {
  configuration.setInt(Constants.R, r);
  configuration.setDouble(Constants.LAMBDA, lambda);
  configuration.setDouble(Constants.EPSILON, epsilon);
  configuration.setInt(Constants.NUM_ITERATIONS, numIterations);
  configuration.setInt(Constants.TRAIN_RATIO, trainRatio);
  configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
  configuration.setDouble(Constants.SCHEDULE_RATIO, scheduleRatio);
  configuration.set(Constants.MODEL_DIR, modelDir.toString());
  configuration.set(Constants.TEST_FILE_PATH, testFilePath);
  Job job = Job.getInstance(configuration, "sgd_job_" + jobID);
  JobConf jobConf = (JobConf) job.getConfiguration();
  jobConf.set("mapreduce.framework.name", "map-collective");
  // mapreduce.map.collective.memory.mb
  // 125000
  jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
  // mapreduce.map.collective.java.opts
  // -Xmx120000m -Xms120000m
  int xmx = (mem - 5000) > (mem * 0.9) ? (mem - 5000) : (int) Math.ceil(mem * 0.9);
  int xmn = (int) Math.ceil(0.25 * xmx);
  jobConf.set("mapreduce.map.collective.java.opts",
      "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
  jobConf.setNumMapTasks(numMapTasks);
  jobConf.setInt("mapreduce.job.max.split.locations", 10000);
  FileInputFormat.setInputPaths(job, inputDir);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setInputFormatClass(MultiFileInputFormat.class);
  job.setJarByClass(SGDLauncher.class);
  job.setMapperClass(SGDCollectiveMapper.class);
  job.setNumReduceTasks(0);
  return job;
}
From source file:edu.iu.wdamds.MDSLauncher.java
License:Apache License
private Job prepareWDAMDSJob(int numMapTasks, Path dataDirPath, Path xFilePath, Path xOutFilePath,
    Path outDirPath, String idsFile, String labelsFile, double threshold, int d, double alpha, int n,
    int cgIter, int numThreads)
    throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(getConf(), "map-collective-wdamds");
  Configuration jobConfig = job.getConfiguration();
  FileInputFormat.setInputPaths(job, dataDirPath);
  FileOutputFormat.setOutputPath(job, outDirPath);
  jobConfig.setInt(MDSConstants.NUM_MAPS, numMapTasks);
  // Load from HDFS
  // Now we ignore and don't read x file from HDFS
  jobConfig.set(MDSConstants.X_FILE_PATH, xFilePath.toString());
  // Output to HDFS
  jobConfig.set(MDSConstants.X_OUT_FILE_PATH, xOutFilePath.toString());
  // Load from shared file system
  jobConfig.set(MDSConstants.IDS_FILE, idsFile);
  // Load from shared file system
  jobConfig.set(MDSConstants.LABELS_FILE, labelsFile);
  jobConfig.setDouble(MDSConstants.THRESHOLD, threshold);
  jobConfig.setInt(MDSConstants.D, d);
  jobConfig.setDouble(MDSConstants.ALPHA, alpha);
  jobConfig.setInt(MDSConstants.N, n);
  jobConfig.setInt(MDSConstants.CG_ITER, cgIter);
  jobConfig.setInt(MDSConstants.NUM_THREADS, numThreads);
  // input class to file-based class
  job.setInputFormatClass(SingleFileInputFormat.class);
  job.setJarByClass(MDSLauncher.class);
  job.setMapperClass(WDAMDSMapper.class);
  // When using MultiFileInputFormat, remember to set the number of map tasks
  org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
  jobConf.set("mapreduce.framework.name", "map-collective");
  jobConf.setNumMapTasks(numMapTasks);
  job.setNumReduceTasks(0);
  return job;
}
From source file:edu.rosehulman.CollocDriver.java
License:Apache License
/** pass1: generate collocations, ngrams */
@SuppressWarnings("deprecation")
private static long generateCollocations(Path input, Path output, Configuration baseConf,
    boolean emitUnigrams, int maxNGramSize, int reduceTasks, int minSupport)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration con = new Configuration(baseConf);
  con.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
  con.setInt(CollocMapper.MAX_SHINGLE_SIZE, maxNGramSize);
  con.setInt(CollocReducer.MIN_SUPPORT, minSupport);
  Job job = new Job(con);
  job.setJobName(CollocDriver.class.getSimpleName() + ".generateCollocations:" + input);
  job.setJarByClass(CollocDriver.class);
  job.setMapOutputKeyClass(GramKey.class);
  job.setMapOutputValueClass(Gram.class);
  job.setPartitionerClass(GramKeyPartitioner.class);
  job.setGroupingComparatorClass(GramKeyGroupComparator.class);
  job.setOutputKeyClass(Gram.class);
  job.setOutputValueClass(Gram.class);
  job.setCombinerClass(CollocCombiner.class);
  FileInputFormat.setInputPaths(job, input);
  Path outputPath = new Path(output, SUBGRAM_OUTPUT_DIRECTORY);
  FileOutputFormat.setOutputPath(job, outputPath);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(CollocMapper.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setReducerClass(CollocReducer.class);
  job.setNumReduceTasks(reduceTasks);
  boolean succeeded = job.waitForCompletion(true);
  if (!succeeded) {
    throw new IllegalStateException("Job failed!");
  }
  return job.getCounters().findCounter(CollocMapper.Count.NGRAM_TOTAL).getValue();
}
From source file:edu.udel.mxv.Mxv.java
@Override
public int run(String[] args) throws Exception {
  if (args.length != 4) {
    System.err.println(USAGE);
    System.exit(1);
  }
  int n = Integer.parseInt(args[0]);
  String input_matrix = args[1];
  String input_vector = args[2];
  String output = args[3];

  Configuration conf = getConf();
  conf.set("vector.path", input_vector);
  conf.setInt("vector.n", n);

  Job job = new Job(conf);
  job.setJobName("mxv");
  job.setJarByClass(getClass());

  // mapper
  job.setMapperClass(MxvMap.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(DoubleWritable.class);

  // reducer
  job.setReducerClass(MxvRed.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(DoubleWritable.class);
  // job.setNumReduceTasks(num_red);

  FileInputFormat.addInputPath(job, new Path(input_matrix));
  FileOutputFormat.setOutputPath(job, new Path(output));

  return job.waitForCompletion(true) ? 0 : 1;
}
From source file:edu.umd.cloud9.webgraph.driver.ClueWebDriver.java
License:Apache License
public int run(String[] args) throws Exception {
  if (args.length < 6) {
    printUsage();
    return -1;
  }
  Configuration conf = getConf();
  String inputArg = DriverUtil.argValue(args, DriverUtil.CL_INPUT);
  final String inputBase = inputArg.endsWith("/") ? inputArg : inputArg + "/";
  String outputArg = DriverUtil.argValue(args, DriverUtil.CL_OUTPUT);
  final String outputBase = outputArg.endsWith("/") ? outputArg : outputArg + "/";
  final String docnoMapping = DriverUtil.argValue(args, DriverUtil.CL_DOCNO_MAPPING);
  final int fromSegment = Integer.parseInt(DriverUtil.argValue(args, DriverUtil.CL_BEGIN_SEGMENT));
  final int toSegment = Integer.parseInt(DriverUtil.argValue(args, DriverUtil.CL_END_SEGMENT));
  final boolean includeInternalLinks = DriverUtil.argExists(args, DriverUtil.CL_INCLUDE_INTERNAL_LINKS);
  final boolean computeAnchorWeights = DriverUtil.argExists(args, DriverUtil.CL_COMPUTE_WEIGHTS);
  final String normalizer = DriverUtil.argValue(args, DriverUtil.CL_NORMALIZER);

  conf.setInt("Cloud9.Mappers", 2000);
  conf.setInt("Cloud9.Reducers", DriverUtil.DEFAULT_REDUCERS);
  conf.set("Cloud9.DocnoMappingFile", docnoMapping);
  conf.setBoolean("Cloud9.IncludeInternalLinks", includeInternalLinks);
  conf.set("Cloud9.AnchorTextNormalizer", normalizer);

  // Extract link information for each segment separately
  for (int i = fromSegment; i <= toSegment; i++) {
    String inputPath = inputBase + "en." + (i == 10 ? "10" : ("0" + i));
    String outputPath = outputBase + DriverUtil.OUTPUT_EXTRACT_LINKS + "/en." + (i == 10 ? "10" : ("0" + i));
    conf.set("Cloud9.InputPath", inputPath);
    conf.set("Cloud9.OutputPath", outputPath);
    int r = new ClueExtractLinks(conf).run();
    if (r != 0) {
      return -1;
    }
  }

  // Construct the reverse web graph (i.e., collect incoming link information)
  String inputPath = "";
  for (int i = fromSegment; i < toSegment; i++) {
    inputPath += outputBase + DriverUtil.OUTPUT_EXTRACT_LINKS + "/en.0" + i + "/,";
  }
  if (toSegment == 10) {
    inputPath += outputBase + DriverUtil.OUTPUT_EXTRACT_LINKS + "/en.10/";
  } else {
    inputPath += outputBase + DriverUtil.OUTPUT_EXTRACT_LINKS + "/en.0" + toSegment + "/";
  }
  String outputPath = outputBase + DriverUtil.OUTPUT_REVERSE_WEBGRAPH + "/";
  conf.set("Cloud9.InputPath", inputPath);
  conf.set("Cloud9.OutputPath", outputPath);
  conf.setInt("Cloud9.Mappers", 1);
  conf.setInt("Cloud9.Reducers", DriverUtil.DEFAULT_REDUCERS * (toSegment - fromSegment + 1));
  int r = new BuildReverseWebGraph(conf).run();
  if (r != 0) {
    return -1;
  }

  // Construct the web graph
  inputPath = outputBase + DriverUtil.OUTPUT_REVERSE_WEBGRAPH + "/";
  outputPath = outputBase + DriverUtil.OUTPUT_WEBGRAPH + "/";
  conf.set("Cloud9.InputPath", inputPath);
  conf.set("Cloud9.OutputPath", outputPath);
  conf.setInt("Cloud9.Mappers", 1);
  conf.setInt("Cloud9.Reducers", DriverUtil.DEFAULT_REDUCERS * (toSegment - fromSegment + 1));
  r = new BuildWebGraph(conf).run();
  if (r != 0) {
    return -1;
  }

  if (computeAnchorWeights) {
    // Propagating domain names in order to compute anchor weights
    inputPath = outputBase + DriverUtil.OUTPUT_WEBGRAPH + "/";
    outputPath = outputBase + DriverUtil.OUTPUT_HOST_NAMES + "/";
    conf.set("Cloud9.InputPath", inputPath);
    conf.set("Cloud9.OutputPath", outputPath);
    conf.setInt("Cloud9.Mappers", 1);
    conf.setInt("Cloud9.Reducers", DriverUtil.DEFAULT_REDUCERS * (toSegment - fromSegment + 1));
    r = new CollectHostnames(conf).run();
    if (r != 0) {
      return -1;
    }

    // Compute the weights
    inputPath = outputBase + DriverUtil.OUTPUT_REVERSE_WEBGRAPH + "/," + outputBase
        + DriverUtil.OUTPUT_HOST_NAMES + "/";
    outputPath = outputBase + DriverUtil.OUTPUT_WEGIHTED_REVERSE_WEBGRAPH + "/";
    conf.set("Cloud9.InputPath", inputPath);
    conf.set("Cloud9.OutputPath", outputPath);
    conf.setInt("Cloud9.Mappers", 1);
    conf.setInt("Cloud9.Reducers", DriverUtil.DEFAULT_REDUCERS * (toSegment - fromSegment + 1));
    r = new ComputeWeight(conf).run();
    if (r != 0) {
      return -1;
    }
  }
  return 0;
}