List of usage examples for org.apache.hadoop.mapreduce Job getInstance
public static Job getInstance(Configuration conf, String jobName) throws IOException
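Before the collected examples, a minimal sketch of a driver calling Job.getInstance; the MinimalDriver class name and the use of command-line arguments for the paths are illustrative assumptions, not taken from the sources below.

// Minimal sketch (assumed driver, not from the sources below): create a named job
// from a Configuration, wire up paths, then submit and wait for completion.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // getInstance(Configuration, String) copies the Configuration
        // into a new Job and sets the job name.
        Job job = Job.getInstance(conf, "minimal_job");
        job.setJarByClass(MinimalDriver.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // true = print progress to the client while the job runs
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}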
From source file:edu.iu.kmeans.regroupallgather.KMeansLauncher.java
License:Apache License
private Job configureKMeansJob(int numOfDataPoints, int numCentroids, int vectorSize, int numPointFiles,
        int numMapTasks, int numThreads, int numIterations, Path dataDir, Path cenDir, Path outDir,
        Configuration configuration) throws IOException, URISyntaxException {
    Job job = Job.getInstance(configuration, "kmeans_job");
    FileInputFormat.setInputPaths(job, dataDir);
    FileOutputFormat.setOutputPath(job, outDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(KMeansLauncher.class);
    job.setMapperClass(KMeansCollectiveMapper.class);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    job.setNumReduceTasks(0);
    Configuration jobConfig = job.getConfiguration();
    jobConfig.setInt(Constants.POINTS_PER_FILE, numOfDataPoints / numPointFiles);
    jobConfig.setInt(Constants.NUM_CENTROIDS, numCentroids);
    jobConfig.setInt(Constants.VECTOR_SIZE, vectorSize);
    jobConfig.setInt(Constants.NUM_MAPPERS, numMapTasks);
    jobConfig.setInt(Constants.NUM_THREADS, numThreads);
    jobConfig.setInt(Constants.NUM_ITERATIONS, numIterations);
    jobConfig.set(Constants.CEN_DIR, cenDir.toString());
    return job;
}
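The edu.iu.* launchers collected here appear to come from the Harp map-collective examples, and they share one pattern: the job is switched away from the default MapReduce runtime by setting mapreduce.framework.name to "map-collective", reduce tasks are disabled with setNumReduceTasks(0), and the Configuration is cast to the old-API JobConf because only the old API exposes setNumMapTasks.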
From source file:edu.iu.lda.LDALauncher.java
License:Apache License
private Job configureLDAJob(Path docDir, int numTopics, double alpha, double beta, int numIterations,
        int minBound, int maxBound, int numMapTasks, int numThreadsPerWorker, double scheduleRatio, int mem,
        boolean printModel, Path modelDir, Path outputDir, Configuration configuration, int jobID)
        throws IOException, URISyntaxException {
    configuration.setInt(Constants.NUM_TOPICS, numTopics);
    configuration.setDouble(Constants.ALPHA, alpha);
    configuration.setDouble(Constants.BETA, beta);
    configuration.setInt(Constants.NUM_ITERATIONS, numIterations);
    configuration.setInt(Constants.MIN_BOUND, minBound);
    configuration.setInt(Constants.MAX_BOUND, maxBound);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    configuration.setDouble(Constants.SCHEDULE_RATIO, scheduleRatio);
    System.out.println("Model Dir Path: " + modelDir.toString());
    configuration.set(Constants.MODEL_DIR, modelDir.toString());
    configuration.setBoolean(Constants.PRINT_MODEL, printModel);
    Job job = Job.getInstance(configuration, "lda_job_" + jobID);
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    // mapreduce.map.collective.memory.mb, e.g. 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    // mapreduce.map.collective.java.opts, e.g. -Xmx120000m -Xms120000m
    int xmx = (mem - 5000) > (mem * 0.9) ? (mem - 5000) : (int) Math.ceil(mem * 0.9);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    FileInputFormat.setInputPaths(job, docDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(LDALauncher.class);
    job.setMapperClass(LDAMPCollectiveMapper.class);
    job.setNumReduceTasks(0);
    return job;
}
From source file:edu.iu.sgd.SGDLauncher.java
License:Apache License
private Job configureSGDJob(Path inputDir, int r, double lambda, double epsilon, int numIterations,
        int trainRatio, int numMapTasks, int numThreadsPerWorker, double scheduleRatio, int mem, Path modelDir,
        Path outputDir, String testFilePath, Configuration configuration, int jobID)
        throws IOException, URISyntaxException {
    configuration.setInt(Constants.R, r);
    configuration.setDouble(Constants.LAMBDA, lambda);
    configuration.setDouble(Constants.EPSILON, epsilon);
    configuration.setInt(Constants.NUM_ITERATIONS, numIterations);
    configuration.setInt(Constants.TRAIN_RATIO, trainRatio);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    configuration.setDouble(Constants.SCHEDULE_RATIO, scheduleRatio);
    configuration.set(Constants.MODEL_DIR, modelDir.toString());
    configuration.set(Constants.TEST_FILE_PATH, testFilePath);
    Job job = Job.getInstance(configuration, "sgd_job_" + jobID);
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    // mapreduce.map.collective.memory.mb, e.g. 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    // mapreduce.map.collective.java.opts, e.g. -Xmx120000m -Xms120000m
    int xmx = (mem - 5000) > (mem * 0.9) ? (mem - 5000) : (int) Math.ceil(mem * 0.9);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(SGDLauncher.class);
    job.setMapperClass(SGDCollectiveMapper.class);
    job.setNumReduceTasks(0);
    return job;
}
From source file:edu.iu.wdamds.MDSLauncher.java
License:Apache License
private Job prepareWDAMDSJob(int numMapTasks, Path dataDirPath, Path xFilePath, Path xOutFilePath,
        Path outDirPath, String idsFile, String labelsFile, double threshold, int d, double alpha, int n,
        int cgIter, int numThreads)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf(), "map-collective-wdamds");
    Configuration jobConfig = job.getConfiguration();
    FileInputFormat.setInputPaths(job, dataDirPath);
    FileOutputFormat.setOutputPath(job, outDirPath);
    jobConfig.setInt(MDSConstants.NUM_MAPS, numMapTasks);
    // Load from HDFS; currently the x file is ignored and not read from HDFS
    jobConfig.set(MDSConstants.X_FILE_PATH, xFilePath.toString());
    // Output to HDFS
    jobConfig.set(MDSConstants.X_OUT_FILE_PATH, xOutFilePath.toString());
    // Load from shared file system
    jobConfig.set(MDSConstants.IDS_FILE, idsFile);
    // Load from shared file system
    jobConfig.set(MDSConstants.LABELS_FILE, labelsFile);
    jobConfig.setDouble(MDSConstants.THRESHOLD, threshold);
    jobConfig.setInt(MDSConstants.D, d);
    jobConfig.setDouble(MDSConstants.ALPHA, alpha);
    jobConfig.setInt(MDSConstants.N, n);
    jobConfig.setInt(MDSConstants.CG_ITER, cgIter);
    jobConfig.setInt(MDSConstants.NUM_THREADS, numThreads);
    // Use a file-based input format
    job.setInputFormatClass(SingleFileInputFormat.class);
    job.setJarByClass(MDSLauncher.class);
    job.setMapperClass(WDAMDSMapper.class);
    // When using MultiFileInputFormat, remember to set the number of map tasks
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    job.setNumReduceTasks(0);
    return job;
}
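Note that every helper above only configures and returns the Job without submitting it. A hedged sketch of how a caller inside one of these launchers might submit the result, assuming the launcher extends Configured (so getConf() is available) and that the argument variables are already in scope:

// Assumed caller (not from the sources above): submit the configured job
// and block until it finishes.
Job job = configureKMeansJob(numOfDataPoints, numCentroids, vectorSize, numPointFiles,
        numMapTasks, numThreads, numIterations, dataDir, cenDir, outDir, getConf());
boolean succeeded = job.waitForCompletion(true); // true = print progress to the client
if (!succeeded) {
    throw new IOException("kmeans_job failed: " + job.getJobID());
}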