List of usage examples for org.apache.hadoop.conf.Configuration.setLong
public void setLong(String name, long value)
Sets the value of the name property to a long.
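Before the project examples below, here is a minimal, self-contained sketch of the usual set/get round trip. The property name "example.max.records" is purely illustrative and does not come from any of the projects listed here.

import org.apache.hadoop.conf.Configuration;

public class SetLongExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // store a long-valued job parameter (hypothetical property name)
        conf.setLong("example.max.records", 500000L);

        // read it back later, supplying -1 as the default if the property is unset
        long maxRecords = conf.getLong("example.max.records", -1L);
        System.out.println("example.max.records = " + maxRecords);
    }
}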
From source file:org.unigram.likelike.lsh.LSHRecommendations.java
License:Apache License
/**
 * Run.
 * @param args arguments
 * @param conf configuration
 * @return 0 when succeeded.
 * @throws Exception -
 */
public int run(final String[] args, final Configuration conf) throws Exception {
    String inputFile = "";
    String outputPrefix = "";
    String clusterDir = "";
    int iterate = 1;
    int depth = 0;
    int rowSize = 0;

    for (int i = 0; i < args.length; ++i) {
        if ("-input".equals(args[i])) {
            inputFile = args[++i];
            clusterDir = inputFile + ".clusters";
        } else if ("-output".equals(args[i])) {
            outputPrefix = args[++i];
        } else if ("-depth".equals(args[i])) {
            conf.setInt(LikelikeConstants.FEATURE_DEPTH, Integer.parseInt(args[++i]));
        } else if ("-iterate".equals(args[i])) {
            iterate = Integer.parseInt(args[++i]);
        } else if ("-maxCluster".equals(args[i])) {
            conf.setLong(LikelikeConstants.MAX_CLUSTER_SIZE, Long.parseLong(args[++i]));
        } else if ("-minCluster".equals(args[i])) {
            conf.setLong(LikelikeConstants.MIN_CLUSTER_SIZE, Long.parseLong(args[++i]));
        } else if ("-maxRecommend".equals(args[i])) {
            conf.setLong(LikelikeConstants.MAX_OUTPUT_SIZE, Long.parseLong(args[++i]));
        } else if ("-storage".equals(args[i])) {
            String storageType = args[++i];
            if (storageType.equals("dfs")) {
                conf.set(LikelikeConstants.LIKELIKE_OUTPUT_WRITER,
                        LikelikeConstants.DEFAULT_LIKELIKE_OUTPUT_WRITER);
            } else if (storageType.equals("cassandra")) {
                conf.set(LikelikeConstants.LIKELIKE_OUTPUT_WRITER, this.cassandraAccessor);
            }
        } else if ("-help".equals(args[i])) {
            this.showParameters();
            return 0;
        }
    }

    this.setHashKeys(iterate, inputFile, conf);
    this.extractClusters(inputFile, clusterDir, conf);
    this.getRecommendations(clusterDir, outputPrefix, conf, FileSystem.get(conf));
    FsUtil.clean(FileSystem.get(conf), clusterDir);
    return 0;
}
From source file:org.unigram.likelike.lsh.LSHRecommendations.java
License:Apache License
/**
 * Add the configuration information from the result of
 * extract candidates to conf.
 *
 * @param counters contains counter
 * @param conf configuration
 */
protected void setResultConf(final Counters counters, final Configuration conf) {
    conf.setLong(LikelikeConstants.LIKELIKE_INPUT_RECORDS,
            counters.findCounter(LikelikeConstants.COUNTER_GROUP, "MAP_INPUT_RECORDS").getValue());
    this.logger.logInfo("The number of record is "
            + conf.getLong(LikelikeConstants.LIKELIKE_INPUT_RECORDS, -1));
}
From source file:org.voltdb.hadoop.VoltConfiguration.java
License:Open Source License
/**
 * Sets the job configuration properties that correspond to the given parameters
 *
 * @param conf a {@linkplain Configuration}
 * @param hostNames an array of host names
 * @param userName The user name for client connection
 * @param password The password for client connection
 * @param tableName destination table name
 * @param batchSize The batch size for CSVBulkLoader
 * @param clientTimeOut The client timeout in milliseconds
 * @param maxErrors The maximal number of errors before CSVBulkLoader stops processing input
 * @param upsert Upsert the data via bulkloader.
 */
public static void configureVoltDB(Configuration conf, String[] hostNames, String userName, String password,
        String tableName, int batchSize, long clientTimeOut, int maxErrors, boolean upsert) {
    configureVoltDB(conf, hostNames, userName, password, tableName);

    if (clientTimeOut > 0)
        conf.setLong(CLIENT_TIMEOUT_PROP, clientTimeOut);
    if (batchSize > 0)
        conf.setInt(BATCHSIZE_PROP, batchSize);
    if (maxErrors > 0)
        conf.setInt(BULKLOADER_MAX_ERRORS_PROP, maxErrors);

    conf.setBoolean(BULKLOADER_UPSERT_PROP, upsert);
}
From source file:root.input.util.CreateSimilarityMatrixJob.java
License:Apache License
/**
 * {@inheritDoc}
 */
@Override
public int run(String[] args) throws Exception {
    constructParameterList();
    if (parseArguments(args) == null) {
        return -1;
    }
    initializeConfigurationParameters();
    printJobHeader();

    Configuration conf = getConf();

    DistanceMeasure measure = ClassUtils.instantiateAs(distanceMeasure, DistanceMeasure.class);

    conf.setInt("numLevels", Integer.valueOf(numLevels));
    conf.setLong("diagScale", Long.valueOf(diagScale));
    conf.set(DISTANCE_MEASURE_KEY, measure.getClass().getName());
    conf.set(SEEDS_PATH_KEY, inputDirectory.toString());

    Job job = new Job(conf, "CreateSimilarityMatrix: " + inputDirectory);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VectorWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setMapperClass(SimilarityMatrixMapper.class);
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputDirectory));
    FileOutputFormat.setOutputPath(job, new Path(outputDirectory));

    job.setJarByClass(CreateSimilarityMatrixJob.class);

    if (!job.waitForCompletion(true)) {
        throw new IllegalStateException("CreateSimilarityMatrix failed processing " + inputDirectory);
    }
    return 0;
}
From source file:simsql.runtime.AggregateOp.java
License:Apache License
public void setConfigurations(Configuration conf, RuntimeParameter params) {
    ExampleRuntimeParameter p = (ExampleRuntimeParameter) params;
    conf.setLong("simsql.preAggregationBufferSize", (p.getMemoryPerCPUInMB() / 8) * 1024 * 1024);
}
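The listing above shows only the driver side. As a point of reference, the following is a hypothetical task-side sketch (not SimSQL's actual mapper; the class name and map logic are illustrative) of how a long property set this way is typically read back with Configuration.getLong:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class BufferSizeMapper extends Mapper<LongWritable, Text, Text, Text> {

    private long bufferSize;

    @Override
    protected void setup(Context context) {
        Configuration conf = context.getConfiguration();
        // read the long value set by the driver; 0 is the fallback if it was never set
        bufferSize = conf.getLong("simsql.preAggregationBufferSize", 0L);
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // ... bufferSize would size an in-memory pre-aggregation buffer here ...
        context.write(value, value);
    }
}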
From source file:simsql.runtime.JoinOp.java
License:Apache License
public void setConfigurations(Configuration conf, RuntimeParameter params) {

    // first, send out the type of join
    conf.setStrings("simsql.joinType", new String[] { joinType.toString().toLowerCase() });

    // set the self-join value
    conf.setBoolean("simsql.isSelfJoin", isSelfJoin);

    // see if we have a Cartesian product
    conf.setBoolean("simsql.joinCartesian", isCartesian);

    // see if we have a pure, map-only merge join
    conf.setBoolean("simsql.isMergeJoin", mergeJoin);

    // if we are able to avoid a sort of the left or of the right, then we need some extra configs that will allow the merge
    if (mergeJoin || sortOnlyRight) {
        conf.setInt("simsql.sortedFileTypeCode", getDB().getTypeCode(getDB().getTableName(leftFile)));
        conf.set("simsql.sortedFileName", leftFile);
        conf.setInt("simsql.sortedFileNumAtts", getDB().getNumAtts(getDB().getTableName(leftFile)));
    } else if (sortOnlyLeft) {
        conf.setInt("simsql.sortedFileTypeCode", getDB().getTypeCode(getDB().getTableName(rightFile)));
        conf.set("simsql.sortedFileName", rightFile);
        conf.setInt("simsql.sortedFileNumAtts", getDB().getNumAtts(getDB().getTableName(rightFile)));
    }

    // find out which relation is the largest.
    long leftSize = getPathsActualSize(getValue("leftInput.inFiles").getStringList().toArray(new String[0]));
    long rightSize = getPathsActualSize(getValue("rightInput.inFiles").getStringList().toArray(new String[0]));

    long smallerSize = 0;
    long largerSize = 0;
    int smallerTypeCode = -1;
    int largerTypeCode = -1;

    if (leftSize < rightSize) {
        smallerSize = leftSize;
        largerSize = rightSize;
        smallerTypeCode = leftTypeCode;
        largerTypeCode = rightTypeCode;
    } else {
        smallerSize = rightSize;
        largerSize = leftSize;
        smallerTypeCode = rightTypeCode;
        largerTypeCode = leftTypeCode;
    }

    // and pass the typecode and size of those relations.
    conf.setInt("simsql.smallerRelation.typeCode", smallerTypeCode);
    conf.setInt("simsql.largerRelation.typeCode", largerTypeCode);
    conf.setLong("simsql.smallerRelation.size", smallerSize);
    conf.setLong("simsql.largerRelation.size", largerSize);
}
From source file:simsql.runtime.SelectionOp.java
License:Apache License
public void setConfigurations(Configuration conf, RuntimeParameter params) {

    // set for removing duplicates
    conf.setBoolean("simsql.removeDuplicates", removeDuplicates);

    // set for reducer thing.
    conf.setBoolean("simsql.runSelectionReducer", runSelectionReducer);

    // set the pre-dedup buffer size: 1/8 of available memory.
    ExampleRuntimeParameter p = (ExampleRuntimeParameter) params;
    conf.setLong("simsql.duplicatesBufferSize", (p.getMemoryPerCPUInMB() / 8) * 1024 * 1024);
}
From source file:smile.wide.algorithms.SMILEBSjob.java
License:Apache License
/** Sets up the hadoop job and sends it to the cluster
 *  waits for the job to be completed. */
@Override
public int run(String[] params) throws Exception {
    //params: <trainfile> <output_path> <number of seeds>
    Configuration conf = super.getConf();
    conf.set("trainfile", params[0]);

    //distributed cache initialization
    DistributedCache.createSymlink(conf);
    DistributedCache.addFileToClassPath(new Path(libHDFSPath_ + "/smile.jar"), conf);
    DistributedCache.addCacheFile(new URI(libHDFSPath_ + "/libjsmile.so#libjsmile.so"), conf);

    //upload data file to HDFS and add it to the distributed cache
    FileSystem dfs = FileSystem.get(conf);
    dfs.copyFromLocalFile(new Path(params[0]), new Path(dataHDFSPath_));
    DistributedCache.addCacheFile(new URI(dataHDFSPath_ + basename(params[0]) + "#" + basename(params[0])),
            conf);

    //for now, keep the Bayesian search parameters constant
    conf.setInt("iterationCount", iterationCount);
    conf.setFloat("linkProbability", linkProbability);
    conf.setInt("maxParents", maxParents);
    conf.setInt("maxSearchTime", maxSearchTime);
    conf.setFloat("priorLinkProbability", priorLinkProbability);
    conf.setInt("priorSampleSize", priorSampleSize);
    // conf.setInt(RandSeedInputFormat.CONFKEY_SEED_COUNT, Integer.parseInt(params[2]));
    conf.setInt(RandSeedInputFormat.CONFKEY_WARMUP_ITER, 100000);
    conf.setLong("mapred.task.timeout", 3600000);

    Job job = new Job(conf);
    job.setJobName("Distributed Bayesian Search");
    job.setJarByClass(SMILEBSjob.class);
    job.setMapperClass(SMILEBSMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(StrucLogLikeWritable.class);
    job.setReducerClass(SMILEBSReducer.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(RandSeedInputFormat.class);

    Path outputPath = new Path(params[1]);
    FileOutputFormat.setOutputPath(job, outputPath);
    outputPath.getFileSystem(conf).delete(outputPath, true);

    //Run the job
    job.waitForCompletion(true);

    //now download result
    outputPath.suffix("/part-r-00000");
    dfs.copyToLocalFile(outputPath.suffix("/part-r-00000"), new Path("./smile-output.txt"));
    return 0;
}
From source file:tv.icntv.grade.film.dbcollect.TableConcurrencyJob.java
License:Apache License
@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = getConf();
    String[] tables = configuration.get("hbase.cdn.tables").split(",");
    JobControl jobControl = new JobControl("data init");

    for (String table : tables) {
        String hbaseDbDirectory = String.format(configuration.get("hdfs.directory.from.hbase"), new Date(),
                table);
        HadoopUtils.deleteIfExist(hbaseDbDirectory);

        Job tableJob = new Job(configuration, "icntv grade init " + table);
        TableMapReduceUtil.initTableMapperJob(table, new Scan(), TableInitMapper.class, Text.class, Text.class,
                tableJob);
        MapReduceUtils.initReducerJob(new Path(hbaseDbDirectory), TableInitReducer.class, tableJob);

        // controlled job
        ControlledJob tableControlledJob = new ControlledJob(configuration);
        tableControlledJob.setJob(tableJob);

        String dbDirectory = String.format(configuration.get("hdfs.directory.base.db"), new Date(), table);
        HadoopUtils.deleteIfExist(dbDirectory);

        Configuration conf = getConf();
        Job db = new Job(conf, "icntv db collect " + table);
        conf.setLong("mapred.min.split.size", 512 * 2014 * 1024L);
        MapReduceUtils.initMapperJob(DefaultHbaseMapper.class, Text.class, Text.class, this.getClass(), db,
                new Path(strings[1]));
        FileOutputFormat.setOutputPath(db, new Path(dbDirectory));
        db.setNumReduceTasks(0);

        ControlledJob dbControlledJob = new ControlledJob(conf);
        dbControlledJob.setJob(db);
        dbControlledJob.addDependingJob(tableControlledJob);
        //controlledJob.
        jobControl.addJob(tableControlledJob);
        jobControl.addJob(dbControlledJob);
    }

    new Thread(jobControl).start();
    while (!jobControl.allFinished()) {
        Thread.sleep(5000);
    }
    return 0;
}
From source file:tv.icntv.grade.film.dbcollect.TableInitJob.java
License:Apache License
@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = getConf();
    JobControl jobControl = new JobControl("init data");

    for (String table : strings) {
        String dbPath = String.format(configuration.get("hdfs.directory.base.db"), new Date(), table);
        // String[] arrays = new String[]{table, //input table
        //         String.format(configuration.get("hdfs.directory.from.hbase"), new Date(), table), // db
        // };
        String hbasePath = String.format(configuration.get("hdfs.directory.from.hbase"), new Date(), table);

        //table job
        Job tableJob = new Job(configuration, "icntv grade init");
        Scan scan = new Scan();
        HadoopUtils.deleteIfExist(hbasePath);
        HadoopUtils.deleteIfExist(dbPath);
        TableMapReduceUtil.initTableMapperJob(table, scan, TableInitMapper.class, Text.class, Text.class,
                tableJob);
        MapReduceUtils.initReducerJob(new Path(hbasePath), TableInitReducer.class, tableJob);

        ControlledJob firstControll = new ControlledJob(configuration);
        firstControll.setJob(tableJob);
        // tableJob.waitForCompletion(true);

        Job db = new Job(configuration, "icntv db collect");
        configuration.setLong("mapred.min.split.size", 512 * 2014 * 1024L);
        MapReduceUtils.initMapperJob(DefaultHbaseMapper.class, Text.class, Text.class, this.getClass(), db,
                new Path(hbasePath));
        FileOutputFormat.setOutputPath(db, new Path(dbPath));
        db.setNumReduceTasks(0);

        ControlledJob secondaryController = new ControlledJob(configuration);
        secondaryController.setJob(db);
        secondaryController.addDependingJob(firstControll);

        jobControl.addJob(firstControll);
        jobControl.addJob(secondaryController);
    }

    new Thread(jobControl).start();
    while (!jobControl.allFinished()) {
        Thread.sleep(5000);
    }
    logger.info("job controller successed job size=" + jobControl.getSuccessfulJobList().size());
    // db.waitForCompletion(true);
    return 0;
}