Example usage for org.apache.hadoop.conf Configuration setLong

Introduction

On this page you can find usage examples for org.apache.hadoop.conf.Configuration setLong.

Prototype

public void setLong(String name, long value) 

Document

Set the value of the name property to a long.
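
Before the source-file examples below, here is a minimal, self-contained sketch of how setLong is typically paired with getLong. The property name "my.app.max.bytes" and its values are made-up placeholders for illustration only.

import org.apache.hadoop.conf.Configuration;

public class SetLongSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a long-valued property ("my.app.max.bytes" is a made-up example key).
        conf.setLong("my.app.max.bytes", 64L * 1024 * 1024);

        // Read it back; the second argument is the default returned when the key is absent.
        long maxBytes = conf.getLong("my.app.max.bytes", 0L);
        System.out.println("my.app.max.bytes = " + maxBytes);
    }
}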

Usage

From source file:org.unigram.likelike.lsh.LSHRecommendations.java

License:Apache License

/**
 * Run the LSH recommendation job.
 * @param args arguments
 * @param conf configuration
 * @return 0 on success.
 * @throws Exception -
 */
public int run(final String[] args, final Configuration conf) throws Exception {

    String inputFile = "";
    String outputPrefix = "";
    String clusterDir = "";
    int iterate = 1;
    int depth = 0;
    int rowSize = 0;

    for (int i = 0; i < args.length; ++i) {
        if ("-input".equals(args[i])) {
            inputFile = args[++i];
            clusterDir = inputFile + ".clusters";
        } else if ("-output".equals(args[i])) {
            outputPrefix = args[++i];
        } else if ("-depth".equals(args[i])) {
            conf.setInt(LikelikeConstants.FEATURE_DEPTH, Integer.parseInt(args[++i]));
        } else if ("-iterate".equals(args[i])) {
            iterate = Integer.parseInt(args[++i]);
        } else if ("-maxCluster".equals(args[i])) {
            conf.setLong(LikelikeConstants.MAX_CLUSTER_SIZE, Long.parseLong(args[++i]));
        } else if ("-minCluster".equals(args[i])) {
            conf.setLong(LikelikeConstants.MIN_CLUSTER_SIZE, Long.parseLong(args[++i]));
        } else if ("-maxRecommend".equals(args[i])) {
            conf.setLong(LikelikeConstants.MAX_OUTPUT_SIZE, Long.parseLong(args[++i]));
        } else if ("-storage".equals(args[i])) {
            String storageType = args[++i];
            if (storageType.equals("dfs")) {
                conf.set(LikelikeConstants.LIKELIKE_OUTPUT_WRITER,
                        LikelikeConstants.DEFAULT_LIKELIKE_OUTPUT_WRITER);
            } else if (storageType.equals("cassandra")) {
                conf.set(LikelikeConstants.LIKELIKE_OUTPUT_WRITER, this.cassandraAccessor);
            }
        } else if ("-help".equals(args[i])) {
            this.showParameters();
            return 0;
        }
    }

    this.setHashKeys(iterate, inputFile, conf);
    this.extractClusters(inputFile, clusterDir, conf);
    this.getRecommendations(clusterDir, outputPrefix, conf, FileSystem.get(conf));

    FsUtil.clean(FileSystem.get(conf), clusterDir);
    return 0;
}

From source file:org.unigram.likelike.lsh.LSHRecommendations.java

License:Apache License

/**
 * Add the configuration information from the result of
 * extracting candidates to conf.
 * 
 * @param counters contains counter
 * @param conf configuration
 */
protected void setResultConf(final Counters counters, final Configuration conf) {
    conf.setLong(LikelikeConstants.LIKELIKE_INPUT_RECORDS,
            counters.findCounter(LikelikeConstants.COUNTER_GROUP, "MAP_INPUT_RECORDS").getValue());
    this.logger
            .logInfo("The number of record is " + conf.getLong(LikelikeConstants.LIKELIKE_INPUT_RECORDS, -1));
}

From source file:org.voltdb.hadoop.VoltConfiguration.java

License:Open Source License

/**
 * Sets the job configuration properties that correspond to the given parameters
 *
 * @param conf a {@linkplain Configuration}
 * @param hostNames an array of host names
 * @param userName The user name for the client connection
 * @param password The password for the client connection
 * @param tableName destination table name
 * @param batchSize The batch size for CSVBulkLoader
 * @param clientTimeOut The client timeout in milliseconds
 * @param maxErrors The maximum number of errors before CSVBulkLoader stops processing input
 * @param upsert Whether to upsert the data via the bulk loader.
 */
public static void configureVoltDB(Configuration conf, String[] hostNames, String userName, String password,
        String tableName, int batchSize, long clientTimeOut, int maxErrors, boolean upsert) {

    configureVoltDB(conf, hostNames, userName, password, tableName);

    if (clientTimeOut > 0)
        conf.setLong(CLIENT_TIMEOUT_PROP, clientTimeOut);
    if (batchSize > 0)
        conf.setInt(BATCHSIZE_PROP, batchSize);
    if (maxErrors > 0)
        conf.setInt(BULKLOADER_MAX_ERRORS_PROP, maxErrors);
    conf.setBoolean(BULKLOADER_UPSERT_PROP, upsert);
}
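
A minimal usage sketch for the overload above follows; the host names, credentials, table name, and tuning values are placeholder assumptions, not values taken from the original project.

import org.apache.hadoop.conf.Configuration;
import org.voltdb.hadoop.VoltConfiguration;

public class VoltConfigSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Placeholder cluster settings; adjust for your own deployment.
        String[] hosts = { "volt-host-1", "volt-host-2" };

        // batchSize = 1000 rows, clientTimeOut = 60 seconds, maxErrors = 100, upsert enabled.
        VoltConfiguration.configureVoltDB(conf, hosts, "voltUser", "voltPassword",
                "MY_TABLE", 1000, 60000L, 100, true);
    }
}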

From source file:root.input.util.CreateSimilarityMatrixJob.java

License:Apache License

/**
 * {@inheritDoc}
 */
@Override
public int run(String[] args) throws Exception {

    constructParameterList();

    if (parseArguments(args) == null) {
        return -1;
    }

    initializeConfigurationParameters();

    printJobHeader();

    Configuration conf = getConf();

    DistanceMeasure measure = ClassUtils.instantiateAs(distanceMeasure, DistanceMeasure.class);

    conf.setInt("numLevels", Integer.valueOf(numLevels));
    conf.setLong("diagScale", Long.valueOf(diagScale));
    conf.set(DISTANCE_MEASURE_KEY, measure.getClass().getName());
    conf.set(SEEDS_PATH_KEY, inputDirectory.toString());

    Job job = new Job(conf, "CreateSimilarityMatrix: " + inputDirectory);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VectorWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setMapperClass(SimilarityMatrixMapper.class);

    job.setNumReduceTasks(0);
    FileInputFormat.addInputPath(job, new Path(inputDirectory));
    FileOutputFormat.setOutputPath(job, new Path(outputDirectory));

    job.setJarByClass(CreateSimilarityMatrixJob.class);

    if (!job.waitForCompletion(true)) {
        throw new IllegalStateException("CreateSimilarityMatrix failed processing " + inputDirectory);
    }

    return 0;

}

From source file:simsql.runtime.AggregateOp.java

License:Apache License

public void setConfigurations(Configuration conf, RuntimeParameter params) {

    ExampleRuntimeParameter p = (ExampleRuntimeParameter) params;
    conf.setLong("simsql.preAggregationBufferSize", (p.getMemoryPerCPUInMB() / 8) * 1024 * 1024);
}

From source file:simsql.runtime.JoinOp.java

License:Apache License

public void setConfigurations(Configuration conf, RuntimeParameter params) {

    // first, send out the type of join
    conf.setStrings("simsql.joinType", new String[] { joinType.toString().toLowerCase() });

    // set the self-join value
    conf.setBoolean("simsql.isSelfJoin", isSelfJoin);

    // see if we have a Cartesian product
    conf.setBoolean("simsql.joinCartesian", isCartesian);

    // see if we have a pure, map-only merge join
    conf.setBoolean("simsql.isMergeJoin", mergeJoin);

    // if we are able to avoid a sort of the left or of the right, then we need some extra configs that will allow the merge
    if (mergeJoin || sortOnlyRight) {
        conf.setInt("simsql.sortedFileTypeCode", getDB().getTypeCode(getDB().getTableName(leftFile)));
        conf.set("simsql.sortedFileName", leftFile);
        conf.setInt("simsql.sortedFileNumAtts", getDB().getNumAtts(getDB().getTableName(leftFile)));
    } else if (sortOnlyLeft) {
        conf.setInt("simsql.sortedFileTypeCode", getDB().getTypeCode(getDB().getTableName(rightFile)));
        conf.set("simsql.sortedFileName", rightFile);
        conf.setInt("simsql.sortedFileNumAtts", getDB().getNumAtts(getDB().getTableName(rightFile)));
    }

    // find out which relation is the largest.
    long leftSize = getPathsActualSize(getValue("leftInput.inFiles").getStringList().toArray(new String[0]));
    long rightSize = getPathsActualSize(getValue("rightInput.inFiles").getStringList().toArray(new String[0]));
    long smallerSize = 0;
    long largerSize = 0;
    int smallerTypeCode = -1;
    int largerTypeCode = -1;

    if (leftSize < rightSize) {
        smallerSize = leftSize;
        largerSize = rightSize;
        smallerTypeCode = leftTypeCode;
        largerTypeCode = rightTypeCode;
    } else {
        smallerSize = rightSize;
        largerSize = leftSize;
        smallerTypeCode = rightTypeCode;
        largerTypeCode = leftTypeCode;
    }

    // and pass the typecode and size of those relations.
    conf.setInt("simsql.smallerRelation.typeCode", smallerTypeCode);
    conf.setInt("simsql.largerRelation.typeCode", largerTypeCode);
    conf.setLong("simsql.smallerRelation.size", smallerSize);
    conf.setLong("simsql.largerRelation.size", largerSize);
}

From source file:simsql.runtime.SelectionOp.java

License:Apache License

public void setConfigurations(Configuration conf, RuntimeParameter params) {

    // set for removing duplicates
    conf.setBoolean("simsql.removeDuplicates", removeDuplicates);

    // set for reducer thing.
    conf.setBoolean("simsql.runSelectionReducer", runSelectionReducer);

    // set the pre-dedup buffer size: 1/8 of available memory.
    ExampleRuntimeParameter p = (ExampleRuntimeParameter) params;
    conf.setLong("simsql.duplicatesBufferSize", (p.getMemoryPerCPUInMB() / 8) * 1024 * 1024);
}

From source file:smile.wide.algorithms.SMILEBSjob.java

License:Apache License

/** Sets up the Hadoop job, sends it to the cluster,
 * and waits for the job to complete. */
@Override
public int run(String[] params) throws Exception {
    //params: <trainfile> <output_path> <number of seeds>
    Configuration conf = super.getConf();
    conf.set("trainfile", params[0]);
    //distributed cache initialization
    DistributedCache.createSymlink(conf);

    DistributedCache.addFileToClassPath(new Path(libHDFSPath_ + "/smile.jar"), conf);
    DistributedCache.addCacheFile(new URI(libHDFSPath_ + "/libjsmile.so#libjsmile.so"), conf);
    //upload data file to HDFS and add it to the distributed cache
    FileSystem dfs = FileSystem.get(conf);
    dfs.copyFromLocalFile(new Path(params[0]), new Path(dataHDFSPath_));
    DistributedCache.addCacheFile(new URI(dataHDFSPath_ + basename(params[0]) + "#" + basename(params[0])),
            conf);

    //for now, keep the Bayesian search parameters constant
    conf.setInt("iterationCount", iterationCount);
    conf.setFloat("linkProbability", linkProbability);
    conf.setInt("maxParents", maxParents);
    conf.setInt("maxSearchTime", maxSearchTime);
    conf.setFloat("priorLinkProbability", priorLinkProbability);
    conf.setInt("priorSampleSize", priorSampleSize);
    //
    conf.setInt(RandSeedInputFormat.CONFKEY_SEED_COUNT, Integer.parseInt(params[2]));
    conf.setInt(RandSeedInputFormat.CONFKEY_WARMUP_ITER, 100000);
    conf.setLong("mapred.task.timeout", 3600000);

    Job job = new Job(conf);
    job.setJobName("Distributed Bayesian Search");
    job.setJarByClass(SMILEBSjob.class);
    job.setMapperClass(SMILEBSMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(StrucLogLikeWritable.class);
    job.setReducerClass(SMILEBSReducer.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(RandSeedInputFormat.class);
    Path outputPath = new Path(params[1]);
    FileOutputFormat.setOutputPath(job, outputPath);
    outputPath.getFileSystem(conf).delete(outputPath, true);

    //Run the job
    job.waitForCompletion(true);
    //now download the result (Path.suffix returns a new Path, so use its return value directly)
    dfs.copyToLocalFile(outputPath.suffix("/part-r-00000"), new Path("./smile-output.txt"));
    return 0;
}

From source file:tv.icntv.grade.film.dbcollect.TableConcurrencyJob.java

License:Apache License

@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = getConf();
    String[] tables = configuration.get("hbase.cdn.tables").split(",");
    JobControl jobControl = new JobControl("data init");
    for (String table : tables) {
        String hbaseDbDirectory = String.format(configuration.get("hdfs.directory.from.hbase"), new Date(),
                table);
        HadoopUtils.deleteIfExist(hbaseDbDirectory);
        Job tableJob = new Job(configuration, "icntv grade init " + table);
        TableMapReduceUtil.initTableMapperJob(table, new Scan(), TableInitMapper.class, Text.class, Text.class,
                tableJob);
        MapReduceUtils.initReducerJob(new Path(hbaseDbDirectory), TableInitReducer.class, tableJob);
        // controlled job
        ControlledJob tableControlledJob = new ControlledJob(configuration);
        tableControlledJob.setJob(tableJob);

        String dbDirectory = String.format(configuration.get("hdfs.directory.base.db"), new Date(), table);
        HadoopUtils.deleteIfExist(dbDirectory);
        Configuration conf = getConf();
        // Set the minimum split size (512 MB) before the Job copies the configuration.
        conf.setLong("mapred.min.split.size", 512 * 1024 * 1024L);
        Job db = new Job(conf, "icntv db collect " + table);
        MapReduceUtils.initMapperJob(DefaultHbaseMapper.class, Text.class, Text.class, this.getClass(), db,
                new Path(strings[1]));
        FileOutputFormat.setOutputPath(db, new Path(dbDirectory));
        db.setNumReduceTasks(0);
        ControlledJob dbControlledJob = new ControlledJob(conf);
        dbControlledJob.setJob(db);
        dbControlledJob.addDependingJob(tableControlledJob);
        //controlledJob.
        jobControl.addJob(tableControlledJob);
        jobControl.addJob(dbControlledJob);
    }
    new Thread(jobControl).start();
    while (!jobControl.allFinished()) {
        Thread.sleep(5000);
    }
    return 0;
}

From source file:tv.icntv.grade.film.dbcollect.TableInitJob.java

License:Apache License

@Override
public int run(String[] strings) throws Exception {

    Configuration configuration = getConf();
    JobControl jobControl = new JobControl("init data");
    for (String table : strings) {
        String dbPath = String.format(configuration.get("hdfs.directory.base.db"), new Date(), table);
        //            String[] arrays = new String[]{table,//input table
        //                    String.format(configuration.get("hdfs.directory.from.hbase"), new Date(), table),
        //                    db
        //            };
        String hbasePath = String.format(configuration.get("hdfs.directory.from.hbase"), new Date(), table);
        //table job
        Job tableJob = new Job(configuration, "icntv grade init");
        Scan scan = new Scan();

        HadoopUtils.deleteIfExist(hbasePath);
        HadoopUtils.deleteIfExist(dbPath);
        TableMapReduceUtil.initTableMapperJob(table, scan, TableInitMapper.class, Text.class, Text.class,
                tableJob);
        MapReduceUtils.initReducerJob(new Path(hbasePath), TableInitReducer.class, tableJob);
        ControlledJob firstControll = new ControlledJob(configuration);
        firstControll.setJob(tableJob);
        //            tableJob.waitForCompletion(true);
        // Set the minimum split size (512 MB) before the Job copies the configuration.
        configuration.setLong("mapred.min.split.size", 512 * 1024 * 1024L);
        Job db = new Job(configuration, "icntv db collect");
        MapReduceUtils.initMapperJob(DefaultHbaseMapper.class, Text.class, Text.class, this.getClass(), db,
                new Path(hbasePath));
        FileOutputFormat.setOutputPath(db, new Path(dbPath));
        db.setNumReduceTasks(0);
        ControlledJob secondaryController = new ControlledJob(configuration);
        secondaryController.setJob(db);
        secondaryController.addDependingJob(firstControll);
        jobControl.addJob(firstControll);
        jobControl.addJob(secondaryController);
    }
    new Thread(jobControl).start();
    while (!jobControl.allFinished()) {
        Thread.sleep(5000);
    }
    logger.info("job controller successed job size=" + jobControl.getSuccessfulJobList().size());
    //        db.waitForCompletion(true);
    return 0;
}