List of usage examples for org.apache.hadoop.conf.Configuration#setBoolean

public void setBoolean(String name, boolean value)

Sets the value of the name property to the given boolean.
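Before the project examples below, a minimal self-contained sketch of the typical set/get round trip; the property name used here is made up for illustration:

import org.apache.hadoop.conf.Configuration;

public class SetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a boolean flag under a property name (hypothetical name, for illustration only).
        conf.setBoolean("example.feature.enabled", true);

        // Read it back; the second argument is the default returned when the property is unset.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("example.feature.enabled = " + enabled);
    }
}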
From source file: edu.isi.mavuno.app.mine.HarvestParaphraseCandidates.java
License: Apache License

public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();
    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.HarvestParaphraseCandidates.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.HarvestParaphraseCandidates.CorpusClass", conf);
    String extractorClass = MavunoUtils.getRequiredParam("Mavuno.HarvestParaphraseCandidates.ExtractorClass", conf);
    String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.HarvestParaphraseCandidates.ExtractorArgs", conf);
    String numResults = MavunoUtils.getRequiredParam("Mavuno.HarvestParaphraseCandidates.NumResults", conf);
    String minMatches = MavunoUtils.getRequiredParam("Mavuno.HarvestParaphraseCandidates.MinMatches", conf);
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestParaphraseCandidates.OutputPath", conf);

    MavunoUtils.createDirectory(conf, outputPath);

    sLogger.info("Tool name: HarvestParaphraseCandidates");
    sLogger.info(" - Corpus path: " + corpusPath);
    sLogger.info(" - Corpus class: " + corpusClass);
    sLogger.info(" - Extractor class: " + extractorClass);
    sLogger.info(" - Extractor args: " + extractorArgs);
    sLogger.info(" - Min matches: " + minMatches);
    sLogger.info(" - Output path: " + outputPath);

    Job job = new Job(conf);
    job.setJobName("HarvestParaphraseCandidates");

    // harvest all (context, pattern) triples
    conf.set("Mavuno.HarvestContextPatternPairs.CorpusPath", corpusPath);
    conf.set("Mavuno.HarvestContextPatternPairs.CorpusClass", corpusClass);
    conf.set("Mavuno.HarvestContextPatternPairs.ExtractorClass", extractorClass);
    conf.set("Mavuno.HarvestContextPatternPairs.ExtractorArgs", extractorArgs);
    conf.set("Mavuno.HarvestContextPatternPairs.MinMatches", minMatches);
    conf.set("Mavuno.HarvestContextPatternPairs.OutputPath", outputPath + "/triples");
    new HarvestContextPatternPairs(conf).run();

    FileInputFormat.addInputPath(job, new Path(outputPath + "/triples"));
    FileOutputFormat.setOutputPath(job, new Path(outputPath + "/patterns-all"));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(ContextPatternWritable.class);
    job.setSortComparatorClass(ContextPatternWritable.Comparator.class);
    job.setPartitionerClass(ContextPatternWritable.IdContextPartitioner.class);
    job.setMapOutputValueClass(TextLongPairWritable.class);

    job.setOutputKeyClass(ContextPatternWritable.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    job.waitForCompletion(true);

    // combine scores
    // conf.set("Mavuno.CombineScores.InputPath", outputPath + "/patterns-all");
    // conf.set("Mavuno.CombineScores.OutputPath", outputPath + "/patterns");
    // new CombineScores(conf).run();

    // only retain the top paraphrases
    conf.set("Mavuno.GetTopResults.InputPath", outputPath + "/patterns-all");
    conf.set("Mavuno.GetTopResults.OutputPath", outputPath + "/top-k");
    conf.set("Mavuno.GetTopResults.NumResults", numResults);
    conf.setBoolean("Mavuno.GetTopResults.SequenceFileOutputFormat", false);
    new GetTopResults(conf).run();

    MavunoUtils.removeDirectory(conf, outputPath + "/patterns-all");

    return 0;
}
From source file: edu.iu.daal_sgd.SGDDaalLauncher.java
License: Apache License

/**
 * Launches SGD workers.
 */
@Override
public int run(String[] args) throws Exception {
    /* Put shared libraries into the distributed cache */
    Configuration conf = this.getConf();
    Initialize init = new Initialize(conf, args);

    /* Put shared libraries into the distributed cache */
    init.loadDistributedLibs();
    init.loadDistributedLibsExp();

    // load args
    init.loadSysArgs();

    // load app args
    conf.setInt(HarpDAALConstants.FILE_DIM, Integer.parseInt(args[init.getSysArgNum()]));
    conf.setInt(HarpDAALConstants.FEATURE_DIM, Integer.parseInt(args[init.getSysArgNum() + 1]));
    conf.setDouble(Constants.LAMBDA, Double.parseDouble(args[init.getSysArgNum() + 2]));
    conf.setDouble(Constants.EPSILON, Double.parseDouble(args[init.getSysArgNum() + 3]));
    conf.setBoolean(Constants.ENABLE_TUNING, Boolean.parseBoolean(args[init.getSysArgNum() + 4]));
    conf.set(HarpDAALConstants.TEST_FILE_PATH, args[init.getSysArgNum() + 5]);

    // launch job
    System.out.println("Starting Job");
    long perJobSubmitTime = System.currentTimeMillis();
    System.out.println(
            "Start Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));

    Job mfsgdJob = init.createJob("mfsgdJob", SGDDaalLauncher.class, SGDDaalCollectiveMapper.class);

    // finish job
    boolean jobSuccess = mfsgdJob.waitForCompletion(true);
    System.out.println(
            "End Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    System.out.println(
            "| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime) + " miliseconds |");

    if (!jobSuccess) {
        mfsgdJob.killJob();
        System.out.println("mfsgdJob failed");
    }

    return 0;
}
From source file: edu.iu.daal_subgraph.SCDaalLauncher.java
License: Apache License

private Job configureSCJob(String graphDir, String template, String outDir, int numMapTasks,
        boolean useLocalMultiThread, int numThreads, int numCores, String affinity, String omp_opt, int tpc,
        int mem, double memjavaratio, int send_array_limit, int nbr_split_len, boolean rotation_pipeline,
        int numIteration) throws IOException {

    Configuration configuration = getConf();
    Job job = Job.getInstance(configuration, "subgraph counting");
    Configuration jobConfig = job.getConfiguration();

    Path jobOutDir = new Path(outDir);
    FileSystem fs = FileSystem.get(configuration);
    if (fs.exists(jobOutDir)) {
        fs.delete(jobOutDir, true);
    }

    FileInputFormat.setInputPaths(job, graphDir);
    FileOutputFormat.setOutputPath(job, jobOutDir);

    // job.setInputFormatClass(KeyValueTextInputFormat.class);
    // use harp multifile input format to have a better control on num of map tasks
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(SCDaalLauncher.class);
    job.setMapperClass(SCDaalCollectiveMapper.class);

    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");

    // mapreduce.map.collective.memory.mb
    // 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);

    // mapreduce.map.collective.java.opts
    // -Xmx120000m -Xms120000m
    // int xmx = (mem - 5000) > (mem * 0.9)
    //     ? (mem - 5000) : (int) Math.ceil(mem * 0.5);
    // int xmx = (int) Math.ceil((mem - 5000) * 0.2);
    int xmx = (int) Math.ceil((mem - 5000) * memjavaratio);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");

    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    jobConf.setInt("mapreduce.task.timeout", 60000000);

    job.setNumReduceTasks(0);

    jobConfig.setInt(SCConstants.NUM_MAPPERS, numMapTasks);
    jobConfig.set(SCConstants.TEMPLATE_PATH, template);
    jobConfig.set(SCConstants.OUTPUT_PATH, outDir);
    jobConfig.setBoolean(SCConstants.USE_LOCAL_MULTITHREAD, useLocalMultiThread);
    jobConfig.setInt(SCConstants.NUM_THREADS_PER_NODE, numThreads);
    jobConfig.setInt(SCConstants.THREAD_NUM, numThreads);
    jobConfig.setInt(SCConstants.CORE_NUM, numCores);
    jobConfig.set(SCConstants.THD_AFFINITY, affinity);
    jobConfig.set(SCConstants.OMPSCHEDULE, omp_opt);
    jobConfig.setInt(SCConstants.TPC, tpc);
    jobConfig.setInt(SCConstants.SENDLIMIT, send_array_limit);
    jobConfig.setInt(SCConstants.NBRTASKLEN, nbr_split_len);
    jobConfig.setBoolean(SCConstants.ROTATION_PIPELINE, rotation_pipeline);
    jobConfig.setInt(SCConstants.NUM_ITERATION, numIteration);

    return job;
}
From source file: edu.iu.kmeans.sgxsimu.KMeansLauncher.java
License: Apache License

/**
 * Launches all the tasks in order.
 */
@Override
public int run(String[] args) throws Exception {
    /* Put shared libraries into the distributed cache */
    Configuration conf = this.getConf();
    Initialize init = new Initialize(conf, args);

    // load args
    init.loadSysArgs();
    init.loadDistributedLibs();

    // load app args
    conf.setInt(HarpDAALConstants.FILE_DIM, Integer.parseInt(args[init.getSysArgNum()]));
    conf.setInt(HarpDAALConstants.FEATURE_DIM, Integer.parseInt(args[init.getSysArgNum() + 1]));
    conf.setInt(HarpDAALConstants.NUM_CENTROIDS, Integer.parseInt(args[init.getSysArgNum() + 2]));
    conf.setInt(Constants.ENCLAVE_TOTAL, Integer.parseInt(args[init.getSysArgNum() + 3]));
    conf.setInt(Constants.ENCLAVE_PER_THD, Integer.parseInt(args[init.getSysArgNum() + 4]));
    conf.setInt(Constants.ENCLAVE_TASK, Integer.parseInt(args[init.getSysArgNum() + 5]));
    conf.setBoolean(Constants.ENABLE_SIMU, Boolean.parseBoolean(args[init.getSysArgNum() + 6]));

    // config job
    System.out.println("Starting Job");
    long perJobSubmitTime = System.currentTimeMillis();
    System.out.println(
            "Start Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));

    Job kmeansJob = init.createJob("kmeansJob", KMeansLauncher.class, KMeansCollectiveMapper.class);

    // initialize centroids data
    JobConf thisjobConf = (JobConf) kmeansJob.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    int nFeatures = Integer.parseInt(args[init.getSysArgNum() + 1]);
    int numCentroids = Integer.parseInt(args[init.getSysArgNum() + 2]);
    Path workPath = init.getWorkPath();

    Path cenDir = new Path(workPath, "centroids");
    fs.mkdirs(cenDir);
    if (fs.exists(cenDir)) {
        fs.delete(cenDir, true);
    }

    Path initCenDir = new Path(cenDir, "init_centroids");
    DataGenerator.generateDenseDataSingle(numCentroids, nFeatures, 1000, 0, " ", initCenDir, fs);
    thisjobConf.set(HarpDAALConstants.CEN_DIR, cenDir.toString());
    thisjobConf.set(HarpDAALConstants.CENTROID_FILE_NAME, "init_centroids");

    // generate data if required
    boolean generateData = Boolean.parseBoolean(args[init.getSysArgNum() + 7]);
    if (generateData) {
        Path inputPath = init.getInputPath();
        int total_points = Integer.parseInt(args[init.getSysArgNum() + 8]);
        int total_files = Integer.parseInt(args[init.getSysArgNum() + 9]);
        String tmpDirPathName = args[init.getSysArgNum() + 10];
        DataGenerator.generateDenseDataMulti(total_points, nFeatures, total_files, 2, 1, ",", inputPath,
                tmpDirPathName, fs);
    }

    // finish job
    boolean jobSuccess = kmeansJob.waitForCompletion(true);
    System.out.println(
            "End Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    System.out.println(
            "| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime) + " miliseconds |");

    if (!jobSuccess) {
        kmeansJob.killJob();
        System.out.println("kmeansJob failed");
    }

    return 0;
}
From source file: edu.iu.lda.LDALauncher.java
License: Apache License

private Job configureLDAJob(Path docDir, int numTopics, double alpha, double beta, int numIterations,
        int minBound, int maxBound, int numMapTasks, int numThreadsPerWorker, double scheduleRatio, int mem,
        boolean printModel, Path modelDir, Path outputDir, Configuration configuration, int jobID)
        throws IOException, URISyntaxException {

    configuration.setInt(Constants.NUM_TOPICS, numTopics);
    configuration.setDouble(Constants.ALPHA, alpha);
    configuration.setDouble(Constants.BETA, beta);
    configuration.setInt(Constants.NUM_ITERATIONS, numIterations);
    configuration.setInt(Constants.MIN_BOUND, minBound);
    configuration.setInt(Constants.MAX_BOUND, maxBound);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    configuration.setDouble(Constants.SCHEDULE_RATIO, scheduleRatio);

    System.out.println("Model Dir Path: " + modelDir.toString());
    configuration.set(Constants.MODEL_DIR, modelDir.toString());
    configuration.setBoolean(Constants.PRINT_MODEL, printModel);

    Job job = Job.getInstance(configuration, "lda_job_" + jobID);
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");

    // mapreduce.map.collective.memory.mb
    // 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);

    // mapreduce.map.collective.java.opts
    // -Xmx120000m -Xms120000m
    int xmx = (mem - 5000) > (mem * 0.9) ? (mem - 5000) : (int) Math.ceil(mem * 0.9);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");

    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);

    FileInputFormat.setInputPaths(job, docDir);
    FileOutputFormat.setOutputPath(job, outputDir);

    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(LDALauncher.class);
    job.setMapperClass(LDAMPCollectiveMapper.class);
    job.setNumReduceTasks(0);

    return job;
}
From source file: edu.rosehulman.CollocDriver.java
License: Apache License

/**
 * pass1: generate collocations, ngrams
 */
@SuppressWarnings("deprecation")
private static long generateCollocations(Path input, Path output, Configuration baseConf, boolean emitUnigrams,
        int maxNGramSize, int reduceTasks, int minSupport)
        throws IOException, ClassNotFoundException, InterruptedException {

    Configuration con = new Configuration(baseConf);
    con.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    con.setInt(CollocMapper.MAX_SHINGLE_SIZE, maxNGramSize);
    con.setInt(CollocReducer.MIN_SUPPORT, minSupport);

    Job job = new Job(con);
    job.setJobName(CollocDriver.class.getSimpleName() + ".generateCollocations:" + input);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(GramKey.class);
    job.setMapOutputValueClass(Gram.class);
    job.setPartitionerClass(GramKeyPartitioner.class);
    job.setGroupingComparatorClass(GramKeyGroupComparator.class);

    job.setOutputKeyClass(Gram.class);
    job.setOutputValueClass(Gram.class);

    job.setCombinerClass(CollocCombiner.class);

    FileInputFormat.setInputPaths(job, input);

    Path outputPath = new Path(output, SUBGRAM_OUTPUT_DIRECTORY);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(CollocMapper.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setReducerClass(CollocReducer.class);
    job.setNumReduceTasks(reduceTasks);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }

    return job.getCounters().findCounter(CollocMapper.Count.NGRAM_TOTAL).getValue();
}
From source file: edu.rosehulman.CollocDriver.java
License: Apache License

/**
 * pass2: perform the LLR calculation
 */
private static void computeNGramsPruneByLLR(Path output, Configuration baseConf, long nGramTotal,
        boolean emitUnigrams, float minLLRValue, int reduceTasks)
        throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration(baseConf);
    conf.setLong(LLRReducer.NGRAM_TOTAL, nGramTotal);
    conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    conf.setFloat(LLRReducer.MIN_LLR, minLLRValue);

    Job job = new Job(conf);
    job.setJobName(CollocDriver.class.getSimpleName() + ".computeNGrams: " + output);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(Gram.class);
    job.setMapOutputValueClass(Gram.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    FileInputFormat.setInputPaths(job, new Path(output, SUBGRAM_OUTPUT_DIRECTORY));
    Path outPath = new Path(output, NGRAM_OUTPUT_DIRECTORY);
    FileOutputFormat.setOutputPath(job, outPath);

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setReducerClass(LLRReducer.class);
    job.setNumReduceTasks(reduceTasks);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
From source file: edu.umd.cloud9.webgraph.driver.ClueWebDriver.java
License: Apache License

public int run(String[] args) throws Exception {
    if (args.length < 6) {
        printUsage();
        return -1;
    }

    Configuration conf = getConf();
    String inputArg = DriverUtil.argValue(args, DriverUtil.CL_INPUT);
    final String inputBase = inputArg.endsWith("/") ? inputArg : inputArg + "/";
    String outputArg = DriverUtil.argValue(args, DriverUtil.CL_OUTPUT);
    final String outputBase = outputArg.endsWith("/") ? outputArg : outputArg + "/";
    final String docnoMapping = DriverUtil.argValue(args, DriverUtil.CL_DOCNO_MAPPING);
    final int fromSegment = Integer.parseInt(DriverUtil.argValue(args, DriverUtil.CL_BEGIN_SEGMENT));
    final int toSegment = Integer.parseInt(DriverUtil.argValue(args, DriverUtil.CL_END_SEGMENT));
    final boolean includeInternalLinks = DriverUtil.argExists(args, DriverUtil.CL_INCLUDE_INTERNAL_LINKS);
    final boolean computeAnchorWeights = DriverUtil.argExists(args, DriverUtil.CL_COMPUTE_WEIGHTS);
    final String normalizer = DriverUtil.argValue(args, DriverUtil.CL_NORMALIZER);

    conf.setInt("Cloud9.Mappers", 2000);
    conf.setInt("Cloud9.Reducers", DriverUtil.DEFAULT_REDUCERS);
    conf.set("Cloud9.DocnoMappingFile", docnoMapping);
    conf.setBoolean("Cloud9.IncludeInternalLinks", includeInternalLinks);
    conf.set("Cloud9.AnchorTextNormalizer", normalizer);

    // Extract link information for each segment separately
    for (int i = fromSegment; i <= toSegment; i++) {
        String inputPath = inputBase + "en." + (i == 10 ? "10" : ("0" + i));
        String outputPath = outputBase + DriverUtil.OUTPUT_EXTRACT_LINKS + "/en." + (i == 10 ? "10" : ("0" + i));
        conf.set("Cloud9.InputPath", inputPath);
        conf.set("Cloud9.OutputPath", outputPath);
        int r = new ClueExtractLinks(conf).run();
        if (r != 0) {
            return -1;
        }
    }

    // Construct the reverse web graph (i.e., collect incoming link information)
    String inputPath = "";
    for (int i = fromSegment; i < toSegment; i++) {
        inputPath += outputBase + DriverUtil.OUTPUT_EXTRACT_LINKS + "/en.0" + i + "/,";
    }
    if (toSegment == 10) {
        inputPath += outputBase + DriverUtil.OUTPUT_EXTRACT_LINKS + "/en.10/";
    } else {
        inputPath += outputBase + DriverUtil.OUTPUT_EXTRACT_LINKS + "/en.0" + toSegment + "/";
    }
    String outputPath = outputBase + DriverUtil.OUTPUT_REVERSE_WEBGRAPH + "/";
    conf.set("Cloud9.InputPath", inputPath);
    conf.set("Cloud9.OutputPath", outputPath);
    conf.setInt("Cloud9.Mappers", 1);
    conf.setInt("Cloud9.Reducers", DriverUtil.DEFAULT_REDUCERS * (toSegment - fromSegment + 1));
    int r = new BuildReverseWebGraph(conf).run();
    if (r != 0) {
        return -1;
    }

    // Construct the web graph
    inputPath = outputBase + DriverUtil.OUTPUT_REVERSE_WEBGRAPH + "/";
    outputPath = outputBase + DriverUtil.OUTPUT_WEBGRAPH + "/";
    conf.set("Cloud9.InputPath", inputPath);
    conf.set("Cloud9.OutputPath", outputPath);
    conf.setInt("Cloud9.Mappers", 1);
    conf.setInt("Cloud9.Reducers", DriverUtil.DEFAULT_REDUCERS * (toSegment - fromSegment + 1));
    r = new BuildWebGraph(conf).run();
    if (r != 0) {
        return -1;
    }

    if (computeAnchorWeights) {
        // Propagating domain names in order to compute anchor weights
        inputPath = outputBase + DriverUtil.OUTPUT_WEBGRAPH + "/";
        outputPath = outputBase + DriverUtil.OUTPUT_HOST_NAMES + "/";
        conf.set("Cloud9.InputPath", inputPath);
        conf.set("Cloud9.OutputPath", outputPath);
        conf.setInt("Cloud9.Mappers", 1);
        conf.setInt("Cloud9.Reducers", DriverUtil.DEFAULT_REDUCERS * (toSegment - fromSegment + 1));
        r = new CollectHostnames(conf).run();
        if (r != 0) {
            return -1;
        }

        // Compute the weights
        inputPath = outputBase + DriverUtil.OUTPUT_REVERSE_WEBGRAPH + "/," + outputBase
                + DriverUtil.OUTPUT_HOST_NAMES + "/";
        outputPath = outputBase + DriverUtil.OUTPUT_WEGIHTED_REVERSE_WEBGRAPH + "/";
        conf.set("Cloud9.InputPath", inputPath);
        conf.set("Cloud9.OutputPath", outputPath);
        conf.setInt("Cloud9.Mappers", 1);
        conf.setInt("Cloud9.Reducers", DriverUtil.DEFAULT_REDUCERS * (toSegment - fromSegment + 1));
        r = new ComputeWeight(conf).run();
        if (r != 0) {
            return -1;
        }
    }

    return 0;
}
From source file: edu.umn.cs.spatialHadoop.indexing.Indexer.java
License: Open Source License

/**
 * Create a partitioner for a particular job
 * @param ins
 * @param out
 * @param job
 * @param partitionerName
 * @return
 * @throws IOException
 */
public static Partitioner createPartitioner(Path[] ins, Path out, Configuration job, String partitionerName)
        throws IOException {
    try {
        Partitioner partitioner;
        Class<? extends Partitioner> partitionerClass = PartitionerClasses.get(partitionerName.toLowerCase());
        if (partitionerClass == null) {
            // Try to parse the name as a class name
            try {
                partitionerClass = Class.forName(partitionerName).asSubclass(Partitioner.class);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException("Unknown index type '" + partitionerName + "'");
            }
        }

        if (PartitionerReplicate.containsKey(partitionerName.toLowerCase())) {
            boolean replicate = PartitionerReplicate.get(partitionerName.toLowerCase());
            job.setBoolean("replicate", replicate);
        }
        partitioner = partitionerClass.newInstance();

        long t1 = System.currentTimeMillis();
        final Rectangle inMBR = (Rectangle) OperationsParams.getShape(job, "mbr");

        // Determine number of partitions
        long inSize = 0;
        for (Path in : ins) {
            inSize += FileUtil.getPathSize(in.getFileSystem(job), in);
        }
        long estimatedOutSize = (long) (inSize * (1.0 + job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.1f)));
        FileSystem outFS = out.getFileSystem(job);
        long outBlockSize = outFS.getDefaultBlockSize(out);

        final List<Point> sample = new ArrayList<Point>();
        float sample_ratio = job.getFloat(SpatialSite.SAMPLE_RATIO, 0.01f);
        long sample_size = job.getLong(SpatialSite.SAMPLE_SIZE, 100 * 1024 * 1024);

        LOG.info("Reading a sample of " + (int) Math.round(sample_ratio * 100) + "%");
        ResultCollector<Point> resultCollector = new ResultCollector<Point>() {
            @Override
            public void collect(Point p) {
                sample.add(p.clone());
            }
        };

        OperationsParams params2 = new OperationsParams(job);
        params2.setFloat("ratio", sample_ratio);
        params2.setLong("size", sample_size);
        if (job.get("shape") != null)
            params2.set("shape", job.get("shape"));
        if (job.get("local") != null)
            params2.set("local", job.get("local"));
        params2.setClass("outshape", Point.class, Shape.class);
        Sampler.sample(ins, resultCollector, params2);

        long t2 = System.currentTimeMillis();
        System.out.println("Total time for sampling in millis: " + (t2 - t1));
        LOG.info("Finished reading a sample of " + sample.size() + " records");

        int partitionCapacity = (int) Math.max(1,
                Math.floor((double) sample.size() * outBlockSize / estimatedOutSize));
        int numPartitions = Math.max(1, (int) Math.ceil((float) estimatedOutSize / outBlockSize));
        LOG.info("Partitioning the space into " + numPartitions + " partitions with capacity of "
                + partitionCapacity);

        partitioner.createFromPoints(inMBR, sample.toArray(new Point[sample.size()]), partitionCapacity);

        return partitioner;
    } catch (InstantiationException e) {
        e.printStackTrace();
        return null;
    } catch (IllegalAccessException e) {
        e.printStackTrace();
        return null;
    }
}
From source file: edu.umn.cs.spatialHadoop.nasa.HDFToText.java
License: Open Source License

/**
 * Performs an HDF to text operation as a MapReduce job and returns total
 * number of points generated.
 * @param inPath
 * @param outPath
 * @param datasetName
 * @param skipFillValue
 * @return
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static long HDFToTextMapReduce(Path inPath, Path outPath, String datasetName, boolean skipFillValue,
        OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "HDFToText");
    Configuration conf = job.getConfiguration();
    job.setJarByClass(HDFToText.class);
    job.setJobName("HDFToText");

    // Set Map function details
    job.setMapperClass(HDFToTextMap.class);
    job.setNumReduceTasks(0);

    // Set input information
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inPath);
    if (conf.get("shape") == null)
        conf.setClass("shape", NASAPoint.class, Shape.class);
    conf.set("dataset", datasetName);
    conf.setBoolean("skipfillvalue", skipFillValue);

    // Set output information
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outPath);

    // Run the job
    boolean verbose = conf.getBoolean("verbose", false);
    job.waitForCompletion(verbose);
    Counters counters = job.getCounters();
    Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
    final long resultCount = outputRecordCounter.getValue();
    return resultCount;
}