Example usage for org.apache.hadoop.conf.Configuration.setFloat

List of usage examples for org.apache.hadoop.conf.Configuration.setFloat

Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.setFloat.

Prototype

public void setFloat(String name, float value) 

Document

Set the value of the name property to a float.
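
As a quick reference, here is a minimal, self-contained sketch of setting a float property and reading it back with Configuration.getFloat(String name, float defaultValue); the property name example.sample.rate is purely illustrative.

import org.apache.hadoop.conf.Configuration;

public class SetFloatExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a float value under an illustrative property name.
        conf.setFloat("example.sample.rate", 0.25f);

        // Read it back; the second argument is the default returned when
        // the property is not set.
        float rate = conf.getFloat("example.sample.rate", 1.0f);
        System.out.println("example.sample.rate = " + rate);
    }
}

The examples below follow the same pattern: a value is stored in the job Configuration with setFloat before submission and read back by mappers or reducers at runtime.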

Usage

From source file:com.twitter.algebra.nmf.CompositeDMJ.java

License:Apache License

public Job run(Configuration conf, Path mapDirPath, Path matrixInputPaths, Path matrixOutputPath, int atCols,
        boolean aIsMapDir, String inMemCStr, int inMemCRows, int inMemCCols, float alpha1, float alpha2)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);
    conf.set(MATRIXINMEMORY, inMemCStr);
    conf.setInt(MATRIXINMEMORYROWS, inMemCRows);
    conf.setInt(MATRIXINMEMORYCOLS, inMemCCols);

    conf.setFloat(ALPHA1, alpha1);
    conf.setFloat(ALPHA2, alpha2);

    FileSystem fs = FileSystem.get(matrixOutputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPaths, "compositedmj");

    conf.set(MAPDIRMATRIX, mapDirPath.toString());
    conf.setBoolean(AISMAPDIR, aIsMapDir);
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(CompositeDMJ.class);
    job.setJobName(CompositeDMJ.class.getSimpleName() + "-" + matrixOutputPath.getName());
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    matrixInputPaths = fs.makeQualified(matrixInputPaths);
    MultipleInputs.addInputPath(job, matrixInputPaths, SequenceFileInputFormat.class);

    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    // ensures total order when used with MatrixOutputFormat
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, atCols);

    job.setNumReduceTasks(0);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    return job;
}

From source file:com.twitter.algebra.nmf.SampleColsJob.java

License:Apache License

public void run(Configuration conf, Path matrixInputPath, int cols, Path matrixOutputPath, float sampleRate)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);

    conf.setFloat(SAMPLERATE, sampleRate);
    conf.setInt(COLS, cols);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "samplecol");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(SampleColsJob.class);
    job.setJobName(SampleColsJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);

    job.setNumReduceTasks(0);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}

From source file:com.twitter.algebra.nmf.SampleRowsJob.java

License:Apache License

public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, float sampleRate)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);

    conf.setFloat(SAMPLERATE, sampleRate);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "samplerows");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(SampleRowsJob.class);
    job.setJobName(SampleRowsJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);

    job.setNumReduceTasks(0);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}

From source file:com.zjy.mongo.util.MongoConfigUtil.java

License:Apache License

public static Configuration buildConfiguration(final Map<String, Object> data) {
    Configuration newConf = new Configuration();
    for (Entry<String, Object> entry : data.entrySet()) {
        String key = entry.getKey();
        Object val = entry.getValue();
        if (val instanceof String) {
            newConf.set(key, (String) val);
        } else if (val instanceof Boolean) {
            newConf.setBoolean(key, (Boolean) val);
        } else if (val instanceof Integer) {
            newConf.setInt(key, (Integer) val);
        } else if (val instanceof Float) {
            newConf.setFloat(key, (Float) val);
        } else if (val instanceof DBObject) {
            setDBObject(newConf, key, (DBObject) val);
        } else {
            throw new RuntimeException("can't convert " + val.getClass() + " into any type for Configuration");
        }
    }
    return newConf;
}

From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.CollocDriver.java

License:Apache License

/**
 * pass2: perform the LLR calculation
 */
private static void computeNGramsPruneByLLR(Path output, Configuration baseConf, long nGramTotal,
        boolean emitUnigrams, float minValue, int reduceTasks)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration(baseConf);
    conf.setLong(AssocReducer.NGRAM_TOTAL, nGramTotal);
    conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    conf.setFloat(AssocReducer.MIN_VALUE, minValue);
    conf.setInt("mapred.job.map.memory.mb", 1280);
    conf.setInt("mapred.job.reduce.memory.mb", 2560);
    conf.set("mapred.reduce.child.java.opts", "-Xmx2G");
    conf.setInt("mapred.task.timeout", 6000000);
    conf.set(AssocReducer.ASSOC_METRIC, "llr");

    Job job = new Job(conf);
    job.setJobName(CollocDriver.class.getSimpleName() + ".computeNGrams: " + output + " pruning: " + minValue);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(Gram.class);
    job.setMapOutputValueClass(Gram.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.setInputPaths(job, new Path(output, SUBGRAM_OUTPUT_DIRECTORY));
    Path outPath = new Path(output, NGRAM_OUTPUT_DIRECTORY + "_llr");
    FileOutputFormat.setOutputPath(job, outPath);

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class);
    job.setReducerClass(AssocReducer.class);
    job.setNumReduceTasks(reduceTasks);
    // Defines additional single text based output 'text' for the job
    MultipleOutputs.addNamedOutput(job, "contingency", TextOutputFormat.class, Text.class, Text.class);

    // Defines additional multi sequencefile based output 'sequence' for the
    // job
    MultipleOutputs.addNamedOutput(job, "llr", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "pmi", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "chi", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "dice", TextOutputFormat.class, Text.class, DoubleWritable.class);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}

From source file:edu.cuhk.hccl.hadoop.HadoopApp.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    if (args == null || args.length < 6) {
        System.out.println("Please specify parameters: input, output, domain, num-reducers, similarity, range!");
        System.exit(-1);
    }

    String input = args[0];
    String output = args[1];
    String domain = args[2];
    int numReducers = Integer.parseInt(args[3]);
    float similarity = Float.parseFloat(args[4]);
    int range = Integer.parseInt(args[5]);

    Job job = new Job(new Configuration(), this.getClass().getSimpleName());

    // Must come after the Job is created
    Configuration conf = job.getConfiguration();
    // Reuse the JVM
    conf.setInt("mapred.job.reuse.jvm.num.tasks", -1);
    conf.setFloat("SIM_THRESHOLD", similarity);
    conf.setInt("SEARCH_RANGE", range);

    if (domain.equalsIgnoreCase("restaurant")) {
        conf.setStrings("ASPECTS", Constant.RESTAURANT_ASPECTS);
        job.setMapperClass(YelpMapper.class);
        job.setInputFormatClass(TextInputFormat.class);

        // args[6] is the business file used to select business_ids that match restaurants
        String busiFile = args[6];
        DistributedCache.addCacheFile(new URI(busiFile), conf);
    } else if (domain.equalsIgnoreCase("hotel")) {
        conf.setStrings("ASPECTS", Constant.TRIPADVISOR_ASPECTS);
        job.setMapperClass(TripAdvisorMapper.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
    } else {
        System.out.println("Wrong domain type!");
        System.exit(-1);
    }

    job.setJarByClass(HadoopApp.class);
    job.setReducerClass(ReviewReducer.class);
    job.setNumReduceTasks(numReducers);

    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(UserItemPair.class);
    job.setOutputValueClass(NounPhrase.class);

    // Delete output if exists
    Path outputDir = new Path(output);
    FileSystem hdfs = FileSystem.get(conf);
    if (hdfs.exists(outputDir))
        hdfs.delete(outputDir, true);

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.waitForCompletion(true);
    return 0;
}

From source file:edu.indiana.d2i.htrc.skmeans.StreamingKMeansAdapterTest.java

License:Apache License

@Test
public static void testCluster() {
    int dimension = 500;

    // construct data samplers centered on the corners of a unit cube
    Matrix mean = new DenseMatrix(8, dimension);
    List<MultiNormal> rowSamplers = Lists.newArrayList();
    for (int i = 0; i < 8; i++) {
        //         mean.viewRow(i).assign(
        //               new double[] { 0.25 * (i & 4), 0.5 * (i & 2), i & 1 });

        double[] random = new double[dimension];
        for (int j = 0; j < random.length; j++) {
            random[j] = Math.random();
        }
        mean.viewRow(i).assign(random);
        rowSamplers.add(new MultiNormal(0.01, mean.viewRow(i)));
    }

    // sample a bunch of data points
    Matrix data = new DenseMatrix(10000, dimension);
    for (MatrixSlice row : data) {
        row.vector().assign(rowSamplers.get(row.index() % 8).sample());
    }

    // cluster the data
    long t0 = System.currentTimeMillis();

    double cutoff = StreamingKMeansAdapter.estimateCutoff(data, 100);
    Configuration conf = new Configuration();
    conf.setInt(StreamingKMeansConfigKeys.MAXCLUSTER, 1000);
    conf.setFloat(StreamingKMeansConfigKeys.CUTOFF, (float) cutoff);
    conf.setClass(StreamingKMeansConfigKeys.DIST_MEASUREMENT, EuclideanDistanceMeasure.class,
            DistanceMeasure.class);
    conf.setInt(StreamingKMeansConfigKeys.VECTOR_DIMENSION, dimension);
    StreamingKMeansAdapter skmeans = new StreamingKMeansAdapter(conf);
    // for (MatrixSlice row : Iterables.skip(data, 1)) {
    // skmeans.cluster(row.vector());
    // }
    for (MatrixSlice row : data) {
        skmeans.cluster(row.vector());
    }

    // validate
    Searcher r = skmeans.getCentroids();

    // StreamingKMeansAdapter skmeans = new StreamingKMeansAdapter();
    // Searcher r = skmeans.cluster(data, 1000, centroidFactory);

    long t1 = System.currentTimeMillis();

    assertEquals("Total weight not preserved", totalWeight(data), totalWeight(r), 1e-9);

    // and verify that each corner of the cube has a centroid very nearby
    for (MatrixSlice row : mean) {
        WeightedVector v = r.search(row.vector(), 1).get(0);
        assertTrue(v.getWeight() < 0.05);
    }
    System.out.printf("%.2f for clustering\n%.1f us per row\n", (t1 - t0) / 1000.0,
            (t1 - t0) / 1000.0 / data.rowSize() * 1e6);

    System.out.println("Done??");
}

From source file:edu.indiana.d2i.htrc.skmeans.StreamingKMeansDriver.java

License:Apache License

private void StreamingKMeansConfigHelper(Configuration conf, String input, int maxCluster) throws IOException {
    // get samples to calculate scale factor
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] status = fs.listStatus(new Path(input), Utilities.HIDDEN_FILE_FILTER);
    int index = 0 + (int) (Math.random() * (status.length));
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, status[index].getPath(), conf);

    int count = 0;
    Text key = new Text();
    VectorWritable value = new VectorWritable();
    List<MatrixSlice> slices = new ArrayList<MatrixSlice>();
    while (seqReader.next(key, value) && count < samplesNum) {
        MatrixSlice slice = new MatrixSlice(value.get().clone(), count);
        slices.add(slice);
        count++;
    }

    // set cutoff
    float cutoff = (float) StreamingKmeans.estimateCutoff(slices, samplesNum);
    conf.setFloat(StreamingKMeansConfigKeys.CUTOFF, cutoff);
    logger.info("Scale factor (cutoff) is: " + cutoff);

    // set vector dimension
    int dim = value.get().size();
    conf.setInt(StreamingKMeansConfigKeys.VECTOR_DIMENSION, dim);
    logger.info("Dimemsion of a vector is: " + dim);

    // set maximum #cluster
    conf.setInt(StreamingKMeansConfigKeys.MAXCLUSTER, maxCluster);

    // set distance measurement
    conf.set(StreamingKMeansConfigKeys.DIST_MEASUREMENT, EuclideanDistanceMeasure.class.getName());
}

From source file:edu.rosehulman.CollocDriver.java

License:Apache License

/**
 * pass2: perform the LLR calculation
 */
private static void computeNGramsPruneByLLR(Path output, Configuration baseConf, long nGramTotal,
        boolean emitUnigrams, float minLLRValue, int reduceTasks)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration(baseConf);
    conf.setLong(LLRReducer.NGRAM_TOTAL, nGramTotal);
    conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    conf.setFloat(LLRReducer.MIN_LLR, minLLRValue);

    Job job = new Job(conf);
    job.setJobName(CollocDriver.class.getSimpleName() + ".computeNGrams: " + output);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(Gram.class);
    job.setMapOutputValueClass(Gram.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    FileInputFormat.setInputPaths(job, new Path(output, SUBGRAM_OUTPUT_DIRECTORY));
    Path outPath = new Path(output, NGRAM_OUTPUT_DIRECTORY);
    FileOutputFormat.setOutputPath(job, outPath);

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setReducerClass(LLRReducer.class);
    job.setNumReduceTasks(reduceTasks);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}

From source file:gaffer.accumulo.splitpoints.EstimateSplitPointsDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length < 5) {
        System.err.println("Usage: " + this.getClass().getName()
                + " <mapred_output_directory> <proportion_to_sample> <number_of_tablet_servers> <resulting_split_file> <input_path1>...");
        return 1;
    }

    // Parse arguments
    Path outputPath = new Path(args[0]);
    float proportionToSample = Float.parseFloat(args[1]);
    int numberTabletServers = Integer.parseInt(args[2]);
    Path resultingSplitsFile = new Path(args[3]);
    Path[] inputPaths = new Path[args.length - 4];
    for (int i = 0; i < inputPaths.length; i++) {
        inputPaths[i] = new Path(args[i + 4]);
    }

    // Conf and job
    Configuration conf = getConf();
    conf.setFloat("proportion_to_sample", proportionToSample);
    String jobName = "Estimate split points: input = ";
    for (int i = 0; i < inputPaths.length; i++) {
        jobName += inputPaths[i] + ", ";
    }
    jobName += "output = " + outputPath;
    Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(getClass());

    // Input
    job.setInputFormatClass(SequenceFileInputFormat.class);
    for (int i = 0; i < inputPaths.length; i++) {
        SequenceFileInputFormat.addInputPath(job, inputPaths[i]);
    }

    // Mapper
    job.setMapperClass(EstimateSplitPointsMapper.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    // Reducer
    job.setReducerClass(EstimateSplitPointsReducer.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    job.setNumReduceTasks(1);

    // Output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }

    // Number of records output
    // NB In the following line use mapred.Task.Counter.REDUCE_OUTPUT_RECORDS rather than
    // mapreduce.TaskCounter.REDUCE_OUTPUT_RECORDS as this is more compatible with earlier
    // versions of Hadoop.
    @SuppressWarnings("deprecation")
    Counter counter = job.getCounters()
            .findCounter(org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS);
    long recordsOutput = counter.getValue();
    System.out.println("Number of records output = " + recordsOutput);

    // Work out when to output a split point. The number of split points
    // needed is the number of tablet servers minus 1 (because you don't
    // have to output the start of the first tablet or the end of the
    // last tablet).
    long outputEveryNthRecord = recordsOutput / (numberTabletServers - 1);

    // Read through resulting file, pick out the split points and write to
    // file.
    FileSystem fs = FileSystem.get(conf);
    Path resultsFile = new Path(outputPath, "part-r-00000");
    @SuppressWarnings("deprecation")
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, resultsFile, conf);
    PrintStream splitsWriter = new PrintStream(new BufferedOutputStream(fs.create(resultingSplitsFile, true)));
    Key key = new Key();
    Value value = new Value();
    long count = 0;
    int numberSplitPointsOutput = 0;
    while (reader.next(key, value) && numberSplitPointsOutput < numberTabletServers - 1) {
        count++;
        if (count % outputEveryNthRecord == 0) {
            numberSplitPointsOutput++;
            splitsWriter.println(new String(Base64.encodeBase64(key.getRow().getBytes())));
            System.out.println("Written split point: " + key.getRow());
        }
    }
    reader.close();
    splitsWriter.close();
    System.out.println("Number of split points output = " + numberSplitPointsOutput);
    return 0;
}