List of usage examples for org.apache.hadoop.conf.Configuration getInt

public int getInt(String name, int defaultValue)

Get the value of the name property as an int. If no such property exists, the provided defaultValue is returned; if the stored value is not a valid int, a NumberFormatException is thrown.
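Before the examples from real projects, here is a minimal sketch of the call in isolation, assuming hadoop-common on the classpath; the property names are hypothetical:

import org.apache.hadoop.conf.Configuration;

public class GetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setInt("my.app.num.threads", 8); // hypothetical property name

        // Returns the stored value when the property is set...
        int threads = conf.getInt("my.app.num.threads", 4); // 8

        // ...and falls back to the default when it is not.
        int retries = conf.getInt("my.app.max.retries", 3); // 3

        System.out.println(threads + " " + retries);
    }
}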
From source file:com.elex.dmp.lda.CachingCVB0PerplexityMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    MemoryUtil.startMemoryLogger(5000);
    log.info("Retrieving configuration");
    Configuration conf = context.getConfiguration();
    float eta = conf.getFloat(CVB0Driver.TERM_TOPIC_SMOOTHING, Float.NaN);
    float alpha = conf.getFloat(CVB0Driver.DOC_TOPIC_SMOOTHING, Float.NaN);
    long seed = conf.getLong(CVB0Driver.RANDOM_SEED, 1234L);
    random = RandomUtils.getRandom(seed);
    numTopics = conf.getInt(CVB0Driver.NUM_TOPICS, -1);
    int numTerms = conf.getInt(CVB0Driver.NUM_TERMS, -1);
    int numUpdateThreads = conf.getInt(CVB0Driver.NUM_UPDATE_THREADS, 1);
    int numTrainThreads = conf.getInt(CVB0Driver.NUM_TRAIN_THREADS, 4);
    maxIters = conf.getInt(CVB0Driver.MAX_ITERATIONS_PER_DOC, 10);
    float modelWeight = conf.getFloat(CVB0Driver.MODEL_WEIGHT, 1.0f);
    testFraction = conf.getFloat(CVB0Driver.TEST_SET_FRACTION, 0.1f);
    log.info("Initializing read model");
    TopicModel readModel;
    Path[] modelPaths = CVB0Driver.getModelPaths(conf);
    if (modelPaths != null && modelPaths.length > 0) {
        readModel = new TopicModel(conf, eta, alpha, null, numUpdateThreads, modelWeight, modelPaths);
    } else {
        log.info("No model files found");
        readModel = new TopicModel(numTopics, numTerms, eta, alpha, RandomUtils.getRandom(seed), null,
                numTrainThreads, modelWeight);
    }
    log.info("Initializing model trainer");
    modelTrainer = new ModelTrainer(readModel, null, numTrainThreads, numTopics, numTerms);
    log.info("Initializing topic vector");
    topicVector = new DenseVector(new double[numTopics]);
}
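A recurring idiom in the mapper above is passing -1 as the default for required properties such as NUM_TOPICS and NUM_TERMS, so a missing value can be detected downstream. A minimal self-contained sketch of that idiom, with a hypothetical property name and an explicit validation step the original leaves implicit:

import org.apache.hadoop.conf.Configuration;

public class RequiredIntExample {
    // Hypothetical property name; -1 acts as a sentinel meaning "not set".
    static final String NUM_TOPICS = "my.job.num.topics";

    static int requireInt(Configuration conf, String name) {
        int value = conf.getInt(name, -1);
        if (value < 0) {
            throw new IllegalStateException("Required int property not set: " + name);
        }
        return value;
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setInt(NUM_TOPICS, 20);
        System.out.println(requireInt(conf, NUM_TOPICS)); // 20
    }
}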
From source file:com.elex.dmp.vectorizer.TFPartialVectorReducer.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    URI[] localFiles = DistributedCache.getCacheFiles(conf);
    Preconditions.checkArgument(localFiles != null && localFiles.length >= 1,
            "missing paths from the DistributedCache");
    dimension = conf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
    sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
    namedVector = conf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);
    maxNGramSize = conf.getInt(DictionaryVectorizer.MAX_NGRAMS, maxNGramSize);
    Path dictionaryFile = new Path(localFiles[0].getPath());
    // key is the word, value is its id
    for (Pair<Writable, IntWritable> record :
            new SequenceFileIterable<Writable, IntWritable>(dictionaryFile, true, conf)) {
        dictionary.put(record.getFirst().toString(), record.getSecond().get());
    }
}
From source file:com.ery.hadoop.mrddx.file.LineReaders.java
License:Apache License
/**
 * Create a line reader that reads from the given stream using the
 * <code>io.file.buffer.size</code> specified in the given
 * <code>Configuration</code>.
 *
 * @param in   input stream
 * @param conf configuration
 * @throws IOException
 */
public LineReaders(InputStream in, Configuration conf) throws IOException {
    this(in, conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE),
            conf.getInt(FileConfiguration.INPUT_FILE_SKIP_ROWNUM, 0));
}
From source file:com.ery.hadoop.mrddx.file.LineReaders.java
License:Apache License
/**
 * Create a line reader that reads from the given stream using the
 * <code>io.file.buffer.size</code> specified in the given
 * <code>Configuration</code>, and using a custom delimiter given as an
 * array of bytes.
 *
 * @param in                   input stream
 * @param conf                 configuration
 * @param recordDelimiterBytes the delimiter
 * @param skipNum              number of leading rows to skip per file
 * @throws IOException
 */
public LineReaders(InputStream in, Configuration conf, byte[] recordDelimiterBytes, int skipNum)
        throws IOException {
    this.in = in;
    this.bufferSize = conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
    this.perFileSkipRowNum = skipNum;
    this.buffer = new byte[this.bufferSize];
    this.recordDelimiterBytes = recordDelimiterBytes;
}
From source file:com.ery.hadoop.mrddx.file.LineRecordReader.java
License:Apache License
public LineRecordReader(Configuration job, FileSplit split) throws IOException {
    this.perFileSkipRowNum = job.getInt(FileConfiguration.INPUT_FILE_SKIP_ROWNUM, 0);
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.job = job;
    this.split = split;
    for (long l : split.getLengths()) {
        totalend += l;
    }
    this.fileEncodeing = job
            .get(MRConfiguration.FILE_CONTENT_ENCODING, MRConfiguration.FILE_CONTENT_ENCODING_DEFAULT)
            .toLowerCase();
    if (this.fileEncodeing.equals("")) {
        this.fileEncodeing = "utf-8";
    }
    this.split.setFileIndex(0);
    this.openFile();
}
From source file:com.ery.hadoop.mrddx.file.LineRecordReader.java
License:Apache License
public LineRecordReader(InputStream in, long offset, long endOffset, Configuration job) throws IOException {
    this.job = job;
    this.perFileSkipRowNum = job.getInt(FileConfiguration.INPUT_FILE_SKIP_ROWNUM, 0);
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.in = new LineReader(in, job);
    this.start = offset;
    this.pos = offset;
    this.end = endOffset;
    this.filePosition = null;
}
From source file:com.ery.hadoop.mrddx.hFile.LineRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            // .tar.gz files are additionally wrapped in a TarInputStream
            String filename = file.getName();
            if (filename.endsWith(".tar.gz")) {
                in = new LineReader(new TarInputStream(cIn), job);
            } else {
                in = new LineReader(cIn, job);
            }
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            String filename = file.getName();
            if (filename.endsWith(".tar.gz")) {
                in = new LineReader(new TarInputStream(codec.createInputStream(fileIn, decompressor)), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away the first record
    // because we always (except for the last split) read one extra line in
    // the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file:com.ery.server.util.IOUtils.java
License:Apache License
/**
 * Copies from one stream to another. <strong>Closes the input and output
 * streams at the end.</strong>
 *
 * @param in   InputStream to read from
 * @param out  OutputStream to write to
 * @param conf the Configuration object
 */
public static void copyBytes(InputStream in, OutputStream out, Configuration conf) throws IOException {
    copyBytes(in, out, conf.getInt("io.file.buffer.size", 4096), true);
}
From source file:com.ery.server.util.IOUtils.java
License:Apache License
/**
 * Copies from one stream to another.
 *
 * @param in    InputStream to read from
 * @param out   OutputStream to write to
 * @param conf  the Configuration object
 * @param close whether or not to close the InputStream and OutputStream at
 *              the end. The streams are closed in the finally clause.
 */
public static void copyBytes(InputStream in, OutputStream out, Configuration conf, boolean close)
        throws IOException {
    copyBytes(in, out, conf.getInt("io.file.buffer.size", 4096), close);
}
From source file:com.facebook.hive.orc.OrcConf.java
License:Open Source License
public static int getIntVar(Configuration conf, ConfVars var) {
    return conf.getInt(var.varname, var.defaultIntVal);
}
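This last example wraps getInt behind an enum of known configuration variables, so call sites repeat neither the property string nor the default. A minimal self-contained sketch of the same pattern, using a hypothetical MyConfVars enum rather than the real OrcConf.ConfVars:

import org.apache.hadoop.conf.Configuration;

public class ConfVarsSketch {
    // Hypothetical stand-in for OrcConf.ConfVars: each constant carries
    // its property name and its default value.
    enum MyConfVars {
        ROW_INDEX_STRIDE("my.orc.row.index.stride", 10000),
        BUFFER_SIZE("my.orc.buffer.size", 262144);

        final String varname;
        final int defaultIntVal;

        MyConfVars(String varname, int defaultIntVal) {
            this.varname = varname;
            this.defaultIntVal = defaultIntVal;
        }
    }

    static int getIntVar(Configuration conf, MyConfVars var) {
        return conf.getInt(var.varname, var.defaultIntVal);
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Nothing set, so the enum's default is returned.
        System.out.println(getIntVar(conf, MyConfVars.BUFFER_SIZE)); // 262144
    }
}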