List of usage examples for org.apache.hadoop.conf.Configuration.getLong

public long getLong(String name, long defaultValue)

Gets the value of the name property as a long. If the property is not set, defaultValue is returned.
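A minimal standalone sketch of the call before the real-world examples below; the property names my.app.max.records and my.app.unset.property are invented purely for illustration:

import org.apache.hadoop.conf.Configuration;

public class GetLongExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical property name, set here only so the lookup below finds a value.
        conf.setLong("my.app.max.records", 500000L);

        // Returns the configured value when the property is present...
        long maxRecords = conf.getLong("my.app.max.records", 100000L);
        // ...and the supplied default when it is not.
        long fallback = conf.getLong("my.app.unset.property", 100000L);

        System.out.println("maxRecords=" + maxRecords + ", fallback=" + fallback);
    }
}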
From source file:gr.ntua.h2rdf.inputFormat.MultiHFileOutputFormat.java
License:Open Source License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            for (RecordWriter<ImmutableBytesWritable, KeyValue> writer : writers.values()) {
                writer.close(context);
            }
        }

        @Override
        public void write(ImmutableBytesWritable key, KeyValue value) throws IOException, InterruptedException {
            RecordWriter<ImmutableBytesWritable, KeyValue> writer = writers.get(key);
            if (writer == null) {
                final Path outputPath = new Path(
                        FileOutputFormat.getOutputPath(context).toString() + "/" + Bytes.toString(key.get()));
                writer = new RecordWriter<ImmutableBytesWritable, KeyValue>() {
                    final FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
                    final Path outputdir = committer.getWorkPath();
                    final Configuration conf = context.getConfiguration();
                    final FileSystem fs = outputdir.getFileSystem(conf);
                    final long maxsize = conf.getLong("hbase.hregion.max.filesize",
                            HConstants.DEFAULT_MAX_FILE_SIZE);
                    final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE);
                    // Invented config. Add to hbase-*.xml if other than default compression.
                    final String compression = conf.get("hfile.compression",
                            Compression.Algorithm.NONE.getName());

                    // Map of families to writers and how much has been output on the writer.
                    final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                            Bytes.BYTES_COMPARATOR);
                    byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
                    final byte[] now = Bytes.toBytes(System.currentTimeMillis());
                    boolean rollRequested = false;

                    public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
                        // null input == user explicitly wants to flush
                        if (row == null && kv == null) {
                            rollWriters();
                            return;
                        }
                        byte[] rowKey = kv.getRow();
                        long length = kv.getLength();
                        byte[] family = kv.getFamily();
                        WriterLength wl = this.writers.get(family);

                        // If this is a new column family, verify that the directory exists
                        if (wl == null) {
                            fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
                        }

                        // If any of the HFiles for the column families has reached
                        // maxsize, we need to roll all the writers
                        if (wl != null && wl.written + length >= maxsize) {
                            this.rollRequested = true;
                        }

                        // This can only happen once a row is finished though
                        if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                            rollWriters();
                        }

                        // create a new HLog writer, if necessary
                        if (wl == null || wl.writer == null) {
                            wl = getNewWriter(family);
                        }

                        // we now have the proper HLog writer. full steam ahead
                        kv.updateLatestStamp(this.now);
                        wl.writer.append(kv);
                        wl.written += length;

                        // Copy the row so we know when a row transition.
                        this.previousRow = rowKey;
                    }

                    private void rollWriters() throws IOException {
                        for (WriterLength wl : this.writers.values()) {
                            if (wl.writer != null) {
                                close(wl.writer);
                            }
                            wl.writer = null;
                            wl.written = 0;
                        }
                        this.rollRequested = false;
                    }

                    private HFile.Writer getNewWriter(final HFile.Writer writer, final Path familydir,
                            Configuration conf) throws IOException {
                        if (writer != null) {
                            close(writer);
                        }
                        return HFile.getWriterFactoryNoCache(conf).create();
                        // return HFile.getWriterFactory(conf).createWriter(fs, StoreFile.getUniqueFile(fs, familydir),
                        //         blocksize, compression, KeyValue.KEY_COMPARATOR);
                        // return new HFile.Writer(fs, StoreFile.getUniqueFile(fs, familydir),
                        //         blocksize, compression, KeyValue.KEY_COMPARATOR);
                    }

                    private WriterLength getNewWriter(byte[] family) throws IOException {
                        WriterLength wl = new WriterLength();
                        Path familydir = new Path(outputdir, Bytes.toString(family));
                        wl.writer = getNewWriter(wl.writer, familydir, conf);
                        this.writers.put(family, wl);
                        return wl;
                    }

                    private void close(final HFile.Writer w) throws IOException {
                        if (w != null) {
                            w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
                                    Bytes.toBytes(System.currentTimeMillis()));
                            w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                                    Bytes.toBytes(context.getTaskAttemptID().toString()));
                            w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                            w.close();
                        }
                    }

                    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
                        for (WriterLength wl : this.writers.values()) {
                            close(wl.writer);
                        }
                        committer.commitTask(c);
                    }
                };
                writers.put(key, writer);
            }
            writer.write(new ImmutableBytesWritable(value.getRow()), value);
        }
    };
}
From source file:hitune.analysis.mapreduce.processor.FileFilter.ChukwaTimeBasedFileFilter.java
License:Apache License
public ChukwaTimeBasedFileFilter(Configuration conf, String pattern) {
    super(conf, pattern);
    starttime = conf.getLong(AnalysisProcessorConfiguration.starttime, -1);
    endtime = conf.getLong(AnalysisProcessorConfiguration.endtime, -1);
    log.debug("starttime: " + starttime + " endtime: " + endtime + " MAX_TIMESTAMP_IN_SECOND: "
            + MAX_TIMESTAMP_IN_SECOND);
    if (((int) (starttime / MAX_TIMESTAMP_IN_SECOND)) < 1) {
        starttime = starttime * 1000;
    }
    if (((int) (endtime / MAX_TIMESTAMP_IN_SECOND)) < 1) {
        endtime = endtime * 1000;
    }
}
From source file:homework.homework.homework3.HomeworkMapper3.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration configuration = context.getConfiguration();
    line = configuration.getLong("line", -1L);
    regEx = "([A-Z])\\w+";
    pattern = Pattern.compile(regEx);
    delimiter = configuration.get("delimiter", " ");
}
From source file:idgs.ConfVar.java
License:Open Source License
public static Long getLongVar(Configuration conf, ConfVar variable) {
    require(variable.valClass == Long.class);
    return conf.getLong(variable.varname, variable.defaultLongVal);
}
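A hedged usage sketch of the typed-variable wrapper above. The listing does not show how idgs.ConfVar declares its variables, so the constant BATCH_SIZE, its property name, and its default below are invented; only the type check and the conf.getLong call mirror the source:

import org.apache.hadoop.conf.Configuration;

// Illustrative stand-in for idgs.ConfVar; the constant and its values are hypothetical.
enum ConfVarSketch {
    BATCH_SIZE("idgs.batch.size", 1000L);

    final String varname;
    final long defaultLongVal;
    final Class<?> valClass = Long.class;

    ConfVarSketch(String varname, long defaultLongVal) {
        this.varname = varname;
        this.defaultLongVal = defaultLongVal;
    }

    // Same shape as getLongVar above: check the variable is long-typed, then delegate to getLong.
    static Long getLongVar(Configuration conf, ConfVarSketch variable) {
        if (variable.valClass != Long.class) {
            throw new IllegalArgumentException(variable.varname + " is not a long-typed variable");
        }
        return conf.getLong(variable.varname, variable.defaultLongVal);
    }
}

// Usage: returns 1000L unless idgs.batch.size is set in the Configuration.
// long batchSize = ConfVarSketch.getLongVar(new Configuration(), ConfVarSketch.BATCH_SIZE);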
From source file:IndexStorage.IFileInfo.java
License:Open Source License
public IFileInfo(Configuration conf) throws IOException {
    this.conf = conf;
    fs = FileSystem.get(conf);
    this.confSegmentSize = conf.getLong(ConstVar.ConfSegmentSize, ConstVar.DefaultSegmentSize);
    this.confUnitSize = conf.getLong(ConstVar.ConfUnitSize, ConstVar.DefaultUnitSize);
    this.conf.setInt("io.compression.codec.lzo.buffersize", 128 * 1024);
    this.currentline = 0;
}
From source file:info.halo9pan.word2vec.hadoop.mr.SortInputFormat.java
License:Apache License
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param job
 *          the job to sample
 * @param partFile
 *          where to write the output file to
 * @throws Throwable
 *           if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final SortInputFormat inFormat = new SortInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {
                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing partitions took " + (t3 - t2) + "ms");
}
From source file:input_format.MultiHFileOutputFormat.java
License:Open Source License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            for (RecordWriter<ImmutableBytesWritable, KeyValue> writer : writers.values()) {
                writer.close(context);
            }
        }

        @Override
        public void write(ImmutableBytesWritable key, KeyValue value) throws IOException, InterruptedException {
            RecordWriter<ImmutableBytesWritable, KeyValue> writer = writers.get(key);
            if (writer == null) {
                final Path outputPath = new Path(
                        FileOutputFormat.getOutputPath(context).toString() + "/" + Bytes.toString(key.get()));
                writer = new RecordWriter<ImmutableBytesWritable, KeyValue>() {
                    final FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
                    final Path outputdir = committer.getWorkPath();
                    final Configuration conf = context.getConfiguration();
                    final FileSystem fs = outputdir.getFileSystem(conf);
                    final long maxsize = conf.getLong("hbase.hregion.max.filesize",
                            HConstants.DEFAULT_MAX_FILE_SIZE);
                    final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE);
                    // Invented config. Add to hbase-*.xml if other than default compression.
                    final String compression = conf.get("hfile.compression",
                            Compression.Algorithm.NONE.getName());

                    // Map of families to writers and how much has been output on the writer.
                    final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                            Bytes.BYTES_COMPARATOR);
                    byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
                    final byte[] now = Bytes.toBytes(System.currentTimeMillis());
                    boolean rollRequested = false;

                    public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
                        // null input == user explicitly wants to flush
                        if (row == null && kv == null) {
                            rollWriters();
                            return;
                        }
                        byte[] rowKey = kv.getRow();
                        long length = kv.getLength();
                        byte[] family = kv.getFamily();
                        WriterLength wl = this.writers.get(family);

                        // If this is a new column family, verify that the directory exists
                        if (wl == null) {
                            fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
                        }

                        // If any of the HFiles for the column families has reached
                        // maxsize, we need to roll all the writers
                        if (wl != null && wl.written + length >= maxsize) {
                            this.rollRequested = true;
                        }

                        // This can only happen once a row is finished though
                        if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                            rollWriters();
                        }

                        // create a new HLog writer, if necessary
                        if (wl == null || wl.writer == null) {
                            wl = getNewWriter(family);
                        }

                        // we now have the proper HLog writer. full steam ahead
                        kv.updateLatestStamp(this.now);
                        wl.writer.append(kv);
                        wl.written += length;

                        // Copy the row so we know when a row transition.
                        this.previousRow = rowKey;
                    }

                    private void rollWriters() throws IOException {
                        for (WriterLength wl : this.writers.values()) {
                            if (wl.writer != null) {
                                close(wl.writer);
                            }
                            wl.writer = null;
                            wl.written = 0;
                        }
                        this.rollRequested = false;
                    }

                    private HFile.Writer getNewWriter(final HFile.Writer writer, final Path familydir,
                            Configuration conf) throws IOException {
                        if (writer != null) {
                            close(writer);
                        }
                        return HFile.getWriterFactory(conf).createWriter(fs, StoreFile.getUniqueFile(fs, familydir),
                                blocksize, compression, KeyValue.KEY_COMPARATOR);
                        // return new HFile.Writer(fs, StoreFile.getUniqueFile(fs, familydir),
                        //         blocksize, compression, KeyValue.KEY_COMPARATOR);
                    }

                    private WriterLength getNewWriter(byte[] family) throws IOException {
                        WriterLength wl = new WriterLength();
                        Path familydir = new Path(outputdir, Bytes.toString(family));
                        wl.writer = getNewWriter(wl.writer, familydir, conf);
                        this.writers.put(family, wl);
                        return wl;
                    }

                    private void close(final HFile.Writer w) throws IOException {
                        if (w != null) {
                            w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
                                    Bytes.toBytes(System.currentTimeMillis()));
                            w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                                    Bytes.toBytes(context.getTaskAttemptID().toString()));
                            w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                            w.close();
                        }
                    }

                    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
                        for (WriterLength wl : this.writers.values()) {
                            close(wl.writer);
                        }
                        committer.commitTask(c);
                    }
                };
                writers.put(key, writer);
            }
            writer.write(new ImmutableBytesWritable(value.getRow()), value);
        }
    };
}
From source file:io.covert.binary.analysis.BinaryAnalysisMapper.java
License:Apache License
protected void setup(Context context) throws java.io.IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    try {
        parser = (OutputParser<K, V>) Class.forName(conf.get("binary.analysis.output.parser")).newInstance();
    } catch (Exception e) {
        throw new IOException("Could create parser", e);
    }
    fileExtention = conf.get("binary.analysis.file.extention", ".dat");
    timeoutMS = conf.getLong("binary.analysis.execution.timeoutMS", Long.MAX_VALUE);
    program = conf.get("binary.analysis.program");
    args = conf.get("binary.analysis.program.args")
            .split(conf.get("binary.analysis.program.args.delim", ","));
    String[] codes = conf.get("binary.analysis.program.exit.codes").split(",");
    exitCodes = new int[codes.length];
    for (int i = 0; i < codes.length; ++i) {
        exitCodes[i] = Integer.parseInt(codes[i]);
    }
    workingDir = new File(".").getAbsoluteFile();
    dataDir = new File(workingDir, "_data");
    dataDir.mkdir();
    logDirContents(workingDir);
    File programFile = new File(workingDir, program);
    if (programFile.exists()) {
        LOG.info("Program file exists in working directory, ensuring executable and readable");
        programFile.setExecutable(true);
        programFile.setReadable(true);
    }
}
From source file:io.covert.dns.collection.CollectionMapper.java
License:Apache License
protected void setup(Context context) throws java.io.IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    writer = new WriterThread(outQueue, context);
    writer.start();
    int numThreads = conf.getInt("dns.collection.num.resolvers", 50);
    String[] nameservers = conf.get("dns.collection.nameservers").split(",");
    maxOutstandingRequests = conf.getLong("dns.collection.max.outstanding.requests", 5000);
    int timeoutSecs = conf.getInt("dns.collection.timeout.secs", 5);
    if (nameservers.length == 0) {
        throw new IOException("dns.collection.num.resolvers was not defined correctly");
    }
    for (int i = 0; i < numThreads; ++i) {
        ResolverThread res = new ResolverThread(inQueue, inQueueSize, outQueue, nameservers, timeoutSecs);
        res.start();
        threads.add(res);
    }
}
From source file:io.covert.dns.storage.accumulo.AccumuloStorageModuleFactory.java
License:Apache License
@Override
public StorageModule create(Configuration conf) throws Exception {
    List<MutationGenerator> generators = new LinkedList<MutationGenerator>();
    for (String factoryClass : conf.get("accumulo.storage.module.mutation.generator.factories").split(",")) {
        MutationGeneratorFactory mutGenFact = ((Class<MutationGeneratorFactory>) Class.forName(factoryClass))
                .newInstance();
        generators.add(mutGenFact.create(conf));
    }
    String inst = conf.get("accumulo.storage.module.instance.name");
    String zooKeepers = conf.get("accumulo.storage.module.zookeepers");
    String user = conf.get("accumulo.storage.module.user");
    String password = conf.get("accumulo.storage.module.password");
    long maxMemory = conf.getLong("accumulo.storage.module.max.memory", 10 * 1024 * 1024);
    long maxLatency = conf.getLong("accumulo.storage.module.max.latency", 30 * 1000);
    int maxWriteThreads = conf.getInt("accumulo.storage.module.max.write.threads", 5);
    return new AccumuloStorageModule(inst, zooKeepers, user, password, maxMemory, maxLatency, maxWriteThreads,
            generators);
}