List of usage examples for org.apache.hadoop.conf Configuration getLong
public long getLong(String name, long defaultValue)

Gets the value of the name property as a long. If no such property exists, defaultValue is returned; if the property is set but cannot be parsed as a long, a NumberFormatException is thrown.
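A minimal usage sketch before the real-world examples below (not taken from any of the source files that follow; the property keys my.app.timeout.ms and my.app.buffer.kb are hypothetical, used only to show the set-value and default-value paths):

    import org.apache.hadoop.conf.Configuration;

    public class GetLongExample {
        public static void main(String[] args) {
            Configuration conf = new Configuration();

            // Hypothetical key, set explicitly so getLong finds and parses it.
            conf.set("my.app.timeout.ms", "30000");

            // Returns 30000 because the property is present and parses as a long.
            long timeoutMs = conf.getLong("my.app.timeout.ms", 10000L);

            // Returns the supplied default (512) because this key is not set.
            long bufferKb = conf.getLong("my.app.buffer.kb", 512L);

            System.out.println("timeoutMs=" + timeoutMs + " bufferKb=" + bufferKb);
        }
    }

Each example below follows the same pattern: a configuration key is read with getLong and falls back to a compile-time default when the key is absent.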
From source file: org.apache.hadoop.examples.terasort.TeraInputFormat.java
License:Apache License
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(TeraSortConfigKeys.SAMPLE_SIZE.key(),
        TeraSortConfigKeys.DEFAULT_SAMPLE_SIZE);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(
        conf.getInt(TeraSortConfigKeys.NUM_PARTITIONS.key(), TeraSortConfigKeys.DEFAULT_NUM_PARTITIONS),
        splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context =
                        new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
                    RecordReader<Text, Text> reader =
                        inFormat.createRecordReader(splits.get(sampleStep * idx), context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                        "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {
                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
        outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}
From source file:org.apache.hadoop.yarn.server.applicationhistoryservice.metrics.timeline.TimelineMetricClusterAggregatorHourly.java
License:Apache License
public TimelineMetricClusterAggregatorHourly(PhoenixHBaseAccessor hBaseAccessor, Configuration metricsConf) {
    super(hBaseAccessor, metricsConf);

    String checkpointDir = metricsConf.get(TIMELINE_METRICS_AGGREGATOR_CHECKPOINT_DIR,
        DEFAULT_CHECKPOINT_LOCATION);

    checkpointLocation = FilenameUtils.concat(checkpointDir, CLUSTER_AGGREGATOR_HOURLY_CHECKPOINT_FILE);

    sleepIntervalMillis = SECONDS.toMillis(metricsConf.getLong(CLUSTER_AGGREGATOR_HOUR_SLEEP_INTERVAL, 3600l));

    checkpointCutOffIntervalMillis = SECONDS
        .toMillis(metricsConf.getLong(CLUSTER_AGGREGATOR_HOUR_CHECKPOINT_CUTOFF_INTERVAL, 7200l));

    checkpointCutOffMultiplier = metricsConf.getInt(CLUSTER_AGGREGATOR_HOUR_CHECKPOINT_CUTOFF_MULTIPLIER, 2);
}
From source file:org.apache.hama.bsp.CombineFileInputFormat.java
License:Apache License
@Override
public InputSplit[] getSplits(BSPJob bspJob, int numSplits) throws IOException {
    Configuration job = bspJob.getConfiguration();

    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;

    // the values specified by setxxxSplitSize() takes precedence over the
    // values that might have been specified in the config
    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = job.getLong("mapred.min.split.size.per.node", 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = job.getLong("mapred.min.split.size.per.rack", 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = job.getLong("mapred.max.split.size", 0);
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size pernode " + minSizeNode
            + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack" + minSizeRack
            + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node" + minSizeNode
            + " cannot be smaller than minimum split size per rack " + minSizeRack);
    }

    // all the files in input set
    Path[] paths = FileUtil.stat2Paths(listStatus(bspJob));
    List<CombineFileSplit> splits = new ArrayList<CombineFileSplit>();
    if (paths.length == 0) {
        return splits.toArray(new CombineFileSplit[splits.size()]);
    }

    // In one single iteration, process all the paths in a single pool.
    // Processing one pool at a time ensures that a split contans paths
    // from a single pool only.
    for (MultiPathFilter onepool : pools) {
        ArrayList<Path> myPaths = new ArrayList<Path>();
        // pick one input path. If it matches all the filters in a pool,
        // add it to the output set
        for (int i = 0; i < paths.length; i++) {
            if (paths[i] == null) { // already processed
                continue;
            }
            Path p = new Path(paths[i].toUri().getPath());
            if (onepool.accept(p)) {
                myPaths.add(paths[i]); // add it to my output set
                paths[i] = null; // already processed
            }
        }
        // create splits for all files in this pool.
        getMoreSplits(bspJob, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack,
            splits);
    }

    // Finally, process all paths that do not belong to any pool.
    ArrayList<Path> myPaths = new ArrayList<Path>();
    for (Path path : paths) {
        if (path == null) { // already processed
            continue;
        }
        myPaths.add(path);
    }
    // create splits for all files that are not in any pool.
    getMoreSplits(bspJob, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack, splits);

    // free up rackToNodes map
    rackToNodes.clear();
    return splits.toArray(new CombineFileSplit[splits.size()]);
}
From source file:org.apache.hama.bsp.TaskLog.java
License:Apache License
/**
 * Get the desired maximum length of task's logs.
 *
 * @param conf the job to look in
 * @return the number of bytes to cap the log files at
 */
public static long getTaskLogLength(Configuration conf) {
    return conf.getLong("bsp.userlog.limit.kb", 100) * 1024;
}
From source file:org.apache.hcatalog.templeton.tool.HDFSCleanup.java
License:Apache License
/**
 * Create a cleanup object.
 */
private HDFSCleanup(Configuration appConf) {
    this.appConf = appConf;
    interval = appConf.getLong(HDFS_CLEANUP_INTERVAL, interval);
    maxage = appConf.getLong(HDFS_CLEANUP_MAX_AGE, maxage);
    storage_root = appConf.get(TempletonStorage.STORAGE_ROOT);
}
From source file:org.apache.hcatalog.templeton.tool.ZooKeeperCleanup.java
License:Apache License
/**
 * Create a cleanup object. We use the appConfig to configure JobState.
 * @param appConf
 */
private ZooKeeperCleanup(Configuration appConf) {
    this.appConf = appConf;
    interval = appConf.getLong(ZK_CLEANUP_INTERVAL, interval);
    maxage = appConf.getLong(ZK_CLEANUP_MAX_AGE, maxage);
}
From source file:org.apache.http.examples.client.TestFetcher.java
License:Apache License
public static void main(String[] args) throws Exception {
    String url = "http://m.58.com/cd/zufang/";
    url = "http://i.m.58.com/cd/zufang/15538653692039x.shtml";
    url = "http://i.m.58.com/cd/zufang/15403127032966x.shtml";
    url = "http://m.58.com/wuhou/qiuzu/?from=list_select_quyu";
    url = "http://i.m.58.com/cd/qiuzu/15691792568835x.shtml";
    url = "http://i.m.58.com/cd/qiuzu/15514728510981x.shtml";
    url = "http://m.58.com/cd/ershoufang/";
    url = "http://i.m.58.com/cd/ershoufang/15660173611521x.shtml";
    // url = "http://i.m.58.com/cd/ershoufang/15692610703237x.shtml";
    // url = "http://i.m.58.com/cd/ershoufang/15646523265417x.shtml";
    // url = "http://i.m.58.com/cd/ershoufang/15682093896709x.shtml";
    url = "http://m.58.com/cd/hezu";
    url = "http://i.m.58.com/cd/hezu/11632175277065x.shtml";
    // url = "http://i.m.58.com/cd/hezu/15568727765129x.shtml";
    // url = "http://i.m.58.com/cd/hezu/15568727765129x.shtml";
    url = "http://wap.ganji.com/cd/fang1/445542193x";

    Pattern pattern = Pattern.compile("((.*?)\\?device=wap$)|((.*?)device=wap&(.*))|((.*?)&device=wap$)");
    System.out.println(pattern.matcher(url).replaceAll("$2$4$5$7"));
    pattern = Pattern.compile("(device=wap)");
    System.out.println(pattern.matcher(url).replaceAll(""));

    Configuration conf = NutchConfiguration.create();
    conf.set(Nutch.CRAWL_ID_KEY, "ea");
    NutchConstant.setUrlConfig(conf, 3);
    NutchConstant.setSegmentParseRules(conf);
    NutchConstant.getSegmentParseRules(conf);
    SegMentParsers parses = new SegMentParsers(conf);

    // Result<String, WebPage> rs = query.execute();
    long curTime = System.currentTimeMillis();
    UrlPathMatch urlcfg = NutchConstant.getUrlConfig(conf);
    boolean filter = conf.getBoolean(GeneratorJob.GENERATOR_FILTER, true);
    boolean normalise = conf.getBoolean(GeneratorJob.GENERATOR_NORMALISE, true);
    long limit = conf.getLong(GeneratorJob.GENERATOR_TOP_N, Long.MAX_VALUE);
    if (limit < 5) {
        limit = Long.MAX_VALUE;
    }
    int retryMax = conf.getInt("db.fetch.retry.max", 3);
    limit = Integer.MAX_VALUE;
    curTime = conf.getLong(GeneratorJob.GENERATOR_CUR_TIME, System.currentTimeMillis());
    ProtocolFactory protocolFactory = new ProtocolFactory(conf);

    int rowCount = 0;
    HttpComponent httpComponent = new HttpComponent();
    httpComponent.setConf(conf);
    long l = System.currentTimeMillis();
    try {
        l = System.currentTimeMillis();
        HttpClient httpClient = httpComponent.getClient();
        HttpParams httpParams = httpClient.getParams();
        httpClient.getParams().setParameter("http.protocol.cookie-policy", CookiePolicy.BROWSER_COMPATIBILITY);
        httpClient.getParams().setParameter("http.protocol.content-charset", HTTP.UTF_8);
        String userAgent = getAgentString("NutchCVS", null, "Nutch", "http://lucene.apache.org/nutch/bot.html",
            "nutch-agent@lucene.apache.org");
        userAgent = "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1";
        // userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36";
        // userAgent = "Mozilla/5.0 (Windows NT 5.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1";
        // userAgent = "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.77 Safari/537.1";
        String acceptLanguage = "en-us,en-gb,en;q=0.7,*;q=0.3";
        String accept = "text/html,application/xml;q=0.9,application/xhtml+xml,text/xml;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
        String acceptCharset = "utf-8,ISO-8859-1;q=0.7,*;q=0.7";
        System.out.println("userAgent=" + userAgent);
        // Set up an HTTPS socket factory that accepts self-signed certs.
        ArrayList<BasicHeader> headers = new ArrayList<BasicHeader>();
        // Set the User Agent in the header
        headers.add(new BasicHeader("User-Agent", userAgent));
        // prefer English
        // headers.add(new BasicHeader("Accept-Language", acceptLanguage));
        // prefer UTF-8
        // headers.add(new BasicHeader("Accept-Charset", acceptCharset));
        // prefer understandable formats
        // headers.add(new BasicHeader("Accept", accept));
        // accept gzipped content
        headers.add(new BasicHeader("Accept-Encoding", "x-gzip, gzip, deflate"));
        httpParams.setParameter(ClientPNames.DEFAULT_HEADERS, headers);
        org.apache.nutch.net.protocols.Response response = new HttpComponentResponse(httpComponent,
            new URL(url), null, true);
        System.out.println("==========================================================");
        System.out.println(new String(response.getContent()).replace("\"utf-8\"", "\"GB2312\""));
        System.out.println("==========================================================");
        int code = response.getCode();
        System.out.println((new Date().toLocaleString()) + " num:" + rowCount + " code:" + code + " time:"
            + (System.currentTimeMillis() - l) + " url:" + url);
        l = System.currentTimeMillis();
    } catch (Exception e) {
        e.printStackTrace(System.out);
    }
}
From source file:org.apache.jena.hadoop.rdf.io.output.AbstractBatchedNodeTupleOutputFormat.java
License:Apache License
@Override
protected RecordWriter<TKey, TValue> getRecordWriter(Writer writer, Configuration config, Path outputPath) {
    long batchSize = config.getLong(RdfIOConstants.OUTPUT_BATCH_SIZE, RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE);
    return this.getRecordWriter(writer, batchSize);
}
From source file:org.apache.kylin.storage.hbase.steps.HFileOutputFormat3.java
License:Apache License
static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter(
        final TaskAttemptContext context, final OutputCommitter committer)
        throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputdir = ((FileOutputCommitter) committer).getWorkPath();
    final Configuration conf = context.getConfiguration();
    LOG.debug("Task output path: " + outputdir);
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config. Add to hbase-*.xml if other than default compression.
    final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
        false);

    // create a map from column family to the compression algorithm
    final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
    final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
    final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf);
    final DataBlockEncoding overriddenEncoding;
    if (dataBlockEncodingStr != null) {
        overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
    } else {
        overriddenEncoding = null;
    }

    return new RecordWriter<ImmutableBytesWritable, V>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
            Bytes.BYTES_COMPARATOR);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        @Override
        public void write(ImmutableBytesWritable row, V cell) throws IOException {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            if (row == null && kv == null) {
                rollWriters();
                return;
            }
            byte[] rowKey = CellUtil.cloneRow(kv);
            long length = kv.getLength();
            byte[] family = CellUtil.cloneFamily(kv);
            WriterLength wl = this.writers.get(family);
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath() + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED",
            justification = "Not important")
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            Algorithm compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            BloomType bloomType = bloomTypeMap.get(family);
            bloomType = bloomType == null ? BloomType.NONE : bloomType;
            Integer blockSize = blockSizeMap.get(family);
            blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
            DataBlockEncoding encoding = overriddenEncoding;
            encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
            encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression)
                .withChecksumType(HStore.getChecksumType(conf))
                .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf)).withBlockSize(blockSize);
            contextBuilder.withDataBlockEncoding(encoding);
            HFileContext hFileContext = contextBuilder.build();
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
                .withFileContext(hFileContext).build();
            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        @Override
        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}
From source file:org.apache.lens.server.stats.store.log.StatisticsLogRollupHandler.java
License:Apache License
/**
 * Initalize the handler.
 *
 * @param conf configuration to be used while initialization.
 */
public void initialize(Configuration conf) {
    task = new StatisticsLogFileScannerTask(this.logSegregationContext);
    timer = new Timer();
    rate = conf.getLong(LensConfConstants.STATS_ROLLUP_SCAN_RATE, LensConfConstants.DEFAULT_STATS_ROLLUP_SCAN_RATE);
}