Example usage for org.apache.hadoop.conf Configuration getLong

Introduction

This page shows example usages of org.apache.hadoop.conf.Configuration.getLong.

Prototype

public long getLong(String name, long defaultValue) 

Document

Get the value of the name property as a long. If no such property exists, the provided default value is returned.
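
For illustration, here is a minimal, self-contained sketch of the pattern the examples below follow: reading a tunable setting with a compile-time default. The property name and default value are hypothetical, chosen only for this sketch.

import org.apache.hadoop.conf.Configuration;

public class GetLongExample {
    // Hypothetical property name and default, used only for illustration.
    private static final String BUFFER_SIZE_KEY = "example.io.buffer.size";
    private static final long DEFAULT_BUFFER_SIZE = 64 * 1024L;

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Returns the configured value if example.io.buffer.size is set,
        // otherwise falls back to DEFAULT_BUFFER_SIZE.
        long bufferSize = conf.getLong(BUFFER_SIZE_KEY, DEFAULT_BUFFER_SIZE);
        System.out.println("Using buffer size: " + bufferSize);
    }
}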

Usage

From source file: org.apache.hadoop.examples.terasort.TeraInputFormat.java

License: Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(TeraSortConfigKeys.SAMPLE_SIZE.key(),
            TeraSortConfigKeys.DEFAULT_SAMPLE_SIZE);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(
            conf.getInt(TeraSortConfigKeys.NUM_PARTITIONS.key(), TeraSortConfigKeys.DEFAULT_NUM_PARTITIONS),
            splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {

                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}

From source file: org.apache.hadoop.yarn.server.applicationhistoryservice.metrics.timeline.TimelineMetricClusterAggregatorHourly.java

License: Apache License

public TimelineMetricClusterAggregatorHourly(PhoenixHBaseAccessor hBaseAccessor, Configuration metricsConf) {
    super(hBaseAccessor, metricsConf);

    String checkpointDir = metricsConf.get(TIMELINE_METRICS_AGGREGATOR_CHECKPOINT_DIR,
            DEFAULT_CHECKPOINT_LOCATION);

    checkpointLocation = FilenameUtils.concat(checkpointDir, CLUSTER_AGGREGATOR_HOURLY_CHECKPOINT_FILE);

    sleepIntervalMillis = SECONDS.toMillis(metricsConf.getLong(CLUSTER_AGGREGATOR_HOUR_SLEEP_INTERVAL, 3600L));
    checkpointCutOffIntervalMillis = SECONDS
            .toMillis(metricsConf.getLong(CLUSTER_AGGREGATOR_HOUR_CHECKPOINT_CUTOFF_INTERVAL, 7200L));
    checkpointCutOffMultiplier = metricsConf.getInt(CLUSTER_AGGREGATOR_HOUR_CHECKPOINT_CUTOFF_MULTIPLIER, 2);
}

From source file: org.apache.hama.bsp.CombineFileInputFormat.java

License: Apache License

@Override
public InputSplit[] getSplits(BSPJob bspJob, int numSplits) throws IOException {

    Configuration job = bspJob.getConfiguration();

    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;

    // the values specified by setxxxSplitSize() take precedence over the
    // values that might have been specified in the config
    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = job.getLong("mapred.min.split.size.per.node", 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = job.getLong("mapred.min.split.size.per.rack", 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = job.getLong("mapred.max.split.size", 0);
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size pernode " + minSizeNode
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack" + minSizeRack
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node" + minSizeNode
                + " cannot be smaller than minimum split size per rack " + minSizeRack);
    }

    // all the files in input set
    Path[] paths = FileUtil.stat2Paths(listStatus(bspJob));
    List<CombineFileSplit> splits = new ArrayList<CombineFileSplit>();
    if (paths.length == 0) {
        return splits.toArray(new CombineFileSplit[splits.size()]);
    }

    // In one single iteration, process all the paths in a single pool.
    // Processing one pool at a time ensures that a split contains paths
    // from a single pool only.
    for (MultiPathFilter onepool : pools) {
        ArrayList<Path> myPaths = new ArrayList<Path>();

        // pick one input path. If it matches all the filters in a pool,
        // add it to the output set
        for (int i = 0; i < paths.length; i++) {
            if (paths[i] == null) { // already processed
                continue;
            }
            Path p = new Path(paths[i].toUri().getPath());
            if (onepool.accept(p)) {
                myPaths.add(paths[i]); // add it to my output set
                paths[i] = null; // already processed
            }
        }
        // create splits for all files in this pool.
        getMoreSplits(bspJob, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack,
                splits);
    }

    // Finally, process all paths that do not belong to any pool.
    ArrayList<Path> myPaths = new ArrayList<Path>();
    for (Path path : paths) {
        if (path == null) { // already processed
            continue;
        }
        myPaths.add(path);
    }
    // create splits for all files that are not in any pool.
    getMoreSplits(bspJob, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack, splits);

    // free up rackToNodes map
    rackToNodes.clear();
    return splits.toArray(new CombineFileSplit[splits.size()]);
}

From source file: org.apache.hama.bsp.TaskLog.java

License: Apache License

/**
 * Get the desired maximum length of task's logs.
 *
 * @param conf the job to look in
 * @return the number of bytes to cap the log files at
 */
public static long getTaskLogLength(Configuration conf) {
    return conf.getLong("bsp.userlog.limit.kb", 100) * 1024;
}

From source file: org.apache.hcatalog.templeton.tool.HDFSCleanup.java

License: Apache License

/**
 * Create a cleanup object.
 */
private HDFSCleanup(Configuration appConf) {
    this.appConf = appConf;
    interval = appConf.getLong(HDFS_CLEANUP_INTERVAL, interval);
    maxage = appConf.getLong(HDFS_CLEANUP_MAX_AGE, maxage);
    storage_root = appConf.get(TempletonStorage.STORAGE_ROOT);
}

From source file: org.apache.hcatalog.templeton.tool.ZooKeeperCleanup.java

License: Apache License

/**
 * Create a cleanup object.  We use the appConfig to configure JobState.
 * @param appConf the application configuration used to configure JobState
 */
private ZooKeeperCleanup(Configuration appConf) {
    this.appConf = appConf;
    interval = appConf.getLong(ZK_CLEANUP_INTERVAL, interval);
    maxage = appConf.getLong(ZK_CLEANUP_MAX_AGE, maxage);
}

From source file: org.apache.http.examples.client.TestFetcher.java

License: Apache License

public static void main(String[] args) throws Exception {

    String url = "http://m.58.com/cd/zufang/";
    url = "http://i.m.58.com/cd/zufang/15538653692039x.shtml";
    url = "http://i.m.58.com/cd/zufang/15403127032966x.shtml";
    url = "http://m.58.com/wuhou/qiuzu/?from=list_select_quyu";
    url = "http://i.m.58.com/cd/qiuzu/15691792568835x.shtml";
    url = "http://i.m.58.com/cd/qiuzu/15514728510981x.shtml";

    url = "http://m.58.com/cd/ershoufang/";
    url = "http://i.m.58.com/cd/ershoufang/15660173611521x.shtml";
    // url = "http://i.m.58.com/cd/ershoufang/15692610703237x.shtml";
    // url = "http://i.m.58.com/cd/ershoufang/15646523265417x.shtml";
    // url = "http://i.m.58.com/cd/ershoufang/15682093896709x.shtml";
    url = "http://m.58.com/cd/hezu";
    url = "http://i.m.58.com/cd/hezu/11632175277065x.shtml";
    // url = "http://i.m.58.com/cd/hezu/15568727765129x.shtml";
    // url = "http://i.m.58.com/cd/hezu/15568727765129x.shtml";
    url = "http://wap.ganji.com/cd/fang1/445542193x";

    Pattern pattern = Pattern.compile("((.*?)\\?device=wap$)|((.*?)device=wap&(.*))|((.*?)&device=wap$)");
    System.out.println(pattern.matcher(url).replaceAll("$2$4$5$7"));
    pattern = Pattern.compile("(device=wap)");
    System.out.println(pattern.matcher(url).replaceAll(""));

    Configuration conf = NutchConfiguration.create();
    conf.set(Nutch.CRAWL_ID_KEY, "ea");
    NutchConstant.setUrlConfig(conf, 3);
    NutchConstant.setSegmentParseRules(conf);
    NutchConstant.getSegmentParseRules(conf);

    SegMentParsers parses = new SegMentParsers(conf);
    // Result<String, WebPage> rs = query.execute();
    long curTime = System.currentTimeMillis();
    UrlPathMatch urlcfg = NutchConstant.getUrlConfig(conf);
    boolean filter = conf.getBoolean(GeneratorJob.GENERATOR_FILTER, true);
    boolean normalise = conf.getBoolean(GeneratorJob.GENERATOR_NORMALISE, true);
    long limit = conf.getLong(GeneratorJob.GENERATOR_TOP_N, Long.MAX_VALUE);
    if (limit < 5) {
        limit = Long.MAX_VALUE;
    }
    int retryMax = conf.getInt("db.fetch.retry.max", 3);

    limit = Integer.MAX_VALUE;

    curTime = conf.getLong(GeneratorJob.GENERATOR_CUR_TIME, System.currentTimeMillis());

    ProtocolFactory protocolFactory = new ProtocolFactory(conf);

    int rowCount = 0;
    HttpComponent httpComponent = new HttpComponent();
    httpComponent.setConf(conf);
    long l = System.currentTimeMillis();

    try {
        l = System.currentTimeMillis();
        HttpClient httpClient = httpComponent.getClient();
        HttpParams httpParams = httpClient.getParams();
        httpClient.getParams().setParameter("http.protocol.cookie-policy", CookiePolicy.BROWSER_COMPATIBILITY);
        httpClient.getParams().setParameter("http.protocol.content-charset", HTTP.UTF_8);
        String userAgent = getAgentString("NutchCVS", null, "Nutch", "http://lucene.apache.org/nutch/bot.html",
                "nutch-agent@lucene.apache.org");
        userAgent = "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1";
        // userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36";
        // userAgent = "Mozilla/5.0 (Windows NT 5.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1";
        // userAgent = "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.77 Safari/537.1";
        String acceptLanguage = "en-us,en-gb,en;q=0.7,*;q=0.3";
        String accept = "text/html,application/xml;q=0.9,application/xhtml+xml,text/xml;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
        String acceptCharset = "utf-8,ISO-8859-1;q=0.7,*;q=0.7";
        System.out.println("userAgent=" + userAgent);
        // Set up an HTTPS socket factory that accepts self-signed certs.
        ArrayList<BasicHeader> headers = new ArrayList<BasicHeader>();
        // Set the User Agent in the header
        headers.add(new BasicHeader("User-Agent", userAgent));
        // prefer English
        // headers.add(new BasicHeader("Accept-Language", acceptLanguage));
        // // prefer UTF-8
        // headers.add(new BasicHeader("Accept-Charset", acceptCharset));
        // // prefer understandable formats
        // headers.add(new BasicHeader("Accept", accept));
        // accept gzipped content
        headers.add(new BasicHeader("Accept-Encoding", "x-gzip, gzip, deflate"));
        httpParams.setParameter(ClientPNames.DEFAULT_HEADERS, headers);

        org.apache.nutch.net.protocols.Response response = new HttpComponentResponse(httpComponent,
                new URL(url), null, true);
        System.out.println("==========================================================");
        System.out.println(new String(response.getContent()).replace("\"utf-8\"", "\"GB2312\""));
        System.out.println("==========================================================");
        int code = response.getCode();
        System.out.println((new Date().toLocaleString()) + " num:" + rowCount + " code:" + code + " time:"
                + (System.currentTimeMillis() - l) + "  url:" + url);
        l = System.currentTimeMillis();
    } catch (Exception e) {
        e.printStackTrace(System.out);
    }
}

From source file: org.apache.jena.hadoop.rdf.io.output.AbstractBatchedNodeTupleOutputFormat.java

License: Apache License

@Override
protected RecordWriter<TKey, TValue> getRecordWriter(Writer writer, Configuration config, Path outputPath) {
    long batchSize = config.getLong(RdfIOConstants.OUTPUT_BATCH_SIZE, RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE);
    return this.getRecordWriter(writer, batchSize);
}

From source file: org.apache.kylin.storage.hbase.steps.HFileOutputFormat3.java

License: Apache License

static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter(
        final TaskAttemptContext context, final OutputCommitter committer)
        throws IOException, InterruptedException {

    // Get the path of the temporary output file
    final Path outputdir = ((FileOutputCommitter) committer).getWorkPath();
    final Configuration conf = context.getConfiguration();
    LOG.debug("Task output path: " + outputdir);
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config.  Add to hbase-*.xml if other than default compression.
    final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
    final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
    final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf);
    final DataBlockEncoding overriddenEncoding;
    if (dataBlockEncodingStr != null) {
        overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
    } else {
        overriddenEncoding = null;
    }

    return new RecordWriter<ImmutableBytesWritable, V>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        @Override
        public void write(ImmutableBytesWritable row, V cell) throws IOException {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            if (row == null && kv == null) {
                rollWriters();
                return;
            }
            byte[] rowKey = CellUtil.cloneRow(kv);
            long length = kv.getLength();
            byte[] family = CellUtil.cloneFamily(kv);
            WriterLength wl = this.writers.get(family);
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED", justification = "Not important")
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            Algorithm compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            BloomType bloomType = bloomTypeMap.get(family);
            bloomType = bloomType == null ? BloomType.NONE : bloomType;
            Integer blockSize = blockSizeMap.get(family);
            blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
            DataBlockEncoding encoding = overriddenEncoding;
            encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
            encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression)
                    .withChecksumType(HStore.getChecksumType(conf))
                    .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf)).withBlockSize(blockSize);
            contextBuilder.withDataBlockEncoding(encoding);
            HFileContext hFileContext = contextBuilder.build();

            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
                    .withFileContext(hFileContext).build();

            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        @Override
        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}

From source file: org.apache.lens.server.stats.store.log.StatisticsLogRollupHandler.java

License: Apache License

/**
 * Initialize the handler.
 *
 * @param conf configuration to be used while initialization.
 */
public void initialize(Configuration conf) {
    task = new StatisticsLogFileScannerTask(this.logSegregationContext);
    timer = new Timer();
    rate = conf.getLong(LensConfConstants.STATS_ROLLUP_SCAN_RATE,
            LensConfConstants.DEFAULT_STATS_ROLLUP_SCAN_RATE);
}