Example usage for org.apache.hadoop.conf Configuration getLong

List of usage examples for org.apache.hadoop.conf Configuration getLong

Introduction

On this page you can find example usage of org.apache.hadoop.conf.Configuration.getLong.

Prototype

public long getLong(String name, long defaultValue) 

Document

Get the value of the name property as a long; if no such property exists, the provided default value is returned.
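
A minimal sketch of the call in isolation (the property name my.app.timeout.ms is made up for illustration): when the key is absent from the configuration, the supplied default is returned; once the key is set, the configured value wins.

import org.apache.hadoop.conf.Configuration;

public class GetLongDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Key not set yet: getLong returns the supplied default.
        System.out.println(conf.getLong("my.app.timeout.ms", 30000L)); // 30000

        // Once the property is set, the configured value is returned instead.
        conf.setLong("my.app.timeout.ms", 5000L);
        System.out.println(conf.getLong("my.app.timeout.ms", 30000L)); // 5000
    }
}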

Usage

From source file: com.twitter.hraven.PigJobDescFactory.java

License: Apache License

@Override
public JobDesc create(QualifiedJobId qualifiedJobId, long submitTimeMillis, Configuration jobConf) {
    String appId = getAppId(jobConf);
    String version = jobConf.get(Constants.PIG_VERSION_CONF_KEY, Constants.UNKNOWN);
    long pigSubmitTimeMillis = jobConf.getLong(Constants.PIG_RUN_CONF_KEY, 0);

    // This means that Constants.PIG_RUN_CONF_KEY was not present (for jobs
    // launched with an older pig version).
    if (pigSubmitTimeMillis == 0) {
        String pigLogfile = jobConf.get(Constants.PIG_LOG_FILE_CONF_KEY);
        if (pigLogfile == null) {
            // Should be rare, but we're seeing this happen occasionally
            // Give up on grouping the jobs within the run together, and treat these as individual runs.
            pigSubmitTimeMillis = submitTimeMillis;
        } else {
            pigSubmitTimeMillis = getScriptStartTimeFromLogfileName(pigLogfile);
        }
    }

    return create(qualifiedJobId, jobConf, appId, version, Framework.PIG, pigSubmitTimeMillis);
}
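
In this example the 0 default doubles as an "absent" marker that triggers the fallback logic above. A stripped-down sketch of that sentinel pattern (the log-file fallback is omitted), using a hypothetical property name since the real key lives in hraven's Constants.PIG_RUN_CONF_KEY:

import org.apache.hadoop.conf.Configuration;

final class PigSubmitTimeSketch {
    // Hypothetical key; the real one is Constants.PIG_RUN_CONF_KEY.
    private static final String PIG_RUN_KEY = "example.pig.run.timestamp";

    /** 0 cannot be a real submit time, so it cleanly signals "property not set". */
    static long resolve(Configuration jobConf, long jobSubmitTimeMillis) {
        long pigSubmitTimeMillis = jobConf.getLong(PIG_RUN_KEY, 0L);
        return pigSubmitTimeMillis != 0L ? pigSubmitTimeMillis : jobSubmitTimeMillis;
    }
}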

From source file: com.twitter.hraven.ScaldingJobDescFactory.java

License: Apache License

/**
 * Returns the flow submit time for this job or a computed substitute that
 * will at least be consistent for all jobs in a flow.
 *
 * The time is computed according to:
 * <ol>
 *   <li>use "scalding.flow.submitted.timestamp" if present</li>
 *   <li>otherwise use "cascading.flow.id" as a substitute</li>
 * </ol>
 * 
 * @param jobConf
 *          The job configuration
 * @param submitTimeMillis
 *          of an individual job in the flow
 * @return when the entire flow started, or else at least something that binds
 *         all jobs in a flow together.
 */
static long getFlowSubmitTimeMillis(Configuration jobConf, long submitTimeMillis) {
    // TODO: Do some parsing / hacking on this.
    // Grab the year/month component and add part of the flowId turned into long
    // kind of a thing.

    long cascadingSubmitTimeMillis = jobConf.getLong(Constants.CASCADING_RUN_CONF_KEY, 0);

    if (cascadingSubmitTimeMillis == 0) {
        // Convert hex encoded flow ID (128-bit MD5 hash) into long as a substitute
        String flowId = jobConf.get(Constants.CASCADING_FLOW_ID_CONF_KEY);
        if (flowId != null && !flowId.isEmpty()) {
            if (flowId.length() > 16) {
                flowId = flowId.substring(0, 16);
            }
            try {
                long tmpFlow = Long.parseLong(flowId, 16);
                // need to prevent the computed run ID from showing up in the future,
                // so we don't "mask" jobs later submitted with the correct property

                // make this show up within the job submit month
                long monthStart = DateUtil.getMonthStart(submitTimeMillis);
                // this still allows these jobs to show up in the "future", but at least
                // constrains to current month
                cascadingSubmitTimeMillis = monthStart + (tmpFlow % DateUtil.MONTH_IN_MILLIS);
            } catch (NumberFormatException nfe) {
                // fall back to the job submit time
                cascadingSubmitTimeMillis = submitTimeMillis;
            }
        } else {
            // fall back to the job submit time
            cascadingSubmitTimeMillis = submitTimeMillis;
        }
    }

    return cascadingSubmitTimeMillis;
}
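
The notable trick above is folding an arbitrary flow ID into the submit month so the synthetic timestamp cannot land in the future. A standalone sketch of that arithmetic, assuming a 30-day month length and a calendar-based month start (the real helpers are hraven's DateUtil.getMonthStart and DateUtil.MONTH_IN_MILLIS, whose definitions are not shown here):

import java.util.Calendar;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;

final class SyntheticFlowTime {
    // Assumed month length; hraven's DateUtil.MONTH_IN_MILLIS may differ.
    private static final long MONTH_IN_MILLIS = TimeUnit.DAYS.toMillis(30);

    /** Midnight UTC on the first day of the month containing timestampMillis. */
    static long monthStart(long timestampMillis) {
        Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
        cal.setTimeInMillis(timestampMillis);
        cal.set(Calendar.DAY_OF_MONTH, 1);
        cal.set(Calendar.HOUR_OF_DAY, 0);
        cal.set(Calendar.MINUTE, 0);
        cal.set(Calendar.SECOND, 0);
        cal.set(Calendar.MILLISECOND, 0);
        return cal.getTimeInMillis();
    }

    /** Derive a stable per-flow timestamp that stays within the submit month. */
    static long fromFlowId(String flowId, long submitTimeMillis) {
        String head = flowId.length() > 15 ? flowId.substring(0, 15) : flowId;
        try {
            long tmpFlow = Long.parseLong(head, 16);
            return monthStart(submitTimeMillis) + (tmpFlow % MONTH_IN_MILLIS);
        } catch (NumberFormatException nfe) {
            return submitTimeMillis; // not hex: fall back to the job submit time
        }
    }
}

The substring here is capped at 15 hex digits so Long.parseLong can never overflow; the original caps at 16 and relies on the NumberFormatException fallback for values above Long.MAX_VALUE.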

From source file: com.yahoo.ycsb.bulk.hbase.BulkDataGeneratorJob.java

License: Apache License

/** Create the input file used for launching the maps */
void createInputFile(Job job, String workdir) throws IOException {
    Configuration conf = job.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    Path inpath = new Path(workdir + "/inputkeyranges.txt");
    PrintStream out = new PrintStream(new BufferedOutputStream(fs.create(inpath)));
    long start = conf.getLong(ARG_KEY_RANGE_START, 0);
    long end = conf.getLong(ARG_KEY_RANGE_END, 0);
    int parts = conf.getInt(ARG_KEY_RANGE_PARTITIONS, 1);

    writeRanges(start, end, parts, out);
    out.close();

    TextInputFormat.setInputPaths(job, inpath);
    // NLineInputFormat.setInputPaths(job, inpath);

    /* compute the max input split size */
    //        long max_split = fs.getFileStatus( inpath ).getLen() / parts;
    //        TextInputFormat.setMaxInputSplitSize(job, max_split);

    // JobConf jc = new JobConf(conf);
    // jc.setNumMapTasks(parts);
}
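
The ARG_KEY_RANGE_* properties read back here must be placed on the job's Configuration by the driver before createInputFile runs. A hedged driver-side sketch with placeholder property names, since the actual key strings behind those constants are not shown in this excerpt:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class BulkGenDriverSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Placeholder property names; the generator's real keys are the
        // ARG_KEY_RANGE_* constants defined in BulkDataGeneratorJob.
        conf.setLong("example.keyrange.start", 0L);
        conf.setLong("example.keyrange.end", 1000000L);
        conf.setInt("example.keyrange.partitions", 10);

        // Hadoop 2.x style job construction.
        Job job = Job.getInstance(conf, "bulk-data-generator");
        // ... configure mapper/reducer; createInputFile(job, workdir) then reads
        // the same values back with conf.getLong(key, default).
    }
}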

From source file: com.zjy.mongo.splitter.BSONSplitter.java

License: Apache License

public static long getSplitSize(final Configuration conf, final FileStatus file) {
    // Try new configuration options first, but fall back to old ones.
    long maxSize = conf.getLong("mapreduce.input.fileinputformat.split.maxsize",
            conf.getLong("mapred.max.split.size", Long.MAX_VALUE));
    long minSize = Math.max(1L, conf.getLong("mapreduce.input.fileinputformat.split.minsize",
            conf.getLong("mapred.min.split.size", 1L)));

    if (file != null) {
        long fileBlockSize = file.getBlockSize();
        return Math.max(minSize, Math.min(maxSize, fileBlockSize));
    } else {
        long blockSize = conf.getLong("dfs.blockSize", 64 * 1024 * 1024);
        return Math.max(minSize, Math.min(maxSize, blockSize));
    }
}
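
Nesting one getLong call as the default of another is a compact way to prefer the newer mapreduce.* property while still honouring the deprecated mapred.* one, with a hard-coded constant as the last resort. The same pattern in isolation:

import org.apache.hadoop.conf.Configuration;

final class SplitSizeConfig {
    /** Prefer the MR2 key, fall back to the MR1 key, then to a hard default. */
    static long maxSplitSize(Configuration conf) {
        return conf.getLong("mapreduce.input.fileinputformat.split.maxsize",
                conf.getLong("mapred.max.split.size", Long.MAX_VALUE));
    }
}

The inner lookup is evaluated eagerly even when the outer key is present, which is harmless because both lookups are cheap in-memory reads.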

From source file: common.DataNode.java

License: Apache License

/**
 * This method starts the data node with the specified conf.
 *
 * @param conf - the configuration
 *  if conf's CONFIG_PROPERTY_SIMULATED property is set
 *  then a simulated storage based data node is created.
 * 
 * @param dataDirs - only for a non-simulated storage data node
 * @throws IOException
 */
void startDataNode(Configuration conf, AbstractList<File> dataDirs, DatanodeProtocol namenode)
        throws IOException {
    // use configured nameserver & interface to get local hostname
    if (conf.get(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY) != null) {
        machineName = conf.get(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY);
    }
    if (machineName == null) {
        machineName = DNS.getDefaultHost(conf.get("dfs.datanode.dns.interface", "default"),
                conf.get("dfs.datanode.dns.nameserver", "default"));
    }
    this.nameNodeAddr = NameNode.getAddress(conf);

    this.socketTimeout = conf.getInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, HdfsConstants.READ_TIMEOUT);
    this.socketWriteTimeout = conf.getInt("dfs.datanode.socket.write.timeout", HdfsConstants.WRITE_TIMEOUT);
    /* Based on results on different platforms, we might need set the default 
     * to false on some of them. */
    this.transferToAllowed = conf.getBoolean("dfs.datanode.transferTo.allowed", true);
    this.writePacketSize = conf.getInt(DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY,
            DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT);
    InetSocketAddress socAddr = NetUtils.createSocketAddr(conf.get("dfs.datanode.address", "0.0.0.0:50010"));
    int tmpPort = socAddr.getPort();
    storage = new DataStorage();
    // construct registration
    this.dnRegistration = new DatanodeRegistration(machineName + ":" + tmpPort);

    // connect to name node
    this.namenode = namenode;

    // get version and id info from the name-node
    NamespaceInfo nsInfo = handshake();
    StartupOption startOpt = getStartupOption(conf);
    assert startOpt != null : "Startup option must be set.";

    boolean simulatedFSDataset = conf.getBoolean("dfs.datanode.simulateddatastorage", false);
    if (simulatedFSDataset) {
        setNewStorageID(dnRegistration);
        dnRegistration.storageInfo.layoutVersion = FSConstants.LAYOUT_VERSION;
        dnRegistration.storageInfo.namespaceID = nsInfo.namespaceID;
        // it would have been better to pass storage as a parameter to
        // constructor below - need to augment ReflectionUtils used below.
        conf.set(DFSConfigKeys.DFS_DATANODE_STORAGEID_KEY, dnRegistration.getStorageID());
        try {
            //Equivalent of following (can't do because Simulated is in test dir)
            //  this.data = new SimulatedFSDataset(conf);
            this.data = (FSDatasetInterface) ReflectionUtils.newInstance(
                    Class.forName("org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset"), conf);
        } catch (ClassNotFoundException e) {
            throw new IOException(StringUtils.stringifyException(e));
        }
    } else { // real storage
        // read storage info, lock data dirs and transition fs state if necessary
        storage.recoverTransitionRead(nsInfo, dataDirs, startOpt);
        // adjust
        this.dnRegistration.setStorageInfo(storage);
        // initialize data node internal structure
        this.data = new FSDataset(storage, conf);
    }

    // find free port
    ServerSocket ss = (socketWriteTimeout > 0) ? ServerSocketChannel.open().socket() : new ServerSocket();
    Server.bind(ss, socAddr, 0);
    ss.setReceiveBufferSize(DEFAULT_DATA_SOCKET_SIZE);
    // adjust machine name with the actual port
    tmpPort = ss.getLocalPort();
    selfAddr = new InetSocketAddress(ss.getInetAddress().getHostAddress(), tmpPort);
    this.dnRegistration.setName(machineName + ":" + tmpPort);
    LOG.info("Opened info server at " + tmpPort);

    this.threadGroup = new ThreadGroup("dataXceiverServer");
    this.dataXceiverServer = new Daemon(threadGroup, new DataXceiverServer(ss, conf, this));
    this.threadGroup.setDaemon(true); // auto destroy when empty

    this.blockReportInterval = conf.getLong("dfs.blockreport.intervalMsec", BLOCKREPORT_INTERVAL);
    this.initialBlockReportDelay = conf.getLong("dfs.blockreport.initialDelay", BLOCKREPORT_INITIAL_DELAY)
            * 1000L;
    if (this.initialBlockReportDelay >= blockReportInterval) {
        this.initialBlockReportDelay = 0;
        LOG.info("dfs.blockreport.initialDelay is greater than " + "dfs.blockreport.intervalMsec."
                + " Setting initial delay to 0 msec:");
    }
    this.heartBeatInterval = conf.getLong("dfs.heartbeat.interval", HEARTBEAT_INTERVAL) * 1000L;

    //initialize periodic block scanner
    String reason = null;
    if (conf.getInt("dfs.datanode.scan.period.hours", 0) < 0) {
        reason = "verification is turned off by configuration";
    } else if (!(data instanceof FSDataset)) {
        reason = "verifcation is supported only with FSDataset";
    }
    if (reason == null) {
        blockScanner = new DataBlockScanner(this, (FSDataset) data, conf);
    } else {
        LOG.info("Periodic Block Verification is disabled because " + reason + ".");
    }

    //create a servlet to serve full-file content
    InetSocketAddress infoSocAddr = NetUtils
            .createSocketAddr(conf.get("dfs.datanode.http.address", "0.0.0.0:50075"));
    String infoHost = infoSocAddr.getHostName();
    int tmpInfoPort = infoSocAddr.getPort();
    this.infoServer = new HttpServer("datanode", infoHost, tmpInfoPort, tmpInfoPort == 0, conf);
    if (conf.getBoolean("dfs.https.enable", false)) {
        boolean needClientAuth = conf.getBoolean(DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_KEY,
                DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_DEFAULT);
        InetSocketAddress secInfoSocAddr = NetUtils
                .createSocketAddr(conf.get("dfs.datanode.https.address", infoHost + ":" + 0));
        Configuration sslConf = new HdfsConfiguration(false);
        sslConf.addResource(conf.get("dfs.https.server.keystore.resource", "ssl-server.xml"));
        this.infoServer.addSslListener(secInfoSocAddr, sslConf, needClientAuth);
    }
    this.infoServer.addInternalServlet(null, "/streamFile/*", StreamFile.class);
    this.infoServer.addInternalServlet(null, "/getFileChecksum/*", FileChecksumServlets.GetServlet.class);
    this.infoServer.setAttribute("datanode.blockScanner", blockScanner);
    this.infoServer.setAttribute("datanode.conf", conf);
    this.infoServer.addServlet(null, "/blockScannerReport", DataBlockScanner.Servlet.class);
    this.infoServer.start();
    // adjust info port
    this.dnRegistration.setInfoPort(this.infoServer.getPort());
    myMetrics = new DataNodeMetrics(conf, dnRegistration.getName());

    // set service-level authorization security policy
    if (conf.getBoolean(ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, false)) {
        ServiceAuthorizationManager.refresh(conf, new HDFSPolicyProvider());
    }

    //init ipc server
    InetSocketAddress ipcAddr = NetUtils.createSocketAddr(conf.get("dfs.datanode.ipc.address"));
    ipcServer = RPC.getServer(DataNode.class, this, ipcAddr.getHostName(), ipcAddr.getPort(),
            conf.getInt("dfs.datanode.handler.count", 3), false, conf);
    ipcServer.start();
    dnRegistration.setIpcPort(ipcServer.getListenerAddress().getPort());

    LOG.info("dnRegistration = " + dnRegistration);

    plugins = conf.getInstances("dfs.datanode.plugins", ServicePlugin.class);
    for (ServicePlugin p : plugins) {
        try {
            p.start(this);
            LOG.info("Started plug-in " + p);
        } catch (Throwable t) {
            LOG.warn("ServicePlugin " + p + " could not be started", t);
        }
    }
}
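
Worth noting in the timing section above: dfs.blockreport.intervalMsec is read as milliseconds, while dfs.blockreport.initialDelay and dfs.heartbeat.interval are read as seconds and multiplied by 1000. A small sketch of overriding them with the same keys (the specific values are illustrative only):

import org.apache.hadoop.conf.Configuration;

public class DataNodeTimingSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setLong("dfs.blockreport.intervalMsec", 3600000L); // already milliseconds
        conf.setLong("dfs.blockreport.initialDelay", 120L);     // seconds; code multiplies by 1000
        conf.setLong("dfs.heartbeat.interval", 3L);             // seconds; code multiplies by 1000
        // startDataNode(conf, ...) would then pick these up via conf.getLong(key, default).
    }
}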

From source file: common.NameNode.java

License: Apache License

private void startTrashEmptier(Configuration conf) throws IOException {
    long trashInterval = conf.getLong("fs.trash.interval", 0);
    if (trashInterval == 0)
        return;
    this.emptier = new Thread(new Trash(conf).getEmptier(), "Trash Emptier");
    this.emptier.setDaemon(true);
    this.emptier.start();
}
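
A short sketch of the driver-side setting this method reacts to: the getLong default of 0 means the emptier stays disabled unless fs.trash.interval is explicitly set to a non-zero value (historically documented in minutes).

import org.apache.hadoop.conf.Configuration;

public class TrashEmptierSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Any non-zero value enables the "Trash Emptier" thread; 0 (the getLong
        // default) leaves it disabled.
        conf.setLong("fs.trash.interval", 60L);
    }
}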

From source file: de.tudarmstadt.ukp.dkpro.bigdata.collocations.AssocReducer.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    this.ngramTotal = conf.getLong(NGRAM_TOTAL, -1);
    this.minValue = conf.getFloat(MIN_VALUE, DEFAULT_MIN_VALUE);
    String assocType = conf.get(ASSOC_METRIC, DEFAULT_ASSOC);
    if (assocType.equalsIgnoreCase("llr"))
        assocCalculator = new ConcreteLLCallback();
    else if (assocType.equalsIgnoreCase("dice"))
        assocCalculator = new DiceCallback();
    else if (assocType.equalsIgnoreCase("pmi"))
        assocCalculator = new PMICallback();
    else if (assocType.equalsIgnoreCase("chi"))
        assocCalculator = new ChiSquareCallback();

    this.emitUnigrams = conf.getBoolean(CollocDriver.EMIT_UNIGRAMS, CollocDriver.DEFAULT_EMIT_UNIGRAMS);
    log.info("NGram Total: {}, Min DICE value: {}, Emit Unigrams: {}",
            new Object[] { ngramTotal, minValue, emitUnigrams });

    if (ngramTotal == -1) {
        throw new IllegalStateException("No NGRAM_TOTAL available in job config");
    }
    mos = new MultipleOutputs<Text, DoubleWritable>(context);
}
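
The -1 default is another sentinel: it can never be a real n-gram count, so a missing NGRAM_TOTAL fails fast in setup rather than silently corrupting the association scores. A sketch of the producer/consumer pair with a placeholder key, since the string behind the NGRAM_TOTAL constant is not shown here:

import org.apache.hadoop.conf.Configuration;

final class NgramTotalConfig {
    /** Placeholder key; the reducer's real key is the NGRAM_TOTAL constant. */
    private static final String NGRAM_TOTAL_KEY = "example.ngram.total";

    /** Driver side: record the total before submitting the job. */
    static void set(Configuration conf, long total) {
        conf.setLong(NGRAM_TOTAL_KEY, total);
    }

    /** Reducer side: -1 is an impossible count, so it signals "driver forgot to set it". */
    static long get(Configuration conf) {
        long total = conf.getLong(NGRAM_TOTAL_KEY, -1L);
        if (total == -1L) {
            throw new IllegalStateException("No ngram total available in job config");
        }
        return total;
    }
}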

From source file: de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.io.WARCFileWriter.java

License: Apache License

/**
 * Creates a WARC file, and opens it for writing. If a file with the same name already
 * exists, it is *overwritten*. Note that this is different behaviour from the other
 * constructor. Yes, this sucks. It will probably change in a future version.
 *
 * @param conf           The Hadoop configuration.
 * @param codec          If null, the file is uncompressed. If non-null, this compression codec
 *                       will be used. The codec's default file extension is appended to the filename.
 * @param workOutputPath The directory and filename prefix to which the data should be
 *                       written. We append a segment number and filename extensions to it.
 * @param progress       An object used by the mapred API for tracking a task's progress.
 * @throws IOException I/O exception
 */
public WARCFileWriter(Configuration conf, CompressionCodec codec, Path workOutputPath, Progressable progress)
        throws IOException {
    this.conf = conf;
    this.codec = codec;
    this.workOutputPath = workOutputPath;
    this.progress = progress;
    this.extensionFormat = ".seg-%05d.warc" + (codec == null ? "" : codec.getDefaultExtension());
    this.maxSegmentSize = conf.getLong("warc.output.segment.size", DEFAULT_MAX_SEGMENT_SIZE);
    createSegment();
}
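
To change the rollover threshold, set warc.output.segment.size (in bytes) on the job configuration before constructing the writer; otherwise it falls back to DEFAULT_MAX_SEGMENT_SIZE. A minimal sketch (the 256 MB figure is just an example):

import org.apache.hadoop.conf.Configuration;

public class WarcSegmentSizeSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Cap each .seg-NNNNN.warc output segment at roughly 256 MB.
        conf.setLong("warc.output.segment.size", 256L * 1024 * 1024);
    }
}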

From source file: edu.indiana.soic.ts.mapreduce.pwd.SWGMap.java

License: Open Source License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();

    this.blockSize = conf.getLong(Constants.BLOCK_SIZE, 1000);
    this.noOfSequences = conf.getLong(Constants.NO_OF_SEQUENCES, blockSize * 10);
    this.noOfDivisions = conf.getLong(Constants.NO_OF_DIVISIONS, noOfSequences / blockSize);
    String distFuncName = conf.get(Constants.DIST_FUNC);
    this.distFunc = (DistanceFunction) Utils.loadObject(distFuncName);
    this.distFunc.prepare(new HashMap<>());
}
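
Note how the defaults cascade: once the block size is resolved, it feeds the default for the sequence count, which in turn feeds the default for the division count, so configuring only Constants.BLOCK_SIZE still yields a consistent geometry. The same idea in isolation, with placeholder keys since the constant values are not shown here:

import org.apache.hadoop.conf.Configuration;

final class BlockGeometry {
    final long blockSize;
    final long noOfSequences;
    final long noOfDivisions;

    /** Placeholder keys; the real ones are Constants.BLOCK_SIZE etc. */
    BlockGeometry(Configuration conf) {
        blockSize = conf.getLong("example.block.size", 1000L);
        noOfSequences = conf.getLong("example.no.of.sequences", blockSize * 10);
        noOfDivisions = conf.getLong("example.no.of.divisions", noOfSequences / blockSize);
    }
}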

From source file: edu.indiana.soic.ts.mapreduce.pwd.SWGReduce.java

License: Open Source License

public void reduce(LongWritable key, Iterable<SWGWritable> values, Context context) throws IOException {
    long startTime = System.nanoTime();
    Configuration conf = context.getConfiguration();

    long blockSize = conf.getLong(Constants.BLOCK_SIZE, 1000);
    long noOfSequences = conf.getLong(Constants.NO_OF_SEQUENCES, blockSize * 10);
    long noOfDivisions = conf.getLong(Constants.NO_OF_DIVISIONS, noOfSequences / blockSize);

    // to handle the edge blocks with lesser number of sequences
    int row = (int) (key.get() * blockSize);
    int currentRowBlockSize = (int) blockSize;
    if ((row + blockSize) > (noOfSequences)) {
        currentRowBlockSize = (int) (noOfSequences - row);
    }

    short[][] alignments = new short[(int) currentRowBlockSize][(int) noOfSequences];
    for (SWGWritable alignmentWritable : values) {
        LOG.info("key " + key.get() + " col " + alignmentWritable.getColumnBlock() + " row "
                + alignmentWritable.getRowBlock() + " blocksize " + blockSize);
        DataInput in = alignmentWritable.getDataInput();
        int column = (int) (alignmentWritable.getColumnBlock() * blockSize);

        // to handle the edge blocks with lesser number of sequences
        int currentColumnBlockSize = (int) blockSize;
        if ((column + blockSize) > (noOfSequences)) {
            currentColumnBlockSize = (int) (noOfSequences - column);
        }

        for (int i = 0; i < currentRowBlockSize; i++) {
            // byte[] b = new byte[currentBlockSize /* * 2*/];
            //            System.out.println("row block "+i+"  currentBlockSize"+currentRowBlockSize);
            for (int j = 0; j < currentColumnBlockSize; j++) {
                short readShort = in.readShort();
                //               System.out.print(readShort+" ");
                alignments[i][column + j] = readShort;
            }
        }
    }

    // retrieve the output dir
    String outDir = context.getConfiguration().get("mapred.output.dir");

    FileSystem fs = FileSystem.get(conf);
    // out dir is created in the main driver.
    String childName = "row_" + key.get() + "_" + blockSize;
    Path outFilePart = new Path(outDir, childName);
    writeOutFile(alignments, fs, outFilePart);
    LOG.info("Reduce Processing Time: " + ((System.nanoTime() - startTime) / 1000000));
}
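
One caveat in the reducer above: it reads the output directory from the legacy "mapred.output.dir" property. With the new MapReduce API, FileOutputFormat.getOutputPath(context) resolves the same directory without hard-coding the key; a small sketch of that alternative (the helper name is made up):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

final class OutputDirHelper {
    /** Resolve the job output directory without hard-coding "mapred.output.dir". */
    static Path partFile(JobContext context, long rowBlock, long blockSize) {
        Path outDir = FileOutputFormat.getOutputPath(context);
        return new Path(outDir, "row_" + rowBlock + "_" + blockSize);
    }
}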