List of usage examples for org.apache.hadoop.conf.Configuration#getLong

    public long getLong(String name, long defaultValue)

name is the property name to look up. The method returns the value of that property as a long, or defaultValue if no such property exists.
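For orientation, a minimal sketch of the call itself before the real-world examples below. The property names are hypothetical, chosen only to show the lookup and the default fallback:

    import org.apache.hadoop.conf.Configuration;

    public class GetLongDemo {
        public static void main(String[] args) {
            Configuration conf = new Configuration();
            conf.setLong("my.app.timeout.ms", 30000L);   // hypothetical property name

            long timeout = conf.getLong("my.app.timeout.ms", 10000L);  // 30000: the stored value
            long missing = conf.getLong("my.app.not.set", 10000L);     // 10000: falls back to the default

            System.out.println(timeout + " " + missing);
        }
    }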
From source file:com.twitter.hraven.PigJobDescFactory.java
License:Apache License
    @Override
    public JobDesc create(QualifiedJobId qualifiedJobId, long submitTimeMillis, Configuration jobConf) {
        String appId = getAppId(jobConf);
        String version = jobConf.get(Constants.PIG_VERSION_CONF_KEY, Constants.UNKNOWN);
        long pigSubmitTimeMillis = jobConf.getLong(Constants.PIG_RUN_CONF_KEY, 0);
        // This means that Constants.PIG_RUN_CONF_KEY was not present (for jobs
        // launched with an older pig version).
        if (pigSubmitTimeMillis == 0) {
            String pigLogfile = jobConf.get(Constants.PIG_LOG_FILE_CONF_KEY);
            if (pigLogfile == null) {
                // Should be rare, but we're seeing this happen occasionally.
                // Give up on grouping the jobs within the run together, and treat these as individual runs.
                pigSubmitTimeMillis = submitTimeMillis;
            } else {
                pigSubmitTimeMillis = getScriptStartTimeFromLogfileName(pigLogfile);
            }
        }
        return create(qualifiedJobId, jobConf, appId, version, Framework.PIG, pigSubmitTimeMillis);
    }
From source file:com.twitter.hraven.ScaldingJobDescFactory.java
License:Apache License
    /**
     * Returns the flow submit time for this job or a computed substitute that
     * will at least be consistent for all jobs in a flow.
     *
     * The time is computed according to:
     * <ol>
     * <li>use "scalding.flow.submitted.timestamp" if present</li>
     * <li>otherwise use "cascading.flow.id" as a substitute</li>
     * </ol>
     *
     * @param jobConf the job configuration
     * @param submitTimeMillis submit time of an individual job in the flow
     * @return when the entire flow started, or else at least something that binds
     *         all jobs in a flow together
     */
    static long getFlowSubmitTimeMillis(Configuration jobConf, long submitTimeMillis) {
        // TODO: Do some parsing / hacking on this. Grab the year/month component
        // and add part of the flowId turned into a long, kind of a thing.
        long cascadingSubmitTimeMillis = jobConf.getLong(Constants.CASCADING_RUN_CONF_KEY, 0);
        if (cascadingSubmitTimeMillis == 0) {
            // Convert hex encoded flow ID (128-bit MD5 hash) into a long as a substitute
            String flowId = jobConf.get(Constants.CASCADING_FLOW_ID_CONF_KEY);
            if (flowId != null && !flowId.isEmpty()) {
                if (flowId.length() > 16) {
                    flowId = flowId.substring(0, 16);
                }
                try {
                    long tmpFlow = Long.parseLong(flowId, 16);
                    // Need to prevent the computed run ID from showing up in the future,
                    // so we don't "mask" jobs later submitted with the correct property:
                    // make this show up within the job submit month.
                    long monthStart = DateUtil.getMonthStart(submitTimeMillis);
                    // This still allows these jobs to show up in the "future", but at least
                    // constrains them to the current month.
                    cascadingSubmitTimeMillis = monthStart + (tmpFlow % DateUtil.MONTH_IN_MILLIS);
                } catch (NumberFormatException nfe) {
                    // fall back to the job submit time
                    cascadingSubmitTimeMillis = submitTimeMillis;
                }
            } else {
                // fall back to the job submit time
                cascadingSubmitTimeMillis = submitTimeMillis;
            }
        }
        return cascadingSubmitTimeMillis;
    }
From source file:com.yahoo.ycsb.bulk.hbase.BulkDataGeneratorJob.java
License:Apache License
    /** Create the input file used for launching the maps */
    void createInputFile(Job job, String workdir) throws IOException {
        Configuration conf = job.getConfiguration();
        FileSystem fs = FileSystem.get(conf);
        Path inpath = new Path(workdir + "/inputkeyranges.txt");
        PrintStream out = new PrintStream(new BufferedOutputStream(fs.create(inpath)));
        long start = conf.getLong(ARG_KEY_RANGE_START, 0);
        long end = conf.getLong(ARG_KEY_RANGE_END, 0);
        int parts = conf.getInt(ARG_KEY_RANGE_PARTITIONS, 1);
        writeRanges(start, end, parts, out);
        out.close();

        TextInputFormat.setInputPaths(job, inpath);
        // NLineInputFormat.setInputPaths(job, inpath);

        /* compute the max input split size */
        // long max_split = fs.getFileStatus(inpath).getLen() / parts;
        // TextInputFormat.setMaxInputSplitSize(job, max_split);
        // JobConf jc = new JobConf(conf);
        // jc.setNumMapTasks(parts);
    }
From source file:com.zjy.mongo.splitter.BSONSplitter.java
License:Apache License
    public static long getSplitSize(final Configuration conf, final FileStatus file) {
        // Try new configuration options first, but fall back to old ones.
        long maxSize = conf.getLong("mapreduce.input.fileinputformat.split.maxsize",
                conf.getLong("mapred.max.split.size", Long.MAX_VALUE));
        long minSize = Math.max(1L, conf.getLong("mapreduce.input.fileinputformat.split.minsize",
                conf.getLong("mapred.min.split.size", 1L)));

        if (file != null) {
            long fileBlockSize = file.getBlockSize();
            return Math.max(minSize, Math.min(maxSize, fileBlockSize));
        } else {
            long blockSize = conf.getLong("dfs.blockSize", 64 * 1024 * 1024);
            return Math.max(minSize, Math.min(maxSize, blockSize));
        }
    }
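The nested getLong calls above use the default-value argument to chain a lookup of the newer mapreduce.* key to a fallback on the deprecated mapred.* key. A minimal, self-contained sketch of the same pattern, with an illustrative value and assuming only hadoop-common on the classpath:

    import org.apache.hadoop.conf.Configuration;

    public class SplitSizeFallbackDemo {
        public static void main(String[] args) {
            // Empty configuration: skip loading the default resources.
            Configuration conf = new Configuration(false);
            // Only the legacy key is set, e.g. by an older job client.
            conf.setLong("mapred.max.split.size", 128L * 1024 * 1024);

            // The outer lookup's default is itself a lookup of the legacy key,
            // so the new key wins when present and the old key is used otherwise.
            long maxSplit = conf.getLong("mapreduce.input.fileinputformat.split.maxsize",
                    conf.getLong("mapred.max.split.size", Long.MAX_VALUE));

            System.out.println(maxSplit); // 134217728
        }
    }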
From source file:common.DataNode.java
License:Apache License
    /**
     * This method starts the data node with the specified conf.
     *
     * @param conf - the configuration
     *        if conf's CONFIG_PROPERTY_SIMULATED property is set
     *        then a simulated storage based data node is created.
     *
     * @param dataDirs - only for a non-simulated storage data node
     * @throws IOException
     */
    void startDataNode(Configuration conf, AbstractList<File> dataDirs, DatanodeProtocol namenode)
            throws IOException {
        // use configured nameserver & interface to get local hostname
        if (conf.get(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY) != null) {
            machineName = conf.get(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY);
        }
        if (machineName == null) {
            machineName = DNS.getDefaultHost(conf.get("dfs.datanode.dns.interface", "default"),
                    conf.get("dfs.datanode.dns.nameserver", "default"));
        }
        this.nameNodeAddr = NameNode.getAddress(conf);

        this.socketTimeout = conf.getInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, HdfsConstants.READ_TIMEOUT);
        this.socketWriteTimeout = conf.getInt("dfs.datanode.socket.write.timeout", HdfsConstants.WRITE_TIMEOUT);
        /* Based on results on different platforms, we might need to set the default
         * to false on some of them. */
        this.transferToAllowed = conf.getBoolean("dfs.datanode.transferTo.allowed", true);
        this.writePacketSize = conf.getInt(DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY,
                DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT);
        InetSocketAddress socAddr = NetUtils.createSocketAddr(conf.get("dfs.datanode.address", "0.0.0.0:50010"));
        int tmpPort = socAddr.getPort();
        storage = new DataStorage();
        // construct registration
        this.dnRegistration = new DatanodeRegistration(machineName + ":" + tmpPort);

        // connect to name node
        this.namenode = namenode;

        // get version and id info from the name-node
        NamespaceInfo nsInfo = handshake();
        StartupOption startOpt = getStartupOption(conf);
        assert startOpt != null : "Startup option must be set.";

        boolean simulatedFSDataset = conf.getBoolean("dfs.datanode.simulateddatastorage", false);
        if (simulatedFSDataset) {
            setNewStorageID(dnRegistration);
            dnRegistration.storageInfo.layoutVersion = FSConstants.LAYOUT_VERSION;
            dnRegistration.storageInfo.namespaceID = nsInfo.namespaceID;
            // it would have been better to pass storage as a parameter to
            // constructor below - need to augment ReflectionUtils used below.
            conf.set(DFSConfigKeys.DFS_DATANODE_STORAGEID_KEY, dnRegistration.getStorageID());
            try {
                // Equivalent of the following (can't do because Simulated is in test dir):
                //   this.data = new SimulatedFSDataset(conf);
                this.data = (FSDatasetInterface) ReflectionUtils.newInstance(
                        Class.forName("org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset"), conf);
            } catch (ClassNotFoundException e) {
                throw new IOException(StringUtils.stringifyException(e));
            }
        } else { // real storage
            // read storage info, lock data dirs and transition fs state if necessary
            storage.recoverTransitionRead(nsInfo, dataDirs, startOpt);
            // adjust
            this.dnRegistration.setStorageInfo(storage);
            // initialize data node internal structure
            this.data = new FSDataset(storage, conf);
        }

        // find free port
        ServerSocket ss = (socketWriteTimeout > 0) ? ServerSocketChannel.open().socket() : new ServerSocket();
        Server.bind(ss, socAddr, 0);
        ss.setReceiveBufferSize(DEFAULT_DATA_SOCKET_SIZE);
        // adjust machine name with the actual port
        tmpPort = ss.getLocalPort();
        selfAddr = new InetSocketAddress(ss.getInetAddress().getHostAddress(), tmpPort);
        this.dnRegistration.setName(machineName + ":" + tmpPort);
        LOG.info("Opened info server at " + tmpPort);

        this.threadGroup = new ThreadGroup("dataXceiverServer");
        this.dataXceiverServer = new Daemon(threadGroup, new DataXceiverServer(ss, conf, this));
        this.threadGroup.setDaemon(true); // auto destroy when empty

        this.blockReportInterval = conf.getLong("dfs.blockreport.intervalMsec", BLOCKREPORT_INTERVAL);
        this.initialBlockReportDelay = conf.getLong("dfs.blockreport.initialDelay", BLOCKREPORT_INITIAL_DELAY) * 1000L;
        if (this.initialBlockReportDelay >= blockReportInterval) {
            this.initialBlockReportDelay = 0;
            LOG.info("dfs.blockreport.initialDelay is greater than "
                    + "dfs.blockreport.intervalMsec." + " Setting initial delay to 0 msec:");
        }
        this.heartBeatInterval = conf.getLong("dfs.heartbeat.interval", HEARTBEAT_INTERVAL) * 1000L;

        // initialize periodic block scanner
        String reason = null;
        if (conf.getInt("dfs.datanode.scan.period.hours", 0) < 0) {
            reason = "verification is turned off by configuration";
        } else if (!(data instanceof FSDataset)) {
            reason = "verification is supported only with FSDataset";
        }
        if (reason == null) {
            blockScanner = new DataBlockScanner(this, (FSDataset) data, conf);
        } else {
            LOG.info("Periodic Block Verification is disabled because " + reason + ".");
        }

        // create a servlet to serve full-file content
        InetSocketAddress infoSocAddr = NetUtils
                .createSocketAddr(conf.get("dfs.datanode.http.address", "0.0.0.0:50075"));
        String infoHost = infoSocAddr.getHostName();
        int tmpInfoPort = infoSocAddr.getPort();
        this.infoServer = new HttpServer("datanode", infoHost, tmpInfoPort, tmpInfoPort == 0, conf);
        if (conf.getBoolean("dfs.https.enable", false)) {
            boolean needClientAuth = conf.getBoolean(DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_KEY,
                    DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_DEFAULT);
            InetSocketAddress secInfoSocAddr = NetUtils
                    .createSocketAddr(conf.get("dfs.datanode.https.address", infoHost + ":" + 0));
            Configuration sslConf = new HdfsConfiguration(false);
            sslConf.addResource(conf.get("dfs.https.server.keystore.resource", "ssl-server.xml"));
            this.infoServer.addSslListener(secInfoSocAddr, sslConf, needClientAuth);
        }
        this.infoServer.addInternalServlet(null, "/streamFile/*", StreamFile.class);
        this.infoServer.addInternalServlet(null, "/getFileChecksum/*", FileChecksumServlets.GetServlet.class);
        this.infoServer.setAttribute("datanode.blockScanner", blockScanner);
        this.infoServer.setAttribute("datanode.conf", conf);
        this.infoServer.addServlet(null, "/blockScannerReport", DataBlockScanner.Servlet.class);
        this.infoServer.start();
        // adjust info port
        this.dnRegistration.setInfoPort(this.infoServer.getPort());
        myMetrics = new DataNodeMetrics(conf, dnRegistration.getName());

        // set service-level authorization security policy
        if (conf.getBoolean(ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, false)) {
            ServiceAuthorizationManager.refresh(conf, new HDFSPolicyProvider());
        }

        // init ipc server
        InetSocketAddress ipcAddr = NetUtils.createSocketAddr(conf.get("dfs.datanode.ipc.address"));
        ipcServer = RPC.getServer(DataNode.class, this, ipcAddr.getHostName(), ipcAddr.getPort(),
                conf.getInt("dfs.datanode.handler.count", 3), false, conf);
        ipcServer.start();
        dnRegistration.setIpcPort(ipcServer.getListenerAddress().getPort());

        LOG.info("dnRegistration = " + dnRegistration);

        plugins = conf.getInstances("dfs.datanode.plugins", ServicePlugin.class);
        for (ServicePlugin p : plugins) {
            try {
                p.start(this);
                LOG.info("Started plug-in " + p);
            } catch (Throwable t) {
                LOG.warn("ServicePlugin " + p + " could not be started", t);
            }
        }
    }
From source file:common.NameNode.java
License:Apache License
    private void startTrashEmptier(Configuration conf) throws IOException {
        long trashInterval = conf.getLong("fs.trash.interval", 0);
        if (trashInterval == 0) {
            return;
        }
        this.emptier = new Thread(new Trash(conf).getEmptier(), "Trash Emptier");
        this.emptier.setDaemon(true);
        this.emptier.start();
    }
From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.AssocReducer.java
License:Apache License
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
        Configuration conf = context.getConfiguration();
        this.ngramTotal = conf.getLong(NGRAM_TOTAL, -1);
        this.minValue = conf.getFloat(MIN_VALUE, DEFAULT_MIN_VALUE);
        String assocType = conf.get(ASSOC_METRIC, DEFAULT_ASSOC);
        if (assocType.equalsIgnoreCase("llr"))
            assocCalculator = new ConcreteLLCallback();
        else if (assocType.equalsIgnoreCase("dice"))
            assocCalculator = new DiceCallback();
        else if (assocType.equalsIgnoreCase("pmi"))
            assocCalculator = new PMICallback();
        else if (assocType.equalsIgnoreCase("chi"))
            assocCalculator = new ChiSquareCallback();
        this.emitUnigrams = conf.getBoolean(CollocDriver.EMIT_UNIGRAMS, CollocDriver.DEFAULT_EMIT_UNIGRAMS);
        log.info("NGram Total: {}, Min DICE value: {}, Emit Unigrams: {}",
                new Object[] { ngramTotal, minValue, emitUnigrams });
        if (ngramTotal == -1) {
            throw new IllegalStateException("No NGRAM_TOTAL available in job config");
        }
        mos = new MultipleOutputs<Text, DoubleWritable>(context);
    }
From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.io.WARCFileWriter.java
License:Apache License
    /**
     * Creates a WARC file, and opens it for writing. If a file with the same name already
     * exists, it is *overwritten*. Note that this is different behaviour from the other
     * constructor. Yes, this sucks. It will probably change in a future version.
     *
     * @param conf           The Hadoop configuration.
     * @param codec          If null, the file is uncompressed. If non-null, this compression codec
     *                       will be used. The codec's default file extension is appended to the filename.
     * @param workOutputPath The directory and filename prefix to which the data should be
     *                       written. We append a segment number and filename extensions to it.
     * @param progress       An object used by the mapred API for tracking a task's progress.
     * @throws IOException I/O exception
     */
    public WARCFileWriter(Configuration conf, CompressionCodec codec, Path workOutputPath, Progressable progress)
            throws IOException {
        this.conf = conf;
        this.codec = codec;
        this.workOutputPath = workOutputPath;
        this.progress = progress;
        this.extensionFormat = ".seg-%05d.warc" + (codec == null ? "" : codec.getDefaultExtension());
        this.maxSegmentSize = conf.getLong("warc.output.segment.size", DEFAULT_MAX_SEGMENT_SIZE);
        createSegment();
    }
From source file:edu.indiana.soic.ts.mapreduce.pwd.SWGMap.java
License:Open Source License
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
        Configuration conf = context.getConfiguration();
        this.blockSize = conf.getLong(Constants.BLOCK_SIZE, 1000);
        this.noOfSequences = conf.getLong(Constants.NO_OF_SEQUENCES, blockSize * 10);
        this.noOfDivisions = conf.getLong(Constants.NO_OF_DIVISIONS, noOfSequences / blockSize);
        String distFuncName = conf.get(Constants.DIST_FUNC);
        this.distFunc = (DistanceFunction) Utils.loadObject(distFuncName);
        this.distFunc.prepare(new HashMap<>());
    }
From source file:edu.indiana.soic.ts.mapreduce.pwd.SWGReduce.java
License:Open Source License
    public void reduce(LongWritable key, Iterable<SWGWritable> values, Context context) throws IOException {
        long startTime = System.nanoTime();
        Configuration conf = context.getConfiguration();
        long blockSize = conf.getLong(Constants.BLOCK_SIZE, 1000);
        long noOfSequences = conf.getLong(Constants.NO_OF_SEQUENCES, blockSize * 10);
        long noOfDivisions = conf.getLong(Constants.NO_OF_DIVISIONS, noOfSequences / blockSize);

        // to handle the edge blocks with lesser number of sequences
        int row = (int) (key.get() * blockSize);
        int currentRowBlockSize = (int) blockSize;
        if ((row + blockSize) > (noOfSequences)) {
            currentRowBlockSize = (int) (noOfSequences - row);
        }
        short[][] alignments = new short[currentRowBlockSize][(int) noOfSequences];
        for (SWGWritable alignmentWritable : values) {
            LOG.info("key " + key.get() + " col " + alignmentWritable.getColumnBlock() + " row "
                    + alignmentWritable.getRowBlock() + " blocksize " + blockSize);
            DataInput in = alignmentWritable.getDataInput();
            int column = (int) (alignmentWritable.getColumnBlock() * blockSize);

            // to handle the edge blocks with lesser number of sequences
            int currentColumnBlockSize = (int) blockSize;
            if ((column + blockSize) > (noOfSequences)) {
                currentColumnBlockSize = (int) (noOfSequences - column);
            }

            for (int i = 0; i < currentRowBlockSize; i++) {
                // byte[] b = new byte[currentBlockSize];
                // System.out.println("row block " + i + " currentBlockSize " + currentRowBlockSize);
                for (int j = 0; j < currentColumnBlockSize; j++) {
                    short readShort = in.readShort();
                    // System.out.print(readShort + " ");
                    alignments[i][column + j] = readShort;
                }
            }
        }

        // retrieve the output dir
        String outDir = context.getConfiguration().get("mapred.output.dir");
        FileSystem fs = FileSystem.get(conf);
        // out dir is created in the main driver.
        String childName = "row_" + key.get() + "_" + blockSize;
        Path outFilePart = new Path(outDir, childName);
        writeOutFile(alignments, fs, outFilePart);
        LOG.info("Reduce Processing Time: " + ((System.nanoTime() - startTime) / 1000000));
    }