List of usage examples for org.apache.hadoop.conf Configuration getInt
public int getInt(String name, int defaultValue)
Get the value of the name property as an int; if no such property exists, the provided defaultValue is returned.
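Before the project examples, a minimal self-contained sketch of the call itself; the property name and default value below are illustrative placeholders, not keys used by any of the projects listed:

import org.apache.hadoop.conf.Configuration;

public class GetIntExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Returns 4 because "example.num.threads" has not been set anywhere.
    int threads = conf.getInt("example.num.threads", 4);
    System.out.println("threads = " + threads);

    // Once the property is set, getInt parses the stored value as an int.
    conf.setInt("example.num.threads", 16);
    System.out.println("threads = " + conf.getInt("example.num.threads", 4)); // 16
  }
}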
From source file:com.ci.backports.hadoop.hbase.ZHFileOutputFormat.java
License:Apache License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
    throws IOException, InterruptedException {
  // Get the path of the temporary output file
  final Path outputPath = FileOutputFormat.getOutputPath(context);
  final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
  Configuration conf = context.getConfiguration();
  final FileSystem fs = outputdir.getFileSystem(conf);
  // These configs. are from hbase-*.xml
  final long maxsize = conf.getLong("hbase.hregion.max.filesize", HConstants.DEFAULT_MAX_FILE_SIZE);
  final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE);
  // Invented config. Add to hbase-*.xml if other than default compression.
  final String compression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());

  return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
    // Map of families to writers and how much has been output on the writer.
    private final Map<byte[], WriterLength> writers =
        new TreeMap<byte[], WriterLength>(Bytes.BYTES_COMPARATOR);
    private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
    private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
    private boolean rollRequested = false;

    public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
      // null input == user explicitly wants to flush
      if (row == null && kv == null) {
        rollWriters();
        return;
      }
      byte[] rowKey = kv.getRow();
      long length = kv.getLength();
      byte[] family = kv.getFamily();
      WriterLength wl = this.writers.get(family);
      // If this is a new column family, verify that the directory exists
      if (wl == null) {
        fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
      }
      // If any of the HFiles for the column families has reached
      // maxsize, we need to roll all the writers
      if (wl != null && wl.written + length >= maxsize) {
        this.rollRequested = true;
      }
      // This can only happen once a row is finished though
      if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
        rollWriters();
      }
      // create a new HLog writer, if necessary
      if (wl == null || wl.writer == null) {
        wl = getNewWriter(family);
      }
      // we now have the proper HLog writer. full steam ahead
      kv.updateLatestStamp(this.now);
      wl.writer.append(kv);
      wl.written += length;
      // Copy the row so we know when a row transitions.
      this.previousRow = rowKey;
    }

    private void rollWriters() throws IOException {
      for (WriterLength wl : this.writers.values()) {
        if (wl.writer != null) {
          LOG.info("Writer=" + wl.writer.getPath()
              + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
          close(wl.writer);
        }
        wl.writer = null;
        wl.written = 0;
      }
      this.rollRequested = false;
    }

    /* Create a new HFile.Writer.
     * @param family
     * @return A WriterLength, containing a new HFile.Writer.
     * @throws IOException
     */
    private WriterLength getNewWriter(byte[] family) throws IOException {
      WriterLength wl = new WriterLength();
      Path familydir = new Path(outputdir, Bytes.toString(family));
      wl.writer = new HFile.Writer(fs, StoreFile.getUniqueFile(fs, familydir), blocksize, compression,
          KeyValue.KEY_COMPARATOR);
      this.writers.put(family, wl);
      return wl;
    }

    private void close(final HFile.Writer w) throws IOException {
      if (w != null) {
        w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
        w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
            Bytes.toBytes(context.getTaskAttemptID().toString()));
        w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
        w.close();
      }
    }

    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
      for (WriterLength wl : this.writers.values()) {
        close(wl.writer);
      }
    }
  };
}
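For context, a driver could override the three HBase/HFile settings read at the top of getRecordWriter() before submitting the job. This is a hedged sketch: only the three key names come from the example above; the values chosen here are arbitrary illustrations.

import org.apache.hadoop.conf.Configuration;

public class HFileOutputConfigExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Keys read by ZHFileOutputFormat.getRecordWriter() above.
    conf.setLong("hbase.hregion.max.filesize", 2L * 1024 * 1024 * 1024); // roll writers at ~2 GB
    conf.setInt("hfile.min.blocksize.size", 65536);                      // 64 KB HFile blocks
    conf.set("hfile.compression", "gz");                                 // the "invented" compression key

    System.out.println(conf.getInt("hfile.min.blocksize.size", -1));     // prints 65536
  }
}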
From source file:com.cloudera.bigdata.analysis.dataload.mapreduce.SplitableRecordReader.java
License:Apache License
/**
 * Decide the start of the reader.
 */
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();
  compressionCodecs = new CompressionCodecFactory(job);
  codec = compressionCodecs.getCodec(file);
  // if (codec instanceof CryptoCodec && job instanceof JobConf)
  //   CryptoContextHelper.resetInputCryptoContext((CryptoCodec) codec, (JobConf) job, file);

  // open the file and seek to the start of the split
  FileSystem fs = file.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(split.getPath());
  if (isCompressedInput()) {
    decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
      final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
          fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
      if (null == this.recordDelimiterBytes) {
        in = new LineReader(cIn, job);
      } else {
        in = new LineReader(cIn, job, this.recordDelimiterBytes);
      }
      start = cIn.getAdjustedStart();
      end = cIn.getAdjustedEnd();
      filePosition = cIn;
    } else {
      if (null == this.recordDelimiterBytes) {
        in = new LineReader(codec.createInputStream(fileIn), job);
      } else {
        in = new LineReader(codec.createInputStream(fileIn), job, this.recordDelimiterBytes);
      }
      filePosition = fileIn;
    }
  } else {
    fileIn.seek(start);
    if (null == this.recordDelimiterBytes) {
      in = new LineReader(fileIn, job);
    } else {
      in = new LineReader(fileIn, job, this.recordDelimiterBytes);
    }
    filePosition = fileIn;
  }
  LOG.info("Read from " + split.getPath().toString());

  // If this is not the first split, we always throw away the first record
  // because we always (except for the last split) read one extra line in
  // the next() method.
  if (start != 0) {
    start += in.readLine(new Text(), 0, maxBytesToConsume(start));

    // Read another line as previous.
    Text current = new Text();
    int newSize = in.readLine(previous, maxLineLength, maxBytesToConsume(start));
    LOG.info("Skip line " + previous + " for last split.");
    start += newSize;

    // Keep reading until a splitable point is found.
    while (start <= end) {
      newSize = in.readLine(current, maxLineLength, maxBytesToConsume(start));
      if (canSplit(previous.getBytes(), current.getBytes())) {
        break;
      }
      start += newSize;
      previous.set(current.getBytes());
      LOG.info("Skip line " + previous + " for last split.");
    }

    // If we exceed the end, still read one extra line.
    if (start > end) {
      if (isContinue) {
        newSize = in.readLine(current, maxLineLength, maxBytesToConsume(start));
        if (!canSplit(previous.getBytes(), current.getBytes())) {
          // Still not splitable. So skip the block.
          start += newSize;
          isContinue = false;
        }
      }
    }
    LOG.info("Split between: \n" + previous + "\n" + current);

    // Restart at the last read line.
    fileIn.seek(start);
    if (null == this.recordDelimiterBytes) {
      in = new LineReader(fileIn, job);
    } else {
      in = new LineReader(fileIn, job, this.recordDelimiterBytes);
    }
    this.pos = start;
  } else {
    Text skip = new Text();
    start += in.readLine(skip, maxLineLength, maxBytesToConsume(start));
    // start += in.readLine(skip, 0, maxBytesToConsume(start));
    LOG.info("Skip line " + skip + ". Start at " + start);
  }
  // Restart at the start index.
}
From source file:com.cloudera.castagna.logparser.Utils.java
License:Apache License
public static void setReducers(Job job, Configuration configuration, Logger log) {
  boolean runLocal = configuration.getBoolean(Constants.OPTION_RUN_LOCAL,
      Constants.OPTION_RUN_LOCAL_DEFAULT);
  int num_reducers = configuration.getInt(Constants.OPTION_NUM_REDUCERS,
      Constants.OPTION_NUM_REDUCERS_DEFAULT);

  // TODO: should we comment this out and let Hadoop decide the number of reducers?
  if (runLocal) {
    if (log != null)
      log.debug("Setting number of reducers to {}", 1);
    job.setNumReduceTasks(1);
  } else {
    job.setNumReduceTasks(num_reducers);
    if (log != null)
      log.debug("Setting number of reducers to {}", num_reducers);
  }
}
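A hedged usage sketch of the same pattern: the property names below are placeholders, since the actual string values behind Constants.OPTION_RUN_LOCAL and Constants.OPTION_NUM_REDUCERS are not shown in the source.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class SetReducersExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Placeholder keys; the real names live in the project's Constants class.
    conf.setBoolean("logparser.run.local", false);
    conf.setInt("logparser.num.reducers", 8);

    Job job = Job.getInstance(conf, "log-parser");
    // Utils.setReducers(job, conf, log) would end up calling job.setNumReduceTasks(8).
    job.setNumReduceTasks(conf.getInt("logparser.num.reducers", 1));
    System.out.println(job.getNumReduceTasks()); // 8
  }
}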
From source file:com.cloudera.hadoop.hdfs.nfs.rpc.RPCServer.java
License:Apache License
public RPCServer(RPCHandler<REQUEST, RESPONSE> rpcHandler, Configuration conf, InetAddress address, int port)
    throws IOException {
  mExecutor = new ThreadPoolExecutor(10, conf.getInt(RPC_MAX_THREADS, 500), 30, TimeUnit.SECONDS,
      new LinkedBlockingQueue<Runnable>());
  mHandler = rpcHandler;
  mConfiguration = conf;
  mPort = port;
  mServer = new ServerSocket(mPort, -1, address);
  // if port is 0, we are supposed to find a port
  // mPort should then be set to the port we found
  mPort = mServer.getLocalPort();
  setName("RPCServer-" + mHandler.getClass().getSimpleName() + "-" + mPort);
}
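The same idea in isolation: a thread-pool upper bound pulled from a Configuration with a fallback default. The key name here is a placeholder, since the string behind the RPC_MAX_THREADS constant is not shown in the snippet.

import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.conf.Configuration;

public class BoundedPoolExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Placeholder key; RPCServer reads its own RPC_MAX_THREADS constant.
    int maxThreads = conf.getInt("rpc.max.threads", 500);

    ThreadPoolExecutor executor = new ThreadPoolExecutor(10, maxThreads, 30, TimeUnit.SECONDS,
        new LinkedBlockingQueue<Runnable>());
    System.out.println("max pool size = " + executor.getMaximumPoolSize()); // 500 unless overridden
    executor.shutdown();
  }
}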
From source file:com.cloudera.impala.service.JniFrontend.java
License:Apache License
/**
 * Return an empty string if block location tracking is properly enabled. If not,
 * return an error string describing the issues.
 */
private String checkBlockLocationTracking(Configuration conf) {
  StringBuilder output = new StringBuilder();
  String errorMessage = "ERROR: block location tracking is not properly enabled " + "because\n";
  String prefix = " - ";
  StringBuilder errorCause = new StringBuilder();
  if (!conf.getBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED,
      DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT)) {
    errorCause.append(prefix);
    errorCause.append(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED);
    errorCause.append(" is not enabled.\n");
  }
  // dfs.client.file-block-storage-locations.timeout should be >= 500
  // TODO: OPSAPS-12765 - it should be >= 3000, but use 500 for now until CM refresh
  if (conf.getInt(DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT,
      DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT_DEFAULT) < 500) {
    errorCause.append(prefix);
    errorCause.append(DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT);
    errorCause.append(" is too low. It should be at least 3000.\n");
  }
  if (errorCause.length() > 0) {
    output.append(errorMessage);
    output.append(errorCause);
  }
  return output.toString();
}
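A sketch of a Configuration that would satisfy the check above, using literal property names instead of the DFSConfigKeys constants. The names are assumed from Hadoop 2.x and should be treated as an assumption rather than a guarantee.

import org.apache.hadoop.conf.Configuration;

public class BlockLocationTrackingConfig {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Assumed Hadoop 2.x property names; the Impala code references them via DFSConfigKeys.
    conf.setBoolean("dfs.datanode.hdfs-blocks-metadata.enabled", true);
    conf.setInt("dfs.client.file-block-storage-locations.timeout", 3000);

    // With these values, checkBlockLocationTracking(conf) above would return an empty string.
    System.out.println(conf.getInt("dfs.client.file-block-storage-locations.timeout", 0)); // 3000
  }
}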
From source file:com.cloudera.impala.service.ZooKeeperSession.java
License:Apache License
/**
 * Connects to zookeeper and handles maintaining membership.
 * Note: this can only be called when either planner or worker is running.
 * The client of this class is responsible for checking that.
 * @param conf
 * @param id - The ID for this server. This should be unique among
 *   all instances of the service.
 * @param principal - The Kerberos principal to use. If not null and not empty,
 *   ZooKeeper nodes will be secured with Kerberos.
 * @param keytabPath - The path to the keytab file. Only used when a valid
 *   principal is provided.
 * @param plannerPort - If greater than 0, running the planner service.
 * @param workerPort - If greater than 0, running the worker service.
 */
public ZooKeeperSession(Configuration conf, String id, String principal, String keytabPath,
    int plannerPort, int workerPort) throws IOException {
  id_ = id;
  plannerPort_ = plannerPort;
  workerPort_ = workerPort;

  zkConnectString_ = conf.get(ZOOKEEPER_CONNECTION_STRING_CONF);
  if (zkConnectString_ == null || zkConnectString_.trim().isEmpty()) {
    throw new IllegalArgumentException(
        "Zookeeper connect string has to be specified through " + ZOOKEEPER_CONNECTION_STRING_CONF);
  }
  LOGGER.info("Connecting to zookeeper at: " + zkConnectString_ + " with id: " + id_);

  connectTimeoutMillis_ = conf.getInt(ZOOKEEPER_CONNECT_TIMEOUTMILLIS_CONF,
      CuratorFrameworkFactory.builder().getConnectionTimeoutMs());

  if (principal != null && !principal.isEmpty()) {
    newNodeAcl_ = Ids.CREATOR_ALL_ACL;
  } else {
    newNodeAcl_ = Ids.OPEN_ACL_UNSAFE;
  }
  String aclStr = conf.get(ZOOKEEPER_STORE_ACL_CONF, null);
  LOGGER.info("Zookeeper acl: " + aclStr);
  if (StringUtils.isNotBlank(aclStr)) newNodeAcl_ = parseACLs(aclStr);

  plannersAcl_ = Ids.READ_ACL_UNSAFE;
  String plannersAclStr = conf.get(ZOOKEEPER_STORE_PLANNERS_ACL_CONF, null);
  LOGGER.info("Zookeeper planners acl: " + plannersAclStr);
  if (plannersAclStr != null) plannersAcl_ = parseACLs(plannersAclStr);

  rootNode_ = conf.get(ZOOKEEPER_ZNODE_CONF, ZOOKEEPER_ZNODE_DEFAULT);
  LOGGER.info("Zookeeper root: " + rootNode_);

  // Install the JAAS Configuration for the runtime, if Kerberos is enabled.
  if (principal != null && !principal.isEmpty()) {
    setupJAASConfig(principal, keytabPath);
  }
  initMembershipPaths();
}
From source file:com.cloudera.impala.util.RequestPoolService.java
License:Apache License
/**
 * Looks up the per-pool Llama config, first checking for a per-pool value, then a
 * default set in the config, and lastly the specified 'defaultValue'.
 *
 * @param conf The Configuration to use, provided so the caller can ensure the same
 *   Configuration is used to look up multiple properties.
 */
private int getLlamaPoolConfigValue(Configuration conf, String pool, String key, int defaultValue) {
  return conf.getInt(String.format(LLAMA_PER_POOL_CONFIG_KEY_FORMAT, key, pool),
      conf.getInt(key, defaultValue));
}
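The nested getInt call gives a three-level fallback: the per-pool key wins, then the global key, then the hard-coded default. A small sketch of that resolution order, using a placeholder key format since the real LLAMA_PER_POOL_CONFIG_KEY_FORMAT constant is not shown in the snippet.

import org.apache.hadoop.conf.Configuration;

public class PerPoolFallbackExample {
  // Placeholder format; the real constant lives in RequestPoolService.
  private static final String PER_POOL_FORMAT = "%s.pool.%s";

  static int poolConfig(Configuration conf, String pool, String key, int defaultValue) {
    return conf.getInt(String.format(PER_POOL_FORMAT, key, pool), conf.getInt(key, defaultValue));
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.setInt("llama.max.resources", 20);                 // global value
    conf.setInt("llama.max.resources.pool.analytics", 50);  // per-pool override

    System.out.println(poolConfig(conf, "analytics", "llama.max.resources", 10)); // 50 (per-pool)
    System.out.println(poolConfig(conf, "etl", "llama.max.resources", 10));       // 20 (global)
    System.out.println(poolConfig(conf, "etl", "llama.other.setting", 10));       // 10 (default)
  }
}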
From source file:com.cloudera.llama.am.cache.ResourceCache.java
License:Apache License
@SuppressWarnings("unchecked")
public ResourceCache(String queue, Configuration conf, Listener listener) {
  this.queue = ParamChecker.notEmpty(queue, "queue");
  this.listener = ParamChecker.notNull(listener, "listener");
  Class<? extends EvictionPolicy> klass = conf.getClass(LlamaAM.EVICTION_POLICY_CLASS_KEY,
      LlamaAM.EVICTION_POLICY_CLASS_DEFAULT, EvictionPolicy.class);
  evictionPolicy = ReflectionUtils.newInstance(klass, conf);
  evictionRunInterval = conf.getInt(LlamaAM.EVICTION_RUN_INTERVAL_KEY,
      LlamaAM.EVICTION_RUN_INTERVAL_DEFAULT);
}
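This constructor pairs a class-valued property (conf.getClass plus ReflectionUtils.newInstance) with an int-valued one. A hedged, self-contained sketch of that pattern with placeholder key names and a trivial policy interface standing in for Llama's EvictionPolicy:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;

public class EvictionConfigExample {
  public interface EvictionPolicy {
    boolean shouldEvict(long idleMs);
  }

  public static class TimeoutEvictionPolicy implements EvictionPolicy {
    public boolean shouldEvict(long idleMs) {
      return idleMs > 60000;
    }
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Placeholder keys; Llama uses LlamaAM.EVICTION_POLICY_CLASS_KEY and
    // LlamaAM.EVICTION_RUN_INTERVAL_KEY.
    Class<? extends EvictionPolicy> klass = conf.getClass("cache.eviction.policy.class",
        TimeoutEvictionPolicy.class, EvictionPolicy.class);
    EvictionPolicy policy = ReflectionUtils.newInstance(klass, conf);
    int runIntervalMs = conf.getInt("cache.eviction.run.interval.ms", 5000);

    System.out.println(policy.getClass().getSimpleName() + " every " + runIntervalMs + " ms");
  }
}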
From source file:com.cloudera.llama.am.impl.MultiQueueLlamaAM.java
License:Apache License
public MultiQueueLlamaAM(Configuration conf) {
  super(conf);
  ams = new ConcurrentHashMap<String, SingleQueueAMInfo>();
  reservationToQueue = new HashMap<UUID, String>();
  queueExpireMs = conf.getInt(QUEUE_AM_EXPIRE_KEY, QUEUE_AM_EXPIRE_DEFAULT);
  expireThread = new ExpireThread();
  amCheckExpiryIntervalMs = AM_CHECK_EXPIRY_INTERVAL_MS;
  if (SingleQueueLlamaAM.getRMConnectorClass(conf) == null) {
    throw new IllegalArgumentException(
        FastFormat.format("RMConnector class not defined in the configuration under '{}'",
            SingleQueueLlamaAM.RM_CONNECTOR_CLASS_KEY));
  }
  // TODO: Make this a configuration parameter?
  stp = Executors.newScheduledThreadPool(4);
}
From source file:com.cloudera.llama.am.impl.NormalizerRMConnector.java
License:Apache License
public NormalizerRMConnector(Configuration conf, RMConnector connector) {
  this.connector = connector;
  connector.setRMListener(this);
  normalCpuVCores = conf.getInt(LlamaAM.NORMALIZING_STANDARD_VCORES_KEY,
      LlamaAM.NORMALIZING_SIZE_VCORES_DEFAULT);
  normalMemoryMbs = conf.getInt(LlamaAM.NORMALIZING_STANDARD_MBS_KEY,
      LlamaAM.NORMALIZING_SIZE_MBS_DEFAULT);
  normalizedToEntry = new HashMap<UUID, ResourceEntry>();
  originalToEntry = new HashMap<UUID, ResourceEntry>();
}