List of usage examples for org.apache.hadoop.conf.Configuration.getInt
public int getInt(String name, int defaultValue)
Parameters:
name - the property name
defaultValue - the value to return if the property is not set

Returns: the value of the name property as an int, or defaultValue if the property is unset.
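A minimal usage sketch (the property names here are hypothetical, chosen only to show the lookup and the default-value fallback):

import org.apache.hadoop.conf.Configuration;

public class GetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setInt("my.app.retries", 5);               // hypothetical property name
        int retries = conf.getInt("my.app.retries", 3); // property set: returns 5
        int backlog = conf.getInt("my.app.backlog", 3); // property unset: falls back to 3
        System.out.println(retries + " " + backlog);
    }
}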
From source file:co.nubetech.hiho.mapreduce.lib.db.apache.BigDecimalSplitter.java
License:Apache License
public List<InputSplit> split(Configuration conf, ResultSet results, String colName) throws SQLException {
    BigDecimal minVal = results.getBigDecimal(1);
    BigDecimal maxVal = results.getBigDecimal(2);

    String lowClausePrefix = colName + " >= ";
    String highClausePrefix = colName + " < ";

    BigDecimal numSplits = new BigDecimal(conf.getInt(MRJobConfig.NUM_MAPS, 1));

    if (minVal == null && maxVal == null) {
        // Range is null to null. Return a null split accordingly.
        List<InputSplit> splits = new ArrayList<InputSplit>();
        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(colName + " IS NULL",
                colName + " IS NULL"));
        return splits;
    }

    if (minVal == null || maxVal == null) {
        // Don't know what is a reasonable min/max value for interpolation. Fail.
        LOG.error("Cannot find a range for NUMERIC or DECIMAL fields with one end NULL.");
        return null;
    }

    // Get all the split points together.
    List<BigDecimal> splitPoints = split(numSplits, minVal, maxVal);
    List<InputSplit> splits = new ArrayList<InputSplit>();

    // Turn the split points into a set of intervals.
    BigDecimal start = splitPoints.get(0);
    for (int i = 1; i < splitPoints.size(); i++) {
        BigDecimal end = splitPoints.get(i);

        if (i == splitPoints.size() - 1) {
            // This is the last one; use a closed interval.
            splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
                    lowClausePrefix + start.toString(), colName + " <= " + end.toString()));
        } else {
            // Normal open-interval case.
            splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
                    lowClausePrefix + start.toString(), highClausePrefix + end.toString()));
        }

        start = end;
    }

    return splits;
}
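This splitter, like the Date and Text splitters that follow, reads its split count from MRJobConfig.NUM_MAPS ("mapreduce.job.maps"), so the number of generated splits is controlled entirely through the job configuration. A sketch of how a caller might set it (the value 8 is arbitrary):

conf.setInt(MRJobConfig.NUM_MAPS, 8); // ask the splitter for roughly 8 splits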
From source file:co.nubetech.hiho.mapreduce.lib.db.apache.DateSplitter.java
License:Apache License
public List<InputSplit> split(Configuration conf, ResultSet results, String colName) throws SQLException {
    long minVal;
    long maxVal;

    int sqlDataType = results.getMetaData().getColumnType(1);
    minVal = resultSetColToLong(results, 1, sqlDataType);
    maxVal = resultSetColToLong(results, 2, sqlDataType);

    String lowClausePrefix = colName + " >= ";
    String highClausePrefix = colName + " < ";

    int numSplits = conf.getInt(MRJobConfig.NUM_MAPS, 1);
    if (numSplits < 1) {
        numSplits = 1;
    }

    if (minVal == Long.MIN_VALUE && maxVal == Long.MIN_VALUE) {
        // The range of acceptable dates is NULL to NULL. Just create a single split.
        List<InputSplit> splits = new ArrayList<InputSplit>();
        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(colName + " IS NULL",
                colName + " IS NULL"));
        return splits;
    }

    // Gather the split point integers.
    List<Long> splitPoints = split(numSplits, minVal, maxVal);
    List<InputSplit> splits = new ArrayList<InputSplit>();

    // Turn the split points into a set of intervals.
    long start = splitPoints.get(0);
    Date startDate = longToDate(start, sqlDataType);
    if (sqlDataType == Types.TIMESTAMP) {
        // The lower bound's nanos value needs to match the actual lower-bound nanos.
        try {
            ((java.sql.Timestamp) startDate).setNanos(results.getTimestamp(1).getNanos());
        } catch (NullPointerException npe) {
            // If the lower bound was NULL, we'll get an NPE; just ignore it and don't set nanos.
        }
    }

    for (int i = 1; i < splitPoints.size(); i++) {
        long end = splitPoints.get(i);
        Date endDate = longToDate(end, sqlDataType);

        if (i == splitPoints.size() - 1) {
            if (sqlDataType == Types.TIMESTAMP) {
                // The upper bound's nanos value needs to match the actual upper-bound nanos.
                try {
                    ((java.sql.Timestamp) endDate).setNanos(results.getTimestamp(2).getNanos());
                } catch (NullPointerException npe) {
                    // If the upper bound was NULL, we'll get an NPE; just ignore it and don't set nanos.
                }
            }
            // This is the last one; use a closed interval.
            splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
                    lowClausePrefix + dateToString(startDate), colName + " <= " + dateToString(endDate)));
        } else {
            // Normal open-interval case.
            splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
                    lowClausePrefix + dateToString(startDate), highClausePrefix + dateToString(endDate)));
        }

        start = end;
        startDate = endDate;
    }

    if (minVal == Long.MIN_VALUE || maxVal == Long.MIN_VALUE) {
        // Add an extra split to handle the null case that we saw.
        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(colName + " IS NULL",
                colName + " IS NULL"));
    }

    return splits;
}
From source file:co.nubetech.hiho.mapreduce.lib.db.apache.TextSplitter.java
License:Apache License
/**
 * This method needs to determine the splits between two user-provided strings.
 * In the case where the user's strings are 'A' and 'Z', this is not hard; we
 * could create two splits from ['A', 'M') and ['M', 'Z'], 26 splits for strings
 * beginning with each letter, etc.
 *
 * If a user has provided us with the strings "Ham" and "Haze", however, we need
 * to create splits that differ in the third letter.
 *
 * The algorithm used is as follows:
 * Since there are 2**16 unicode characters, we interpret characters as digits in
 * base 65536. Given a string 's' containing characters s_0, s_1 .. s_n, we interpret
 * the string as the number: 0.s_0 s_1 s_2 .. s_n in base 65536. Having mapped the
 * low and high strings into floating-point values, we then use the BigDecimalSplitter
 * to establish the even split points, then map the resulting floating point values
 * back into strings.
 */
public List<InputSplit> split(Configuration conf, ResultSet results, String colName) throws SQLException {
    LOG.warn("Generating splits for a textual index column.");
    LOG.warn("If your database sorts in a case-insensitive order, "
            + "this may result in a partial import or duplicate records.");
    LOG.warn("You are strongly encouraged to choose an integral split column.");

    String minString = results.getString(1);
    String maxString = results.getString(2);

    boolean minIsNull = false;

    // If the min value is null, switch it to an empty string instead for purposes
    // of interpolation. Then add [null, null] as a special case split.
    if (null == minString) {
        minString = "";
        minIsNull = true;
    }

    if (null == maxString) {
        // If the max string is null, then the min string has to be null too.
        // Just return a special split for this case.
        List<InputSplit> splits = new ArrayList<InputSplit>();
        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(colName + " IS NULL",
                colName + " IS NULL"));
        return splits;
    }

    // Use this as a hint. May need an extra task if the size doesn't divide cleanly.
    int numSplits = conf.getInt(MRJobConfig.NUM_MAPS, 1);

    String lowClausePrefix = colName + " >= '";
    String highClausePrefix = colName + " < '";

    // If there is a common prefix between minString and maxString, establish it
    // and pull it out of minString and maxString.
    int maxPrefixLen = Math.min(minString.length(), maxString.length());
    int sharedLen;
    for (sharedLen = 0; sharedLen < maxPrefixLen; sharedLen++) {
        char c1 = minString.charAt(sharedLen);
        char c2 = maxString.charAt(sharedLen);
        if (c1 != c2) {
            break;
        }
    }

    // The common prefix has length 'sharedLen'. Extract it from both.
    String commonPrefix = minString.substring(0, sharedLen);
    minString = minString.substring(sharedLen);
    maxString = maxString.substring(sharedLen);

    List<String> splitStrings = split(numSplits, minString, maxString, commonPrefix);
    List<InputSplit> splits = new ArrayList<InputSplit>();

    // Convert the list of split point strings into an actual set of InputSplits.
    String start = splitStrings.get(0);
    for (int i = 1; i < splitStrings.size(); i++) {
        String end = splitStrings.get(i);

        if (i == splitStrings.size() - 1) {
            // This is the last one; use a closed interval.
            splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
                    lowClausePrefix + start + "'", colName + " <= '" + end + "'"));
        } else {
            // Normal open-interval case.
            splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
                    lowClausePrefix + start + "'", highClausePrefix + end + "'"));
        }

        // Advance the lower bound to the current split point.
        start = end;
    }

    if (minIsNull) {
        // Add the special null split at the end.
        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(colName + " IS NULL",
                colName + " IS NULL"));
    }

    return splits;
}
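A rough sketch of the string-to-number mapping the javadoc above describes, assuming a fixed working precision (the helper name stringToBigDecimal and the scale of 20 are illustrative, not the actual private implementation):

import java.math.BigDecimal;
import java.math.RoundingMode;

// Interpret a string s_0 s_1 .. s_n as the fraction 0.s_0 s_1 .. s_n in base 65536:
// each character is one "digit", so s_0 is divided by 65536, s_1 by 65536^2, and so on.
static BigDecimal stringToBigDecimal(String str) {
    final BigDecimal onePlace = new BigDecimal(65536);
    BigDecimal result = BigDecimal.ZERO;
    BigDecimal divisor = onePlace;
    for (int i = 0; i < str.length(); i++) {
        BigDecimal digit = new BigDecimal((int) str.charAt(i));
        result = result.add(digit.divide(divisor, 20, RoundingMode.HALF_UP));
        divisor = divisor.multiply(onePlace);
    }
    return result;
}

Under this mapping, "Ham" and "Haze" become two nearby fractions whose interval can be evenly subdivided by the BigDecimalSplitter; each resulting fraction is then converted back into a string by reading off its base-65536 digits.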
From source file:com.aerospike.hadoop.mapreduce.AerospikeConfigUtil.java
License:Apache License
public static int getInputPort(Configuration conf) {
    int port = conf.getInt(INPUT_PORT, DEFAULT_INPUT_PORT);
    log.info("using " + INPUT_PORT + " = " + port);
    return port;
}
From source file:com.aerospike.hadoop.mapreduce.AerospikeConfigUtil.java
License:Apache License
public static int getOutputPort(Configuration conf) {
    int port = conf.getInt(OUTPUT_PORT, DEFAULT_OUTPUT_PORT);
    log.info("using " + OUTPUT_PORT + " = " + port);
    return port;
}
From source file:com.alexholmes.hadooputils.sort.DelimitedLineReader.java
License:Apache License
/**
 * Create a line reader that reads from the given stream using the
 * <code>io.file.buffer.size</code> specified in the given
 * <code>Configuration</code>.
 * @param in input stream
 * @param conf configuration
 * @throws IOException
 */
public DelimitedLineReader(InputStream in, Configuration conf) throws IOException {
    this(in, conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE));
}
From source file:com.alexholmes.hadooputils.sort.DelimitedLineReader.java
License:Apache License
/**
 * Create a line reader that reads from the given stream using the
 * <code>io.file.buffer.size</code> specified in the given
 * <code>Configuration</code>, and using a custom record delimiter given
 * as an array of bytes.
 * @param in input stream
 * @param conf configuration
 * @param recordDelimiterBytes The delimiter
 * @throws IOException
 */
public DelimitedLineReader(InputStream in, Configuration conf, byte[] recordDelimiterBytes) throws IOException {
    this.in = in;
    this.bufferSize = conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
    this.buffer = new byte[this.bufferSize];
    this.recordDelimiterBytes = recordDelimiterBytes;
}
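Both constructors size their read buffer from io.file.buffer.size, so the buffer can be tuned entirely through the Configuration. A sketch of constructing the reader this way (the input path is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

Configuration conf = new Configuration();
conf.setInt("io.file.buffer.size", 128 * 1024); // use a 128 KB read buffer
FileSystem fs = FileSystem.get(conf);
DelimitedLineReader reader = new DelimitedLineReader(fs.open(new Path("/data/input.txt")), conf);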
From source file:com.alexholmes.hadooputils.sort.DelimitedLineRecordReader.java
License:Apache License
protected void initialize(Configuration job, FileSplit split) throws IOException {
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // Open the file and seek to the start of the split.
    FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;

    String rowDelim = job.get("textinputformat.record.delimiter", null);
    if (codec != null) {
        if (rowDelim != null) {
            byte[] hexcode = SortConfig.getHexDelimiter(rowDelim);
            in = new DelimitedLineReader(codec.createInputStream(fileIn), job,
                    (hexcode != null) ? hexcode : rowDelim.getBytes());
        } else {
            in = new DelimitedLineReader(codec.createInputStream(fileIn), job);
        }
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        if (rowDelim != null) {
            byte[] hexcode = SortConfig.getHexDelimiter(rowDelim);
            in = new DelimitedLineReader(fileIn, job, (hexcode != null) ? hexcode : rowDelim.getBytes());
        } else {
            in = new DelimitedLineReader(fileIn, job);
        }
    }
    if (skipFirstLine) {
        // Skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
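The two configuration keys read above are set on the job before the record reader is initialized. A sketch (the delimiter string and the 1 MB cap are arbitrary choices):

Configuration job = new Configuration();
job.set("textinputformat.record.delimiter", "||");            // records separated by a literal "||"
job.setInt("mapred.linerecordreader.maxlength", 1024 * 1024); // cap a single record at 1 MB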
From source file:com.alibaba.wasp.client.FConnectionManager.java
License:Apache License
/**
 * Set the number of retries to use serverside when trying to communicate with
 * another server over {@link com.alibaba.wasp.client.FConnection}. Used when
 * updating catalog tables, etc. Call this method before we create any
 * Connections.
 *
 * @param c
 *          The Configuration instance to set the retries into.
 * @param log
 *          Used to log what we set in here.
 */
public static void setServerSideFConnectionRetries(final Configuration c, final Log log) {
    int fcRetries = c.getInt(FConstants.WASP_CLIENT_RETRIES_NUMBER,
            FConstants.DEFAULT_WASP_CLIENT_RETRIES_NUMBER);
    // Go big. Multiply by 10. If we can't get to meta after this many retries,
    // then something is seriously wrong.
    int serversideMultiplier = c.getInt("wasp.client.serverside.retries.multiplier", 10);
    int retries = fcRetries * serversideMultiplier;
    c.setInt(FConstants.WASP_CLIENT_RETRIES_NUMBER, retries);
    log.debug("Set serverside FConnection retries=" + retries);
}
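A short usage sketch, per the javadoc's requirement that this run before any connections are created (the logger name is arbitrary):

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;

Configuration conf = new Configuration();
Log log = LogFactory.getLog("fserver-startup"); // arbitrary logger name
FConnectionManager.setServerSideFConnectionRetries(conf, log);
// conf now carries the multiplied retry count; only now should FConnections be created.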
From source file:com.alibaba.wasp.client.ServerCallable.java
License:Apache License
/**
 * Run this instance with retries, timed waits, and refinds of missing
 * entityGroups.
 *
 * @return an object of type T
 * @throws java.io.IOException
 *           if a remote or network exception occurs
 * @throws RuntimeException
 *           other unspecified error
 */
public T withRetries() throws IOException, RuntimeException {
    Configuration c = getConnection().getConfiguration();
    final int numRetries = c.getInt(FConstants.WASP_CLIENT_RETRIES_NUMBER,
            FConstants.DEFAULT_WASP_CLIENT_RETRIES_NUMBER);
    List<RetriesExhaustedException.ThrowableWithExtraContext> exceptions =
            new ArrayList<RetriesExhaustedException.ThrowableWithExtraContext>();
    for (int tries = 0; tries < numRetries; tries++) {
        try {
            beforeCall();
            connect(tries != 0);
            return call();
        } catch (Throwable t) {
            shouldRetry(t);
            t = translateException(t);
            if (t instanceof SocketTimeoutException || t instanceof ConnectException
                    || t instanceof RetriesExhaustedException) {
                // If one of these exceptions is thrown, clear all the cache entries that
                // map to that slow/dead server; otherwise, let the cache miss and ask
                // .FMETA. again to find the new location.
                EntityGroupLocation egl = location;
                if (egl != null) {
                    getConnection().clearCaches(egl.getHostnamePort());
                }
            }
            if (t instanceof RuntimeException) {
                throw new RuntimeException(t);
            }
            RetriesExhaustedException.ThrowableWithExtraContext qt =
                    new RetriesExhaustedException.ThrowableWithExtraContext(t,
                            System.currentTimeMillis(), toString());
            exceptions.add(qt);
            long pauseTime = ConnectionUtils.getPauseTime(this.pause, tries);
            LOG.info("withRetries attempt " + tries + " of " + numRetries
                    + " failed; retrying after sleep of " + pauseTime);
            try {
                Thread.sleep(pauseTime);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new RuntimeException("Thread was interrupted while trying to connect to FServer.", e);
            }
            if (tries == numRetries - 1) {
                throw new RetriesExhaustedException(tries, exceptions);
            }
        } finally {
            afterCall();
        }
    }
    return null;
}