List of usage examples for org.apache.hadoop.io Text append
public void append(byte[] utf8, int start, int len)
From source file:mr.MyLineReader.java
License:Apache License
/** * Read a line terminated by a custom delimiter. *//*from ww w. j av a 2 s . c o m*/ private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException { System.out.println("Enter readCustomLine!!!"); str.clear(); int txtLength = 0; // tracks str.getLength(), as an optimization long bytesConsumed = 0; int delPosn = 0; do { int startPosn = bufferPosn; // starting from where we left off the // last // time if (bufferPosn >= bufferLength) { startPosn = bufferPosn = 0; bufferLength = in.read(buffer); if (bufferLength <= 0) break; // EOF } for (; bufferPosn < bufferLength; ++bufferPosn) { if (buffer[bufferPosn] == recordDelimiterBytes[delPosn]) { delPosn++; if (delPosn >= recordDelimiterBytes.length) { bufferPosn++; break; } } else { delPosn = 0; } } int readLength = bufferPosn - startPosn; bytesConsumed += readLength; int appendLength = readLength - delPosn; if (appendLength > maxLineLength - txtLength) { appendLength = maxLineLength - txtLength; } if (appendLength > 0) { str.append(buffer, startPosn, appendLength); txtLength += appendLength; } } while (delPosn < recordDelimiterBytes.length && bytesConsumed < maxBytesToConsume); if (bytesConsumed > (long) Integer.MAX_VALUE) throw new IOException("Too many bytes before delimiter: " + bytesConsumed); return (int) bytesConsumed; }
From source file:mvm.rya.indexing.accumulo.freetext.ColumnPrefixes.java
License:Apache License
private static Text concat(Text prefix, String str) { Text temp = new Text(prefix); try {//ww w. java 2 s. c o m ByteBuffer buffer = Text.encode(str, false); temp.append(buffer.array(), 0, buffer.limit()); } catch (CharacterCodingException cce) { throw new IllegalArgumentException(cce); } return temp; }
From source file:mvm.rya.indexing.KeyParts.java
License:Apache License
/**
 * Appends the full contents of a byte array to a row key.
 *
 * @param bytes the bytes to append
 * @param keyText the key text that receives the bytes
 */
private static void appendBytes(byte[] bytes, Text keyText) {
    final int count = bytes.length;
    keyText.append(bytes, 0, count);
}
From source file:mvm.rya.indexing.KeyParts.java
License:Apache License
/** * Get a collision unlikely hash string and append to the key, * so that if two keys have the same value, then they will be the same, * if two different values that occur at the same time there keys are different. * If the application uses a very large number of statements at the exact same time, * the md5 value might be upgraded to for example sha-1 to avoid collisions. * @param statement//from w w w. j av a 2 s . com * @param keyText */ public static void appendUniqueness(Statement statement, Text keyText) { keyText.append(HASH_PREFIX, 0, 1); // delimiter Value statementValue = new Value(StringUtils.getBytesUtf8(StatementSerializer.writeStatement(statement))); byte[] hashOfValue = Md5Hash.md5Binary(statementValue); keyText.append(hashOfValue, 0, hashOfValue.length); }
From source file:net.darkseraphim.webanalytics.hadoop.csv.CSVLineRecordReader.java
License:Apache License
/** * Helper function that adds a new value to the values list passed as * argument.// www. jav a2 s. c om * * @param sb * StringBuffer that has the value to be added * @param values * values list * @param takeDelimiterOut * should be true when called in the middle of the line, when a * delimiter was found, and false when sb contains the line * ending * @throws UnsupportedEncodingException */ protected void foundDelimiter(StringBuffer sb, List<Text> values, boolean takeDelimiterOut) throws UnsupportedEncodingException { //remove trailing LF if (sb.length() > 0 && sb.charAt(sb.length() - 1) == '\n') { sb.deleteCharAt(sb.length() - 1); } // Found a real delimiter Text text = new Text(); String val = (takeDelimiterOut) ? sb.substring(0, sb.length() - separator.length()) : sb.toString(); if (val.startsWith(delimiter) && val.endsWith(delimiter)) { val = (val.length() - (2 * delimiter.length()) > 0) ? val.substring(delimiter.length(), val.length() - delimiter.length()) : ""; } text.append(val.getBytes("UTF-8"), 0, val.length()); values.add(text); // Empty string buffer sb.setLength(0); }
From source file:org.apache.accumulo.core.client.admin.FindMax.java
License:Apache License
private static Text findMidPoint(Text minBS, Text maxBS) { ByteArrayOutputStream startOS = new ByteArrayOutputStream(); startOS.write(0); // add a leading zero so bigint does not think its negative startOS.write(minBS.getBytes(), 0, minBS.getLength()); ByteArrayOutputStream endOS = new ByteArrayOutputStream(); endOS.write(0);// add a leading zero so bigint does not think its negative endOS.write(maxBS.getBytes(), 0, maxBS.getLength()); // make the numbers of the same magnitude if (startOS.size() < endOS.size()) appendZeros(startOS, endOS.size() - startOS.size()); else if (endOS.size() < startOS.size()) appendZeros(endOS, startOS.size() - endOS.size()); BigInteger min = new BigInteger(startOS.toByteArray()); BigInteger max = new BigInteger(endOS.toByteArray()); BigInteger mid = max.subtract(min).divide(BigInteger.valueOf(2)).add(min); byte[] ba = mid.toByteArray(); Text ret = new Text(); if (ba.length == startOS.size()) { if (ba[0] != 0) throw new RuntimeException(); // big int added a zero so it would not be negative, drop it ret.set(ba, 1, ba.length - 1);/*from w ww . j a v a 2s . c om*/ } else { int expLen = Math.max(minBS.getLength(), maxBS.getLength()); // big int will drop leading 0x0 bytes for (int i = ba.length; i < expLen; i++) { ret.append(new byte[] { 0 }, 0, 1); } ret.append(ba, 0, ba.length); } // remove trailing 0x0 bytes while (ret.getLength() > 0 && ret.getBytes()[ret.getLength() - 1] == 0 && ret.compareTo(minBS) > 0) { Text t = new Text(); t.set(ret.getBytes(), 0, ret.getLength() - 1); ret = t; } return ret; }
From source file:org.apache.accumulo.core.client.admin.FindMax.java
License:Apache License
private static Text findInitialEnd(Scanner scanner) { Text end = new Text(new byte[] { (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff }); scanner.setRange(new Range(end, null)); while (scanner.iterator().hasNext()) { Text t = new Text(); t.append(end.getBytes(), 0, end.getLength()); t.append(end.getBytes(), 0, end.getLength()); end = t;//ww w .j av a 2 s. co m scanner.setRange(new Range(end, null)); } return end; }
From source file:org.apache.accumulo.core.client.impl.BulkImport.java
License:Apache License
public static List<KeyExtent> findOverlappingTablets(ClientContext context, KeyExtentCache extentCache, Text startRow, Text endRow, FileSKVIterator reader) throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException { List<KeyExtent> result = new ArrayList<>(); Collection<ByteSequence> columnFamilies = Collections.emptyList(); Text row = startRow; if (row == null) row = new Text(); while (true) { // log.debug(filename + " Seeking to row " + row); reader.seek(new Range(row, null), columnFamilies, false); if (!reader.hasTop()) { // log.debug(filename + " not found"); break; }//ww w . j a v a2s . c o m row = reader.getTopKey().getRow(); KeyExtent extent = extentCache.lookup(row); // log.debug(filename + " found row " + row + " at location " + tabletLocation); result.add(extent); row = extent.getEndRow(); if (row != null && (endRow == null || row.compareTo(endRow) < 0)) { row = new Text(row); row.append(byte0, 0, byte0.length); } else break; } return result; }
From source file:org.apache.accumulo.core.client.impl.TabletLocatorImpl.java
License:Apache License
private List<Range> binRanges(ClientContext context, List<Range> ranges, Map<String, Map<KeyExtent, List<Range>>> binnedRanges, boolean useCache, LockCheckerSession lcSession) throws AccumuloException, AccumuloSecurityException, TableNotFoundException { List<Range> failures = new ArrayList<>(); List<TabletLocation> tabletLocations = new ArrayList<>(); boolean lookupFailed = false; l1: for (Range range : ranges) { tabletLocations.clear();//from w w w. jav a 2s. c om Text startRow; if (range.getStartKey() != null) { startRow = range.getStartKey().getRow(); } else startRow = new Text(); TabletLocation tl = null; if (useCache) tl = lcSession.checkLock(locateTabletInCache(startRow)); else if (!lookupFailed) tl = _locateTablet(context, startRow, false, false, false, lcSession); if (tl == null) { failures.add(range); if (!useCache) lookupFailed = true; continue; } tabletLocations.add(tl); while (tl.tablet_extent.getEndRow() != null && !range.afterEndKey(new Key(tl.tablet_extent.getEndRow()).followingKey(PartialKey.ROW))) { if (useCache) { Text row = new Text(tl.tablet_extent.getEndRow()); row.append(new byte[] { 0 }, 0, 1); tl = lcSession.checkLock(locateTabletInCache(row)); } else { tl = _locateTablet(context, tl.tablet_extent.getEndRow(), true, false, false, lcSession); } if (tl == null) { failures.add(range); if (!useCache) lookupFailed = true; continue l1; } tabletLocations.add(tl); } for (TabletLocation tl2 : tabletLocations) { TabletLocatorImpl.addRange(binnedRanges, tl2.tablet_location, tl2.tablet_extent, range); } } return failures; }
From source file:org.apache.accumulo.core.client.impl.TabletLocatorImpl.java
License:Apache License
private void lookupTabletLocation(ClientContext context, Text row, boolean retry, LockCheckerSession lcSession) throws AccumuloException, AccumuloSecurityException, TableNotFoundException { Text metadataRow = new Text(tableId); metadataRow.append(new byte[] { ';' }, 0, 1); metadataRow.append(row.getBytes(), 0, row.getLength()); TabletLocation ptl = parent.locateTablet(context, metadataRow, false, retry); if (ptl != null) { TabletLocations locations = locationObtainer.lookupTablet(context, ptl, metadataRow, lastTabletRow, parent);/*from w w w. ja v a 2 s. com*/ while (locations != null && locations.getLocations().isEmpty() && locations.getLocationless().isEmpty()) { // try the next tablet, the current tablet does not have any tablets that overlap the row Text er = ptl.tablet_extent.getEndRow(); if (er != null && er.compareTo(lastTabletRow) < 0) { // System.out.println("er "+er+" ltr "+lastTabletRow); ptl = parent.locateTablet(context, er, true, retry); if (ptl != null) locations = locationObtainer.lookupTablet(context, ptl, metadataRow, lastTabletRow, parent); else break; } else { break; } } if (locations == null) return; // cannot assume the list contains contiguous key extents... so it is probably // best to deal with each extent individually Text lastEndRow = null; for (TabletLocation tabletLocation : locations.getLocations()) { KeyExtent ke = tabletLocation.tablet_extent; TabletLocation locToCache; // create new location if current prevEndRow == endRow if ((lastEndRow != null) && (ke.getPrevEndRow() != null) && ke.getPrevEndRow().equals(lastEndRow)) { locToCache = new TabletLocation(new KeyExtent(ke.getTableId(), ke.getEndRow(), lastEndRow), tabletLocation.tablet_location, tabletLocation.tablet_session); } else { locToCache = tabletLocation; } // save endRow for next iteration lastEndRow = locToCache.tablet_extent.getEndRow(); updateCache(locToCache, lcSession); } } }