List of usage examples for org.apache.hadoop.io.Text.getBytes()
@Override public byte[] getBytes()
From source file:org.apache.accumulo.proxy.SimpleProxyBase.java
License:Apache License
/**
 * Wraps the contents of a {@link Text} in a {@link ByteBuffer} without copying.
 *
 * <p>{@code Text.getBytes()} hands back the backing array, which may be longer than the logical
 * value; only the first {@code getLength()} bytes are valid. The returned buffer is therefore
 * limited to that range so callers reading {@code remaining()} bytes never see stale data.
 *
 * @param t the text to wrap
 * @return a buffer positioned at 0 with its limit set to {@code t.getLength()}
 */
static private ByteBuffer t2bb(Text t) {
  // Bound the view by getLength(): the backing array can carry leftover bytes past the value.
  return ByteBuffer.wrap(t.getBytes(), 0, t.getLength());
}
From source file:org.apache.accumulo.server.tabletserver.Tablet.java
License:Apache License
private SplitRowSpec findSplitRow(Collection<FileRef> files) { // never split the root tablet // check if we already decided that we can never split // check to see if we're big enough to split long splitThreshold = acuTableConf.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD); if (extent.isRootTablet() || estimateTabletSize() <= splitThreshold) { return null; }//w w w.j a v a 2s . c o m // have seen a big row before, do not bother checking unless a minor compaction or map file import has occurred. if (sawBigRow) { if (timeOfLastMinCWhenBigFreakinRowWasSeen != lastMinorCompactionFinishTime || timeOfLastImportWhenBigFreakinRowWasSeen != lastMapFileImportTime) { // a minor compaction or map file import has occurred... check again sawBigRow = false; } else { // nothing changed, do not split return null; } } SortedMap<Double, Key> keys = null; try { // we should make .25 below configurable keys = FileUtil.findMidPoint(fs, tabletServer.getSystemConfiguration(), extent.getPrevEndRow(), extent.getEndRow(), files, .25); } catch (IOException e) { log.error("Failed to find midpoint " + e.getMessage()); return null; } // check to see if one row takes up most of the tablet, in which case we can not split try { Text lastRow; if (extent.getEndRow() == null) { Key lastKey = (Key) FileUtil.findLastKey(fs, tabletServer.getSystemConfiguration(), files); lastRow = lastKey.getRow(); } else { lastRow = extent.getEndRow(); } // check to see that the midPoint is not equal to the end key if (keys.get(.5).compareRow(lastRow) == 0) { if (keys.firstKey() < .5) { Key candidate = keys.get(keys.firstKey()); if (candidate.compareRow(lastRow) != 0) { // we should use this ratio in split size estimations if (log.isTraceEnabled()) log.trace(String.format( "Splitting at %6.2f instead of .5, row at .5 is same as end row%n", keys.firstKey())); return new SplitRowSpec(keys.firstKey(), candidate.getRow()); } } log.warn("Cannot split tablet " + extent + " it contains a big row : " + lastRow); sawBigRow = 
true; timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime; timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime; return null; } Key mid = keys.get(.5); Text text = (mid == null) ? null : mid.getRow(); SortedMap<Double, Key> firstHalf = keys.headMap(.5); if (firstHalf.size() > 0) { Text beforeMid = firstHalf.get(firstHalf.lastKey()).getRow(); Text shorter = new Text(); int trunc = longestCommonLength(text, beforeMid); shorter.set(text.getBytes(), 0, Math.min(text.getLength(), trunc + 1)); text = shorter; } return new SplitRowSpec(.5, text); } catch (IOException e) { // don't split now, but check again later log.error("Failed to find lastkey " + e.getMessage()); return null; } }
From source file:org.apache.accumulo.server.tabletserver.Tablet.java
License:Apache License
/**
 * Counts the leading bytes that two rows share.
 *
 * <p>Only the first {@code getLength()} bytes of each argument are compared.
 *
 * @param text the first row
 * @param beforeMid the second row
 * @return the number of identical bytes at the start of both rows
 */
private static int longestCommonLength(Text text, Text beforeMid) {
  final int limit = Math.min(text.getLength(), beforeMid.getLength());
  final byte[] left = text.getBytes();
  final byte[] right = beforeMid.getBytes();

  int matched = 0;
  while (matched < limit && left[matched] == right[matched]) {
    matched++;
  }
  return matched;
}
From source file:org.apache.accumulo.server.util.VerifyTabletAssignments.java
License:Apache License
private static void checkTabletServer(ClientContext context, Entry<HostAndPort, List<KeyExtent>> entry, HashSet<KeyExtent> failures) throws ThriftSecurityException, TException, NoSuchScanIDException { TabletClientService.Iface client = ThriftUtil.getTServerClient(entry.getKey(), context); Map<TKeyExtent, List<TRange>> batch = new TreeMap<>(); for (KeyExtent keyExtent : entry.getValue()) { Text row = keyExtent.getEndRow(); Text row2 = null;//from ww w .ja v a2 s . co m if (row == null) { row = keyExtent.getPrevEndRow(); if (row != null) { row = new Text(row); row.append(new byte[] { 'a' }, 0, 1); } else { row = new Text("1234567890"); } row2 = new Text(row); row2.append(new byte[] { '!' }, 0, 1); } else { row = new Text(row); row2 = new Text(row); row.getBytes()[row.getLength() - 1] = (byte) (row.getBytes()[row.getLength() - 1] - 1); } Range r = new Range(row, true, row2, false); batch.put(keyExtent.toThrift(), Collections.singletonList(r.toThrift())); } TInfo tinfo = Tracer.traceInfo(); Map<String, Map<String, String>> emptyMapSMapSS = Collections.emptyMap(); List<IterInfo> emptyListIterInfo = Collections.emptyList(); List<TColumn> emptyListColumn = Collections.emptyList(); InitialMultiScan is = client.startMultiScan(tinfo, context.rpcCreds(), batch, emptyListColumn, emptyListIterInfo, emptyMapSMapSS, Authorizations.EMPTY.getAuthorizationsBB(), false, null, 0L, null, null); if (is.result.more) { MultiScanResult result = client.continueMultiScan(tinfo, is.scanID); checkFailures(entry.getKey(), failures, result); while (result.more) { result = client.continueMultiScan(tinfo, is.scanID); checkFailures(entry.getKey(), failures, result); } } client.closeMultiScan(tinfo, is.scanID); ThriftUtil.returnClient((TServiceClient) client); }
From source file:org.apache.accumulo.tserver.tablet.Tablet.java
License:Apache License
private SplitRowSpec findSplitRow(Collection<FileRef> files) { // never split the root tablet // check if we already decided that we can never split // check to see if we're big enough to split long splitThreshold = tableConfiguration.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD); long maxEndRow = tableConfiguration.getMemoryInBytes(Property.TABLE_MAX_END_ROW_SIZE); if (extent.isRootTablet() || estimateTabletSize() <= splitThreshold) { return null; }// w w w . j a v a 2s . c om // have seen a big row before, do not bother checking unless a minor compaction or map file import has occurred. if (sawBigRow) { if (timeOfLastMinCWhenBigFreakinRowWasSeen != lastMinorCompactionFinishTime || timeOfLastImportWhenBigFreakinRowWasSeen != lastMapFileImportTime) { // a minor compaction or map file import has occurred... check again sawBigRow = false; } else { // nothing changed, do not split return null; } } SortedMap<Double, Key> keys = null; try { // we should make .25 below configurable keys = FileUtil.findMidPoint(getTabletServer().getFileSystem(), getTabletServer().getConfiguration(), extent.getPrevEndRow(), extent.getEndRow(), FileUtil.toPathStrings(files), .25); } catch (IOException e) { log.error("Failed to find midpoint " + e.getMessage()); return null; } // check to see if one row takes up most of the tablet, in which case we can not split try { Text lastRow; if (extent.getEndRow() == null) { Key lastKey = (Key) FileUtil.findLastKey(getTabletServer().getFileSystem(), getTabletServer().getConfiguration(), files); lastRow = lastKey.getRow(); } else { lastRow = extent.getEndRow(); } // We expect to get a midPoint for this set of files. If we don't get one, we have a problem. 
final Key mid = keys.get(.5); if (null == mid) { throw new IllegalStateException("Could not determine midpoint for files"); } // check to see that the midPoint is not equal to the end key if (mid.compareRow(lastRow) == 0) { if (keys.firstKey() < .5) { Key candidate = keys.get(keys.firstKey()); if (candidate.getLength() > maxEndRow) { log.warn("Cannot split tablet " + extent + ", selected split point too long. Length : " + candidate.getLength()); sawBigRow = true; timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime; timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime; return null; } if (candidate.compareRow(lastRow) != 0) { // we should use this ratio in split size estimations if (log.isTraceEnabled()) log.trace(String.format( "Splitting at %6.2f instead of .5, row at .5 is same as end row%n", keys.firstKey())); return new SplitRowSpec(keys.firstKey(), candidate.getRow()); } } log.warn("Cannot split tablet " + extent + " it contains a big row : " + lastRow); sawBigRow = true; timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime; timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime; return null; } Text text = mid.getRow(); SortedMap<Double, Key> firstHalf = keys.headMap(.5); if (firstHalf.size() > 0) { Text beforeMid = firstHalf.get(firstHalf.lastKey()).getRow(); Text shorter = new Text(); int trunc = longestCommonLength(text, beforeMid); shorter.set(text.getBytes(), 0, Math.min(text.getLength(), trunc + 1)); text = shorter; } if (text.getLength() > maxEndRow) { log.warn("Cannot split tablet " + extent + ", selected split point too long. 
Length : " + text.getLength()); sawBigRow = true; timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime; timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime; return null; } return new SplitRowSpec(.5, text); } catch (IOException e) { // don't split now, but check again later log.error("Failed to find lastkey " + e.getMessage()); return null; } }
From source file:org.apache.accumulo.tserver.Tablet.java
License:Apache License
private SplitRowSpec findSplitRow(Collection<FileRef> files) { // never split the root tablet // check if we already decided that we can never split // check to see if we're big enough to split long splitThreshold = acuTableConf.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD); if (extent.isRootTablet() || estimateTabletSize() <= splitThreshold) { return null; }/*from ww w .j av a 2 s .c o m*/ // have seen a big row before, do not bother checking unless a minor compaction or map file import has occurred. if (sawBigRow) { if (timeOfLastMinCWhenBigFreakinRowWasSeen != lastMinorCompactionFinishTime || timeOfLastImportWhenBigFreakinRowWasSeen != lastMapFileImportTime) { // a minor compaction or map file import has occurred... check again sawBigRow = false; } else { // nothing changed, do not split return null; } } SortedMap<Double, Key> keys = null; try { // we should make .25 below configurable keys = FileUtil.findMidPoint(fs, tabletServer.getSystemConfiguration(), extent.getPrevEndRow(), extent.getEndRow(), FileUtil.toPathStrings(files), .25); } catch (IOException e) { log.error("Failed to find midpoint " + e.getMessage()); return null; } // check to see if one row takes up most of the tablet, in which case we can not split try { Text lastRow; if (extent.getEndRow() == null) { Key lastKey = (Key) FileUtil.findLastKey(fs, tabletServer.getSystemConfiguration(), files); lastRow = lastKey.getRow(); } else { lastRow = extent.getEndRow(); } // check to see that the midPoint is not equal to the end key if (keys.get(.5).compareRow(lastRow) == 0) { if (keys.firstKey() < .5) { Key candidate = keys.get(keys.firstKey()); if (candidate.compareRow(lastRow) != 0) { // we should use this ratio in split size estimations if (log.isTraceEnabled()) log.trace(String.format( "Splitting at %6.2f instead of .5, row at .5 is same as end row%n", keys.firstKey())); return new SplitRowSpec(keys.firstKey(), candidate.getRow()); } } log.warn("Cannot split tablet " + extent + " it contains a big 
row : " + lastRow); sawBigRow = true; timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime; timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime; return null; } Key mid = keys.get(.5); Text text = (mid == null) ? null : mid.getRow(); SortedMap<Double, Key> firstHalf = keys.headMap(.5); if (firstHalf.size() > 0) { Text beforeMid = firstHalf.get(firstHalf.lastKey()).getRow(); Text shorter = new Text(); int trunc = longestCommonLength(text, beforeMid); shorter.set(text.getBytes(), 0, Math.min(text.getLength(), trunc + 1)); text = shorter; } return new SplitRowSpec(.5, text); } catch (IOException e) { // don't split now, but check again later log.error("Failed to find lastkey " + e.getMessage()); return null; } }
From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java
License:Apache License
/**
 * Turns one delimited input line into a record mutation and writes it to the context.
 *
 * <p>The line is split into fields that are consumed in order: row id, record id and family,
 * each only when it is configured to come from the file, followed by the column values of the
 * family. A row id or record id that is not read from the file is a digest of the raw line
 * rendered in radix {@code Character.MAX_RADIX}.
 *
 * @param k the input key (unused)
 * @param value the raw input line
 * @param context the context the mutation is written to
 * @throws IOException if the family is not defined or the field count does not match the
 *         family's column definition
 */
@Override
protected void map(Writable k, Text value, Context context) throws IOException, InterruptedException {
  final BlurRecord record = _mutate.getRecord();
  record.clearColumns();

  final String line = value.toString();
  final List<String> fields = toList(_splitter.split(line));
  int cursor = 0;

  // Row id: hash of the line, or the next field.
  boolean rowIdGenerated = false;
  if (_autoGenerateRowIdAsHashOfData) {
    _digest.reset();
    _digest.update(value.getBytes(), 0, value.getLength());
    record.setRowId(new BigInteger(_digest.digest()).toString(Character.MAX_RADIX));
    rowIdGenerated = true;
  } else {
    record.setRowId(fields.get(cursor++));
  }

  // Record id: next field, the already generated row id, or a fresh hash of the line.
  if (!_autoGenerateRecordIdAsHashOfData) {
    record.setRecordId(fields.get(cursor++));
  } else if (rowIdGenerated) {
    record.setRecordId(record.getRowId());
  } else {
    _digest.reset();
    _digest.update(value.getBytes(), 0, value.getLength());
    record.setRecordId(new BigInteger(_digest.digest()).toString(Character.MAX_RADIX));
  }

  final String family = _familyNotInFile ? _familyFromPath : fields.get(cursor++);
  record.setFamily(family);

  final List<String> columnNames = _columnNameMap.get(family);
  if (columnNames == null) {
    throw new IOException("Family [" + family + "] is missing in the definition.");
  }

  final int columnCount = columnNames.size();
  if (fields.size() - cursor != columnCount) {
    // List the leading fields that were expected in the file ahead of the columns.
    final StringBuilder options = new StringBuilder();
    if (!_autoGenerateRowIdAsHashOfData) {
      options.append("rowid,");
    }
    if (!_autoGenerateRecordIdAsHashOfData) {
      options.append("recordid,");
    }
    if (!_familyNotInFile) {
      options.append("family,");
    }
    throw new IOException("Record [" + line + "] does not match defined record [" + options
        + getColumnNames(columnNames) + "].");
  }

  for (int column = 0; column < columnCount; column++) {
    final String cell = handleHiveNulls(fields.get(cursor + column));
    if (cell == null) {
      continue;
    }
    record.addColumn(columnNames.get(column), cell);
    _columnCounter.increment(1);
  }

  _key.set(record.getRowId());
  _mutate.setMutateType(MUTATE_TYPE.REPLACE);
  context.write(_key, _mutate);
  _recordCounter.increment(1);
  context.progress();
}
From source file:org.apache.drill.exec.store.text.DrillTextRecordReader.java
License:Apache License
/**
 * Finds the first occurrence of a delimiter byte that is not inside double quotes.
 *
 * <p>The scan begins at {@code start} in the unquoted state; every double-quote byte flips the
 * quoted state, and a delimiter is only reported while unquoted.
 *
 * @param text the text being searched
 * @param delimiter the delimiter
 * @param start the index to start searching
 * @return the index of the first unquoted delimiter at or after {@code start}, or -1 if none
 */
public int find(Text text, byte delimiter, int start) {
  final byte[] bytes = text.getBytes();
  final int len = text.getLength();
  boolean inQuotes = false;

  for (int p = start; p < len; p++) {
    final byte current = bytes[p];
    if (current == '\"') {
      inQuotes = !inQuotes;
    }
    if (!inQuotes && current == delimiter) {
      return p;
    }
  }
  return -1;
}
From source file:org.apache.flume.sink.hdfs.HDFSTextFormatter.java
License:Apache License
/**
 * Serializes an event as its text form followed by a single newline byte.
 *
 * @param e the event to format
 * @return a new array holding exactly the valid bytes of the record, newline included
 */
@Override
public byte[] getBytes(Event e) {
  Text record = makeText(e);
  // Append the newline as an explicit byte: "\n".getBytes() encodes with the platform default
  // charset, so its first byte is not guaranteed to be the newline.
  record.append(new byte[] {'\n'}, 0, 1);
  // The backing array may be longer than the record; copy only the valid prefix.
  byte[] rawBytes = record.getBytes();
  return Arrays.copyOf(rawBytes, record.getLength());
}
From source file:org.apache.fluo.core.util.ByteUtil.java
License:Apache License
/** * Convert from Hadoop Text to Bytes/* w w w . j a v a 2 s . c o m*/ */ public static Bytes toBytes(Text t) { return Bytes.of(t.getBytes(), 0, t.getLength()); }