List of usage examples for org.apache.hadoop.io.Text.append
public void append(byte[] utf8, int start, int len)
From source file:org.apache.accumulo.examples.filedata.KeyUtil.java
License:Apache License
/**
 * Joins the given strings into a single {@link Text}, separating consecutive strings with a
 * null byte.
 *
 * <p>Every string after the first is encoded as UTF-8 and appended using the length of the
 * encoded byte array. Using the character count as the byte length would truncate any string
 * that contains multi-byte characters.
 *
 * @param s
 *          the strings to join; must contain at least one element
 * @return a text object containing the strings separated by null bytes
 * @throws ArrayIndexOutOfBoundsException
 *           if no strings are supplied
 */
public static Text buildNullSepText(String... s) {
  Text t = new Text(s[0]);
  for (int i = 1; i < s.length; i++) {
    // Encode explicitly as UTF-8 rather than with the platform default charset.
    byte[] utf8 = s[i].getBytes(java.nio.charset.StandardCharsets.UTF_8);
    t.append(nullbyte, 0, 1);
    // The byte count, not s[i].length(), is the correct length for append().
    t.append(utf8, 0, utf8.length);
  }
  return t;
}
From source file:org.apache.accumulo.examples.simple.filedata.ChunkInputStreamTest.java
License:Apache License
/**
 * Appends one key/value entry to {@code data}.
 *
 * <p>The column qualifier is the output of {@code FileDataIngest.intToBytes(chunkSize)} followed
 * by the first four bytes of {@code FileDataIngest.intToBytes(chunkCount)}.
 *
 * @param data
 *          list that receives the new entry
 * @param row
 *          row of the key
 * @param cf
 *          column family of the key
 * @param chunkSize
 *          value encoded into the first part of the column qualifier
 * @param chunkCount
 *          value encoded into the second part of the column qualifier
 * @param vis
 *          column visibility of the key
 * @param value
 *          string whose bytes become the entry's value
 */
public static void addData(List<Entry<Key, Value>> data, String row, String cf, int chunkSize, int chunkCount, String vis, String value) {
  final Text qualifier = new Text(FileDataIngest.intToBytes(chunkSize));
  final byte[] countBytes = FileDataIngest.intToBytes(chunkCount);
  qualifier.append(countBytes, 0, 4);

  final Key key = new Key(new Text(row), new Text(cf), qualifier, new Text(vis));
  data.add(new KeyValue(key, value.getBytes()));
}
From source file:org.apache.accumulo.examples.wikisearch.reader.LfLineReader.java
License:Apache License
/** * Read one line from the InputStream into the given Text. A line can be terminated by '\n' (LF). * EOF also terminates an otherwise unterminated line. * * @param str/*ww w . j av a2 s .co m*/ * the object to store the given line (without newline) * @param maxLineLength * the maximum number of bytes to store into str; the rest of the line is silently * discarded. * @param maxBytesToConsume * the maximum number of bytes to consume in this call. This is only a hint, because if * the line cross this threshold, we allow it to happen. It can overshoot potentially by * as much as one buffer length. * * @return the number of bytes read including the (longest) newline found. * * @throws IOException * if the underlying stream throws */ public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException { /* * We're reading data from in, but the head of the stream may be already buffered in buffer, so * we have several cases: 1. No newline characters are in the buffer, so we need to copy * everything and read another buffer from the stream. 2. An unambiguously terminated line is in * buffer, so we just copy to str. 
*/ str.clear(); int txtLength = 0; // tracks str.getLength(), as an optimization int newlineLength = 0; // length of terminating newline long bytesConsumed = 0; do { int startPosn = bufferPosn; // starting from where we left off the last time if (bufferPosn >= bufferLength) { startPosn = bufferPosn = 0; bufferLength = in.read(buffer); if (bufferLength <= 0) { break; // EOF } } for (; bufferPosn < bufferLength; ++bufferPosn) { // search for newline if (buffer[bufferPosn] == LF) { newlineLength = 1; ++bufferPosn; // at next invocation proceed from following byte break; } } int readLength = bufferPosn - startPosn; bytesConsumed += readLength; int appendLength = readLength - newlineLength; if (appendLength > maxLineLength - txtLength) { appendLength = maxLineLength - txtLength; } if (appendLength > 0) { str.append(buffer, startPosn, appendLength); txtLength += appendLength; } } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume); if (bytesConsumed > Integer.MAX_VALUE) { throw new IOException("Too many bytes before newline: " + bytesConsumed); } return (int) bytesConsumed; }
From source file:org.apache.accumulo.examples.wikisearch.util.TextUtil.java
License:Apache License
/**
 * Appends one byte of {@code nullByte} to the given text, followed by the first eight bytes
 * produced by {@code SummingCombiner.FIXED_LEN_ENCODER} for {@code s}.
 *
 * @param t
 *          the text to append to
 * @param s
 *          the long value to encode and append
 */
public static void textAppend(Text t, long s) {
  t.append(nullByte, 0, 1);
  final byte[] encoded = SummingCombiner.FIXED_LEN_ENCODER.encode(s);
  t.append(encoded, 0, 8);
}
From source file:org.apache.accumulo.examples.wikisearch.util.TextUtil.java
License:Apache License
/**
 * Appends the full contents of {@code nullByte} to the given text.
 *
 * @param text
 *          the text that receives the null byte
 */
public static void appendNullByte(Text text) {
  final byte[] separator = nullByte;
  text.append(separator, 0, separator.length);
}
From source file:org.apache.accumulo.examples.wikisearch.util.TextUtil.java
License:Apache License
/**
 * Encodes the given string with {@link Text#encode(String, boolean)} and appends the resulting
 * bytes to the given {@link Text}, without adding a null byte.
 *
 * @param t
 *          the text to append to
 * @param s
 *          the string to encode and append
 * @param replaceBadChar
 *          passed through to {@code Text.encode}
 * @throws IllegalArgumentException
 *           wrapping the {@link CharacterCodingException} if encoding fails
 */
public static void textAppendNoNull(Text t, String s, boolean replaceBadChar) {
  final ByteBuffer encoded;
  try {
    encoded = Text.encode(s, replaceBadChar);
  } catch (CharacterCodingException cce) {
    throw new IllegalArgumentException(cce);
  }
  // Only the first limit() bytes of the backing array are appended.
  t.append(encoded.array(), 0, encoded.limit());
}
From source file:org.apache.accumulo.server.client.BulkImporter.java
License:Apache License
protected static Text getStartRowForExtent(KeyExtent extent) { Text start = extent.getPrevEndRow(); if (start != null) { start = new Text(start); // ACCUMULO-3967 We want the first possible key in this tablet, not the following row from the previous tablet start.append(byte0, 0, 1); }//from www . j a v a 2s . co m return start; }
From source file:org.apache.accumulo.server.client.BulkImporter.java
License:Apache License
public static List<TabletLocation> findOverlappingTablets(ClientContext context, VolumeManager vm, TabletLocator locator, Path file, Text startRow, Text endRow) throws Exception { List<TabletLocation> result = new ArrayList<>(); Collection<ByteSequence> columnFamilies = Collections.emptyList(); String filename = file.toString(); // log.debug(filename + " finding overlapping tablets " + startRow + " -> " + endRow); FileSystem fs = vm.getVolumeByPath(file).getFileSystem(); FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder().forFile(filename, fs, fs.getConf()) .withTableConfiguration(context.getConfiguration()).seekToBeginning().build(); try {// w w w .ja va2 s . c om Text row = startRow; if (row == null) row = new Text(); while (true) { // log.debug(filename + " Seeking to row " + row); reader.seek(new Range(row, null), columnFamilies, false); if (!reader.hasTop()) { // log.debug(filename + " not found"); break; } row = reader.getTopKey().getRow(); TabletLocation tabletLocation = locator.locateTablet(context, row, false, true); // log.debug(filename + " found row " + row + " at location " + tabletLocation); result.add(tabletLocation); row = tabletLocation.tablet_extent.getEndRow(); if (row != null && (endRow == null || row.compareTo(endRow) < 0)) { row = new Text(row); row.append(byte0, 0, byte0.length); } else break; } } finally { reader.close(); } // log.debug(filename + " to be sent to " + result); return result; }
From source file:org.apache.accumulo.server.util.VerifyTabletAssignments.java
License:Apache License
/**
 * Runs a multi-scan against one tablet server, with one small range per extent, and passes each
 * continued scan result to {@code checkFailures}.
 *
 * <p>The Thrift client is returned in a {@code finally} block so that it is not leaked when any
 * of the scan calls throws.
 *
 * @param context
 *          client context supplying RPC credentials and the tablet server client
 * @param entry
 *          the tablet server address and the extents to scan on it
 * @param failures
 *          set handed to {@code checkFailures} for each continued scan result
 * @throws ThriftSecurityException
 *           propagated from the scan calls
 * @throws TException
 *           propagated from the scan calls
 * @throws NoSuchScanIDException
 *           propagated from the scan calls
 */
private static void checkTabletServer(ClientContext context, Entry<HostAndPort, List<KeyExtent>> entry, HashSet<KeyExtent> failures)
    throws ThriftSecurityException, TException, NoSuchScanIDException {
  TabletClientService.Iface client = ThriftUtil.getTServerClient(entry.getKey(), context);
  try {
    Map<TKeyExtent, List<TRange>> batch = new TreeMap<>();

    for (KeyExtent keyExtent : entry.getValue()) {
      Text row = keyExtent.getEndRow();
      Text row2 = null;

      if (row == null) {
        // No end row: start from the previous end row plus 'a', or from a fixed row when the
        // extent has no previous end row either.
        row = keyExtent.getPrevEndRow();

        if (row != null) {
          row = new Text(row);
          row.append(new byte[] { 'a' }, 0, 1);
        } else {
          row = new Text("1234567890");
        }

        // The exclusive upper bound is the start row with one more byte appended.
        row2 = new Text(row);
        row2.append(new byte[] { '!' }, 0, 1);
      } else {
        // End row present: the range runs from the end row with its last byte decremented
        // (inclusive) up to the end row itself (exclusive).
        row = new Text(row);
        row2 = new Text(row);

        row.getBytes()[row.getLength() - 1] = (byte) (row.getBytes()[row.getLength() - 1] - 1);
      }

      Range r = new Range(row, true, row2, false);
      batch.put(keyExtent.toThrift(), Collections.singletonList(r.toThrift()));
    }

    TInfo tinfo = Tracer.traceInfo();
    Map<String, Map<String, String>> emptyMapSMapSS = Collections.emptyMap();
    List<IterInfo> emptyListIterInfo = Collections.emptyList();
    List<TColumn> emptyListColumn = Collections.emptyList();
    InitialMultiScan is = client.startMultiScan(tinfo, context.rpcCreds(), batch, emptyListColumn, emptyListIterInfo, emptyMapSMapSS,
        Authorizations.EMPTY.getAuthorizationsBB(), false, null, 0L, null, null);

    // NOTE(review): only continued results are passed to checkFailures; the initial result is
    // not. Confirm that this is intentional.
    if (is.result.more) {
      MultiScanResult result = client.continueMultiScan(tinfo, is.scanID);
      checkFailures(entry.getKey(), failures, result);

      while (result.more) {
        result = client.continueMultiScan(tinfo, is.scanID);
        checkFailures(entry.getKey(), failures, result);
      }
    }

    client.closeMultiScan(tinfo, is.scanID);
  } finally {
    // Always hand the client back, including when a scan call throws.
    ThriftUtil.returnClient((TServiceClient) client);
  }
}
From source file:org.apache.asterix.external.indexing.input.HDFSSeekableLineReader.java
License:Apache License
/**
 * Read one line from the InputStream into the given Text. A line
 * can be terminated by one of the following: '\n' (LF), '\r' (CR),
 * or '\r\n' (CR+LF). EOF also terminates an otherwise unterminated
 * line.
 *
 * <p>Before returning, {@code currentFilePos} is updated from the reader's
 * position, the buffered length and the current buffer offset.
 *
 * @param str
 *            the object to store the given line (without newline); it is cleared first
 * @param maxLineLength
 *            the maximum number of bytes to store into str;
 *            the rest of the line is silently discarded.
 * @param maxBytesToConsume
 *            the maximum number of bytes to consume
 *            in this call. This is only a hint, because if the line crosses
 *            this threshold, we allow it to happen. It can overshoot
 *            potentially by as much as one buffer length.
 * @return the number of bytes read including the (longest) newline
 *         found.
 * @throws IOException
 *             if the underlying stream throws, or if more than
 *             {@code Integer.MAX_VALUE} bytes were consumed
 */
public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    /* We're reading data from in, but the head of the stream may be
     * already buffered in buffer, so we have several cases:
     * 1. No newline characters are in the buffer, so we need to copy
     *    everything and read another buffer from the stream.
     * 2. An unambiguously terminated line is in buffer, so we just
     *    copy to str.
     * 3. Ambiguously terminated line is in buffer, i.e. buffer ends
     *    in CR. In this case we copy everything up to CR to str, but
     *    we also need to see what follows CR: if it's LF, then we
     *    need to consume LF as well, so next call to readLine will read
     *    from after that.
     * We use a flag prevCharCR to signal if previous character was CR
     * and, if it happens to be at the end of the buffer, delay
     * consuming it until we have a chance to look at the char that
     * follows.
     */
    str.clear();
    int txtLength = 0; // tracks str.getLength(), as an optimization
    int newlineLength = 0; // length of terminating newline (0 until one is found)
    boolean prevCharCR = false; // true if the previous char was CR
    long bytesConsumed = 0;
    do {
        int startPosn = bufferPosn; // starting from where we left off the last time
        if (bufferPosn >= bufferLength) {
            // Buffer exhausted: refill it from the reader.
            startPosn = bufferPosn = 0;
            if (prevCharCR)
                ++bytesConsumed; // account for CR from previous read
            bufferLength = reader.read(buffer);
            if (bufferLength <= 0)
                break; // EOF
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) { // search for newline
            if (buffer[bufferPosn] == LF) {
                newlineLength = (prevCharCR) ? 2 : 1;
                ++bufferPosn; // at next invocation proceed from following byte
                break;
            }
            if (prevCharCR) { // CR + notLF, we are at notLF
                newlineLength = 1;
                break;
            }
            prevCharCR = (buffer[bufferPosn] == CR);
        }
        int readLength = bufferPosn - startPosn;
        if (prevCharCR && newlineLength == 0)
            --readLength; // CR at the end of the buffer; it is counted on the next refill
        bytesConsumed += readLength;
        // Exclude the newline bytes and cap what is stored at maxLineLength.
        int appendLength = readLength - newlineLength;
        if (appendLength > maxLineLength - txtLength) {
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);

    if (bytesConsumed > (long) Integer.MAX_VALUE)
        throw new IOException("Too many bytes before newline: " + bytesConsumed);
    // Reader position, minus the buffered length, plus the offset already consumed in the buffer.
    currentFilePos = reader.getPos() - bufferLength + bufferPosn;
    return (int) bytesConsumed;
}