Example usage for org.apache.hadoop.io Text append

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.append.

Prototype

public void append(byte[] utf8, int start, int len)

Source Link

Document

Appends a range of bytes to the end of the given text.

Usage

From source file:org.apache.accumulo.examples.filedata.KeyUtil.java

License:Apache License

/**
 * Join some number of strings using a null byte separator into a text object.
 *
 * <p>Every string after the first is encoded explicitly as UTF-8 and appended using the length
 * of the encoded byte array. Using the platform default charset together with
 * {@code String.length()} (a count of UTF-16 code units, not bytes) would truncate or corrupt
 * any string containing non-ASCII characters.
 *
 * @param s
 *          strings; must contain at least one element
 * @return a text object containing the strings separated by null bytes
 */
public static Text buildNullSepText(String... s) {
    Text t = new Text(s[0]);
    for (int i = 1; i < s.length; i++) {
        t.append(nullbyte, 0, 1);
        // Encode once and take the byte count from the encoded form, not from the char count.
        byte[] utf8 = s[i].getBytes(java.nio.charset.StandardCharsets.UTF_8);
        t.append(utf8, 0, utf8.length);
    }
    return t;
}

From source file:org.apache.accumulo.examples.simple.filedata.ChunkInputStreamTest.java

License:Apache License

/**
 * Adds one key/value entry to {@code data}. The column qualifier is the byte encoding of
 * {@code chunkSize} followed by the first four bytes of the byte encoding of {@code chunkCount}.
 *
 * @param data
 *          the list receiving the new entry
 * @param row
 *          row of the new key
 * @param cf
 *          column family of the new key
 * @param chunkSize
 *          value encoded into the first part of the column qualifier
 * @param chunkCount
 *          value encoded into the second part of the column qualifier
 * @param vis
 *          column visibility of the new key
 * @param value
 *          string whose bytes become the entry's value
 */
public static void addData(List<Entry<Key, Value>> data, String row, String cf, int chunkSize, int chunkCount,
        String vis, String value) {
    final byte[] sizeBytes = FileDataIngest.intToBytes(chunkSize);
    final Text qualifier = new Text(sizeBytes);
    final byte[] countBytes = FileDataIngest.intToBytes(chunkCount);
    qualifier.append(countBytes, 0, 4);

    final Key key = new Key(new Text(row), new Text(cf), qualifier, new Text(vis));
    data.add(new KeyValue(key, value.getBytes()));
}

From source file:org.apache.accumulo.examples.wikisearch.reader.LfLineReader.java

License:Apache License

/**
 * Read one line from the InputStream into the given Text. A line can be terminated by '\n' (LF).
 * EOF also terminates an otherwise unterminated line.
 *
 * @param str/*ww  w  . j av a2  s  .co m*/
 *          the object to store the given line (without newline)
 * @param maxLineLength
 *          the maximum number of bytes to store into str; the rest of the line is silently
 *          discarded.
 * @param maxBytesToConsume
 *          the maximum number of bytes to consume in this call. This is only a hint, because if
 *          the line cross this threshold, we allow it to happen. It can overshoot potentially by
 *          as much as one buffer length.
 *
 * @return the number of bytes read including the (longest) newline found.
 *
 * @throws IOException
 *           if the underlying stream throws
 */
public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    /*
     * We're reading data from in, but the head of the stream may be already buffered in buffer, so
     * we have several cases: 1. No newline characters are in the buffer, so we need to copy
     * everything and read another buffer from the stream. 2. An unambiguously terminated line is in
     * buffer, so we just copy to str.
     */
    str.clear();
    int txtLength = 0; // tracks str.getLength(), as an optimization
    int newlineLength = 0; // length of terminating newline
    long bytesConsumed = 0;
    do {
        int startPosn = bufferPosn; // starting from where we left off the last time
        if (bufferPosn >= bufferLength) {
            startPosn = bufferPosn = 0;
            bufferLength = in.read(buffer);
            if (bufferLength <= 0) {
                break; // EOF
            }
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) { // search for newline
            if (buffer[bufferPosn] == LF) {
                newlineLength = 1;
                ++bufferPosn; // at next invocation proceed from following byte
                break;
            }
        }
        int readLength = bufferPosn - startPosn;
        bytesConsumed += readLength;
        int appendLength = readLength - newlineLength;
        if (appendLength > maxLineLength - txtLength) {
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);

    if (bytesConsumed > Integer.MAX_VALUE) {
        throw new IOException("Too many bytes before newline: " + bytesConsumed);
    }
    return (int) bytesConsumed;
}

From source file:org.apache.accumulo.examples.wikisearch.util.TextUtil.java

License:Apache License

/**
 * Appends a null byte to the given text, followed by the first eight bytes of the encoding of
 * {@code s} produced by {@code SummingCombiner.FIXED_LEN_ENCODER}.
 *
 * @param t
 *          the text to append to
 * @param s
 *          the long value to encode and append
 */
public static void textAppend(Text t, long s) {
    // Separator first, so the encoded value is always preceded by a null byte.
    t.append(nullByte, 0, 1);

    final byte[] encoded = SummingCombiner.FIXED_LEN_ENCODER.encode(s);
    t.append(encoded, 0, 8);
}

From source file:org.apache.accumulo.examples.wikisearch.util.TextUtil.java

License:Apache License

/**
 * Appends the whole shared {@code nullByte} array to the end of the given text.
 *
 * @param text
 *          the text to which to append the null byte
 */
public static void appendNullByte(Text text) {
    final int count = nullByte.length;
    text.append(nullByte, 0, count);
}

From source file:org.apache.accumulo.examples.wikisearch.util.TextUtil.java

License:Apache License

/**
 * Appends the UTF-8 bytes of the given string to the given {@link Text}, without adding a null
 * byte separator.
 *
 * @param t
 *          the text to append to
 * @param s
 *          the string to encode and append
 * @param replaceBadChar
 *          passed through to {@code Text.encode}
 * @throws IllegalArgumentException
 *           wrapping the {@link CharacterCodingException} if encoding fails
 */
public static void textAppendNoNull(Text t, String s, boolean replaceBadChar) {
    final ByteBuffer encoded;
    try {
        encoded = Text.encode(s, replaceBadChar);
    } catch (CharacterCodingException e) {
        throw new IllegalArgumentException(e);
    }
    // Only the bytes up to the buffer's limit are valid encoded output.
    t.append(encoded.array(), 0, encoded.limit());
}

From source file:org.apache.accumulo.server.client.BulkImporter.java

License:Apache License

/**
 * Computes the start row for the given extent: a copy of the extent's previous end row with
 * {@code byte0} appended, or {@code null} when the extent has no previous end row.
 *
 * @param extent
 *          the extent whose start row is wanted
 * @return the start row, or {@code null} if the extent has no previous end row
 */
protected static Text getStartRowForExtent(KeyExtent extent) {
    final Text prevEndRow = extent.getPrevEndRow();
    if (prevEndRow == null) {
        return null;
    }
    // ACCUMULO-3967 We want the first possible key in this tablet, not the following row from the
    // previous tablet. Work on a copy so the extent's own Text is left untouched.
    final Text firstRow = new Text(prevEndRow);
    firstRow.append(byte0, 0, 1);
    return firstRow;
}

From source file:org.apache.accumulo.server.client.BulkImporter.java

License:Apache License

/**
 * Finds the tablets that the data in {@code file} overlaps, walking the file from
 * {@code startRow} (or from an empty row when {@code startRow} is null).
 *
 * <p>On each step the reader is sought to the current row, the tablet holding the first key found
 * is located and added to the result, and the walk continues just past that tablet's end row. The
 * walk stops when the reader has no more keys, when a tablet has no end row, or when a tablet's
 * end row is not before {@code endRow}.
 *
 * @param context
 *          client context, used for the table configuration and for locating tablets
 * @param vm
 *          volume manager used to resolve the file system for {@code file}
 * @param locator
 *          locator used to find the tablet for a row
 * @param file
 *          the file to examine
 * @param startRow
 *          row to start from, or null to start from an empty row
 * @param endRow
 *          row at which to stop, or null for no upper bound
 * @return the located tablets, in the order they were found
 * @throws Exception
 *           if opening, seeking or locating fails
 */
public static List<TabletLocation> findOverlappingTablets(ClientContext context, VolumeManager vm,
        TabletLocator locator, Path file, Text startRow, Text endRow) throws Exception {
    List<TabletLocation> overlapping = new ArrayList<>();
    Collection<ByteSequence> noFamilies = Collections.emptyList();
    String filename = file.toString();
    FileSystem fs = vm.getVolumeByPath(file).getFileSystem();
    FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder().forFile(filename, fs, fs.getConf())
            .withTableConfiguration(context.getConfiguration()).seekToBeginning().build();
    try {
        Text seekRow = (startRow == null) ? new Text() : startRow;
        for (;;) {
            reader.seek(new Range(seekRow, null), noFamilies, false);
            if (!reader.hasTop()) {
                break; // nothing at or after seekRow
            }

            Text foundRow = reader.getTopKey().getRow();
            TabletLocation location = locator.locateTablet(context, foundRow, false, true);
            overlapping.add(location);

            Text tabletEnd = location.tablet_extent.getEndRow();
            boolean reachedEnd = tabletEnd == null || (endRow != null && tabletEnd.compareTo(endRow) >= 0);
            if (reachedEnd) {
                break;
            }

            // Continue from the first possible row after this tablet's end row.
            seekRow = new Text(tabletEnd);
            seekRow.append(byte0, 0, byte0.length);
        }
    } finally {
        reader.close();
    }
    return overlapping;
}

From source file:org.apache.accumulo.server.util.VerifyTabletAssignments.java

License:Apache License

/**
 * Runs a multi-scan against one tablet server, with one small range per extent listed for that
 * server, and passes every continued scan result to {@code checkFailures}.
 *
 * <p>The Thrift client is returned in a {@code finally} block so that it is not leaked when any
 * of the scan calls throws.
 *
 * @param context
 *          client context used to obtain the client and the RPC credentials
 * @param entry
 *          the tablet server address mapped to the extents to check on it
 * @param failures
 *          set handed to {@code checkFailures} together with each scan result
 * @throws ThriftSecurityException
 *           if thrown by the scan calls
 * @throws TException
 *           if thrown by the scan calls
 * @throws NoSuchScanIDException
 *           if thrown by the scan calls
 */
private static void checkTabletServer(ClientContext context, Entry<HostAndPort, List<KeyExtent>> entry,
        HashSet<KeyExtent> failures) throws ThriftSecurityException, TException, NoSuchScanIDException {
    TabletClientService.Iface client = ThriftUtil.getTServerClient(entry.getKey(), context);
    try {
        Map<TKeyExtent, List<TRange>> batch = new TreeMap<>();

        for (KeyExtent keyExtent : entry.getValue()) {
            Text row = keyExtent.getEndRow();
            Text row2 = null;

            if (row == null) {
                // No end row: build a range starting from the previous end row (or a fixed row
                // when that is missing too).
                row = keyExtent.getPrevEndRow();

                if (row != null) {
                    row = new Text(row);
                    row.append(new byte[] { 'a' }, 0, 1);
                } else {
                    row = new Text("1234567890");
                }

                row2 = new Text(row);
                row2.append(new byte[] { '!' }, 0, 1);
            } else {
                row = new Text(row);
                row2 = new Text(row);

                // Decrement the last byte of the copy so the range starts before the end row.
                row.getBytes()[row.getLength() - 1] = (byte) (row.getBytes()[row.getLength() - 1] - 1);
            }

            Range r = new Range(row, true, row2, false);
            batch.put(keyExtent.toThrift(), Collections.singletonList(r.toThrift()));
        }
        TInfo tinfo = Tracer.traceInfo();
        Map<String, Map<String, String>> emptyMapSMapSS = Collections.emptyMap();
        List<IterInfo> emptyListIterInfo = Collections.emptyList();
        List<TColumn> emptyListColumn = Collections.emptyList();
        InitialMultiScan is = client.startMultiScan(tinfo, context.rpcCreds(), batch, emptyListColumn,
                emptyListIterInfo, emptyMapSMapSS, Authorizations.EMPTY.getAuthorizationsBB(), false, null, 0L,
                null, null);

        // Keep pulling results for as long as the server reports more, checking each one.
        boolean more = is.result.more;
        while (more) {
            MultiScanResult result = client.continueMultiScan(tinfo, is.scanID);
            checkFailures(entry.getKey(), failures, result);
            more = result.more;
        }

        client.closeMultiScan(tinfo, is.scanID);
    } finally {
        // Always hand the client back, even when a scan call above threw.
        ThriftUtil.returnClient((TServiceClient) client);
    }
}

From source file:org.apache.asterix.external.indexing.input.HDFSSeekableLineReader.java

License:Apache License

/**
 * Read one line from the InputStream into the given Text. A line
 * can be terminated by one of the following: '\n' (LF) , '\r' (CR),
 * or '\r\n' (CR+LF). EOF also terminates an otherwise unterminated
 * line./*w w w.  j  a  v  a2s  . c o m*/
 *
 * @param str
 *            the object to store the given line (without newline)
 * @param maxLineLength
 *            the maximum number of bytes to store into str;
 *            the rest of the line is silently discarded.
 * @param maxBytesToConsume
 *            the maximum number of bytes to consume
 *            in this call. This is only a hint, because if the line cross
 *            this threshold, we allow it to happen. It can overshoot
 *            potentially by as much as one buffer length.
 * @return the number of bytes read including the (longest) newline
 *         found.
 * @throws IOException
 *             if the underlying stream throws
 */
public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    /* We're reading data from in, but the head of the stream may be
     * already buffered in buffer, so we have several cases:
     * 1. No newline characters are in the buffer, so we need to copy
     *    everything and read another buffer from the stream.
     * 2. An unambiguously terminated line is in buffer, so we just
     *    copy to str.
     * 3. Ambiguously terminated line is in buffer, i.e. buffer ends
     *    in CR.  In this case we copy everything up to CR to str, but
     *    we also need to see what follows CR: if it's LF, then we
     *    need consume LF as well, so next call to readLine will read
     *    from after that.
     * We use a flag prevCharCR to signal if previous character was CR
     * and, if it happens to be at the end of the buffer, delay
     * consuming it until we have a chance to look at the char that
     * follows.
     */
    str.clear();
    int txtLength = 0; //tracks str.getLength(), as an optimization
    int newlineLength = 0; //length of terminating newline
    boolean prevCharCR = false; //true of prev char was CR
    long bytesConsumed = 0;
    do {
        int startPosn = bufferPosn; //starting from where we left off the last time
        if (bufferPosn >= bufferLength) {
            startPosn = bufferPosn = 0;
            if (prevCharCR)
                ++bytesConsumed; //account for CR from previous read
            bufferLength = reader.read(buffer);
            if (bufferLength <= 0)
                break; // EOF
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline
            if (buffer[bufferPosn] == LF) {
                newlineLength = (prevCharCR) ? 2 : 1;
                ++bufferPosn; // at next invocation proceed from following byte
                break;
            }
            if (prevCharCR) { //CR + notLF, we are at notLF
                newlineLength = 1;
                break;
            }
            prevCharCR = (buffer[bufferPosn] == CR);
        }
        int readLength = bufferPosn - startPosn;
        if (prevCharCR && newlineLength == 0)
            --readLength; //CR at the end of the buffer
        bytesConsumed += readLength;
        int appendLength = readLength - newlineLength;
        if (appendLength > maxLineLength - txtLength) {
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);

    if (bytesConsumed > (long) Integer.MAX_VALUE)
        throw new IOException("Too many bytes before newline: " + bytesConsumed);
    currentFilePos = reader.getPos() - bufferLength + bufferPosn;
    return (int) bytesConsumed;
}