Example usage for org.apache.hadoop.io Text decode

Introduction

This page collects example usages of the decode method of org.apache.hadoop.io.Text.

Prototype

public static String decode(byte[] utf8, int start, int length) throws CharacterCodingException

Source Link

Usage

From source file:mvm.rya.indexing.accumulo.temporal.AccumuloTemporalIndexer.java

License:Apache License

/**
  * An iteration wrapper for a loaded scanner that is returned for each query above.
  */*from  w  w  w  .  j av a  2  s .c  om*/
  * @param scanner
  *            the results to iterate, then close.
  * @return an anonymous object that will iterate the resulting statements from a given scanner.
  */
private static CloseableIteration<Statement, QueryEvaluationException> getIteratorWrapper(
        final ScannerBase scanner) {

    final Iterator<Entry<Key, Value>> i = scanner.iterator();

    return new CloseableIteration<Statement, QueryEvaluationException>() {
        @Override
        public boolean hasNext() {
            return i.hasNext();
        }

        @Override
        public Statement next() throws QueryEvaluationException {
            Entry<Key, Value> entry = i.next();
            Value v = entry.getValue();
            try {
                String dataString = Text.decode(v.get(), 0, v.getSize());
                Statement s = StatementSerializer.readStatement(dataString);
                return s;
            } catch (CharacterCodingException e) {
                logger.error("Error decoding value=" + Arrays.toString(v.get()), e);
                throw new QueryEvaluationException(e);
            } catch (IOException e) {
                logger.error("Error de-serializing statement, string=" + v.get(), e);
                throw new QueryEvaluationException(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("Remove not implemented");
        }

        @Override
        public void close() throws QueryEvaluationException {
            scanner.close();
        }
    };
}

From source file:org.apache.mahout.utils.nlp.collocations.llr.Gram.java

License:Apache License

/**
 * @return gram term string/* www  . ja v  a  2 s.c om*/
 */
public String getString() {
    try {
        return Text.decode(bytes, 1, length - 1);
    } catch (CharacterCodingException e) {
        throw new IllegalStateException("Should not have happened " + e.toString());
    }
}

From source file:org.apache.orc.impl.ReaderImpl.java

License:Apache License

/**
 * Checks for the ORC magic string so that users are stopped from reading text
 * files or RC files as ORC files. The magic is looked for at the end of the
 * postscript first and, failing that, at the very start of the file.
 *
 * @param in the file being read
 * @param path the filename for error messages
 * @param psLen the postscript length
 * @param buffer the tail of the file
 * @throws IOException if the postscript is too short, the magic string is found
 *         in neither location, or the file cannot be read
 */
protected static void ensureOrcFooter(FSDataInputStream in, Path path, int psLen, ByteBuffer buffer)
        throws IOException {
    final int magicLength = OrcFile.MAGIC.length();
    // The magic string plus one trailing byte.
    final int fullLength = magicLength + 1;

    if (psLen < fullLength || buffer.remaining() < fullLength) {
        throw new FileFormatException("Malformed ORC file " + path + ". Invalid postscript length " + psLen);
    }

    // First candidate: the magic string at the end of the postscript.
    final int tailStart = buffer.arrayOffset() + buffer.position() + buffer.limit() - fullLength;
    final byte[] backing = buffer.array();
    final String tailMagic = Text.decode(backing, tailStart, magicLength);
    if (tailMagic.equals(OrcFile.MAGIC)) {
        return;
    }

    // Second candidate: this may be the 0.11.0 version of ORC, which is
    // recognised by the header at the beginning of the file.
    final byte[] header = new byte[magicLength];
    in.readFully(0, header, 0, magicLength);
    final String headMagic = Text.decode(header, 0, magicLength);
    if (headMagic.equals(OrcFile.MAGIC)) {
        return;
    }

    // Neither location holds the magic string: this isn't an ORC file.
    throw new FileFormatException("Malformed ORC file " + path + ". Invalid postscript.");
}

From source file:org.apache.orc.impl.ReaderImpl.java

License:Apache License

/**
 * Checks for the ORC magic string so that users are stopped from reading text
 * files or RC files as ORC files. The magic is looked for at the end of the
 * postscript first and, failing that, at the start of the buffer's backing array.
 *
 * @param buffer the tail of the file
 * @param psLen the postscript length
 * @throws IOException if the postscript is too short or the magic string is
 *         found in neither location
 */
protected static void ensureOrcFooter(ByteBuffer buffer, int psLen) throws IOException {
    final int magicLength = OrcFile.MAGIC.length();
    // The magic string plus one trailing byte.
    final int fullLength = magicLength + 1;

    if (psLen < fullLength || buffer.remaining() < fullLength) {
        throw new FileFormatException("Malformed ORC file. Invalid postscript length " + psLen);
    }

    // First candidate: the magic string at the end of the postscript.
    final int tailStart = buffer.arrayOffset() + buffer.position() + buffer.limit() - fullLength;
    final byte[] backing = buffer.array();
    final String tailMagic = Text.decode(backing, tailStart, magicLength);
    if (tailMagic.equals(OrcFile.MAGIC)) {
        return;
    }

    // Second candidate: this may be the 0.11.0 version of the ORC file, which is
    // recognised by the header at the beginning of the buffer.
    // NOTE(review): this reads from array index 0 without adding arrayOffset() --
    // confirm callers never pass a sliced buffer.
    final String headMagic = Text.decode(buffer.array(), 0, magicLength);
    if (headMagic.equals(OrcFile.MAGIC)) {
        return;
    }

    throw new FileFormatException("Malformed ORC file. Invalid postscript length " + psLen);
}

From source file:org.apache.rya.indexing.accumulo.freetext.AccumuloFreeTextIndexer.java

License:Apache License

/**
 * Wraps a scanner in an iteration that turns each scanned value into a statement.
 *
 * @param s the scanner whose entries are iterated; closed (when non-null) by the
 *          returned iteration's {@code close()}.
 * @return an anonymous iteration over the statements de-serialized from the scanner's values.
 */
private static CloseableIteration<Statement, QueryEvaluationException> getIteratorWrapper(final Scanner s) {

    final Iterator<Entry<Key, Value>> entries = s.iterator();

    return new CloseableIteration<Statement, QueryEvaluationException>() {
        @Override
        public boolean hasNext() {
            return entries.hasNext();
        }

        /**
         * Decodes the next entry's value as text and de-serializes it into a statement.
         */
        @Override
        public Statement next() throws QueryEvaluationException {
            final Value value = entries.next().getValue();
            try {
                final byte[] raw = value.get();
                final String serialized = Text.decode(raw, 0, value.getSize());
                return StatementSerializer.readStatement(serialized);
            } catch (final CharacterCodingException e) {
                logger.error("Error decoding value", e);
                throw new QueryEvaluationException(e);
            } catch (final IOException e) {
                logger.error("Error deserializing statement", e);
                throw new QueryEvaluationException(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("Remove not implemented");
        }

        /** Closes the wrapped scanner, if there is one. */
        @Override
        public void close() throws QueryEvaluationException {
            if (s == null) {
                return;
            }
            s.close();
        }
    };
}

From source file:org.apache.rya.indexing.accumulo.temporal.AccumuloTemporalIndexer.java

License:Apache License

/**
 * An iteration wrapper for a loaded scanner that is returned for each query above.
 *
 * @param scanner
 *            the results to iterate, then close.
 * @return an anonymous object that will iterate the resulting statements from a given scanner.
 */
private static CloseableIteration<Statement, QueryEvaluationException> getIteratorWrapper(
        final ScannerBase scanner) {

    final Iterator<Entry<Key, Value>> i = scanner.iterator();

    return new CloseableIteration<Statement, QueryEvaluationException>() {
        @Override
        public boolean hasNext() {
            return i.hasNext();
        }

        /**
         * Decodes the next entry's value as text and de-serializes it into a statement.
         *
         * @throws QueryEvaluationException
         *             if the value cannot be decoded or the decoded text cannot be de-serialized.
         */
        @Override
        public Statement next() throws QueryEvaluationException {
            final Entry<Key, Value> entry = i.next();
            final Value v = entry.getValue();

            // Decode and de-serialize in separate steps so that a de-serialization failure
            // can report the decoded text. Concatenating the raw byte[] would only print
            // the array's identity (e.g. "[B@1a2b3c"), which is useless for diagnosis.
            final String dataString;
            try {
                dataString = Text.decode(v.get(), 0, v.getSize());
            } catch (final CharacterCodingException e) {
                logger.error("Error decoding value=" + Arrays.toString(v.get()), e);
                throw new QueryEvaluationException(e);
            }

            try {
                return StatementSerializer.readStatement(dataString);
            } catch (final IOException e) {
                logger.error("Error de-serializing statement, string=" + dataString, e);
                throw new QueryEvaluationException(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("Remove not implemented");
        }

        /** Closes the underlying scanner. */
        @Override
        public void close() throws QueryEvaluationException {
            scanner.close();
        }
    };
}

From source file:org.terrier.structures.CompressingMetaIndex.java

License:Mozilla Public License

/** {@inheritDoc} */
public String getItem(String Key, int docid) throws IOException {
    // Feed the compressed record for this document to the cached inflater.
    final Inflater inflater = inflaterCache.get();
    inflater.reset();
    inflater.setInput(dataSource.read(offsetLookup.getOffset(docid), offsetLookup.getLength(docid)));

    final byte[] record = new byte[recordLength];
    try {
        inflater.inflate(record);
    } catch (DataFormatException dfe) {
        // The failure is only logged; decoding continues on whatever is in the record buffer.
        logger.error(dfe);
    }

    // Cut the requested key's slice out of the inflated record.
    final int valueOffset = key2byteoffset.get(Key);
    final int valueLength = key2bytelength.get(Key);
    final String value = Text.decode(record, valueOffset, valueLength);
    return value.trim();
}

From source file:org.terrier.structures.CompressingMetaIndex.java

License:Mozilla Public License

/** {@inheritDoc} */
public String[] getItems(String[] Keys, int docid) throws IOException {
    // Feed the compressed record for this document to the cached inflater.
    final Inflater inflater = inflaterCache.get();
    inflater.reset();
    inflater.setInput(dataSource.read(offsetLookup.getOffset(docid), offsetLookup.getLength(docid)));

    final byte[] record = new byte[recordLength];
    try {
        inflater.inflate(record);
    } catch (DataFormatException dfe) {
        // The failure is only logged; decoding continues on whatever is in the record buffer.
        logger.error(dfe);
    }

    // One trimmed value per requested key, in the order the keys were given.
    final String[] values = new String[Keys.length];
    int slot = 0;
    for (final String key : Keys) {
        final int valueOffset = key2byteoffset.get(key);
        final int valueLength = key2bytelength.get(key);
        values[slot++] = Text.decode(record, valueOffset, valueLength).trim();
    }
    return values;
}

From source file:org.terrier.structures.CompressingMetaIndex.java

License:Mozilla Public License

/** {@inheritDoc} */
public String[] getAllItems(int docid) throws IOException {
    // Feed the compressed record for this document to the cached inflater.
    final Inflater inflater = inflaterCache.get();
    inflater.reset();
    inflater.setInput(dataSource.read(offsetLookup.getOffset(docid), offsetLookup.getLength(docid)));

    final byte[] record = new byte[recordLength];
    try {
        inflater.inflate(record);
    } catch (DataFormatException dfe) {
        // The failure is only logged; decoding continues on whatever is in the record buffer.
        logger.error(dfe);
    }

    // Decode every key's slice of the record, in key-index order.
    final String[] values = new String[this.keyCount];
    for (int k = 0; k < values.length; k++) {
        final String value = Text.decode(record, valueByteOffsets[k], valueByteLengths[k]);
        values[k] = value.trim();
    }
    return values;
}

From source file:tests.it.crs4.seal.common.TestTextSamMapping.java

License:Open Source License

/**
 * Parses the {@code sam} fixture and checks each mapped field, including the
 * sequence and base-quality text decoded from their buffers.
 */
@Test
public void testFields() throws java.nio.charset.CharacterCodingException {
    final TextSamMapping mapping = new TextSamMapping(new Text(sam));

    // Scalar fields.
    assertEquals("ERR020229.100000/1", mapping.getName());
    assertEquals(89, mapping.getFlag());
    assertEquals("chr6", mapping.getContig());
    assertEquals(3558357, mapping.get5Position());
    assertEquals(37, mapping.getMapQ());
    assertEquals("91M", mapping.getCigarStr());
    assertEquals(91, mapping.getLength());

    // Sequence bases, decoded from the buffer's current position.
    final ByteBuffer sequenceBuffer = mapping.getSequence();
    final String bases = Text.decode(sequenceBuffer.array(), sequenceBuffer.position(), mapping.getLength());
    assertEquals("AGCTTCTTTGACTCTCGAATTTTAGCACTAGAAGAAATAGTGAGGATTATATATTTCAGAAGTTCTCACCCAGGATATCAGAACACATTCA",
            bases);

    // Base qualities, decoded the same way.
    final ByteBuffer qualityBuffer = mapping.getBaseQualities();
    final String qualities = Text.decode(qualityBuffer.array(), qualityBuffer.position(), mapping.getLength());
    assertEquals("5:CB:CCBCCB>:C@;BBBB??B;?>1@@=C=4ACCAB3A8=CC=C?CBC=CBCCCCCCCCCCCCC@5>?=?CAAB=3=>====5>=AC?C",
            qualities);
}