List of usage examples for org.apache.hadoop.io Text decode
public static String decode(byte[] utf8, int start, int length) throws CharacterCodingException
From source file:mvm.rya.indexing.accumulo.temporal.AccumuloTemporalIndexer.java
License:Apache License
/** * An iteration wrapper for a loaded scanner that is returned for each query above. */*from w w w . j av a 2 s .c om*/ * @param scanner * the results to iterate, then close. * @return an anonymous object that will iterate the resulting statements from a given scanner. */ private static CloseableIteration<Statement, QueryEvaluationException> getIteratorWrapper( final ScannerBase scanner) { final Iterator<Entry<Key, Value>> i = scanner.iterator(); return new CloseableIteration<Statement, QueryEvaluationException>() { @Override public boolean hasNext() { return i.hasNext(); } @Override public Statement next() throws QueryEvaluationException { Entry<Key, Value> entry = i.next(); Value v = entry.getValue(); try { String dataString = Text.decode(v.get(), 0, v.getSize()); Statement s = StatementSerializer.readStatement(dataString); return s; } catch (CharacterCodingException e) { logger.error("Error decoding value=" + Arrays.toString(v.get()), e); throw new QueryEvaluationException(e); } catch (IOException e) { logger.error("Error de-serializing statement, string=" + v.get(), e); throw new QueryEvaluationException(e); } } @Override public void remove() { throw new UnsupportedOperationException("Remove not implemented"); } @Override public void close() throws QueryEvaluationException { scanner.close(); } }; }
From source file:org.apache.mahout.utils.nlp.collocations.llr.Gram.java
License:Apache License
/** * @return gram term string/* www . ja v a 2 s.c om*/ */ public String getString() { try { return Text.decode(bytes, 1, length - 1); } catch (CharacterCodingException e) { throw new IllegalStateException("Should not have happened " + e.toString()); } }
From source file:org.apache.orc.impl.ReaderImpl.java
License:Apache License
/** * Ensure this is an ORC file to prevent users from trying to read text * files or RC files as ORC files.//from w ww. jav a2 s . com * @param in the file being read * @param path the filename for error messages * @param psLen the postscript length * @param buffer the tail of the file * @throws IOException */ protected static void ensureOrcFooter(FSDataInputStream in, Path path, int psLen, ByteBuffer buffer) throws IOException { int magicLength = OrcFile.MAGIC.length(); int fullLength = magicLength + 1; if (psLen < fullLength || buffer.remaining() < fullLength) { throw new FileFormatException("Malformed ORC file " + path + ". Invalid postscript length " + psLen); } int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - fullLength; byte[] array = buffer.array(); // now look for the magic string at the end of the postscript. if (!Text.decode(array, offset, magicLength).equals(OrcFile.MAGIC)) { // If it isn't there, this may be the 0.11.0 version of ORC. // Read the first 3 bytes of the file to check for the header byte[] header = new byte[magicLength]; in.readFully(0, header, 0, magicLength); // if it isn't there, this isn't an ORC file if (!Text.decode(header, 0, magicLength).equals(OrcFile.MAGIC)) { throw new FileFormatException("Malformed ORC file " + path + ". Invalid postscript."); } } }
From source file:org.apache.orc.impl.ReaderImpl.java
License:Apache License
/** * Ensure this is an ORC file to prevent users from trying to read text * files or RC files as ORC files.//from w w w . j av a 2s . c o m * @param psLen the postscript length * @param buffer the tail of the file * @throws IOException */ protected static void ensureOrcFooter(ByteBuffer buffer, int psLen) throws IOException { int magicLength = OrcFile.MAGIC.length(); int fullLength = magicLength + 1; if (psLen < fullLength || buffer.remaining() < fullLength) { throw new FileFormatException("Malformed ORC file. Invalid postscript length " + psLen); } int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - fullLength; byte[] array = buffer.array(); // now look for the magic string at the end of the postscript. if (!Text.decode(array, offset, magicLength).equals(OrcFile.MAGIC)) { // if it isn't there, this may be 0.11.0 version of the ORC file. // Read the first 3 bytes from the buffer to check for the header if (!Text.decode(buffer.array(), 0, magicLength).equals(OrcFile.MAGIC)) { throw new FileFormatException("Malformed ORC file. Invalid postscript length " + psLen); } } }
From source file:org.apache.rya.indexing.accumulo.freetext.AccumuloFreeTextIndexer.java
License:Apache License
/**
 * Adapts the entries of a scanner into an iteration of statements.
 *
 * @param s the scanner whose entries are iterated; it is closed when the iteration is closed.
 * @return an iteration that decodes each entry value as UTF-8 and de-serializes it into a statement.
 */
private static CloseableIteration<Statement, QueryEvaluationException> getIteratorWrapper(final Scanner s) {
    final Iterator<Entry<Key, Value>> entries = s.iterator();

    return new CloseableIteration<Statement, QueryEvaluationException>() {

        @Override
        public boolean hasNext() {
            return entries.hasNext();
        }

        @Override
        public Statement next() throws QueryEvaluationException {
            final Value value = entries.next().getValue();
            try {
                final String serialized = Text.decode(value.get(), 0, value.getSize());
                return StatementSerializer.readStatement(serialized);
            } catch (final CharacterCodingException decodeFailure) {
                logger.error("Error decoding value", decodeFailure);
                throw new QueryEvaluationException(decodeFailure);
            } catch (final IOException readFailure) {
                logger.error("Error deserializing statement", readFailure);
                throw new QueryEvaluationException(readFailure);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("Remove not implemented");
        }

        @Override
        public void close() throws QueryEvaluationException {
            if (s == null) {
                return;
            }
            s.close();
        }
    };
}
From source file:org.apache.rya.indexing.accumulo.temporal.AccumuloTemporalIndexer.java
License:Apache License
/**
 * An iteration wrapper for a loaded scanner that is returned for each query above.
 *
 * @param scanner
 *            the results to iterate, then close.
 * @return an anonymous object that will iterate the resulting statements from a given scanner.
 */
private static CloseableIteration<Statement, QueryEvaluationException> getIteratorWrapper(
        final ScannerBase scanner) {

    final Iterator<Entry<Key, Value>> i = scanner.iterator();

    return new CloseableIteration<Statement, QueryEvaluationException>() {
        @Override
        public boolean hasNext() {
            return i.hasNext();
        }

        /**
         * Decodes the next entry's value as UTF-8 and de-serializes it into a statement.
         *
         * @throws QueryEvaluationException wrapping the decoding or de-serialization failure.
         */
        @Override
        public Statement next() throws QueryEvaluationException {
            final Entry<Key, Value> entry = i.next();
            final Value v = entry.getValue();
            // Not final: assigned inside the try so the de-serialization handler can report it.
            String dataString = null;
            try {
                dataString = Text.decode(v.get(), 0, v.getSize());
                return StatementSerializer.readStatement(dataString);
            } catch (final CharacterCodingException e) {
                logger.error("Error decoding value=" + Arrays.toString(v.get()), e);
                throw new QueryEvaluationException(e);
            } catch (final IOException e) {
                // Log the decoded string: concatenating the raw byte[] would only print the
                // array's identity (e.g. "[B@1a2b3c"), which is useless for diagnosis.
                logger.error("Error de-serializing statement, string=" + dataString, e);
                throw new QueryEvaluationException(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("Remove not implemented");
        }

        @Override
        public void close() throws QueryEvaluationException {
            scanner.close();
        }
    };
}
From source file:org.terrier.structures.CompressingMetaIndex.java
License:Mozilla Public License
/** {@inheritDoc} */ public String getItem(String Key, int docid) throws IOException { Inflater unzip = inflaterCache.get(); unzip.reset();//from ww w .jav a 2s . c o m unzip.setInput(dataSource.read(offsetLookup.getOffset(docid), offsetLookup.getLength(docid))); byte[] bOut = new byte[recordLength]; try { unzip.inflate(bOut); } catch (DataFormatException dfe) { logger.error(dfe); } return Text.decode(bOut, key2byteoffset.get(Key), key2bytelength.get(Key)).trim(); }
From source file:org.terrier.structures.CompressingMetaIndex.java
License:Mozilla Public License
/**
 * {@inheritDoc}
 *
 * <p>Inflates the compressed record read for {@code docid} once, then decodes and trims the
 * byte range registered for each entry of {@code Keys}, in the same order.
 */
public String[] getItems(String[] Keys, int docid) throws IOException {
    final Inflater inflater = inflaterCache.get();
    inflater.reset();
    inflater.setInput(dataSource.read(offsetLookup.getOffset(docid), offsetLookup.getLength(docid)));

    final byte[] record = new byte[recordLength];
    try {
        inflater.inflate(record);
    } catch (DataFormatException dfe) {
        // Logged only; decoding continues on whatever the buffer holds.
        logger.error(dfe);
    }

    final String[] values = new String[Keys.length];
    int slot = 0;
    for (final String key : Keys) {
        final int start = key2byteoffset.get(key);
        final int span = key2bytelength.get(key);
        values[slot] = Text.decode(record, start, span).trim();
        slot++;
    }
    return values;
}
From source file:org.terrier.structures.CompressingMetaIndex.java
License:Mozilla Public License
/** {@inheritDoc} */ public String[] getAllItems(int docid) throws IOException { Inflater unzip = inflaterCache.get(); unzip.reset();//from w w w . j av a 2s . co m unzip.setInput(dataSource.read(offsetLookup.getOffset(docid), offsetLookup.getLength(docid))); //unzip.setInput( // dataSource.read(docid2offsets[docid], // (docid+1)==docid2offsets.length ? (int)(fileLength-docid2offsets[docid]) // : (int)(docid2offsets[docid+1] - docid2offsets[docid]))); byte[] bOut = new byte[recordLength]; try { unzip.inflate(bOut); } catch (DataFormatException dfe) { logger.error(dfe); } final int kCount = this.keyCount; String[] sOut = new String[kCount]; for (int i = 0; i < kCount; i++) { sOut[i] = Text.decode(bOut, valueByteOffsets[i], valueByteLengths[i]).trim(); } return sOut; }
From source file:tests.it.crs4.seal.common.TestTextSamMapping.java
License:Open Source License
/**
 * Checks every field parsed from the {@code sam} fixture, including the sequence and base
 * qualities decoded from their byte buffers.
 */
@Test
public void testFields() throws java.nio.charset.CharacterCodingException {
    final TextSamMapping mapping = new TextSamMapping(new Text(sam));

    // Scalar fields.
    assertEquals("ERR020229.100000/1", mapping.getName());
    assertEquals(89, mapping.getFlag());
    assertEquals("chr6", mapping.getContig());
    assertEquals(3558357, mapping.get5Position());
    assertEquals(37, mapping.getMapQ());
    assertEquals("91M", mapping.getCigarStr());
    assertEquals(91, mapping.getLength());

    // Sequence bytes, decoded from the buffer's current position.
    final ByteBuffer sequence = mapping.getSequence();
    final String bases = Text.decode(sequence.array(), sequence.position(), mapping.getLength());
    assertEquals(
            "AGCTTCTTTGACTCTCGAATTTTAGCACTAGAAGAAATAGTGAGGATTATATATTTCAGAAGTTCTCACCCAGGATATCAGAACACATTCA",
            bases);

    // Base qualities, decoded the same way.
    final ByteBuffer qualities = mapping.getBaseQualities();
    final String quals = Text.decode(qualities.array(), qualities.position(), mapping.getLength());
    assertEquals(
            "5:CB:CCBCCB>:C@;BBBB??B;?>1@@=C=4ACCAB3A8=CC=C?CBC=CBCCCCCCCCCCCCC@5>?=?CAAB=3=>====5>=AC?C",
            quals);
}