List of usage examples for the org.apache.hadoop.io.Text.decode method
public static String decode(byte[] utf8, int start, int length) throws CharacterCodingException
From source file:com.blm.orc.ReaderImpl.java
License:Apache License
/** * Ensure this is an ORC file to prevent users from trying to read text * files or RC files as ORC files./*from w w w .j av a 2s . c o m*/ * @param in the file being read * @param path the filename for error messages * @param psLen the postscript length * @param buffer the tail of the file * @throws IOException */ static void ensureOrcFooter(FSDataInputStream in, Path path, int psLen, ByteBuffer buffer) throws IOException { int len = OrcFile.MAGIC.length(); if (psLen < len + 1) { throw new IOException("Malformed ORC file " + path + ". Invalid postscript length " + psLen); } int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - 1 - len; byte[] array = buffer.array(); // now look for the magic string at the end of the postscript. if (!Text.decode(array, offset, len).equals(OrcFile.MAGIC)) { // If it isn't there, this may be the 0.11.0 version of ORC. // Read the first 3 bytes of the file to check for the header in.seek(0); byte[] header = new byte[len]; in.readFully(header, 0, len); // if it isn't there, this isn't an ORC file if (!Text.decode(header, 0, len).equals(OrcFile.MAGIC)) { throw new IOException("Malformed ORC file " + path + ". Invalid postscript."); } } }
From source file:com.ebay.nest.io.sede.lazy.LazyDate.java
License:Apache License
/** * Initializes LazyDate object by interpreting the input bytes as a SQL date string. * * @param bytes/*from www.j a va 2 s . c o m*/ * @param start * @param length */ @Override public void init(ByteArrayRef bytes, int start, int length) { String s = null; try { s = Text.decode(bytes.getData(), start, length); data.set(Date.valueOf(s)); isNull = false; } catch (Exception e) { isNull = true; logExceptionMessage(bytes, start, length, "DATE"); } }
From source file:com.ebay.nest.io.sede.lazy.LazyDouble.java
License:Apache License
@Override public void init(ByteArrayRef bytes, int start, int length) { String byteData = null;//from w w w . j a v a 2 s.c om try { byteData = Text.decode(bytes.getData(), start, length); data.set(Double.parseDouble(byteData)); isNull = false; } catch (NumberFormatException e) { isNull = true; LOG.debug("Data not in the Double data type range so converted to null. Given data is :" + byteData, e); } catch (CharacterCodingException e) { isNull = true; LOG.debug("Data not in the Double data type range so converted to null.", e); } }
From source file:com.ebay.nest.io.sede.lazy.LazyFloat.java
License:Apache License
/**
 * Decodes the given byte range as UTF-8 text, parses it as a float and
 * stores the result in {@code data}.
 *
 * <p>If the bytes cannot be decoded or the text is not a parsable float,
 * the value is marked as null and a debug message is logged.
 *
 * @param bytes reference to the byte array holding the encoded number
 * @param start offset of the first byte to decode
 * @param length number of bytes to decode
 */
@Override
public void init(ByteArrayRef bytes, int start, int length) {
    // Kept outside the try block so the parse-failure handler can log the text.
    String decoded = null;
    try {
        decoded = Text.decode(bytes.getData(), start, length);
        float parsed = Float.parseFloat(decoded);
        data.set(parsed);
        isNull = false;
    } catch (CharacterCodingException e) {
        isNull = true;
        LOG.debug("Data not in the Float data type range so converted to null.", e);
    } catch (NumberFormatException e) {
        isNull = true;
        LOG.debug("Data not in the Float data type range so converted to null. Given data is :" + decoded, e);
    }
}
From source file:com.ebay.nest.io.sede.lazy.LazyHiveVarchar.java
License:Apache License
@Override public void init(ByteArrayRef bytes, int start, int length) { String byteData = null;// w ww . j a va2s .c om try { byteData = Text.decode(bytes.getData(), start, length); data.set(byteData, maxLength); isNull = false; } catch (CharacterCodingException e) { isNull = true; LOG.debug("Data not in the HiveVarchar data type range so converted to null.", e); } }
From source file:com.ebay.nest.io.sede.lazy.LazyPrimitive.java
License:Apache License
/**
 * Logs, at debug level, that a value could not be interpreted as the given
 * data type and was therefore converted to null.
 *
 * <p>Nothing is decoded or logged unless debug logging is enabled. If the
 * offending bytes cannot themselves be decoded, a shorter message without
 * the data is logged instead.
 *
 * @param bytes reference to the byte array holding the offending data
 * @param start offset of the first byte of the data
 * @param length number of bytes of the data
 * @param dataType name of the data type, used in the log message
 */
public void logExceptionMessage(ByteArrayRef bytes, int start, int length, String dataType) {
    if (!LOG.isDebugEnabled()) {
        return;
    }
    try {
        String offendingText = Text.decode(bytes.getData(), start, length);
        String message = "Data not in the " + dataType
                + " data type range so converted to null. Given data is :" + offendingText;
        // The synthetic exception only serves to attach a stack trace to the log entry.
        LOG.debug(message, new Exception("For debugging purposes"));
    } catch (CharacterCodingException decodeFailure) {
        LOG.debug("Data not in the " + dataType + " data type range so converted to null.", decodeFailure);
    }
}
From source file:com.ebay.nest.io.sede.lazy.LazyUtils.java
License:Apache License
/**
 * Convert a UTF-8 byte array to String.
 *
 * @param bytes
 *          The byte[] containing the UTF-8 String.
 * @param start
 *          The start position inside the bytes.
 * @param length
 *          The length of the data, starting from "start"
 * @return The unicode String, or {@code null} if the bytes cannot be decoded
 */
public static String convertToString(byte[] bytes, int start, int length) {
    String decoded;
    try {
        decoded = Text.decode(bytes, start, length);
    } catch (CharacterCodingException e) {
        // Undecodable input is reported to the caller as null rather than as an exception.
        decoded = null;
    }
    return decoded;
}
From source file:com.ebay.nest.io.sede.MetadataTypedColumnsetSerDe.java
License:Apache License
@Override public Object deserialize(Writable field) throws SerDeException { String row = null;//from w ww. ja v a2s .c o m if (field instanceof BytesWritable) { BytesWritable b = (BytesWritable) field; try { row = Text.decode(b.getBytes(), 0, b.getLength()); } catch (CharacterCodingException e) { throw new SerDeException(e); } } else if (field instanceof Text) { row = field.toString(); } try { deserialize(deserializeCache, row, separator, nullString, splitLimit); if (columnNames != null) { assert (columnNames.size() == deserializeCache.col.size()); } return deserializeCache; } catch (ClassCastException e) { throw new SerDeException(this.getClass().getName() + " expects Text or BytesWritable", e); } catch (Exception e) { throw new SerDeException(e); } }
From source file:com.pagerankcalculator.calculation.PageRankCalculationMapper.java
@Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { int tabIdx1 = value.find("\t"); int tabIdx2 = value.find("\t", tabIdx1 + 1); String userID = Text.decode(value.getBytes(), 0, tabIdx1); String pageRank = Text.decode(value.getBytes(), tabIdx1 + 1, tabIdx2 - (tabIdx1 + 1)); String CSVFollowingIDs = Text.decode(value.getBytes(), tabIdx2 + 1, value.getLength() - (tabIdx2 + 1)); // System.out.print(userID); // System.out.print("\t"); // System.out.print(pageRank); // System.out.print("\t"); // System.out.println(CSVFollowingIDs); String[] followingIDs = CSVFollowingIDs.split(TwitterPageRank.FOLLOWING_LIST_DELIMETER); Integer totalFollowingIDs = followingIDs.length; for (String followingID : followingIDs) { String pageRankWithTotalFollowing = pageRank + "\t" + totalFollowingIDs.toString(); context.write(new Text(followingID), new Text(pageRankWithTotalFollowing)); }/*from w w w .java2s . c om*/ context.write(new Text(userID), new Text(TwitterPageRank.FOLLOWING_LIST_TAG + CSVFollowingIDs)); }
From source file:com.pagerankcalculator.graphparsing.GraphParsingMapper.java
/**
 * Splits one input line of the form {@code userID <TAB> followerID} and
 * emits the reversed pair (followerID, userID).
 *
 * <p>The decoded values are stored in the {@code userID} and
 * {@code followerID} variables, which are declared outside this method.
 *
 * @param key the input key; only used in the error message for malformed lines
 * @param value the input line
 * @param context the context the output pair is written to
 * @throws IOException if the line contains no tab separator, or if decoding
 *         or writing fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    int tabIndex = value.find("\t");
    // Text.find returns -1 when the separator is missing; fail with a clear
    // message instead of handing a negative length to Text.decode.
    if (tabIndex < 0) {
        throw new IOException("Malformed input record at key " + key
                + ": expected a tab separator but got: " + value);
    }

    byte[] bytes = value.getBytes();
    userID = Text.decode(bytes, 0, tabIndex);
    followerID = Text.decode(bytes, tabIndex + 1, value.getLength() - (tabIndex + 1));

    context.write(new Text(followerID), new Text(userID));
}