Example usage for org.apache.hadoop.io Text decode

List of usage examples for org.apache.hadoop.io Text decode

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.decode.

Prototype

public static String decode(byte[] utf8, int start, int length) throws CharacterCodingException 

Source Link

Usage

From source file:com.blm.orc.ReaderImpl.java

License:Apache License

/**
 * Ensure this is an ORC file to prevent users from trying to read text
 * files or RC files as ORC files./*from w  w w  .j av  a  2s  .  c o  m*/
 * @param in the file being read
 * @param path the filename for error messages
 * @param psLen the postscript length
 * @param buffer the tail of the file
 * @throws IOException
 */
static void ensureOrcFooter(FSDataInputStream in, Path path, int psLen, ByteBuffer buffer) throws IOException {
    int len = OrcFile.MAGIC.length();
    if (psLen < len + 1) {
        throw new IOException("Malformed ORC file " + path + ". Invalid postscript length " + psLen);
    }
    int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - 1 - len;
    byte[] array = buffer.array();
    // now look for the magic string at the end of the postscript.
    if (!Text.decode(array, offset, len).equals(OrcFile.MAGIC)) {
        // If it isn't there, this may be the 0.11.0 version of ORC.
        // Read the first 3 bytes of the file to check for the header
        in.seek(0);
        byte[] header = new byte[len];
        in.readFully(header, 0, len);
        // if it isn't there, this isn't an ORC file
        if (!Text.decode(header, 0, len).equals(OrcFile.MAGIC)) {
            throw new IOException("Malformed ORC file " + path + ". Invalid postscript.");
        }
    }
}

From source file:com.ebay.nest.io.sede.lazy.LazyDate.java

License:Apache License

/**
 * Initializes LazyDate object by interpreting the input bytes as a SQL date string.
 *
 * @param bytes/*from www.j a  va 2  s .  c  o  m*/
 * @param start
 * @param length
 */
@Override
public void init(ByteArrayRef bytes, int start, int length) {
    String s = null;
    try {
        s = Text.decode(bytes.getData(), start, length);
        data.set(Date.valueOf(s));
        isNull = false;
    } catch (Exception e) {
        isNull = true;
        logExceptionMessage(bytes, start, length, "DATE");
    }
}

From source file:com.ebay.nest.io.sede.lazy.LazyDouble.java

License:Apache License

/**
 * Initializes this object by decoding the given byte range as UTF-8 text and
 * parsing it as a double. On a decoding or parsing failure the object is marked
 * as null and the failure is logged at debug level.
 *
 * @param bytes reference to the byte array that holds the value
 * @param start offset of the first byte of the value within the array
 * @param length number of bytes to decode
 */
@Override
public void init(ByteArrayRef bytes, int start, int length) {
    // Declared outside the try so the parse-failure handler can report the decoded text.
    String text = null;
    try {
        text = Text.decode(bytes.getData(), start, length);
        double parsed = Double.parseDouble(text);
        data.set(parsed);
        isNull = false;
    } catch (CharacterCodingException e) {
        // The bytes could not be decoded, so there is no text to report.
        isNull = true;
        LOG.debug("Data not in the Double data type range so converted to null.", e);
    } catch (NumberFormatException e) {
        isNull = true;
        LOG.debug("Data not in the Double data type range so converted to null. Given data is :" + text, e);
    }
}

From source file:com.ebay.nest.io.sede.lazy.LazyFloat.java

License:Apache License

/**
 * Initializes this object by decoding the given byte range as UTF-8 text and
 * parsing it as a float. On a decoding or parsing failure the object is marked
 * as null and the failure is logged at debug level.
 *
 * @param bytes reference to the byte array that holds the value
 * @param start offset of the first byte of the value within the array
 * @param length number of bytes to decode
 */
@Override
public void init(ByteArrayRef bytes, int start, int length) {
    // Declared outside the try so the parse-failure handler can report the decoded text.
    String text = null;
    try {
        text = Text.decode(bytes.getData(), start, length);
        float parsed = Float.parseFloat(text);
        data.set(parsed);
        isNull = false;
    } catch (CharacterCodingException e) {
        // The bytes could not be decoded, so there is no text to report.
        isNull = true;
        LOG.debug("Data not in the Float data type range so converted to null.", e);
    } catch (NumberFormatException e) {
        isNull = true;
        LOG.debug("Data not in the Float data type range so converted to null. Given data is :" + text, e);
    }
}

From source file:com.ebay.nest.io.sede.lazy.LazyHiveVarchar.java

License:Apache License

/**
 * Initializes this object by decoding the given byte range as UTF-8 text and
 * storing it together with {@code maxLength}. If the bytes cannot be decoded the
 * object is marked as null and the failure is logged at debug level.
 *
 * @param bytes reference to the byte array that holds the value
 * @param start offset of the first byte of the value within the array
 * @param length number of bytes to decode
 */
@Override
public void init(ByteArrayRef bytes, int start, int length) {
    try {
        final String decoded = Text.decode(bytes.getData(), start, length);
        data.set(decoded, maxLength);
        isNull = false;
    } catch (CharacterCodingException e) {
        isNull = true;
        LOG.debug("Data not in the HiveVarchar data type range so converted to null.", e);
    }
}

From source file:com.ebay.nest.io.sede.lazy.LazyPrimitive.java

License:Apache License

/**
 * Logs, at debug level, that a value could not be interpreted as {@code dataType}
 * and was therefore converted to null. Does nothing when debug logging is disabled.
 *
 * @param bytes reference to the byte array that holds the offending value
 * @param start offset of the first byte of the value within the array
 * @param length number of bytes of the value
 * @param dataType name of the target data type, used in the log message
 */
public void logExceptionMessage(ByteArrayRef bytes, int start, int length, String dataType) {
    if (!LOG.isDebugEnabled()) {
        return;
    }
    try {
        final String offending = Text.decode(bytes.getData(), start, length);
        // The synthetic exception only serves to attach a stack trace to the log entry.
        LOG.debug("Data not in the " + dataType + " data type range so converted to null. Given data is :"
                + offending, new Exception("For debugging purposes"));
    } catch (CharacterCodingException decodeFailure) {
        // The value itself is not decodable, so log without it.
        LOG.debug("Data not in the " + dataType + " data type range so converted to null.", decodeFailure);
    }
}

From source file:com.ebay.nest.io.sede.lazy.LazyUtils.java

License:Apache License

/**
 * Convert a UTF-8 byte array to String.
 *
 * @param bytes
 *          The byte[] containing the UTF-8 String.
 * @param start
 *          The start position inside the bytes.
 * @param length
 *          The length of the data, starting from "start"
 * @return The unicode String, or {@code null} if the bytes cannot be decoded
 */
public static String convertToString(byte[] bytes, int start, int length) {
    String decoded;
    try {
        decoded = Text.decode(bytes, start, length);
    } catch (CharacterCodingException ignored) {
        // Undecodable input is reported to the caller as null rather than as an exception.
        decoded = null;
    }
    return decoded;
}

From source file:com.ebay.nest.io.sede.MetadataTypedColumnsetSerDe.java

License:Apache License

/**
 * Deserializes one row into the reusable {@code deserializeCache}.
 *
 * <p>A {@code BytesWritable} is decoded as UTF-8 text and a {@code Text} is converted
 * with {@code toString()}. Both are then split by the five-argument
 * {@code deserialize} overload.
 *
 * @param field the serialized row
 * @return the {@code deserializeCache} instance, filled with the row's columns
 * @throws SerDeException if the bytes cannot be decoded or the row cannot be
 *         deserialized
 */
@Override
public Object deserialize(Writable field) throws SerDeException {
    // NOTE(review): for any other Writable type the row stays null and is passed on
    // as-is; how the overload below treats a null row is not visible here.
    String row = null;
    if (field instanceof Text) {
        row = field.toString();
    } else if (field instanceof BytesWritable) {
        BytesWritable raw = (BytesWritable) field;
        try {
            row = Text.decode(raw.getBytes(), 0, raw.getLength());
        } catch (CharacterCodingException e) {
            throw new SerDeException(e);
        }
    }

    try {
        deserialize(deserializeCache, row, separator, nullString, splitLimit);
        // Sanity check: when column names are known, the row must have one value per column.
        assert columnNames == null || columnNames.size() == deserializeCache.col.size();
        return deserializeCache;
    } catch (ClassCastException e) {
        throw new SerDeException(this.getClass().getName() + " expects Text or BytesWritable", e);
    } catch (Exception e) {
        throw new SerDeException(e);
    }
}

From source file:com.pagerankcalculator.calculation.PageRankCalculationMapper.java

/**
 * Splits one record of the form {@code userID<TAB>pageRank<TAB>followingIDs} and emits:
 * <ul>
 *   <li>for every following ID, the pair (followingID, pageRank TAB number of following IDs);</li>
 *   <li>for the user, the pair (userID, FOLLOWING_LIST_TAG + followingIDs).</li>
 * </ul>
 *
 * @param key the input key (unused)
 * @param value the input record
 * @param context the context the output pairs are written to
 * @throws IOException if the record does not contain two tab separators, if it cannot
 *         be decoded, or if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    int tabIdx1 = value.find("\t");
    int tabIdx2 = (tabIdx1 < 0) ? -1 : value.find("\t", tabIdx1 + 1);
    if (tabIdx2 < 0) {
        // find() returns -1 when the separator is missing; fail with a readable message
        // instead of letting a negative length reach Text.decode.
        throw new IOException(
                "Malformed input record, expected userID<TAB>pageRank<TAB>followingIDs: " + value);
    }

    // Only the first getLength() bytes of the backing array are valid.
    byte[] raw = value.getBytes();
    String userID = Text.decode(raw, 0, tabIdx1);
    String pageRank = Text.decode(raw, tabIdx1 + 1, tabIdx2 - (tabIdx1 + 1));
    String CSVFollowingIDs = Text.decode(raw, tabIdx2 + 1, value.getLength() - (tabIdx2 + 1));

    // NOTE(review): an empty following list still splits into one empty ID, so a pair
    // with an empty key is emitted -- confirm whether that is intended.
    String[] followingIDs = CSVFollowingIDs.split(TwitterPageRank.FOLLOWING_LIST_DELIMETER);

    // The emitted value is the same for every following ID, so build it once.
    String pageRankWithTotalFollowing = pageRank + "\t" + followingIDs.length;
    for (String followingID : followingIDs) {
        context.write(new Text(followingID), new Text(pageRankWithTotalFollowing));
    }

    context.write(new Text(userID), new Text(TwitterPageRank.FOLLOWING_LIST_TAG + CSVFollowingIDs));
}

From source file:com.pagerankcalculator.graphparsing.GraphParsingMapper.java

/**
 * Splits one record of the form {@code userID<TAB>followerID} and emits the
 * reversed pair (followerID, userID).
 *
 * @param key the input key (unused)
 * @param value the input record
 * @param context the context the output pair is written to
 * @throws IOException if the record contains no tab separator, if it cannot be
 *         decoded, or if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    int tabIndex = value.find("\t");
    if (tabIndex < 0) {
        // find() returns -1 when the separator is missing; fail with a readable message
        // instead of letting a negative length reach Text.decode.
        throw new IOException("Malformed input record, expected userID<TAB>followerID: " + value);
    }

    // Only the first getLength() bytes of the backing array are valid.
    byte[] raw = value.getBytes();
    // userID and followerID are declared outside this method (presumably fields of the mapper).
    userID = Text.decode(raw, 0, tabIndex);
    followerID = Text.decode(raw, tabIndex + 1, value.getLength() - (tabIndex + 1));
    context.write(new Text(followerID), new Text(userID));
}