Example usage for org.apache.hadoop.io Text getBytes

Introduction

This page collects example usages of the getBytes() method of org.apache.hadoop.io.Text.

Prototype

@Override
public byte[] getBytes()

Source Link

Document

Returns the raw backing byte array; however, only the bytes up to getLength() are valid data.

Usage

From source file:co.nubetech.hiho.dedup.DelimitedLineRecordReader.java

License:Apache License

/**
 * Extracts a single delimited column from a line of text.
 *
 * <p>Columns are numbered starting at 1. The last column of a line does not
 * need a trailing delimiter: when no further delimiter is found, the column
 * extends to the end of the valid data of {@code val}.
 *
 * @param val the line to read the column from
 * @param column the 1-based index of the column to extract
 * @param delimiter the column separator; must be neither null nor empty
 * @return a new {@link Text} holding a copy of the requested column's bytes
 * @throws IOException if the delimiter is null or empty, or if the line has
 *         fewer than {@code column} columns
 * @throws IllegalArgumentException if {@code column} is less than 1
 */
public Text getColumn(Text val, int column, String delimiter) throws IOException {
    if (delimiter == null || delimiter.equals("")) {
        throw new IOException("Value of delimiter is empty");
    }
    if (column < 1) {
        throw new IllegalArgumentException("Column index must be 1 or greater: " + column);
    }

    // Text.find() works with byte offsets, so the delimiter width has to be
    // measured in encoded bytes rather than in chars.
    final int delimiterLength = Text.encode(delimiter).limit();
    // Only the first getLength() bytes of the backing array are valid data.
    final int validLength = val.getLength();

    int start = 0;
    int end = 0;
    int next = 0;
    for (int i = 0; i < column; i++) {
        if (next > validLength) {
            throw new IOException("Line has fewer than " + column + " columns");
        }
        start = next;
        int hit = val.find(delimiter, start);
        // No further delimiter: the column runs to the end of the line.
        end = (hit < 0) ? validLength : hit;
        next = end + delimiterLength;
    }

    logger.debug("text value is: " + val);
    Text keyColumn = new Text();
    keyColumn.set(val.getBytes(), start, end - start);
    return keyColumn;
}

From source file:co.nubetech.hiho.dedup.HashUtility.java

License:Apache License

/**
 * Computes the MD5 hash of the valid content of a {@link Text} key.
 *
 * @param key the key to hash
 * @return the MD5 hash of the first {@code key.getLength()} bytes of the key
 * @throws IOException declared for callers; kept from the original signature
 */
public static MD5Hash getMD5Hash(Text key) throws IOException {
    // getBytes() hands out the backing array, which can be longer than the
    // valid data; restrict the digest to the valid prefix so that equal keys
    // always produce equal hashes.
    return MD5Hash.digest(key.getBytes(), 0, key.getLength());
}

From source file:com.acme.io.JsonLoader.java

License:Apache License

/**
 * Retrieves the next tuple to be processed. Implementations should NOT
 * reuse tuple objects (or inner member objects) they return across calls
 * and should return a different tuple object in each call.
 *
 * <p>A record that does not parse as expected is logged and returned as a
 * (possibly partially filled) tuple instead of failing, so that a few
 * mangled lines do not fail the job.
 *
 * @return the next tuple to be processed or null if there are no more
 * tuples to be processed.
 * @throws IOException if there is an exception while retrieving the next
 * tuple
 */
public Tuple getNext() throws IOException {
    Text val = null;
    try {
        // Read the next key value pair from the record reader.  If it's
        // finished, return null
        if (!reader.nextKeyValue()) {
            return null;
        }

        // Get the current value.  We don't use the key.
        val = (Text) reader.getCurrentValue();
    } catch (InterruptedException ie) {
        // Keep the interrupt visible to the caller's thread before converting.
        Thread.currentThread().interrupt();
        throw new IOException(ie);
    }

    // Create a parser specific for this input line.  This may not be the
    // most efficient approach.  Only the first getLength() bytes of the
    // backing array belong to the current line, so limit the stream to them.
    ByteArrayInputStream bais = new ByteArrayInputStream(val.getBytes(), 0, val.getLength());
    JsonParser p = jsonFactory.createJsonParser(bais);
    try {
        // Create the tuple we will be returning.  We create it with the right
        // number of fields, as the Tuple object is optimized for this case.
        Tuple t = tupleFactory.newTuple(fields.length);

        // Read the start object marker.
        if (p.nextToken() != JsonToken.START_OBJECT) {
            log.warn("Bad record, could not find start of record " + val.toString());
            return t;
        }

        // Read each field in the record
        for (int i = 0; i < fields.length; i++) {
            t.set(i, readField(p, fields[i], i));
        }

        if (p.nextToken() != JsonToken.END_OBJECT) {
            log.warn("Bad record, could not find end of record " + val.toString());
        }
        return t;
    } finally {
        // Release the parser on every path, including bad records.
        p.close();
    }
}

From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImporterMapper.java

License:Apache License

/**
 * Convert a line of TSV text into an HBase table row.
 *
 * <p>The row key is generated from the whole line; every parsed column whose
 * qualifier is not listed in {@code notNeedLoadColumnQulifiers} becomes one
 * cell of the emitted {@code Put}. The total line counter is incremented for
 * every line, whether or not it could be processed.
 *
 * @param offset the offset of the line, used only in bad-line messages
 * @param value the TSV line
 * @param context the context the resulting row is written to
 * @throws IOException if the line is bad and bad lines are not skipped, if
 *         the row key cannot be generated, or if the task is interrupted
 *         while writing
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
    byte[] lineBytes = value.getBytes();
    ts = System.currentTimeMillis();

    try {
        MutipleColumnImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength());
        // Generate the row key from the complete line.
        String newRowKey = rowkeyGenerator.generateByGenRKStep(value.toString(), false);
        byte[] rowKeyBytes = newRowKey.getBytes();

        Put put = new Put(rowKeyBytes);
        for (int i = 0; i < parsed.getColumnCount(); i++) {
            byte[] family = parser.getFamily(i);
            byte[] qualifier = parser.getQualifier(i);
            if (notNeedLoadColumnQulifiers.contains(new String(qualifier))) {
                continue;
            }
            // Build the cell straight from the parsed line: row key, column
            // family/qualifier, and the column's slice of the line as value.
            KeyValue kv = new KeyValue(rowKeyBytes, 0, rowKeyBytes.length,
                    family, 0, family.length, qualifier, 0, qualifier.length,
                    ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i),
                    parsed.getColumnLength(i));
            put.add(kv);
        }
        context.write(new ImmutableBytesWritable(rowKeyBytes), put);
    } catch (MutipleColumnImportTsv.TsvParser.BadTsvLineException badLine) {
        if (skipBadLines) {
            System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
            incrementBadLineCount(1);
            return;
        } else {
            throw new IOException(badLine);
        }
    } catch (IllegalArgumentException e) {
        if (skipBadLines) {
            System.err.println("Bad line at offset: " + offset.get() + ":\n" + e.getMessage());
            incrementBadLineCount(1);
            return;
        } else {
            throw new IOException(e);
        }
    } catch (InterruptedException e) {
        // Do not swallow the interrupt: restore the flag and fail the call so
        // the framework can stop the task.
        Thread.currentThread().interrupt();
        throw new IOException(e);
    } catch (RowKeyGeneratorException e) {
        System.err.println("gen rowkey error, please check config in the ocnosqlTab.xml." + e.getMessage());
        throw new IOException(e);
    } finally {
        totalLineCount.increment(1);
    }
}

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImporterMapper.java

License:Apache License

/**
 * Convert a line of TSV text into an HBase table row.
 *
 * <p>Every parsed column is copied into its own {@code Text}; the resulting
 * array is handed to {@code writer} and written under a row key generated
 * from the whole line.
 *
 * @param offset the offset of the line, used only in bad-line messages
 * @param value the TSV line
 * @param context the context the resulting row is written to
 * @throws IOException if the line is bad and bad lines are not skipped, or if
 *         the task is interrupted while writing
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
    byte[] lineBytes = value.getBytes();

    try {
        TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength());

        int columnCount = parsed.getColumnCount();
        Text[] texts = new Text[columnCount];
        for (int i = 0; i < columnCount; i++) {
            // 'text' is declared outside this method; it is re-assigned for
            // every column exactly as before.
            text = new Text();
            text.append(lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i));
            texts[i] = text;
        }
        writer.set(texts);

        // Generate the row key from the complete line.
        String newRowKey = rowkeyGenerator.generateByGenRKStep(value.toString(), false);
        context.write(new ImmutableBytesWritable(newRowKey.getBytes()), writer);
    } catch (BadTsvLineException badLine) {
        if (skipBadLines) {
            LOG.error("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
            badLineCount.increment(1);
            return;
        } else {
            throw new IOException(badLine);
        }
    } catch (InterruptedException e) {
        // Do not swallow the interrupt: restore the flag and fail the call so
        // the framework can stop the task.
        Thread.currentThread().interrupt();
        throw new IOException(e);
    }
}

From source file:com.asakusafw.dag.runtime.io.ValueOptionSerDe.java

License:Apache License

/**
 * Writes a {@link StringOption} to the given output.
 * A null option is written as the {@code UNSIGNED_NULL} marker alone; any
 * other option is written as its byte length followed by that many bytes.
 * @param option the object to serialize
 * @param output the destination
 * @throws IOException if writing to the output fails
 */
public static void serialize(StringOption option, DataOutput output) throws IOException {
    if (option.isNull()) {
        writeCompactInt(UNSIGNED_NULL, output);
        return;
    }
    Text text = option.get();
    int size = text.getLength();
    writeCompactInt(size, output);
    // only the first 'size' bytes of the backing array are valid
    output.write(text.getBytes(), 0, size);
}

From source file:com.asakusafw.dag.runtime.io.ValueOptionSerDe.java

License:Apache License

/**
 * Reads a {@link StringOption} from the given input.
 * The input starts with a compact int that is either the {@code UNSIGNED_NULL}
 * marker or the byte length of the text that follows.
 * @param option the object to fill
 * @param input the source
 * @throws IOException if reading from the input fails
 */
public static void deserialize(StringOption option, DataInput input) throws IOException {
    final int size = readCompactInt(input);
    if (size == UNSIGNED_NULL) {
        option.setNull();
        return;
    }
    if (option.isNull()) {
        // switch the option to non-null before filling it
        option.reset();
    } else {
        Text current = option.get();
        if (current.getLength() == size) {
            // same length as the current text: read straight into its bytes
            input.readFully(current.getBytes(), 0, size);
            return;
        }
    }
    byte[] scratch = getLocalBuffer(size, Integer.MAX_VALUE);
    input.readFully(scratch, 0, size);
    option.modify(scratch, 0, size);
}

From source file:com.asakusafw.runtime.io.line.BasicLineOutput.java

License:Apache License

/**
 * Decodes the valid bytes of the given text and appends the resulting
 * characters to {@code writer}. Empty text writes nothing.
 *
 * @param entity the text to write
 * @throws IOException if the bytes cannot be decoded, or if appending to the
 *         writer fails
 */
private void write(Text entity) throws IOException {
    if (entity.getLength() == 0) {
        return;
    }
    // Re-use the cached wrapper as long as the text still has the same
    // backing array; otherwise wrap the new array and cache that.
    ByteBuffer buffer = wrapperCache;
    byte[] b = entity.getBytes();
    if (buffer == null || buffer.array() != b) {
        buffer = ByteBuffer.wrap(b);
        wrapperCache = buffer;
    }
    // Expose only the valid part of the backing array to the decoder.
    buffer.position(0);
    buffer.limit(entity.getLength());

    // Each entity is decoded as one complete input: reset, decode, flush.
    decoder.reset();
    boolean flushing = false;
    CharBuffer cs = charBuffer;
    while (true) {
        cs.clear();
        CoderResult result;
        if (flushing) {
            result = decoder.flush(cs);
        } else {
            result = decoder.decode(buffer, cs, true);
        }
        if (result.isError()) {
            try {
                result.throwException();
            } catch (CharacterCodingException e) {
                throw new IOException(MessageFormat.format("exception occurred while decoding text: {0}", path),
                        e);
            }
        }
        cs.flip();
        if (cs.hasRemaining()) {
            writer.append(cs);
        }
        if (result.isUnderflow()) {
            if (flushing) {
                // flush() has nothing more to emit: done.
                break;
            }
            // decode() consumed all input: move on to flushing.
            flushing = true;
        }
        // On overflow the char buffer was drained above; loop to continue.
    }
}

From source file:com.asakusafw.runtime.io.line.Utf8LineOutput.java

License:Apache License

/**
 * Writes the text of the given option followed by a line break.
 * A null-valued option writes nothing at all.
 * @param model the option to write
 * @throws IOException if writing to the output fails
 * @throws IllegalArgumentException if {@code model} is {@code null}
 */
@Override
public void write(StringOption model) throws IOException {
    if (model == null) {
        throw new IllegalArgumentException("model must not be null"); //$NON-NLS-1$
    }
    if (!model.isNull()) {
        Text text = model.get();
        // only the first getLength() bytes of the backing array are valid
        output.write(text.getBytes(), 0, text.getLength());
        output.write(LINE_BREAK);
    }
}

From source file:com.asakusafw.runtime.io.TsvEmitter.java

License:Apache License

/**
 * Emits a string cell.
 * Starts a new cell, then decodes the option's text and passes the decoded
 * characters on via {@code consumeDecoded()}. Nothing further is emitted when
 * {@code emitNull(option)} reports true or when the text is empty.
 * @param option the value to emit
 * @throws IOException if the text cannot be decoded or emitting fails
 */
@Override
public void emit(StringOption option) throws IOException {
    startCell();
    if (emitNull(option)) {
        return;
    }
    Text text = option.get();
    int size = text.getLength();
    if (size == 0) {
        return;
    }

    ByteBuffer source = ByteBuffer.wrap(text.getBytes(), 0, size);
    decoder.reset();
    decodeBuffer.clear();

    // Decode phase: drain the decode buffer after every step and repeat
    // until the decoder reports that the input is exhausted.
    CoderResult state;
    do {
        state = decoder.decode(source, decodeBuffer, true);
        if (state.isError()) {
            throw new RecordFormatException(
                    MessageFormat.format("Cannot process a character string (\"{0}\")", state));
        }
        consumeDecoded();
    } while (!state.isUnderflow());

    // Flush phase: same draining scheme for whatever the decoder still holds.
    do {
        state = decoder.flush(decodeBuffer);
        if (state.isError()) {
            throw new RecordFormatException(
                    MessageFormat.format("Cannot process a character string (\"{0}\")", state));
        }
        consumeDecoded();
    } while (!state.isUnderflow());
}