Example usage for org.apache.hadoop.io Text toString

List of usage examples for org.apache.hadoop.io Text toString

Introduction

On this page you can find example usages of the toString() method of org.apache.hadoop.io.Text.

Prototype

@Override
public String toString() 

Source Link

Document

Converts the Text back to a String.

Usage

From source file:com.ebay.erl.mobius.core.model.Tuple.java

License:Apache License

/**
 * Builds a tuple from a delimited <code>source</code>.
 * <p>
 *
 * The <code>source</code> is split on <code>delimiter</code>, and the
 * resulting values are stored in the returned tuple under the names given
 * by <code>schema</code>, position by position.
 * <p>
 *
 * If the split produces fewer values than there are names in
 * <code>schema</code>, the remaining names are stored as nulls.
 * <p>
 *
 * If the split produces more values than there are names in
 * <code>schema</code>, each extra value is stored under the name
 * <code>IDX_$i</code>, where <code>$i</code> is the position of that value
 * and therefore starts at the length of <code>schema</code>.
 *
 * @param source    the delimited text to convert
 * @param schema    the column names, in the same order as the values
 * @param delimiter the separator handed to <code>Util.nonRegexSplit</code>
 * @return a new tuple holding the split values
 */
public static Tuple valueOf(Text source, String[] schema, String delimiter) {

    Tuple result = new Tuple();

    // Util.nonRegexSplit is used here in place of an earlier
    // String.split(delimiter, -1) call.
    List<String> values = Util.nonRegexSplit(source.toString(), delimiter);

    // Named columns: take the value at the same position, or null when
    // the source ran out of values.
    int position = 0;
    for (String columnName : schema) {
        if (position < values.size()) {
            result.put(columnName, values.get(position));
        } else {
            result.putNull(columnName);
        }
        position++;
    }

    // Values beyond the user-specified schema are appended at the tail
    // under generated names.
    for (int extra = schema.length; extra < values.size(); extra++) {
        result.put("IDX_" + extra, values.get(extra));
    }

    return result;
}

From source file:com.ebay.nest.FormattedSequenceFile.java

License:Apache License

/**
 * Reads the next record from the input and, when one is available, replaces
 * the contents of the incoming entry's tuple with the record value rendered
 * as a single string.
 *
 * @param flowProcess not used by this method
 * @param sourceCall  supplies the key/value holders (context slots 0 and 1),
 *                    the input reader and the incoming entry
 * @return <code>true</code> if a record was read and stored in the tuple,
 *         <code>false</code> if the input reported no further record
 * @throws IOException if reading from the input fails
 */
@SuppressWarnings("unchecked")
@Override
public boolean source(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall)
        throws IOException {
    Object key = sourceCall.getContext()[0];
    Text value = (Text) sourceCall.getContext()[1];

    // Nothing left to read: leave the incoming tuple untouched.
    if (!sourceCall.getInput().next(key, value)) {
        return false;
    }

    Tuple entryTuple = sourceCall.getIncomingEntry().getTuple();
    entryTuple.clear();
    entryTuple.addAll(new String[] { value.toString() });
    return true;
}

From source file:com.ebay.nest.io.sede.lazy.objectinspector.LazyObjectInspectorFactory.java

License:Apache License

/**
 * Returns a {@link LazySimpleStructObjectInspector} for the given settings,
 * looking it up in <code>cachedLazySimpleStructObjectInspector</code> first
 * and creating and caching a new one only on a miss.
 * <p>
 * The cache key is a list of the field names, the field inspectors, the
 * separator, the string form of <code>nullSequence</code>, the two boolean
 * flags, the escape character and, only when non-null, the field comments.
 *
 * @return the cached inspector for this signature, or a newly created one
 */
public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector(List<String> structFieldNames,
        List<ObjectInspector> structFieldObjectInspectors, List<String> structFieldComments, byte separator,
        Text nullSequence, boolean lastColumnTakesRest, boolean escaped, byte escapeChar) {
    ArrayList<Object> cacheKey = new ArrayList<Object>();
    cacheKey.add(structFieldNames);
    cacheKey.add(structFieldObjectInspectors);
    cacheKey.add(Byte.valueOf(separator));
    // The Text is keyed by its string form rather than by the Text object itself.
    cacheKey.add(nullSequence.toString());
    cacheKey.add(Boolean.valueOf(lastColumnTakesRest));
    cacheKey.add(Boolean.valueOf(escaped));
    cacheKey.add(Byte.valueOf(escapeChar));
    // Comments are optional and only take part in the key when supplied.
    if (structFieldComments != null) {
        cacheKey.add(structFieldComments);
    }

    LazySimpleStructObjectInspector cached = cachedLazySimpleStructObjectInspector.get(cacheKey);
    if (cached != null) {
        return cached;
    }

    LazySimpleStructObjectInspector created = new LazySimpleStructObjectInspector(structFieldNames,
            structFieldObjectInspectors, structFieldComments, separator, nullSequence, lastColumnTakesRest,
            escaped, escapeChar);
    cachedLazySimpleStructObjectInspector.put(cacheKey, created);
    return created;
}

From source file:com.ebay.nest.io.sede.lazy.objectinspector.LazyObjectInspectorFactory.java

License:Apache License

/**
 * Returns a {@link LazyListObjectInspector} for the given settings, looking
 * it up in <code>cachedLazySimpleListObjectInspector</code> first and
 * creating and caching a new one only on a miss.
 * <p>
 * The cache key is a list of the element inspector, the separator, the
 * string form of <code>nullSequence</code>, the escaped flag and the escape
 * character.
 *
 * @return the cached inspector for this signature, or a newly created one
 */
public static LazyListObjectInspector getLazySimpleListObjectInspector(
        ObjectInspector listElementObjectInspector, byte separator, Text nullSequence, boolean escaped,
        byte escapeChar) {
    ArrayList<Object> cacheKey = new ArrayList<Object>();
    cacheKey.add(listElementObjectInspector);
    cacheKey.add(Byte.valueOf(separator));
    // The Text is keyed by its string form rather than by the Text object itself.
    cacheKey.add(nullSequence.toString());
    cacheKey.add(Boolean.valueOf(escaped));
    cacheKey.add(Byte.valueOf(escapeChar));

    LazyListObjectInspector cached = cachedLazySimpleListObjectInspector.get(cacheKey);
    if (cached != null) {
        return cached;
    }

    LazyListObjectInspector created = new LazyListObjectInspector(listElementObjectInspector, separator,
            nullSequence, escaped, escapeChar);
    cachedLazySimpleListObjectInspector.put(cacheKey, created);
    return created;
}

From source file:com.ebay.nest.io.sede.lazy.objectinspector.LazyObjectInspectorFactory.java

License:Apache License

/**
 * Returns a {@link LazyMapObjectInspector} for the given settings, looking
 * it up in <code>cachedLazySimpleMapObjectInspector</code> first and
 * creating and caching a new one only on a miss.
 * <p>
 * The cache key is a list of the key inspector, the value inspector, the
 * item separator, the key/value separator, the string form of
 * <code>nullSequence</code>, the escaped flag and the escape character.
 *
 * @return the cached inspector for this signature, or a newly created one
 */
public static LazyMapObjectInspector getLazySimpleMapObjectInspector(ObjectInspector mapKeyObjectInspector,
        ObjectInspector mapValueObjectInspector, byte itemSeparator, byte keyValueSeparator, Text nullSequence,
        boolean escaped, byte escapeChar) {
    ArrayList<Object> cacheKey = new ArrayList<Object>();
    cacheKey.add(mapKeyObjectInspector);
    cacheKey.add(mapValueObjectInspector);
    cacheKey.add(Byte.valueOf(itemSeparator));
    cacheKey.add(Byte.valueOf(keyValueSeparator));
    // The Text is keyed by its string form rather than by the Text object itself.
    cacheKey.add(nullSequence.toString());
    cacheKey.add(Boolean.valueOf(escaped));
    cacheKey.add(Byte.valueOf(escapeChar));

    LazyMapObjectInspector cached = cachedLazySimpleMapObjectInspector.get(cacheKey);
    if (cached != null) {
        return cached;
    }

    LazyMapObjectInspector created = new LazyMapObjectInspector(mapKeyObjectInspector,
            mapValueObjectInspector, itemSeparator, keyValueSeparator, nullSequence, escaped, escapeChar);
    cachedLazySimpleMapObjectInspector.put(cacheKey, created);
    return created;
}

From source file:com.ebay.nest.io.sede.lazy.objectinspector.LazyObjectInspectorFactory.java

License:Apache License

/**
 * Returns a {@link LazyUnionObjectInspector} for the given settings, looking
 * it up in <code>cachedLazyUnionObjectInspector</code> first and creating
 * and caching a new one only on a miss.
 * <p>
 * The cache key is a list of the member inspectors, the separator, the
 * string form of <code>nullSequence</code>, the escaped flag and the escape
 * character.
 *
 * @return the cached inspector for this signature, or a newly created one
 */
public static LazyUnionObjectInspector getLazyUnionObjectInspector(List<ObjectInspector> ois, byte separator,
        Text nullSequence, boolean escaped, byte escapeChar) {
    List<Object> cacheKey = new ArrayList<Object>();
    cacheKey.add(ois);
    cacheKey.add(Byte.valueOf(separator));
    // The Text is keyed by its string form rather than by the Text object itself.
    cacheKey.add(nullSequence.toString());
    cacheKey.add(Boolean.valueOf(escaped));
    cacheKey.add(Byte.valueOf(escapeChar));

    LazyUnionObjectInspector cached = cachedLazyUnionObjectInspector.get(cacheKey);
    if (cached != null) {
        return cached;
    }

    LazyUnionObjectInspector created = new LazyUnionObjectInspector(ois, separator, nullSequence, escaped,
            escapeChar);
    cachedLazyUnionObjectInspector.put(cacheKey, created);
    return created;
}

From source file:com.ebay.nest.io.sede.objectinspector.primitive.JavaStringObjectInspector.java

License:Apache License

/**
 * Creates the Java object for the given text.
 *
 * @param value the text to convert; may be null
 * @return the string form of <code>value</code>, or null when
 *         <code>value</code> is null
 */
@Override
public Object create(Text value) {
    if (value == null) {
        return null;
    }
    return value.toString();
}

From source file:com.ebay.nest.io.sede.objectinspector.primitive.JavaStringObjectInspector.java

License:Apache License

/**
 * Produces the object that represents the given text. The existing object
 * <code>o</code> is not read or modified; a value derived solely from
 * <code>value</code> is returned instead.
 *
 * @param o     the existing object; ignored
 * @param value the text to convert; may be null
 * @return the string form of <code>value</code>, or null when
 *         <code>value</code> is null
 */
@Override
public Object set(Object o, Text value) {
    if (value == null) {
        return null;
    }
    return value.toString();
}

From source file:com.ebay.nest.io.sede.RegexSerDe.java

License:Apache License

/**
 * Deserializes one row by matching its text against <code>inputPattern</code>
 * and converting each capture group to the type of the corresponding column.
 * <p>
 * Group <code>c + 1</code> supplies the value for column <code>c</code>. A
 * column whose conversion throws a <code>RuntimeException</code> is set to
 * null and counted in <code>partialMatchedRowsCount</code>; only the first
 * such occurrence is logged.
 *
 * @param blob the row to deserialize; cast to <code>Text</code>
 * @return the <code>row</code> member with its columns populated, or null
 *         when the row text does not match the pattern as a whole
 * @throws SerDeException if the pattern's group count differs from
 *                        <code>numColumns</code>
 */
@Override
public Object deserialize(Writable blob) throws SerDeException {

    Text rowText = (Text) blob;
    Matcher matcher = inputPattern.matcher(rowText.toString());

    if (matcher.groupCount() != numColumns) {
        throw new SerDeException("Number of matching groups doesn't match the number of columns");
    }

    // A row that does not match is skipped: count it, log the first one, and return null.
    if (!matcher.matches()) {
        unmatchedRowsCount++;
        if (!alreadyLoggedNoMatch) {
            LOG.warn("" + unmatchedRowsCount + " unmatched rows are found: " + rowText);
            alreadyLoggedNoMatch = true;
        }
        return null;
    }

    // The row matched: convert every capture group to its column type and store it in row.
    for (int col = 0; col < numColumns; col++) {
        try {
            String groupText = matcher.group(col + 1);
            TypeInfo typeInfo = columnTypes.get(col);
            String typeName = typeInfo.getTypeName();

            // NOTE(review): there is no final else branch, so a type name that
            // matches none of the cases leaves this column of row untouched.
            // Confirm unsupported types are rejected before this point.
            if (typeName.equals(serdeConstants.STRING_TYPE_NAME)) {
                row.set(col, groupText);
            } else if (typeName.equals(serdeConstants.TINYINT_TYPE_NAME)) {
                row.set(col, Byte.valueOf(groupText));
            } else if (typeName.equals(serdeConstants.SMALLINT_TYPE_NAME)) {
                row.set(col, Short.valueOf(groupText));
            } else if (typeName.equals(serdeConstants.INT_TYPE_NAME)) {
                row.set(col, Integer.valueOf(groupText));
            } else if (typeName.equals(serdeConstants.BIGINT_TYPE_NAME)) {
                row.set(col, Long.valueOf(groupText));
            } else if (typeName.equals(serdeConstants.FLOAT_TYPE_NAME)) {
                row.set(col, Float.valueOf(groupText));
            } else if (typeName.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
                row.set(col, Double.valueOf(groupText));
            } else if (typeName.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
                row.set(col, Boolean.valueOf(groupText));
            } else if (typeName.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
                row.set(col, Timestamp.valueOf(groupText));
            } else if (typeName.equals(serdeConstants.DATE_TYPE_NAME)) {
                row.set(col, Date.valueOf(groupText));
            } else if (typeName.equals(serdeConstants.DECIMAL_TYPE_NAME)) {
                row.set(col, new HiveDecimal(groupText));
            } else if (typeInfo instanceof PrimitiveTypeInfo
                    && ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.VARCHAR) {
                VarcharTypeParams varcharParams = (VarcharTypeParams) ParameterizedPrimitiveTypeUtils
                        .getTypeParamsFromTypeInfo(typeInfo);
                // A length of -1 is passed when no varchar parameters are available.
                int maxLength = varcharParams != null ? varcharParams.length : -1;
                row.set(col, new HiveVarchar(groupText, maxLength));
            }
        } catch (RuntimeException e) {
            // Any runtime failure for this column (including a value that cannot be
            // parsed as the column type) is treated as a partial match.
            partialMatchedRowsCount++;
            if (!alreadyLoggedPartialMatch) {
                // Only the first partially matched row is reported.
                LOG.warn("" + partialMatchedRowsCount + " partially unmatched rows are found, "
                        + " cannot find group " + col + ": " + rowText);
                alreadyLoggedPartialMatch = true;
            }
            row.set(col, null);
        }
    }
    return row;
}

From source file:com.ebay.nest.io.sede.SerDeUtils.java

License:Apache License

/**
 * Appends a JSON-style rendering of <code>o</code> to <code>sb</code>,
 * using <code>oi</code> to interpret the object.
 * <p>
 * Primitives are appended directly (strings, varchars, dates and timestamps
 * are wrapped in double quotes); lists, maps, structs and unions are
 * rendered recursively. Nested elements always use <code>JSON_NULL</code>
 * as their null text; <code>nullStr</code> applies only to this level.
 *
 * @param sb      the builder that receives the output
 * @param o       the object to render; may be null
 * @param oi      the inspector describing <code>o</code>
 * @param nullStr the text appended when <code>o</code> (or the list/map
 *                obtained from it) is null
 * @throws RuntimeException if the category or primitive category is not
 *                          one of the handled cases
 */
static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi, String nullStr) {

    switch (oi.getCategory()) {
    case PRIMITIVE: {
        PrimitiveObjectInspector primitiveOI = (PrimitiveObjectInspector) oi;
        if (o == null) {
            sb.append(nullStr);
            break;
        }
        switch (primitiveOI.getPrimitiveCategory()) {
        case BOOLEAN:
            sb.append(((BooleanObjectInspector) primitiveOI).get(o) ? "true" : "false");
            break;
        case BYTE:
            sb.append(((ByteObjectInspector) primitiveOI).get(o));
            break;
        case SHORT:
            sb.append(((ShortObjectInspector) primitiveOI).get(o));
            break;
        case INT:
            sb.append(((IntObjectInspector) primitiveOI).get(o));
            break;
        case LONG:
            sb.append(((LongObjectInspector) primitiveOI).get(o));
            break;
        case FLOAT:
            sb.append(((FloatObjectInspector) primitiveOI).get(o));
            break;
        case DOUBLE:
            sb.append(((DoubleObjectInspector) primitiveOI).get(o));
            break;
        case STRING:
            sb.append('"')
                    .append(escapeString(((StringObjectInspector) primitiveOI).getPrimitiveJavaObject(o)))
                    .append('"');
            break;
        case VARCHAR:
            sb.append('"')
                    .append(escapeString(
                            ((HiveVarcharObjectInspector) primitiveOI).getPrimitiveJavaObject(o).toString()))
                    .append('"');
            break;
        case DATE:
            sb.append('"')
                    .append(((DateObjectInspector) primitiveOI).getPrimitiveWritableObject(o))
                    .append('"');
            break;
        case TIMESTAMP:
            sb.append('"')
                    .append(((TimestampObjectInspector) primitiveOI).getPrimitiveWritableObject(o))
                    .append('"');
            break;
        case BINARY: {
            // Only the first getLength() bytes of the backing array are copied
            // into a Text before it is rendered as a string.
            // NOTE(review): the result is appended without quotes or escaping,
            // unlike STRING above; confirm this is intended.
            BytesWritable bytes = ((BinaryObjectInspector) primitiveOI).getPrimitiveWritableObject(o);
            Text decoded = new Text();
            decoded.set(bytes.getBytes(), 0, bytes.getLength());
            sb.append(decoded.toString());
            break;
        }
        case DECIMAL:
            sb.append(((HiveDecimalObjectInspector) primitiveOI).getPrimitiveJavaObject(o));
            break;
        default:
            throw new RuntimeException("Unknown primitive type: " + primitiveOI.getPrimitiveCategory());
        }
        break;
    }
    case LIST: {
        ListObjectInspector listOI = (ListObjectInspector) oi;
        ObjectInspector elementOI = listOI.getListElementObjectInspector();
        List<?> elements = listOI.getList(o);
        if (elements == null) {
            sb.append(nullStr);
            break;
        }
        sb.append(LBRACKET);
        for (int idx = 0; idx < elements.size(); idx++) {
            if (idx > 0) {
                sb.append(COMMA);
            }
            buildJSONString(sb, elements.get(idx), elementOI, JSON_NULL);
        }
        sb.append(RBRACKET);
        break;
    }
    case MAP: {
        MapObjectInspector mapOI = (MapObjectInspector) oi;
        ObjectInspector keyOI = mapOI.getMapKeyObjectInspector();
        ObjectInspector valueOI = mapOI.getMapValueObjectInspector();
        Map<?, ?> entries = mapOI.getMap(o);
        if (entries == null) {
            sb.append(nullStr);
            break;
        }
        sb.append(LBRACE);
        boolean needsComma = false;
        for (Map.Entry<?, ?> entry : entries.entrySet()) {
            if (needsComma) {
                sb.append(COMMA);
            }
            needsComma = true;
            buildJSONString(sb, entry.getKey(), keyOI, JSON_NULL);
            sb.append(COLON);
            buildJSONString(sb, entry.getValue(), valueOI, JSON_NULL);
        }
        sb.append(RBRACE);
        break;
    }
    case STRUCT: {
        StructObjectInspector structOI = (StructObjectInspector) oi;
        List<? extends StructField> fields = structOI.getAllStructFieldRefs();
        if (o == null) {
            sb.append(nullStr);
            break;
        }
        sb.append(LBRACE);
        for (int idx = 0; idx < fields.size(); idx++) {
            if (idx > 0) {
                sb.append(COMMA);
            }
            StructField field = fields.get(idx);
            // Each field is emitted as a quoted field name, a colon, then the value.
            sb.append(QUOTE).append(field.getFieldName()).append(QUOTE).append(COLON);
            buildJSONString(sb, structOI.getStructFieldData(o, field), field.getFieldObjectInspector(),
                    JSON_NULL);
        }
        sb.append(RBRACE);
        break;
    }
    case UNION: {
        UnionObjectInspector unionOI = (UnionObjectInspector) oi;
        if (o == null) {
            sb.append(nullStr);
            break;
        }
        // A union is emitted as a single-entry object: the tag, a colon, then
        // the field rendered with the inspector selected by that tag.
        sb.append(LBRACE);
        sb.append(unionOI.getTag(o));
        sb.append(COLON);
        buildJSONString(sb, unionOI.getField(o), unionOI.getObjectInspectors().get(unionOI.getTag(o)),
                JSON_NULL);
        sb.append(RBRACE);
        break;
    }
    default:
        throw new RuntimeException("Unknown type in ObjectInspector!");
    }
}