List of usage examples for org.apache.hadoop.io.Text.toString()
@Override
public String toString()
From source file:com.ebay.erl.mobius.core.model.Tuple.java
License:Apache License
/** * Convert the <code>source</code> into a tuple. * <p>//from www . ja v a 2s .c o m * * Split the <code>source</code> with the given <code>delimiter</code>, * and use them as the values to the returned tuple, then set the * schema to the tuple. * <p> * * The ordering of the schema shall be the same as the ordering of the * values from the splitted <code>source</code>. * <p> * * If the number of values in the splitted <code>source<code> is greater * than the length of <code>schema</code>, <code>IDX_$i</code> is used * as the name of those value, where <code>$i</code> starts from the * length of <code>schema</code>. */ public static Tuple valueOf(Text source, String[] schema, String delimiter) { Tuple tuple = new Tuple(); //String[] tokens = source.toString ().split (delimiter, -1); List<String> tokens = Util.nonRegexSplit(source.toString(), delimiter); for (int i = 0; i < schema.length; i++) { if (i < tokens.size()) { tuple.put(schema[i], tokens.get(i)); } else { tuple.putNull(schema[i]); } } // there are some extra columns that exceed the length of user // specified schema, put in the tail. for (int i = schema.length; i < tokens.size(); i++) { tuple.put("IDX_" + i, tokens.get(i)); } return tuple; }
From source file:com.ebay.nest.FormattedSequenceFile.java
License:Apache License
/**
 * Reads the next record from the call's input and loads it into the incoming tuple.
 * <p>
 * The key and value holders are taken from slots 0 and 1 of the call context.
 * When a record is available, the incoming entry's tuple is cleared and
 * refilled with a single element: the string form of the value.
 *
 * @param flowProcess the current flow process (not used here)
 * @param sourceCall  supplies the context holders, the input reader and the incoming entry
 * @return {@code true} if a record was read into the tuple, {@code false} if the input had no more records
 * @throws IOException declared on the method signature
 */
@SuppressWarnings("unchecked")
@Override
public boolean source(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall)
        throws IOException {
    Object key = sourceCall.getContext()[0];
    Text value = (Text) sourceCall.getContext()[1];

    if (!sourceCall.getInput().next(key, value)) {
        return false;
    }

    Tuple target = sourceCall.getIncomingEntry().getTuple();
    target.clear();
    target.addAll(new String[] { value.toString() });
    return true;
}
From source file:com.ebay.nest.io.sede.lazy.objectinspector.LazyObjectInspectorFactory.java
License:Apache License
public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors, List<String> structFieldComments, byte separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped, byte escapeChar) { ArrayList<Object> signature = new ArrayList<Object>(); signature.add(structFieldNames);// ww w. ja v a2 s. c o m signature.add(structFieldObjectInspectors); signature.add(Byte.valueOf(separator)); signature.add(nullSequence.toString()); signature.add(Boolean.valueOf(lastColumnTakesRest)); signature.add(Boolean.valueOf(escaped)); signature.add(Byte.valueOf(escapeChar)); if (structFieldComments != null) { signature.add(structFieldComments); } LazySimpleStructObjectInspector result = cachedLazySimpleStructObjectInspector.get(signature); if (result == null) { result = new LazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors, structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped, escapeChar); cachedLazySimpleStructObjectInspector.put(signature, result); } return result; }
From source file:com.ebay.nest.io.sede.lazy.objectinspector.LazyObjectInspectorFactory.java
License:Apache License
/**
 * Returns a {@link LazyListObjectInspector} for the given settings, reusing a
 * previously created instance when one is cached under the same signature.
 * <p>
 * The cache key is the list of: element inspector, separator, the string
 * form of {@code nullSequence}, {@code escaped} and {@code escapeChar}.
 *
 * @return the cached inspector for this signature, or a newly created (and now cached) one
 */
public static LazyListObjectInspector getLazySimpleListObjectInspector(
        ObjectInspector listElementObjectInspector, byte separator, Text nullSequence, boolean escaped,
        byte escapeChar) {
    ArrayList<Object> cacheKey = new ArrayList<Object>();
    cacheKey.add(listElementObjectInspector);
    cacheKey.add(Byte.valueOf(separator));
    // Text is keyed by its string form rather than by the Text instance itself.
    cacheKey.add(nullSequence.toString());
    cacheKey.add(Boolean.valueOf(escaped));
    cacheKey.add(Byte.valueOf(escapeChar));

    LazyListObjectInspector cached = cachedLazySimpleListObjectInspector.get(cacheKey);
    if (cached != null) {
        return cached;
    }

    LazyListObjectInspector created = new LazyListObjectInspector(listElementObjectInspector, separator,
            nullSequence, escaped, escapeChar);
    cachedLazySimpleListObjectInspector.put(cacheKey, created);
    return created;
}
From source file:com.ebay.nest.io.sede.lazy.objectinspector.LazyObjectInspectorFactory.java
License:Apache License
public static LazyMapObjectInspector getLazySimpleMapObjectInspector(ObjectInspector mapKeyObjectInspector, ObjectInspector mapValueObjectInspector, byte itemSeparator, byte keyValueSeparator, Text nullSequence, boolean escaped, byte escapeChar) { ArrayList<Object> signature = new ArrayList<Object>(); signature.add(mapKeyObjectInspector); signature.add(mapValueObjectInspector); signature.add(Byte.valueOf(itemSeparator)); signature.add(Byte.valueOf(keyValueSeparator)); signature.add(nullSequence.toString()); signature.add(Boolean.valueOf(escaped)); signature.add(Byte.valueOf(escapeChar)); LazyMapObjectInspector result = cachedLazySimpleMapObjectInspector.get(signature); if (result == null) { result = new LazyMapObjectInspector(mapKeyObjectInspector, mapValueObjectInspector, itemSeparator, keyValueSeparator, nullSequence, escaped, escapeChar); cachedLazySimpleMapObjectInspector.put(signature, result); }//from www . ja va2s . c o m return result; }
From source file:com.ebay.nest.io.sede.lazy.objectinspector.LazyObjectInspectorFactory.java
License:Apache License
/**
 * Returns a {@link LazyUnionObjectInspector} for the given settings, reusing a
 * previously created instance when one is cached under the same signature.
 * <p>
 * The cache key is the list of: the alternative inspectors, separator, the
 * string form of {@code nullSequence}, {@code escaped} and {@code escapeChar}.
 *
 * @return the cached inspector for this signature, or a newly created (and now cached) one
 */
public static LazyUnionObjectInspector getLazyUnionObjectInspector(List<ObjectInspector> ois, byte separator,
        Text nullSequence, boolean escaped, byte escapeChar) {
    List<Object> cacheKey = new ArrayList<Object>();
    cacheKey.add(ois);
    cacheKey.add(Byte.valueOf(separator));
    // Text is keyed by its string form rather than by the Text instance itself.
    cacheKey.add(nullSequence.toString());
    cacheKey.add(Boolean.valueOf(escaped));
    cacheKey.add(Byte.valueOf(escapeChar));

    LazyUnionObjectInspector cached = cachedLazyUnionObjectInspector.get(cacheKey);
    if (cached != null) {
        return cached;
    }

    LazyUnionObjectInspector created = new LazyUnionObjectInspector(ois, separator, nullSequence, escaped,
            escapeChar);
    cachedLazyUnionObjectInspector.put(cacheKey, created);
    return created;
}
From source file:com.ebay.nest.io.sede.objectinspector.primitive.JavaStringObjectInspector.java
License:Apache License
/**
 * Creates the Java-side value for the given {@link Text}.
 *
 * @param value the text to convert; may be {@code null}
 * @return {@code value.toString()}, or {@code null} when {@code value} is {@code null}
 */
@Override
public Object create(Text value) {
    if (value == null) {
        return null;
    }
    return value.toString();
}
From source file:com.ebay.nest.io.sede.objectinspector.primitive.JavaStringObjectInspector.java
License:Apache License
/**
 * Produces the value that replaces {@code o} when it is set from a {@link Text}.
 * <p>
 * The existing object {@code o} is not read or modified here; the result is
 * derived from {@code value} alone.
 *
 * @param o     the current object (ignored by this implementation)
 * @param value the text to convert; may be {@code null}
 * @return {@code value.toString()}, or {@code null} when {@code value} is {@code null}
 */
@Override
public Object set(Object o, Text value) {
    if (value == null) {
        return null;
    }
    return value.toString();
}
From source file:com.ebay.nest.io.sede.RegexSerDe.java
License:Apache License
@Override public Object deserialize(Writable blob) throws SerDeException { Text rowText = (Text) blob; Matcher m = inputPattern.matcher(rowText.toString()); if (m.groupCount() != numColumns) { throw new SerDeException("Number of matching groups doesn't match the number of columns"); }//w w w . j a v a 2 s .co m // If do not match, ignore the line, return a row with all nulls. if (!m.matches()) { unmatchedRowsCount++; if (!alreadyLoggedNoMatch) { // Report the row if its the first time LOG.warn("" + unmatchedRowsCount + " unmatched rows are found: " + rowText); alreadyLoggedNoMatch = true; } return null; } // Otherwise, return the row. for (int c = 0; c < numColumns; c++) { try { String t = m.group(c + 1); TypeInfo typeInfo = columnTypes.get(c); String typeName = typeInfo.getTypeName(); // Convert the column to the correct type when needed and set in row obj if (typeName.equals(serdeConstants.STRING_TYPE_NAME)) { row.set(c, t); } else if (typeName.equals(serdeConstants.TINYINT_TYPE_NAME)) { Byte b; b = Byte.valueOf(t); row.set(c, b); } else if (typeName.equals(serdeConstants.SMALLINT_TYPE_NAME)) { Short s; s = Short.valueOf(t); row.set(c, s); } else if (typeName.equals(serdeConstants.INT_TYPE_NAME)) { Integer i; i = Integer.valueOf(t); row.set(c, i); } else if (typeName.equals(serdeConstants.BIGINT_TYPE_NAME)) { Long l; l = Long.valueOf(t); row.set(c, l); } else if (typeName.equals(serdeConstants.FLOAT_TYPE_NAME)) { Float f; f = Float.valueOf(t); row.set(c, f); } else if (typeName.equals(serdeConstants.DOUBLE_TYPE_NAME)) { Double d; d = Double.valueOf(t); row.set(c, d); } else if (typeName.equals(serdeConstants.BOOLEAN_TYPE_NAME)) { Boolean b; b = Boolean.valueOf(t); row.set(c, b); } else if (typeName.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) { Timestamp ts; ts = Timestamp.valueOf(t); row.set(c, ts); } else if (typeName.equals(serdeConstants.DATE_TYPE_NAME)) { Date d; d = Date.valueOf(t); row.set(c, d); } else if (typeName.equals(serdeConstants.DECIMAL_TYPE_NAME)) 
{ HiveDecimal bd; bd = new HiveDecimal(t); row.set(c, bd); } else if (typeInfo instanceof PrimitiveTypeInfo && ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.VARCHAR) { VarcharTypeParams varcharParams = (VarcharTypeParams) ParameterizedPrimitiveTypeUtils .getTypeParamsFromTypeInfo(typeInfo); HiveVarchar hv = new HiveVarchar(t, varcharParams != null ? varcharParams.length : -1); row.set(c, hv); } } catch (RuntimeException e) { partialMatchedRowsCount++; if (!alreadyLoggedPartialMatch) { // Report the row if its the first row LOG.warn("" + partialMatchedRowsCount + " partially unmatched rows are found, " + " cannot find group " + c + ": " + rowText); alreadyLoggedPartialMatch = true; } row.set(c, null); } } return row; }
From source file:com.ebay.nest.io.sede.SerDeUtils.java
License:Apache License
/**
 * Appends a JSON-style rendering of {@code o} to {@code sb}, using {@code oi}
 * to decide how the object is interpreted.
 * <p>
 * Rendering by category:
 * <ul>
 * <li>PRIMITIVE: booleans and numbers are appended bare; STRING and VARCHAR are
 * escaped with {@code escapeString} and wrapped in double quotes; DATE and
 * TIMESTAMP are wrapped in double quotes; BINARY is appended as the text decoded
 * from its bytes, without quotes; DECIMAL is appended bare.</li>
 * <li>LIST: elements between {@code LBRACKET} and {@code RBRACKET}, separated by {@code COMMA}.</li>
 * <li>MAP: {@code key COLON value} pairs between {@code LBRACE} and {@code RBRACE}.</li>
 * <li>STRUCT: quoted field names followed by {@code COLON} and the field value.</li>
 * <li>UNION: the tag, {@code COLON}, then the tagged field, inside braces.</li>
 * </ul>
 * A null value at this level is written as {@code nullStr}; nested values are
 * always rendered with {@code JSON_NULL} as their null text.
 *
 * @param sb      destination buffer
 * @param o       the object to render; may be {@code null}
 * @param oi      inspector describing {@code o}
 * @param nullStr text to append when the value at this level is null
 * @throws RuntimeException if the category or primitive category is not handled
 */
static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi, String nullStr) {
    switch (oi.getCategory()) {
    case PRIMITIVE: {
        PrimitiveObjectInspector primitiveOI = (PrimitiveObjectInspector) oi;
        if (o == null) {
            sb.append(nullStr);
            break;
        }
        switch (primitiveOI.getPrimitiveCategory()) {
        case BOOLEAN: {
            if (((BooleanObjectInspector) primitiveOI).get(o)) {
                sb.append("true");
            } else {
                sb.append("false");
            }
            break;
        }
        case BYTE: {
            sb.append(((ByteObjectInspector) primitiveOI).get(o));
            break;
        }
        case SHORT: {
            sb.append(((ShortObjectInspector) primitiveOI).get(o));
            break;
        }
        case INT: {
            sb.append(((IntObjectInspector) primitiveOI).get(o));
            break;
        }
        case LONG: {
            sb.append(((LongObjectInspector) primitiveOI).get(o));
            break;
        }
        case FLOAT: {
            sb.append(((FloatObjectInspector) primitiveOI).get(o));
            break;
        }
        case DOUBLE: {
            sb.append(((DoubleObjectInspector) primitiveOI).get(o));
            break;
        }
        case STRING: {
            sb.append('"')
                    .append(escapeString(((StringObjectInspector) primitiveOI).getPrimitiveJavaObject(o)))
                    .append('"');
            break;
        }
        case VARCHAR: {
            sb.append('"')
                    .append(escapeString(
                            ((HiveVarcharObjectInspector) primitiveOI).getPrimitiveJavaObject(o).toString()))
                    .append('"');
            break;
        }
        case DATE: {
            sb.append('"')
                    .append(((DateObjectInspector) primitiveOI).getPrimitiveWritableObject(o))
                    .append('"');
            break;
        }
        case TIMESTAMP: {
            sb.append('"')
                    .append(((TimestampObjectInspector) primitiveOI).getPrimitiveWritableObject(o))
                    .append('"');
            break;
        }
        case BINARY: {
            BytesWritable bytes = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
            // Only the first getLength() bytes of the backing array are valid data.
            Text decoded = new Text();
            decoded.set(bytes.getBytes(), 0, bytes.getLength());
            sb.append(decoded.toString());
            break;
        }
        case DECIMAL: {
            sb.append(((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o));
            break;
        }
        default:
            throw new RuntimeException("Unknown primitive type: " + primitiveOI.getPrimitiveCategory());
        }
        break;
    }
    case LIST: {
        ListObjectInspector listOI = (ListObjectInspector) oi;
        ObjectInspector elementOI = listOI.getListElementObjectInspector();
        List<?> elements = listOI.getList(o);
        if (elements == null) {
            sb.append(nullStr);
            break;
        }
        sb.append(LBRACKET);
        for (int idx = 0; idx < elements.size(); idx++) {
            if (idx != 0) {
                sb.append(COMMA);
            }
            buildJSONString(sb, elements.get(idx), elementOI, JSON_NULL);
        }
        sb.append(RBRACKET);
        break;
    }
    case MAP: {
        MapObjectInspector mapOI = (MapObjectInspector) oi;
        ObjectInspector keyOI = mapOI.getMapKeyObjectInspector();
        ObjectInspector valueOI = mapOI.getMapValueObjectInspector();
        Map<?, ?> entries = mapOI.getMap(o);
        if (entries == null) {
            sb.append(nullStr);
            break;
        }
        sb.append(LBRACE);
        boolean needsComma = false;
        for (Map.Entry<?, ?> pair : entries.entrySet()) {
            if (needsComma) {
                sb.append(COMMA);
            }
            needsComma = true;
            buildJSONString(sb, pair.getKey(), keyOI, JSON_NULL);
            sb.append(COLON);
            buildJSONString(sb, pair.getValue(), valueOI, JSON_NULL);
        }
        sb.append(RBRACE);
        break;
    }
    case STRUCT: {
        StructObjectInspector structOI = (StructObjectInspector) oi;
        List<? extends StructField> fields = structOI.getAllStructFieldRefs();
        if (o == null) {
            sb.append(nullStr);
            break;
        }
        sb.append(LBRACE);
        for (int idx = 0; idx < fields.size(); idx++) {
            if (idx != 0) {
                sb.append(COMMA);
            }
            // Field names are written between QUOTE markers as-is (no escaping applied).
            sb.append(QUOTE);
            sb.append(fields.get(idx).getFieldName());
            sb.append(QUOTE);
            sb.append(COLON);
            buildJSONString(sb, structOI.getStructFieldData(o, fields.get(idx)),
                    fields.get(idx).getFieldObjectInspector(), JSON_NULL);
        }
        sb.append(RBRACE);
        break;
    }
    case UNION: {
        UnionObjectInspector unionOI = (UnionObjectInspector) oi;
        if (o == null) {
            sb.append(nullStr);
            break;
        }
        sb.append(LBRACE);
        sb.append(unionOI.getTag(o));
        sb.append(COLON);
        // The tag selects which of the union's inspectors describes the held field.
        buildJSONString(sb, unionOI.getField(o), unionOI.getObjectInspectors().get(unionOI.getTag(o)),
                JSON_NULL);
        sb.append(RBRACE);
        break;
    }
    default:
        throw new RuntimeException("Unknown type in ObjectInspector!");
    }
}