List of usage examples for org.apache.hadoop.io.Text: the Text() constructor
public Text()
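Before the project-specific snippets below, here is a minimal self-contained sketch (ours, not taken from any of the listed projects) of what the no-argument constructor gives you: an empty, mutable UTF-8 buffer that can be refilled in place.

import org.apache.hadoop.io.Text;

public class TextDemo {
    public static void main(String[] args) {
        Text text = new Text();                 // empty buffer, length 0
        text.set("hello");                      // fill the same buffer in place
        System.out.println(text);               // prints "hello"
        System.out.println(text.getLength());   // prints 5 (UTF-8 byte length)
        text.clear();                           // back to empty; the buffer is reused
    }
}

Reusing one instance this way is why most of the examples below call new Text() once and then set(...) per record.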
From source file:co.cask.cdap.data.stream.TextStreamInputFormat.java
License: Apache License

@Override
protected StreamEventDecoder<LongWritable, Text> createStreamEventDecoder() {
    return new StreamEventDecoder<LongWritable, Text>() {
        private final LongWritable key = new LongWritable();
        private final Text value = new Text();

        @Override
        public DecodeResult<LongWritable, Text> decode(StreamEvent event,
                DecodeResult<LongWritable, Text> result) {
            key.set(event.getTimestamp());
            value.set(Charsets.UTF_8.decode(event.getBody()).toString());
            return result.setKey(key).setValue(value);
        }
    };
}
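The decoder above allocates its key and value Writables once and refills them for every event, the usual Hadoop pattern for avoiding per-record allocation. A standalone loop (input data and names are illustrative, not from the CDAP source) showing the same pattern:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.io.Text;

public class ReuseDemo {
    public static void main(String[] args) {
        List<String> lines = Arrays.asList("a", "b", "c"); // stand-in input
        Text value = new Text();            // allocated once, outside the loop
        for (String line : lines) {
            value.set(line);                // overwrite the same buffer in place
            System.out.println(value);      // consumers must copy if they keep a reference
        }
    }
}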
From source file:co.nubetech.hiho.dedup.DelimitedLineRecordReader.java
License: Apache License

/**
 * @param delimiter
 * @param column
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.delimiter = job.get(DelimitedTextInputFormat.DELIMITER_CONF);
    this.column = job.getInt(DelimitedTextInputFormat.COLUMN_CONF, 0);
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            // a split starting mid-file may begin mid-line: back up one byte and
            // discard everything up to the next newline (the previous split owns it)
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) {
        // skip first line and re-establish "start"; the throwaway Text() receives the partial line
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
From source file:co.nubetech.hiho.dedup.DelimitedLineRecordReader.java
License: Apache License

public boolean nextKeyValue() throws IOException {
    if (value == null) {
        value = new Text();
    }
    int newSize = 0;
    while (pos < end) {
        newSize = in.readLine(value, maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
        if (newSize == 0) {
            break;
        }
        pos += newSize;
        if (newSize < maxLineLength) {
            break;
        }
        // line too long. try again
        logger.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
    }
    if (newSize == 0) {
        key = null;
        value = null;
        return false;
    } else {
        // we calculate the key from the value here
        if (value != null) {
            logger.debug("Value is: " + value);
            logger.debug("Column is: " + column);
            logger.debug("Delimiter is: " + delimiter);
            key = getColumn(value, column, delimiter);
            logger.debug("Value after generating keyColumn: " + value);
            logger.debug("Key is: " + key);
        }
        return true;
    }
}
From source file:co.nubetech.hiho.dedup.DelimitedLineRecordReader.java
License: Apache License

public Text getColumn(Text val, int column, String delimiter) throws IOException {
    if (delimiter == null || delimiter.equals("")) {
        throw new IOException("Value of delimiter is empty");
    }
    int lastOccurance = 0;
    int occurance = 0;
    // walk past 'column' delimiters; after the loop, 'occurance' is the width of
    // the requested field and 'lastOccurance' points just past its trailing delimiter
    for (int i = 0; i < column; i++) {
        occurance = val.find(delimiter, lastOccurance) - lastOccurance;
        lastOccurance = lastOccurance + occurance + delimiter.length();
    }
    logger.debug("text value is: " + val);
    int delimiterLength = delimiter.length();
    int startPosition = lastOccurance - (occurance + delimiterLength);
    Text keyColumn = new Text();
    keyColumn.set(val.getBytes(), startPosition, occurance);
    return keyColumn;
}
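To make the index arithmetic concrete, here is a standalone sketch (sample data ours) that inlines the same steps for column 2 of the line a,b,c: after two iterations lastOccurance is 4 and occurance is 1, so startPosition = 4 - (1 + 1) = 2 and the extracted key is b. Columns are effectively 1-indexed, and the loop assumes the requested column is followed by another delimiter, so it works for interior columns.

import org.apache.hadoop.io.Text;

public class ColumnMathDemo {
    public static void main(String[] args) {
        Text val = new Text("a,b,c");
        String delimiter = ",";
        int column = 2;                       // 1-indexed, as in getColumn above
        int lastOccurance = 0, occurance = 0;
        for (int i = 0; i < column; i++) {
            occurance = val.find(delimiter, lastOccurance) - lastOccurance;
            lastOccurance = lastOccurance + occurance + delimiter.length();
        }
        int startPosition = lastOccurance - (occurance + delimiter.length());
        Text keyColumn = new Text();
        keyColumn.set(val.getBytes(), startPosition, occurance);
        System.out.println(keyColumn);        // prints "b"
    }
}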
From source file:co.nubetech.hiho.dedup.HihoTuple.java
License: Apache License

@Override
public void readFields(DataInput in) throws IOException {
    logger.debug("Reading fields");
    hash = new MD5Hash();
    hash.readFields(in);
    keyClass = new Text();
    keyClass.readFields(in);
    try {
        logger.debug("Key class in readField() of HihoTuple class is :" + keyClass);
        key = (K) Class.forName(keyClass.toString()).newInstance();
    } catch (Exception e) {
        e.printStackTrace();
        throw new IOException("Error in serializing the HihoTuple ", e);
    }
    key.readFields(in);
}
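This readFields works because the matching write() serializes the MD5 hash, then the key's class name as a Text, then the key itself, so the class name is available before the key must be instantiated reflectively. A minimal sketch (ours, not from HiHo) of the underlying Text round trip that makes keyClass.readFields(in) possible:

import java.io.*;
import org.apache.hadoop.io.Text;

public class RoundTripDemo {
    public static void main(String[] args) throws IOException {
        Text out = new Text("org.apache.hadoop.io.Text");
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        out.write(new DataOutputStream(buffer));   // length-prefixed UTF-8 bytes

        Text in = new Text();                      // empty instance, as in readFields above
        in.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        System.out.println(in);                    // the same string back
    }
}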
From source file:co.nubetech.hiho.mapred.input.FileStreamRecordReader.java
License: Apache License

@Override
public Text createKey() {
    logger.debug("Creating key");
    return new Text();
}
From source file:co.nubetech.hiho.mapreduce.DBInputDelimMapper.java
License: Apache License

public DBInputDelimMapper() {
    outkey = new Text();
    outval = new Text();
}
From source file:co.nubetech.hiho.mapreduce.TestDBInputDelimMapper.java
License: Apache License

@Test
public final void testMapperValidValues() throws IOException, InterruptedException {
    Mapper.Context context = mock(Mapper.Context.class);
    Configuration conf = new Configuration();
    conf.set(HIHOConf.INPUT_OUTPUT_DELIMITER, ",");
    when(context.getConfiguration()).thenReturn(conf);
    DBInputDelimMapper mapper = new DBInputDelimMapper();
    ColumnInfo intColumn = new ColumnInfo(0, Types.INTEGER, "intColumn");
    ColumnInfo stringColumn = new ColumnInfo(1, Types.VARCHAR, "stringColumn");
    ColumnInfo dateColumn = new ColumnInfo(1, Types.DATE, "dateColumn");
    ColumnInfo longColumn = new ColumnInfo(1, Types.BIGINT, "longColumn");
    ColumnInfo booleanColumn = new ColumnInfo(1, Types.BOOLEAN, "booleanColumn");
    ColumnInfo doubleColumn = new ColumnInfo(1, Types.DOUBLE, "doubleColumn");
    ColumnInfo charColumn = new ColumnInfo(1, Types.CHAR, "charColumn");
    ColumnInfo timeColumn = new ColumnInfo(1, Types.TIME, "timeColumn");
    ColumnInfo timeStampColumn = new ColumnInfo(1, Types.TIMESTAMP, "timeStampColumn");
    ColumnInfo floatColumn = new ColumnInfo(1, Types.FLOAT, "floatColumn");
    ArrayList<ColumnInfo> columns = new ArrayList<ColumnInfo>();
    columns.add(intColumn);
    columns.add(stringColumn);
    columns.add(dateColumn);
    columns.add(longColumn);
    columns.add(booleanColumn);
    columns.add(doubleColumn);
    columns.add(charColumn);
    columns.add(timeColumn);
    columns.add(timeStampColumn);
    columns.add(floatColumn);
    ArrayList<Comparable> values = new ArrayList<Comparable>();
    values.add(new Integer(12));
    values.add(new String("sam"));
    values.add(new Date());
    values.add(new Long(26564l));
    values.add(true);
    values.add(1.235);
    values.add('a');
    values.add(new Time(new Date().getTime()));
    values.add(new Time(new Date().getTime()));
    values.add(new Float(1.0f));
    GenericDBWritable val = new GenericDBWritable(columns, values);
    LongWritable key = new LongWritable(1);
    mapper.map(key, val, context);
    Text outkey = new Text();
    Text outval = new Text();
    StringBuilder builder = new StringBuilder();
    builder.append(new Integer(12) + "," + new String("sam") + "," + new Date() + ","
            + new Long(26564l) + "," + true + "," + 1.235 + "," + 'a' + ","
            + new Time(new Date().getTime()) + "," + new Time(new Date().getTime()) + ","
            + new Float(1.0f));
    outval.set(builder.toString());
    verify(context).write(outkey, outval);
}
From source file:co.nubetech.hiho.mapreduce.TestDBInputDelimMapper.java
License: Apache License

@Test
public final void testMapperValidValuesDelmiter() throws IOException, InterruptedException {
    Mapper.Context context = mock(Mapper.Context.class);
    Configuration conf = new Configuration();
    String delimiter = "DELIM";
    conf.set(HIHOConf.INPUT_OUTPUT_DELIMITER, delimiter);
    when(context.getConfiguration()).thenReturn(conf);
    DBInputDelimMapper mapper = new DBInputDelimMapper();
    ColumnInfo intColumn = new ColumnInfo(0, Types.INTEGER, "intColumn");
    ColumnInfo stringColumn = new ColumnInfo(1, Types.VARCHAR, "stringColumn");
    ColumnInfo dateColumn = new ColumnInfo(1, Types.DATE, "dateColumn");
    ColumnInfo longColumn = new ColumnInfo(1, Types.BIGINT, "longColumn");
    ColumnInfo booleanColumn = new ColumnInfo(1, Types.BOOLEAN, "booleanColumn");
    ColumnInfo doubleColumn = new ColumnInfo(1, Types.DOUBLE, "doubleColumn");
    ColumnInfo charColumn = new ColumnInfo(1, Types.CHAR, "charColumn");
    ColumnInfo timeColumn = new ColumnInfo(1, Types.TIME, "timeColumn");
    ColumnInfo timeStampColumn = new ColumnInfo(1, Types.TIMESTAMP, "timeStampColumn");
    ColumnInfo floatColumn = new ColumnInfo(1, Types.FLOAT, "floatColumn");
    ArrayList<ColumnInfo> columns = new ArrayList<ColumnInfo>();
    columns.add(intColumn);
    columns.add(stringColumn);
    columns.add(dateColumn);
    columns.add(longColumn);
    columns.add(booleanColumn);
    columns.add(doubleColumn);
    columns.add(charColumn);
    columns.add(timeColumn);
    columns.add(timeStampColumn);
    columns.add(floatColumn);
    ArrayList<Comparable> values = new ArrayList<Comparable>();
    values.add(new Integer(12));
    values.add(new String("sam"));
    values.add(new Date());
    values.add(new Long(26564l));
    values.add(true);
    values.add(1.235);
    values.add('a');
    values.add(new Time(new Date().getTime()));
    values.add(new Time(new Date().getTime()));
    values.add(new Float(1.0f));
    GenericDBWritable val = new GenericDBWritable(columns, values);
    LongWritable key = new LongWritable(1);
    mapper.map(key, val, context);
    Text outkey = new Text();
    Text outval = new Text();
    StringBuilder builder = new StringBuilder();
    builder.append(new Integer(12) + delimiter + new String("sam") + delimiter + new Date()
            + delimiter + new Long(26564l) + delimiter + true + delimiter + 1.235 + delimiter
            + 'a' + delimiter + new Time(new Date().getTime()) + delimiter
            + new Time(new Date().getTime()) + delimiter + new Float(1.0f));
    outval.set(builder.toString());
    verify(context).write(outkey, outval);
}
From source file:co.nubetech.hiho.mapreduce.TestDBInputDelimMapper.java
License: Apache License

@Test
public final void testMapperNullValues() throws IOException, InterruptedException {
    Mapper.Context context = mock(Mapper.Context.class);
    Configuration conf = new Configuration();
    conf.set(HIHOConf.INPUT_OUTPUT_DELIMITER, ",");
    when(context.getConfiguration()).thenReturn(conf);
    DBInputDelimMapper mapper = new DBInputDelimMapper();
    ArrayList<ColumnInfo> columns = new ArrayList<ColumnInfo>();
    ArrayList values = new ArrayList();
    GenericDBWritable val = new GenericDBWritable(columns, values);
    LongWritable key = new LongWritable(1);
    mapper.map(key, val, context);
    Text outkey = new Text();
    Text outval = new Text();
    verify(context).write(outkey, outval);
}