List of usage examples for org.apache.hadoop.io.Text.toString()
@Override
public String toString()
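Before the per-project examples below, here is a minimal, self-contained sketch of what Text.toString() does: Text stores its contents as UTF-8 encoded bytes, and toString() decodes those bytes back into a Java String. The class and variable names here are illustrative only and do not come from any of the source files listed.

import org.apache.hadoop.io.Text;

public class TextToStringExample {
    public static void main(String[] args) {
        // Text holds UTF-8 encoded bytes; toString() decodes them into a String.
        Text text = new Text("hello hadoop");
        String decoded = text.toString();
        System.out.println(decoded); // prints "hello hadoop"

        // Reusing the same Text instance: set() replaces the backing bytes,
        // and toString() reflects the new contents.
        text.set("second value");
        System.out.println(text.toString()); // prints "second value"
    }
}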
From source file:com.knewton.mrtool.io.JsonRecordReaderTest.java
License:Apache License
/**
 * Tests the line reader in the record reader to see if records can be read correctly from a
 * random seek location in the input stream.
 *
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public void testJsonRecordReaderWithRandomPos() throws IOException, InterruptedException {
    JsonRecordReader<Text> rr = new JsonRecordReader<Text>() {
        @Override
        protected Class<?> getDataClass(String jsonStr) {
            return Text.class;
        }
    };

    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContext(conf, new TaskAttemptID());
    FileSplit fileSplit = new FileSplit(new Path("recs.2013-03-20_02_52.log"), 10,
            recommendationBytes.length, new String[0]);

    new MockUp<FileSystem>() {
        @Mock
        public FSDataInputStream open(Path f) throws IOException {
            return new FSDataInputStream(new SeekableByteArrayInputStream(recommendationBytes));
        }
    };

    // Initialize it to get the compression codecs
    rr.initialize(fileSplit, context);
    // Close the line reader and reopen it.
    rr.close();
    LineReader lineReader = rr.initLineReader(fileSplit, conf);

    Text line = new Text();
    lineReader.readLine(line);
    assertEquals(DummyJsonRecommendations.jsonRecommendations[1], line.toString());

    line = new Text();
    lineReader.readLine(line);
    assertTrue(line.toString().isEmpty());
    lineReader.close();
}
From source file:com.kylinolap.cube.common.BytesSplitter.java
License:Apache License
public int detectDelim(Text value, int expectedParts) {
    for (int i = 0; i < COMMON_DELIMS.length; i++) {
        int nParts = split(value.getBytes(), value.getLength(), (byte) COMMON_DELIMS[i]);
        if (nParts == expectedParts)
            return COMMON_DELIMS[i];
    }
    throw new RuntimeException("Cannot detect delimiter from first line -- " + value.toString()
            + " -- expect " + expectedParts + " columns");
}
From source file:com.kylinolap.job.hadoop.cardinality.ColumnCardinalityMapper.java
License:Apache License
@Override
public void map(T key, Text value, Context context) throws IOException, InterruptedException {
    String delim = context.getConfiguration().get(HiveColumnCardinalityJob.KEY_INPUT_DELIM);
    if (delim == null) {
        delim = DEFAULT_DELIM;
    }
    String line = value.toString();
    StringTokenizer tokenizer = new StringTokenizer(line, delim);
    int i = 1;
    while (tokenizer.hasMoreTokens()) {
        String temp = tokenizer.nextToken();
        getHllc(i).add(Bytes.toBytes(temp));
        i++;
    }
}
From source file:com.kylinolap.job.hadoop.invertedindex.IIDistinctColumnsMapper.java
License:Apache License
@Override
public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException {
    if (delim == -1) {
        delim = splitter.detectDelim(value, columns.length);
    }

    int nParts = splitter.split(value.getBytes(), value.getLength(), (byte) delim);
    SplittedBytes[] parts = splitter.getSplitBuffers();

    if (nParts != columns.length) {
        throw new RuntimeException("Got " + nParts + " from -- " + value.toString() + " -- but only "
                + columns.length + " expected");
    }

    for (short i = 0; i < nParts; i++) {
        outputKey.set(i);
        outputValue.set(parts[i].value, 0, parts[i].length);
        context.write(outputKey, outputValue);
    }
}
From source file:com.kylinolap.job.hadoop.invertedindex.InvertedIndexMapper.java
License:Apache License
@Override
public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException {
    if (delim == -1) {
        delim = splitter.detectDelim(value, info.getColumnCount());
    }

    int nParts = splitter.split(value.getBytes(), value.getLength(), (byte) delim);
    SplittedBytes[] parts = splitter.getSplitBuffers();

    if (nParts != info.getColumnCount()) {
        throw new RuntimeException("Got " + nParts + " from -- " + value.toString() + " -- but only "
                + info.getColumnCount() + " expected");
    }

    rec.reset();
    for (int i = 0; i < nParts; i++) {
        rec.setValueString(i, Bytes.toString(parts[i].value, 0, parts[i].length));
    }

    outputKey.set(rec.getTimestamp());
    // outputValue's backing bytes array is the same as rec
    context.write(outputKey, outputValue);
}
From source file:com.lakhani.anchorgraph.applestovectors.java
public static void main(String args[]) throws Exception {
    List<NamedVector> apples = new ArrayList<NamedVector>();
    NamedVector apple;

    apple = new NamedVector(new DenseVector(new double[] { 0.11, 510, 1 }), "Small round green apple");
    apples.add(apple);
    apple = new NamedVector(new DenseVector(new double[] { 0.23, 650, 3 }), "Large oval red apple");
    apples.add(apple);
    apple = new NamedVector(new DenseVector(new double[] { 0.09, 630, 1 }), "Small elongated red apple");
    apples.add(apple);
    apple = new NamedVector(new DenseVector(new double[] { 0.25, 590, 3 }), "Large round yellow apple");
    apples.add(apple);
    apple = new NamedVector(new DenseVector(new double[] { 0.18, 520, 2 }), "Medium oval green apple");
    apples.add(apple);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/user/cloudera/anchorgraph/output");

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);
    VectorWritable vec = new VectorWritable();
    for (NamedVector vector : apples) {
        vec.set(vector);
        writer.append(new Text(vector.getName()), vec);
    }
    writer.close();

    // Read back the vectors that were just written to the same path.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    Text key = new Text();
    VectorWritable value = new VectorWritable();
    while (reader.next(key, value)) {
        System.out.println(key.toString() + " " + value.get().asFormatString());
    }
    reader.close();
}
From source file:com.liferay.hadoop.job.Map.java
License:Open Source License
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {

    String line = value.toString();
    StringTokenizer tokenizer = new StringTokenizer(line);

    while (tokenizer.hasMoreTokens()) {
        word.set(tokenizer.nextToken());
        output.collect(word, one);
    }
}
From source file:com.linkedin.cubert.io.text.TextTupleCreator.java
License:Open Source License
@Override
public Tuple create(Object key, Object value) throws ExecException {
    Text t = (Text) value;
    String[] fields = t.toString().split(separator);

    for (int i = 0; i < fields.length; i++) {
        Object obj = null;

        if (fields[i] != null && fields[i].length() != 0)
            switch (typeArray[i]) {
            case INT:
                obj = new Integer(Integer.parseInt(fields[i]));
                break;
            case LONG:
                obj = new Long(Long.parseLong(fields[i]));
                break;
            case STRING:
                obj = fields[i];
                break;
            case DOUBLE:
                obj = Double.parseDouble(fields[i]);
                break;
            case FLOAT:
                obj = Float.parseFloat(fields[i]);
                break;
            default:
                break;
            }

        tuple.set(i, obj);
    }

    return tuple;
}
From source file:com.linkedin.json.JsonSequenceFileInputFormat.java
License:Apache License
@Override
public RecordReader<Object, Object> createRecordReader(final InputSplit split, final TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();

    String inputPathString = ((FileSplit) split).getPath().toUri().getPath();
    log.info("Input file path:" + inputPathString);
    Path inputPath = new Path(inputPathString);

    SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf), inputPath, conf);
    SequenceFile.Metadata meta = reader.getMetadata();

    try {
        final Text keySchema = meta.get(new Text("key.schema"));
        final Text valueSchema = meta.get(new Text("value.schema"));

        if (0 == keySchema.getLength() || 0 == valueSchema.getLength()) {
            throw new Exception(String.format("Cannot have a 0 length schema. keySchema[%s], valueSchema[%s]",
                    keySchema, valueSchema));
        }

        return new JsonObjectRecordReader(new JsonTypeSerializer(keySchema.toString()),
                new JsonTypeSerializer(valueSchema.toString()),
                baseInputFormat.createRecordReader(split, context));
    } catch (Exception e) {
        throw new IOException("Failed to Load Schema from file:" + inputPathString + "\n");
    }
}
From source file:com.littlehotspot.hadoop.mr.nginx.module.cdf.CDFMapper.java
License:Open Source License
@Override
protected void map(LongWritable key, Text value,
        org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
    try {
        String rowLineContent = value.toString();
        Matcher matcher = CommonVariables.MAPPER_INPUT_FORMAT_REGEX.matcher(rowLineContent);
        if (!matcher.find()) {
            return;
        }

        // if (StringUtils.isBlank(matcher.group(7)) || "-".equalsIgnoreCase(matcher.group(7).trim())) {
        //     return;
        // }

        StringBuffer newValueStringBuffer = new StringBuffer();
        newValueStringBuffer.append(this.turnDataForNone(matcher.group(1))).append(Constant.VALUE_SPLIT_CHAR);   // Client-IP
        newValueStringBuffer.append(this.toTimestamp(matcher.group(2))).append(Constant.VALUE_SPLIT_CHAR);       // Access-Timestamp
        newValueStringBuffer.append(this.turnDataForNone(matcher.group(3))).append(Constant.VALUE_SPLIT_CHAR);   // HTTP-Request-Method
        newValueStringBuffer.append(this.turnDataForNone(matcher.group(4))).append(Constant.VALUE_SPLIT_CHAR);   // URI
        newValueStringBuffer.append(this.turnDataForNone(matcher.group(5))).append(Constant.VALUE_SPLIT_CHAR);   // HTTP-Response-Status
        newValueStringBuffer.append(this.turnDataForNone(matcher.group(6))).append(Constant.VALUE_SPLIT_CHAR);   // HTTP-Header[referer]
        newValueStringBuffer.append(this.analysisTraceInfo(matcher.group(7))).append(Constant.VALUE_SPLIT_CHAR); // HTTP-Header[traceinfo]
        newValueStringBuffer.append(this.turnDataForNone(matcher.group(8))).append(Constant.VALUE_SPLIT_CHAR);   // HTTP-Header[user_agent]
        newValueStringBuffer.append(this.turnDataForNone(matcher.group(9))).append(Constant.VALUE_SPLIT_CHAR);   // HTTP-Header[x_forwarded_for]
        newValueStringBuffer.append(this.turnDateFormat(matcher.group(2)));                                      // Access-Time

        context.write(new Text(newValueStringBuffer.toString()), new Text());
    } catch (Exception e) {
        e.printStackTrace();
    }
}