List of usage examples for org.apache.hadoop.io.Text.toString()
@Override
public String toString()
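Before the per-project examples below, here is a minimal, self-contained sketch of what Text.toString() does: Text stores its contents as UTF-8 encoded bytes, and toString() decodes those bytes back into a Java String. The class and variable names here are illustrative only and do not come from any of the source files listed.

import org.apache.hadoop.io.Text;

public class TextToStringExample {
    public static void main(String[] args) {
        // Text holds UTF-8 encoded bytes; toString() decodes them into a String.
        Text text = new Text("hello hadoop");
        String decoded = text.toString();
        System.out.println(decoded); // prints "hello hadoop"

        // Reusing the same Text instance: set() replaces the backing bytes,
        // and toString() reflects the new contents.
        text.set("second value");
        System.out.println(text.toString()); // prints "second value"
    }
}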
From source file:com.knewton.mrtool.io.JsonRecordReaderTest.java
License:Apache License
/**
 * Tests the line reader in the record reader to see if records can be read correctly from a
 * random seek location in the input stream.
 *
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public void testJsonRecordReaderWithRandomPos() throws IOException, InterruptedException {
    JsonRecordReader<Text> rr = new JsonRecordReader<Text>() {
        @Override
        protected Class<?> getDataClass(String jsonStr) {
            return Text.class;
        }
    };

    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContext(conf, new TaskAttemptID());
    FileSplit fileSplit = new FileSplit(new Path("recs.2013-03-20_02_52.log"), 10,
            recommendationBytes.length, new String[0]);

    new MockUp<FileSystem>() {
        @Mock
        public FSDataInputStream open(Path f) throws IOException {
            return new FSDataInputStream(new SeekableByteArrayInputStream(recommendationBytes));
        }
    };

    // Initialize it to get the compression codecs
    rr.initialize(fileSplit, context);
    // Close the line reader and reopen it.
    rr.close();
    LineReader lineReader = rr.initLineReader(fileSplit, conf);

    Text line = new Text();
    lineReader.readLine(line);
    assertEquals(DummyJsonRecommendations.jsonRecommendations[1], line.toString());

    line = new Text();
    lineReader.readLine(line);
    assertTrue(line.toString().isEmpty());
    lineReader.close();
}
From source file:com.kylinolap.cube.common.BytesSplitter.java
License:Apache License
public int detectDelim(Text value, int expectedParts) {
    for (int i = 0; i < COMMON_DELIMS.length; i++) {
        int nParts = split(value.getBytes(), value.getLength(), (byte) COMMON_DELIMS[i]);
        if (nParts == expectedParts)
            return COMMON_DELIMS[i];
    }
    throw new RuntimeException("Cannot detect delimiter from first line -- " + value.toString()
            + " -- expect " + expectedParts + " columns");
}
From source file:com.kylinolap.job.hadoop.cardinality.ColumnCardinalityMapper.java
License:Apache License
@Override
public void map(T key, Text value, Context context) throws IOException, InterruptedException {
    String delim = context.getConfiguration().get(HiveColumnCardinalityJob.KEY_INPUT_DELIM);
    if (delim == null) {
        delim = DEFAULT_DELIM;
    }
    String line = value.toString();
    StringTokenizer tokenizer = new StringTokenizer(line, delim);
    int i = 1;
    while (tokenizer.hasMoreTokens()) {
        String temp = tokenizer.nextToken();
        getHllc(i).add(Bytes.toBytes(temp));
        i++;
    }
}
From source file:com.kylinolap.job.hadoop.invertedindex.IIDistinctColumnsMapper.java
License:Apache License
@Override
public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException {
    if (delim == -1) {
        delim = splitter.detectDelim(value, columns.length);
    }

    int nParts = splitter.split(value.getBytes(), value.getLength(), (byte) delim);
    SplittedBytes[] parts = splitter.getSplitBuffers();

    if (nParts != columns.length) {
        throw new RuntimeException("Got " + nParts + " from -- " + value.toString() + " -- but only "
                + columns.length + " expected");
    }

    for (short i = 0; i < nParts; i++) {
        outputKey.set(i);
        outputValue.set(parts[i].value, 0, parts[i].length);
        context.write(outputKey, outputValue);
    }
}
From source file:com.kylinolap.job.hadoop.invertedindex.InvertedIndexMapper.java
License:Apache License
@Override
public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException {
    if (delim == -1) {
        delim = splitter.detectDelim(value, info.getColumnCount());
    }

    int nParts = splitter.split(value.getBytes(), value.getLength(), (byte) delim);
    SplittedBytes[] parts = splitter.getSplitBuffers();

    if (nParts != info.getColumnCount()) {
        throw new RuntimeException("Got " + nParts + " from -- " + value.toString() + " -- but only "
                + info.getColumnCount() + " expected");
    }

    rec.reset();
    for (int i = 0; i < nParts; i++) {
        rec.setValueString(i, Bytes.toString(parts[i].value, 0, parts[i].length));
    }

    outputKey.set(rec.getTimestamp());
    // outputValue's backing bytes array is the same as rec
    context.write(outputKey, outputValue);
}
From source file:com.lakhani.anchorgraph.applestovectors.java
public static void main(String args[]) throws Exception {
    List<NamedVector> apples = new ArrayList<NamedVector>();
    NamedVector apple;

    apple = new NamedVector(new DenseVector(new double[] { 0.11, 510, 1 }), "Small round green apple");
    apples.add(apple);
    apple = new NamedVector(new DenseVector(new double[] { 0.23, 650, 3 }), "Large oval red apple");
    apples.add(apple);
    apple = new NamedVector(new DenseVector(new double[] { 0.09, 630, 1 }), "Small elongated red apple");
    apples.add(apple);
    apple = new NamedVector(new DenseVector(new double[] { 0.25, 590, 3 }), "Large round yellow apple");
    apples.add(apple);
    apple = new NamedVector(new DenseVector(new double[] { 0.18, 520, 2 }), "Medium oval green apple");
    apples.add(apple);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/user/cloudera/anchorgraph/output");

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);
    VectorWritable vec = new VectorWritable();
    for (NamedVector vector : apples) {
        vec.set(vector);
        writer.append(new Text(vector.getName()), vec);
    }
    writer.close();

    // Read back the vectors that were just written to the same path.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    Text key = new Text();
    VectorWritable value = new VectorWritable();
    while (reader.next(key, value)) {
        System.out.println(key.toString() + " " + value.get().asFormatString());
    }
    reader.close();
}
From source file:com.liferay.hadoop.job.Map.java
License:Open Source License
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {

    String line = value.toString();
    StringTokenizer tokenizer = new StringTokenizer(line);

    while (tokenizer.hasMoreTokens()) {
        word.set(tokenizer.nextToken());
        output.collect(word, one);
    }
}
From source file:com.linkedin.cubert.io.text.TextTupleCreator.java
License:Open Source License
@Override
public Tuple create(Object key, Object value) throws ExecException {
    Text t = (Text) value;
    String[] fields = t.toString().split(separator);

    for (int i = 0; i < fields.length; i++) {
        Object obj = null;

        if (fields[i] != null && fields[i].length() != 0)
            switch (typeArray[i]) {
            case INT:
                obj = new Integer(Integer.parseInt(fields[i]));
                break;
            case LONG:
                obj = new Long(Long.parseLong(fields[i]));
                break;
            case STRING:
                obj = fields[i];
                break;
            case DOUBLE:
                obj = Double.parseDouble(fields[i]);
                break;
            case FLOAT:
                obj = Float.parseFloat(fields[i]);
                break;
            default:
                break;
            }

        tuple.set(i, obj);
    }

    return tuple;
}
From source file:com.linkedin.json.JsonSequenceFileInputFormat.java
License:Apache License
@Override
public RecordReader<Object, Object> createRecordReader(final InputSplit split, final TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();

    String inputPathString = ((FileSplit) split).getPath().toUri().getPath();
    log.info("Input file path:" + inputPathString);
    Path inputPath = new Path(inputPathString);

    SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf), inputPath, conf);
    SequenceFile.Metadata meta = reader.getMetadata();

    try {
        final Text keySchema = meta.get(new Text("key.schema"));
        final Text valueSchema = meta.get(new Text("value.schema"));

        if (0 == keySchema.getLength() || 0 == valueSchema.getLength()) {
            throw new Exception(String.format("Cannot have a 0 length schema. keySchema[%s], valueSchema[%s]",
                    keySchema, valueSchema));
        }

        return new JsonObjectRecordReader(new JsonTypeSerializer(keySchema.toString()),
                new JsonTypeSerializer(valueSchema.toString()),
                baseInputFormat.createRecordReader(split, context));
    } catch (Exception e) {
        throw new IOException("Failed to Load Schema from file:" + inputPathString + "\n");
    }
}
From source file:com.littlehotspot.hadoop.mr.nginx.module.cdf.CDFMapper.java
License:Open Source License
@Override
protected void map(LongWritable key, Text value,
        org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
    try {
        String rowLineContent = value.toString();
        Matcher matcher = CommonVariables.MAPPER_INPUT_FORMAT_REGEX.matcher(rowLineContent);
        if (!matcher.find()) {
            return;
        }

        // if (StringUtils.isBlank(matcher.group(7)) || "-".equalsIgnoreCase(matcher.group(7).trim())) {
        //     return;
        // }

        StringBuffer newValueStringBuffer = new StringBuffer();
        newValueStringBuffer.append(this.turnDataForNone(matcher.group(1))).append(Constant.VALUE_SPLIT_CHAR);   // Client-IP
        newValueStringBuffer.append(this.toTimestamp(matcher.group(2))).append(Constant.VALUE_SPLIT_CHAR);       // Access-Timestamp
        newValueStringBuffer.append(this.turnDataForNone(matcher.group(3))).append(Constant.VALUE_SPLIT_CHAR);   // HTTP-Request-Method
        newValueStringBuffer.append(this.turnDataForNone(matcher.group(4))).append(Constant.VALUE_SPLIT_CHAR);   // URI
        newValueStringBuffer.append(this.turnDataForNone(matcher.group(5))).append(Constant.VALUE_SPLIT_CHAR);   // HTTP-Response-Status
        newValueStringBuffer.append(this.turnDataForNone(matcher.group(6))).append(Constant.VALUE_SPLIT_CHAR);   // HTTP-Header[referer]
        newValueStringBuffer.append(this.analysisTraceInfo(matcher.group(7))).append(Constant.VALUE_SPLIT_CHAR); // HTTP-Header[traceinfo]
        newValueStringBuffer.append(this.turnDataForNone(matcher.group(8))).append(Constant.VALUE_SPLIT_CHAR);   // HTTP-Header[user_agent]
        newValueStringBuffer.append(this.turnDataForNone(matcher.group(9))).append(Constant.VALUE_SPLIT_CHAR);   // HTTP-Header[x_forwarded_for]
        newValueStringBuffer.append(this.turnDateFormat(matcher.group(2)));                                      // Access-Time

        context.write(new Text(newValueStringBuffer.toString()), new Text());
    } catch (Exception e) {
        e.printStackTrace();
    }
}