List of usage examples for org.apache.hadoop.io.Text Text()
public Text()
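A minimal sketch of the pattern behind all the examples below: the no-argument constructor creates an empty, reusable buffer that is later filled in place via set(), append(), or readFields(). (Standalone illustration, not taken from the projects listed on this page.)

import org.apache.hadoop.io.Text;

public class TextConstructorDemo {
    public static void main(String[] args) {
        Text text = new Text();                   // empty buffer, zero length
        text.set("hello");                        // fill the buffer in place
        System.out.println(text.getLength());     // 5 (UTF-8 byte length)
        text.clear();                             // reset so the object can be reused
    }
}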
From source file:co.nubetech.hiho.merge.HihoValue.java
License:Apache License
@Override
public void readFields(DataInput in) throws IOException {
    isOld = new BooleanWritable();
    isOld.readFields(in);
    valClass = new Text();
    valClass.readFields(in);
    try {
        val = (V) Class.forName(valClass.toString()).newInstance();
    } catch (Exception e) {
        e.printStackTrace();
    }
    val.readFields(in);
}
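The Writable contract requires write() to emit the same fields in the same order that readFields() consumes them. A hedged sketch of what the matching HihoValue.write() would look like (the actual method is not shown on this page):

@Override
public void write(DataOutput out) throws IOException {
    isOld.write(out);      // the BooleanWritable flag, read back first above
    valClass.write(out);   // the Text holding the value's class name
    val.write(out);        // the value itself, reconstructed reflectively on read
}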
From source file:co.nubetech.hiho.similarity.ngram.ValuePair.java
License:Apache License
@Override
public void readFields(DataInput in) throws IOException {
    value1 = new Text();
    value1.readFields(in);
    value2 = new Text();
    value2.readFields(in);
}
From source file:co.nubetech.hiho.testdata.SequenceFileWriteDemo.java
License:Apache License
public static void main(String[] args) throws IOException {
    String uri = "input2.seq";
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < 2; i++) {
            key.set(2 - i);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(writer);
    }
}
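A hedged companion sketch for reading input2.seq back (not part of the original source file): SequenceFile.Reader fills a reusable key and a Text value created with the same no-argument constructor.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileReadDemo {
    public static void main(String[] args) throws Exception {
        String uri = "input2.seq";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, new Path(uri), conf);
            IntWritable key = new IntWritable();
            Text value = new Text();              // reusable buffer, filled by next()
            while (reader.next(key, value)) {
                System.out.printf("%s\t%s%n", key, value);
            }
        } finally {
            IOUtils.closeStream(reader);
        }
    }
}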
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImporterMapper.java
License:Apache License
/**
 * Convert a line of TSV text into an HBase table row.
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
    byte[] lineBytes = value.getBytes();
    try {
        TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength());
        int index = 0;
        for (int i = 0; i < parsed.getColumnCount(); i++) {
            // copy each parsed column into a fresh Text instance
            text = new Text();
            text.append(lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i));
            texts[index] = text;
            index++;
        }
        writer.set(texts);
        // generate the (possibly hashed) row key for this line
        String newRowKey = rowkeyGenerator.generateByGenRKStep(value.toString(), false);
        context.write(new ImmutableBytesWritable(newRowKey.getBytes()), writer);
    } catch (BadTsvLineException badLine) {
        if (skipBadLines) {
            LOG.error("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
            badLineCount.increment(1);
            return;
        } else {
            throw new IOException(badLine);
        }
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
From source file:com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileTest.java
License:Apache License
@Test
public void testOneFile() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());
    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile = new Path(dir, "file1.txt");
    writeSequenceFile(inputFile);
    Configuration conf = new Configuration();
    JobConf jobConf = new JobConf(conf);
    FileInputFormat.addInputPath(jobConf, inputFile);
    InputSplit[] splits = inputFormat.getSplits(jobConf, 1);
    assertEquals(1, splits.length);
    CommonCombineRecordReader<Text, Text> rr = (CommonCombineRecordReader<Text, Text>) inputFormat
            .getRecordReader(splits[0], jobConf, new DummyReporter());
    Text k = new Text();
    Text v = new Text();
    assertTrue(rr.next(k, v));
    assertEquals(key, k);
    assertEquals(value, v);
    assertFalse(rr.next(k, v));
    assertEquals(1.0f, rr.getProgress(), 0.1);
}
From source file:com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileTest.java
License:Apache License
@Test
public void testTwoFiles() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());
    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile1 = new Path(dir, "file1.txt");
    Path inputFile2 = new Path(dir, "file2.txt");
    writeSequenceFile(inputFile1);
    writeSequenceFile(inputFile2);
    Configuration conf = new Configuration();
    JobConf jobConf = new JobConf(conf);
    FileInputFormat.addInputPath(jobConf, inputFile1);
    FileInputFormat.addInputPath(jobConf, inputFile2);
    InputSplit[] splits = inputFormat.getSplits(jobConf, 1);
    assertEquals(1, splits.length);
    CommonCombineRecordReader<Text, Text> rr = (CommonCombineRecordReader<Text, Text>) inputFormat
            .getRecordReader(splits[0], jobConf, new DummyReporter());
    Text k = new Text();
    Text v = new Text();
    assertTrue(rr.next(k, v));
    assertEquals(key, k);
    assertEquals(value, v);
    assertEquals(0.5f, rr.getProgress(), 0.1);
    assertTrue(rr.next(k, v));
    assertEquals(key, k);
    assertEquals(value, v);
    assertFalse(rr.next(k, v));
    assertEquals(1.0f, rr.getProgress(), 0.1);
}
From source file:com.alexholmes.hadooputils.sort.DelimitedLineRecordReader.java
License:Apache License
protected void initialize(Configuration job, FileSplit split) throws IOException {
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    String rowDelim = job.get("textinputformat.record.delimiter", null);
    if (codec != null) {
        if (rowDelim != null) {
            byte[] hexcode = SortConfig.getHexDelimiter(rowDelim);
            in = new DelimitedLineReader(codec.createInputStream(fileIn), job,
                    (hexcode != null) ? hexcode : rowDelim.getBytes());
        } else {
            in = new DelimitedLineReader(codec.createInputStream(fileIn), job);
        }
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        if (rowDelim != null) {
            byte[] hexcode = SortConfig.getHexDelimiter(rowDelim);
            in = new DelimitedLineReader(fileIn, job, (hexcode != null) ? hexcode : rowDelim.getBytes());
        } else {
            in = new DelimitedLineReader(fileIn, job);
        }
    }
    if (skipFirstLine) {
        // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
From source file:com.alexholmes.hadooputils.sort.DelimitedLineRecordReader.java
License:Apache License
@Override
public Text createValue() {
    return new Text();
}
From source file:com.alexholmes.hadooputils.sort.LzoDelimitedLineRecordReader.java
License:Apache License
@Override
protected void initialize(Configuration job, FileSplit split) throws IOException {
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("No codec found for file " + file + ", cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());
    // creates input stream and also reads the file header
    String rowDelim = job.get("textinputformat.record.delimiter", null);
    if (rowDelim != null) {
        byte[] hexcode = SortConfig.getHexDelimiter(rowDelim);
        in = new DelimitedLineReader(fileIn, job, (hexcode != null) ? hexcode : rowDelim.getBytes());
    } else {
        in = new DelimitedLineReader(codec.createInputStream(fileIn), job);
    }
    if (start != 0) {
        fileIn.seek(start);
        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }
    this.pos = start;
}
From source file:com.alexholmes.hadooputils.sort.SortRecordReader.java
License:Apache License
/**
 * Extract the key from the sort line, using the supplied options.
 *
 * @param value          the sort line
 * @param startKey       the start key, or null if there isn't one
 * @param endKey         the end key, or null if there isn't one
 * @param fieldSeparator the field separator, used if a start (and optionally end) key are set
 * @param ignoreCase     whether the result should be lower-cased to ensure case is ignored
 * @return the key
 * @throws IOException if something goes wrong
 */
protected static Text extractKey(final Text value, final Integer startKey, final Integer endKey,
        final String fieldSeparator, final boolean ignoreCase) throws IOException {
    Text result = new Text();
    if (startKey == null) {
        result.set(value);
    } else {
        // startKey is 1-based in the Linux sort, so decrement it to be 0-based
        int startIdx = startKey - 1;
        String[] parts = StringUtils.split(value.toString(), fieldSeparator);
        if (startIdx >= parts.length) {
            throw new IOException("Start index is greater than parts in line");
        }
        int endIdx = parts.length;
        if (endKey != null) {
            // endKey is also 1-based in the Linux sort, but the StringUtils.join
            // end index is exclusive, so no need to decrement
            endIdx = endKey;
            if (endIdx > parts.length) {
                throw new IOException("End index is greater than parts in line");
            }
        }
        result.set(StringUtils.join(parts, fieldSeparator, startIdx, endIdx));
    }
    if (ignoreCase) {
        result.set(result.toString().toLowerCase());
    }
    return result;
}
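To make the 1-based key semantics concrete, a hypothetical call mirroring Linux sort -k2,3 on a comma-separated line (the input values here are illustrative):

Text line = new Text("apple,banana,cherry,date");
// startKey = 2, endKey = 3 keeps fields 2 through 3, like sort -k2,3
Text key = extractKey(line, 2, 3, ",", false);
// key now holds "banana,cherry"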