Example usage for org.apache.hadoop.io.Text: Text()

Introduction

This page collects example usages of the org.apache.hadoop.io.Text no-argument constructor, Text().

Prototype

public Text() 
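
The no-argument constructor creates an empty, reusable Text whose backing buffer grows on demand. A minimal round-trip sketch using only standard Hadoop and JDK classes:

import java.io.*;
import org.apache.hadoop.io.Text;

public class TextRoundTrip {
    public static void main(String[] args) throws IOException {
        Text t = new Text();      // empty; typically reused across records
        t.set("hello");

        // serialize through the Writable interface...
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        t.write(new DataOutputStream(bytes));

        // ...and read back into a second empty instance
        Text copy = new Text();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy); // prints "hello"
    }
}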

Usage

From source file: co.nubetech.hiho.merge.HihoValue.java

License: Apache License

@Override
public void readFields(DataInput in) throws IOException {
    isOld = new BooleanWritable();
    isOld.readFields(in);
    valClass = new Text();
    valClass.readFields(in);
    try {
        val = (V) Class.forName(valClass.toString()).newInstance();
    } catch (Exception e) {
        // fail fast: val would otherwise be null when val.readFields(in) runs below
        throw new IOException("Cannot instantiate value class " + valClass, e);
    }
    val.readFields(in);
}
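
For context, the write side of this pattern records the value's class name as a Text before the value itself, which is what lets readFields above reconstruct the right Writable. A hedged sketch of that mirror method (HihoValue's actual write implementation is not shown on this page; field names are taken from readFields):

// Hypothetical mirror of readFields above, not the verbatim HihoValue source.
@Override
public void write(DataOutput out) throws IOException {
    isOld.write(out);     // BooleanWritable flag
    valClass.write(out);  // class name stored as Text
    val.write(out);       // the wrapped Writable value
}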

From source file: co.nubetech.hiho.similarity.ngram.ValuePair.java

License: Apache License

@Override
public void readFields(DataInput in) throws IOException {
    value1 = new Text();
    value1.readFields(in);

    value2 = new Text();
    value2.readFields(in);
}

From source file: co.nubetech.hiho.testdata.SequenceFileWriteDemo.java

License: Apache License

public static void main(String[] args) throws IOException {
    String uri = "input2.seq";
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < 2; i++) {
            key.set(2 - i);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(writer);
    }
}
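
A matching reader sketch, reusing one Text instance per record just as the writer does (standard SequenceFile.Reader API; the path matches the demo above):

SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("input2.seq"), conf);
IntWritable key = new IntWritable();
Text value = new Text();               // one instance, refilled on every next() call
while (reader.next(key, value)) {
    System.out.printf("%s\t%s%n", key, value);
}
reader.close();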

From source file: com.ailk.oci.ocnosql.tools.load.single.SingleColumnImporterMapper.java

License: Apache License

/**
 * Convert a line of TSV text into an HBase table row.
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
    byte[] lineBytes = value.getBytes();

    try {
        TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength());
        Text[] texts = new Text[parsed.getColumnCount()];
        int index = 0;
        for (int i = 0; i < parsed.getColumnCount(); i++) {
            //            if (i == parser.getRowKeyColumnIndex()){
            //               continue;
            //            }
            text = new Text();
            // copy the i-th column's bytes into a fresh Text
            text.append(lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i));
            texts[index] = text;
            index++;
        }
        writer.set(texts);
        /*
        //rowkey
        String oriRowKey = new String(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength());
                
        // hash rowkey
        String newRowKey = oriRowKey;
        if(rowkeyGenerator != null){
           newRowKey = (String)rowkeyGenerator.generate(oriRowKey);
        }
        */
        String newRowKey = rowkeyGenerator.generateByGenRKStep(value.toString(), false); // generate the (possibly hashed) row key from the full line
        //LOG.info("single column newRowKey = " + newRowKey);
        context.write(new ImmutableBytesWritable(newRowKey.getBytes()), writer);
    } catch (BadTsvLineException badLine) {
        if (skipBadLines) {
            LOG.error("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
            badLineCount.increment(1);
            return;
        } else {
            throw new IOException(badLine);
        }
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt(); // restore the interrupt flag rather than swallowing it
        throw new IOException("Interrupted while writing row", e);
    }
}
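
Note the idiom the mapper relies on: new Text() followed by append(byte[], int, int) copies a slice of the raw line into the Text without materializing an intermediate String. A self-contained sketch of the same call:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class AppendSlice {
    public static void main(String[] args) {
        byte[] line = "alpha\tbeta".getBytes(StandardCharsets.UTF_8);
        Text col = new Text();
        col.append(line, 6, 4);        // copy the "beta" slice; no String is created
        System.out.println(col);       // prints "beta"
    }
}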

From source file: com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileTest.java

License: Apache License

@Test
public void testOneFile() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());

    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile = new Path(dir, "file1.txt");

    writeSequenceFile(inputFile);

    Configuration conf = new Configuration();
    JobConf jobConf = new JobConf(conf);

    FileInputFormat.addInputPath(jobConf, inputFile);

    InputSplit[] splits = inputFormat.getSplits(jobConf, 1);
    assertEquals(1, splits.length);

    CommonCombineRecordReader<Text, Text> rr = (CommonCombineRecordReader<Text, Text>) inputFormat
            .getRecordReader(splits[0], jobConf, new DummyReporter());
    Text k = new Text();
    Text v = new Text();
    assertTrue(rr.next(k, v));

    assertEquals(key, k);
    assertEquals(value, v);

    assertFalse(rr.next(k, v));
    assertEquals(1.0f, rr.getProgress(), 0.1);
}
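
The writeSequenceFile helper and the key/value fields it compares against are test-class members not reproduced on this page; a plausible sketch, assuming the helper writes exactly the one pair the assertions check:

// Hypothetical reconstruction of the helper, not the verbatim test source.
private void writeSequenceFile(Path path) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, Text.class, Text.class);
    try {
        writer.append(key, value);     // the same pair asserted via rr.next(k, v)
    } finally {
        IOUtils.closeStream(writer);
    }
}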

From source file: com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileTest.java

License: Apache License

@Test
public void testTwoFiles() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());

    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile1 = new Path(dir, "file1.txt");
    Path inputFile2 = new Path(dir, "file2.txt");

    writeSequenceFile(inputFile1);
    writeSequenceFile(inputFile2);

    Configuration conf = new Configuration();
    JobConf jobConf = new JobConf(conf);

    FileInputFormat.addInputPath(jobConf, inputFile1);
    FileInputFormat.addInputPath(jobConf, inputFile2);

    InputSplit[] splits = inputFormat.getSplits(jobConf, 1);
    assertEquals(1, splits.length);

    CommonCombineRecordReader<Text, Text> rr = (CommonCombineRecordReader<Text, Text>) inputFormat
            .getRecordReader(splits[0], jobConf, new DummyReporter());
    Text k = new Text();
    Text v = new Text();

    assertTrue(rr.next(k, v));

    assertEquals(key, k);
    assertEquals(value, v);

    assertEquals(0.5f, rr.getProgress(), 0.1);

    assertTrue(rr.next(k, v));

    assertEquals(key, k);
    assertEquals(value, v);

    assertFalse(rr.next(k, v));
    assertEquals(1.0f, rr.getProgress(), 0.1);
}

From source file: com.alexholmes.hadooputils.sort.DelimitedLineRecordReader.java

License: Apache License

protected void initialize(Configuration job, FileSplit split) throws IOException {
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    String rowDelim = job.get("textinputformat.record.delimiter", null);
    if (codec != null) {
        if (rowDelim != null) {
            byte[] hexcode = SortConfig.getHexDelimiter(rowDelim);
            in = new DelimitedLineReader(codec.createInputStream(fileIn), job,
                    (hexcode != null) ? hexcode : rowDelim.getBytes());
        } else {
            in = new DelimitedLineReader(codec.createInputStream(fileIn), job);
        }
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        if (rowDelim != null) {
            byte[] hexcode = SortConfig.getHexDelimiter(rowDelim);
            in = new DelimitedLineReader(fileIn, job, (hexcode != null) ? hexcode : rowDelim.getBytes());
        } else {
            in = new DelimitedLineReader(fileIn, job);
        }
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
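
The reader honors the standard textinputformat.record.delimiter property, so a job can switch to a custom record delimiter with a single configuration line (the hex-encoded form is whatever SortConfig.getHexDelimiter accepts, which this page does not document):

Configuration conf = new Configuration();
conf.set("textinputformat.record.delimiter", "\u0001");  // split records on SOH instead of '\n'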

From source file: com.alexholmes.hadooputils.sort.DelimitedLineRecordReader.java

License: Apache License

@Override
public Text createValue() {
    return new Text();
}

From source file: com.alexholmes.hadooputils.sort.LzoDelimitedLineRecordReader.java

License: Apache License

@Override
protected void initialize(Configuration job, FileSplit split) throws IOException {
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("No codec found for file " + file + ", cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());

    // creates input stream and also reads the file header
    String rowDelim = job.get("textinputformat.record.delimiter", null);
    if (rowDelim != null) {
        byte[] hexcode = SortConfig.getHexDelimiter(rowDelim);
        // wrap the compressed stream in both branches; reading raw fileIn here
        // would hand compressed bytes to the line reader
        in = new DelimitedLineReader(codec.createInputStream(fileIn), job,
                (hexcode != null) ? hexcode : rowDelim.getBytes());
    } else {
        in = new DelimitedLineReader(codec.createInputStream(fileIn), job);
    }

    if (start != 0) {
        fileIn.seek(start);

        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }

    this.pos = start;
}

From source file: com.alexholmes.hadooputils.sort.SortRecordReader.java

License: Apache License

/**
 * Extract the key from the sort line, using the supplied options.
 *
 * @param value          the sort line
 * @param startKey       the start key, or null if there isn't one
 * @param endKey         the end key, or null if there isn't one
 * @param fieldSeparator the field separator, used if a start (and optionally end) key are set
 * @param ignoreCase     whether the result should be lower-cased to ensure case is ignored
 * @return the key
 * @throws IOException if something goes wrong
 */
protected static Text extractKey(final Text value, final Integer startKey, final Integer endKey,
        final String fieldSeparator, final boolean ignoreCase) throws IOException {

    Text result = new Text();

    if (startKey == null) {
        result.set(value);
    } else {

        // startKey is 1-based in the Linux sort, so decrement it to be 0-based
        //
        int startIdx = startKey - 1;

        String[] parts = StringUtils.split(value.toString(), fieldSeparator);

        if (startIdx >= parts.length) {
            throw new IOException("Start index is greater than parts in line");
        }

        int endIdx = parts.length;

        if (endKey != null) {
            // endKey is also 1-based in the Linux sort, but the StringUtils.join
            // end index is exclusive, so no need to decrement
            //
            endIdx = endKey;
            if (endIdx > parts.length) {
                throw new IOException("End index is greater than parts in line");
            }
        }

        result.set(StringUtils.join(parts, fieldSeparator, startIdx, endIdx));
    }

    if (ignoreCase) {
        result.set(result.toString().toLowerCase());
    }

    return result;
}
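
For example, with 1-based start and end keys of 2 and 3 on a tab-separated line, extractKey keeps the second and third fields:

Text key = extractKey(new Text("a\tb\tc"), 2, 3, "\t", false);
// key.toString() is now "b\tc"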