List of usage examples for org.apache.hadoop.io.Text Text()
public Text()
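A minimal sketch of the pattern behind all the examples below: the no-argument constructor creates an empty, reusable buffer that is later filled in place via set(), append(), or readFields(). (Standalone illustration, not taken from the projects listed on this page.)

import org.apache.hadoop.io.Text;

public class TextConstructorDemo {
    public static void main(String[] args) {
        Text text = new Text();                   // empty buffer, zero length
        text.set("hello");                        // fill the buffer in place
        System.out.println(text.getLength());     // 5 (UTF-8 byte length)
        text.clear();                             // reset so the object can be reused
    }
}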
From source file:co.nubetech.hiho.merge.HihoValue.java
License:Apache License
@Override
public void readFields(DataInput in) throws IOException {
    isOld = new BooleanWritable();
    isOld.readFields(in);
    valClass = new Text();
    valClass.readFields(in);
    try {
        val = (V) Class.forName(valClass.toString()).newInstance();
    } catch (Exception e) {
        e.printStackTrace();
    }
    val.readFields(in);
}
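The Writable contract requires write() to emit the same fields in the same order that readFields() consumes them. A hedged sketch of what the matching HihoValue.write() would look like (the actual method is not shown on this page):

@Override
public void write(DataOutput out) throws IOException {
    isOld.write(out);      // the BooleanWritable flag, read back first above
    valClass.write(out);   // the Text holding the value's class name
    val.write(out);        // the value itself, reconstructed reflectively on read
}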
From source file:co.nubetech.hiho.similarity.ngram.ValuePair.java
License:Apache License
@Override
public void readFields(DataInput in) throws IOException {
    value1 = new Text();
    value1.readFields(in);
    value2 = new Text();
    value2.readFields(in);
}
From source file:co.nubetech.hiho.testdata.SequenceFileWriteDemo.java
License:Apache License
public static void main(String[] args) throws IOException {
    String uri = "input2.seq";
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < 2; i++) {
            key.set(2 - i);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(writer);
    }
}
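A hedged companion sketch for reading input2.seq back (not part of the original source file): SequenceFile.Reader fills a reusable key and a Text value created with the same no-argument constructor.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileReadDemo {
    public static void main(String[] args) throws Exception {
        String uri = "input2.seq";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, new Path(uri), conf);
            IntWritable key = new IntWritable();
            Text value = new Text();              // reusable buffer, filled by next()
            while (reader.next(key, value)) {
                System.out.printf("%s\t%s%n", key, value);
            }
        } finally {
            IOUtils.closeStream(reader);
        }
    }
}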
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImporterMapper.java
License:Apache License
/**
 * Convert a line of TSV text into an HBase table row.
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
    byte[] lineBytes = value.getBytes();
    try {
        TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength());
        int index = 0;
        for (int i = 0; i < parsed.getColumnCount(); i++) {
            // copy each parsed column into a fresh Text instance
            text = new Text();
            text.append(lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i));
            texts[index] = text;
            index++;
        }
        writer.set(texts);
        // generate the (possibly hashed) row key for this line
        String newRowKey = rowkeyGenerator.generateByGenRKStep(value.toString(), false);
        context.write(new ImmutableBytesWritable(newRowKey.getBytes()), writer);
    } catch (BadTsvLineException badLine) {
        if (skipBadLines) {
            LOG.error("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
            badLineCount.increment(1);
            return;
        } else {
            throw new IOException(badLine);
        }
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
From source file:com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileTest.java
License:Apache License
@Test
public void testOneFile() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());
    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile = new Path(dir, "file1.txt");
    writeSequenceFile(inputFile);
    Configuration conf = new Configuration();
    JobConf jobConf = new JobConf(conf);
    FileInputFormat.addInputPath(jobConf, inputFile);
    InputSplit[] splits = inputFormat.getSplits(jobConf, 1);
    assertEquals(1, splits.length);
    CommonCombineRecordReader<Text, Text> rr = (CommonCombineRecordReader<Text, Text>) inputFormat
            .getRecordReader(splits[0], jobConf, new DummyReporter());
    Text k = new Text();
    Text v = new Text();
    assertTrue(rr.next(k, v));
    assertEquals(key, k);
    assertEquals(value, v);
    assertFalse(rr.next(k, v));
    assertEquals(1.0f, rr.getProgress(), 0.1);
}
From source file:com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileTest.java
License:Apache License
@Test
public void testTwoFiles() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());
    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile1 = new Path(dir, "file1.txt");
    Path inputFile2 = new Path(dir, "file2.txt");
    writeSequenceFile(inputFile1);
    writeSequenceFile(inputFile2);
    Configuration conf = new Configuration();
    JobConf jobConf = new JobConf(conf);
    FileInputFormat.addInputPath(jobConf, inputFile1);
    FileInputFormat.addInputPath(jobConf, inputFile2);
    InputSplit[] splits = inputFormat.getSplits(jobConf, 1);
    assertEquals(1, splits.length);
    CommonCombineRecordReader<Text, Text> rr = (CommonCombineRecordReader<Text, Text>) inputFormat
            .getRecordReader(splits[0], jobConf, new DummyReporter());
    Text k = new Text();
    Text v = new Text();
    assertTrue(rr.next(k, v));
    assertEquals(key, k);
    assertEquals(value, v);
    assertEquals(0.5f, rr.getProgress(), 0.1);
    assertTrue(rr.next(k, v));
    assertEquals(key, k);
    assertEquals(value, v);
    assertFalse(rr.next(k, v));
    assertEquals(1.0f, rr.getProgress(), 0.1);
}
From source file:com.alexholmes.hadooputils.sort.DelimitedLineRecordReader.java
License:Apache License
protected void initialize(Configuration job, FileSplit split) throws IOException {
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    String rowDelim = job.get("textinputformat.record.delimiter", null);
    if (codec != null) {
        if (rowDelim != null) {
            byte[] hexcode = SortConfig.getHexDelimiter(rowDelim);
            in = new DelimitedLineReader(codec.createInputStream(fileIn), job,
                    (hexcode != null) ? hexcode : rowDelim.getBytes());
        } else {
            in = new DelimitedLineReader(codec.createInputStream(fileIn), job);
        }
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        if (rowDelim != null) {
            byte[] hexcode = SortConfig.getHexDelimiter(rowDelim);
            in = new DelimitedLineReader(fileIn, job, (hexcode != null) ? hexcode : rowDelim.getBytes());
        } else {
            in = new DelimitedLineReader(fileIn, job);
        }
    }
    if (skipFirstLine) {
        // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
From source file:com.alexholmes.hadooputils.sort.DelimitedLineRecordReader.java
License:Apache License
@Override
public Text createValue() {
    return new Text();
}
From source file:com.alexholmes.hadooputils.sort.LzoDelimitedLineRecordReader.java
License:Apache License
@Override
protected void initialize(Configuration job, FileSplit split) throws IOException {
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("No codec found for file " + file + ", cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());
    // creates input stream and also reads the file header
    String rowDelim = job.get("textinputformat.record.delimiter", null);
    if (rowDelim != null) {
        byte[] hexcode = SortConfig.getHexDelimiter(rowDelim);
        in = new DelimitedLineReader(fileIn, job, (hexcode != null) ? hexcode : rowDelim.getBytes());
    } else {
        in = new DelimitedLineReader(codec.createInputStream(fileIn), job);
    }
    if (start != 0) {
        fileIn.seek(start);
        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }
    this.pos = start;
}
From source file:com.alexholmes.hadooputils.sort.SortRecordReader.java
License:Apache License
/**
 * Extract the key from the sort line, using the supplied options.
 *
 * @param value          the sort line
 * @param startKey       the start key, or null if there isn't one
 * @param endKey         the end key, or null if there isn't one
 * @param fieldSeparator the field separator, used if a start (and optionally end) key are set
 * @param ignoreCase     whether the result should be lower-cased to ensure case is ignored
 * @return the key
 * @throws IOException if something goes wrong
 */
protected static Text extractKey(final Text value, final Integer startKey, final Integer endKey,
        final String fieldSeparator, final boolean ignoreCase) throws IOException {
    Text result = new Text();
    if (startKey == null) {
        result.set(value);
    } else {
        // startKey is 1-based in the Linux sort, so decrement it to be 0-based
        int startIdx = startKey - 1;
        String[] parts = StringUtils.split(value.toString(), fieldSeparator);
        if (startIdx >= parts.length) {
            throw new IOException("Start index is greater than parts in line");
        }
        int endIdx = parts.length;
        if (endKey != null) {
            // endKey is also 1-based in the Linux sort, but the StringUtils.join
            // end index is exclusive, so no need to decrement
            endIdx = endKey;
            if (endIdx > parts.length) {
                throw new IOException("End index is greater than parts in line");
            }
        }
        result.set(StringUtils.join(parts, fieldSeparator, startIdx, endIdx));
    }
    if (ignoreCase) {
        result.set(result.toString().toLowerCase());
    }
    return result;
}
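To make the 1-based key semantics concrete, a hypothetical call mirroring Linux sort -k2,3 on a comma-separated line (the input values here are illustrative):

Text line = new Text("apple,banana,cherry,date");
// startKey = 2, endKey = 3 keeps fields 2 through 3, like sort -k2,3
Text key = extractKey(line, 2, 3, ",", false);
// key now holds "banana,cherry"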