List of usage examples for org.apache.hadoop.io Text getBytes
@Override public byte[] getBytes()
From source file:co.nubetech.hiho.dedup.DelimitedLineRecordReader.java
License:Apache License
public Text getColumn(Text val, int column, String delimiter) throws IOException { if (delimiter == null || delimiter.equals("")) { throw new IOException("Value of delimiter is empty"); }//from w w w.java 2 s . c om int lastOccurance = 0; int occurance = 0; for (int i = 0; i < column; i++) { occurance = val.find(delimiter, lastOccurance) - lastOccurance; lastOccurance = lastOccurance + occurance + delimiter.length(); } logger.debug("text value is: " + val); int delimiterLength = delimiter.length(); int startPosition = lastOccurance - (occurance + delimiterLength); Text keyColumn = new Text(); keyColumn.set(val.getBytes(), startPosition, occurance); return keyColumn; }
From source file:co.nubetech.hiho.dedup.HashUtility.java
License:Apache License
/**
 * Computes the MD5 digest of the content of a {@link Text} key.
 *
 * @param key the text whose content is hashed
 * @return the MD5 hash of the key's valid bytes
 * @throws IOException declared for callers; kept for interface compatibility
 */
public static MD5Hash getMD5Hash(Text key) throws IOException {
    // Text.getBytes() exposes the backing array, which is only valid up to
    // getLength(); digesting the whole array would mix in stale bytes left
    // over from a previously held, longer value.
    return MD5Hash.digest(key.getBytes(), 0, key.getLength());
}
From source file:com.acme.io.JsonLoader.java
License:Apache License
/** * Retrieves the next tuple to be processed. Implementations should NOT * reuse tuple objects (or inner member objects) they return across calls * and should return a different tuple object in each call. * @return the next tuple to be processed or null if there are no more * tuples to be processed.//from w w w .j a v a2 s. co m * @throws IOException if there is an exception while retrieving the next * tuple */ public Tuple getNext() throws IOException { Text val = null; try { // Read the next key value pair from the record reader. If it's // finished, return null if (!reader.nextKeyValue()) return null; // Get the current value. We don't use the key. val = (Text) reader.getCurrentValue(); } catch (InterruptedException ie) { throw new IOException(ie); } // Create a parser specific for this input line. This may not be the // most efficient approach. ByteArrayInputStream bais = new ByteArrayInputStream(val.getBytes()); JsonParser p = jsonFactory.createJsonParser(bais); // Create the tuple we will be returning. We create it with the right // number of fields, as the Tuple object is optimized for this case. Tuple t = tupleFactory.newTuple(fields.length); // Read the start object marker. Throughout this file if the parsing // isn't what we expect we return a tuple with null fields rather than // throwing an exception. That way a few mangled lines don't fail the // job. if (p.nextToken() != JsonToken.START_OBJECT) { log.warn("Bad record, could not find start of record " + val.toString()); return t; } // Read each field in the record for (int i = 0; i < fields.length; i++) { t.set(i, readField(p, fields[i], i)); } if (p.nextToken() != JsonToken.END_OBJECT) { log.warn("Bad record, could not find end of record " + val.toString()); return t; } p.close(); return t; }
From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImporterMapper.java
License:Apache License
/** * Convert a line of TSV text into an HBase table row. *//*from w ww. j a v a 2s. c o m*/ @Override public void map(LongWritable offset, Text value, Context context) throws IOException { byte[] lineBytes = value.getBytes(); ts = System.currentTimeMillis(); try { MutipleColumnImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength()); String newRowKey = rowkeyGenerator.generateByGenRKStep(value.toString(), false);//???rowkey Put put = new Put(newRowKey.getBytes()); for (int i = 0; i < parsed.getColumnCount(); i++) { String columnQualifierStr = new String(parser.getQualifier(i)); String rowStr = newRowKey + new String(parser.getFamily(i) + columnQualifierStr); if (notNeedLoadColumnQulifiers.contains(columnQualifierStr)) { continue; } KeyValue kv = new KeyValue(rowStr.getBytes(), 0, newRowKey.getBytes().length, //roffset,rofflength parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); KeyValue newKv = new KeyValue(newRowKey.getBytes(), kv.getFamily(), kv.getQualifier(), ts, kv.getValue()); kv = null; put.add(newKv); } context.write(new ImmutableBytesWritable(newRowKey.getBytes()), put); } catch (MutipleColumnImportTsv.TsvParser.BadTsvLineException badLine) { if (skipBadLines) { System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage()); incrementBadLineCount(1); return; } else { throw new IOException(badLine); } } catch (IllegalArgumentException e) { if (skipBadLines) { System.err.println("Bad line at offset: " + offset.get() + ":\n" + e.getMessage()); incrementBadLineCount(1); return; } else { throw new IOException(e); } } catch (InterruptedException e) { e.printStackTrace(); } catch (RowKeyGeneratorException e) { System.err.println("gen rowkey error, please check config in the ocnosqlTab.xml." 
+ e.getMessage()); throw new IOException(e); } finally { totalLineCount.increment(1); } }
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImporterMapper.java
License:Apache License
/** * Convert a line of TSV text into an HBase table row. * //from ww w. j a v a2 s.c o m */ @Override public void map(LongWritable offset, Text value, Context context) throws IOException { byte[] lineBytes = value.getBytes(); try { TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength()); // Text[] texts = new Text[parsed.getColumnCount()]; int index = 0; for (int i = 0; i < parsed.getColumnCount(); i++) { // if (i == parser.getRowKeyColumnIndex()){ // continue; // } text = new Text(); //? text.append(lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); texts[index] = text; index++; } writer.set(texts); /* //rowkey String oriRowKey = new String(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength()); // hash rowkey String newRowKey = oriRowKey; if(rowkeyGenerator != null){ newRowKey = (String)rowkeyGenerator.generate(oriRowKey); } */ String newRowKey = rowkeyGenerator.generateByGenRKStep(value.toString(), false);//???rowkey //LOG.info("single column newRowKey = " + newRowKey); context.write(new ImmutableBytesWritable(newRowKey.getBytes()), writer); } catch (BadTsvLineException badLine) { if (skipBadLines) { LOG.error("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage()); badLineCount.increment(1); return; } else { throw new IOException(badLine); } } catch (InterruptedException e) { e.printStackTrace(); } }
From source file:com.asakusafw.dag.runtime.io.ValueOptionSerDe.java
License:Apache License
/**
 * Writes a {@link StringOption} to the given output.
 *
 * <p>A null option is written as the {@code UNSIGNED_NULL} marker alone;
 * otherwise the byte length is written as a compact int, followed by that
 * many bytes of the text content.
 *
 * @param option the option to serialize
 * @param output the destination
 * @throws IOException if writing to the output fails
 */
public static void serialize(StringOption option, DataOutput output) throws IOException {
    if (option.isNull()) {
        writeCompactInt(UNSIGNED_NULL, output);
        return;
    }
    Text text = option.get();
    int size = text.getLength();
    writeCompactInt(size, output);
    output.write(text.getBytes(), 0, size);
}
From source file:com.asakusafw.dag.runtime.io.ValueOptionSerDe.java
License:Apache License
/** * Deserializes {@link StringOption} object. * @param option the target object//from www .j a v a 2 s .com * @param input the source input * @throws IOException if I/O error was occurred while deserializing the object */ public static void deserialize(StringOption option, DataInput input) throws IOException { int length = readCompactInt(input); if (length == UNSIGNED_NULL) { option.setNull(); } else { if (option.isNull() == false) { Text entity = option.get(); if (length == entity.getLength()) { // optimize for same-length text properties input.readFully(entity.getBytes(), 0, length); return; } } else { // set as non-null option.reset(); } byte[] buffer = getLocalBuffer(length, Integer.MAX_VALUE); input.readFully(buffer, 0, length); option.modify(buffer, 0, length); } }
From source file:com.asakusafw.runtime.io.line.BasicLineOutput.java
License:Apache License
private void write(Text entity) throws IOException { if (entity.getLength() == 0) { return;//from www .j av a 2 s. c om } ByteBuffer buffer = wrapperCache; byte[] b = entity.getBytes(); if (buffer == null || buffer.array() != b) { buffer = ByteBuffer.wrap(b); wrapperCache = buffer; } buffer.position(0); buffer.limit(entity.getLength()); boolean flushing = false; CharBuffer cs = charBuffer; while (true) { cs.clear(); CoderResult result; if (flushing) { result = decoder.flush(cs); } else { result = decoder.decode(buffer, cs, true); } if (result.isError() == false) { cs.flip(); if (cs.hasRemaining()) { writer.append(cs); } if (result.isUnderflow()) { if (flushing) { flushing = true; } else { break; } } } else { assert result.isError(); try { result.throwException(); } catch (CharacterCodingException e) { throw new IOException(MessageFormat.format("exception occurred while decoding text: {0}", path), e); } } } }
From source file:com.asakusafw.runtime.io.line.Utf8LineOutput.java
License:Apache License
@Override public void write(StringOption model) throws IOException { if (model == null) { throw new IllegalArgumentException("model must not be null"); //$NON-NLS-1$ }/* w w w . ja v a 2 s .c o m*/ if (model.isNull()) { return; } Text entity = model.get(); output.write(entity.getBytes(), 0, entity.getLength()); output.write(LINE_BREAK); }
From source file:com.asakusafw.runtime.io.TsvEmitter.java
License:Apache License
@Override public void emit(StringOption option) throws IOException { startCell();//from www. j a va2 s . c o m if (emitNull(option)) { return; } Text text = option.get(); if (text.getLength() == 0) { return; } byte[] bytes = text.getBytes(); ByteBuffer source = ByteBuffer.wrap(bytes, 0, text.getLength()); decoder.reset(); decodeBuffer.clear(); while (true) { CoderResult result = decoder.decode(source, decodeBuffer, true); if (result.isError()) { throw new RecordFormatException( MessageFormat.format("Cannot process a character string (\"{0}\")", result)); } if (result.isUnderflow()) { consumeDecoded(); break; } if (result.isOverflow()) { consumeDecoded(); } } while (true) { CoderResult result = decoder.flush(decodeBuffer); if (result.isError()) { throw new RecordFormatException( MessageFormat.format("Cannot process a character string (\"{0}\")", result)); } if (result.isUnderflow()) { consumeDecoded(); break; } if (result.isOverflow()) { consumeDecoded(); } } }