List of usage examples for the org.apache.hadoop.io.Text method copyBytes()
public byte[] copyBytes()
From source file:mapred.io.CustomRecordReader.java
License:Apache License
private int skipUtfByteOrderMark() throws IOException { Text value = new Text(); // Strip BOM(Byte Order Mark) // Text only support UTF-8, we only need to check UTF-8 BOM // (0xEF,0xBB,0xBF) at the start of the text stream. int newMaxLineLength = (int) Math.min(3L + (long) maxLineLength, Integer.MAX_VALUE); int newSize = in.readLine(value, newMaxLineLength, maxBytesToConsume(pos)); // Even we read 3 extra bytes for the first line, // we won't alter existing behavior (no backwards incompat issue). // Because the newSize is less than maxLineLength and // the number of bytes copied to Text is always no more than newSize. // If the return size from readLine is not less than maxLineLength, // we will discard the current line and read the next line. pos += newSize;/*from w w w . j a va 2 s.c o m*/ int textLength = value.getLength(); byte[] textBytes = value.getBytes(); if ((textLength >= 3) && (textBytes[0] == (byte) 0xEF) && (textBytes[1] == (byte) 0xBB) && (textBytes[2] == (byte) 0xBF)) { // find UTF-8 BOM, strip it. LOG.info("Found UTF-8 BOM and skipped it"); textLength -= 3; newSize -= 3; if (textLength > 0) { // It may work to use the same buffer and not do the copyBytes textBytes = value.copyBytes(); value.set(textBytes, 3, textLength); } else { value.clear(); } } return newSize; }
From source file:mr.MyFileRecordReader2.java
License:Apache License
private int skipUtfByteOrderMark(Text value) throws IOException { // Strip BOM(Byte Order Mark) // Text only support UTF-8, we only need to check UTF-8 BOM // (0xEF,0xBB,0xBF) at the start of the text stream. int newMaxLineLength = (int) Math.min(3L + (long) maxLineLength, Integer.MAX_VALUE); int newSize = in.readLine(value, newMaxLineLength, maxBytesToConsume(pos)); // Even we read 3 extra bytes for the first line, // we won't alter existing behavior (no backwards incompat issue). // Because the newSize is less than maxLineLength and // the number of bytes copied to Text is always no more than newSize. // If the return size from readLine is not less than maxLineLength, // we will discard the current line and read the next line. pos += newSize;/* ww w .j a v a2s. com*/ int textLength = value.getLength(); byte[] textBytes = value.getBytes(); if ((textLength >= 3) && (textBytes[0] == (byte) 0xEF) && (textBytes[1] == (byte) 0xBB) && (textBytes[2] == (byte) 0xBF)) { // find UTF-8 BOM, strip it. LOG.info("Found UTF-8 BOM and skipped it"); textLength -= 3; newSize -= 3; if (textLength > 0) { // It may work to use the same buffer and not do the copyBytes textBytes = value.copyBytes(); value.set(textBytes, 3, textLength); } else { value.clear(); } } return newSize; }
From source file:mvm.rya.indexing.accumulo.temporal.AccumuloTemporalIndexerTest.java
License:Apache License
/**
 * Delegates to the byte-array {@code toHumanString} overload with a copy of
 * the bytes held by {@code text}.
 *
 * @param text the text to render; may be {@code null}
 * @return whatever the byte-array overload returns for the copied bytes, or
 *         for {@code null} when {@code text} is {@code null}
 */
static String toHumanString(Text text) {
    // Typed as byte[] so the same overload is selected as with the ternary form.
    byte[] bytes = null;
    if (text != null) {
        bytes = text.copyBytes();
    }
    return toHumanString(bytes);
}
From source file:mvm.rya.indexing.KeyParts.java
License:Apache License
/**
 * Delegates to the byte-array {@code toHumanString} overload with a copy of
 * the bytes held by {@code text}.
 *
 * @param text the text to render; may be {@code null}
 * @return whatever the byte-array overload returns for the copied bytes, or
 *         for {@code null} when {@code text} is {@code null}
 */
public static String toHumanString(Text text) {
    // Typed as byte[] so the same overload is selected as with the ternary form.
    byte[] copied = null;
    if (text != null) {
        copied = text.copyBytes();
    }
    return toHumanString(copied);
}
From source file:org.apache.accumulo.core.data.LoadPlan.java
License:Apache License
/**
 * Returns the result of {@code data.copyBytes()}, passing {@code null} through.
 *
 * @param data the text to copy; may be {@code null}
 * @return {@code data.copyBytes()}, or {@code null} when {@code data} is {@code null}
 */
private static byte[] copy(Text data) {
    if (data == null) {
        return null;
    }
    return data.copyBytes();
}
From source file:org.apache.rya.indexing.KeyParts.java
License:Apache License
/**
 * Delegates to the byte-array {@code toHumanString} overload with a copy of
 * the bytes held by {@code text}.
 *
 * @param text the text to render; may be {@code null}
 * @return whatever the byte-array overload returns for the copied bytes, or
 *         for {@code null} when {@code text} is {@code null}
 */
public static String toHumanString(final Text text) {
    // Typed as byte[] so the same overload is selected as with the ternary form.
    final byte[] copied;
    if (text == null) {
        copied = null;
    } else {
        copied = text.copyBytes();
    }
    return toHumanString(copied);
}
From source file:org.utils.UnsplittableFileReader.java
License:Apache License
@Override public synchronized boolean nextKeyValue() throws IOException { boolean res = reader.nextKeyValue(); if (res) {//from ww w. ja va2 s . c o m LongWritable lineNumber = reader.getCurrentKey(); Text lineString = reader.getCurrentValue(); key.clear(); key.setFilename(filename); key.setLine(lineNumber.get()); value.clear(); value.set(lineString.copyBytes()); LOG.debug("read " + key); } return res; }