List of usage examples for org.apache.hadoop.io Text encode
public static ByteBuffer encode(String string) throws CharacterCodingException
From source file:com.ebay.nest.io.sede.lazy.LazyDate.java
License:Apache License
/**
 * Writes the text form of a date to the output stream.
 *
 * The text is whatever {@code d.toString()} returns; it is encoded with
 * {@code Text.encode} and only the valid portion of the encoded buffer
 * (up to its limit) is written.
 *
 * @param out
 *          The output stream
 * @param d
 *          The date to write
 * @throws IOException
 *           if encoding the text or writing to the stream fails
 */
public static void writeUTF8(OutputStream out, DateWritable d) throws IOException {
    final ByteBuffer encoded = Text.encode(d.toString());
    final byte[] backing = encoded.array();
    out.write(backing, 0, encoded.limit());
}
From source file:com.ebay.nest.io.sede.lazy.LazyUtils.java
License:Apache License
/** * Write out the text representation of a Primitive Object to a UTF8 byte * stream./* w w w .ja va2s . c o m*/ * * @param out * The UTF8 byte OutputStream * @param o * The primitive Object * @param needsEscape * Whether a character needs escaping. This array should have size of * 128. */ public static void writePrimitiveUTF8(OutputStream out, Object o, PrimitiveObjectInspector oi, boolean escaped, byte escapeChar, boolean[] needsEscape) throws IOException { switch (oi.getPrimitiveCategory()) { case BOOLEAN: { boolean b = ((BooleanObjectInspector) oi).get(o); if (b) { out.write(trueBytes, 0, trueBytes.length); } else { out.write(falseBytes, 0, falseBytes.length); } break; } case BYTE: { LazyInteger.writeUTF8(out, ((ByteObjectInspector) oi).get(o)); break; } case SHORT: { LazyInteger.writeUTF8(out, ((ShortObjectInspector) oi).get(o)); break; } case INT: { LazyInteger.writeUTF8(out, ((IntObjectInspector) oi).get(o)); break; } case LONG: { LazyLong.writeUTF8(out, ((LongObjectInspector) oi).get(o)); break; } case FLOAT: { float f = ((FloatObjectInspector) oi).get(o); ByteBuffer b = Text.encode(String.valueOf(f)); out.write(b.array(), 0, b.limit()); break; } case DOUBLE: { double d = ((DoubleObjectInspector) oi).get(o); ByteBuffer b = Text.encode(String.valueOf(d)); out.write(b.array(), 0, b.limit()); break; } case STRING: { Text t = ((StringObjectInspector) oi).getPrimitiveWritableObject(o); writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar, needsEscape); break; } case VARCHAR: { HiveVarcharWritable hc = ((HiveVarcharObjectInspector) oi).getPrimitiveWritableObject(o); Text t = hc.getTextValue(); writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar, needsEscape); break; } case BINARY: { BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o); byte[] toEncode = new byte[bw.getLength()]; System.arraycopy(bw.getBytes(), 0, toEncode, 0, bw.getLength()); byte[] toWrite = Base64.encodeBase64(toEncode); 
out.write(toWrite, 0, toWrite.length); break; } case DATE: { LazyDate.writeUTF8(out, ((DateObjectInspector) oi).getPrimitiveWritableObject(o)); break; } case TIMESTAMP: { LazyTimestamp.writeUTF8(out, ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o)); break; } case DECIMAL: { HiveDecimal bd = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); ByteBuffer b = Text.encode(bd.toString()); out.write(b.array(), 0, b.limit()); break; } default: { throw new RuntimeException("Hive internal error."); } } }
From source file:net.orpiske.tcs.wc.map.WordMapper.java
License:Apache License
/**
 * Looks up a column by name and returns its value decoded as a string.
 *
 * The name is encoded with {@code Text.encode} to form the lookup key, and
 * the column value is decoded with {@code ByteBufferUtil.string}.
 *
 * @param columns
 *          the columns to search, keyed by encoded column name
 * @param name
 *          the name of the column to read
 * @return the column value as a string
 * @throws IOException
 *           if encoding the name or decoding the value fails
 * @throws NullPointerException
 *           if {@code columns} holds no entry for {@code name}
 */
private String getColumnValue(SortedMap<ByteBuffer, Column> columns, final String name)
        throws IOException, InterruptedException {
    final ByteBuffer key = Text.encode(name);
    final Column column = columns.get(key);

    // A missing column used to fail with a message-less NPE on column.value();
    // keep the same exception type but say which column was absent.
    if (column == null) {
        throw new NullPointerException("No column named '" + name + "' in the supplied columns");
    }

    return ByteBufferUtil.string(column.value());
}
From source file:org.apache.orc.impl.TestReaderImpl.java
License:Apache License
/**
 * Builds a byte array laid out as: encoded header, encoded footer, then one
 * trailing byte holding the footer length (the encoded footer size plus one
 * for the trailing byte itself).
 *
 * @param headerStr
 *          text placed at the start of the content
 * @param footerStr
 *          text placed after the header
 * @return the backing array of the assembled buffer
 * @throws CharacterCodingException
 *           if either string cannot be encoded
 */
private byte[] composeContent(String headerStr, String footerStr) throws CharacterCodingException {
    final ByteBuffer headerBytes = Text.encode(headerStr);
    final ByteBuffer footerBytes = Text.encode(footerStr);

    // The footer length counts the trailing length byte as well.
    final int footerLen = footerBytes.remaining() + 1;
    final ByteBuffer content = ByteBuffer.allocate(headerBytes.remaining() + footerLen);

    content.put(headerBytes)
            .put(footerBytes)
            .put((byte) footerLen);
    return content.array();
}
From source file:org.mitre.ccv.mapred.io.KmerEntropyPairWritable.java
License:Open Source License
@Override public void write(DataOutput out) throws IOException { out.writeByte(0); // place holder out.writeDouble(this.value); //out.writeUTF(this.key); /**/*from w w w . j a va 2 s .c o m*/ * Adapted from Text, but we do not use writeVInt becuase * we need to read this back in from a ByteBuffer. */ ByteBuffer bytes = Text.encode(this.key); int length = bytes.limit(); out.writeInt(length); out.write(bytes.array(), 0, length); }
From source file:org.terrier.indexing.TwitterJSONDocument.java
License:Mozilla Public License
protected int byteLength(String t) { try {//from w w w . j a v a 2s . com return Text.encode(t).array().length; } catch (Exception e) { assert false; return -1; } }
From source file:org.terrier.structures.indexing.CompressingMetaIndexBuilder.java
License:Mozilla Public License
/** {@inheritDoc} */ @Override/*from w ww . j a va 2 s. c o m*/ public void writeDocumentEntry(String[] data) throws IOException { int i = 0; for (String value : data) { if (value == null) value = ""; else if (value.length() > valueLensChars[i]) if (CROP_LONG) value = value.substring(0, valueLensChars[i] - 1); else throw new IllegalArgumentException("Data (" + value + ") of string length " + value.length() + " for key " + keyNames[i] + " exceeds max string length of " + valueLensChars[i] + "(byte length of " + valueLensBytes[i] + "). Crop in the Document, increase indexer.meta.forward.keylens, or set metaindex.compressed.crop.long"); final byte[] b = Text.encode(value).array(); final int numberOfBytesToWrite = b.length; if (numberOfBytesToWrite > valueLensBytes[i]) throw new IllegalArgumentException("Data (" + value + ") of byte length " + numberOfBytesToWrite + " for key " + keyNames[i] + " exceeds max byte length of " + valueLensBytes[i] + "(string length of " + valueLensChars[i] + "). 
Crop in the Document, or increase indexer.meta.forward.keylens"); baos.write(b); if (numberOfBytesToWrite < valueLensBytes[i]) baos.write(spaces, 0, valueLensBytes[i] - numberOfBytesToWrite); i++; } zip.reset(); zip.setInput(baos.toByteArray()); zip.finish(); baos.reset(); indexOutput.writeLong(currentOffset); currentIndexOffset += 8; int compressedEntrySize = 0; while (!zip.finished()) { final int numOfCompressedBytes = zip.deflate(compressedBuffer); dataOutput.write(compressedBuffer, 0, numOfCompressedBytes); compressedEntrySize += numOfCompressedBytes; } currentOffset += compressedEntrySize; for (i = 0; i < forwardKeys.length; i++) { Text key = keyFactories[i].newInstance(); key.set(data[forwardKeys[i]]); IntWritable value = new IntWritable(); value.set(entryCount); forwardWriters[i].write(key, value); if (lastValues[i] != null && data[forwardKeys[i]].compareTo(lastValues[i]) < 1) forwardKeyValuesSorted[i] = false; lastValues[i] = data[forwardKeys[i]]; } entryCount++; //check for low memory, and flush if necessary if (entryCount % DOCS_PER_CHECK == 0 && memCheck.checkMemory()) { flush(); memCheck.reset(); } }