List of usage examples for org.apache.hadoop.io.Text getBytes()
@Override public byte[] getBytes()
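Note: getBytes() returns the Text object's internal backing array, and only the first getLength() bytes are valid; the array may be longer and contain stale data from a previously held value. A minimal sketch of the safe copy pattern (the helper name is illustrative; only the standard Text API is assumed):

import java.util.Arrays;
import org.apache.hadoop.io.Text;

static byte[] copyTextBytes(Text text) {
    // getBytes() exposes the reused backing buffer, which may be longer than
    // the logical value, so copy only the first getLength() bytes.
    return Arrays.copyOf(text.getBytes(), text.getLength());
}

Most of the examples below follow the same rule: they either pass (getBytes(), 0, getLength()) downstream or copy that range before the Text instance is reused.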
From source file:org.apache.kylin.storage.hbase.steps.KeyValueCreator.java
License:Apache License
public KeyValue create(Text key, byte[] value, int voffset, int vlen) {
    return create(key.getBytes(), 0, key.getLength(), value, voffset, vlen);
}
From source file:org.apache.kylin.storage.hbase.steps.RangeKeyDistributionReducer.java
License:Apache License
@Override
protected void doCleanup(Context context) throws IOException, InterruptedException {
    int nRegion = Math.round((float) gbPoints.size() / cut);
    nRegion = Math.max(minRegionCount, nRegion);
    nRegion = Math.min(maxRegionCount, nRegion);

    int gbPerRegion = gbPoints.size() / nRegion;
    gbPerRegion = Math.max(1, gbPerRegion);

    if (hfileSizeGB <= 0) {
        hfileSizeGB = gbPerRegion;
    }
    int hfilePerRegion = (int) (gbPerRegion / hfileSizeGB);
    hfilePerRegion = Math.max(1, hfilePerRegion);

    System.out.println(nRegion + " regions");
    System.out.println(gbPerRegion + " GB per region");
    System.out.println(hfilePerRegion + " hfile per region");

    Path hfilePartitionFile = new Path(output + "/part-r-00000_hfile");
    SequenceFile.Writer hfilePartitionWriter = new SequenceFile.Writer(
            hfilePartitionFile.getFileSystem(context.getConfiguration()), context.getConfiguration(),
            hfilePartitionFile, ImmutableBytesWritable.class, NullWritable.class);
    int hfileCountInOneRegion = 0;
    for (int i = hfileSizeGB; i < gbPoints.size(); i += hfileSizeGB) {
        hfilePartitionWriter.append(new ImmutableBytesWritable(gbPoints.get(i).getBytes()), NullWritable.get());
        if (++hfileCountInOneRegion >= hfilePerRegion) {
            Text key = gbPoints.get(i);
            outputValue.set(i);
            System.out.println(StringUtils.byteToHexString(key.getBytes()) + "\t" + outputValue.get());
            context.write(key, outputValue);
            hfileCountInOneRegion = 0;
        }
    }
    hfilePartitionWriter.close();
}
From source file:org.apache.mahout.text.ChunkedWriter.java
License:Apache License
public void write(String key, String value) throws IOException {
    if (currentChunkSize > maxChunkSizeInBytes) {
        writer.close();
        writer = new SequenceFile.Writer(fs, conf, getPath(currentChunkID++), Text.class, Text.class);
        currentChunkSize = 0;
    }
    Text keyT = new Text(key);
    Text valueT = new Text(value);
    currentChunkSize += keyT.getBytes().length + valueT.getBytes().length; // Overhead
    writer.append(keyT, valueT);
}
From source file:org.apache.mahout.utils.io.ChunkedWriter.java
License:Apache License
/** Writes a new key-value pair, creating a new sequence file if necessary. */
public void write(String key, String value) throws IOException {
    if (currentChunkSize > maxChunkSizeInBytes) {
        Closeables.close(writer, false);
        currentChunkID++;
        writer = new SequenceFile.Writer(fs, conf, getPath(currentChunkID), Text.class, Text.class);
        currentChunkSize = 0;
    }
    Text keyT = new Text(key);
    Text valueT = new Text(value);
    currentChunkSize += keyT.getBytes().length + valueT.getBytes().length; // Overhead
    writer.append(keyT, valueT);
}
From source file:org.apache.orc.impl.writer.StringBaseTreeWriter.java
License:Apache License
private void flushDictionary() throws IOException {
    final int[] dumpOrder = new int[dictionary.size()];

    if (useDictionaryEncoding) {
        // Write the dictionary by traversing the red-black tree writing out
        // the bytes and lengths; and creating the map from the original order
        // to the final sorted order.
        dictionary.visit(new StringRedBlackTree.Visitor() {
            private int currentId = 0;

            @Override
            public void visit(StringRedBlackTree.VisitorContext context) throws IOException {
                context.writeBytes(stringOutput);
                lengthOutput.write(context.getLength());
                dumpOrder[context.getOriginalPosition()] = currentId++;
            }
        });
    } else {
        // for direct encoding, we don't want the dictionary data stream
        stringOutput.suppress();
    }
    int length = rows.size();
    int rowIndexEntry = 0;
    OrcProto.RowIndex.Builder rowIndex = getRowIndex();
    Text text = new Text();
    // write the values translated into the dump order.
    for (int i = 0; i <= length; ++i) {
        // now that we are writing out the row values, we can finalize the
        // row index
        if (buildIndex) {
            while (i == rowIndexValueCount.get(rowIndexEntry) && rowIndexEntry < savedRowIndex.size()) {
                OrcProto.RowIndexEntry.Builder base = savedRowIndex.get(rowIndexEntry++).toBuilder();
                if (useDictionaryEncoding) {
                    rowOutput.getPosition(new RowIndexPositionRecorder(base));
                } else {
                    PositionRecorder posn = new RowIndexPositionRecorder(base);
                    directStreamOutput.getPosition(posn);
                    lengthOutput.getPosition(posn);
                }
                rowIndex.addEntry(base.build());
            }
        }
        if (i != length) {
            if (useDictionaryEncoding) {
                rowOutput.write(dumpOrder[rows.get(i)]);
            } else {
                dictionary.getText(text, rows.get(i));
                directStreamOutput.write(text.getBytes(), 0, text.getLength());
                lengthOutput.write(text.getLength());
            }
        }
    }
    rows.clear();
}
From source file:org.apache.orc.mapred.OrcMapredRecordWriter.java
License:Apache License
static void setCharValue(BytesColumnVector vector, int row, Text value, int length) {
    // we need to trim or pad the string with spaces to required length
    int actualLength = value.getLength();
    if (actualLength >= length) {
        setBinaryValue(vector, row, value, length);
    } else {
        byte[] spaces = SPACE_BUFFER.get();
        if (length - actualLength > spaces.length) {
            spaces = new byte[length - actualLength];
            Arrays.fill(spaces, (byte) ' ');
            SPACE_BUFFER.set(spaces);
        }
        vector.setConcat(row, value.getBytes(), 0, actualLength, spaces, 0, length - actualLength);
    }
}
From source file:org.apache.pig.builtin.JsonLoader.java
License:Apache License
public Tuple getNext() throws IOException {
    Text val = null;
    try {
        // Read the next key value pair from the record reader. If it's
        // finished, return null
        if (!reader.nextKeyValue())
            return null;

        // Get the current value. We don't use the key.
        val = (Text) reader.getCurrentValue();
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    }

    // Create a parser specific for this input line. This may not be the
    // most efficient approach.
    byte[] newBytes = new byte[val.getLength()];
    System.arraycopy(val.getBytes(), 0, newBytes, 0, val.getLength());
    ByteArrayInputStream bais = new ByteArrayInputStream(newBytes);
    JsonParser p = jsonFactory.createJsonParser(bais);

    // Create the tuple we will be returning. We create it with the right
    // number of fields, as the Tuple object is optimized for this case.
    ResourceFieldSchema[] fields = schema.getFields();
    Tuple t = tupleFactory.newTuple(fields.length);

    // Read the start object marker. Throughout this file if the parsing
    // isn't what we expect we return a tuple with null fields rather than
    // throwing an exception. That way a few mangled lines don't fail the
    // job.
    if (p.nextToken() != JsonToken.START_OBJECT) {
        warn("Bad record, could not find start of record " + val.toString(), PigWarning.UDF_WARNING_1);
        return t;
    }

    // Read each field in the record
    for (int i = 0; i < fields.length; i++) {
        t.set(i, readField(p, fields[i], i));
    }

    if (p.nextToken() != JsonToken.END_OBJECT) {
        warn("Bad record, could not find end of record " + val.toString(), PigWarning.UDF_WARNING_1);
        return t;
    }
    p.close();

    return t;
}
From source file:org.apache.pig.builtin.PigStorage.java
License:Apache License
@Override
public Tuple getNext() throws IOException {
    mProtoTuple = new ArrayList<Object>();
    if (!mRequiredColumnsInitialized) {
        if (signature != null) {
            Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
            mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(p.getProperty(signature));
        }
        mRequiredColumnsInitialized = true;
    }
    // Prepend input source path if source tagging is enabled
    if (tagFile) {
        mProtoTuple.add(new DataByteArray(sourcePath.getName()));
    } else if (tagPath) {
        mProtoTuple.add(new DataByteArray(sourcePath.toString()));
    }

    try {
        boolean notDone = in.nextKeyValue();
        if (!notDone) {
            return null;
        }
        Text value = (Text) in.getCurrentValue();
        byte[] buf = value.getBytes();
        int len = value.getLength();
        int start = 0;
        int fieldID = 0;
        for (int i = 0; i < len; i++) {
            if (buf[i] == fieldDel) {
                if (mRequiredColumns == null || (mRequiredColumns.length > fieldID && mRequiredColumns[fieldID]))
                    addTupleValue(mProtoTuple, buf, start, i);
                start = i + 1;
                fieldID++;
            }
        }
        // pick up the last field
        if (start <= len && (mRequiredColumns == null
                || (mRequiredColumns.length > fieldID && mRequiredColumns[fieldID]))) {
            addTupleValue(mProtoTuple, buf, start, len);
        }
        Tuple t = mTupleFactory.newTupleNoCopy(mProtoTuple);

        return dontLoadSchema ? t : applySchema(t);
    } catch (InterruptedException e) {
        int errCode = 6018;
        String errMsg = "Error while reading input";
        throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, e);
    }
}
From source file:org.apache.pig.builtin.TextLoader.java
License:Apache License
@Override
public Tuple getNext() throws IOException {
    try {
        boolean notDone = in.nextKeyValue();
        if (!notDone) {
            return null;
        }
        Text value = (Text) in.getCurrentValue();
        byte[] ba = value.getBytes();
        // make a copy of the bytes representing the input since
        // TextInputFormat will reuse the byte array
        return mTupleFactory.newTuple(new DataByteArray(ba, 0, value.getLength()));
    } catch (InterruptedException e) {
        throw new IOException("Error getting input");
    }
}
From source file:org.apache.pig.impl.streaming.OutputHandler.java
License:Apache License
private byte[] readNextLine() throws IOException {
    Text line = new Text();
    int num = in.readLine(line);
    byte[] lineBytes = line.getBytes();
    if (num <= 0) {
        return null;
    }
    return lineBytes;
}