List of usage examples for org.apache.hadoop.io.Text.getBytes()
@Override public byte[] getBytes()
From source file:com.facebook.presto.hive.DwrfHiveRecordCursor.java
License:Apache License
private void parseStringColumn(int column) { // don't include column number in message because it causes boxing which is expensive here checkArgument(!isPartitionColumn[column], "Column is a partition key"); loaded[column] = true;//from ww w . j av a 2 s . c om nulls[column] = false; OrcLazyObject lazyObject = getRawValue(column); if (lazyObject == null) { nulls[column] = true; return; } Object value = materializeValue(lazyObject); if (value == null) { nulls[column] = true; return; } HiveType type = hiveTypes[column]; if (type.getCategory() == Category.MAP || type.getCategory() == Category.LIST || type.getCategory() == Category.STRUCT) { slices[column] = Slices .wrappedBuffer(getJsonBytes(sessionTimeZone, lazyObject, fieldInspectors[column])); } else if (type.equals(HIVE_STRING)) { Text text = checkWritable(value, Text.class); slices[column] = Slices.copyOf(Slices.wrappedBuffer(text.getBytes()), 0, text.getLength()); } else if (type.equals(HIVE_BINARY)) { BytesWritable bytesWritable = checkWritable(value, BytesWritable.class); slices[column] = Slices.copyOf(Slices.wrappedBuffer(bytesWritable.getBytes()), 0, bytesWritable.getLength()); } else { throw new RuntimeException(String.format("%s is not a valid STRING type", type)); } }
From source file:com.facebook.presto.hive.orc.DwrfHiveRecordCursor.java
License:Apache License
private void parseStringColumn(int column) { // don't include column number in message because it causes boxing which is expensive here checkArgument(!isPartitionColumn[column], "Column is a partition key"); loaded[column] = true;//from w ww.ja v a 2s.co m nulls[column] = false; OrcLazyObject lazyObject = getRawValue(column); if (lazyObject == null) { nulls[column] = true; return; } Object value = materializeValue(lazyObject); if (value == null) { nulls[column] = true; return; } HiveType type = hiveTypes[column]; if (type.equals(HIVE_STRING)) { Text text = checkWritable(value, Text.class); slices[column] = Slices.copyOf(Slices.wrappedBuffer(text.getBytes()), 0, text.getLength()); } else if (type.equals(HIVE_BINARY)) { BytesWritable bytesWritable = checkWritable(value, BytesWritable.class); slices[column] = Slices.copyOf(Slices.wrappedBuffer(bytesWritable.getBytes()), 0, bytesWritable.getLength()); } else { throw new RuntimeException(String.format("%s is not a valid STRING type", type)); } }
From source file:com.facebook.presto.hive.orc.OrcHiveRecordCursor.java
License:Apache License
private void parseStringColumn(int column) { // don't include column number in message because it causes boxing which is expensive here checkArgument(!isPartitionColumn[column], "Column is a partition key"); loaded[column] = true;/* www. j a va 2 s . c o m*/ nulls[column] = false; Object object = getFieldValue(row, hiveColumnIndexes[column]); if (object == null) { nulls[column] = true; return; } HiveType type = hiveTypes[column]; if (type.equals(HIVE_STRING)) { Text text = Types.checkType(object, Text.class, "materialized string value"); slices[column] = Slices.copyOf(Slices.wrappedBuffer(text.getBytes()), 0, text.getLength()); } else if (type.equals(HIVE_BINARY)) { BytesWritable bytesWritable = Types.checkType(object, BytesWritable.class, "materialized binary value"); slices[column] = Slices.copyOf(Slices.wrappedBuffer(bytesWritable.getBytes()), 0, bytesWritable.getLength()); } else { throw new RuntimeException(String.format("%s is not a valid STRING type", type)); } }
From source file:com.facebook.presto.hive.OrcHiveRecordCursor.java
License:Apache License
private void parseStringColumn(int column) { // don't include column number in message because it causes boxing which is expensive here checkArgument(!isPartitionColumn[column], "Column is a partition key"); loaded[column] = true;//from www . j a va2s. c om nulls[column] = false; Object object = getFieldValue(row, hiveColumnIndexes[column]); if (object == null) { nulls[column] = true; return; } HiveType type = hiveTypes[column]; if (type.getCategory() == Category.MAP || type.getCategory() == Category.LIST || type.getCategory() == Category.STRUCT) { slices[column] = Slices.wrappedBuffer(getJsonBytes(sessionTimeZone, object, fieldInspectors[column])); } else if (type.equals(HIVE_STRING)) { Text text = Types.checkType(object, Text.class, "materialized string value"); slices[column] = Slices.copyOf(Slices.wrappedBuffer(text.getBytes()), 0, text.getLength()); } else if (type.equals(HIVE_BINARY)) { BytesWritable bytesWritable = Types.checkType(object, BytesWritable.class, "materialized binary value"); slices[column] = Slices.copyOf(Slices.wrappedBuffer(bytesWritable.getBytes()), 0, bytesWritable.getLength()); } else { throw new RuntimeException(String.format("%s is not a valid STRING type", type)); } }
From source file:com.foobar.store.FromJSONSeqConverter.java
License:Apache License
/**
 * Parses the textual content of a writable as a JSON value.
 *
 * @param w the writable to convert; a {@code Text} is used as is, any other writable is
 *     converted through its {@code toString()} representation
 * @param val not read; the parsed result is what gets returned
 * @return the parsed JSON value, or {@code null} when {@code w} is {@code null}
 * @throws RuntimeException wrapping any exception raised while parsing
 */
private JsonValue convertWritableToItem(Writable w, JsonValue val) {
    if (w == null) {
        return null;
    }

    Text t = (w instanceof Text) ? (Text) w : new Text(w.toString());

    // Text.getBytes() exposes the backing array, which may be longer than the valid content
    // (for example when the Text instance was reused for a shorter value). Bound the stream
    // by getLength() so stale trailing bytes never reach the parser.
    ByteArrayInputStream input = new ByteArrayInputStream(t.getBytes(), 0, t.getLength());
    JsonParser parser = new JsonParser(input);
    try {
        return parser.JsonVal();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:com.gotometrics.orderly.RowKeyUtils.java
License:Apache License
/**
 * Converts a Text object to a byte array, copying only if necessary.
 *
 * <p>Delegates to the {@code toBytes(byte[], int, int)} overload with the backing array of
 * {@code t}, offset 0 and the length reported by {@code t}.
 */
public static byte[] toBytes(Text t) {
    final byte[] backing = t.getBytes();
    final int validLength = t.getLength();
    return toBytes(backing, 0, validLength);
}
From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java
License:Open Source License
/** * Generate random data, compress it, index and md5 hash the data. * Then read it all back and md5 that too, to verify that it all went ok. * /*from w w w. jav a 2 s .c o m*/ * @param testWithIndex Should we index or not? * @param charsToOutput How many characters of random data should we output. * @throws IOException * @throws NoSuchAlgorithmException * @throws InterruptedException */ private void runTest(boolean testWithIndex, int charsToOutput) throws IOException, NoSuchAlgorithmException, InterruptedException { if (!GPLNativeCodeLoader.isNativeCodeLoaded()) { LOG.warn("Cannot run this test without the native lzo libraries"); return; } Configuration conf = new Configuration(); conf.setLong("fs.local.block.size", charsToOutput / 2); // reducing block size to force a split of the tiny file conf.set("io.compression.codecs", LzopCodec.class.getName()); FileSystem localFs = FileSystem.getLocal(conf); localFs.delete(outputDir, true); localFs.mkdirs(outputDir); Job job = new Job(conf); TextOutputFormat.setCompressOutput(job, true); TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class); TextOutputFormat.setOutputPath(job, outputDir); TaskAttemptContext attemptContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID("123", 0, TaskType.REDUCE, 1, 2)); // create some input data byte[] expectedMd5 = createTestInput(outputDir, localFs, attemptContext, charsToOutput); if (testWithIndex) { Path lzoFile = new Path(outputDir, lzoFileName); LzoTextInputFormat.createIndex(localFs, lzoFile); } LzoTextInputFormat inputFormat = new LzoTextInputFormat(); TextInputFormat.setInputPaths(job, outputDir); List<InputSplit> is = inputFormat.getSplits(job); //verify we have the right number of lzo chunks if (testWithIndex && OUTPUT_BIG == charsToOutput) { assertEquals(3, is.size()); } else { assertEquals(1, is.size()); } // let's read it all and calculate the md5 hash for (InputSplit inputSplit : is) { RecordReader<LongWritable, Text> rr = 
inputFormat.createRecordReader(inputSplit, attemptContext); rr.initialize(inputSplit, attemptContext); while (rr.nextKeyValue()) { Text value = rr.getCurrentValue(); md5.update(value.getBytes(), 0, value.getLength()); } rr.close(); } localFs.close(); assertTrue(Arrays.equals(expectedMd5, md5.digest())); }
From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java
License:Open Source License
/** * Creates an lzo file with random data. * // w ww .j av a2s . com * @param outputDir Output directory. * @param fs File system we're using. * @param attemptContext Task attempt context, contains task id etc. * @throws IOException * @throws InterruptedException */ private byte[] createTestInput(Path outputDir, FileSystem fs, TaskAttemptContext attemptContext, int charsToOutput) throws IOException, InterruptedException { TextOutputFormat<Text, Text> output = new TextOutputFormat<Text, Text>(); RecordWriter<Text, Text> rw = null; md5.reset(); try { rw = output.getRecordWriter(attemptContext); char[] chars = "abcdefghijklmnopqrstuvwxyz\u00E5\u00E4\u00F6".toCharArray(); Random r = new Random(System.currentTimeMillis()); Text key = new Text(); Text value = new Text(); int charsMax = chars.length - 1; for (int i = 0; i < charsToOutput;) { i += fillText(chars, r, charsMax, key); i += fillText(chars, r, charsMax, value); rw.write(key, value); md5.update(key.getBytes(), 0, key.getLength()); // text output format writes tab between the key and value md5.update("\t".getBytes("UTF-8")); md5.update(value.getBytes(), 0, value.getLength()); } } finally { if (rw != null) { rw.close(attemptContext); OutputCommitter committer = output.getOutputCommitter(attemptContext); committer.commitTask(attemptContext); committer.cleanupJob(attemptContext); } } byte[] result = md5.digest(); md5.reset(); return result; }
From source file:com.ibm.jaql.io.hadoop.converter.FromDelConverter.java
License:Apache License
/** Converts the given line into a JSON value. */ @Override//w ww .j a v a 2s .c o m public JsonValue convert(LongWritable key, Text value, JsonValue target) { return convert(key.get(), value.getBytes(), value.getLength(), target); }
From source file:com.ibm.jaql.io.hadoop.converter.TextToJsonString.java
License:Apache License
/**
 * Copies the content of {@code src} into a JSON string.
 *
 * <p>{@code target} is reused when it already is a {@code MutableJsonString}; otherwise a new
 * instance is created. Only the length reported by {@code src} is passed along with its
 * backing array.
 */
@Override
public JsonValue convert(Text src, JsonValue target) {
    MutableJsonString result = (target instanceof MutableJsonString)
            ? (MutableJsonString) target
            : new MutableJsonString();
    result.set(src.getBytes(), src.getLength());
    return result;
}