List of usage examples for org.apache.hadoop.io Text getBytes
@Override public byte[] getBytes()
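Note that, unlike String.getBytes(), this method returns the Text object's backing byte array: the array may be longer than the encoded content, and only the first getLength() bytes are valid UTF-8. A minimal sketch of the bounded pattern (a standalone illustration, not taken from any of the examples below):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

public class TextGetBytesSketch {
    public static void main(String[] args) {
        // Reuse one Text instance, as Hadoop readers typically do
        Text text = new Text();
        text.set("first value".getBytes(StandardCharsets.UTF_8));
        text.set("hi".getBytes(StandardCharsets.UTF_8)); // shorter value, buffer capacity is kept

        byte[] buffer = text.getBytes(); // whole backing buffer, may still hold bytes of the old value
        int valid = text.getLength();    // number of valid UTF-8 bytes

        // Bound every use of the buffer by getLength()
        String decoded = new String(buffer, 0, valid, StandardCharsets.UTF_8);
        System.out.println(decoded);     // "hi"; decoding the full buffer could include leftovers
    }
}

The examples below show both styles: some bound the array with getLength(), others use the raw array directly.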
From source file:org.openx.data.jsonserde.JsonSerDe.java
License:Open Source License
/**
 * Deserializes the object. Reads a Writable and uses JSONObject to
 * parse its text
 *
 * @param w the text to parse
 * @return a JSONObject
 * @throws SerDeException
 */
@Override
public Object deserialize(Writable w) throws SerDeException {
    Text rowText = (Text) w;
    deserializedDataSize = rowText.getBytes().length;

    // Try parsing row into JSON object
    Object jObj = null;
    try {
        String txt = rowText.toString().trim();
        if (txt.startsWith("{")) {
            jObj = new JSONObject(txt);
        } else if (txt.startsWith("[")) {
            jObj = new JSONArray(txt);
        }
    } catch (JSONException e) {
        // If row is not a JSON object, make the whole row NULL
        onMalformedJson("Row is not a valid JSON Object - JSONException: " + e.getMessage());
        try {
            jObj = new JSONObject("{}");
        } catch (JSONException ex) {
            onMalformedJson("Error parsing empty row. This should never happen.");
        }
    }
    return jObj;
}
From source file:org.openx.data.jsonserde.JsonSerDe.java
License:Open Source License
/**
 * Hive will call this to serialize an object. Returns a writable object
 * of the same class returned by <a href="#getSerializedClass">getSerializedClass</a>
 *
 * @param obj The object to serialize
 * @param objInspector The ObjectInspector that knows about the object's structure
 * @return a serialized object in form of a Writable. Must be the
 *         same type returned by <a href="#getSerializedClass">getSerializedClass</a>
 * @throws SerDeException
 */
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    // make sure it is a struct record
    if (objInspector.getCategory() != Category.STRUCT) {
        throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: "
                + objInspector.getTypeName());
    }

    JSONObject serializer = serializeStruct(obj, (StructObjectInspector) objInspector, columnNames);
    Text t = new Text(serializer.toString());
    serializedDataSize = t.getBytes().length;
    return t;
}
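Both JsonSerDe methods above measure the row size with getBytes().length, which is the capacity of the backing buffer rather than the number of encoded bytes; when the buffer is larger than the content the two differ. If the intent is the byte length of the row itself, getLength() is the bounded equivalent, sketched here as a drop-in change (same field names as the example, not verified against the project):

// In deserialize(): count only the valid UTF-8 bytes of the row
deserializedDataSize = rowText.getLength();

// In serialize(): new Text(String) can also over-allocate, so getLength() is the reliable count
serializedDataSize = t.getLength();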
From source file:org.platform.modules.hadoop.format.output.CustomOutputFormat.java
License:Apache License
/**
 * create the final out file, and output row by row. After one row is
 * appended, a configured row separator is appended
 *
 * @param jc the job configuration file
 * @param outPath the final output file to be created
 * @param valueClass the value class used for create
 * @param isCompressed whether the content is compressed or not
 * @param tableProperties the tableProperties of this file's corresponding table
 * @param progress progress used for status report
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass,
        boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    int rowSeparator = 0;
    String rowSeparatorString = tableProperties.getProperty(serdeConstants.LINE_DELIM, "\n");
    try {
        rowSeparator = Byte.parseByte(rowSeparatorString);
    } catch (NumberFormatException e) {
        rowSeparator = rowSeparatorString.charAt(0);
    }
    final int finalRowSeparator = rowSeparator;
    FileSystem fs = outPath.getFileSystem(jc);
    final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath), isCompressed);
    return new RecordWriter() {
        @SuppressWarnings("deprecation")
        public void write(Writable r) throws IOException {
            if (r instanceof Text) {
                Text tr = (Text) r;
                String strReplace = tr.toString().toLowerCase().replace(":", "::");
                Text txtReplace = new Text();
                txtReplace.set(strReplace);
                outStream.write(txtReplace.getBytes(), 0, txtReplace.getLength());
                // outStream.write(tr.getBytes(), 0, tr.getLength());
                outStream.write(finalRowSeparator);
            } else {
                // DynamicSerDe always writes out BytesWritable
                BytesWritable bw = (BytesWritable) r;
                outStream.write(bw.get(), 0, bw.getSize());
                outStream.write(finalRowSeparator);
            }
        }

        public void close(boolean abort) throws IOException {
            outStream.close();
        }
    };
}
From source file:org.platform.utils.bigdata.hive.CustomOutputFormat.java
License:Apache License
/**
 * create the final out file, and output row by row. After one row is
 * appended, a configured row separator is appended
 *
 * @param jc the job configuration file
 * @param outPath the final output file to be created
 * @param valueClass the value class used for create
 * @param isCompressed whether the content is compressed or not
 * @param tableProperties the tableProperties of this file's corresponding table
 * @param progress progress used for status report
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass,
        boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    int rowSeparator = 0;
    String rowSeparatorString = tableProperties.getProperty(serdeConstants.LINE_DELIM, "\n");
    try {
        rowSeparator = Byte.parseByte(rowSeparatorString);
    } catch (NumberFormatException e) {
        rowSeparator = rowSeparatorString.charAt(0);
    }
    final int finalRowSeparator = rowSeparator;
    FileSystem fs = outPath.getFileSystem(jc);
    final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath), isCompressed);
    return new RecordWriter() {
        @SuppressWarnings("deprecation")
        @Override
        public void write(Writable r) throws IOException {
            if (r instanceof Text) {
                Text tr = (Text) r;
                String strReplace = tr.toString().replace(":", "::");
                Text txtReplace = new Text();
                txtReplace.set(strReplace);
                outStream.write(txtReplace.getBytes(), 0, txtReplace.getLength());
                // outStream.write(tr.getBytes(), 0, tr.getLength());
                outStream.write(finalRowSeparator);
            } else {
                // DynamicSerDe always writes out BytesWritable
                BytesWritable bw = (BytesWritable) r;
                outStream.write(bw.get(), 0, bw.getSize());
                outStream.write(finalRowSeparator);
            }
        }

        @Override
        public void close(boolean abort) throws IOException {
            outStream.close();
        }
    };
}
From source file:org.springframework.data.hadoop.store.input.TextFileReader.java
License:Apache License
@Override
public String read() throws IOException {
    if (streamsHolder == null) {
        streamsHolder = getInput(getPath());
        lineReader = new LineReader(streamsHolder.getStream(), delimiter);
    }
    Text text = new Text();
    lineReader.readLine(text);
    byte[] value = text.getBytes();
    return value != null && value.length > 0 ? new String(value) : null;
}
From source file:org.springframework.data.hadoop.store.input.TextSequenceFileReader.java
License:Apache License
@Override
public String read() throws IOException {
    if (reader == null) {
        reader = getInput();
    }
    Text k = new Text();
    Text v = new Text();
    reader.next(k, v);
    byte[] value = v.getBytes();
    return value != null && value.length > 0 ? new String(value) : null;
}
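Both Spring Data readers above build the result with new String(value), which decodes the entire backing array using the platform default charset. Because the Text buffer is not guaranteed to match the content length exactly, a bounded, charset-explicit conversion is the safer form. A sketch of an alternative body for the TextSequenceFileReader read() above (same names, not verified against the project):

Text k = new Text();
Text v = new Text();
reader.next(k, v);
// Decode only the getLength() valid bytes, explicitly as UTF-8
return v.getLength() > 0
        ? new String(v.getBytes(), 0, v.getLength(), StandardCharsets.UTF_8)
        : null;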
From source file:org.springframework.data.hadoop.store.text.DelimitedTextStorage.java
License:Apache License
@Override
public synchronized StorageReader getStorageReader(Path path) throws IOException {
    if (lineReader == null) {
        lineReader = new LineReader(getInput(path).getStream(), getConfiguration());
    }
    return new StorageReader() {

        @Override
        public byte[] read() throws IOException {
            Text text = new Text();
            lineReader.readLine(text);
            return text.getBytes();
        }
    };
}
From source file:org.springframework.data.hadoop.store.text.DelimitedTextStorage.java
License:Apache License
@Override
public synchronized StorageReader getStorageReader(final InputSplit inputSplit) throws IOException {
    StorageReader splitStorageReader = splitStorageReaders.get(inputSplit);
    if (splitStorageReader == null) {
        final StreamsHolder<InputStream> holder = getInput(inputSplit);
        final LineReader splitReader = new LineReader(holder.getStream(), getConfiguration());
        splitLineReaders.put(inputSplit, splitReader);
        final long startx;
        final long endx;
        if (holder.getStream() instanceof SplitCompressionInputStream) {
            startx = ((SplitCompressionInputStream) holder.getStream()).getAdjustedStart();
            endx = ((SplitCompressionInputStream) holder.getStream()).getAdjustedEnd();
        } else {
            startx = inputSplit.getStart();
            endx = startx + inputSplit.getLength();
        }
        if (log.isDebugEnabled()) {
            log.debug("Split start=" + startx + " end=" + endx);
        }
        splitStorageReader = new StorageReader() {

            Seekable seekable = (Seekable) holder.getStream();

            long start = startx;
            long end = endx;
            long pos = start;

            @Override
            public byte[] read() throws IOException {
                long position = getFilePosition();
                if (position <= end) {
                    Text text = new Text();
                    int newSize = splitReader.readLine(text);
                    pos += newSize;
                    return text.getBytes();
                } else {
                    return null;
                }
            }

            private long getFilePosition() throws IOException {
                long retVal;
                if (getCodec() != null && seekable != null) {
                    retVal = seekable.getPos();
                } else {
                    retVal = pos;
                }
                return retVal;
            }
        };
        splitStorageReaders.put(inputSplit, splitStorageReader);
    }
    return splitStorageReader;
}
From source file:org.teiid.translator.accumulo.AccumuloQueryExecution.java
License:Open Source License
@Override
public List<?> next() throws TranslatorException, DataNotAvailableException {
    SortedMap<Key, Value> rowItems = readNextRow();
    boolean rowIdAdded = false;
    LinkedHashMap<String, byte[]> values = new LinkedHashMap<String, byte[]>();

    for (Key key : rowItems.keySet()) {
        Text cf = key.getColumnFamily();
        Text cq = key.getColumnQualifier();
        Text rowid = key.getRow();
        Value value = rowItems.get(key);

        Column match = findMatchingColumn(cf, cq);

        if (!rowIdAdded) {
            values.put(AccumuloMetadataProcessor.ROWID, rowid.getBytes());
            rowIdAdded = true;
        }

        if (match != null) {
            String valueIn = match.getProperty(AccumuloMetadataProcessor.VALUE_IN, false);
            // failed to use isolated scanner, but this if check will accomplish the same
            // in getting the most top value
            if (values.get(match.getName()) == null) {
                values.put(match.getName(), buildValue(valueIn, cq, value));
            }
        }
    }
    return nextRow(values);
}
From source file:org.teiid.translator.accumulo.AccumuloQueryExecution.java
License:Open Source License
private Column findMatchingColumn(Text rowCF, Text rowCQ) {
    String CF = new String(rowCF.getBytes());
    String CQ = new String(rowCQ.getBytes());

    Column column = this.visitor.lookupColumn(CF + "/" + CQ); //$NON-NLS-1$
    if (column == null) {
        // this means CQ is not defined; In this pattern CQ is used for value
        column = this.visitor.lookupColumn(CF);
    }
    return column;
}
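Here new String(rowCF.getBytes()) decodes the full backing array with the platform default charset. Text.toString() already performs a UTF-8 decode bounded by getLength(), so a possible alternative with the same lookup logic (a sketch only, not tested against the Teiid project) is:

private Column findMatchingColumn(Text rowCF, Text rowCQ) {
    // Text.toString() decodes only the valid bytes, as UTF-8
    String cf = rowCF.toString();
    String cq = rowCQ.toString();

    Column column = this.visitor.lookupColumn(cf + "/" + cq); //$NON-NLS-1$
    if (column == null) {
        // CQ not defined; in this pattern CQ carries the value
        column = this.visitor.lookupColumn(cf);
    }
    return column;
}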