List of usage examples for the set method of org.apache.hadoop.io.Text
public void set(Text other)
From source file:org.hypertable.hadoop.mapred.HypertableRecordReader.java
License:Open Source License
public boolean next(Text key, Text value) throws IOException { try {/*from w w w. j a v a 2 s . c o m*/ if (m_eos) return false; if (m_cells == null || !m_iter.hasNext()) { m_cells = m_client.scanner_get_cells(m_scanner); if (m_cells.isEmpty()) { m_eos = true; return false; } m_iter = m_cells.iterator(); } Cell cell = m_iter.next(); fill_key(key, cell.key); m_bytes_read += 24 + cell.key.row.length(); if (cell.value == null || !cell.value.hasRemaining()) { value.set(""); } else { // do not escape string? if (m_no_escape == true) { m_bytes_read += cell.value.remaining(); value.set(cell.value.array(), cell.value.arrayOffset() + cell.value.position(), cell.value.remaining()); } // escape string? else { byte[] buf = cell.value.array(); int pos = cell.value.arrayOffset() + cell.value.position(); int len = (int) cell.value.remaining(); // check if we have to escape boolean escape = false; for (int i = pos; i < pos + len; i++) { if (buf[i] == '\n' || buf[i] == '\t' || buf[i] == '\0' || buf[i] == '\\') { escape = true; break; } } // no need to escape; copy the original value if (!escape) { m_bytes_read += cell.value.remaining(); value.set(cell.value.array(), cell.value.arrayOffset() + cell.value.position(), cell.value.remaining()); } // otherwise escape into a temporary ByteBuffer else { byte[] bb = new byte[len * 2]; int j = 0; for (int i = pos; i < pos + len; i++) { if (buf[i] == '\t') { bb[j++] = '\\'; bb[j++] = 't'; } else if (buf[i] == '\n') { bb[j++] = '\\'; bb[j++] = 'n'; } else if (buf[i] == '\0') { bb[j++] = '\\'; bb[j++] = '0'; } else if (buf[i] == '\\') { bb[j++] = '\\'; bb[j++] = '\\'; } else bb[j++] = buf[i]; } m_bytes_read += j; value.set(bb, 0, j); } } } if (cell.key.column_qualifier != null) m_bytes_read += cell.key.column_qualifier.length(); } catch (TTransportException e) { e.printStackTrace(); throw new IOException(e.getMessage()); } catch (TException e) { e.printStackTrace(); throw new IOException(e.getMessage()); } return true; }
From source file:org.lilyproject.mapreduce.testjobs.Test1Mapper.java
License:Apache License
public void map(RecordIdWritable key, RecordWritable value, Context context) throws IOException, InterruptedException { Text keyOut = new Text(); Text valueOut = new Text(); // TODO do something useful keyOut.set("foo"); valueOut.set("bar"); context.write(keyOut, valueOut);/*from w ww . j av a 2 s .com*/ }
From source file:org.pentaho.hadoop.mapreduce.converter.converters.KettleTypeToTextConverter.java
License:Apache License
/**
 * Converts a Kettle value to a Hadoop {@code Text} holding the string form that
 * {@code meta.getString(obj)} produces.
 *
 * @param meta value metadata used to render {@code obj} as a string
 * @param obj  the value to convert
 * @return a new {@code Text} set to the rendered string
 * @throws TypeConversionException if {@code meta.getString} raises a {@code KettleValueException}
 */
@Override
public Text convert(ValueMetaInterface meta, Object obj) throws TypeConversionException {
    final String rendered;
    try {
        rendered = meta.getString(obj);
    } catch (KettleValueException ex) {
        final String message = BaseMessages.getString(TypeConverterFactory.class, "ErrorConverting",
                Text.class.getSimpleName(), obj);
        throw new TypeConversionException(message, ex);
    }

    final Text converted = new Text();
    converted.set(rendered);
    return converted;
}
From source file:org.pentaho.hadoop.mapreduce.converter.converters.LongWritableToTextConverter.java
License:Apache License
/**
 * Converts a {@code LongWritable} to a {@code Text} containing the string form of its value.
 *
 * @param meta value metadata (not consulted by this converter)
 * @param obj  the writable whose value is rendered
 * @return a new {@code Text} set to {@code String.valueOf(obj.get())}
 */
@Override
public Text convert(ValueMetaInterface meta, LongWritable obj) throws TypeConversionException {
    final String rendered = String.valueOf(obj.get());
    final Text converted = new Text();
    converted.set(rendered);
    return converted;
}
From source file:org.pentaho.hadoop.mapreduce.MockRecordReader.java
License:Apache License
/**
 * Advances to the next mock row.
 *
 * @param key   set to the string form of the incremented row counter
 * @param value set to the next element of the row iterator
 * @return {@code true} if a row was produced, {@code false} when the iterator is exhausted
 */
@Override
public boolean next(Text key, Text value) throws IOException {
    if (rowIter.hasNext()) {
        // The counter is bumped before it is used, so the emitted key reflects the new value.
        key.set(String.valueOf(++rowNum));
        value.set(rowIter.next());
        return true;
    }
    return false;
}
From source file:org.platform.modules.hadoop.format.output.CustomOutputFormat.java
License:Apache License
/** * create the final out file, and output row by row. After one row is * appended, a configured row separator is appended * /*w w w . j a v a 2s. co m*/ * @param jc * the job configuration file * @param outPath * the final output file to be created * @param valueClass * the value class used for create * @param isCompressed * whether the content is compressed or not * @param tableProperties * the tableProperties of this file's corresponding table * @param progress * progress used for status report * @return the RecordWriter */ @Override public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException { int rowSeparator = 0; String rowSeparatorString = tableProperties.getProperty(serdeConstants.LINE_DELIM, "\n"); try { rowSeparator = Byte.parseByte(rowSeparatorString); } catch (NumberFormatException e) { rowSeparator = rowSeparatorString.charAt(0); } final int finalRowSeparator = rowSeparator; FileSystem fs = outPath.getFileSystem(jc); final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath), isCompressed); return new RecordWriter() { @SuppressWarnings("deprecation") public void write(Writable r) throws IOException { if (r instanceof Text) { Text tr = (Text) r; String strReplace = tr.toString().toLowerCase().replace(":", "::"); Text txtReplace = new Text(); txtReplace.set(strReplace); outStream.write(txtReplace.getBytes(), 0, txtReplace.getLength()); // outStream.write(tr.getBytes(), 0, tr.getLength()); outStream.write(finalRowSeparator); } else { // DynamicSerDe always writes out BytesWritable BytesWritable bw = (BytesWritable) r; outStream.write(bw.get(), 0, bw.getSize()); outStream.write(finalRowSeparator); } } public void close(boolean abort) throws IOException { outStream.close(); } }; }
From source file:org.platform.utils.bigdata.hive.CustomOutputFormat.java
License:Apache License
/** * create the final out file, and output row by row. After one row is * appended, a configured row separator is appended * //from w ww . j a v a2 s . com * @param jc * the job configuration file * @param outPath * the final output file to be created * @param valueClass * the value class used for create * @param isCompressed * whether the content is compressed or not * @param tableProperties * the tableProperties of this file's corresponding table * @param progress * progress used for status report * @return the RecordWriter */ @Override public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException { int rowSeparator = 0; String rowSeparatorString = tableProperties.getProperty(serdeConstants.LINE_DELIM, "\n"); try { rowSeparator = Byte.parseByte(rowSeparatorString); } catch (NumberFormatException e) { rowSeparator = rowSeparatorString.charAt(0); } final int finalRowSeparator = rowSeparator; FileSystem fs = outPath.getFileSystem(jc); final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath), isCompressed); return new RecordWriter() { @SuppressWarnings("deprecation") @Override public void write(Writable r) throws IOException { if (r instanceof Text) { Text tr = (Text) r; String strReplace = tr.toString().replace(":", "::"); Text txtReplace = new Text(); txtReplace.set(strReplace); outStream.write(txtReplace.getBytes(), 0, txtReplace.getLength()); // outStream.write(tr.getBytes(), 0, tr.getLength()); outStream.write(finalRowSeparator); } else { // DynamicSerDe always writes out BytesWritable BytesWritable bw = (BytesWritable) r; outStream.write(bw.get(), 0, bw.getSize()); outStream.write(finalRowSeparator); } } @Override public void close(boolean abort) throws IOException { outStream.close(); } }; }
From source file:org.plista.kornakapi.core.training.SemanticModel.java
License:Apache License
/** * Method to safe the model/*w w w. ja va 2s.c o m*/ * @throws IOException */ public void safe(String safeKey) throws IOException { /** * New Model training changes the key. Inference can only safe the model if its key is still valid. Thus since inference job start and end no new model was calculated */ if (!this.key.equals(safeKey)) { if (log.isInfoEnabled()) { log.info("Storing model Failed. Modelkey Changed"); } return; } if (itemFeatures != null) { Path model = path.suffix("/itemFeature.model"); Writer w = SequenceFile.createWriter(fs, lconf, model, Text.class, VectorWritable.class); for (String itemid : itemFeatures.keySet()) { Text id = new Text(); VectorWritable val = new VectorWritable(); id.set(itemid); val.set(itemFeatures.get(itemid)); w.append(id, val); } Closeables.close(w, false); } if (indexItem != null) { Path model = path.suffix("/indexItem.model"); Writer w = SequenceFile.createWriter(fs, lconf, model, IntWritable.class, Text.class); for (Integer itemid : indexItem.keySet()) { IntWritable key = new IntWritable(); Text val = new Text(); key.set(itemid); val.set(indexItem.get(itemid)); w.append(key, val); } Closeables.close(w, false); } if (itemIndex != null) { Path model = path.suffix("/itemIndex.model"); Writer w = SequenceFile.createWriter(fs, lconf, model, Text.class, IntWritable.class); for (String itemid : itemIndex.keySet()) { IntWritable val = new IntWritable(); Text key = new Text(); key.set(itemid); val.set(itemIndex.get(itemid)); w.append(key, val); } Closeables.close(w, false); } if (log.isInfoEnabled()) { log.info("LDA Model Safed"); } }
From source file:org.plista.kornakapi.core.training.SemanticModel.java
License:Apache License
/**
 * Writes the model key to the sequence file {@code /key.txt} below {@code path} as a single
 * record with id 1. The key is set to handle concurrent writes from
 * DocumentTopicInferenceTrainer and LDATrainer.
 *
 * @param key the key value to store
 * @throws IOException if creating, appending to or closing the sequence file fails
 */
private void writeKey(String key) throws IOException {
    Path keyPath = path.suffix("/key.txt");
    Writer w = SequenceFile.createWriter(fs, lconf, keyPath, IntWritable.class, Text.class);
    // 'threw' stays true unless the append succeeds, so the writer is closed on failure too
    // and a close() failure cannot mask the original exception.
    boolean threw = true;
    try {
        IntWritable id = new IntWritable();
        Text val = new Text();
        id.set(1);
        val.set(key);
        w.append(id, val);
        threw = false;
    } finally {
        Closeables.close(w, threw);
    }
}
From source file:org.pooledtimeseries.cartesian.CartesianRecordReader.java
License:Apache License
@Override public boolean next(Text key, BytesWritable value) throws IOException { do {//from ww w . ja va 2 s . c o m // If we are to go to the next left key/value pair if (goToNextLeft) { // Read the next key value pair, false means no more pairs if (!leftRR.next(lkey, lvalue)) { // If no more, then this task is nearly finished alldone = true; break; } else { // If we aren't done, set the value to the key and set // our flags goToNextLeft = alldone = false; // Reset the right record reader this.rightRR = this.rightFIF.getRecordReader(this.rightIS, this.rightConf, this.rightReporter); } if (this.pairWithItself) { // shifting right data set to avoid repeated pairs // we consider a,b == b,a for (int i = 0; i < rightShiftCount; i++) { rightRR.next(rkey, rvalue); } rightShiftCount++; } } // Read the next key value pair from the right data set if (rightRR.next(rkey, rvalue)) { // If success, set key and value for left and right splits key.set(lkey.toString() + "~" + rkey.toString()); // Merge FeatureVector of both videos // Order is important and should be same as order of key List<FeatureVector> featureList = (List<FeatureVector>) PoTSerialiser.getObject(lvalue.getBytes()); featureList.addAll((List<FeatureVector>) PoTSerialiser.getObject(rvalue.getBytes())); byte[] featureListBytes = PoTSerialiser.getBytes(featureList); value.set(featureListBytes, 0, featureListBytes.length); // This assumes that key will always be unique among all splits if (lkey.toString().equals(rkey.toString())) { this.pairWithItself = true; } } else { // Otherwise, this right data set is complete // and we should go to the next left pair goToNextLeft = true; } // This loop will continue if we finished reading key/value // pairs from the right data set } while (goToNextLeft); if (alldone) { // reset shift counter rightShiftCount = 1; this.pairWithItself = false; } // Return true if a key/value pair was read, false otherwise return !alldone; }