Example usage for org.apache.hadoop.io Text set

Introduction

This page collects example usages of the set method of org.apache.hadoop.io.Text.

Prototype

public void set(Text other)

Source Link

Document

Copies the contents of another Text into this one.

Usage

From source file:org.hypertable.hadoop.mapred.HypertableRecordReader.java

License:Open Source License

/**
 * Reads the next cell from the scanner into {@code key} and {@code value}.
 *
 * <p>Cells are fetched in batches via {@code m_client.scanner_get_cells}; an
 * empty batch marks end-of-stream. Unless {@code m_no_escape} is set, tab,
 * newline, NUL and backslash bytes in the cell value are written as the
 * two-byte sequences {@code \t}, {@code \n}, {@code \0} and {@code \\}.
 *
 * @param key   receives the cell key (populated by {@code fill_key})
 * @param value receives the (possibly escaped) cell value, or the empty
 *              string when the cell has no value bytes
 * @return {@code true} if a cell was read, {@code false} at end-of-stream
 * @throws IOException if the underlying Thrift call fails; the Thrift
 *                     exception is attached as the cause
 */
public boolean next(Text key, Text value) throws IOException {
    try {
        if (m_eos)
            return false;
        if (m_cells == null || !m_iter.hasNext()) {
            m_cells = m_client.scanner_get_cells(m_scanner);
            if (m_cells.isEmpty()) {
                m_eos = true;
                return false;
            }
            m_iter = m_cells.iterator();
        }
        Cell cell = m_iter.next();
        fill_key(key, cell.key);
        // NOTE(review): 24 looks like a fixed per-cell overhead estimate -- confirm
        m_bytes_read += 24 + cell.key.row.length();
        if (cell.value == null || !cell.value.hasRemaining()) {
            value.set("");
        } else {
            byte[] buf = cell.value.array();
            int pos = cell.value.arrayOffset() + cell.value.position();
            int len = cell.value.remaining();

            // only scan for special bytes when escaping is enabled
            boolean escape = false;
            if (!m_no_escape) {
                for (int i = pos; i < pos + len; i++) {
                    if (buf[i] == '\n' || buf[i] == '\t' || buf[i] == '\0' || buf[i] == '\\') {
                        escape = true;
                        break;
                    }
                }
            }

            if (!escape) {
                // nothing to escape; copy the original bytes
                m_bytes_read += len;
                value.set(buf, pos, len);
            } else {
                // worst case every byte doubles in size
                byte[] bb = new byte[len * 2];
                int j = 0;
                for (int i = pos; i < pos + len; i++) {
                    if (buf[i] == '\t') {
                        bb[j++] = '\\';
                        bb[j++] = 't';
                    } else if (buf[i] == '\n') {
                        bb[j++] = '\\';
                        bb[j++] = 'n';
                    } else if (buf[i] == '\0') {
                        bb[j++] = '\\';
                        bb[j++] = '0';
                    } else if (buf[i] == '\\') {
                        bb[j++] = '\\';
                        bb[j++] = '\\';
                    } else
                        bb[j++] = buf[i];
                }
                // the escaped length is what gets counted here
                m_bytes_read += j;
                value.set(bb, 0, j);
            }
        }

        if (cell.key.column_qualifier != null)
            m_bytes_read += cell.key.column_qualifier.length();
    } catch (TTransportException e) {
        // keep the original exception as the cause instead of printing it
        throw new IOException(e.getMessage(), e);
    } catch (TException e) {
        throw new IOException(e.getMessage(), e);
    }
    return true;
}

From source file:org.lilyproject.mapreduce.testjobs.Test1Mapper.java

License:Apache License

/**
 * Placeholder mapper: ignores the incoming record and emits the constant
 * pair ("foo", "bar") for every input.
 *
 * @param key     the record id (unused)
 * @param value   the record (unused)
 * @param context the context the output pair is written to
 */
public void map(RecordIdWritable key, RecordWritable value, Context context)
        throws IOException, InterruptedException {
    // TODO do something useful
    final Text outKey = new Text();
    outKey.set("foo");

    final Text outValue = new Text();
    outValue.set("bar");

    context.write(outKey, outValue);
}

From source file:org.pentaho.hadoop.mapreduce.converter.converters.KettleTypeToTextConverter.java

License:Apache License

/**
 * Converts a Kettle value to a Hadoop {@code Text} using the string form
 * produced by {@code meta.getString(obj)}.
 *
 * @param meta the value metadata used to render {@code obj} as a string
 * @param obj  the value to convert
 * @return a new {@code Text} holding the string form of {@code obj}
 * @throws TypeConversionException if Kettle cannot render the value
 */
@Override
public Text convert(ValueMetaInterface meta, Object obj) throws TypeConversionException {
    final String rendered;
    try {
        rendered = meta.getString(obj);
    } catch (KettleValueException ex) {
        final String message = BaseMessages.getString(TypeConverterFactory.class, "ErrorConverting",
                Text.class.getSimpleName(), obj);
        throw new TypeConversionException(message, ex);
    }
    final Text converted = new Text();
    converted.set(rendered);
    return converted;
}

From source file:org.pentaho.hadoop.mapreduce.converter.converters.LongWritableToTextConverter.java

License:Apache License

/**
 * Converts a {@code LongWritable} to a {@code Text} containing its decimal
 * string representation.
 *
 * @param meta the value metadata (not used by this conversion)
 * @param obj  the long value to convert
 * @return a new {@code Text} holding the decimal form of {@code obj}
 */
@Override
public Text convert(ValueMetaInterface meta, LongWritable obj) throws TypeConversionException {
    final Text text = new Text();
    final String decimal = String.valueOf(obj.get());
    text.set(decimal);
    return text;
}

From source file:org.pentaho.hadoop.mapreduce.MockRecordReader.java

License:Apache License

/**
 * Produces the next mock record: the key is the 1-based running row number
 * as a string and the value is the next element of {@code rowIter}.
 *
 * @param key   receives the row number
 * @param value receives the row content
 * @return {@code true} if a row was produced, {@code false} when the
 *         iterator is exhausted
 */
@Override
public boolean next(Text key, Text value) throws IOException {
    final boolean hasRow = rowIter.hasNext();
    if (hasRow) {
        rowNum++;
        key.set(String.valueOf(rowNum));
        value.set(rowIter.next());
    }
    return hasRow;
}

From source file:org.platform.modules.hadoop.format.output.CustomOutputFormat.java

License:Apache License

/**
 * create the final out file, and output row by row. After one row is
 * appended, a configured row separator is appended
 * /*w  w w  . j  a  v a  2s. co m*/
 * @param jc
 *          the job configuration file
 * @param outPath
 *          the final output file to be created
 * @param valueClass
 *          the value class used for create
 * @param isCompressed
 *          whether the content is compressed or not
 * @param tableProperties
 *          the tableProperties of this file's corresponding table
 * @param progress
 *          progress used for status report
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass,
        boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    int rowSeparator = 0;
    String rowSeparatorString = tableProperties.getProperty(serdeConstants.LINE_DELIM, "\n");
    try {
        rowSeparator = Byte.parseByte(rowSeparatorString);
    } catch (NumberFormatException e) {
        rowSeparator = rowSeparatorString.charAt(0);
    }

    final int finalRowSeparator = rowSeparator;
    FileSystem fs = outPath.getFileSystem(jc);
    final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath), isCompressed);
    return new RecordWriter() {
        @SuppressWarnings("deprecation")
        public void write(Writable r) throws IOException {
            if (r instanceof Text) {
                Text tr = (Text) r;
                String strReplace = tr.toString().toLowerCase().replace(":", "::");
                Text txtReplace = new Text();
                txtReplace.set(strReplace);
                outStream.write(txtReplace.getBytes(), 0, txtReplace.getLength());
                //          outStream.write(tr.getBytes(), 0, tr.getLength());
                outStream.write(finalRowSeparator);
            } else {
                // DynamicSerDe always writes out BytesWritable
                BytesWritable bw = (BytesWritable) r;
                outStream.write(bw.get(), 0, bw.getSize());
                outStream.write(finalRowSeparator);
            }
        }

        public void close(boolean abort) throws IOException {
            outStream.close();
        }
    };
}

From source file:org.platform.utils.bigdata.hive.CustomOutputFormat.java

License:Apache License

/**
 * Opens the output file at {@code outPath} and hands back a writer that
 * emits one row at a time, each followed by the table's row separator.
 *
 * <p>{@code Text} rows have every {@code ":"} doubled to {@code "::"}
 * before being written; any other row is written as the raw bytes of a
 * {@code BytesWritable}.
 *
 * @param jc
 *            the job configuration file
 * @param outPath
 *            the final output file to be created
 * @param valueClass
 *            the value class used for create
 * @param isCompressed
 *            whether the content is compressed or not
 * @param tableProperties
 *            the tableProperties of this file's corresponding table
 * @param progress
 *            progress used for status report
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass,
        boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    final String delimiterSpec = tableProperties.getProperty(serdeConstants.LINE_DELIM, "\n");
    int parsedDelimiter;
    try {
        // numeric byte value, e.g. "10"
        parsedDelimiter = Byte.parseByte(delimiterSpec);
    } catch (NumberFormatException e) {
        // not a number: use the first character literally
        parsedDelimiter = delimiterSpec.charAt(0);
    }
    final int delimiter = parsedDelimiter;

    final FileSystem fileSystem = outPath.getFileSystem(jc);
    final OutputStream sink = Utilities.createCompressedStream(jc, fileSystem.create(outPath), isCompressed);

    return new RecordWriter() {
        @SuppressWarnings("deprecation")
        @Override
        public void write(Writable r) throws IOException {
            if (!(r instanceof Text)) {
                // DynamicSerDe always writes out BytesWritable
                BytesWritable raw = (BytesWritable) r;
                sink.write(raw.get(), 0, raw.getSize());
            } else {
                String doubledColons = ((Text) r).toString().replace(":", "::");
                Text encoded = new Text();
                encoded.set(doubledColons);
                sink.write(encoded.getBytes(), 0, encoded.getLength());
            }
            // every row, whatever its type, is terminated by the separator
            sink.write(delimiter);
        }

        @Override
        public void close(boolean abort) throws IOException {
            sink.close();
        }
    };
}

From source file:org.plista.kornakapi.core.training.SemanticModel.java

License:Apache License

/**
 * Method to safe the model/*w w  w.  ja va 2s.c  o  m*/
 * @throws IOException
 */
public void safe(String safeKey) throws IOException {
    /**
     * New Model training changes the key. Inference can only safe the model if its key is still valid. Thus since inference job start and end no new model was calculated
     */
    if (!this.key.equals(safeKey)) {
        if (log.isInfoEnabled()) {
            log.info("Storing model Failed. Modelkey Changed");
        }
        return;
    }

    if (itemFeatures != null) {
        Path model = path.suffix("/itemFeature.model");
        Writer w = SequenceFile.createWriter(fs, lconf, model, Text.class, VectorWritable.class);
        for (String itemid : itemFeatures.keySet()) {
            Text id = new Text();
            VectorWritable val = new VectorWritable();
            id.set(itemid);
            val.set(itemFeatures.get(itemid));
            w.append(id, val);
        }
        Closeables.close(w, false);
    }
    if (indexItem != null) {
        Path model = path.suffix("/indexItem.model");
        Writer w = SequenceFile.createWriter(fs, lconf, model, IntWritable.class, Text.class);
        for (Integer itemid : indexItem.keySet()) {
            IntWritable key = new IntWritable();
            Text val = new Text();
            key.set(itemid);
            val.set(indexItem.get(itemid));
            w.append(key, val);
        }
        Closeables.close(w, false);
    }
    if (itemIndex != null) {
        Path model = path.suffix("/itemIndex.model");
        Writer w = SequenceFile.createWriter(fs, lconf, model, Text.class, IntWritable.class);
        for (String itemid : itemIndex.keySet()) {
            IntWritable val = new IntWritable();
            Text key = new Text();
            key.set(itemid);
            val.set(itemIndex.get(itemid));
            w.append(key, val);
        }
        Closeables.close(w, false);
    }
    if (log.isInfoEnabled()) {
        log.info("LDA Model Safed");
    }
}

From source file:org.plista.kornakapi.core.training.SemanticModel.java

License:Apache License

/**
 * Writes the model key to {@code key.txt} under {@code path} as a single
 * (1, key) record. The key is used to handle concurrent writes from
 * DocumentTopicInferenceTrainer and LDATrainer.
 *
 * @param key the model key to persist
 * @throws IOException if creating, writing or closing the key file fails
 */
private void writeKey(String key) throws IOException {
    Path keyPath = path.suffix("/key.txt");
    Writer w = SequenceFile.createWriter(fs, lconf, keyPath, IntWritable.class, Text.class);
    // threw stays true unless append succeeds, so a close() failure cannot
    // mask the original exception and the writer is never leaked
    boolean threw = true;
    try {
        IntWritable id = new IntWritable();
        Text val = new Text();
        id.set(1);
        val.set(key);
        w.append(id, val);
        threw = false;
    } finally {
        Closeables.close(w, threw);
    }
}

From source file:org.pooledtimeseries.cartesian.CartesianRecordReader.java

License:Apache License

/**
 * Produces the next pair of the cartesian product of the left and right
 * record readers.
 *
 * <p>The output key is the left key and right key joined by {@code "~"};
 * the output value is the serialized concatenation of the two feature
 * lists obtained from the left and right values. When the right reader is
 * exhausted, the left reader is advanced and a fresh right reader is
 * requested from {@code rightFIF}.
 *
 * @param key   receives the combined {@code left~right} key
 * @param value receives the serialized merged feature list
 * @return {@code true} if a pair was produced, {@code false} once the left
 *         reader has no more records
 * @throws IOException if one of the underlying readers fails
 */
@Override
public boolean next(Text key, BytesWritable value) throws IOException {

    do {
        // If we are to go to the next left key/value pair
        if (goToNextLeft) {
            // Read the next key value pair, false means no more pairs
            if (!leftRR.next(lkey, lvalue)) {
                // If no more, then this task is nearly finished
                alldone = true;
                break;
            } else {
                // A left record was read: clear both flags so the right
                // side is scanned for this left record
                goToNextLeft = alldone = false;

                // Reset the right record reader by requesting a new one
                // NOTE(review): the previous rightRR is not closed here --
                // confirm whether it is released elsewhere
                this.rightRR = this.rightFIF.getRecordReader(this.rightIS, this.rightConf, this.rightReporter);
            }

            if (this.pairWithItself) {
                // shifting right data set to avoid repeated pairs
                // we consider a,b == b,a
                // (skips rightShiftCount records; the count grows by one
                // for each new left record)
                for (int i = 0; i < rightShiftCount; i++) {
                    rightRR.next(rkey, rvalue);
                }
                rightShiftCount++;
            }
        }

        // Read the next key value pair from the right data set
        if (rightRR.next(rkey, rvalue)) {
            // If success, set key and value for left and right splits
            key.set(lkey.toString() + "~" + rkey.toString());
            // Merge FeatureVector of both videos
            // Order is important and should be same as order of key
            // NOTE(review): unchecked casts -- presumably both values were
            // serialized as List<FeatureVector>; verify against the writer
            List<FeatureVector> featureList = (List<FeatureVector>) PoTSerialiser.getObject(lvalue.getBytes());
            featureList.addAll((List<FeatureVector>) PoTSerialiser.getObject(rvalue.getBytes()));
            byte[] featureListBytes = PoTSerialiser.getBytes(featureList);
            value.set(featureListBytes, 0, featureListBytes.length);

            // This assumes that key will always be unique among all splits
            // (equal left and right keys switch on the right-side shifting above)
            if (lkey.toString().equals(rkey.toString())) {
                this.pairWithItself = true;
            }
        } else {
            // Otherwise, this right data set is complete
            // and we should go to the next left pair
            goToNextLeft = true;
        }

        // This loop will continue if we finished reading key/value
        // pairs from the right data set
    } while (goToNextLeft);

    if (alldone) {
        // reset shift counter and self-pairing flag once the left side is exhausted
        rightShiftCount = 1;
        this.pairWithItself = false;
    }
    // Return true if a key/value pair was read, false otherwise
    return !alldone;
}