Example usage for org.apache.hadoop.io Text set

List of usage examples for org.apache.hadoop.io.Text.set

Introduction

On this page you can find example usages of the set method of org.apache.hadoop.io.Text.

Prototype

public void set(Text other) 

Source Link

Document

Copies the contents of the given Text into this one.

Usage

From source file:org.hypertable.hadoop.mapred.HypertableRecordReader.java

License:Open Source License

/**
 * Advances to the next cell of the scan and copies it into {@code key} and {@code value}.
 *
 * <p>Cells are pulled from the scanner in batches; an empty batch marks the end of the scan.
 * Unless {@code m_no_escape} is set, tab, newline, NUL and backslash bytes in the cell value are
 * written as the two-byte sequences {@code \t}, {@code \n}, {@code \0} and {@code \\}.
 * {@code m_bytes_read} is advanced for every cell that is returned.
 *
 * @param key   receives the cell key (populated by {@code fill_key})
 * @param value receives the cell value, or the empty string when the cell has no value bytes
 * @return {@code true} if a cell was read, {@code false} once the scan is exhausted
 * @throws IOException if the Thrift call fails; the Thrift exception is attached as the cause
 */
public boolean next(Text key, Text value) throws IOException {
    try {
        if (m_eos)
            return false;
        // Fetch a new batch when there is none yet or the current one is used up.
        if (m_cells == null || !m_iter.hasNext()) {
            m_cells = m_client.scanner_get_cells(m_scanner);
            if (m_cells.isEmpty()) {
                m_eos = true;
                return false;
            }
            m_iter = m_cells.iterator();
        }
        Cell cell = m_iter.next();
        fill_key(key, cell.key);
        // NOTE(review): 24 looks like a fixed per-cell overhead estimate -- confirm.
        m_bytes_read += 24 + cell.key.row.length();
        if (cell.value == null || !cell.value.hasRemaining()) {
            value.set("");
        } else {
            byte[] buf = cell.value.array();
            int pos = cell.value.arrayOffset() + cell.value.position();
            int len = cell.value.remaining();

            // Escaping is only needed when it is enabled and a special byte is present.
            boolean escape = false;
            if (!m_no_escape) {
                for (int i = pos; i < pos + len; i++) {
                    if (buf[i] == '\n' || buf[i] == '\t' || buf[i] == '\0' || buf[i] == '\\') {
                        escape = true;
                        break;
                    }
                }
            }

            if (!escape) {
                // Nothing to escape: copy the original bytes.
                m_bytes_read += len;
                value.set(buf, pos, len);
            } else {
                // Escape into a temporary array; worst case every byte becomes two.
                byte[] bb = new byte[len * 2];
                int j = 0;
                for (int i = pos; i < pos + len; i++) {
                    if (buf[i] == '\t') {
                        bb[j++] = '\\';
                        bb[j++] = 't';
                    } else if (buf[i] == '\n') {
                        bb[j++] = '\\';
                        bb[j++] = 'n';
                    } else if (buf[i] == '\0') {
                        bb[j++] = '\\';
                        bb[j++] = '0';
                    } else if (buf[i] == '\\') {
                        bb[j++] = '\\';
                        bb[j++] = '\\';
                    } else {
                        bb[j++] = buf[i];
                    }
                }
                // The escaped length (not the raw length) is what gets counted here.
                m_bytes_read += j;
                value.set(bb, 0, j);
            }
        }

        if (cell.key.column_qualifier != null)
            m_bytes_read += cell.key.column_qualifier.length();
    } catch (TException e) {
        // TTransportException is a TException, so this one clause covers both.
        // The cause is chained so the original stack trace is not lost.
        throw new IOException(e.getMessage(), e);
    }
    return true;
}

From source file:org.lilyproject.mapreduce.testjobs.Test1Mapper.java

License:Apache License

/**
 * Writes one fixed ("foo", "bar") pair to the context for each call; the incoming
 * key and value are not read.
 */
public void map(RecordIdWritable key, RecordWritable value, Context context)
        throws IOException, InterruptedException {

    // TODO do something useful
    final Text outKey = new Text();
    outKey.set("foo");

    final Text outValue = new Text();
    outValue.set("bar");

    context.write(outKey, outValue);
}

From source file:org.pentaho.hadoop.mapreduce.converter.converters.KettleTypeToTextConverter.java

License:Apache License

/**
 * Converts a value to a {@link Text} holding the string form that {@code meta} produces for it.
 *
 * @param meta value metadata used to render {@code obj} as a string
 * @param obj  the value to convert
 * @return a new {@link Text} containing the rendered string
 * @throws TypeConversionException if {@code meta} cannot render {@code obj}
 */
@Override
public Text convert(ValueMetaInterface meta, Object obj) throws TypeConversionException {
    final Text converted = new Text();
    try {
        converted.set(meta.getString(obj));
    } catch (KettleValueException ex) {
        throw new TypeConversionException(BaseMessages.getString(TypeConverterFactory.class, "ErrorConverting",
                Text.class.getSimpleName(), obj), ex);
    }
    return converted;
}

From source file:org.pentaho.hadoop.mapreduce.converter.converters.LongWritableToTextConverter.java

License:Apache License

/**
 * Converts a {@link LongWritable} to a {@link Text} holding its decimal string form.
 *
 * @param meta not used by this converter
 * @param obj  the value to convert
 * @return a new {@link Text} containing the decimal representation of {@code obj}
 */
@Override
public Text convert(ValueMetaInterface meta, LongWritable obj) throws TypeConversionException {
    final Text converted = new Text();
    final long raw = obj.get();
    converted.set(Long.toString(raw));
    return converted;
}

From source file:org.pentaho.hadoop.mapreduce.MockRecordReader.java

License:Apache License

/**
 * Supplies the next row: the key becomes the running row number and the value
 * the next element of {@code rowIter}.
 *
 * @return {@code true} if a row was produced, {@code false} when {@code rowIter} is exhausted
 */
@Override
public boolean next(Text key, Text value) throws IOException {
    if (rowIter.hasNext()) {
        // The row counter is advanced before it is used as the key.
        key.set(String.valueOf(++rowNum));
        value.set(rowIter.next());
        return true;
    }
    return false;
}

From source file:org.platform.modules.hadoop.format.output.CustomOutputFormat.java

License:Apache License

/**
 * create the final out file, and output row by row. After one row is
 * appended, a configured row separator is appended
 * /*w  w w  . j  a  v a  2s. co m*/
 * @param jc
 *          the job configuration file
 * @param outPath
 *          the final output file to be created
 * @param valueClass
 *          the value class used for create
 * @param isCompressed
 *          whether the content is compressed or not
 * @param tableProperties
 *          the tableProperties of this file's corresponding table
 * @param progress
 *          progress used for status report
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass,
        boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    int rowSeparator = 0;
    String rowSeparatorString = tableProperties.getProperty(serdeConstants.LINE_DELIM, "\n");
    try {
        rowSeparator = Byte.parseByte(rowSeparatorString);
    } catch (NumberFormatException e) {
        rowSeparator = rowSeparatorString.charAt(0);
    }

    final int finalRowSeparator = rowSeparator;
    FileSystem fs = outPath.getFileSystem(jc);
    final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath), isCompressed);
    return new RecordWriter() {
        @SuppressWarnings("deprecation")
        public void write(Writable r) throws IOException {
            if (r instanceof Text) {
                Text tr = (Text) r;
                String strReplace = tr.toString().toLowerCase().replace(":", "::");
                Text txtReplace = new Text();
                txtReplace.set(strReplace);
                outStream.write(txtReplace.getBytes(), 0, txtReplace.getLength());
                //          outStream.write(tr.getBytes(), 0, tr.getLength());
                outStream.write(finalRowSeparator);
            } else {
                // DynamicSerDe always writes out BytesWritable
                BytesWritable bw = (BytesWritable) r;
                outStream.write(bw.get(), 0, bw.getSize());
                outStream.write(finalRowSeparator);
            }
        }

        public void close(boolean abort) throws IOException {
            outStream.close();
        }
    };
}

From source file:org.platform.utils.bigdata.hive.CustomOutputFormat.java

License:Apache License

/**
 * Opens the final output file and hands back a writer that emits one row at a
 * time, each followed by the configured row separator. In {@code Text} rows every
 * ":" is doubled to "::" before the row is written.
 *
 * @param jc
 *            the job configuration file
 * @param outPath
 *            the final output file to be created
 * @param valueClass
 *            the value class used for create
 * @param isCompressed
 *            whether the content is compressed or not
 * @param tableProperties
 *            the tableProperties of this file's corresponding table
 * @param progress
 *            progress used for status report
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass,
        boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    final String delimiterSpec = tableProperties.getProperty(serdeConstants.LINE_DELIM, "\n");
    int parsedDelimiter;
    try {
        // A numeric spec is taken as the byte value of the separator.
        parsedDelimiter = Byte.parseByte(delimiterSpec);
    } catch (NumberFormatException notNumeric) {
        // Anything else contributes its first character.
        parsedDelimiter = delimiterSpec.charAt(0);
    }
    final int delimiter = parsedDelimiter;

    final FileSystem fileSystem = outPath.getFileSystem(jc);
    final OutputStream sink = Utilities.createCompressedStream(jc, fileSystem.create(outPath), isCompressed);

    return new RecordWriter() {
        @SuppressWarnings("deprecation")
        @Override
        public void write(Writable r) throws IOException {
            if (!(r instanceof Text)) {
                // DynamicSerDe always writes out BytesWritable
                BytesWritable raw = (BytesWritable) r;
                sink.write(raw.get(), 0, raw.getSize());
                sink.write(delimiter);
                return;
            }

            Text escaped = new Text();
            escaped.set(((Text) r).toString().replace(":", "::"));
            sink.write(escaped.getBytes(), 0, escaped.getLength());
            sink.write(delimiter);
        }

        @Override
        public void close(boolean abort) throws IOException {
            sink.close();
        }
    };
}

From source file:org.plista.kornakapi.core.training.SemanticModel.java

License:Apache License

/**
 * Method to safe the model/*w w  w.  ja va 2s.c  o  m*/
 * @throws IOException
 */
public void safe(String safeKey) throws IOException {
    /**
     * New Model training changes the key. Inference can only safe the model if its key is still valid. Thus since inference job start and end no new model was calculated
     */
    if (!this.key.equals(safeKey)) {
        if (log.isInfoEnabled()) {
            log.info("Storing model Failed. Modelkey Changed");
        }
        return;
    }

    if (itemFeatures != null) {
        Path model = path.suffix("/itemFeature.model");
        Writer w = SequenceFile.createWriter(fs, lconf, model, Text.class, VectorWritable.class);
        for (String itemid : itemFeatures.keySet()) {
            Text id = new Text();
            VectorWritable val = new VectorWritable();
            id.set(itemid);
            val.set(itemFeatures.get(itemid));
            w.append(id, val);
        }
        Closeables.close(w, false);
    }
    if (indexItem != null) {
        Path model = path.suffix("/indexItem.model");
        Writer w = SequenceFile.createWriter(fs, lconf, model, IntWritable.class, Text.class);
        for (Integer itemid : indexItem.keySet()) {
            IntWritable key = new IntWritable();
            Text val = new Text();
            key.set(itemid);
            val.set(indexItem.get(itemid));
            w.append(key, val);
        }
        Closeables.close(w, false);
    }
    if (itemIndex != null) {
        Path model = path.suffix("/itemIndex.model");
        Writer w = SequenceFile.createWriter(fs, lconf, model, Text.class, IntWritable.class);
        for (String itemid : itemIndex.keySet()) {
            IntWritable val = new IntWritable();
            Text key = new Text();
            key.set(itemid);
            val.set(itemIndex.get(itemid));
            w.append(key, val);
        }
        Closeables.close(w, false);
    }
    if (log.isInfoEnabled()) {
        log.info("LDA Model Safed");
    }
}

From source file:org.plista.kornakapi.core.training.SemanticModel.java

License:Apache License

/**
 * Writes the model key to {@code key.txt} below {@code path} as a single
 * (1, key) record. The key is used to handle concurrent writes from
 * DocumentTopicInferenceTrainer and LDATrainer.
 *
 * @param key the model key to store
 * @throws IOException if creating or writing the file fails
 */
private void writeKey(String key) throws IOException {
    Path keyPath = path.suffix("/key.txt");
    Writer w = SequenceFile.createWriter(fs, lconf, keyPath, IntWritable.class, Text.class);
    // Close the writer even when append fails; in that case a secondary close
    // failure is swallowed so the original exception propagates.
    boolean threw = true;
    try {
        IntWritable id = new IntWritable();
        Text val = new Text();
        id.set(1);
        val.set(key);
        w.append(id, val);
        threw = false;
    } finally {
        Closeables.close(w, threw);
    }
}

From source file:org.pooledtimeseries.cartesian.CartesianRecordReader.java

License:Apache License

/**
 * Produces the next left/right pair of the cartesian product.
 *
 * <p>The key becomes {@code "<left key>~<right key>"} and the value the serialised
 * concatenation of the left and right feature lists, in that order. Whenever the
 * right data set is used up, the left side advances by one record and the right
 * reader is re-created.
 *
 * @return {@code true} if a pair was produced, {@code false} once the left side is exhausted
 */
@Override
public boolean next(Text key, BytesWritable value) throws IOException {

    do {
        if (goToNextLeft) {
            // Advance the left side; once it has no more records the task is nearly finished.
            if (!leftRR.next(lkey, lvalue)) {
                alldone = true;
                break;
            }
            goToNextLeft = false;
            alldone = false;

            // Start over with a fresh reader on the right data set.
            this.rightRR = this.rightFIF.getRecordReader(this.rightIS, this.rightConf, this.rightReporter);

            if (this.pairWithItself) {
                // Skip ahead in the right data set to avoid repeated pairs
                // (a,b is considered the same as b,a).
                int skipped = 0;
                while (skipped < rightShiftCount) {
                    rightRR.next(rkey, rvalue);
                    skipped++;
                }
                rightShiftCount++;
            }
        }

        // Right data set exhausted: loop again and move on to the next left record.
        if (!rightRR.next(rkey, rvalue)) {
            goToNextLeft = true;
            continue;
        }

        key.set(lkey.toString() + "~" + rkey.toString());

        // Merge the feature vectors of both videos; the order must match the order in the key.
        List<FeatureVector> merged = (List<FeatureVector>) PoTSerialiser.getObject(lvalue.getBytes());
        List<FeatureVector> rightFeatures = (List<FeatureVector>) PoTSerialiser.getObject(rvalue.getBytes());
        merged.addAll(rightFeatures);
        byte[] mergedBytes = PoTSerialiser.getBytes(merged);
        value.set(mergedBytes, 0, mergedBytes.length);

        // This assumes that key will always be unique among all splits
        if (lkey.toString().equals(rkey.toString())) {
            this.pairWithItself = true;
        }
    } while (goToNextLeft);

    if (!alldone) {
        return true;
    }

    // Everything has been read: reset the shift state before reporting the end.
    rightShiftCount = 1;
    this.pairWithItself = false;
    return false;
}