Example usage for org.apache.hadoop.io Text set

List of usage examples for org.apache.hadoop.io Text set

Introduction

In this page you can find the example usage for org.apache.hadoop.io Text set.

Prototype

public void set(Text other) 

Source Link

Document

Copies the contents of another Text instance into this one.

Usage

From source file:edu.indiana.d2i.htrc.skmeans.StreamingKMeansReducer.java

License:Apache License

/**
 * Reduces all vectors for a key: feeds each vector into the streaming
 * k-means clusterer, then emits one (identifier, cluster) pair per
 * resulting centroid.
 */
@Override
public void reduce(IntWritable key, Iterable<VectorWritable> values, Context context)
        throws IOException, InterruptedException {
    // Cluster every incoming vector for this key.
    for (VectorWritable wrapped : values) {
        skmeans.cluster(wrapped.get());
    }

    // Reuse a single Text for all emitted identifiers.
    Text id = new Text();
    Searcher centroids = skmeans.getCentroids();
    for (MatrixSlice slice : centroids) {
        StreamingKMeansCluster cluster = StreamingKMeansCluster.getStreamingKMeansCluster(slice.vector(),
                distance);
        id.set(cluster.getIdentifier());
        context.write(id, cluster);
    }
}

From source file:edu.indiana.d2i.htrc.util.Utilities.java

License:Apache License

/**
 * Converts a plain-text dictionary (one term per line) into a Hadoop
 * SequenceFile of (Text term, IntWritable index) pairs, where the index is
 * the 0-based line number of the term.
 *
 * @param input  path of the local dictionary file, one term per line
 * @param output path of the SequenceFile to create
 * @throws IOException if either file cannot be read or written
 */
public static void Dictionary2SeqFile(String input, String output) throws IOException {
    Configuration conf = new Configuration();

    // try-with-resources closes writer then reader even if an exception is
    // thrown mid-copy (the original leaked both on failure).
    try (BufferedReader reader = new BufferedReader(new FileReader(input));
            SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(conf), conf, new Path(output),
                    Text.class, IntWritable.class)) {
        Text key = new Text();
        IntWritable value = new IntWritable();
        int count = 0;
        String line;
        while ((line = reader.readLine()) != null) {
            key.set(line);      // the term itself
            value.set(count++); // its 0-based line index
            writer.append(key, value);
        }
    }
}

From source file:edu.isi.mavuno.extract.ChunkExtractor.java

License:Apache License

/**
 * Consumes the next sentence from the sentence iterator, segments its
 * tokens into chunks, and populates mChunkPairs with (context, pattern)
 * pairs — both adjacent and skip-based — then resets mChunkPairsIter.
 */
private void loadChunkPairs() {
    // clear chunk pairs from the previous sentence
    mChunkPairs.clear();

    // get the next sentence
    SentenceWritable<TratzParsedTokenWritable> sentence = mSentIter.next();

    // --- Pass 1: segment the sentence's tokens into chunks ---
    mChunks.clear();
    mChunkTokens.clear();
    List<TratzParsedTokenWritable> tokens = sentence.getTokens();
    Text lastNETag = new Text();
    for (int i = 0; i < tokens.size(); i++) {
        TratzParsedTokenWritable t = tokens.get(i);
        // First byte of the chunk tag encodes the BIO chunk type; 0 when the tag is empty.
        byte chunkType = t.getChunkTag().getLength() > 0 ? t.getChunkTag().getBytes()[0] : 0;
        Text neTag = t.getNETag();
        // Start a new chunk on a named-entity tag change, or at an 'O' NE tag
        // that coincides with a 'B'/'O' chunk boundary.
        // NOTE(review): '&&' binds tighter than '||', so this parses as
        // tagChanged || (isSingleO && isBorO) — the indentation suggests a
        // different grouping was intended; confirm before touching.
        if (neTag.compareTo(lastNETag.getBytes(), 0, lastNETag.getLength()) != 0
                || (neTag.getLength() == 1 && (neTag.getLength() > 0 && neTag.getBytes()[0] == 'O'))
                        && (chunkType == 'B' || chunkType == 'O')) {
            // flush the chunk accumulated so far, if any
            if (mChunkTokens.size() > 0) { // && mChunkType.getBytes()[0] != 'O') {
                Text chunk = createChunk(mChunkTokens, mChunkType);
                mChunks.add(chunk);
            }
            mChunkTokens.clear();
            mChunkType.set(t.getChunkTag());
        }
        mChunkTokens.add(t.getToken());
        lastNETag.set(neTag);
    }

    // handle last chunk in sentence
    if (mChunkTokens.size() > 0) { // && mChunkType.getBytes()[0] != 'O') {
        Text chunk = createChunk(mChunkTokens, mChunkType);
        mChunks.add(chunk);
    }

    // --- Pass 2: generate adjacent (context, pattern) pairs ---
    for (int patternPos = 0; patternPos < mChunks.size() - 1; patternPos++) {
        // pattern = current chunk followed by the adjacency marker
        Text leftPattern = new Text();
        leftPattern.append(mChunks.get(patternPos).getBytes(), 0, mChunks.get(patternPos).getLength());
        leftPattern.append(ADJACENT_PATTERN_NAME.getBytes(), 0, ADJACENT_PATTERN_NAME.getLength());
        addPair(mChunks.get(patternPos), leftPattern, mChunks.get(patternPos + 1));

        // pattern = adjacency marker followed by the next chunk
        Text rightPattern = new Text();
        rightPattern.append(ADJACENT_PATTERN_NAME.getBytes(), 0, ADJACENT_PATTERN_NAME.getLength());
        rightPattern.append(mChunks.get(patternPos + 1).getBytes(), 0, mChunks.get(patternPos + 1).getLength());
        addPair(mChunks.get(patternPos), rightPattern, mChunks.get(patternPos + 1));
    }

    // --- Pass 3: generate non-adjacent (context, pattern) pairs, allowing
    // up to mMaxSkipSize skipped chunks on each side of the pattern chunk ---
    for (int patternPos = 0; patternPos < mChunks.size(); patternPos++) {
        for (int leftSkip = 0; leftSkip <= mMaxSkipSize; leftSkip++) {
            if (patternPos - leftSkip - 1 < 0) {
                continue; // left context would fall off the sentence start
            }

            if (mOrContextStyle && !mRightOnlyContextStyle) {
                // "or"-style: left context only, right side wildcarded
                addPair(mChunks.get(patternPos - leftSkip - 1), mChunks.get(patternPos),
                        ContextPatternWritable.ASTERISK);
            }

            if (mOrContextStyle && mLeftOnlyContextStyle) {
                continue; // left-only style: no right-context pairs
            }

            for (int rightSkip = 0; rightSkip <= mMaxSkipSize; rightSkip++) {
                if (patternPos + rightSkip + 1 >= mChunks.size()) {
                    continue; // right context would fall off the sentence end
                }

                // construct (context, pattern) pair
                if (mOrContextStyle) {
                    // "or"-style: right context only, left side wildcarded
                    addPair(ContextPatternWritable.ASTERISK, mChunks.get(patternPos),
                            mChunks.get(patternPos + rightSkip + 1));
                } else {
                    // both-sides style: left and right contexts together
                    addPair(mChunks.get(patternPos - leftSkip - 1), mChunks.get(patternPos),
                            mChunks.get(patternPos + rightSkip + 1));
                }
            }
        }
    }

    // expose the generated pairs through the iterator
    mChunkPairsIter = mChunkPairs.iterator();
}

From source file:edu.isi.mavuno.nlp.NLProcTools.java

License:Apache License

/**
 * Extracts a generalized chunk: scans backwards from the chunk end while
 * the POS tag starts with "NN" (noun), and — when the trailing noun run is
 * a strict, non-empty suffix of the chunk — delegates to extractMainChunk
 * over that suffix. Returns null when no valid terms are found.
 */
private static Text extractGeneralizedChunk(int chunkStart, int chunkEnd, List<TratzParsedTokenWritable> tokens,
        boolean appendPOSTag) {
    // Walk backwards past the trailing run of NN* tokens; pos ends on the
    // last non-noun position (or chunkStart - 1 if the whole chunk is nouns).
    int pos = chunkEnd;
    while (pos >= chunkStart && tokens.get(pos).getPosTag().toString().startsWith("NN")) {
        pos--;
    }

    Text result = new Text();

    // Generalize only when the noun suffix is non-empty (pos != chunkEnd)
    // and not the whole chunk (pos != chunkStart - 1).
    if (pos != chunkStart - 1 && pos != chunkEnd) {
        Text mainChunk = extractMainChunk(pos + 1, chunkEnd + 1, tokens, appendPOSTag);
        if (mainChunk != null) {
            result.set(mainChunk);
        }
    }

    // return null if no valid terms are found
    return result.getLength() == 0 ? null : result;
}

From source file:edu.isi.mavuno.util.TokenWritable.java

License:Apache License

/**
 * Null-safe copy: sets {@code t} to the contents of {@code s}, or clears
 * {@code t} when {@code s} is null instead of throwing.
 */
protected static void safeSet(Text t, Text s) {
    if (s != null) {
        t.set(s);
    } else {
        t.clear();
    }
}

From source file:edu.isi.mavuno.util.TokenWritable.java

License:Apache License

/**
 * Null-safe assignment: sets {@code t} from the string {@code s}, treating
 * a null string as "no content" by clearing {@code t}.
 */
protected static void safeSet(Text t, String s) {
    // Guard clause: null means empty, not an error.
    if (s == null) {
        t.clear();
        return;
    }
    t.set(s);
}

From source file:edu.jhuapl.tinkerpop.AccumuloGraph.java

License:Apache License

/**
 * Looks up a vertex by id: checks the vertex cache first, then (unless
 * existence checks are disabled) verifies the row exists in Accumulo,
 * preloading any configured properties in the same scan.
 *
 * @param id the vertex id; only String ids can match
 * @return the vertex, or null if the id is not a String or no row exists
 * @throws IllegalArgumentException (via ExceptionFactory) if id is null
 */
public Vertex getVertex(Object id) {
    if (id == null) {
        throw ExceptionFactory.vertexIdCanNotBeNull();
    }
    // Vertex ids are stored as String row keys, so a non-String id cannot
    // match. An explicit instanceof check replaces the original
    // catch(ClassCastException) idiom — same observable behavior.
    if (!(id instanceof String)) {
        return null;
    }
    String myID = (String) id;

    // fast path: return a cached vertex if present
    Vertex vertex = null;
    if (vertexCache != null) {
        vertex = vertexCache.retrieve(myID);
        if (vertex != null) {
            return vertex;
        }
    }

    vertex = new AccumuloVertex(this, myID);

    Scanner scan = null;
    try {
        if (!config.skipExistenceChecks()) {
            // in addition to just an "existence" check, we will also load
            // any "preloaded" properties now, which saves us a round-trip
            // to Accumulo later...
            scan = getElementScanner(Vertex.class);
            scan.setRange(new Range(myID));
            scan.fetchColumn(TLABEL, TEXISTS);

            String[] preload = config.getPreloadedProperties();
            if (preload != null) {
                // user has requested specific properties...
                // a single mutable Text is reused for each column family
                Text colf = new Text("");
                for (String key : preload) {
                    if (StringFactory.LABEL.equals(key)) {
                        colf.set(AccumuloGraph.LABEL);
                    } else {
                        colf.set(key);
                    }
                    scan.fetchColumnFamily(colf);
                }
            }

            Iterator<Entry<Key, Value>> iter = scan.iterator();
            // no existence marker => the vertex does not exist
            if (!iter.hasNext()) {
                return null;
            }

            preloadProperties(iter, (AccumuloElement) vertex);
        }
    } finally {
        // release the scanner even on the early returns above
        if (scan != null) {
            scan.close();
        }
    }

    if (vertexCache != null) {
        vertexCache.cache(vertex);
    }
    return vertex;
}

From source file:edu.jhuapl.tinkerpop.AccumuloGraph.java

License:Apache License

/**
 * Preloads the configured properties for an element by scanning its row
 * and caching each deserialized value on the element.
 *
 * @param element the element whose row is scanned and whose cache is filled
 * @param type    the element class (Vertex/Edge) selecting which table to scan
 */
void preloadProperties(AccumuloElement element, Class<? extends Element> type) {
    String[] toPreload = config.getPreloadedProperties();
    if (toPreload == null) {
        return; // nothing configured to preload
    }

    Scanner s = getElementScanner(type);
    try {
        s.setRange(new Range(element.getId().toString()));

        // user has requested specific properties...
        // restrict the scan to those column families (one reused Text holder)
        Text colf = new Text("");
        for (String key : toPreload) {
            if (StringFactory.LABEL.equals(key)) {
                colf.set(AccumuloGraph.LABEL);
            } else {
                colf.set(key);
            }
            s.fetchColumnFamily(colf);
        }

        // deserialize each entry and cache it on the element, using the
        // per-property timeout from the configuration
        Iterator<Entry<Key, Value>> iter = s.iterator();
        while (iter.hasNext()) {
            Entry<Key, Value> entry = iter.next();
            Object val = AccumuloByteSerializer.desserialize(entry.getValue().get());
            element.cacheProperty(entry.getKey().getColumnFamily().toString(), val,
                    config.getPropertyCacheTimeoutMillis(entry.getKey().getColumnFamily().toString()));
        }
    } finally {
        // close the scanner even if iteration throws (the original leaked it)
        s.close();
    }
}

From source file:edu.mit.ll.graphulo.pig.backend.GraphuloOneTableStorage.java

License:Apache License

/**
 * Decodes one WholeRowIterator-packed row into a Pig tuple: slot 0 holds
 * the row id; each subsequent slot holds the value (or map of values) for
 * the corresponding configured column.
 * NOTE(review): assumes (key, value) was produced by WholeRowIterator
 * upstream — confirm the iterator configuration.
 */
@Override
protected Tuple getTuple(Key key, Value value) throws IOException {
    // re-expand the packed row into its individual key/value entries
    SortedMap<Key, Value> rowKVs = WholeRowIterator.decodeRow(key, value);
    // one slot per configured column, plus slot 0 for the row id
    Tuple tuple = TupleFactory.getInstance().newTuple(columns.size() + 1);

    // reusable holders to avoid allocating a Text per column
    final Text cfHolder = new Text();
    final Text cqHolder = new Text();
    final Text row = key.getRow();
    int tupleOffset = 0;

    // slot 0: the row id, decoded from its UTF-8 bytes
    tuple.set(tupleOffset, new DataByteArray(Text.decode(row.getBytes(), 0, row.getLength())));

    for (Column column : this.columns) {
        tupleOffset++;

        switch (column.getType()) {
        case LITERAL:
            // exact-match lookup of a single "colf:colq" cell
            cfHolder.set(column.getColumnFamily());
            if (null != column.getColumnQualifier()) {
                cqHolder.set(column.getColumnQualifier());
            } else {
                cqHolder.set(EMPTY_TEXT);
            }

            // Get the key where our literal would exist (accounting for
            // "colf:colq" or "colf:" empty colq)
            Key literalStartKey = new Key(row, cfHolder, cqHolder);

            SortedMap<Key, Value> tailMap = rowKVs.tailMap(literalStartKey);

            // Find the element
            if (tailMap.isEmpty()) {
                tuple.set(tupleOffset, EMPTY_DATA_BYTE_ARRAY);
            } else {
                Key actualKey = tailMap.firstKey();

                // Only place it in the tuple if it matches the user
                // request, avoid using a value from a
                // key with the wrong colqual
                if (0 == literalStartKey.compareTo(actualKey, PartialKey.ROW_COLFAM_COLQUAL)) {
                    tuple.set(tupleOffset, new DataByteArray(tailMap.get(actualKey).get()));
                } else {
                    // This row doesn't have the column we were looking for
                    tuple.set(tupleOffset, EMPTY_DATA_BYTE_ARRAY);
                }
            }

            break;
        case COLFAM_PREFIX:
            // collect every cell whose column family starts with the prefix
            // into a map keyed by "colf" or "colf:colq"
            cfHolder.set(column.getColumnFamily());
            Range colfamPrefixRange = Range.prefix(row, cfHolder);
            Key colfamPrefixStartKey = new Key(row, cfHolder);

            SortedMap<Key, Value> cfTailMap = rowKVs.tailMap(colfamPrefixStartKey);

            // Find the element
            if (cfTailMap.isEmpty()) {
                tuple.set(tupleOffset, EMPTY_DATA_BYTE_ARRAY);
            } else {
                HashMap<String, DataByteArray> tupleMap = new HashMap<String, DataByteArray>();

                // Build up a map for all the entries in this row that match
                // the colfam prefix
                for (Entry<Key, Value> entry : cfTailMap.entrySet()) {
                    if (colfamPrefixRange.contains(entry.getKey())) {
                        entry.getKey().getColumnFamily(cfHolder);
                        entry.getKey().getColumnQualifier(cqHolder);
                        DataByteArray val = new DataByteArray(entry.getValue().get());

                        // Avoid adding an extra ':' when colqual is empty
                        if (0 == cqHolder.getLength()) {
                            tupleMap.put(cfHolder.toString(), val);
                        } else {
                            tupleMap.put(cfHolder.toString() + COLON + cqHolder.toString(), val);
                        }
                    } else {
                        // tailMap is sorted, so the first non-match ends the prefix run
                        break;
                    }
                }

                if (!tupleMap.isEmpty()) {
                    tuple.set(tupleOffset, tupleMap);
                }
            }

            break;
        case COLQUAL_PREFIX:
            // same as COLFAM_PREFIX, but the prefix extends into the qualifier
            cfHolder.set(column.getColumnFamily());
            cqHolder.set(column.getColumnQualifier());
            Range colqualPrefixRange = Range.prefix(row, cfHolder, cqHolder);
            Key colqualPrefixStartKey = new Key(row, cfHolder, cqHolder);

            SortedMap<Key, Value> cqTailMap = rowKVs.tailMap(colqualPrefixStartKey);
            if (cqTailMap.isEmpty()) {
                tuple.set(tupleOffset, EMPTY_DATA_BYTE_ARRAY);
            } else {
                HashMap<String, DataByteArray> tupleMap = new HashMap<String, DataByteArray>();

                // Build up a map for all the entries in this row that match
                // the colqual prefix
                for (Entry<Key, Value> entry : cqTailMap.entrySet()) {
                    if (colqualPrefixRange.contains(entry.getKey())) {
                        entry.getKey().getColumnFamily(cfHolder);
                        entry.getKey().getColumnQualifier(cqHolder);
                        DataByteArray val = new DataByteArray(entry.getValue().get());

                        // Avoid the extra ':' on empty colqual
                        if (0 == cqHolder.getLength()) {
                            tupleMap.put(cfHolder.toString(), val);
                        } else {
                            tupleMap.put(cfHolder.toString() + COLON + cqHolder.toString(), val);
                        }
                    } else {
                        // sorted tailMap: first non-match ends the prefix run
                        break;
                    }
                }

                if (!tupleMap.isEmpty()) {
                    tuple.set(tupleOffset, tupleMap);
                }
            }

            break;
        default:
            // unknown column type: leave the slot unset
            break;
        }
    }

    return tuple;
}

From source file:edu.stolaf.cs.wmrserver.streaming.StreamKeyValUtil.java

License:Apache License

/**
 * split a UTF-8 byte array into key and value 
 * assuming that the delimilator is at splitpos. 
 * @param utf utf-8 encoded string/*from  w  w  w .j a  v a  2s. c o m*/
 * @param start starting offset
 * @param length no. of bytes
 * @param key contains key upon the method is returned
 * @param val contains value upon the method is returned
 * @param splitPos the split pos
 * @param separatorLength the length of the separator between key and value
 * @throws IOException
 */
public static void splitKeyVal(byte[] utf, int start, int length, Text key, Text val, int splitPos,
        int separatorLength) throws IOException {
    if (splitPos < start || splitPos >= (start + length))
        throw new IllegalArgumentException(
                "splitPos must be in the range " + "[" + start + ", " + (start + length) + "]: " + splitPos);
    int keyLen = (splitPos - start);
    byte[] keyBytes = new byte[keyLen];
    System.arraycopy(utf, start, keyBytes, 0, keyLen);
    int valLen = (start + length) - splitPos - separatorLength;
    byte[] valBytes = new byte[valLen];
    System.arraycopy(utf, splitPos + separatorLength, valBytes, 0, valLen);
    key.set(keyBytes);
    val.set(valBytes);
}