Example usage for org.apache.hadoop.io Text toString

Introduction

This page collects example usages of the toString() method of org.apache.hadoop.io.Text.

Prototype

@Override
public String toString()

Source Link

Document

Converts the text back to a string.

Usage

From source file:com.bizosys.hsearch.kv.indexing.KVReducerBase.java

License:Apache License

/**
 * Builds a serialized key/value index table from the supplied text records.
 *
 * Each non-null record is split on {@code KVIndexer.FIELD_SEPARATOR} into two parts.
 * The first part is parsed as the container key and the second as the container value.
 * Records whose second part is missing are skipped (after the key has been parsed).
 *
 * @param skipSingle   when true, keys are tracked in {@code records} and {@code null} is
 *                     returned if fewer than two entries were collected
 * @param existingData previously stored bytes; must be {@code null} because appending is
 *                     not supported here
 * @param values       the records to index
 * @param isAnalyzed   not used by this method
 * @param fieldName    not used by this method
 * @return the bytes produced by the index table, or {@code null} as described above
 * @throws IOException if {@code existingData} is not {@code null}
 */
public byte[] indexTextSet(boolean skipSingle, byte[] existingData, Iterable<Text> values, boolean isAnalyzed,
        String fieldName) throws IOException {

    HSearchTableKVIndex table = new HSearchTableKVIndex();

    if (existingData != null) {
        throw new IOException("Append is not supported for the non repetable analyzed fields");
    }

    // Fixed coordinates under which every entry is stored in the table.
    final int docType = 1;
    final int fieldType = 1;
    final String metaDoc = "-";
    final boolean flag = true;

    // Reused for every record; reset before each split so stale parts never leak through.
    String[] parts = new String[2];

    records.clear();
    for (Text text : values) {
        if (text == null) {
            continue;
        }

        Arrays.fill(parts, null);
        LineReaderUtil.fastSplit(parts, text.toString(), KVIndexer.FIELD_SEPARATOR);

        int containerKey = Integer.parseInt(parts[0]);
        if (parts[1] == null) {
            continue;
        }
        int containerValue = Integer.parseInt(parts[1]);
        table.put(docType, fieldType, metaDoc, containerValue, containerKey, flag);

        // Only the "fewer than two" question matters later, so stop adding after two.
        if (skipSingle && records.size() < 2) {
            records.add(containerKey);
        }
    }

    if (skipSingle && records.size() < 2) {
        return null;
    }

    return table.toBytes();
}

From source file:com.bizosys.hsearch.kv.indexing.KVReducerBase.java

License:Apache License

/**
 * Merges the integer ids carried by {@code values} into a bitset and serializes it.
 *
 * If {@code existingData} is non-empty it is decoded first (after a Snappy uncompress when
 * {@code isCompressed} is set) and the new ids are added to it; otherwise a fresh bitset is used.
 *
 * @param skipSingle   when true, {@code null} is returned if fewer than two bits are set
 * @param existingData previously serialized bitset, may be {@code null} or empty
 * @param values       records whose text is parsed as an integer id; null entries are skipped
 * @param isAnalyzed   not used by this method
 * @param fieldName    not used by this method
 * @param isCompressed selects the compressed serializer and Snappy decoding of existing data
 * @return the serialized bitset, or {@code null} as described above
 * @throws IOException declared for the decode/encode helpers
 */
public byte[] indexTextBitset(boolean skipSingle, byte[] existingData, Iterable<Text> values,
        boolean isAnalyzed, String fieldName, boolean isCompressed) throws IOException {

    BitSetWrapper foundIds;
    if (existingData == null || existingData.length == 0) {
        foundIds = new BitSetWrapper();
    } else {
        byte[] raw = existingData;
        if (isCompressed) {
            raw = Snappy.uncompress(existingData, 0, existingData.length);
        }
        foundIds = SortedBytesBitset.getInstanceBitset().bytesToBitSet(raw, 0, raw.length);
    }

    for (Text text : values) {
        if (text != null) {
            foundIds.set(Integer.parseInt(text.toString()));
        }
    }

    if (skipSingle && foundIds.cardinality() < 2) {
        return null;
    }

    if (isCompressed) {
        return SortedBytesBitsetCompressed.getInstanceBitset().bitSetToBytes(foundIds);
    }
    return SortedBytesBitset.getInstanceBitset().bitSetToBytes(foundIds);
}

From source file:com.bizosys.hsearch.kv.indexing.KVReducerHBase.java

License:Apache License

/**
 * Reduces all values of one row key into a single cell and emits it as a {@code Put}.
 *
 * Unless the first key part equals {@code KVIndexer.MERGEKEY_ROW}, the second key part is
 * split on {@code KVIndexer.FIELD_SEPARATOR} into a data type name and a source sequence,
 * which are used to look up the field definition and the primitive type character.
 * When {@code fm.append} is set, the data of the first cell of the existing row (if any) is
 * passed along so the new values can be merged with it.
 *
 * @param key     composite key; first part is the row key, second part is "dataType<sep>sourceSeq"
 * @param values  the values to index for this key
 * @param context the reducer context the resulting {@code Put} is written to
 * @throws IOException          if a value cannot be parsed as a number; the original
 *                              {@link NumberFormatException} is attached as the cause
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void reduce(TextPair key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

    String rowKeyP1 = key.getFirst().toString();
    String rowKeyP2 = key.getSecond().toString();
    Field fld = null;
    char dataTypeChar = '-';

    if (!rowKeyP1.equals(KVIndexer.MERGEKEY_ROW)) {
        int sepIndex = rowKeyP2.indexOf(KVIndexer.FIELD_SEPARATOR);
        String dataType = rowKeyP2.substring(0, sepIndex).toLowerCase();
        int sourceSeq = Integer.parseInt(rowKeyP2.substring(sepIndex + 1));
        fld = fm.sourceSeqWithField.get(sourceSeq);
        dataTypeChar = KVIndexer.dataTypesPrimitives.get(dataType);
    }

    byte[] existingData = null;

    if (fm.append) {
        List<NVBytes> cells = HReader.getCompleteRow(fm.tableName, rowKeyP1.getBytes());
        if (null != cells && cells.size() > 0) {
            NVBytes nvBytes = cells.get(0);
            if (null != nvBytes)
                existingData = nvBytes.data;
        }
    }

    // Passed as a mutable builder; the final row key is read back from it after cookBytes.
    StringBuilder rowKeyText = new StringBuilder(rowKeyP1);

    try {

        byte[] finalData = reducerUtil.cookBytes(rowKeyText, values, existingData, fld, dataTypeChar);
        if (null == finalData)
            return;

        Put put = new Put(rowKeyText.toString().getBytes());
        put.add(KVIndexer.FAM_NAME, KVIndexer.COL_NAME, finalData);

        context.write(null, put);

    } catch (NumberFormatException ex) {
        IdSearchLog.l.fatal("Error at index reducer", ex);
        // Dump whatever values the iterable still yields to help locate the bad record.
        Iterator<Text> itr = values.iterator();
        while (itr.hasNext()) {
            Text text = itr.next();
            IdSearchLog.l.fatal(text.toString());
        }
        // Chain the original exception so its stack trace is not lost for the caller.
        throw new IOException("Error indexing data because " + ex.getMessage(), ex);
    }
}

From source file:com.bizosys.hsearch.kv.indexing.MapFileSizeReader.java

License:Apache License

/**
 * Prints the value length stored for keys of a MapFile.
 *
 * With only a file path, every key is listed as "key&lt;TAB&gt;length". With a second argument,
 * only that key is looked up and printed.
 *
 * @param args {@code args[0]} is the HDFS path of the MapFile; optional {@code args[1]} is the key
 */
public static void main(String[] args) {
    if (args.length < 1) {
        System.out.println("Usage: " + MapFileSizeReader.class + " <<hdfs-filepath>> <<key>>");
        System.exit(1);
    }

    String hdfsFilePath = args[0].trim();
    String askedKey = null;
    if (args.length == 2)
        askedKey = (args[1].trim());

    MapFile.Reader reader = null;
    try {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(hdfsFilePath), conf);
        reader = new MapFile.Reader(fs, hdfsFilePath, conf);

        Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

        if (null == askedKey) {
            while (reader.next(key, value)) {
                // Exactly one line per key; a null value is reported as length 0 instead of
                // printing twice and then dereferencing null.
                int length = (null == value) ? 0 : value.getLength();
                System.out.println(key.toString() + "\t" + length);
            }
        } else {
            // Text stores UTF-8; let it encode the String itself rather than handing it
            // bytes in the platform default charset.
            key.set(askedKey);

            // NOTE(review): the lookup result is not checked, so a missing key prints the
            // length of the untouched value instance — confirm whether that is intended.
            reader.get(key, value);
            System.out.println(key.toString() + "\t" + value.getLength());
        }
    } catch (Exception e) {
        System.err.println("Error in reading from HDFSFilepath:" + hdfsFilePath);
        e.printStackTrace(System.out);
    } finally {
        IOUtils.closeStream(reader);
    }

}

From source file:com.blackberry.logdriver.mapred.boom.BoomFilterMapper.java

License:Apache License

/**
 * Emits the record if any configured filter accepts its text.
 *
 * Filters are tried in list order and the first one that accepts wins. The winning filter
 * is then moved towards the front of the list for as long as its match count exceeds that
 * of its predecessor.
 *
 * @param key      metadata of the log line, emitted unchanged
 * @param value    the log line text tested against the filters
 * @param output   collector receiving accepted records
 * @param reporter not used by this method
 * @throws IOException if no filters are configured, or if collecting the record fails
 */
@Override
public void map(LogLineData key, Text value, OutputCollector<LogLineData, Text> output, Reporter reporter)
        throws IOException {
    if (filters == null || filters.size() == 0) {
        throw new IOException("No filters found.");
    }

    for (int index = 0; index < filters.size(); index++) {
        Filter candidate = filters.get(index);
        if (!candidate.accept(value.toString())) {
            continue;
        }

        output.collect(key, value);

        // Swap the matching filter with its predecessor while it has more matches.
        for (int pos = index; pos > 0
                && candidate.getNumMatches() > filters.get(pos - 1).getNumMatches(); pos--) {
            Filter displaced = filters.get(pos - 1);
            filters.set(pos - 1, candidate);
            filters.set(pos, displaced);

            LOG.info("Filter set reordered.  Currently: {}", filters);
        }

        // First match wins; remaining filters are not consulted.
        return;
    }
}

From source file:com.blackberry.logdriver.mapred.boom.ReBoomRecordWriter.java

License:Apache License

/**
 * Writes one log line through the underlying writer, passing the line text together with
 * the timestamp, event id, create time and block number taken from the key.
 *
 * @param key   supplies the metadata of the line
 * @param value the line text
 * @throws IOException if the underlying writer fails
 */
@Override
public void write(LogLineData key, Text value) throws IOException {
    final String line = value.toString();
    writer.writeLine(key.getTimestamp(), line, key.getEventId(), key.getCreateTime(), key.getBlockNumber());
}

From source file:com.blackberry.logdriver.mapreduce.boom.BoomFilterMapper.java

License:Apache License

/**
 * Writes the record to the context if any configured filter accepts its text.
 *
 * Filters are tried in list order and the first one that accepts wins. The winning filter
 * is then moved towards the front of the list for as long as its match count exceeds that
 * of its predecessor.
 *
 * @param key     metadata of the log line, written unchanged
 * @param value   the log line text tested against the filters
 * @param context the mapper context receiving accepted records
 * @throws IOException          if the filter list is missing or empty, or if writing fails
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void map(LogLineData key, Text value, Context context) throws IOException, InterruptedException {
    // A missing filter list is reported the same way as an empty one instead of
    // surfacing as a NullPointerException.
    if (filters == null || filters.size() == 0) {
        throw new IOException("No filters found.");
    }

    Filter thisFilter = null;
    for (int i = 0; i < filters.size(); i++) {
        thisFilter = filters.get(i);

        // Check for a match
        if (thisFilter.accept(value.toString())) {
            context.write(key, value);

            // Reorder the filters, if necessary.
            Filter tmpFilter;
            while (i > 0 && thisFilter.getNumMatches() > filters.get(i - 1).getNumMatches()) {
                // move this filter up..
                tmpFilter = filters.get(i - 1);
                filters.set(i - 1, thisFilter);
                filters.set(i, tmpFilter);
                i--;

                LOG.info("Filter set reordered.  Currently: {}", filters);
            }

            // Stop processing filters.
            break;
        }
    }
}

From source file:com.boozallen.cognition.lens.Column.java

License:Apache License

/**
 * Initialize a column.
 * Delegates to another constructor of this class, passing the string forms of
 * {@code cf} and {@code cq}.
 * @param source -- the source of the data
 * @param cf -- the column family in accumulo
 * @param cq -- the column qualifier in accumulo
 */
public Column(Source source, Text cf, Text cq) {
    this(source, cf.toString(), cq.toString());
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelCountingMapper.java

License:Apache License

/**
 * Emits each distinct item of the input line once, paired with {@code ONE}.
 *
 * The line is split with {@code splitter}; duplicates are removed and items that are empty
 * after trimming are ignored. The emitted key is the item as split, not its trimmed form.
 *
 * @param offset  not used by this method
 * @param input   one line of input
 * @param context receives the (item, ONE) pairs and status updates
 * @throws IOException          propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void map(LongWritable offset, Text input, Context context) throws IOException, InterruptedException {

    String[] tokens = splitter.split(input.toString());
    Set<String> distinctTokens = Sets.newHashSet(Arrays.asList(tokens));

    for (String item : distinctTokens) {
        if (!item.trim().isEmpty()) {
            context.setStatus("Parallel Counting Mapper: " + item);
            context.write(new Text(item), ONE);
        }
    }
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelFPGrowthMapper.java

License:Apache License

/**
 * Turns one input line into group-dependent transactions.
 *
 * Items of the line that are present in {@code fMap} (and not blank) are mapped to their
 * integer ids, de-duplicated and sorted. The sorted ids are then walked from the last
 * position to the first; the first time an id's group is seen, the ids from position 0
 * through the current position are written as a {@code TransactionTree} under that group id.
 *
 * @param offset  not used by this method
 * @param input   one line of input
 * @param context receives the (group id, transaction tree) pairs and status updates
 * @throws IOException          propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void map(LongWritable offset, Text input, Context context) throws IOException, InterruptedException {

    OpenIntHashSet featureIds = new OpenIntHashSet();
    for (String token : splitter.split(input.toString())) {
        if (fMap.containsKey(token) && !token.trim().isEmpty()) {
            featureIds.add(fMap.get(token));
        }
    }

    IntArrayList sortedIds = new IntArrayList(featureIds.size());
    featureIds.keys(sortedIds);
    sortedIds.sort();

    OpenIntHashSet emittedGroups = new OpenIntHashSet();
    for (int last = sortedIds.size() - 1; last >= 0; last--) {
        int item = sortedIds.get(last);
        int groupID = PFPGrowth.getGroup(item, maxPerGroup);

        // Each group receives a single shard: the one produced at its first (rightmost) hit.
        if (emittedGroups.contains(groupID)) {
            continue;
        }

        IntArrayList prefix = new IntArrayList(last + 1);
        prefix.addAllOfFromTo(sortedIds, 0, last);
        context.setStatus("Parallel FPGrowth: Generating Group Dependent transactions for: " + item);
        wGroupID.set(groupID);
        context.write(wGroupID, new TransactionTree(prefix, 1L));
        emittedGroups.add(groupID);
    }

}