List of usage examples for org.apache.hadoop.io.Text.set
public void set(Text other)
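Text.set replaces the contents of an existing Text instance in place, which is why the mappers below reuse a single Text object across records instead of allocating a new one for every key or value. Before the collected examples, here is a minimal, self-contained sketch of both the Text and String overloads; the class name TextSetExample is made up for illustration and is not taken from any of the source files listed below.

import org.apache.hadoop.io.Text;

public class TextSetExample {
    public static void main(String[] args) {
        // Reuse one Text instance instead of allocating a new one per record.
        Text word = new Text();

        // set(Text other): copy the bytes of another Text into this instance.
        Text other = new Text("hello");
        word.set(other);
        System.out.println(word); // prints: hello

        // set(String string): replace the contents from a String.
        word.set("world");
        System.out.println(word); // prints: world
    }
}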
From source file:microbench.WordCountOnHDFSDataLocal.java
License:Apache License
public static void main(String[] args) throws IOException, InterruptedException {
    try {
        parseArgs(args);
        HashMap<String, String> conf = new HashMap<String, String>();
        initConf(conf);
        MPI_D.Init(args, MPI_D.Mode.Common, conf);
        JobConf jobConf = new JobConf(confPath);
        if (MPI_D.COMM_BIPARTITE_O != null) {
            // O communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
            if (rank == 0) {
                DataMPIUtil.printArgs(args);
            }
            System.out.println("The O task " + rank + " of " + size + " is working...");
            HadoopReader<LongWritable, Text> reader = HadoopIOUtil.getReader(jobConf, inDir,
                    TextInputFormat.class, rank, MPI_D.COMM_BIPARTITE_O);
            Text word = new Text();
            IntWritable one = new IntWritable(1);
            LongWritable khead = reader.createKey();
            Text vhead = reader.createValue();
            while (reader.next(khead, vhead)) {
                StringTokenizer itr = new StringTokenizer(vhead.toString());
                while (itr.hasMoreTokens()) {
                    word.set(itr.nextToken());
                    // send key-value
                    MPI_D.Send(word, one);
                }
            }
            reader.close();
        } else if (MPI_D.COMM_BIPARTITE_A != null) {
            // A communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_A);
            System.out.println("The A task " + rank + " of " + size + " is working...");
            HadoopWriter<Text, IntWritable> outrw = HadoopIOUtil.getNewWriter(jobConf, outDir, Text.class,
                    IntWritable.class, TextOutputFormat.class, null, rank, MPI_D.COMM_BIPARTITE_A);
            Text oldKey = null;
            IntWritable valueData = new IntWritable();
            int sum = 0;
            Object[] keyValue = MPI_D.Recv();
            while (keyValue != null) {
                Text key = (Text) keyValue[0];
                IntWritable value = (IntWritable) keyValue[1];
                if (oldKey == null) {
                    oldKey = key;
                    sum = value.get();
                } else {
                    if (key.equals(oldKey)) {
                        sum += value.get();
                    } else {
                        valueData.set(sum);
                        outrw.write(oldKey, valueData);
                        oldKey = key;
                        sum = value.get();
                    }
                }
                keyValue = MPI_D.Recv();
            }
            if (oldKey != null) {
                valueData.set(sum);
                outrw.write(oldKey, valueData);
            }
            outrw.close();
        }
        MPI_D.Finalize();
    } catch (MPI_D_Exception e) {
        e.printStackTrace();
    }
}
From source file:mlbench.pagerank.PagerankNaive.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" }) public static void main(String[] args) throws IOException, InterruptedException { try {/*from w ww .jav a2 s .co m*/ parseArgs(args); HashMap<String, String> conf = new HashMap<String, String>(); initConf(conf); MPI_D.Init(args, MPI_D.Mode.Common, conf); JobConf jobConf = new JobConf(confPath); if (MPI_D.COMM_BIPARTITE_O != null) { // O communicator int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O); int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O); if (rank == 0) { LOG.info(PagerankNaive.class.getSimpleName() + " O start."); } FileSplit[] inputs1 = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, jobConf, edgeDir, rank); FileSplit[] inputs2 = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, jobConf, vecDir, rank); FileSplit[] inputs = (FileSplit[]) ArrayUtils.addAll(inputs2, inputs1); for (int i = 0; i < inputs.length; i++) { FileSplit fsplit = inputs[i]; LineRecordReader kvrr = new LineRecordReader(jobConf, fsplit); LongWritable key = kvrr.createKey(); Text value = kvrr.createValue(); { IntWritable k = new IntWritable(); Text v = new Text(); while (kvrr.next(key, value)) { String line_text = value.toString(); // ignore comments in edge file if (line_text.startsWith("#")) continue; final String[] line = line_text.split("\t"); if (line.length < 2) continue; // vector : ROWID VALUE('vNNNN') if (line[1].charAt(0) == 'v') { k.set(Integer.parseInt(line[0])); v.set(line[1]); MPI_D.Send(k, v); } else { /* * In other matrix-vector multiplication, we * output (dst, src) here However, In PageRank, * the matrix-vector computation formula is M^T * * v. Therefore, we output (src,dst) here. */ int src_id = Integer.parseInt(line[0]); int dst_id = Integer.parseInt(line[1]); k.set(src_id); v.set(line[1]); MPI_D.Send(k, v); if (make_symmetric == 1) { k.set(dst_id); v.set(line[0]); MPI_D.Send(k, v); } } } } } } else if (MPI_D.COMM_BIPARTITE_A != null) { // A communicator int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A); if (rank == 0) { LOG.info(PagerankNaive.class.getSimpleName() + " A start."); } HadoopWriter<IntWritable, Text> outrw = HadoopIOUtil.getNewWriter(jobConf, outDir, IntWritable.class, Text.class, TextOutputFormat.class, null, rank, MPI_D.COMM_BIPARTITE_A); IntWritable oldKey = null; int i; double cur_rank = 0; ArrayList<Integer> dst_nodes_list = new ArrayList<Integer>(); Object[] keyValue = MPI_D.Recv(); while (keyValue != null) { IntWritable key = (IntWritable) keyValue[0]; Text value = (Text) keyValue[1]; if (oldKey == null) { oldKey = key; } // A new key arrives if (!key.equals(oldKey)) { outrw.write(oldKey, new Text("s" + cur_rank)); int outdeg = dst_nodes_list.size(); if (outdeg > 0) { cur_rank = cur_rank / (double) outdeg; } for (i = 0; i < outdeg; i++) { outrw.write(new IntWritable(dst_nodes_list.get(i)), new Text("v" + cur_rank)); } oldKey = key; cur_rank = 0; dst_nodes_list = new ArrayList<Integer>(); } // common record String line_text = value.toString(); final String[] line = line_text.split("\t"); if (line.length == 1) { if (line_text.charAt(0) == 'v') { // vector : VALUE cur_rank = Double.parseDouble(line_text.substring(1)); } else { // edge : ROWID dst_nodes_list.add(Integer.parseInt(line[0])); } } keyValue = MPI_D.Recv(); } // write the left part if (cur_rank != 0) { outrw.write(oldKey, new Text("s" + cur_rank)); int outdeg = dst_nodes_list.size(); if (outdeg > 0) { cur_rank = cur_rank / (double) outdeg; } for (i = 0; i < outdeg; i++) { outrw.write(new 
IntWritable(dst_nodes_list.get(i)), new Text("v" + cur_rank)); } } outrw.close(); } MPI_D.Finalize(); } catch (MPI_D_Exception e) { e.printStackTrace(); } }
From source file:mx.itam.metodos.lshclustering.MinhashEmitMapper.java
License:Apache License
@Override
public void map(Text id, IntArrayWritable values, Context context) throws IOException, InterruptedException {
    for (int i = 0; i < functionsCount; i++) {
        hashValues[i] = Integer.MAX_VALUE;
    }
    for (int i = 0; i < functionsCount; i++) {
        HashFunction hf = functions[i];
        for (Writable wr : values.get()) {
            IntWritable value = (IntWritable) wr;
            int hash = hf.hashInt(value.get()).asInt();
            if (hash < hashValues[i]) {
                hashValues[i] = hash;
            }
        }
    }
    Text sketch = new Text();
    Hasher hasher = lsh.newHasher();
    int band = 0;
    for (int i = 0; i < functionsCount; i++) {
        hasher.putInt(hashValues[i]);
        if (i > 0 && (i % rows) == 0) {
            sketch.set(band + "-" + hasher.hash().toString());
            context.write(new SecondarySortKey(sketch, id), id);
            hasher = lsh.newHasher();
            band++;
        }
    }
    sketch.set(band + "-" + hasher.hash().toString());
    context.write(new SecondarySortKey(sketch, id), id);
}
From source file:mx.itam.metodos.minhashing.MinhashMapper.java
License:Apache License
@Override
public void map(Text id, IntArrayWritable values, Context ctx) throws IOException, InterruptedException {
    for (int i = 0; i < functionsCount; i++) {
        hashValues[i] = Integer.MAX_VALUE;
    }
    for (int i = 0; i < functionsCount; i++) {
        HashFunction hf = functions[i];
        for (Writable wr : values.get()) {
            IntWritable value = (IntWritable) wr;
            int hash = hf.hashInt(value.get()).asInt();
            if (hash < hashValues[i]) {
                hashValues[i] = hash;
            }
        }
    }
    Text sketch = new Text();
    Hasher hasher = lsh.newHasher();
    int band = 0;
    for (int i = 0; i < functionsCount; i++) {
        hasher.putInt(hashValues[i]);
        if (i > 0 && (i % rows) == 0) {
            sketch.set(band + "-" + hasher.hash().toString());
            write(id, sketch, ctx);
            hasher = lsh.newHasher();
            band++;
        }
    }
    sketch.set(band + "-" + hasher.hash().toString());
    write(id, sketch, ctx);
}
From source file:mx.iteso.msc.asn.mrwordcount.MyMapper.java
License:Apache License
protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    Text word = new Text();
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, new IntWritable(1));
    }
}
From source file:net.peacesoft.nutch.crawl.ReIndexerMapReduce.java
License:Apache License
public void map(Text key, Writable value, OutputCollector<Text, NutchWritable> output, Reporter reporter)
        throws IOException {
    String urlString = filterUrl(normalizeUrl(key.toString()));
    if (urlString == null) {
        return;
    } else {
        key.set(urlString);
    }
    output.collect(key, new NutchWritable(value));
}
From source file:oracle.kv.hadoop.hive.table.TableHiveRecordReader.java
License:Open Source License
@Override
public boolean next(Text key, Text value) {
    LOG.trace("next [key = " + key + ", value = " + value + "]");
    if (key == null || value == null) {
        return false;
    }
    boolean ret = false;
    try {
        key.clear();
        value.clear();
        ret = v2RecordReader.nextKeyValue();
        if (ret) {
            final Row curRow = v2RecordReader.getCurrentValue();
            assert curRow != null;
            key.set(curRow.createPrimaryKey().toString());
            value.set(curRow.toString());
        }
    } catch (Exception e) {
        LOG.error("TableHiveRecordReader " + this + " caught: " + e, e);
    }
    return ret;
}
From source file:org.ahanna.DoubleConversionMapper.java
License:Apache License
public void map(Text json, Text nothing, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    try {
        Text outJson = new Text();
        String jsonStr = json.toString();
        JSONTokener tokener = new JSONTokener(jsonStr);
        JSONObject jsonObj = new JSONObject(tokener);
        fixATweet(jsonObj);
        if (jsonObj.has("retweeted_status")) {
            Object retweetObj = jsonObj.get("retweeted_status");
            // compare string contents, not object references
            if (!"null".equals(retweetObj.toString())) {
                fixATweet((JSONObject) retweetObj);
            }
        }
        outJson.set(jsonObj.toString());
        output.collect(outJson, nothing);
    } catch (EOFException e) {
        // do nothing
    } catch (JSONException e) {
        // do nothing
    }
}
From source file:org.ankus.mapreduce.algorithms.correlation.booleanset.BooleanSetMapper.java
License:Apache License
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String row = value.toString();
    String[] columns = row.split(delimiter);
    StringBuffer uniqueKeyStringBuffer = new StringBuffer();
    for (int i = 0; i < columns.length; i++) {
        String column = columns[i];
        if (i == Integer.parseInt(keyIndex)) {
            uniqueKeyStringBuffer.append(column);
        } else {
            continue;
        }
    }
    for (int i = 1; i < columns.length; i++) {
        // If the data value is not equal 0 or 1, the value is 1.
        if (columns[i].equals("0") || columns[i].equals("1")) {
            value.set(columns[i]);
        } else {
            value.set("1");
        }
        TextIntegerPairWritableComparable textIntegerPairWritableComparable =
                new TextIntegerPairWritableComparable(uniqueKeyStringBuffer.toString(),
                        Integer.parseInt(value.toString()));
        context.write(new Text("item-" + i), textIntegerPairWritableComparable);
    }
}
From source file:org.ankus.mapreduce.algorithms.correlation.numericset.NumericSetMapper.java
License:Apache License
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String row = value.toString();
    String[] columns = row.split(delimiter);
    StringBuffer uniqueKeyStringBuffer = new StringBuffer();
    for (int i = 0; i < columns.length; i++) {
        String column = columns[i];
        if (i == Integer.parseInt(keyIndex)) {
            uniqueKeyStringBuffer.append(column);
        } else {
            continue;
        }
    }
    for (int k = 1; k < columns.length; k++) {
        value.set(columns[k]);
        TextDoublePairWritableComparable textDoublePairWritableComparable =
                new TextDoublePairWritableComparable(uniqueKeyStringBuffer.toString(),
                        Double.parseDouble(value.toString()));
        context.write(new Text("item-" + k), textDoublePairWritableComparable);
    }
}