Example usage for org.apache.hadoop.io Text set

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.set.

Prototype

public void set(Text other) 

Document

Copy a text (set this Text to the contents of another Text).
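Before the full examples below, here is a minimal, self-contained sketch of the two set overloads they rely on. The class name TextSetExample and the variable names are illustrative only, and hadoop-common is assumed to be on the classpath.

import org.apache.hadoop.io.Text;

public class TextSetExample {
    public static void main(String[] args) {
        Text source = new Text("hello");

        // set(Text other): copy the contents of another Text into this one
        Text copy = new Text();
        copy.set(source);

        // set(String string): replace the contents with a UTF-8 encoded String
        Text word = new Text();
        word.set("world");

        System.out.println(copy); // prints "hello"
        System.out.println(word); // prints "world"
    }
}

Because set replaces the current contents in place, the examples below typically reuse a single Text instance across loop iterations instead of allocating a new one per record.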

Usage

From source file:microbench.WordCountOnHDFSDataLocal.java

License:Apache License

public static void main(String[] args) throws IOException, InterruptedException {
    try {
        parseArgs(args);
        HashMap<String, String> conf = new HashMap<String, String>();
        initConf(conf);
        MPI_D.Init(args, MPI_D.Mode.Common, conf);

        JobConf jobConf = new JobConf(confPath);
        if (MPI_D.COMM_BIPARTITE_O != null) {
            // O communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
            if (rank == 0) {
                DataMPIUtil.printArgs(args);
            }
            System.out.println("The O task " + rank + " of " + size + " is working...");

            HadoopReader<LongWritable, Text> reader = HadoopIOUtil.getReader(jobConf, inDir,
                    TextInputFormat.class, rank, MPI_D.COMM_BIPARTITE_O);
            Text word = new Text();
            IntWritable one = new IntWritable(1);
            LongWritable khead = reader.createKey();
            Text vhead = reader.createValue();
            while (reader.next(khead, vhead)) {
                StringTokenizer itr = new StringTokenizer(vhead.toString());
                while (itr.hasMoreTokens()) {
                    word.set(itr.nextToken());
                    // send key-value
                    MPI_D.Send(word, one);
                }
            }
            reader.close();
        } else if (MPI_D.COMM_BIPARTITE_A != null) {
            // A communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_A);
            System.out.println("The A task " + rank + " of " + size + " is working...");

            HadoopWriter<Text, IntWritable> outrw = HadoopIOUtil.getNewWriter(jobConf, outDir, Text.class,
                    IntWritable.class, TextOutputFormat.class, null, rank, MPI_D.COMM_BIPARTITE_A);

            Text oldKey = null;
            IntWritable valueData = new IntWritable();
            int sum = 0;
            Object[] keyValue = MPI_D.Recv();
            while (keyValue != null) {
                Text key = (Text) keyValue[0];
                IntWritable value = (IntWritable) keyValue[1];
                if (oldKey == null) {
                    oldKey = key;
                    sum = value.get();
                } else {
                    if (key.equals(oldKey)) {
                        sum += value.get();
                    } else {
                        valueData.set(sum);
                        outrw.write(oldKey, valueData);
                        oldKey = key;
                        sum = value.get();
                    }
                }
                keyValue = MPI_D.Recv();
            }
            if (oldKey != null) {
                valueData.set(sum);
                outrw.write(oldKey, valueData);
            }
            outrw.close();
        }
        MPI_D.Finalize();
    } catch (MPI_D_Exception e) {
        e.printStackTrace();
    }
}

From source file:mlbench.pagerank.PagerankNaive.java

License:Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
public static void main(String[] args) throws IOException, InterruptedException {
    try {
        parseArgs(args);
        HashMap<String, String> conf = new HashMap<String, String>();
        initConf(conf);
        MPI_D.Init(args, MPI_D.Mode.Common, conf);

        JobConf jobConf = new JobConf(confPath);
        if (MPI_D.COMM_BIPARTITE_O != null) {
            // O communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
            if (rank == 0) {
                LOG.info(PagerankNaive.class.getSimpleName() + " O start.");
            }
            FileSplit[] inputs1 = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                    jobConf, edgeDir, rank);
            FileSplit[] inputs2 = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                    jobConf, vecDir, rank);
            FileSplit[] inputs = (FileSplit[]) ArrayUtils.addAll(inputs2, inputs1);
            for (int i = 0; i < inputs.length; i++) {
                FileSplit fsplit = inputs[i];
                LineRecordReader kvrr = new LineRecordReader(jobConf, fsplit);

                LongWritable key = kvrr.createKey();
                Text value = kvrr.createValue();
                {
                    IntWritable k = new IntWritable();
                    Text v = new Text();
                    while (kvrr.next(key, value)) {
                        String line_text = value.toString();
                        // ignore comments in edge file
                        if (line_text.startsWith("#"))
                            continue;

                        final String[] line = line_text.split("\t");
                        if (line.length < 2)
                            continue;

                        // vector : ROWID VALUE('vNNNN')
                        if (line[1].charAt(0) == 'v') {
                            k.set(Integer.parseInt(line[0]));
                            v.set(line[1]);
                            MPI_D.Send(k, v);
                        } else {
                            /*
                             * In a generic matrix-vector multiplication we would
                             * output (dst, src) here. In PageRank, however, the
                             * computation is M^T * v, so we output (src, dst).
                             */
                            int src_id = Integer.parseInt(line[0]);
                            int dst_id = Integer.parseInt(line[1]);
                            k.set(src_id);
                            v.set(line[1]);
                            MPI_D.Send(k, v);

                            if (make_symmetric == 1) {
                                k.set(dst_id);
                                v.set(line[0]);
                                MPI_D.Send(k, v);
                            }
                        }
                    }
                }
            }

        } else if (MPI_D.COMM_BIPARTITE_A != null) {
            // A communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
            if (rank == 0) {
                LOG.info(PagerankNaive.class.getSimpleName() + " A start.");
            }

            HadoopWriter<IntWritable, Text> outrw = HadoopIOUtil.getNewWriter(jobConf, outDir,
                    IntWritable.class, Text.class, TextOutputFormat.class, null, rank, MPI_D.COMM_BIPARTITE_A);

            IntWritable oldKey = null;
            int i;
            double cur_rank = 0;
            ArrayList<Integer> dst_nodes_list = new ArrayList<Integer>();
            Object[] keyValue = MPI_D.Recv();
            while (keyValue != null) {
                IntWritable key = (IntWritable) keyValue[0];
                Text value = (Text) keyValue[1];
                if (oldKey == null) {
                    oldKey = key;
                }
                // A new key arrives
                if (!key.equals(oldKey)) {
                    outrw.write(oldKey, new Text("s" + cur_rank));
                    int outdeg = dst_nodes_list.size();
                    if (outdeg > 0) {
                        cur_rank = cur_rank / (double) outdeg;
                    }
                    for (i = 0; i < outdeg; i++) {
                        outrw.write(new IntWritable(dst_nodes_list.get(i)), new Text("v" + cur_rank));
                    }
                    oldKey = key;
                    cur_rank = 0;
                    dst_nodes_list = new ArrayList<Integer>();
                }
                // common record
                String line_text = value.toString();
                final String[] line = line_text.split("\t");
                if (line.length == 1) {
                    if (line_text.charAt(0) == 'v') { // vector : VALUE
                        cur_rank = Double.parseDouble(line_text.substring(1));
                    } else { // edge : ROWID
                        dst_nodes_list.add(Integer.parseInt(line[0]));
                    }
                }
                keyValue = MPI_D.Recv();
            }
            // write out the leftover (last) key
            if (cur_rank != 0) {
                outrw.write(oldKey, new Text("s" + cur_rank));
                int outdeg = dst_nodes_list.size();
                if (outdeg > 0) {
                    cur_rank = cur_rank / (double) outdeg;
                }
                for (i = 0; i < outdeg; i++) {
                    outrw.write(new IntWritable(dst_nodes_list.get(i)), new Text("v" + cur_rank));
                }
            }
            outrw.close();
        }
        MPI_D.Finalize();
    } catch (MPI_D_Exception e) {
        e.printStackTrace();
    }
}

From source file:mx.itam.metodos.lshclustering.MinhashEmitMapper.java

License:Apache License

@Override
public void map(Text id, IntArrayWritable values, Context context) throws IOException, InterruptedException {
    for (int i = 0; i < functionsCount; i++) {
        hashValues[i] = Integer.MAX_VALUE;
    }
    for (int i = 0; i < functionsCount; i++) {
        HashFunction hf = functions[i];
        for (Writable wr : values.get()) {
            IntWritable value = (IntWritable) wr;
            int hash = hf.hashInt(value.get()).asInt();
            if (hash < hashValues[i]) {
                hashValues[i] = hash;
            }
        }
    }
    Text sketch = new Text();
    Hasher hasher = lsh.newHasher();
    int band = 0;
    for (int i = 0; i < functionsCount; i++) {
        hasher.putInt(hashValues[i]);
        if (i > 0 && (i % rows) == 0) {
            sketch.set(band + "-" + hasher.hash().toString());
            context.write(new SecondarySortKey(sketch, id), id);
            hasher = lsh.newHasher();
            band++;
        }
    }
    sketch.set(band + "-" + hasher.hash().toString());
    context.write(new SecondarySortKey(sketch, id), id);
}

From source file:mx.itam.metodos.minhashing.MinhashMapper.java

License:Apache License

@Override
public void map(Text id, IntArrayWritable values, Context ctx) throws IOException, InterruptedException {
    for (int i = 0; i < functionsCount; i++) {
        hashValues[i] = Integer.MAX_VALUE;
    }
    for (int i = 0; i < functionsCount; i++) {
        HashFunction hf = functions[i];
        for (Writable wr : values.get()) {
            IntWritable value = (IntWritable) wr;
            int hash = hf.hashInt(value.get()).asInt();
            if (hash < hashValues[i]) {
                hashValues[i] = hash;
            }
        }
    }
    Text sketch = new Text();
    Hasher hasher = lsh.newHasher();
    int band = 0;
    for (int i = 0; i < functionsCount; i++) {
        hasher.putInt(hashValues[i]);
        if (i > 0 && (i % rows) == 0) {
            sketch.set(band + "-" + hasher.hash().toString());
            write(id, sketch, ctx);
            hasher = lsh.newHasher();
            band++;
        }
    }
    sketch.set(band + "-" + hasher.hash().toString());
    write(id, sketch, ctx);
}

From source file:mx.iteso.msc.asn.mrwordcount.MyMapper.java

License:Apache License

protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    Text word = new Text();

    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, new IntWritable(1));
    }
}

From source file:net.peacesoft.nutch.crawl.ReIndexerMapReduce.java

License:Apache License

public void map(Text key, Writable value, OutputCollector<Text, NutchWritable> output, Reporter reporter)
        throws IOException {

    String urlString = filterUrl(normalizeUrl(key.toString()));
    if (urlString == null) {
        return;
    } else {
        key.set(urlString);
    }

    output.collect(key, new NutchWritable(value));
}

From source file:oracle.kv.hadoop.hive.table.TableHiveRecordReader.java

License:Open Source License

@Override
public boolean next(Text key, Text value) {

    LOG.trace("next [key = " + key + ", value = " + value + "]");

    if (key == null || value == null) {
        return false;
    }
    boolean ret = false;
    try {
        key.clear();
        value.clear();
        ret = v2RecordReader.nextKeyValue();
        if (ret) {
            final Row curRow = v2RecordReader.getCurrentValue();
            assert curRow != null;
            key.set(curRow.createPrimaryKey().toString());
            value.set(curRow.toString());
        }
    } catch (Exception e) {
        LOG.error("TableHiveRecordReader " + this + " caught: " + e, e);
    }
    return ret;
}

From source file:org.ahanna.DoubleConversionMapper.java

License:Apache License

public void map(Text json, Text nothing, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    try {
        Text outJson = new Text();
        String jsonStr = json.toString();

        JSONTokener tokener = new JSONTokener(jsonStr);
        JSONObject jsonObj = new JSONObject(tokener);

        fixATweet(jsonObj);
        if (jsonObj.has("retweeted_status")) {
            Object retweetObj = jsonObj.get("retweeted_status");
            if (!"null".equals(retweetObj.toString())) {
                fixATweet((JSONObject) retweetObj);
            }
        }

        outJson.set(jsonObj.toString());
        output.collect(outJson, nothing);
    } catch (EOFException e) {
        // do nothing
    } catch (JSONException e) {
        // do nothing
    }
}

From source file:org.ankus.mapreduce.algorithms.correlation.booleanset.BooleanSetMapper.java

License:Apache License

@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String row = value.toString();
    String[] columns = row.split(delimiter);
    StringBuffer uniqueKeyStringBuffer = new StringBuffer();

    for (int i = 0; i < columns.length; i++) {
        String column = columns[i];
        if (i == Integer.parseInt(keyIndex)) {
            uniqueKeyStringBuffer.append(column);
        } else {
            continue;
        }
    }

    for (int i = 1; i < columns.length; i++) {
        // If the value is not 0 or 1, treat it as 1.
        if (columns[i].equals("0") || columns[i].equals("1")) {
            value.set(columns[i]);
        } else {
            value.set("1");
        }
        TextIntegerPairWritableComparable textIntegerPairWritableComparable = new TextIntegerPairWritableComparable(
                uniqueKeyStringBuffer.toString(), Integer.parseInt(value.toString()));
        context.write(new Text("item-" + i), textIntegerPairWritableComparable);
    }
}

From source file:org.ankus.mapreduce.algorithms.correlation.numericset.NumericSetMapper.java

License:Apache License

@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    String row = value.toString();
    String[] columns = row.split(delimiter);

    StringBuffer uniqueKeyStringBuffer = new StringBuffer();

    for (int i = 0; i < columns.length; i++) {
        String column = columns[i];
        if (i == Integer.parseInt(keyIndex)) {
            uniqueKeyStringBuffer.append(column);
        } else {
            continue;
        }
    }

    for (int k = 1; k < columns.length; k++) {
        value.set(columns[k]);
        TextDoublePairWritableComparable textDoublePairWritableComparable = new TextDoublePairWritableComparable(
                uniqueKeyStringBuffer.toString(), Double.parseDouble(value.toString()));
        context.write(new Text("item-" + k), textDoublePairWritableComparable);
    }
}