List of usage examples for org.apache.hadoop.io.Text.toString()
@Override
public String toString()
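Before the project-specific examples below, here is a minimal, self-contained sketch of the call itself (the class name and literal values are illustrative, not taken from any of the projects listed):

import org.apache.hadoop.io.Text;

public class TextToStringDemo {
  public static void main(String[] args) {
    // Text stores its content as UTF-8 encoded bytes; toString() decodes those bytes back into a Java String.
    Text text = new Text("hello hadoop");
    String decoded = text.toString();
    System.out.println(decoded);          // hello hadoop
    System.out.println(text.getLength()); // 12 (number of UTF-8 bytes)
  }
}

In the MapReduce examples that follow, the same call typically appears as key.toString() or value.toString() inside a map() or reduce() method, converting the framework-supplied Text into a String for splitting or parsing.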
From source file:com.chinamobile.bcbsp.partition.HashWithBalancerWritePartition.java
License:Apache License
/**
 * This method is used to partition graph vertices, writing each vertex to its
 * corresponding partition. It calls the recordParse method to create a HeadNode
 * object, then calls the partitioner's getPartitionID method to compute the id of
 * the partition the HeadNode belongs to. If the HeadNode belongs to the local
 * partition it is written locally; otherwise it is sent to the appropriate partition.
 * @param recordReader The record reader of the split.
 * @throws IOException The IO exception.
 * @throws InterruptedException The interrupted exception.
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
  int headNodeNum = 0;
  int local = 0;
  int send = 0;
  int lost = 0;
  ThreadPool tpool = new ThreadPool(this.sendThreadNum);
  int staffNum = this.staff.getStaffNum();
  BytesWritable kbytes = new BytesWritable();
  int ksize = 0;
  BytesWritable vbytes = new BytesWritable();
  int vsize = 0;
  DataOutputBuffer bb = new DataOutputBuffer();
  int bufferSize = (int) ((this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER) * PART);
  int dataBufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
      / (this.staff.getStaffNum() + this.sendThreadNum);
  byte[] buffer = new byte[bufferSize];
  int bufindex = 0;
  SerializationFactory sFactory = new SerializationFactory(new Configuration());
  Serializer<IntWritable> psserializer = sFactory.getSerializer(IntWritable.class);
  byte[] pidandsize = new byte[TIME * CONTAINERNUMBER * CONTAINERNUMBER];
  int psindex = 0;
  BytesWritable pidbytes = new BytesWritable();
  int psize = 0;
  BytesWritable sizebytes = new BytesWritable();
  int ssize = 0;
  try {
    this.keyserializer.open(bb);
    this.valueserializer.open(bb);
    psserializer.open(bb);
  } catch (IOException e) {
    throw e;
  }
  String path = "/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID();
  File dir = new File("/tmp/bcbsp/" + this.staff.getJobID());
  dir.mkdir();
  dir = new File("/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID());
  dir.mkdir();
  ArrayList<File> files = new ArrayList<File>();
  try {
    File file = new File(path + "/" + "data" + ".txt");
    files.add(file);
    DataOutputStream dataWriter = new DataOutputStream(
        new BufferedOutputStream(new FileOutputStream(path + "/" + "data" + ".txt", true)));
    DataInputStream dataReader = new DataInputStream(
        new BufferedInputStream(new FileInputStream(path + "/" + "data" + ".txt")));
    File filet = new File(path + "/" + "pidandsize" + ".txt");
    files.add(filet);
    DataOutputStream psWriter = new DataOutputStream(
        new BufferedOutputStream(new FileOutputStream(path + "/" + "pidandsize" + ".txt", true)));
    DataInputStream psReader = new DataInputStream(
        new BufferedInputStream(new FileInputStream(path + "/" + "pidandsize" + ".txt")));
    // Phase 1: serialize every record and spill (size, pid) metadata and (key, value) bytes to local temp files.
    while (recordReader != null && recordReader.nextKeyValue()) {
      headNodeNum++;
      Text key = new Text(recordReader.getCurrentKey().toString());
      Text value = new Text(recordReader.getCurrentValue().toString());
      int pid = -1;
      Text vertexID = this.recordParse.getVertexID(key);
      if (vertexID != null) {
        pid = this.partitioner.getPartitionID(vertexID);
      } else {
        lost++;
        continue;
      }
      if (this.counter.containsKey(pid)) {
        this.counter.put(pid, (this.counter.get(pid) + 1));
      } else {
        this.counter.put(pid, 1);
      }
      bb.reset();
      this.keyserializer.serialize(key);
      kbytes.set(bb.getData(), 0, bb.getLength());
      ksize = kbytes.getLength();
      bb.reset();
      this.valueserializer.serialize(value);
      vbytes.set(bb.getData(), 0, bb.getLength());
      vsize = vbytes.getLength();
      bb.reset();
      psserializer.serialize(new IntWritable(ksize + vsize));
      sizebytes.set(bb.getData(), 0, bb.getLength());
      ssize = sizebytes.getLength();
      bb.reset();
      psserializer.serialize(new IntWritable(pid));
      pidbytes.set(bb.getData(), 0, bb.getLength());
      psize = pidbytes.getLength();
      if ((pidandsize.length - psindex) > (ssize + psize)) {
        System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
        psindex += ssize;
        System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
        psindex += psize;
      } else {
        psWriter.write(pidandsize, 0, psindex);
        psindex = 0;
        System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
        psindex += ssize;
        System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
        psindex += psize;
      }
      if ((buffer.length - bufindex) > (ksize + vsize)) {
        System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
        bufindex += ksize;
        System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
        bufindex += vsize;
      } else if (buffer.length < (ksize + vsize)) {
        dataWriter.write(buffer, 0, bufindex);
        bufindex = 0;
        LOG.info("This is a super record");
        dataWriter.write(kbytes.getBytes(), 0, ksize);
        dataWriter.write(vbytes.getBytes(), 0, vsize);
      } else {
        dataWriter.write(buffer, 0, bufindex);
        bufindex = 0;
        System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
        bufindex += ksize;
        System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
        bufindex += vsize;
      }
    }
    if (psindex != 0) {
      psWriter.write(pidandsize, 0, psindex);
    }
    if (bufindex != 0) {
      dataWriter.write(buffer, 0, bufindex);
      bufindex = 0;
    }
    dataWriter.close();
    dataWriter = null;
    psWriter.close();
    psWriter = null;
    buffer = null;
    pidandsize = null;
    this.ssrc.setDirFlag(new String[] { "3" });
    this.ssrc.setCounter(this.counter);
    HashMap<Integer, Integer> hashBucketToPartition = this.sssc.loadDataInBalancerBarrier(ssrc,
        Constants.PARTITION_TYPE.HASH);
    this.staff.setHashBucketToPartition(hashBucketToPartition);
    byte[][] databuf = new byte[staffNum][dataBufferSize];
    int[] databufindex = new int[staffNum];
    // Phase 2: after the balancer barrier, re-read the spilled records and route each one
    // to its (possibly remapped) partition, either locally or via a sender thread.
    try {
      IntWritable pid = new IntWritable();
      IntWritable size = new IntWritable();
      int belongPid = 0;
      while (true) {
        size.readFields(psReader);
        pid.readFields(psReader);
        belongPid = hashBucketToPartition.get(pid.get());
        if (belongPid != this.staff.getPartition()) {
          send++;
        } else {
          local++;
        }
        if ((databuf[belongPid].length - databufindex[belongPid]) > size.get()) {
          dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
          databufindex[belongPid] += size.get();
        } else if (databuf[belongPid].length < size.get()) {
          LOG.info("This is a super record");
          byte[] tmp = new byte[size.get()];
          dataReader.read(tmp, 0, size.get());
          if (belongPid == this.staff.getPartition()) {
            DataInputStream reader = new DataInputStream(
                new BufferedInputStream(new ByteArrayInputStream(tmp)));
            try {
              boolean stop = true;
              while (stop) {
                Text key = new Text();
                key.readFields(reader);
                Text value = new Text();
                value.readFields(reader);
                if (key.getLength() > 0 && value.getLength() > 0) {
                  Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                  if (vertex == null) {
                    lost++;
                    continue;
                  }
                  this.staff.getGraphData().addForAll(vertex);
                } else {
                  stop = false;
                }
              }
            } catch (IOException e) {
              LOG.info("IO exception: " + e.getStackTrace());
            }
          } else {
            ThreadSignle t = tpool.getThread();
            while (t == null) {
              t = tpool.getThread();
            }
            t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
            t.setJobId(staff.getJobID());
            t.setTaskId(staff.getStaffID());
            t.setBelongPartition(belongPid);
            BytesWritable data = new BytesWritable();
            data.set(tmp, 0, size.get());
            t.setData(data);
            LOG.info("Using Thread is: " + t.getThreadNumber());
            t.setStatus(true);
          }
          tmp = null;
        } else {
          if (belongPid == this.staff.getPartition()) {
            DataInputStream reader = new DataInputStream(new BufferedInputStream(
                new ByteArrayInputStream(databuf[belongPid], 0, databufindex[belongPid])));
            try {
              boolean stop = true;
              while (stop) {
                Text key = new Text();
                key.readFields(reader);
                Text value = new Text();
                value.readFields(reader);
                if (key.getLength() > 0 && value.getLength() > 0) {
                  Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                  if (vertex == null) {
                    lost++;
                    continue;
                  }
                  this.staff.getGraphData().addForAll(vertex);
                } else {
                  stop = false;
                }
              }
            } catch (IOException e) {
              LOG.info("IO exception: " + e.getStackTrace());
            }
          } else {
            ThreadSignle t = tpool.getThread();
            while (t == null) {
              t = tpool.getThread();
            }
            t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
            t.setJobId(staff.getJobID());
            t.setTaskId(staff.getStaffID());
            t.setBelongPartition(belongPid);
            BytesWritable data = new BytesWritable();
            data.set(databuf[belongPid], 0, databufindex[belongPid]);
            t.setData(data);
            LOG.info("Using Thread is: " + t.getThreadNumber());
            t.setStatus(true);
          }
          databufindex[belongPid] = 0;
          dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
          databufindex[belongPid] += size.get();
        }
      }
    } catch (EOFException ex) {
      LOG.error("[write]", ex);
    }
    for (int i = 0; i < staffNum; i++) {
      if (databufindex[i] != 0) {
        if (i == this.staff.getPartition()) {
          DataInputStream reader = new DataInputStream(
              new BufferedInputStream(new ByteArrayInputStream(databuf[i], 0, databufindex[i])));
          try {
            boolean stop = true;
            while (stop) {
              Text key = new Text();
              key.readFields(reader);
              Text value = new Text();
              value.readFields(reader);
              if (key.getLength() > 0 && value.getLength() > 0) {
                Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                if (vertex == null) {
                  lost++;
                  continue;
                }
                this.staff.getGraphData().addForAll(vertex);
              } else {
                stop = false;
              }
            }
          } catch (IOException e) {
            LOG.info("IO exception: " + e.getStackTrace());
          }
        } else {
          ThreadSignle t = tpool.getThread();
          while (t == null) {
            t = tpool.getThread();
          }
          t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
          t.setJobId(staff.getJobID());
          t.setTaskId(staff.getStaffID());
          t.setBelongPartition(i);
          BytesWritable data = new BytesWritable();
          data.set(databuf[i], 0, databufindex[i]);
          t.setData(data);
          LOG.info("Using Thread is: " + t.getThreadNumber());
          t.setStatus(true);
        }
      }
    }
    dataReader.close();
    dataReader = null;
    psReader.close();
    psReader = null;
    for (File f : files) {
      f.delete();
    }
    dir.delete();
    dir = new File(path.substring(0, path.lastIndexOf('/')));
    dir.delete();
    tpool.cleanup();
    tpool = null;
    databuf = null;
    databufindex = null;
    this.counter = null;
    LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
    LOG.info("The number of vertices that were put into the partition: " + local);
    LOG.info("The number of vertices that were sent to other partitions: " + send);
    LOG.info("The number of vertices in the partition that could not be parsed: " + lost);
  } catch (IOException e) {
    throw e;
  } catch (InterruptedException e) {
    throw e;
  } finally {
    for (File f : files) {
      f.delete();
    }
    dir.delete();
    dir = new File(path.substring(0, path.lastIndexOf('/')));
    dir.delete();
  }
}
From source file:com.chinamobile.bcbsp.partition.HashWritePartition.java
License:Apache License
/**
 * This method is used to partition graph vertices, writing each vertex to its
 * corresponding partition. It calls the recordParse method to create a HeadNode
 * object, then calls the partitioner's getPartitionID method to compute the id of
 * the partition the HeadNode belongs to. If the HeadNode belongs to the local
 * partition it is written locally; otherwise it is sent to the appropriate partition.
 * @param recordReader The record reader of the split.
 * @throws IOException The IO exception.
 * @throws InterruptedException The interrupted exception.
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
  int headNodeNum = 0;
  int local = 0;
  int send = 0;
  int lost = 0;
  ThreadPool tpool = new ThreadPool(this.sendThreadNum);
  int bufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
      / (this.staff.getStaffNum() + this.sendThreadNum);
  byte[][] buffer = new byte[this.staff.getStaffNum()][bufferSize];
  int[] bufindex = new int[this.staff.getStaffNum()];
  BytesWritable kbytes = new BytesWritable();
  int ksize = 0;
  BytesWritable vbytes = new BytesWritable();
  int vsize = 0;
  DataOutputBuffer bb = new DataOutputBuffer();
  try {
    this.keyserializer.open(bb);
    this.valueserializer.open(bb);
  } catch (IOException e) {
    throw e;
  }
  try {
    while (recordReader != null && recordReader.nextKeyValue()) {
      headNodeNum++;
      Text key = new Text(recordReader.getCurrentKey().toString());
      Text value = new Text(recordReader.getCurrentValue().toString());
      int pid = -1;
      Text vertexID = this.recordParse.getVertexID(key);
      if (vertexID != null) {
        pid = this.partitioner.getPartitionID(vertexID);
      } else {
        lost++;
        continue;
      }
      if (pid == this.staff.getPartition()) {
        local++;
        Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
        if (vertex == null) {
          lost++;
          continue;
        }
        staff.getGraphData().addForAll(vertex);
      } else {
        // Vertices for other partitions are serialized and buffered per partition,
        // then handed to a sender thread when the buffer fills.
        send++;
        bb.reset();
        this.keyserializer.serialize(key);
        kbytes.set(bb.getData(), 0, bb.getLength());
        ksize = kbytes.getLength();
        bb.reset();
        this.valueserializer.serialize(value);
        vbytes.set(bb.getData(), 0, bb.getLength());
        vsize = vbytes.getLength();
        if ((buffer[pid].length - bufindex[pid]) > (ksize + vsize)) {
          System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
          bufindex[pid] += ksize;
          System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
          bufindex[pid] += vsize;
        } else if (buffer[pid].length < (ksize + vsize)) {
          ThreadSignle t = tpool.getThread();
          while (t == null) {
            t = tpool.getThread();
          }
          t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
          t.setJobId(staff.getJobID());
          t.setTaskId(staff.getStaffID());
          t.setBelongPartition(pid);
          BytesWritable data = new BytesWritable();
          byte[] tmp = new byte[vsize + ksize];
          System.arraycopy(kbytes.getBytes(), 0, tmp, 0, ksize);
          System.arraycopy(vbytes.getBytes(), 0, tmp, ksize, vsize);
          data.set(tmp, 0, (ksize + vsize));
          t.setData(data);
          tmp = null;
          LOG.info("Using Thread is: " + t.getThreadNumber());
          LOG.info("this is a super record");
          t.setStatus(true);
        } else {
          ThreadSignle t = tpool.getThread();
          while (t == null) {
            t = tpool.getThread();
          }
          t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
          t.setJobId(staff.getJobID());
          t.setTaskId(staff.getStaffID());
          t.setBelongPartition(pid);
          BytesWritable data = new BytesWritable();
          data.set(buffer[pid], 0, bufindex[pid]);
          t.setData(data);
          LOG.info("Using Thread is: " + t.getThreadNumber());
          t.setStatus(true);
          bufindex[pid] = 0;
          // store data
          System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
          bufindex[pid] += ksize;
          System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
          bufindex[pid] += vsize;
        }
      }
    }
    // Flush any remaining buffered records to their target partitions.
    for (int i = 0; i < this.staff.getStaffNum(); i++) {
      if (bufindex[i] != 0) {
        ThreadSignle t = tpool.getThread();
        while (t == null) {
          t = tpool.getThread();
        }
        t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
        t.setJobId(staff.getJobID());
        t.setTaskId(staff.getStaffID());
        t.setBelongPartition(i);
        BytesWritable data = new BytesWritable();
        data.set(buffer[i], 0, bufindex[i]);
        t.setData(data);
        LOG.info("Using Thread is: " + t.getThreadNumber());
        t.setStatus(true);
      }
    }
    tpool.cleanup();
    tpool = null;
    buffer = null;
    bufindex = null;
    LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
    LOG.info("The number of vertices that were put into the partition: " + local);
    LOG.info("The number of vertices that were sent to other partitions: " + send);
    LOG.info("The number of vertices in the partition that could not be parsed: " + lost);
  } catch (IOException e) {
    throw e;
  } catch (InterruptedException e) {
    throw e;
  }
}
From source file:com.chinamobile.bcbsp.partition.NotDivideWritePartition.java
License:Apache License
/**
 * This method is used to partition graph vertices.
 * @param recordReader The record reader of the split.
 * @throws IOException The IO exception.
 * @throws InterruptedException The interrupted exception.
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
  int headNodeNum = 0;
  int local = 0;
  int lost = 0;
  try {
    while (recordReader != null && recordReader.nextKeyValue()) {
      headNodeNum++;
      Text key = new Text(recordReader.getCurrentKey().toString());
      Text value = new Text(recordReader.getCurrentValue().toString());
      Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
      if (vertex == null) {
        lost++;
        continue;
      }
      staff.getGraphData().addForAll(vertex);
      local++;
    }
    LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
    LOG.info("The number of vertices that were put into the partition: " + local);
    LOG.info("The number of vertices that were sent to other partitions: " + 0);
    LOG.info("The number of vertices in the partition that could not be parsed: " + lost);
  } catch (IOException e) {
    throw e;
  } catch (InterruptedException e) {
    throw e;
  }
}
From source file:com.chinamobile.bcbsp.partition.RangeWritePartition.java
License:Apache License
/**
 * This method is used to partition graph vertices. Every vertex in the split
 * is partitioned to the local staff.
 * @param recordReader The record reader of the split.
 * @throws IOException The IO exception.
 * @throws InterruptedException The interrupted exception.
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
  int headNodeNum = 0;
  int local = 0;
  int lost = 0;
  int partitionid = this.staff.getPartition();
  int maxid = Integer.MIN_VALUE;
  try {
    while (recordReader != null && recordReader.nextKeyValue()) {
      headNodeNum++;
      Text key = new Text(recordReader.getCurrentKey().toString());
      Text value = new Text(recordReader.getCurrentValue().toString());
      Text vertexID = this.recordParse.getVertexID(key);
      if (vertexID != null) {
        local++;
        int vertexid = Integer.parseInt(vertexID.toString());
        if (vertexid > maxid) {
          maxid = vertexid;
        }
        Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
        this.staff.getGraphData().addForAll(vertex);
      } else {
        lost++;
        continue;
      }
    }
    if (lost == 0) {
      counter.put(maxid, partitionid);
      this.ssrc.setDirFlag(new String[] { "3" });
      this.ssrc.setCounter(counter);
      HashMap<Integer, Integer> rangerouter = this.sssc.rangerouter(ssrc);
      this.staff.setRangeRouter(rangerouter);
    }
    LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
    LOG.info("The number of vertices that were put into the partition: " + local);
    LOG.info("The number of vertices in the partition that could not be parsed: " + lost);
  } catch (IOException e) {
    throw e;
  } catch (InterruptedException e) {
    throw e;
  }
}
From source file:com.chinamobile.bcbsp.partition.RecordParseDefault.java
License:Apache License
/**
 * This method is used to parse a record and obtain its vertex ID.
 * @param key The key of the vertex record.
 * @return the vertex ID, or null if the record cannot be parsed.
 */
@Override
public Text getVertexID(Text key) {
  try {
    StringTokenizer str = new StringTokenizer(key.toString(), Constants.SPLIT_FLAG);
    if (str.countTokens() != 2) {
      return null;
    }
    return new Text(str.nextToken());
  } catch (Exception e) {
    return null;
  }
}
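A hedged usage sketch for the method above. The actual value of Constants.SPLIT_FLAG is defined elsewhere in BC-BSP; the tab separator and the record layout used here are assumptions for illustration only:

// Given an initialized RecordParseDefault instance named recordParse,
// and assuming SPLIT_FLAG is "\t" and a record key has the form "<vertexId>\t<vertexValue>".
Text key = new Text("7\t0.85");
Text vertexID = recordParse.getVertexID(key);        // new Text("7")
// A key that does not contain exactly two tokens yields null,
// which the write() methods above count as a lost vertex.
Text none = recordParse.getVertexID(new Text("7"));  // null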
From source file:com.chinamobile.bcbsp.router.RangeRoute.java
License:Apache License
/**
 * Decides which partition the given vertex ID belongs to.
 * @param vertexID The id of the vertex.
 * @return the partition id
 */
@Override
public int getpartitionID(Text vertexID) {
  int vertexid = Integer.parseInt(vertexID.toString());
  int tempMaxMin = Integer.MAX_VALUE;
  for (Integer e : rangerouter.keySet()) {
    if (vertexid <= e) {
      if (tempMaxMin > e) {
        tempMaxMin = e;
      }
    }
  }
  return rangerouter.get(tempMaxMin);
}
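The loop above scans rangerouter for the smallest range upper bound that is still greater than or equal to the vertex id and returns the partition registered for that bound. A standalone sketch of the same lookup using java.util.TreeMap.ceilingKey in place of the project's rangerouter field (the map contents below are invented for illustration):

import java.util.TreeMap;
import org.apache.hadoop.io.Text;

public class RangeLookupDemo {
  public static void main(String[] args) {
    // Key = largest vertex id owned by a partition, value = partition id.
    TreeMap<Integer, Integer> rangeRouter = new TreeMap<>();
    rangeRouter.put(99, 0);    // vertex ids   0..99  -> partition 0
    rangeRouter.put(199, 1);   // vertex ids 100..199 -> partition 1
    int vertexId = Integer.parseInt(new Text("150").toString());
    Integer upperBound = rangeRouter.ceilingKey(vertexId);          // smallest key >= 150, i.e. 199
    System.out.println("partition: " + rangeRouter.get(upperBound)); // partition: 1
  }
}

Note that in both versions an id larger than every registered bound finds no match (ceilingKey returns null; the loop leaves tempMaxMin at Integer.MAX_VALUE), so callers must ensure the router covers the full id range.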
From source file:com.chinnu.churndetection.fuzzykmeans.FuzzyKMeansMapper.java
@Override
protected void map(LongWritable key, Text value,
    Mapper<LongWritable, Text, IntWritable, Vector>.Context context)
    throws IOException, InterruptedException {
  // Parse the current cluster centers from the CENTERS string (one comma-separated center per line).
  HashMap<Integer, double[]> centers = new HashMap<>();
  int idx = 0;
  String[] lineSplit = CENTERS.split("\n");
  for (int j = 0; j < lineSplit.length; j++) {
    String line = lineSplit[j];
    double[] center = new double[DATALENGTH];
    String[] split = line.split(",");
    for (int i = 0; i < DATALENGTH; i++) {
      center[i] = Double.parseDouble(split[i]);
    }
    centers.put(idx++, center);
  }
  String line = value.toString();
  String[] split = line.split(",");
  double[] data = new double[DATALENGTH];
  for (int i = STARTINDEX; i < ENDINDEX; i++) {
    data[i - STARTINDEX] = Double.parseDouble(split[i]);
  }
  // Fuzzy k-means style membership: accumulate (1/dist)^(1/(m-1)) over all centers.
  double etahSum = 0d;
  for (Integer cKey : centers.keySet()) {
    double[] cenetr = centers.get(cKey);
    double dist = DistanceComparator.findDistance(cenetr, data);
    double inv_dist = 1 / dist;
    double pow = 1 / (m - 1);
    double etah = Math.pow(inv_dist, pow);
    etahSum += etah;
  }
  String className = split[CLASSINDEX];
  Vector vector = new Vector();
  vector.setData(data);
  vector.setClassName(className);
  vector.setIndex(Integer.parseInt(split[0]));
  int nearCenter = DistanceComparator.findMinimumDistance(data, centers);
  // Membership degree of this point with respect to its nearest center.
  double[] cenetr = centers.get(nearCenter);
  double dist = DistanceComparator.findDistance(cenetr, data);
  double inv_dist = 1 / dist;
  double pow = 1 / (m - 1);
  double etah = Math.pow(inv_dist, pow);
  double mew = etah / etahSum;
  vector.setMew(mew);
  MRLogger.Log("Mew : " + mew);
  IntWritable k = new IntWritable(nearCenter);
  context.write(k, vector);
}
From source file:com.chinnu.churndetection.kmeans.KMeansMapper.java
@Override
protected void map(LongWritable key, Text value,
    Mapper<LongWritable, Text, IntWritable, Vector>.Context context)
    throws IOException, InterruptedException {
  HashMap<Integer, double[]> centers = new HashMap<>();
  int idx = 0;
  String[] lineSplit = CENTERS.split("\n");
  for (int j = 0; j < lineSplit.length; j++) {
    String line = lineSplit[j];
    double[] center = new double[DATALENGTH];
    String[] split = line.split(",");
    for (int i = 0; i < DATALENGTH; i++) {
      center[i] = Double.parseDouble(split[i]);
    }
    centers.put(idx++, center);
  }
  String line = value.toString();
  String[] split = line.split(",");
  double[] data = new double[DATALENGTH];
  for (int i = STARTINDEX; i < ENDINDEX; i++) {
    data[i - STARTINDEX] = Double.parseDouble(split[i]);
  }
  String className = split[CLASSINDEX];
  Vector vector = new Vector();
  vector.setData(data);
  vector.setClassName(className);
  vector.setIndex(Integer.parseInt(split[0]));
  int nearCenter = DistanceComparator.findMinimumDistance(data, centers);
  IntWritable k = new IntWritable(nearCenter);
  context.write(k, vector);
}
From source file:com.chriscx.mapred.Map.java
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
    throws IOException {
  String line = (caseSensitive) ? value.toString() : value.toString().toLowerCase();
  for (String pattern : patternsToSkip) {
    line = line.replaceAll(pattern, "");
  }
  StringTokenizer tokenizer = new StringTokenizer(line);
  while (tokenizer.hasMoreTokens()) {
    word.set(tokenizer.nextToken());
    output.collect(word, one);
    reporter.incrCounter(Counters.INPUT_WORDS, 1);
  }
  if ((++numRecords % 100) == 0) {
    reporter.setStatus("Finished processing " + numRecords + " records " + "from the input file: " + inputFile);
  }
}
From source file:com.citic.zxyjs.zwlscx.mapreduce.join.api.DataJoinReducerBase.java
License:Apache License
/**
 * This is the function that re-groups values for a key into sub-groups
 * based on a secondary key (input tag).
 * @param values
 * @return
 */
private SortedMap<Text, ResetableIterator<TaggedMapOutput>> regroup(Text key,
    Iterator<TaggedMapOutput> values, Context context) throws IOException {
  this.numOfValues = 0;
  SortedMap<Text, ResetableIterator<TaggedMapOutput>> retv =
      new TreeMap<Text, ResetableIterator<TaggedMapOutput>>();
  TaggedMapOutput aRecord = null;
  while (values.hasNext()) {
    this.numOfValues += 1;
    if (this.numOfValues % 100 == 0) {
      reporter.setStatus("key: " + key.toString() + " numOfValues: " + this.numOfValues);
    }
    if (this.numOfValues > this.maxNumOfValuesPerGroup) {
      continue;
    }
    aRecord = values.next().clone(context.getConfiguration());
    Text tag = aRecord.getTag();
    ResetableIterator<TaggedMapOutput> data = retv.get(tag);
    if (data == null) {
      data = createResetableIterator();
      retv.put(tag, data);
    }
    data.add(aRecord);
  }
  if (this.numOfValues > this.largestNumOfValues) {
    this.largestNumOfValues = numOfValues;
    LOG.info("key: " + key.toString() + " this.largestNumOfValues: " + this.largestNumOfValues);
  }
  return retv;
}