List of usage examples for org.apache.hadoop.io.Text.toString()
@Override
public String toString()
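Before the project-specific examples below, here is a minimal, self-contained sketch of the call itself (the class name and literal values are illustrative, not taken from any of the projects listed):

import org.apache.hadoop.io.Text;

public class TextToStringDemo {
  public static void main(String[] args) {
    // Text stores its content as UTF-8 encoded bytes; toString() decodes those bytes back into a Java String.
    Text text = new Text("hello hadoop");
    String decoded = text.toString();
    System.out.println(decoded);          // hello hadoop
    System.out.println(text.getLength()); // 12 (number of UTF-8 bytes)
  }
}

In the MapReduce examples that follow, the same call typically appears as key.toString() or value.toString() inside a map() or reduce() method, converting the framework-supplied Text into a String for splitting or parsing.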
From source file:com.chinamobile.bcbsp.partition.HashWithBalancerWritePartition.java
License:Apache License
/**
 * This method is used to partition graph vertices, writing each vertex to its
 * corresponding partition. It calls the recordParse method to create a HeadNode
 * object, then calls the partitioner's getPartitionID method to compute the id of
 * the partition the HeadNode belongs to. If the HeadNode belongs to the local
 * partition it is written locally; otherwise it is sent to the appropriate partition.
 * @param recordReader The record reader of the split.
 * @throws IOException The IO exception.
 * @throws InterruptedException The interrupted exception.
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
  int headNodeNum = 0;
  int local = 0;
  int send = 0;
  int lost = 0;
  ThreadPool tpool = new ThreadPool(this.sendThreadNum);
  int staffNum = this.staff.getStaffNum();
  BytesWritable kbytes = new BytesWritable();
  int ksize = 0;
  BytesWritable vbytes = new BytesWritable();
  int vsize = 0;
  DataOutputBuffer bb = new DataOutputBuffer();
  int bufferSize = (int) ((this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER) * PART);
  int dataBufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
      / (this.staff.getStaffNum() + this.sendThreadNum);
  byte[] buffer = new byte[bufferSize];
  int bufindex = 0;
  SerializationFactory sFactory = new SerializationFactory(new Configuration());
  Serializer<IntWritable> psserializer = sFactory.getSerializer(IntWritable.class);
  byte[] pidandsize = new byte[TIME * CONTAINERNUMBER * CONTAINERNUMBER];
  int psindex = 0;
  BytesWritable pidbytes = new BytesWritable();
  int psize = 0;
  BytesWritable sizebytes = new BytesWritable();
  int ssize = 0;
  try {
    this.keyserializer.open(bb);
    this.valueserializer.open(bb);
    psserializer.open(bb);
  } catch (IOException e) {
    throw e;
  }
  String path = "/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID();
  File dir = new File("/tmp/bcbsp/" + this.staff.getJobID());
  dir.mkdir();
  dir = new File("/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID());
  dir.mkdir();
  ArrayList<File> files = new ArrayList<File>();
  try {
    File file = new File(path + "/" + "data" + ".txt");
    files.add(file);
    DataOutputStream dataWriter = new DataOutputStream(
        new BufferedOutputStream(new FileOutputStream(path + "/" + "data" + ".txt", true)));
    DataInputStream dataReader = new DataInputStream(
        new BufferedInputStream(new FileInputStream(path + "/" + "data" + ".txt")));
    File filet = new File(path + "/" + "pidandsize" + ".txt");
    files.add(filet);
    DataOutputStream psWriter = new DataOutputStream(
        new BufferedOutputStream(new FileOutputStream(path + "/" + "pidandsize" + ".txt", true)));
    DataInputStream psReader = new DataInputStream(
        new BufferedInputStream(new FileInputStream(path + "/" + "pidandsize" + ".txt")));
    // Phase 1: serialize every record and spill (size, pid) metadata and (key, value) bytes to local temp files.
    while (recordReader != null && recordReader.nextKeyValue()) {
      headNodeNum++;
      Text key = new Text(recordReader.getCurrentKey().toString());
      Text value = new Text(recordReader.getCurrentValue().toString());
      int pid = -1;
      Text vertexID = this.recordParse.getVertexID(key);
      if (vertexID != null) {
        pid = this.partitioner.getPartitionID(vertexID);
      } else {
        lost++;
        continue;
      }
      if (this.counter.containsKey(pid)) {
        this.counter.put(pid, (this.counter.get(pid) + 1));
      } else {
        this.counter.put(pid, 1);
      }
      bb.reset();
      this.keyserializer.serialize(key);
      kbytes.set(bb.getData(), 0, bb.getLength());
      ksize = kbytes.getLength();
      bb.reset();
      this.valueserializer.serialize(value);
      vbytes.set(bb.getData(), 0, bb.getLength());
      vsize = vbytes.getLength();
      bb.reset();
      psserializer.serialize(new IntWritable(ksize + vsize));
      sizebytes.set(bb.getData(), 0, bb.getLength());
      ssize = sizebytes.getLength();
      bb.reset();
      psserializer.serialize(new IntWritable(pid));
      pidbytes.set(bb.getData(), 0, bb.getLength());
      psize = pidbytes.getLength();
      if ((pidandsize.length - psindex) > (ssize + psize)) {
        System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
        psindex += ssize;
        System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
        psindex += psize;
      } else {
        psWriter.write(pidandsize, 0, psindex);
        psindex = 0;
        System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
        psindex += ssize;
        System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
        psindex += psize;
      }
      if ((buffer.length - bufindex) > (ksize + vsize)) {
        System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
        bufindex += ksize;
        System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
        bufindex += vsize;
      } else if (buffer.length < (ksize + vsize)) {
        dataWriter.write(buffer, 0, bufindex);
        bufindex = 0;
        LOG.info("This is a super record");
        dataWriter.write(kbytes.getBytes(), 0, ksize);
        dataWriter.write(vbytes.getBytes(), 0, vsize);
      } else {
        dataWriter.write(buffer, 0, bufindex);
        bufindex = 0;
        System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
        bufindex += ksize;
        System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
        bufindex += vsize;
      }
    }
    if (psindex != 0) {
      psWriter.write(pidandsize, 0, psindex);
    }
    if (bufindex != 0) {
      dataWriter.write(buffer, 0, bufindex);
      bufindex = 0;
    }
    dataWriter.close();
    dataWriter = null;
    psWriter.close();
    psWriter = null;
    buffer = null;
    pidandsize = null;
    this.ssrc.setDirFlag(new String[] { "3" });
    this.ssrc.setCounter(this.counter);
    HashMap<Integer, Integer> hashBucketToPartition = this.sssc.loadDataInBalancerBarrier(ssrc,
        Constants.PARTITION_TYPE.HASH);
    this.staff.setHashBucketToPartition(hashBucketToPartition);
    byte[][] databuf = new byte[staffNum][dataBufferSize];
    int[] databufindex = new int[staffNum];
    // Phase 2: after the balancer barrier, re-read the spilled records and route each one
    // to its (possibly remapped) partition, either locally or via a sender thread.
    try {
      IntWritable pid = new IntWritable();
      IntWritable size = new IntWritable();
      int belongPid = 0;
      while (true) {
        size.readFields(psReader);
        pid.readFields(psReader);
        belongPid = hashBucketToPartition.get(pid.get());
        if (belongPid != this.staff.getPartition()) {
          send++;
        } else {
          local++;
        }
        if ((databuf[belongPid].length - databufindex[belongPid]) > size.get()) {
          dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
          databufindex[belongPid] += size.get();
        } else if (databuf[belongPid].length < size.get()) {
          LOG.info("This is a super record");
          byte[] tmp = new byte[size.get()];
          dataReader.read(tmp, 0, size.get());
          if (belongPid == this.staff.getPartition()) {
            DataInputStream reader = new DataInputStream(
                new BufferedInputStream(new ByteArrayInputStream(tmp)));
            try {
              boolean stop = true;
              while (stop) {
                Text key = new Text();
                key.readFields(reader);
                Text value = new Text();
                value.readFields(reader);
                if (key.getLength() > 0 && value.getLength() > 0) {
                  Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                  if (vertex == null) {
                    lost++;
                    continue;
                  }
                  this.staff.getGraphData().addForAll(vertex);
                } else {
                  stop = false;
                }
              }
            } catch (IOException e) {
              LOG.info("IO exception: " + e.getStackTrace());
            }
          } else {
            ThreadSignle t = tpool.getThread();
            while (t == null) {
              t = tpool.getThread();
            }
            t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
            t.setJobId(staff.getJobID());
            t.setTaskId(staff.getStaffID());
            t.setBelongPartition(belongPid);
            BytesWritable data = new BytesWritable();
            data.set(tmp, 0, size.get());
            t.setData(data);
            LOG.info("Using Thread is: " + t.getThreadNumber());
            t.setStatus(true);
          }
          tmp = null;
        } else {
          if (belongPid == this.staff.getPartition()) {
            DataInputStream reader = new DataInputStream(new BufferedInputStream(
                new ByteArrayInputStream(databuf[belongPid], 0, databufindex[belongPid])));
            try {
              boolean stop = true;
              while (stop) {
                Text key = new Text();
                key.readFields(reader);
                Text value = new Text();
                value.readFields(reader);
                if (key.getLength() > 0 && value.getLength() > 0) {
                  Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                  if (vertex == null) {
                    lost++;
                    continue;
                  }
                  this.staff.getGraphData().addForAll(vertex);
                } else {
                  stop = false;
                }
              }
            } catch (IOException e) {
              LOG.info("IO exception: " + e.getStackTrace());
            }
          } else {
            ThreadSignle t = tpool.getThread();
            while (t == null) {
              t = tpool.getThread();
            }
            t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
            t.setJobId(staff.getJobID());
            t.setTaskId(staff.getStaffID());
            t.setBelongPartition(belongPid);
            BytesWritable data = new BytesWritable();
            data.set(databuf[belongPid], 0, databufindex[belongPid]);
            t.setData(data);
            LOG.info("Using Thread is: " + t.getThreadNumber());
            t.setStatus(true);
          }
          databufindex[belongPid] = 0;
          dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
          databufindex[belongPid] += size.get();
        }
      }
    } catch (EOFException ex) {
      LOG.error("[write]", ex);
    }
    for (int i = 0; i < staffNum; i++) {
      if (databufindex[i] != 0) {
        if (i == this.staff.getPartition()) {
          DataInputStream reader = new DataInputStream(
              new BufferedInputStream(new ByteArrayInputStream(databuf[i], 0, databufindex[i])));
          try {
            boolean stop = true;
            while (stop) {
              Text key = new Text();
              key.readFields(reader);
              Text value = new Text();
              value.readFields(reader);
              if (key.getLength() > 0 && value.getLength() > 0) {
                Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                if (vertex == null) {
                  lost++;
                  continue;
                }
                this.staff.getGraphData().addForAll(vertex);
              } else {
                stop = false;
              }
            }
          } catch (IOException e) {
            LOG.info("IO exception: " + e.getStackTrace());
          }
        } else {
          ThreadSignle t = tpool.getThread();
          while (t == null) {
            t = tpool.getThread();
          }
          t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
          t.setJobId(staff.getJobID());
          t.setTaskId(staff.getStaffID());
          t.setBelongPartition(i);
          BytesWritable data = new BytesWritable();
          data.set(databuf[i], 0, databufindex[i]);
          t.setData(data);
          LOG.info("Using Thread is: " + t.getThreadNumber());
          t.setStatus(true);
        }
      }
    }
    dataReader.close();
    dataReader = null;
    psReader.close();
    psReader = null;
    for (File f : files) {
      f.delete();
    }
    dir.delete();
    dir = new File(path.substring(0, path.lastIndexOf('/')));
    dir.delete();
    tpool.cleanup();
    tpool = null;
    databuf = null;
    databufindex = null;
    this.counter = null;
    LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
    LOG.info("The number of vertices that were put into the partition: " + local);
    LOG.info("The number of vertices that were sent to other partitions: " + send);
    LOG.info("The number of vertices in the partition that could not be parsed: " + lost);
  } catch (IOException e) {
    throw e;
  } catch (InterruptedException e) {
    throw e;
  } finally {
    for (File f : files) {
      f.delete();
    }
    dir.delete();
    dir = new File(path.substring(0, path.lastIndexOf('/')));
    dir.delete();
  }
}
From source file:com.chinamobile.bcbsp.partition.HashWritePartition.java
License:Apache License
/**
 * This method is used to partition graph vertices, writing each vertex to its
 * corresponding partition. It calls the recordParse method to create a HeadNode
 * object, then calls the partitioner's getPartitionID method to compute the id of
 * the partition the HeadNode belongs to. If the HeadNode belongs to the local
 * partition it is written locally; otherwise it is sent to the appropriate partition.
 * @param recordReader The record reader of the split.
 * @throws IOException The IO exception.
 * @throws InterruptedException The interrupted exception.
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
  int headNodeNum = 0;
  int local = 0;
  int send = 0;
  int lost = 0;
  ThreadPool tpool = new ThreadPool(this.sendThreadNum);
  int bufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
      / (this.staff.getStaffNum() + this.sendThreadNum);
  byte[][] buffer = new byte[this.staff.getStaffNum()][bufferSize];
  int[] bufindex = new int[this.staff.getStaffNum()];
  BytesWritable kbytes = new BytesWritable();
  int ksize = 0;
  BytesWritable vbytes = new BytesWritable();
  int vsize = 0;
  DataOutputBuffer bb = new DataOutputBuffer();
  try {
    this.keyserializer.open(bb);
    this.valueserializer.open(bb);
  } catch (IOException e) {
    throw e;
  }
  try {
    while (recordReader != null && recordReader.nextKeyValue()) {
      headNodeNum++;
      Text key = new Text(recordReader.getCurrentKey().toString());
      Text value = new Text(recordReader.getCurrentValue().toString());
      int pid = -1;
      Text vertexID = this.recordParse.getVertexID(key);
      if (vertexID != null) {
        pid = this.partitioner.getPartitionID(vertexID);
      } else {
        lost++;
        continue;
      }
      if (pid == this.staff.getPartition()) {
        local++;
        Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
        if (vertex == null) {
          lost++;
          continue;
        }
        staff.getGraphData().addForAll(vertex);
      } else {
        // Vertices for other partitions are serialized and buffered per partition,
        // then handed to a sender thread when the buffer fills.
        send++;
        bb.reset();
        this.keyserializer.serialize(key);
        kbytes.set(bb.getData(), 0, bb.getLength());
        ksize = kbytes.getLength();
        bb.reset();
        this.valueserializer.serialize(value);
        vbytes.set(bb.getData(), 0, bb.getLength());
        vsize = vbytes.getLength();
        if ((buffer[pid].length - bufindex[pid]) > (ksize + vsize)) {
          System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
          bufindex[pid] += ksize;
          System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
          bufindex[pid] += vsize;
        } else if (buffer[pid].length < (ksize + vsize)) {
          ThreadSignle t = tpool.getThread();
          while (t == null) {
            t = tpool.getThread();
          }
          t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
          t.setJobId(staff.getJobID());
          t.setTaskId(staff.getStaffID());
          t.setBelongPartition(pid);
          BytesWritable data = new BytesWritable();
          byte[] tmp = new byte[vsize + ksize];
          System.arraycopy(kbytes.getBytes(), 0, tmp, 0, ksize);
          System.arraycopy(vbytes.getBytes(), 0, tmp, ksize, vsize);
          data.set(tmp, 0, (ksize + vsize));
          t.setData(data);
          tmp = null;
          LOG.info("Using Thread is: " + t.getThreadNumber());
          LOG.info("this is a super record");
          t.setStatus(true);
        } else {
          ThreadSignle t = tpool.getThread();
          while (t == null) {
            t = tpool.getThread();
          }
          t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
          t.setJobId(staff.getJobID());
          t.setTaskId(staff.getStaffID());
          t.setBelongPartition(pid);
          BytesWritable data = new BytesWritable();
          data.set(buffer[pid], 0, bufindex[pid]);
          t.setData(data);
          LOG.info("Using Thread is: " + t.getThreadNumber());
          t.setStatus(true);
          bufindex[pid] = 0;
          // store data
          System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
          bufindex[pid] += ksize;
          System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
          bufindex[pid] += vsize;
        }
      }
    }
    // Flush any remaining buffered records to their target partitions.
    for (int i = 0; i < this.staff.getStaffNum(); i++) {
      if (bufindex[i] != 0) {
        ThreadSignle t = tpool.getThread();
        while (t == null) {
          t = tpool.getThread();
        }
        t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
        t.setJobId(staff.getJobID());
        t.setTaskId(staff.getStaffID());
        t.setBelongPartition(i);
        BytesWritable data = new BytesWritable();
        data.set(buffer[i], 0, bufindex[i]);
        t.setData(data);
        LOG.info("Using Thread is: " + t.getThreadNumber());
        t.setStatus(true);
      }
    }
    tpool.cleanup();
    tpool = null;
    buffer = null;
    bufindex = null;
    LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
    LOG.info("The number of vertices that were put into the partition: " + local);
    LOG.info("The number of vertices that were sent to other partitions: " + send);
    LOG.info("The number of vertices in the partition that could not be parsed: " + lost);
  } catch (IOException e) {
    throw e;
  } catch (InterruptedException e) {
    throw e;
  }
}
From source file:com.chinamobile.bcbsp.partition.NotDivideWritePartition.java
License:Apache License
/**
 * This method is used to partition graph vertices.
 * @param recordReader The record reader of the split.
 * @throws IOException The IO exception.
 * @throws InterruptedException The interrupted exception.
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
  int headNodeNum = 0;
  int local = 0;
  int lost = 0;
  try {
    while (recordReader != null && recordReader.nextKeyValue()) {
      headNodeNum++;
      Text key = new Text(recordReader.getCurrentKey().toString());
      Text value = new Text(recordReader.getCurrentValue().toString());
      Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
      if (vertex == null) {
        lost++;
        continue;
      }
      staff.getGraphData().addForAll(vertex);
      local++;
    }
    LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
    LOG.info("The number of vertices that were put into the partition: " + local);
    LOG.info("The number of vertices that were sent to other partitions: " + 0);
    LOG.info("The number of vertices in the partition that could not be parsed: " + lost);
  } catch (IOException e) {
    throw e;
  } catch (InterruptedException e) {
    throw e;
  }
}
From source file:com.chinamobile.bcbsp.partition.RangeWritePartition.java
License:Apache License
/**
 * This method is used to partition graph vertices. Every vertex in the split
 * is partitioned to the local staff.
 * @param recordReader The record reader of the split.
 * @throws IOException The IO exception.
 * @throws InterruptedException The interrupted exception.
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
  int headNodeNum = 0;
  int local = 0;
  int lost = 0;
  int partitionid = this.staff.getPartition();
  int maxid = Integer.MIN_VALUE;
  try {
    while (recordReader != null && recordReader.nextKeyValue()) {
      headNodeNum++;
      Text key = new Text(recordReader.getCurrentKey().toString());
      Text value = new Text(recordReader.getCurrentValue().toString());
      Text vertexID = this.recordParse.getVertexID(key);
      if (vertexID != null) {
        local++;
        int vertexid = Integer.parseInt(vertexID.toString());
        if (vertexid > maxid) {
          maxid = vertexid;
        }
        Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
        this.staff.getGraphData().addForAll(vertex);
      } else {
        lost++;
        continue;
      }
    }
    if (lost == 0) {
      counter.put(maxid, partitionid);
      this.ssrc.setDirFlag(new String[] { "3" });
      this.ssrc.setCounter(counter);
      HashMap<Integer, Integer> rangerouter = this.sssc.rangerouter(ssrc);
      this.staff.setRangeRouter(rangerouter);
    }
    LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
    LOG.info("The number of vertices that were put into the partition: " + local);
    LOG.info("The number of vertices in the partition that could not be parsed: " + lost);
  } catch (IOException e) {
    throw e;
  } catch (InterruptedException e) {
    throw e;
  }
}
From source file:com.chinamobile.bcbsp.partition.RecordParseDefault.java
License:Apache License
/**
 * This method is used to parse a record and obtain its vertex ID.
 * @param key The key of the vertex record.
 * @return the vertex ID, or null if the record cannot be parsed.
 */
@Override
public Text getVertexID(Text key) {
  try {
    StringTokenizer str = new StringTokenizer(key.toString(), Constants.SPLIT_FLAG);
    if (str.countTokens() != 2) {
      return null;
    }
    return new Text(str.nextToken());
  } catch (Exception e) {
    return null;
  }
}
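A hedged usage sketch for the method above. The actual value of Constants.SPLIT_FLAG is defined elsewhere in BC-BSP; the tab separator and the record layout used here are assumptions for illustration only:

// Given an initialized RecordParseDefault instance named recordParse,
// and assuming SPLIT_FLAG is "\t" and a record key has the form "<vertexId>\t<vertexValue>".
Text key = new Text("7\t0.85");
Text vertexID = recordParse.getVertexID(key);        // new Text("7")
// A key that does not contain exactly two tokens yields null,
// which the write() methods above count as a lost vertex.
Text none = recordParse.getVertexID(new Text("7"));  // null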
From source file:com.chinamobile.bcbsp.router.RangeRoute.java
License:Apache License
/**
 * Decides which partition the given vertex ID belongs to.
 * @param vertexID The id of the vertex.
 * @return the partition id
 */
@Override
public int getpartitionID(Text vertexID) {
  int vertexid = Integer.parseInt(vertexID.toString());
  int tempMaxMin = Integer.MAX_VALUE;
  for (Integer e : rangerouter.keySet()) {
    if (vertexid <= e) {
      if (tempMaxMin > e) {
        tempMaxMin = e;
      }
    }
  }
  return rangerouter.get(tempMaxMin);
}
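The loop above scans rangerouter for the smallest range upper bound that is still greater than or equal to the vertex id and returns the partition registered for that bound. A standalone sketch of the same lookup using java.util.TreeMap.ceilingKey in place of the project's rangerouter field (the map contents below are invented for illustration):

import java.util.TreeMap;
import org.apache.hadoop.io.Text;

public class RangeLookupDemo {
  public static void main(String[] args) {
    // Key = largest vertex id owned by a partition, value = partition id.
    TreeMap<Integer, Integer> rangeRouter = new TreeMap<>();
    rangeRouter.put(99, 0);    // vertex ids   0..99  -> partition 0
    rangeRouter.put(199, 1);   // vertex ids 100..199 -> partition 1
    int vertexId = Integer.parseInt(new Text("150").toString());
    Integer upperBound = rangeRouter.ceilingKey(vertexId);          // smallest key >= 150, i.e. 199
    System.out.println("partition: " + rangeRouter.get(upperBound)); // partition: 1
  }
}

Note that in both versions an id larger than every registered bound finds no match (ceilingKey returns null; the loop leaves tempMaxMin at Integer.MAX_VALUE), so callers must ensure the router covers the full id range.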
From source file:com.chinnu.churndetection.fuzzykmeans.FuzzyKMeansMapper.java
@Override
protected void map(LongWritable key, Text value,
    Mapper<LongWritable, Text, IntWritable, Vector>.Context context)
    throws IOException, InterruptedException {
  // Parse the current cluster centers from the CENTERS string (one comma-separated center per line).
  HashMap<Integer, double[]> centers = new HashMap<>();
  int idx = 0;
  String[] lineSplit = CENTERS.split("\n");
  for (int j = 0; j < lineSplit.length; j++) {
    String line = lineSplit[j];
    double[] center = new double[DATALENGTH];
    String[] split = line.split(",");
    for (int i = 0; i < DATALENGTH; i++) {
      center[i] = Double.parseDouble(split[i]);
    }
    centers.put(idx++, center);
  }
  String line = value.toString();
  String[] split = line.split(",");
  double[] data = new double[DATALENGTH];
  for (int i = STARTINDEX; i < ENDINDEX; i++) {
    data[i - STARTINDEX] = Double.parseDouble(split[i]);
  }
  // Fuzzy k-means style membership: accumulate (1/dist)^(1/(m-1)) over all centers.
  double etahSum = 0d;
  for (Integer cKey : centers.keySet()) {
    double[] cenetr = centers.get(cKey);
    double dist = DistanceComparator.findDistance(cenetr, data);
    double inv_dist = 1 / dist;
    double pow = 1 / (m - 1);
    double etah = Math.pow(inv_dist, pow);
    etahSum += etah;
  }
  String className = split[CLASSINDEX];
  Vector vector = new Vector();
  vector.setData(data);
  vector.setClassName(className);
  vector.setIndex(Integer.parseInt(split[0]));
  int nearCenter = DistanceComparator.findMinimumDistance(data, centers);
  // Membership degree of this point with respect to its nearest center.
  double[] cenetr = centers.get(nearCenter);
  double dist = DistanceComparator.findDistance(cenetr, data);
  double inv_dist = 1 / dist;
  double pow = 1 / (m - 1);
  double etah = Math.pow(inv_dist, pow);
  double mew = etah / etahSum;
  vector.setMew(mew);
  MRLogger.Log("Mew : " + mew);
  IntWritable k = new IntWritable(nearCenter);
  context.write(k, vector);
}
From source file:com.chinnu.churndetection.kmeans.KMeansMapper.java
@Override
protected void map(LongWritable key, Text value,
    Mapper<LongWritable, Text, IntWritable, Vector>.Context context)
    throws IOException, InterruptedException {
  HashMap<Integer, double[]> centers = new HashMap<>();
  int idx = 0;
  String[] lineSplit = CENTERS.split("\n");
  for (int j = 0; j < lineSplit.length; j++) {
    String line = lineSplit[j];
    double[] center = new double[DATALENGTH];
    String[] split = line.split(",");
    for (int i = 0; i < DATALENGTH; i++) {
      center[i] = Double.parseDouble(split[i]);
    }
    centers.put(idx++, center);
  }
  String line = value.toString();
  String[] split = line.split(",");
  double[] data = new double[DATALENGTH];
  for (int i = STARTINDEX; i < ENDINDEX; i++) {
    data[i - STARTINDEX] = Double.parseDouble(split[i]);
  }
  String className = split[CLASSINDEX];
  Vector vector = new Vector();
  vector.setData(data);
  vector.setClassName(className);
  vector.setIndex(Integer.parseInt(split[0]));
  int nearCenter = DistanceComparator.findMinimumDistance(data, centers);
  IntWritable k = new IntWritable(nearCenter);
  context.write(k, vector);
}
From source file:com.chriscx.mapred.Map.java
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
    throws IOException {
  String line = (caseSensitive) ? value.toString() : value.toString().toLowerCase();
  for (String pattern : patternsToSkip) {
    line = line.replaceAll(pattern, "");
  }
  StringTokenizer tokenizer = new StringTokenizer(line);
  while (tokenizer.hasMoreTokens()) {
    word.set(tokenizer.nextToken());
    output.collect(word, one);
    reporter.incrCounter(Counters.INPUT_WORDS, 1);
  }
  if ((++numRecords % 100) == 0) {
    reporter.setStatus("Finished processing " + numRecords + " records " + "from the input file: " + inputFile);
  }
}
From source file:com.citic.zxyjs.zwlscx.mapreduce.join.api.DataJoinReducerBase.java
License:Apache License
/**
 * This is the function that re-groups values for a key into sub-groups
 * based on a secondary key (input tag).
 * @param values
 * @return
 */
private SortedMap<Text, ResetableIterator<TaggedMapOutput>> regroup(Text key,
    Iterator<TaggedMapOutput> values, Context context) throws IOException {
  this.numOfValues = 0;
  SortedMap<Text, ResetableIterator<TaggedMapOutput>> retv =
      new TreeMap<Text, ResetableIterator<TaggedMapOutput>>();
  TaggedMapOutput aRecord = null;
  while (values.hasNext()) {
    this.numOfValues += 1;
    if (this.numOfValues % 100 == 0) {
      reporter.setStatus("key: " + key.toString() + " numOfValues: " + this.numOfValues);
    }
    if (this.numOfValues > this.maxNumOfValuesPerGroup) {
      continue;
    }
    aRecord = values.next().clone(context.getConfiguration());
    Text tag = aRecord.getTag();
    ResetableIterator<TaggedMapOutput> data = retv.get(tag);
    if (data == null) {
      data = createResetableIterator();
      retv.put(tag, data);
    }
    data.add(aRecord);
  }
  if (this.numOfValues > this.largestNumOfValues) {
    this.largestNumOfValues = numOfValues;
    LOG.info("key: " + key.toString() + " this.largestNumOfValues: " + this.largestNumOfValues);
  }
  return retv;
}