List of usage examples for org.apache.hadoop.io.Text#toString()
@Override
public String toString()
From source file:clustering.link_back.step2.JoinReducer.java
License:Apache License
/** * @param key entry_id@@g_no, join_order * @param values cluster_id in step1 result, * or g_name \t g_model [\t else] in pre result. * {@inheritDoc}/*from ww w.ja v a2 s . com*/ */ @Override public void reduce(Step2KeyWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException { // called on every group of keys for (Text value : values) { if (key.getTag().get() == 1) { // step result, value = cluster_id this.outputValue.set(Integer.valueOf(value.toString())); } else { this.outputKey.set(key.getJoinKey().toString() + "\t" + value.toString()); // entry_id@@g_no \t g_name \t g_model [\t else], cluster_id context.write(this.outputKey, this.outputValue); } } }
From source file:clustering.link_back.step2.SetKeyMapper.java
License:Apache License
/** * @param key entry_id@@g_no/*from ww w . j ava 2s . co m*/ * @param value cluster_id or content * {@inheritDoc} */ @Override public void map(Text key, Text value, Context context) throws IOException, InterruptedException { this.taggedKey.set(key.toString(), this.joinOrder); // (group_id,join_order) \t cluster_id or content context.write(this.taggedKey, value); }
From source file:clustering.mst.ChildMapper.java
License:Apache License
/** * @param key group_id1,group_id2/*from www .j a v a2 s . c o m*/ * @param value similarity * {@inheritDoc} */ @Override public void map(Text key, Text value, Context context) throws IOException, InterruptedException { String idPair = key.toString(); String[] ids = idPair.split(","); int id1 = Integer.valueOf(ids[0]); int id2 = Integer.valueOf(ids[1]); // get the weight double weight = Double.valueOf(value.toString()); this.outputKey.set(weight); int container = belongsTo(id1, id2); this.outputValue.set(idPair + ":" + container); // weight \t src,dest:containder_id context.write(this.outputKey, this.outputValue); }
From source file:clustering.mst.ChildPartitioner.java
License:Apache License
/** * @param key weight/* w ww. j a v a 2s .c o m*/ * @param value (group_id1,group_id2):container_id * {@inheritDoc} */ @Override public int getPartition(DoubleWritable key, Text value, int numPartitions) { if (numPartitions == 0) { return 0; } String[] contents = value.toString().split(":"); return Integer.valueOf(contents[1]) % numPartitions; }
From source file:clustering.mst.ChildReducer.java
License:Apache License
/**
 * Emits only those edges whose endpoints {@code unionFind.union} reports as merged.
 *
 * <p>Each value is cut at {@code ":"} to drop the container id, the remaining
 * {@code src,dest} pair is parsed into two integers and passed to the union-find; when
 * {@code union} returns {@code true} the pair is written under the incoming key.
 * {@inheritDoc}
 *
 * @param inputKey similarity
 * @param values   groupId1,groupId2:containerId
 * @param context  task context used to emit the accepted edges
 */
@Override
public void reduce(DoubleWritable inputKey, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    for (Text record : values) {
        // "src,dest:containerId" -> keep the "src,dest" part only
        String edge = record.toString().split(":")[0];
        String[] endpoints = edge.split(",");
        int from = Integer.parseInt(endpoints[0]);
        int to = Integer.parseInt(endpoints[1]);

        if (!this.unionFind.union(from, to)) {
            // presumably both endpoints were already connected -- nothing to emit
            continue;
        }
        this.outputValue.set(edge);
        context.write(inputKey, this.outputValue);
    }
}
From source file:clustering.mst.FinalMapper.java
License:Apache License
/** * @param key similarity// w w w. j a v a 2 s .c o m * @param value doc_id1,doc_id2 * {@inheritDoc} */ @Override public void map(Text key, Text value, Context context) throws IOException, InterruptedException { this.outputKey.set(Double.valueOf(key.toString())); // similarity \t doc_id1,doc_id2 context.write(this.outputKey, value); }
From source file:clustering.mst.FinalReducer.java
License:Apache License
/**
 * Unions the endpoints of every pair whose key is below the configured threshold.
 *
 * <p>Groups whose key is not below {@code threshold} are ignored. Nothing is written to
 * the context in this method.
 * {@inheritDoc}
 *
 * @param inputKey similarity
 * @param values   groupId1,groupId2
 * @param context  task context (not written to here)
 */
@Override
public void reduce(DoubleWritable inputKey, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    // Written as a negated "<" so the comparison keeps its exact meaning for every input.
    if (!(inputKey.get() < this.threshold)) {
        return;
    }

    for (Text pair : values) {
        String[] endpoints = pair.toString().split(",");
        int from = Integer.parseInt(endpoints[0]);
        int to = Integer.parseInt(endpoints[1]);
        this.unionFind.union(from, to);
    }
}
From source file:clustering.simhash.Step1Mapper.java
License:Apache License
/** * Calculate the SimHash signature of each commodity. * The output key is the signature and the output value is the * commodity id(entry_id + "@@" + g_no) with commodity info(g_name + "##" + g_model). * * @param key entry_id@@g_no//w w w . jav a 2 s .com * @param value g_name##g_model * {@inheritDoc} */ @Override protected void map(Text key, Text value, Context context) throws IOException, InterruptedException { String nameAndModel = value.toString(); SimHash signature = SimHash.Builder.of(nameAndModel.replace("##", " ")).build(); this.outputKey.set(signature.getHashCode()); this.outputValue.set(key.toString() + "::" + nameAndModel); // simhash in long \t entry_id@@g_no::g_name##g_model context.write(this.outputKey, this.outputValue); }
From source file:clustering.simhash.Step1Reducer.java
License:Apache License
/** * @param key simhash// www . jav a 2s. co m * @param values entry_id@@g_no::g_name##g_model * {@inheritDoc} */ @Override protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException { for (Text value : values) { String[] docIdAndCommoInfo = value.toString().split("::"); SimHash thisHash = SimHash.Builder.of(docIdAndCommoInfo[1]).build(key.get()); int id = this._pool.hasSimilar(thisHash, this.threshold); if (id == -1) { // does not contain id = count.incrementAndGet(); this._pool.update(thisHash, id); } this.outputKey.set(id); // group_id \t entry_id@@g_no::g_name##g_model context.write(this.outputKey, value); } }
From source file:clustering.simhash.Step2Mapper.java
License:Apache License
/**
 * Parses the textual id key into the numeric output key and forwards the value as is.
 * {@inheritDoc}
 *
 * @param key     id
 * @param value   entry_id@@g_no::g_name##g_model
 * @param context task context used to emit the re-keyed record
 */
@Override
protected void map(Text key, Text value, Context context)
        throws IOException, InterruptedException {
    String idText = key.toString();
    this.outputKey.set(Integer.valueOf(idText));
    context.write(this.outputKey, value);
}