List of usage examples for org.apache.hadoop.io.Text#hashCode()
@Override public int hashCode()
From source file: com.marcolotz.MRComponents.KeyStructureWritable.java
License: Creative Commons License
/**
 * The default partitioner is the HashPartitioner, which uses the hashCode
 * method to determine which reducer to send the K,V pair to. For this
 * reason, objects with the same key should give the same hash value.
 *
 * In this implementation the hashCode is given by the SeriesInstanceUID hash.
 */
@Override
public int hashCode() {
    Text hashText = new Text(getSeriesInstanceUID());
    return hashText.hashCode();
}
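Since the hash is derived from the SeriesInstanceUID text, any two keys carrying the same UID hash identically and are routed to the same reducer by HashPartitioner. A minimal stand-alone sketch of that property (the UID value below is made up for illustration and does not come from the original class):

import org.apache.hadoop.io.Text;

public class SameUidSameReducerSketch {
    public static void main(String[] args) {
        String uid = "1.2.840.113619.2.55.3";        // illustrative SeriesInstanceUID
        int h1 = new Text(uid).hashCode();
        int h2 = new Text(uid).hashCode();
        int numReducers = 10;
        // Equal UIDs give equal hashes, hence equal partitions under HashPartitioner.
        System.out.println(h1 == h2);
        System.out.println((h1 & Integer.MAX_VALUE) % numReducers
                == (h2 & Integer.MAX_VALUE) % numReducers);
    }
}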
From source file: org.apache.blur.manager.BlurPartitioner.java
License: Apache License
/**
 * Gets the partition or reducer from the rowId, based on the number of
 * shards in the table.
 *
 * @param rowId                  the rowId
 * @param numberOfShardsInTable  the number of shards in the table.
 * @return the partition where this rowId should be processed.
 */
@Override
public int getPartition(Text key, Writable value, int numReduceTasks) {
    return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
}
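The & Integer.MAX_VALUE mask clears the sign bit, so even a negative Text.hashCode() maps to a valid partition index in [0, numReduceTasks). A reduced sketch of the same computation outside the partitioner (the rowId value is illustrative):

import org.apache.hadoop.io.Text;

public class BlurPartitionSketch {
    public static void main(String[] args) {
        Text rowId = new Text("row-12345");          // illustrative rowId
        int numReduceTasks = 7;
        int hash = rowId.hashCode();                 // may be negative
        int partition = (hash & Integer.MAX_VALUE) % numReduceTasks;
        System.out.println(hash + " -> partition " + partition);
    }
}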
From source file: org.apache.hama.ml.recommendation.cf.OnlineTrainBSP.java
License: Apache License
private void askForFeatures(BSPPeer<Text, VectorWritable, Text, VectorWritable, MapWritable> peer,
        HashSet<Text> requiredUserFeatures, HashSet<Text> requiredItemFeatures)
        throws IOException, SyncException, InterruptedException {
    int peerCount = peer.getNumPeers();
    int peerId = peer.getPeerIndex();

    if (requiredUserFeatures != null) {
        Iterator<Text> iter = requiredUserFeatures.iterator();
        Text key = null;
        while (iter.hasNext()) {
            MapWritable msg = new MapWritable();
            key = iter.next();
            msg.put(OnlineCF.Settings.MSG_INP_USER_FEATURES, key);
            msg.put(OnlineCF.Settings.MSG_SENDER_ID, new IntWritable(peerId));
            peer.send(peer.getPeerName(key.hashCode() % peerCount), msg);
        }
    }

    if (requiredItemFeatures != null) {
        Iterator<Text> iter = requiredItemFeatures.iterator();
        Text key = null;
        while (iter.hasNext()) {
            MapWritable msg = new MapWritable();
            key = iter.next();
            msg.put(OnlineCF.Settings.MSG_INP_ITEM_FEATURES, key);
            msg.put(OnlineCF.Settings.MSG_SENDER_ID, new IntWritable(peerId));
            peer.send(peer.getPeerName(key.hashCode() % peerCount), msg);
        }
    }
}
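Here the hash plays the role of a router: each requested feature key is sent to the peer at index key.hashCode() % peerCount, so every peer that needs the same feature asks the same owner. A hedged sketch of just that routing decision (the peer names are hypothetical and not part of the Hama API):

import org.apache.hadoop.io.Text;

public class PeerRoutingSketch {
    public static void main(String[] args) {
        String[] peerNames = { "peer-0", "peer-1", "peer-2", "peer-3" };  // hypothetical peer list
        int peerCount = peerNames.length;

        Text featureKey = new Text("user-42");
        int index = featureKey.hashCode() % peerCount;   // same expression as in askForFeatures
        // Text.hashCode() can be negative for some contents; normalize before indexing here.
        System.out.println("would send to " + peerNames[(index + peerCount) % peerCount]);
    }
}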
From source file: org.apache.mahout.freqtermsets.AggregatorReducer.java
License: Apache License
@Override
protected void reduce(Text key, Iterable<TopKStringPatterns> values, Context context)
        throws IOException, InterruptedException {
    // YA get data to do more than freq merge
    int myMaxHeapSize = maxHeapSize;
    Configuration conf = context.getConfiguration();
    FileSystem fs = FileSystem.get(conf); // TODO: do I need getLocal(conf)?
    String cachePath = FilenameUtils.concat(FileUtils.getTempDirectory().toURI().toString(),
            Thread.currentThread().getName() + "_" + key.hashCode() + "_patterns");
    org.apache.hadoop.io.ArrayFile.Writer cacheWr =
            new ArrayFile.Writer(conf, fs, cachePath, TopKStringPatterns.class);
    final String keyStr = key.toString();
    final OpenObjectLongHashMap<String> jointFreq = new OpenObjectLongHashMap<String>();
    TopKStringPatterns metaPatterns = new TopKStringPatterns();

    for (TopKStringPatterns value : values) {
        List<Pair<List<String>, Long>> vPatterns = value.getPatterns();
        for (int p = vPatterns.size() - 1; p >= 0; --p) {
            Pair<List<String>, Long> pattern = vPatterns.get(p);
            if (pattern == null) {
                continue; // just like their merge
            }
            for (String other : pattern.getFirst()) {
                if (other.charAt(0) == METADATA_PREFIX) {
                    // Keep metadata out of merge
                    vPatterns.remove(p);
                    // Make sure it has space to be merged
                    ++myMaxHeapSize;
                    // Store the metadata temporarily.. we will add it in the end
                    // where it can't be pruned out
                    metaPatterns.getPatterns().add(pattern);
                    // done processing metadata itemset
                    break;
                }
                if (keyStr.equals(other)) {
                    continue;
                }
                long freq = jointFreq.get(other);
                if (pattern.getSecond() > freq) {
                    freq = pattern.getSecond();
                }
                jointFreq.put(other, freq);
            }
        }
        cacheWr.append(value);
    }
    cacheWr.close();

    org.apache.hadoop.io.ArrayFile.Reader cacheRd = new ArrayFile.Reader(fs, cachePath, conf);
    // END YA get data

    TopKStringPatterns patterns = new TopKStringPatterns();
    TopKStringPatterns value = new TopKStringPatterns();
    while (cacheRd.next(value) != null) {
        context.setStatus("Aggregator Reducer: Selecting TopK patterns for: " + key);
        // YA Mutual info merge.. TODO: more metrics passed as class name of comparator
        if (sortByMutualInfo) {
            patterns = patterns.merge(value, myMaxHeapSize,
                    new Comparator<Pair<List<String>, Long>>() {

                        private double calcNormalizedMutualInfo(String[] bagOfTokens) {
                            double numer = 0;
                            double denim = 0;
                            double ft1 = fMap.get(keyStr);
                            for (int t2 = 0; t2 < bagOfTokens.length; ++t2) {
                                if (bagOfTokens[t2].equals(keyStr)) {
                                    continue;
                                }
                                double ft2 = fMap.get(bagOfTokens[t2]);
                                double jf = jointFreq.get(bagOfTokens[t2]);
                                // This check shouldn't be even plausible.. save time:
                                // if (jf != 0) {
                                double jp = jf / totalNterms;
                                numer += jp * (Math.log(jf / (ft1 * ft2)) + lnTotalNTerms);
                                denim += jp * Math.log(jp);
                            }
                            double result = numer;
                            if (denim != 0) {
                                result /= -denim;
                            }
                            return result;
                        }

                        @Override
                        public int compare(Pair<List<String>, Long> o1, Pair<List<String>, Long> o2) {
                            String[] bagOfTokens = o1.getFirst().toArray(new String[0]);
                            double mi1 = calcNormalizedMutualInfo(bagOfTokens);
                            bagOfTokens = o2.getFirst().toArray(new String[0]);
                            double mi2 = calcNormalizedMutualInfo(bagOfTokens);
                            int result = Double.compare(mi1, mi2);
                            if (result == 0) {
                                result = Double.compare(o1.getFirst().size(), o2.getFirst().size());
                                if (result == 0) {
                                    result = o1.getSecond().compareTo(o2.getSecond());
                                }
                            }
                            return result;
                        }
                    });
            // END YA Mutual info merge
        } else {
            patterns = patterns.mergeFreq(value, myMaxHeapSize);
        }
    }

    // YA get data
    cacheRd.close();
    fs.delete(new Path(cachePath), true);
    patterns = patterns.merge(metaPatterns, myMaxHeapSize,
            new Comparator<Pair<List<String>, Long>>() {
                @Override
                public int compare(Pair<List<String>, Long> o1, Pair<List<String>, Long> o2) {
                    // Force the metadata to be accepted
                    return -1;
                }
            });
    // END YA get data
    context.write(key, patterns);
}
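In this reducer Text.hashCode() is not used for routing at all; it only helps build a per-key temporary file name for spilling the incoming patterns to an ArrayFile before the two-pass merge. A reduced sketch of just that naming scheme (the key value is illustrative):

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.hadoop.io.Text;

public class CachePathSketch {
    public static void main(String[] args) {
        Text key = new Text("machine learning");     // illustrative reduce key
        // Same construction as in the reducer: temp dir + thread name + key hash + suffix.
        String cachePath = FilenameUtils.concat(
                FileUtils.getTempDirectory().toURI().toString(),
                Thread.currentThread().getName() + "_" + key.hashCode() + "_patterns");
        System.out.println(cachePath);
    }
}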
From source file: org.shadowmask.engine.hive.udf.UDFHashTest.java
License: Apache License
@Test
public void testUDFHash() {
    UDFHash udfHash = new UDFHash();

    Text data1 = new Text("hello");
    IntWritable result = udfHash.evaluate(data1);
    assertEquals(data1.hashCode(), result.get());

    LongWritable data2 = new LongWritable(80000000000L);
    result = udfHash.evaluate(data2);
    assertEquals(data2.hashCode(), result.get());

    IntWritable data3 = new IntWritable(345);
    result = udfHash.evaluate(data3);
    assertEquals(data3.hashCode(), result.get());

    data3 = null;
    result = udfHash.evaluate(data3);
    assertNull(result);
}
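The assertions hold because UDFHash is expected to return the Writable's own hashCode(), and for Text that value is derived from the backing bytes (Text inherits its hashCode() from BinaryComparable, which delegates to WritableComparator.hashBytes). A small sketch of that consistency, under the assumption that this inheritance holds in the Hadoop version in use:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparator;

public class TextHashConsistencySketch {
    public static void main(String[] args) {
        Text t = new Text("hello");
        int fromText = t.hashCode();                                        // BinaryComparable hash
        int fromBytes = WritableComparator.hashBytes(t.getBytes(), t.getLength());
        System.out.println(fromText == fromBytes);                          // expected: true
    }
}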