// Java tutorial
/* * Copyright (C) 2013-2015 Uncharted Software Inc. * * Property of Uncharted(TM), formerly Oculus Info Inc. * http://uncharted.software/ * * Released under the MIT License. * * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/
package influent.server.clustering;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Hashtable;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import influent.idl.FL_Cluster;
import influent.idl.FL_DistributionRange;
import influent.idl.FL_Entity;
import influent.idl.FL_EntityTag;
import influent.idl.FL_Frequency;
import influent.idl.FL_GeoData;
import influent.idl.FL_Geocoding;
import influent.idl.FL_Property;
import influent.idl.FL_PropertyTag;
import influent.idl.FL_PropertyType;
import influent.idl.FL_RangeType;
import influent.idlhelper.ClusterHelper;
import influent.idlhelper.EntityHelper;
import influent.idlhelper.PropertyHelper;
import influent.idlhelper.SingletonRangeHelper;
import influent.server.clustering.utils.EntityClusterFactory;
import influent.server.spi.impl.BasicCountryLevelGeocoding;
import influent.server.utilities.IdGenerator;
import influent.server.utilities.InfluentId;
import oculus.aperture.spi.common.Properties;

import org.apache.commons.lang.StringUtils;
import org.junit.Assert;
import org.junit.Test;

/**
 * Tests for {@link GeneralEntityClusterer}.
 *
 * <p>Each test builds a small set of entities, clusters them with a clusterer configured
 * for one or more cluster fields, and compares a string rendering of selected clusters
 * (uid, members, sub-clusters) against an expected value.
 */
public class EntityClustererTest {

    /** Value registered under {@code entity.clusterer.stopwords} (comma separated). */
    private static final String STOP_WORDS =
            "a,able,about,above,across,after,again,against,all,almost,alone,along,already,also,although,always,"
            + "am,among,an,and,another,any,anybody,anyone,anything,anywhere,are,area,areas,around,as,ask,asked,"
            + "asking,asks,at,away,"
            + "b,back,backed,backing,backs,be,became,because,become,becomes,been,before,began,behind,being,beings,"
            + "best,better,between,big,both,but,by,"
            + "c,came,can,cannot,case,cases,certain,certainly,clear,clearly,come,could,"
            + "d,dear,did,differ,different,differently,do,does,done,down,downed,downing,downs,during,"
            + "e,each,early,either,else,end,ended,ending,ends,enough,even,evenly,ever,every,everybody,everyone,"
            + "everything,everywhere,"
            + "f,face,faces,fact,facts,far,felt,few,find,finds,for,four,from,full,fully,further,furthered,"
            + "furthering,furthers,"
            + "g,gave,general,generally,get,gets,give,given,gives,go,going,good,goods,got,great,greater,greatest,"
            + "group,grouped,grouping,groups,"
            + "h,had,has,have,having,he,her,here,hers,herself,high,higher,highest,him,himself,his,how,however,"
            + "i,if,important,in,interest,interested,interesting,interests,into,is,it,its,itself,"
            + "j,just,"
            + "k,keep,keeps,kind,knew,know,known,knows,"
            + "l,large,largely,last,later,latest,least,less,let,lets,like,likely,long,longer,longest,"
            + "m,made,make,making,man,many,may,me,member,members,men,might,more,most,mostly,mr,mrs,much,must,my,"
            + "myself,"
            + "n,necessary,need,needed,needing,needs,neither,never,new,newer,newest,next,no,nobody,non,noone,nor,"
            + "not,nothing,now,nowhere,number,numbers,"
            + "o,of,off,often,old,older,oldest,on,once,one,only,open,opened,opening,opens,or,order,ordered,"
            + "ordering,orders,other,others,our,out,over,own,"
            + "p,part,parted,parting,parts,per,perhaps,place,places,point,pointed,pointing,points,possible,"
            + "present,presented,presenting,presents,problem,problems,put,puts,"
            + "q,quite,"
            + "r,rather,really,right,room,rooms,"
            + "s,said,same,saw,say,says,second,seconds,see,seem,seemed,seeming,seems,sees,several,shall,she,"
            + "should,show,showed,showing,shows,side,sides,since,small,smaller,smallest,so,some,somebody,someone,"
            + "something,somewhere,state,states,still,such,sure,"
            + "t,take,taken,than,that,the,their,them,then,there,therefore,these,they,thing,things,think,thinks,"
            + "this,those,though,thought,thoughts,three,through,thus,tis,to,today,together,too,took,toward,turn,"
            + "turned,turning,turns,twas,two,"
            + "u,under,until,up,upon,us,use,used,uses,"
            + "v,very,"
            + "w,want,wanted,wanting,wants,was,way,ways,we,well,wells,went,were,what,when,where,whether,which,"
            + "while,who,whole,whom,whose,why,will,with,within,without,work,worked,working,works,would,"
            + "x,y,year,years,yet,you,young,younger,youngest,your,yours,z";

    /** Factory handed to the clusterer; re-created by every {@link #createClusterer} call. */
    EntityClusterFactory clusterFactory;

    /**
     * Builds a {@link GeneralEntityClusterer} backed by an in-memory {@link Properties} stub,
     * country-level geocoding and a sequential cluster id generator.
     *
     * @param clusterProperties value registered under {@code entity.clusterer.clusterfields}
     * @param maxClusterSize value registered under {@code entity.clusterer.maxclustersize} and
     *        returned by the stub's {@code getInteger}
     * @return the initialised clusterer
     */
    private GeneralEntityClusterer createClusterer(final String clusterProperties, final int maxClusterSize) {
        Properties pMgr = new Properties() {
            private final Map<String, String> values = new Hashtable<String, String>();

            /** Populates the backing map; later calls are no-ops once it has been filled. */
            public void init() {
                if (!values.isEmpty()) {
                    return;
                }
                values.put("entity.clusterer.enablestopwords", "true");
                values.put("entity.clusterer.maxclustersize", Integer.toString(maxClusterSize));
                values.put("entity.clusterer.stopwords", STOP_WORDS);
                values.put("entity.clusterer.clusterfields", clusterProperties);
                values.put("entity.clusterer.clusterproperties", "TYPE:type-dist,GEO:Location");
            }

            @Override
            public Object getObject(String key) {
                init();
                return values.get(key);
            }

            @Override
            public Iterable<Object> getObjects(String key) {
                throw new UnsupportedOperationException();
            }

            @Override
            public String getString(String key, String defaultValue) {
                init();
                if (values.containsKey(key)) {
                    return values.get(key);
                }
                return defaultValue;
            }

            @Override
            public Iterable<String> getStrings(String key) {
                throw new UnsupportedOperationException();
            }

            @Override
            public Boolean getBoolean(String key, Boolean defaultValue) {
                // Answers true for every key; the stub is meant to serve enablestopwords.
                return true;
            }

            @Override
            public Iterable<Boolean> getBooleans(String key) {
                throw new UnsupportedOperationException();
            }

            @Override
            public Integer getInteger(String key, Integer defaultValue) {
                // Answers the configured max cluster size for every key (was a hard-coded 2,
                // which silently ignored the maxClusterSize argument).
                return maxClusterSize;
            }

            @Override
            public Iterable<Integer> getIntegers(String key) {
                throw new UnsupportedOperationException();
            }

            @Override
            public Long getLong(String key, Long defaultValue) {
                throw new UnsupportedOperationException();
            }

            @Override
            public Iterable<Long> getLongs(String key) {
                throw new UnsupportedOperationException();
            }

            @Override
            public Float getFloat(String key, Float defaultValue) {
                throw new UnsupportedOperationException();
            }

            @Override
            public Iterable<Float> getFloats(String key) {
                throw new UnsupportedOperationException();
            }

            @Override
            public Double getDouble(String key, Double defaultValue) {
                throw new UnsupportedOperationException();
            }

            @Override
            public Iterable<Double> getDoubles(String key) {
                throw new UnsupportedOperationException();
            }

            @Override
            public Properties getPropertiesSet(String key, Properties defaultValue) {
                throw new UnsupportedOperationException();
            }

            @Override
            public Iterable<Properties> getPropertiesSets(String key) {
                throw new UnsupportedOperationException();
            }
        };

        FL_Geocoding geocoding = new BasicCountryLevelGeocoding();

        // Sequential ids (1, 2, 3, ...) so tests can refer to clusters by a predictable uid.
        IdGenerator idGen = new IdGenerator() {
            private int id = 0;

            @Override
            public String nextId() {
                return InfluentId.fromNativeId(InfluentId.CLUSTER, "cluster", Integer.toString(++id))
                        .getInfluentId();
            }
        };

        clusterFactory = new EntityClusterFactory(idGen, geocoding, pMgr);

        GeneralEntityClusterer clusterer = new GeneralEntityClusterer();
        clusterer.init(new Object[] { clusterFactory, geocoding, pMgr });
        return clusterer;
    }

    /**
     * Creates a test entity whose id and label are both {@code name}.
     *
     * <p>Properties are added in the order inflowing, outflowing, geo; at least one test
     * removes the geo property by its index (2), so this order matters.
     *
     * @param name entity id and label
     * @param tag entity tag; its name is also used as the entity type
     * @param location geo text
     * @param cc country code
     * @param lat latitude
     * @param lon longitude
     * @param indegree value of the "inflowing" property
     * @param outdegree value of the "outflowing" property
     * @return the new entity
     */
    private FL_Entity createEntity(String name, FL_EntityTag tag, String location, String cc, double lat,
            double lon, int indegree, int outdegree) {
        List<FL_Property> props = new ArrayList<FL_Property>();

        props.add(new PropertyHelper("inflowing", "inflowing", indegree,
                Arrays.asList(FL_PropertyTag.INFLOWING, FL_PropertyTag.AMOUNT, FL_PropertyTag.USD)));
        props.add(new PropertyHelper("outflowing", "outflowing", outdegree,
                Arrays.asList(FL_PropertyTag.OUTFLOWING, FL_PropertyTag.AMOUNT, FL_PropertyTag.USD)));

        FL_GeoData geoData = FL_GeoData.newBuilder()
                .setText(location)
                .setLat(lat)
                .setLon(lon)
                .setCc(cc)
                .build();
        props.add(FL_Property.newBuilder()
                .setKey("geo")
                .setFriendlyText("")
                .setTags(Collections.singletonList(FL_PropertyTag.GEO))
                .setRange(SingletonRangeHelper.from(geoData))
                .setProvenance(null)
                .setUncertainty(null)
                .build());

        return new EntityHelper(name, name, tag.name(), tag, props);
    }

    /**
     * Creates a cluster summary carrying inflowing/outflowing/count properties plus a
     * location distribution and a type distribution. Currently not used by any test.
     *
     * @param name native id and label of the summary
     * @param locationDist frequency per country code
     * @param typeDist frequency per type name
     * @param count value of the "count" property
     * @param indegree value of the "inflowing" property
     * @param outdegree value of the "outflowing" property
     * @return the new cluster summary
     */
    @SuppressWarnings("unused")
    private FL_Cluster createClusterSummary(String name, Hashtable<String, Double> locationDist,
            Hashtable<String, Double> typeDist, int count, int indegree, int outdegree) {
        List<FL_Property> props = new ArrayList<FL_Property>();

        props.add(new PropertyHelper("inflowing", "inflowing", indegree,
                Arrays.asList(FL_PropertyTag.INFLOWING, FL_PropertyTag.AMOUNT, FL_PropertyTag.USD)));
        props.add(new PropertyHelper("outflowing", "outflowing", outdegree,
                Arrays.asList(FL_PropertyTag.OUTFLOWING, FL_PropertyTag.AMOUNT, FL_PropertyTag.USD)));
        props.add(new PropertyHelper("count", "count", count, FL_PropertyType.INTEGER, FL_PropertyTag.STAT));

        // create location dist prop
        List<FL_Frequency> freqs = new ArrayList<FL_Frequency>();
        for (Map.Entry<String, Double> entry : locationDist.entrySet()) {
            FL_GeoData geo = FL_GeoData.newBuilder()
                    .setText(null)
                    .setLat(null)
                    .setLon(null)
                    .setCc(entry.getKey())
                    .build();
            double freq = entry.getValue();
            freqs.add(FL_Frequency.newBuilder().setRange(geo).setFrequency(freq).build());
        }
        FL_DistributionRange range = FL_DistributionRange.newBuilder()
                .setDistribution(freqs)
                .setRangeType(FL_RangeType.DISTRIBUTION)
                .setType(FL_PropertyType.GEO)
                .setIsProbability(false)
                .build();
        props.add(FL_Property.newBuilder()
                .setKey("location-dist")
                .setFriendlyText("location-dist")
                .setRange(range)
                .setProvenance(null)
                .setUncertainty(null)
                .setTags(Arrays.asList(FL_PropertyTag.GEO))
                .build());

        // create type dist prop
        freqs = new ArrayList<FL_Frequency>();
        for (Map.Entry<String, Double> entry : typeDist.entrySet()) {
            double freq = entry.getValue();
            freqs.add(FL_Frequency.newBuilder().setRange(entry.getKey()).setFrequency(freq).build());
        }
        range = FL_DistributionRange.newBuilder()
                .setDistribution(freqs)
                .setRangeType(FL_RangeType.DISTRIBUTION)
                .setType(FL_PropertyType.STRING)
                .setIsProbability(false)
                .build();
        props.add(FL_Property.newBuilder()
                .setKey("type-dist")
                .setFriendlyText("type-dist")
                .setRange(range)
                .setProvenance(null)
                .setUncertainty(null)
                .setTags(Arrays.asList(FL_PropertyTag.TYPE))
                .build());

        return new ClusterHelper(
                InfluentId.fromNativeId(InfluentId.CLUSTER_SUMMARY, "cluster", name).getInfluentId(),
                name,
                Arrays.asList(FL_EntityTag.CLUSTER_SUMMARY),
                props,
                new ArrayList<String>(0),
                new ArrayList<String>(0),
                null,
                null,
                -1);
    }

    /**
     * Renders a cluster as <code>{uid:[members][subclusters]}</code>, with members and
     * sub-clusters comma separated.
     */
    private String clusterToString(FL_Cluster cluster) {
        return "{" + cluster.getUid() + ":[" + StringUtils.join(cluster.getMembers(), ",") + "]["
                + StringUtils.join(cluster.getSubclusters(), ",") + "]}";
    }

    /**
     * Looks up a cluster in the context, failing the test (rather than returning
     * {@code null}) when the id is unknown.
     */
    private FL_Cluster requireCluster(String clusterId, ClusterContext context) {
        FL_Cluster cluster = context.clusters.get(clusterId);
        Assert.assertNotNull("Invalid cluster id: " + clusterId, cluster);
        return cluster;
    }

    /**
     * Asserts that the cluster stored under {@code clusterId} renders (see
     * {@link #clusterToString}) to the expected {@code value}.
     *
     * @param clusterId uid of the cluster to check
     * @param value expected rendering
     * @param context context holding the clustering result
     */
    private void assertClusterEquals(String clusterId, String value, ClusterContext context) {
        FL_Cluster cluster = requireCluster(clusterId, context);
        // JUnit expects (expected, actual); the reversed order produced misleading messages.
        Assert.assertEquals(value, clusterToString(cluster));
    }

    @Test
    public void testClusterByType() {
        GeneralEntityClusterer clusterer = createClusterer("TYPE:categorical", 2);

        Collection<FL_Entity> entities = new LinkedList<FL_Entity>();
        entities.add(createEntity("entityA", FL_EntityTag.ACCOUNT, "Toronto", "CAN", 43.653226, -79.38318429999998, 2, 3));
        entities.add(createEntity("entityB", FL_EntityTag.ANONYMOUS, "Vancouver", "CAN", 49.2500, -123.1000, 5, 7));
        entities.add(createEntity("entityC", FL_EntityTag.ACCOUNT, "NYC", "USA", 40.7143528, -74.0059731, 11, 13));

        ClusterContext context = new ClusterContext();
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[entityA,entityC][]}", context);
        assertClusterEquals("c.cluster.2", "{c.cluster.2:[entityB][]}", context);
    }

    @Test
    public void testClusterByTypeMax() {
        GeneralEntityClusterer clusterer = createClusterer("TYPE:categorical", 2);

        Collection<FL_Entity> entities = new LinkedList<FL_Entity>();
        entities.add(createEntity("entityA", FL_EntityTag.ACCOUNT, "Toronto", "CAN", 43.653226, -79.38318429999998, 2, 3));
        entities.add(createEntity("entityB", FL_EntityTag.ANONYMOUS, "Vancouver", "CAN", 49.2500, -123.1000, 5, 7));
        entities.add(createEntity("entityC", FL_EntityTag.ACCOUNT, "NYC", "USA", 40.7143528, -74.0059731, 11, 13));
        entities.add(createEntity("entityD", FL_EntityTag.ACCOUNT, "LA", "USA", 34.0522342, -118.2436849, 17, 19));

        ClusterContext context = new ClusterContext();
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[][c.cluster.4,c.cluster.3]}", context);
        assertClusterEquals("c.cluster.2", "{c.cluster.2:[entityB][]}", context);
        assertClusterEquals("c.cluster.3", "{c.cluster.3:[entityA,entityC][]}", context);
        assertClusterEquals("c.cluster.4", "{c.cluster.4:[entityD][]}", context);
    }

    @Test
    public void testClusterByMaxMultiLevel() {
        GeneralEntityClusterer clusterer = createClusterer("TYPE:categorical", 2);

        Collection<FL_Entity> entities = new LinkedList<FL_Entity>();
        entities.add(createEntity("entityA", FL_EntityTag.ACCOUNT, "Toronto", "CAN", 43.653226, -79.38318429999998, 2, 3));
        entities.add(createEntity("entityB", FL_EntityTag.ANONYMOUS, "Vancouver", "CAN", 49.2500, -123.1000, 5, 7));
        entities.add(createEntity("entityC", FL_EntityTag.ANONYMOUS, "Vancouver", "CAN", 49.2500, -123.1000, 5, 7));
        entities.add(createEntity("entityD", FL_EntityTag.ACCOUNT, "NYC", "USA", 40.7143528, -74.0059731, 11, 13));
        entities.add(createEntity("entityE", FL_EntityTag.ACCOUNT, "LA", "USA", 34.0522342, -118.2436849, 17, 19));
        entities.add(createEntity("entityF", FL_EntityTag.ACCOUNT, "LA", "USA", 34.0522342, -118.2436849, 17, 19));
        entities.add(createEntity("entityG", FL_EntityTag.ACCOUNT, "LA", "USA", 34.0522342, -118.2436849, 17, 19));

        ClusterContext context = new ClusterContext();
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[][c.cluster.4,c.cluster.3]}", context);
        assertClusterEquals("c.cluster.2", "{c.cluster.2:[entityB,entityC][]}", context);
        assertClusterEquals("c.cluster.3", "{c.cluster.3:[][c.cluster.6,c.cluster.5]}", context);
        assertClusterEquals("c.cluster.4", "{c.cluster.4:[entityE,entityF][]}", context);
        assertClusterEquals("c.cluster.5", "{c.cluster.5:[entityA,entityD][]}", context);
        assertClusterEquals("c.cluster.6", "{c.cluster.6:[entityG][]}", context);
    }

    @Test
    public void testClusterByLabel() {
        GeneralEntityClusterer clusterer = createClusterer("LABEL:label", 2);

        Collection<FL_Entity> entities = new LinkedList<FL_Entity>();
        entities.add(createEntity("Adamn", FL_EntityTag.ACCOUNT, "Toronto", "CAN", 43.653226, -79.38318429999998, 2, 3));
        entities.add(createEntity("Alf", FL_EntityTag.ANONYMOUS, "Vancouver", "CAN", 49.2500, -123.1000, 5, 7));
        entities.add(createEntity("Zulu", FL_EntityTag.ACCOUNT, "NYC", "USA", 40.7143528, -74.0059731, 11, 13));

        ClusterContext context = new ClusterContext();
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[Zulu][]}", context); // cluster by alpha "N-Z"
        assertClusterEquals("c.cluster.2", "{c.cluster.2:[Adamn,Alf][]}", context); // cluster by alpha "A-M"
    }

    @Test
    public void testClusterByLabelFingerprint() {
        GeneralEntityClusterer clusterer = createClusterer("LABEL:label", 2);

        Collection<FL_Entity> entities = new LinkedList<FL_Entity>();
        entities.add(createEntity("Amand", FL_EntityTag.ACCOUNT, "Toronto", "CAN", 43.653226, -79.38318429999998, 2, 3));
        entities.add(createEntity("Bob", FL_EntityTag.ANONYMOUS, "Vancouver", "CAN", 49.2500, -123.1000, 5, 7));
        entities.add(createEntity("Amanda", FL_EntityTag.ANONYMOUS, "Vancouver", "CAN", 49.2500, -123.1000, 5, 7));
        entities.add(createEntity("Alf", FL_EntityTag.ANONYMOUS, "Vancouver", "CAN", 49.2500, -123.1000, 5, 7));
        entities.add(createEntity("Zulu", FL_EntityTag.ACCOUNT, "NYC", "USA", 40.7143528, -74.0059731, 11, 13));

        ClusterContext context = new ClusterContext();
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[Zulu][]}", context); // cluster by alpha "N-Z"
        assertClusterEquals("c.cluster.2", "{c.cluster.2:[][c.cluster.4,c.cluster.3]}", context); // cluster by alpha "A-M"
        assertClusterEquals("c.cluster.3", "{c.cluster.3:[Bob][]}", context); // cluster by alpha "B"
        assertClusterEquals("c.cluster.4", "{c.cluster.4:[][c.cluster.5]}", context); // cluster by alpha "A"
        assertClusterEquals("c.cluster.5", "{c.cluster.5:[][c.cluster.6]}", context); // cluster by fuzzy matching <-- Strange
        assertClusterEquals("c.cluster.6", "{c.cluster.6:[][c.cluster.8,c.cluster.7]}", context); // cluster by fingerprint

        // Either leaf may hold the singleton, so both assignments are accepted.
        FL_Cluster leaf1 = requireCluster("c.cluster.7", context);
        if (leaf1.getMembers().size() == 1) {
            assertClusterEquals("c.cluster.7", "{c.cluster.7:[Alf][]}", context); // cluster by fingerprint
            assertClusterEquals("c.cluster.8", "{c.cluster.8:[Amand,Amanda][]}", context); // cluster by fingerprint
        } else {
            assertClusterEquals("c.cluster.8", "{c.cluster.8:[Alf][]}", context); // cluster by fingerprint
            assertClusterEquals("c.cluster.7", "{c.cluster.7:[Amand,Amanda][]}", context); // cluster by fingerprint
        }
    }

    @Test
    public void testClusterByLabelMax() {
        GeneralEntityClusterer clusterer = createClusterer("LABEL:label", 2);

        Collection<FL_Entity> entities = new LinkedList<FL_Entity>();
        entities.add(createEntity("Adan", FL_EntityTag.ACCOUNT, "Toronto", "CAN", 43.653226, -79.38318429999998, 2, 3));
        entities.add(createEntity("Alf", FL_EntityTag.ANONYMOUS, "Vancouver", "CAN", 49.2500, -123.1000, 5, 7));
        entities.add(createEntity("Amanda", FL_EntityTag.ANONYMOUS, "Vancouver", "CAN", 49.2500, -123.1000, 5, 7));
        entities.add(createEntity("Zulu", FL_EntityTag.ACCOUNT, "NYC", "USA", 40.7143528, -74.0059731, 11, 13));

        ClusterContext context = new ClusterContext();
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[Zulu][]}", context); // cluster by alpha "N-Z"
        assertClusterEquals("c.cluster.2", "{c.cluster.2:[][c.cluster.3]}", context); // cluster by alpha "A-M"
        assertClusterEquals("c.cluster.3", "{c.cluster.3:[][c.cluster.4]}", context); // cluster by alpha "A"
        assertClusterEquals("c.cluster.4", "{c.cluster.4:[][c.cluster.8,c.cluster.9]}", context); // cluster by alpha "A"

        FL_Cluster leaf1 = requireCluster("c.cluster.8", context);
        FL_Cluster leaf2 = requireCluster("c.cluster.9", context);

        // Make sure one leaf cluster contains two members and the other one
        int leaf1Size = leaf1.getMembers().size();
        int leaf2Size = leaf2.getMembers().size();
        Assert.assertTrue((leaf1Size == 1 && leaf2Size == 2) || (leaf2Size == 1 && leaf1Size == 2));
    }

    @Test
    public void testClusterByLabelIncremental() {
        GeneralEntityClusterer clusterer = createClusterer("LABEL:label", 2);

        Collection<FL_Entity> entities = new LinkedList<FL_Entity>();
        entities.add(createEntity("Amand", FL_EntityTag.ACCOUNT, "Toronto", "CAN", 43.653226, -79.38318429999998, 2, 3));
        entities.add(createEntity("Bob", FL_EntityTag.ANONYMOUS, "Vancouver", "CAN", 49.2500, -123.1000, 5, 7));
        entities.add(createEntity("Alf", FL_EntityTag.ANONYMOUS, "Vancouver", "CAN", 49.2500, -123.1000, 5, 7));
        entities.add(createEntity("Zulu", FL_EntityTag.ACCOUNT, "NYC", "USA", 40.7143528, -74.0059731, 11, 13));

        ClusterContext context = new ClusterContext();
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[Zulu][]}", context); // cluster by alpha "N-Z"
        assertClusterEquals("c.cluster.2", "{c.cluster.2:[][c.cluster.4,c.cluster.3]}", context); // cluster by alpha "A-M"
        assertClusterEquals("c.cluster.3", "{c.cluster.3:[Bob][]}", context); // cluster by alpha "B"
        assertClusterEquals("c.cluster.4", "{c.cluster.4:[Amand,Alf][]}", context); // cluster by alpha "A"

        // cluster a new entity
        entities = Collections.singletonList(
                createEntity("Amanda", FL_EntityTag.ANONYMOUS, "Vancouver", "CAN", 49.2500, -123.1000, 5, 7));
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[Zulu][]}", context); // cluster by alpha "N-Z"
        assertClusterEquals("c.cluster.2", "{c.cluster.2:[][c.cluster.4,c.cluster.3]}", context); // cluster by alpha "A-M"
        assertClusterEquals("c.cluster.3", "{c.cluster.3:[Bob][]}", context); // cluster by alpha "B"
        assertClusterEquals("c.cluster.4", "{c.cluster.4:[][c.cluster.5]}", context); // cluster by alpha "A"
        assertClusterEquals("c.cluster.5", "{c.cluster.5:[][c.cluster.7,c.cluster.6]}", context); // cluster by fuzzy matching

        // Either leaf may hold the singleton, so both assignments are accepted.
        FL_Cluster leaf1 = requireCluster("c.cluster.6", context);
        if (leaf1.getMembers().size() == 1) {
            assertClusterEquals("c.cluster.6", "{c.cluster.6:[Alf][]}", context); // cluster by fingerprint
            assertClusterEquals("c.cluster.7", "{c.cluster.7:[Amand,Amanda][]}", context); // cluster by fingerprint
        } else {
            assertClusterEquals("c.cluster.7", "{c.cluster.7:[Alf][]}", context); // cluster by fingerprint
            assertClusterEquals("c.cluster.6", "{c.cluster.6:[Amand,Amanda][]}", context); // cluster by fingerprint
        }
    }

    @Test
    public void testClusterByNumberSingleCluster() {
        GeneralEntityClusterer clusterer = createClusterer("INFLOWING:numeric:100", 2);

        Collection<FL_Entity> entities = new LinkedList<FL_Entity>();
        entities.add(createEntity("Amanda", FL_EntityTag.ACCOUNT, "Toronto", "CAN", 43.653226, -79.38318429999998, 550, 3));
        entities.add(createEntity("Zulu", FL_EntityTag.ACCOUNT, "NYC", "USA", 40.7143528, -74.0059731, 500, 13));

        ClusterContext context = new ClusterContext();
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[Amanda,Zulu][]}", context);
    }

    @Test
    public void testClusterByNumberMultipleClusters() {
        GeneralEntityClusterer clusterer = createClusterer("INFLOWING:numeric:6", 2);

        Collection<FL_Entity> entities = new LinkedList<FL_Entity>();
        entities.add(createEntity("Amanda", FL_EntityTag.ACCOUNT, "Toronto", "CAN", 43.653226, -79.38318429999998, 2000, 3));
        entities.add(createEntity("Zulu", FL_EntityTag.ACCOUNT, "NYC", "USA", 40.7143528, -74.0059731, 500, 13));

        ClusterContext context = new ClusterContext();
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[Amanda][]}", context);
        assertClusterEquals("c.cluster.2", "{c.cluster.2:[Zulu][]}", context);
    }

    @Test
    public void testClusterByNumberIncremental() {
        GeneralEntityClusterer clusterer = createClusterer("TYPE:categorical,INFLOWING:numeric:6", 2);

        Collection<FL_Entity> entities = new LinkedList<FL_Entity>();
        entities.add(createEntity("Amanda", FL_EntityTag.ACCOUNT, "Toronto", "CAN", 43.653226, -79.38318429999998, 2000, 3));
        entities.add(createEntity("Bob", FL_EntityTag.ACCOUNT, "Vancouver", "CAN", 49.2500, -123.1000, 5, 7));
        entities.add(createEntity("Zulu", FL_EntityTag.ACCOUNT, "NYC", "USA", 40.7143528, -74.0059731, 500, 13));

        ClusterContext context = new ClusterContext();
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[][c.cluster.6,c.cluster.5]}", context);

        FL_Cluster leaf1 = requireCluster("c.cluster.5", context);
        if (leaf1.getMembers().size() == 1) {
            assertClusterEquals("c.cluster.5", "{c.cluster.5:[Amanda][]}", context);
            assertClusterEquals("c.cluster.6", "{c.cluster.6:[Zulu,Bob][]}", context);
        } else {
            assertClusterEquals("c.cluster.5", "{c.cluster.5:[Zulu,Bob][]}", context);
            assertClusterEquals("c.cluster.6", "{c.cluster.6:[Amanda][]}", context);
        }

        // cluster a new entity
        entities = Collections.singletonList(
                createEntity("Alf", FL_EntityTag.ACCOUNT, "Vancouver", "CAN", 49.2500, -123.1000, 8, 7));
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[][c.cluster.6,c.cluster.5]}", context);

        // NOTE: leaf1 is the cluster 5 object fetched before the second clustering pass;
        // it is deliberately not re-fetched here.
        if (leaf1.getMembers().size() == 1) {
            assertClusterEquals("c.cluster.5", "{c.cluster.5:[Amanda][]}", context);
            assertClusterEquals("c.cluster.6", "{c.cluster.6:[][c.cluster.8,c.cluster.9]}", context);
            assertClusterEquals("c.cluster.8", "{c.cluster.8:[Zulu][]}", context);
            assertClusterEquals("c.cluster.9", "{c.cluster.9:[Bob,Alf][]}", context);
        } else {
            assertClusterEquals("c.cluster.6", "{c.cluster.6:[Amanda][]}", context);
            assertClusterEquals("c.cluster.5", "{c.cluster.5:[][c.cluster.8,c.cluster.9]}", context);
            assertClusterEquals("c.cluster.8", "{c.cluster.8:[Zulu][]}", context);
            assertClusterEquals("c.cluster.9", "{c.cluster.9:[Bob,Alf][]}", context);
        }
    }

    @Test
    public void testClusterByNumberMax() {
        GeneralEntityClusterer clusterer = createClusterer("TYPE:categorical,INFLOWING:numeric:6", 2);

        Collection<FL_Entity> entities = new LinkedList<FL_Entity>();
        entities.add(createEntity("Amanda", FL_EntityTag.ACCOUNT, "Toronto", "CAN", 43.653226, -79.38318429999998, 2000, 3));
        entities.add(createEntity("Bob", FL_EntityTag.ACCOUNT, "Vancouver", "CAN", 49.2500, -123.1000, 5, 7));
        entities.add(createEntity("Alf", FL_EntityTag.ACCOUNT, "Vancouver", "CAN", 49.2500, -123.1000, 8, 7));
        entities.add(createEntity("Zulu", FL_EntityTag.ACCOUNT, "NYC", "USA", 40.7143528, -74.0059731, 6, 13));

        ClusterContext context = new ClusterContext();
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[][c.cluster.3,c.cluster.2]}", context);
        assertClusterEquals("c.cluster.2", "{c.cluster.2:[Amanda][]}", context);
        assertClusterEquals("c.cluster.3", "{c.cluster.3:[][c.cluster.4]}", context);
        assertClusterEquals("c.cluster.4", "{c.cluster.4:[][c.cluster.5]}", context);
        assertClusterEquals("c.cluster.5", "{c.cluster.5:[][c.cluster.7,c.cluster.6]}", context);
        assertClusterEquals("c.cluster.6", "{c.cluster.6:[Bob,Alf][]}", context);
        assertClusterEquals("c.cluster.7", "{c.cluster.7:[Zulu][]}", context);
    }

    @Test
    public void testClusterByGeoMultiLevel() {
        GeneralEntityClusterer clusterer = createClusterer("GEO:geo", 2);

        Collection<FL_Entity> entities = new LinkedList<FL_Entity>();
        entities.add(createEntity("Amanda", FL_EntityTag.ACCOUNT, "Toronto", "CAN", 43.653226, -79.38318429999998, 2000, 3));
        entities.add(createEntity("Bob", FL_EntityTag.ACCOUNT, "Vancouver", "CAN", 49.2827, -123.1207, 5, 7));
        entities.add(createEntity("Steve", FL_EntityTag.ACCOUNT, "Victoria", "CAN", 48.4222, -123.3657, 5, 7));
        entities.add(createEntity("Dan", FL_EntityTag.ACCOUNT, "Montreal", "CAN", 45.5017, -73.5673, 5, 7));
        entities.add(createEntity("Alf", FL_EntityTag.ACCOUNT, "London", "GBR", 51.5072, -0.1275, 8, 7));
        entities.add(createEntity("Zulu", FL_EntityTag.ACCOUNT, "NYC", "USA", 40.7143528, -74.0059731, 6, 13));

        ClusterContext context = new ClusterContext();
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[Alf][]}", context); // Europe
        assertClusterEquals("c.cluster.2", "{c.cluster.2:[][c.cluster.3]}", context); // North America
        assertClusterEquals("c.cluster.3", "{c.cluster.3:[][c.cluster.4,c.cluster.5]}", context); // USA and CAN
        assertClusterEquals("c.cluster.4", "{c.cluster.4:[Zulu][]}", context); // USA
        assertClusterEquals("c.cluster.5", "{c.cluster.5:[][c.cluster.8,c.cluster.7,c.cluster.6]}", context); // CAN
        assertClusterEquals("c.cluster.6", "{c.cluster.6:[Amanda][]}", context); // Toronto
        assertClusterEquals("c.cluster.7", "{c.cluster.7:[Bob,Steve][]}", context); // Vancouver and Victoria
        assertClusterEquals("c.cluster.8", "{c.cluster.8:[Dan][]}", context); // Montreal
    }

    @Test
    public void testClusterByGeoIncremental() {
        GeneralEntityClusterer clusterer = createClusterer("GEO:geo", 2);

        Collection<FL_Entity> entities = new LinkedList<FL_Entity>();
        entities.add(createEntity("Amanda", FL_EntityTag.ACCOUNT, "Toronto", "CAN", 43.653226, -79.38318429999998, 2000, 3));
        entities.add(createEntity("Bob", FL_EntityTag.ACCOUNT, "Vancouver", "CAN", 49.2827, -123.1207, 5, 7));
        entities.add(createEntity("Alf", FL_EntityTag.ACCOUNT, "London", "GBR", 51.5072, -0.1275, 8, 7));
        entities.add(createEntity("Zulu", FL_EntityTag.ACCOUNT, "NYC", "USA", 40.7143528, -74.0059731, 6, 13));

        ClusterContext context = new ClusterContext();
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[Alf][]}", context); // Europe
        assertClusterEquals("c.cluster.2", "{c.cluster.2:[][c.cluster.3]}", context); // North America
        assertClusterEquals("c.cluster.3", "{c.cluster.3:[][c.cluster.4,c.cluster.5]}", context); // USA and CAN
        assertClusterEquals("c.cluster.4", "{c.cluster.4:[Zulu][]}", context); // USA
        assertClusterEquals("c.cluster.5", "{c.cluster.5:[Amanda,Bob][]}", context); // CAN

        // cluster new entities
        entities.clear();
        entities.add(createEntity("Steve", FL_EntityTag.ACCOUNT, "Victoria", "CAN", 48.4222, -123.3657, 5, 7));
        entities.add(createEntity("Dan", FL_EntityTag.ACCOUNT, "Montreal", "CAN", 45.5017, -73.5673, 5, 7));
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        // Dump the resulting hierarchy to stdout to aid debugging of failures below.
        for (FL_Cluster c : context.clusters.values()) {
            System.out.println(this.clusterToString(c));
        }

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[Alf][]}", context); // Europe
        assertClusterEquals("c.cluster.2", "{c.cluster.2:[][c.cluster.3]}", context); // North America
        assertClusterEquals("c.cluster.3", "{c.cluster.3:[][c.cluster.4,c.cluster.5]}", context); // USA and CAN
        assertClusterEquals("c.cluster.4", "{c.cluster.4:[Zulu][]}", context); // USA
        assertClusterEquals("c.cluster.5", "{c.cluster.5:[][c.cluster.8,c.cluster.7,c.cluster.6]}", context); // CAN
        assertClusterEquals("c.cluster.6", "{c.cluster.6:[Amanda][]}", context); // Toronto
        assertClusterEquals("c.cluster.7", "{c.cluster.7:[Bob,Steve][]}", context); // Vancouver and Victoria
        assertClusterEquals("c.cluster.8", "{c.cluster.8:[Dan][]}", context); // Montreal
    }

    @Test
    public void testClusterByGeoMissingProp() {
        GeneralEntityClusterer clusterer = createClusterer("GEO:geo", 2);

        Collection<FL_Entity> entities = new LinkedList<FL_Entity>();
        entities.add(createEntity("Amanda", FL_EntityTag.ACCOUNT, "Toronto", "CAN", 43.653226, -79.38318429999998, 2000, 3));
        entities.add(createEntity("Bob", FL_EntityTag.ACCOUNT, "Vancouver", "CAN", 49.2827, -123.1207, 5, 7));
        entities.add(createEntity("Steve", FL_EntityTag.ACCOUNT, "Victoria", "CAN", 48.4222, -123.3657, 5, 7));
        entities.add(createEntity("Dan", FL_EntityTag.ACCOUNT, "Montreal", "CAN", 45.5017, -73.5673, 5, 7));
        entities.add(createEntity("Alf", FL_EntityTag.ACCOUNT, "London", "GBR", 51.5072, -0.1275, 8, 7));

        FL_Entity missingGeoEntity = createEntity("Zulu", FL_EntityTag.ACCOUNT, "NYC", "USA", 40.7143528, -74.0059731, 6, 13);
        // Index 2 is the geo property: createEntity adds inflowing, outflowing, then geo.
        missingGeoEntity.getProperties().remove(2);
        entities.add(missingGeoEntity);

        ClusterContext context = new ClusterContext();
        context.addEntities(entities);
        context = clusterer.clusterEntities(entities, context);

        assertClusterEquals("c.cluster.1", "{c.cluster.1:[Zulu][]}", context); // Unknown
        assertClusterEquals("c.cluster.2", "{c.cluster.2:[Alf][]}", context); // Europe
        assertClusterEquals("c.cluster.3", "{c.cluster.3:[][c.cluster.4]}", context); // North America
        assertClusterEquals("c.cluster.4", "{c.cluster.4:[][c.cluster.5]}", context); // CAN
        assertClusterEquals("c.cluster.5", "{c.cluster.5:[][c.cluster.8,c.cluster.7,c.cluster.6]}", context); // CAN
        assertClusterEquals("c.cluster.6", "{c.cluster.6:[Amanda][]}", context); // Toronto
        assertClusterEquals("c.cluster.7", "{c.cluster.7:[Bob,Steve][]}", context); // Vancouver and Victoria
        assertClusterEquals("c.cluster.8", "{c.cluster.8:[Dan][]}", context); // Montreal
    }
}