com.ikanow.infinit.e.api.knowledge.processing.AggregationUtils.java Source code

Java tutorial

Introduction

Here is the source code for com.ikanow.infinit.e.api.knowledge.processing.AggregationUtils.java

Source

/*******************************************************************************
 * Copyright 2012, The Infinit.e Open Source Project.
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License, version 3,
 * as published by the Free Software Foundation.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 ******************************************************************************/
package com.ikanow.infinit.e.api.knowledge.processing;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.elasticsearch.client.action.search.SearchRequestBuilder;
import org.elasticsearch.index.query.BoolFilterBuilder;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.CrossVersionQueryBuilders;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.NestedFilterBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.QueryFilterBuilder;
import org.elasticsearch.index.search.geo.GeoHashUtils;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogram;
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogram.Interval;
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.Facets;
import org.elasticsearch.search.facet.datehistogram.DateHistogramFacet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facets.CrossVersionFacetBuilder;
import org.elasticsearch.search.facets.CrossVersionFacetBuilders;
import org.elasticsearch.search.facets.FacetUtils;

import com.ikanow.infinit.e.api.knowledge.QueryHandler;
import com.ikanow.infinit.e.api.knowledge.aliases.AliasLookupTable;
import com.ikanow.infinit.e.data_model.api.ResponsePojo;
import com.ikanow.infinit.e.data_model.api.knowledge.AdvancedQueryPojo;
import com.ikanow.infinit.e.data_model.api.knowledge.GeoAggregationPojo;
import com.ikanow.infinit.e.data_model.api.knowledge.AdvancedQueryPojo.QueryOutputPojo.AggregationOutputPojo;
import com.ikanow.infinit.e.data_model.store.document.AssociationPojo;
import com.ikanow.infinit.e.data_model.store.document.DocumentPojo;
import com.ikanow.infinit.e.data_model.store.document.EntityPojo;
import com.ikanow.infinit.e.data_model.store.feature.entity.EntityFeaturePojo;
import com.ikanow.infinit.e.data_model.utils.GeoOntologyMapping;
import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;

public class AggregationUtils {

    // Utility class:

    /**
     * Simple mutable holder for a set of geo aggregation points together with two counts.
     * <p>
     * {@link #loadAggregationResults} uses {@link #geotags} (when non-null) as the starting set
     * for the "geo" facet results and reads {@link #minCount} / {@link #maxCount} to seed its
     * running count bounds.
     */
    public static class GeoContainer {
        /** Geo points collected so far; stays null until something assigns it. */
        public Set<GeoAggregationPojo> geotags;
        /** Starts at 0; presumably the lowest count among {@link #geotags} - confirm with the code that fills it in. */
        public long minCount;
        /** Starts at 0; presumably the highest count among {@link #geotags} - confirm with the code that fills it in. */
        public long maxCount;
    }

    ////////////////////////////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////

    // OUTPUT PARSING - TOP LEVEL

    /**
     * Transfers the facet and aggregation results of a search into the response object.
     * <p>
     * Facets are dispatched on their name: "geo", "time", "events", "facts", "sourceTags",
     * "sourceTypes", "sourceKeys", and any name starting with "moments." (the rest of the name
     * becomes the key in the moments map). Of the aggregations only the one named "moments" is
     * read; it is converted into a list of per-time-bucket objects stored under the "times" key
     * of the moments map. The moments map is only written to the response if it is non-empty.
     *
     * @param rp response object that receives the results through its setters
     * @param facets facet results of the search; may be null
     * @param aggs aggregation results of the search; may be null
     * @param aggOutParams aggregation settings of the query; {@code timesInterval} is read for
     *        the "time" facet and {@code moments.timesInterval} when any moments were produced
     * @param scoreStats handed to the event/fact parser; may be null
     * @param aliasLookup handed to the event/fact parser; may be null
     * @param entityTypeFilterStrings handed to the event/fact parser; may be null
     * @param assocVerbFilterStrings handed to the event/fact parser; may be null
     * @param extraAliasAggregatedGeo optional geotags to start the "geo" results from; may be
     *        null, and is only used when its {@code geotags} field is non-null
     */
    public static void loadAggregationResults(ResponsePojo rp, Facets facets, Aggregations aggs,
            AggregationOutputPojo aggOutParams, ScoringUtils scoreStats, AliasLookupTable aliasLookup,
            String[] entityTypeFilterStrings, String[] assocVerbFilterStrings,
            AggregationUtils.GeoContainer extraAliasAggregatedGeo) {
        HashMap<String, List<? extends Object>> moments = null;

        if ((null != facets) && (null != facets.getFacets())) {
            for (Map.Entry<String, Facet> facet : facets.getFacets().entrySet()) {
                String facetName = facet.getKey();

                if (facetName.equals("geo")) {
                    TermsFacet geoFacet = (TermsFacet) facet.getValue();
                    Set<GeoAggregationPojo> geoCounts = null;
                    int nHighestCount = -1;
                    int nLowestCount = Integer.MAX_VALUE;
                    // If we've got some geotags from the alias masters then start with them:
                    if ((null != extraAliasAggregatedGeo) && (null != extraAliasAggregatedGeo.geotags)) {
                        geoCounts = extraAliasAggregatedGeo.geotags;
                        // (the running maximum starts from the container's maximum and the running
                        //  minimum from its minimum - these two used to be assigned the wrong way round)
                        nHighestCount = (int) extraAliasAggregatedGeo.maxCount;
                        nLowestCount = (int) extraAliasAggregatedGeo.minCount;
                    } else {
                        geoCounts = new TreeSet<GeoAggregationPojo>();
                    }
                    for (TermsFacet.Entry geo : geoFacet.getEntries()) {
                        // Term layout as used here: char 0 is the ontology code, the geohash starts
                        // at char 2 (char 1 is skipped, presumably a separator)
                        String geoTerm = FacetUtils.getTerm(geo);
                        double[] loc = GeoHashUtils.decode(geoTerm.substring(2));
                        GeoAggregationPojo geoObj = new GeoAggregationPojo(loc[0], loc[1]);
                        geoObj.count = geo.getCount();
                        geoObj.type = GeoOntologyMapping.decodeOntologyCode(geoTerm.charAt(0));
                        geoCounts.add(geoObj);
                        // (note this aggregates geo points whose decoded lat/longs are the same, which can result in slightly fewer records than requested)
                        // (note the aggregation writes the aggregated count into geoObj.count)

                        // NOTE(review): the comparisons use geoObj.count (possibly modified by the add
                        // above) but the value stored is the raw facet count - confirm that is intended
                        if (geoObj.count > nHighestCount) {
                            nHighestCount = geo.getCount();
                        }
                        if (geoObj.count < nLowestCount) {
                            nLowestCount = geo.getCount();
                        }
                    }
                    rp.setGeo(geoCounts, nHighestCount, nLowestCount);
                } //(TESTED)
                else if (facetName.equals("time")) {
                    DateHistogramFacet timeFacet = (DateHistogramFacet) facet.getValue();
                    rp.setTimes(timeFacet.getEntries(), QueryHandler.getInterval(aggOutParams.timesInterval, 'm'));
                } //(TESTED)
                else if (facetName.equals("events")) {
                    TermsFacet eventsFacet = (TermsFacet) facet.getValue();
                    rp.setEvents(parseEventAggregationOutput("Event", eventsFacet, scoreStats, aliasLookup,
                            entityTypeFilterStrings, assocVerbFilterStrings));
                } else if (facetName.equals("facts")) {
                    TermsFacet factsFacet = (TermsFacet) facet.getValue();
                    rp.setFacts(parseEventAggregationOutput("Fact", factsFacet, scoreStats, aliasLookup,
                            entityTypeFilterStrings, assocVerbFilterStrings));
                } //TESTED x2
                else if (facetName.equals("sourceTags")) {
                    TermsFacet tagsFacet = (TermsFacet) facet.getValue();
                    rp.setSourceMetaTags(tagsFacet.getEntries());
                } else if (facetName.equals("sourceTypes")) {
                    TermsFacet typesFacet = (TermsFacet) facet.getValue();
                    rp.setSourceMetaTypes(typesFacet.getEntries());
                } else if (facetName.equals("sourceKeys")) {
                    TermsFacet keysFacet = (TermsFacet) facet.getValue();
                    rp.setSources(keysFacet.getEntries());
                } //TESTED x3
                else if (facetName.startsWith("moments.")) {
                    // Moments (basic functionality): one date histogram per requested entity,
                    // keyed by whatever follows the "moments." prefix
                    DateHistogramFacet momentFacet = (DateHistogramFacet) facet.getValue();
                    if (null == moments) {
                        moments = new HashMap<String, List<? extends Object>>();
                    }
                    moments.put(facetName.substring(8), momentFacet.getEntries());
                } //TESTED

            } //(end loop over generated facets)
        }

        if ((null != aggs) && (null != aggs.asMap())) {
            for (Map.Entry<String, Aggregation> agg : aggs.asMap().entrySet()) {
                if (!agg.getKey().equals("moments")) {
                    continue; // (only the "moments" aggregation is understood here)
                }
                if (null == moments) {
                    moments = new HashMap<String, List<? extends Object>>();
                }

                DateHistogram val = (DateHistogram) agg.getValue();

                //TODO (INF-2688): Finalize format
                BasicDBList dbl = new BasicDBList();
                for (DateHistogram.Bucket dateBucket : val.getBuckets()) {
                    long bucketTime = dateBucket.getKeyAsNumber().longValue();
                    if (bucketTime <= 0) {
                        continue; // (buckets with a non-positive time key are dropped)
                    }
                    BasicDBObject dataBucketDbo = new BasicDBObject();
                    dataBucketDbo.put("time", bucketTime);
                    dataBucketDbo.put("count", dateBucket.getDocCount());
                    for (Map.Entry<String, Aggregation> dateAggs : dateBucket.getAggregations().asMap()
                            .entrySet()) {
                        if (!dateAggs.getKey().equals("geo")) {
                            continue;
                        }
                        BasicDBList dbl_geo = new BasicDBList();
                        MultiBucketsAggregation geoVal = (MultiBucketsAggregation) dateAggs.getValue();

                        // (stays at Long.MIN_VALUE if the sub-aggregation has no buckets)
                        long nHighestCount = Long.MIN_VALUE;
                        for (MultiBucketsAggregation.Bucket geoBucket : geoVal.getBuckets()) {
                            // (same key layout as the "geo" facet terms: ontology code at 0, geohash from 2)
                            String geoKey = geoBucket.getKey();
                            double[] loc = GeoHashUtils.decode(geoKey.substring(2));
                            GeoAggregationPojo geoObj = new GeoAggregationPojo(loc[0], loc[1]);
                            BasicDBObject geoDbo = new BasicDBObject(4);
                            geoDbo.put("lat", geoObj.lat);
                            geoDbo.put("lon", geoObj.lon);
                            geoDbo.put("count", geoBucket.getDocCount());
                            geoDbo.put("type", GeoOntologyMapping.decodeOntologyCode(geoKey.charAt(0)));
                            dbl_geo.add(geoDbo);

                            if (geoBucket.getDocCount() > nHighestCount) {
                                nHighestCount = geoBucket.getDocCount();
                            }
                        }
                        dataBucketDbo.put("maxGeoCount", nHighestCount);
                        dataBucketDbo.put("geo", dbl_geo);
                    }
                    dbl.add(dataBucketDbo);
                }
                moments.put("times", dbl);
            } //(end loop over generated aggregations)
        }

        if ((null != moments) && !moments.isEmpty()) {
            rp.setMoments(moments, QueryHandler.getInterval(aggOutParams.moments.timesInterval, 'm'));
        }

    }//TESTED

    ////////////////////////////////////////////////////////////////////////////////////////////////////////////

    // OUTPUT PARSING - UTILS:

    /**
     * Adds the facets/aggregations requested by {@code aggregation} to the search request.
     * <p>
     * Depending on which fields of {@code aggregation} are set, this registers: a "geo" terms
     * facet, a "time" date histogram facet, a "moments" date histogram aggregation with a "geo"
     * terms sub-aggregation, one "moments.&lt;entity index&gt;" date histogram facet per entry
     * of {@code moments.entityList}, "events" and "facts" terms facets restricted by a regex,
     * "sourceTags"/"sourceTypes" terms facets and a "sourceKeys" terms facet.
     * <p>
     * Side effect on the argument: any {@code timesInterval} / {@code moments.timesInterval}
     * containing the letter "m" is rewritten to "month", and a missing
     * {@code moments.timesInterval} is filled in from {@code timesInterval} (or "m").
     *
     * @param aggregation the requested output aggregations; nothing is done if null
     * @param aliasLookup used to expand the entities of {@code moments.entityList}; may be null
     * @param geoLowAccuracy when true the "moments" geo aggregation is not added
     * @param entTypeFilterStrings entity types to keep (or, if the first entry starts with '-',
     *        to exclude) in the events/facts facets; may be null
     * @param assocVerbFilterStrings verb categories to keep (or exclude, same '-' convention)
     *        in the events/facts facets; may be null
     * @param searchSettings the search request the facets/aggregations are added to
     * @param parentFilterObj filter applied to every facet when non-null
     */
    public static void parseOutputAggregation(AdvancedQueryPojo.QueryOutputPojo.AggregationOutputPojo aggregation,
            AliasLookupTable aliasLookup, boolean geoLowAccuracy, String[] entTypeFilterStrings,
            String[] assocVerbFilterStrings, SearchRequestBuilder searchSettings,
            BoolFilterBuilder parentFilterObj) {
        if (null == aggregation) {
            return; // (nothing requested)
        }

        // 1.] Go through aggregation list

        // 1.1] Apply "simple specifications" if necessary

        // Geo

        if ((null != aggregation.geoNumReturn) && (aggregation.geoNumReturn > 0)) {
            CrossVersionFacetBuilder.TermsFacetBuilder fb = CrossVersionFacetBuilders.termsFacet("geo")
                    .field(DocumentPojo.locs_).size(aggregation.geoNumReturn);
            // Gross raw handling for facets
            if (null != parentFilterObj) {
                fb = fb.facetFilter(parentFilterObj);
            }
            searchSettings.addFacet(fb);
        } //(TESTED)

        // Temporal

        if (null != aggregation.timesInterval) {
            if (aggregation.timesInterval.contains("m")) {
                aggregation.timesInterval = "month";
            }
            CrossVersionFacetBuilder.DateHistogramFacetBuilder fb = CrossVersionFacetBuilders
                    .dateHistogramFacet("time").field(DocumentPojo.publishedDate_)
                    .interval(aggregation.timesInterval);
            // Gross raw handling for facets
            if (null != parentFilterObj) {
                fb = fb.facetFilter(parentFilterObj);
            }
            searchSettings.addFacet(fb);

            //TODO (INF-2688): if using certain types of moments then don't want this?
        } //(TESTED)

        // Temporal Moments

        if (null != aggregation.moments) {
            if (null == aggregation.moments.timesInterval) {
                // (default to the top-level interval, or to months if that is missing too)
                if (null != aggregation.timesInterval) {
                    aggregation.moments.timesInterval = aggregation.timesInterval;
                } else {
                    aggregation.moments.timesInterval = "m";
                }
            }
            if (aggregation.moments.timesInterval.contains("m")) {
                aggregation.moments.timesInterval = "month";
            }

            //TODO (INF-2688): Other cross filter type things
            if (!geoLowAccuracy && (null != aggregation.moments.geoNumReturn)
                    && (aggregation.moments.geoNumReturn > 0)) {
                DateHistogramBuilder timeAgg = AggregationBuilders.dateHistogram("moments")
                        .field(DocumentPojo.publishedDate_)
                        .interval(new Interval(aggregation.moments.timesInterval));
                TermsBuilder geoAgg = AggregationBuilders.terms("geo").field(DocumentPojo.locs_)
                        .size(aggregation.moments.geoNumReturn);
                timeAgg.subAggregation(geoAgg);
                searchSettings.addAggregation(timeAgg);
            }

            if (null != aggregation.moments.entityList) {
                for (String entIndex : aggregation.moments.entityList) {

                    CrossVersionFacetBuilder.DateHistogramFacetBuilder fb = CrossVersionFacetBuilders
                            .dateHistogramFacet("moments." + entIndex).field(DocumentPojo.publishedDate_)
                            .interval(aggregation.moments.timesInterval);

                    EntityFeaturePojo alias = null;
                    if (null != aliasLookup) {
                        alias = aliasLookup.getAliases(entIndex);
                    }
                    if (null == alias) { // no alias: match the entity index only
                        fb = fb.facetFilter(FilterBuilders.nestedFilter(DocumentPojo.entities_,
                                FilterBuilders.termFilter(EntityPojo.index_, entIndex)));
                    } //TESTED
                    else {
                        // Semantic links are matched as phrases against the full text ...
                        QueryFilterBuilder qfb = null;
                        if ((null != alias.getSemanticLinks()) && !alias.getSemanticLinks().isEmpty()) {
                            BoolQueryBuilder qb = QueryBuilders.boolQuery();
                            for (String textAlias : alias.getSemanticLinks()) {
                                qb = qb.should(CrossVersionQueryBuilders.matchPhraseQuery(DocumentPojo.fullText_,
                                        textAlias));
                            }
                            qfb = FilterBuilders.queryFilter(qb);
                        } //TESTED
                        // ... and aliased indexes are matched against the nested entities
                        if (!alias.getAlias().isEmpty()) {
                            NestedFilterBuilder nfb = FilterBuilders.nestedFilter(DocumentPojo.entities_,
                                    FilterBuilders.termsFilter(EntityPojo.index_, entIndex, alias.getAlias()));
                            if (null == qfb) {
                                fb = fb.facetFilter(nfb);
                            } //TESTED
                            else {
                                BoolFilterBuilder bfb = FilterBuilders.boolFilter().should(nfb).should(qfb);
                                fb = fb.facetFilter(bfb);
                            } //TESTED
                        } else if (null != qfb) {
                            fb = fb.facetFilter(qfb);
                        } //TESTED
                    } //TESTED

                    // Gross raw handling for facets
                    // NOTE(review): this is a second facetFilter call on the same builder - confirm the
                    // builder combines it with the entity filter set above rather than replacing it
                    if (null != parentFilterObj) {
                        fb = fb.facetFilter(parentFilterObj);
                    }
                    searchSettings.addFacet(fb);
                }
            } //(end list over entities)
        } //TESTED

        // Entities - due to problems with significance, handled on a document by document basis, see Significance helper class

        // Associations (Events/Facts)

        boolean bEventsRequested = (null != aggregation.eventsNumReturn) && (aggregation.eventsNumReturn > 0);
        boolean bFactsRequested = (null != aggregation.factsNumReturn) && (aggregation.factsNumReturn > 0);

        // Entity type and association verb category filters (only needed for events/facts)
        String entTypeRegex = null;
        String verbCatRegex = null;
        if (bEventsRequested || bFactsRequested) {
            // (entity types: each alternative is ".*?/<type>", can't match greedily because of
            //  the 2nd instance of entity type; types are lower-cased)
            entTypeRegex = buildFilterAlternationRegex(entTypeFilterStrings, "", ".*?/", true);
            // (verb categories: the group is preceded by the "|" separating entity1 from the verb)
            verbCatRegex = buildFilterAlternationRegex(assocVerbFilterStrings, "\\|", "", false);
        }
        //TESTED (all combinations of 1/2 people, 1/2 verbs)

        if (bEventsRequested) {
            CrossVersionFacetBuilder.TermsFacetBuilder fb = CrossVersionFacetBuilders.termsFacet("events")
                    .field(AssociationPojo.assoc_index_).size(aggregation.eventsNumReturn)
                    .nested(DocumentPojo.associations_);
            fb.regex(buildAssociationIndexRegex("Event", entTypeRegex, verbCatRegex));

            // Gross raw handling for facets
            if (null != parentFilterObj) {
                fb = fb.facetFilter(parentFilterObj);
            }
            searchSettings.addFacet(fb);
        }
        if (bFactsRequested) {
            CrossVersionFacetBuilder.TermsFacetBuilder fb = CrossVersionFacetBuilders.termsFacet("facts")
                    .field(AssociationPojo.assoc_index_).size(aggregation.factsNumReturn)
                    .nested(DocumentPojo.associations_);
            fb.regex(buildAssociationIndexRegex("Fact", entTypeRegex, verbCatRegex));

            // Gross raw handling for facets
            if (null != parentFilterObj) {
                fb = fb.facetFilter(parentFilterObj);
            }
            searchSettings.addFacet(fb);
        }

        // Source management/monitoring

        if ((null != aggregation.sourceMetadata) && (aggregation.sourceMetadata > 0)) {
            // (the parent filter is applied once, and only when present, like for every other facet)
            CrossVersionFacetBuilder.TermsFacetBuilder fb = CrossVersionFacetBuilders.termsFacet("sourceTags")
                    .field(DocumentPojo.tags_).size(aggregation.sourceMetadata);
            CrossVersionFacetBuilder.TermsFacetBuilder fb1 = CrossVersionFacetBuilders.termsFacet("sourceTypes")
                    .field(DocumentPojo.mediaType_).size(aggregation.sourceMetadata);
            // Gross raw handling for facets
            if (null != parentFilterObj) {
                fb = fb.facetFilter(parentFilterObj);
                fb1 = fb1.facetFilter(parentFilterObj);
            }
            searchSettings.addFacet(fb);
            searchSettings.addFacet(fb1);
        }

        if ((null != aggregation.sources) && (aggregation.sources > 0)) {
            CrossVersionFacetBuilder.TermsFacetBuilder fb = CrossVersionFacetBuilders.termsFacet("sourceKeys")
                    .field(DocumentPojo.sourceKey_).size(aggregation.sources);
            // Gross raw handling for facets
            if (null != parentFilterObj) {
                fb = fb.facetFilter(parentFilterObj);
            }
            searchSettings.addFacet(fb);
        }

    } //TESTED

    /**
     * Turns a list of filter strings into a regex group of quoted alternatives.
     * <p>
     * If the first filter string starts with '-' the filter is negative: a leading '-' is
     * stripped from every entry, the group becomes a negative lookahead and "[^|]*" is appended
     * to consume the actual field. Otherwise the group is a plain non-capturing alternation.
     * A "|" inside a filter string is replaced by "%7C" before quoting.
     *
     * @param filterStrings the filter strings; null or empty means "no filter"
     * @param groupPrefix regex text emitted before the group
     * @param termPrefix regex text emitted before every quoted alternative
     * @param bLowerCaseTerms whether to lower-case each alternative before quoting it
     * @return the regex fragment, or null when there is no filter
     */
    private static String buildFilterAlternationRegex(String[] filterStrings, String groupPrefix,
            String termPrefix, boolean bLowerCaseTerms) {
        if ((null == filterStrings) || (0 == filterStrings.length)) {
            return null;
        }
        boolean bNegative = filterStrings[0].startsWith("-");
        StringBuilder regex = new StringBuilder(groupPrefix).append(bNegative ? "(?!" : "(?:");
        for (String filterString : filterStrings) {
            if (bNegative && filterString.startsWith("-")) {
                filterString = filterString.substring(1);
            }
            filterString = filterString.replace("|", "%7C");
            if (bLowerCaseTerms) {
                filterString = filterString.toLowerCase();
            }
            regex.append(termPrefix).append(Pattern.quote(filterString)).append('|');
        }
        regex.setLength(regex.length() - 1); // (remove trailing |)
        regex.append(")");
        if (bNegative) {
            regex.append("[^|]*"); // (now the actual field, if a -ve lookahead)
        }
        return regex.toString();
    }

    /**
     * Builds the regex applied to association index terms of the given type, which this code
     * treats as "&lt;type&gt;|entity1|verb category|entity2|...".
     *
     * @param sAssocType the leading association type, "Event" or "Fact"
     * @param entTypeRegex entity type fragment applied to entity1 and entity2, or null
     * @param verbCatRegex verb category fragment (including its leading "|"), or null
     * @return the complete regex
     */
    private static String buildAssociationIndexRegex(String sAssocType, String entTypeRegex,
            String verbCatRegex) {
        StringBuilder regex = new StringBuilder("^").append(sAssocType).append("\\|");
        // entity1
        if (null != entTypeRegex) {
            regex.append(entTypeRegex);
        } else {
            regex.append("[^|]*");
        }
        // verb category
        if (null != verbCatRegex) {
            regex.append(verbCatRegex);
        } else if (null != entTypeRegex) {
            regex.append("\\|[^|]*");
        } else {
            regex.append(".*");
        }
        // entity2 and whatever follows
        if (null != entTypeRegex) {
            regex.append("\\|").append(entTypeRegex);
            regex.append(".*");
        } else {
            regex.append("\\|.*");
        }
        //DEBUG
        //System.out.println("REGEX==" + regex.toString());
        return regex.toString();
    }

    // 3.1] Utility to parse individual aggregation (facet) element

    /**
     * Splits an association index term (with its leading "Event|" / "Fact|" already removed)
     * into four optional groups separated by "|": (1) entity1 index of the form "text/type",
     * (2) verb category, (3) entity2 index of the form "text/type", (4) the remainder, which is
     * used as the geo index.
     */
    private static final Pattern eventIndexParser = Pattern
            .compile("([^|]+/[^/|]+)?\\|([^|]+)?\\|([^|]+/[^|/]+)?\\|(.+)?");

    /**
     * Converts the entries of an "events" or "facts" terms facet into JSON objects.
     * <p>
     * Examples of terms (after the "Event|" / "Fact|" prefix) that are accepted:
     * <pre>
     *   mark kelly/person|family relation|gabrielle giffords/person|
     *   |family relation|gabrielle giffords/person|
     *   mark kelly/person||gabrielle giffords/person|
     *   mark kelly/person|family relation||
     *   mark kelly/person|family relation|gabrielle giffords/person|loca,tion/city
     * </pre>
     * Terms that do not match the expected layout are skipped. "%7C" in any field is turned
     * back into "|". If an alias table is supplied, entity and geo indexes are replaced by the
     * index of their alias master; an entity whose master index is "discard" causes the whole
     * entry to be dropped, whereas a discarded geo index is simply left out as long as both
     * entities are present. Entries changed by aliasing are re-checked against the entity type
     * filter.
     *
     * @param sEventOrFact "Event" or "Fact"; written as the association type and used to
     *        step over the term prefix
     * @param facet the terms facet to convert
     * @param scoreStats used to add significance fields; may be null, in which case the
     *        association significance is written as 0.0
     * @param aliasLookup alias table; may be null (no aliasing)
     * @param entityTypeFilterStrings entity type filter used for the re-check; may be null
     * @param assocVerbFilterStrings not used by this method
     * @return one JSON object per retained facet entry, in facet order
     */
    private static List<BasicDBObject> parseEventAggregationOutput(String sEventOrFact, TermsFacet facet,
            ScoringUtils scoreStats, AliasLookupTable aliasLookup, String[] entityTypeFilterStrings,
            String[] assocVerbFilterStrings) {
        ArrayList<BasicDBObject> facetList = new ArrayList<BasicDBObject>(facet.getEntries().size());

        // (Might be needed if we alias and there are filter strings specified)
        HashSet<String> entTypeFilter = null;

        for (TermsFacet.Entry facetEl : facet.getEntries()) {
            //DEBUG
            //System.out.println("TERM= " + FacetUtils.getTerm(facetEl));

            String term = FacetUtils.getTerm(facetEl).substring(sEventOrFact.length() + 1); // (step over "Fact|" or "Event|")

            // Parse the string
            Matcher m = eventIndexParser.matcher(term);
            if (!m.matches()) {
                continue;
            }
            BasicDBObject json = new BasicDBObject();
            json.put(AssociationPojo.assoc_type_, sEventOrFact);
            String sEnt1_index = unescapePipes(m.group(1));
            String sVerbCat = m.group(2);
            if (null != sVerbCat) {
                json.put(AssociationPojo.verb_category_, unescapePipes(sVerbCat));
            }
            String sEnt2_index = unescapePipes(m.group(3));
            String sGeoIndex = unescapePipes(m.group(4));
            json.put(AssociationPojo.doccount_, facetEl.getCount());

            // Add significance if possible (uses the indexes as stored, ie before aliasing)
            if ((null == scoreStats)
                    || !scoreStats.calcAssocationSignificance(sEnt1_index, sEnt2_index, sGeoIndex, json)) {
                // These fields are optional:
                //json.put("entity1_sig", 0.0);
                //json.put("entity2_sig", 0.0);
                //json.put("geo_sig", 0.0);
                // Mandatory:
                json.put(AssociationPojo.assoc_sig_, 0.0);
            }

            boolean bTransformedByAlias = false; // when true need to re-check vs entity type filter

            // Now write the last few values (adjusted for aliases if necessary) into the JSON object
            if (null != sEnt1_index) {
                EntityFeaturePojo alias = findAliasMaster(aliasLookup, sEnt1_index);
                if (null != alias) {
                    sEnt1_index = alias.getIndex();
                    if (sEnt1_index.equalsIgnoreCase("discard")) {
                        continue;
                    } //TESTED
                    bTransformedByAlias = true;
                }
                json.put(AssociationPojo.entity1_index_, sEnt1_index);
            }
            if (null != sEnt2_index) {
                EntityFeaturePojo alias = findAliasMaster(aliasLookup, sEnt2_index);
                if (null != alias) {
                    sEnt2_index = alias.getIndex();
                    if (sEnt2_index.equalsIgnoreCase("discard")) {
                        continue;
                    } //TESTED (cut and paste of ent index1)
                    bTransformedByAlias = true;
                }
                json.put(AssociationPojo.entity2_index_, sEnt2_index);
            }
            if (null != sGeoIndex) {
                EntityFeaturePojo alias = findAliasMaster(aliasLookup, sGeoIndex);
                if (null != alias) {
                    sGeoIndex = alias.getIndex();
                    if (sGeoIndex.equalsIgnoreCase("discard")) {
                        if ((null == sEnt1_index) || (null == sEnt2_index)) {
                            continue; // event/fact now meaningless
                        }
                        sGeoIndex = null; // event/fact is still valid even without the geo
                    } //TESTED
                    bTransformedByAlias = true;
                }
                // (a discarded geo is left out altogether instead of being written as a null field)
                if (null != sGeoIndex) {
                    json.put(AssociationPojo.geo_index_, sGeoIndex);
                }
            }
            //TESTED

            //Whenever aliases are applied, need to re-check whether is this now a filter item
            //ideally have a single code block for doing this in scoringutils_association.
            if (bTransformedByAlias) {
                // (only create the set once, and only if needed)
                if ((null == entTypeFilter) && (null != entityTypeFilterStrings)) {
                    entTypeFilter = new HashSet<String>();
                }
                if (!recheckFiltersAfterTransform(json, aliasLookup, entityTypeFilterStrings, entTypeFilter)) {
                    continue; // ie just bypass the facetList.add
                }
            } //TESTED

            facetList.add(json);
        }
        return facetList;
    }//TESTED (see cases above - difficult to make this test case standalone because of TermsFacet.Entry)

    /** Reverses the "|" to "%7C" escaping of an index field; null stays null. */
    private static String unescapePipes(String sField) {
        return (null == sField) ? null : sField.replace("%7C", "|");
    }

    /** Looks up the alias master of an index; returns null if there is no alias table or no alias. */
    private static EntityFeaturePojo findAliasMaster(AliasLookupTable aliasLookup, String sIndex) {
        return (null == aliasLookup) ? null : aliasLookup.getAliasMaster(sIndex);
    }

    //////////////////////////////////

    // Utility:

    /**
     * Re-applies the entity type filter to an association whose indexes were changed by aliasing.
     * <p>
     * The filter is negative when the first filter string starts with '-', positive otherwise.
     * On the first call with an empty {@code entTypeFilter} the set is filled with the
     * lower-cased filter strings (minus the leading '-' for a negative filter); later calls
     * reuse it.
     *
     * @param json the association to check
     * @param aliasLookup not used by this method
     * @param entityTypeFilterStrings the raw filter strings; may be null (no filter set is built)
     * @param entTypeFilter cache of the normalised filter strings, filled in here when empty;
     *        must be non-null whenever {@code entityTypeFilterStrings} is non-null
     * @return the result of {@code ScoringUtils_Associations.filterAndAliasAssociation}
     */
    private static boolean recheckFiltersAfterTransform(BasicDBObject json, AliasLookupTable aliasLookup,
            String[] entityTypeFilterStrings, HashSet<String> entTypeFilter) {
        // (approximate copy paste from ScoringUtils to initialize these objects;
        //  the flag is recomputed on every call since that is cheap)
        boolean bPositiveFilter = true;

        if (null != entityTypeFilterStrings) {
            bPositiveFilter = ('-' != entityTypeFilterStrings[0].charAt(0));

            if (entTypeFilter.isEmpty()) { // (first time through per call only)
                for (String sRawType : entityTypeFilterStrings) {
                    boolean bStripMinus = !bPositiveFilter && ('-' == sRawType.charAt(0));
                    String sEntityType = bStripMinus ? sRawType.substring(1) : sRawType;
                    entTypeFilter.add(sEntityType.toLowerCase());
                }
            }
        }
        // (Only need to re-filter on entities)
        boolean bKeep = ScoringUtils_Associations.filterAndAliasAssociation(json, null, false, bPositiveFilter,
                true, entTypeFilter, null);
        return bKeep;
    }//TESTED

}