ubic.pubmedgate.resolve.ResolutionRDFModel.java Source code

Here is the source code for ubic.pubmedgate.resolve.ResolutionRDFModel.java, the WhiteText class that manages the RDF model linking brain region mentions from PubMed abstracts to lexicon terms and ontology concepts.
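
The class loads a lexicon RDF model, runs a set of RDFResolver matchers (optionally combined with MentionEditor normalizers) over brain region mentions, and reports matching statistics. A minimal usage sketch, assuming the WhiteText configuration key resolve.Lexicon.RDF points at a lexicon RDF file (the method names come from the class below; the configuration values and surrounding exception handling are environment-specific):

    ResolutionRDFModel resolutionModel = new ResolutionRDFModel(); // loads resolve.Lexicon.RDF
    resolutionModel.getStats();       // log term, concept and mention counts
    resolutionModel.createMatches3(); // run all resolvers plus mention editors
    resolutionModel.writeOut();       // persist the enriched model to the configured file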

Source

/*
 * The WhiteText project
 * 
 * Copyright (c) 2012 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package ubic.pubmedgate.resolve;

import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import ubic.BAMSandAllen.JenaUtil;
import ubic.BAMSandAllen.Util;
import ubic.BAMSandAllen.Vocabulary;
import ubic.pubmedgate.Config;
import ubic.pubmedgate.resolve.RDFResolvers.BagOfStemsRDFMatcher;
import ubic.pubmedgate.resolve.RDFResolvers.BagOfWordsRDFMatcher;
import ubic.pubmedgate.resolve.RDFResolvers.RDFResolver;
import ubic.pubmedgate.resolve.RDFResolvers.RDFResolverImpl;
import ubic.pubmedgate.resolve.RDFResolvers.SimpleExactRDFMatcher;
import ubic.pubmedgate.resolve.RDFResolvers.SimpleMappingRDFMatcher;
import ubic.pubmedgate.resolve.RDFResolvers.StemRDFMatcher;
import ubic.pubmedgate.resolve.RDFResolvers.ThreeLetterRDFMatcher;
import ubic.pubmedgate.resolve.evaluation.LoadEvaluationSpreadSheets;
import ubic.pubmedgate.resolve.mentionEditors.BracketRemoverMentionEditor;
import ubic.pubmedgate.resolve.mentionEditors.CytoPrefixMentionEditor;
import ubic.pubmedgate.resolve.mentionEditors.DirectionRemoverMentionEditor;
import ubic.pubmedgate.resolve.mentionEditors.DirectionSplittingMentionEditor;
import ubic.pubmedgate.resolve.mentionEditors.HemisphereStripMentionEditor;
import ubic.pubmedgate.resolve.mentionEditors.MentionEditor;
import ubic.pubmedgate.resolve.mentionEditors.NDotExpanderMentionEditor;
import ubic.pubmedgate.resolve.mentionEditors.NucleusOfTheRemoverMentionEditor;
import ubic.pubmedgate.resolve.mentionEditors.OfTheRemoverMentionEditor;
import ubic.pubmedgate.resolve.mentionEditors.RegionSuffixRemover;
import ubic.pubmedgate.statistics.NamedEntityStats;

import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.InfModel;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.NodeIterator;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.ResIterator;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Selector;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.rdf.model.impl.StatementImpl;
import com.hp.hpl.jena.reasoner.Reasoner;
import com.hp.hpl.jena.reasoner.ReasonerRegistry;
import com.hp.hpl.jena.vocabulary.RDF;
import com.hp.hpl.jena.vocabulary.RDFS;

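/**
 * Wraps a Jena RDF model that links brain region mentions (extracted from
 * PubMed abstracts) to lexicon terms and ontology concepts, and runs
 * RDFResolver matchers and MentionEditor normalizers over it.
 */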
public class ResolutionRDFModel {
    protected static Log log = LogFactory.getLog(ResolutionRDFModel.class);

    protected Model model;
    boolean reason;

    public ResolutionRDFModel(Model model, boolean reason) throws Exception {
        modelLoad(model, reason);
    }

    /**
     * Returns the resource with the same URI as r, anchored in this model.
     * 
     * @param r a resource, possibly from another model
     * @return the matching resource in this model
     */
    public Resource getResource(Resource r) {
        return model.createResource(r.getURI());
    }

    public ResolutionRDFModel() throws Exception {
        this(Config.config.getString("resolve.Lexicon.RDF"));
    }

    public ResolutionRDFModel(boolean empty) throws Exception {
        if (empty) {
            model = ModelFactory.createDefaultModel();
        } else {
            // note: model is left null here; use the no-argument or filename
            // constructors to get a loaded model
        }
    }

    public ResolutionRDFModel(String filename) throws Exception {
        Model modelFresh = ModelFactory.createDefaultModel();
        modelFresh.read(new FileInputStream(filename), null);
        modelLoad(modelFresh, true);
    }

    protected void modelLoad(Model loadedModel, boolean reason) throws Exception {
        this.model = loadedModel;
        this.reason = reason;
        log.info("Loading ontology");
        // model.read( Vocabulary.getLexiconURI() );

        // FIXME: should be read from the web rather than a local copy
        model.read(Config.config.getString("whitetext.brainlinks.templocation"));

        model.setNsPrefix("lexiconLinks", Vocabulary.getLexiconURI());
        model.setNsPrefix("TermSpace", Vocabulary.getLexiconSpaceURI());

        reason();
        getStats();
    }

    /**
     * Given a main name label, find the corresponding NeuroNames node.
     */
    public Resource getNNNodeByLabel(String label) {
        StmtIterator iterator = model.listStatements(null, RDF.type, Vocabulary.neuroname);
        while (iterator.hasNext()) {
            Statement s = iterator.nextStatement();
            Resource r = s.getSubject();
            String NNlabel = r.getProperty(RDFS.label).getLiteral().getString();
            if (NNlabel.equals(label)) {
                return r;
            }
        }
        return null;
    }

    public Set<Resource> getLinkedConcepts(Resource neuroterm) {
        Set<Resource> result = new HashSet<Resource>();
        // StmtIterator iterator = neuroterm.listProperties();
        StmtIterator iterator = model.listStatements(null, null, neuroterm);
        while (iterator.hasNext()) {
            Statement s = iterator.nextStatement();
            if (s.getSubject().isResource()) {
                Resource sbj = (Resource) s.getSubject().as(Resource.class);
                if (sbj.hasProperty(RDF.type, Vocabulary.neuroOntologyEntry)) {
                    result.add(sbj);
                }
            }
        }
        return result;
    }

    public String toStringNew(Resource r) {
        // print a resource
        String NNlabel = r.getProperty(RDFS.label).getLiteral().getString();
        String result = "\"" + NNlabel + "\" ";// + r.toString();

        Set<Resource> targets = new HashSet<Resource>();

        StmtIterator iterator = r.listProperties();
        while (iterator.hasNext()) {
            Statement s = iterator.nextStatement();
            if (s.getObject().isResource()) {
                Resource obj = (Resource) s.getObject().as(Resource.class);
                if (obj.hasProperty(RDF.type, Vocabulary.neuroterm)) {
                    targets.add(obj);
                }
            }

        }
        for (Resource target : targets) {
            String label = target.getProperty(RDFS.label).getLiteral().getString();
            result += "\n   " + label + " ";
            String connecting = getConnectingPredicatesShortNames(r, target).toString();
            result += connecting;
            result += " " + getLinkedConcepts(target);
        }
        return result;
    }

    public String toString(Resource r) {
        // print a resource
        String NNlabel = r.getProperty(RDFS.label).getLiteral().getString();
        String result = NNlabel + ", " + r.toString();

        StmtIterator iterator = r.listProperties();
        while (iterator.hasNext()) {
            Statement s = iterator.nextStatement();
            if (s.getObject().isResource()) {
                Resource obj = (Resource) s.getObject().as(Resource.class);
                if (obj.hasProperty(RDF.type, Vocabulary.neuroterm)) {

                    result += "\n   " + obj.getProperty(RDFS.label).getLiteral().getString() + " ("
                            + s.getPredicate().getLocalName() + ")";
                    // get the concepts linked to that neuroterm
                    for (Resource rr : getLinkedConcepts(obj)) {
                        log.info(rr.getProperty(RDFS.label).getLiteral().getString() + " " + rr.getURI());
                    }
                }
            }
        }
        return result;
    }

    public Set<Resource> getTerms() {
        Set<Resource> terms = JenaUtil.getSubjects(model.listStatements(null, RDF.type, Vocabulary.neuroterm));
        return terms;
    }

    public Set<Resource> getMentions() {
        Set<Resource> mentions = JenaUtil
                .getSubjects(model.listStatements(null, RDF.type, Vocabulary.neuromention));
        return mentions;
    }

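    /**
     * Replaces the model with an RDFS inference view. Downstream queries on
     * Vocabulary.match appear to rely on the reasoner materializing the more
     * specific match predicates (e.g. string_match_ignorecase) as generic
     * matches.
     */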
    public void reason() {
        // don't reason if it causes model copy issues (NeuroNames loader)
        if (!reason)
            return;
        Reasoner reasoner = ReasonerRegistry.getRDFSReasoner();
        InfModel infModel = ModelFactory.createInfModel(reasoner, model);
        // log.info( "Reasoning added:" + ( infModel.size() - model.size() ) + " RDF statements" );
        // log.info( "Valid? " + infModel.validate().isValid() );
        model = infModel;
    }

    public Set<Resource> getConnectionPartnerMentions() {
        Set<Resource> connectionPartners = JenaUtil
                .getSubjects(model.listStatements(null, Vocabulary.annotated_connection_partner, (RDFNode) null));
        return connectionPartners;
    }

    public Set<Resource> getUnMatchedMentions() {
        Set<Resource> matched = getMatchedMentions();
        Set<Resource> allMentions = getMentions();
        allMentions.removeAll(matched);
        return allMentions;
    }

    /**
     * Counts the PMIDs attached to the given mentions; a PMID shared by two
     * mentions is counted twice.
     * 
     * @param mentions
     * @return
     */
    public int getPMIDCount(Set<Resource> mentions) {
        int result = 0;
        for (Resource mention : mentions) {
            result += getPMIDs(mention).size();
        }
        return result;
    }

    public Set<Resource> getPMIDs() {
        StmtIterator nodeIt = model.listStatements(null, Vocabulary.in_PMID, (Resource) null);
        return JenaUtil.getObjects(nodeIt);
    }

    public Set<Resource> getPMIDs(Set<Resource> mentions) {
        Set<Resource> pmids = new HashSet<Resource>();
        for (Resource r : mentions) {
            pmids.addAll(getPMIDs(r));
        }
        return pmids;
    }

    public Set<Resource> getPMIDsForSpecies(String species) {
        Resource taxon = model.createResource("http://bio2rdf.org/taxon:" + species);
        ResIterator resIt = model.listResourcesWithProperty(Vocabulary.mentions_species, taxon);
        return resIt.toSet();
    }

    public Set<Resource> getSpeciesForPMID(Resource pmid) {
        NodeIterator nodeIt = model.listObjectsOfProperty(pmid, Vocabulary.mentions_species);
        Set<Resource> result = new HashSet<Resource>();
        while (nodeIt.hasNext()) {
            result.add((Resource) nodeIt.nextNode().as(Resource.class));
        }
        return result;
    }

    public Set<Resource> getConcepts() {
        return getConceptsBySource(Vocabulary.neuroOntologyEntry);
    }

    public Set<Resource> getNIFSTDConcepts() {
        return getConceptsBySource(Vocabulary.BIRNLexname);
    }

    public Set<Resource> getBredeConcepts() {
        return getConceptsBySource(Vocabulary.bredeName);
    }

    public Set<Resource> getBAMSConcepts() {
        return getConceptsBySource(Vocabulary.BAMSName);
    }

    public Set<Resource> getConceptsBySource(Resource type) {
        Set<Resource> concepts = JenaUtil.getSubjects(model.listStatements(null, RDF.type, type));
        return concepts;
    }

    public Set<Resource> getABAConcepts() {
        return getConceptsBySource(Vocabulary.ABAName);
    }

    // ABCD: Avian Brain Circuitry Database
    public Set<Resource> getABCDConcepts() {
        return getConceptsBySource(Vocabulary.ABCDName);
    }

    public Set<Resource> getNNConcepts() {
        return getConceptsBySource(Vocabulary.neuroname);
    }

    public Set<Resource> getMatchedMentions() {
        // a mention counts as matched if it has at least one Vocabulary.match
        // link to a neuroterm, so return every subject of a match statement
        Set<Resource> allMatched = JenaUtil
                .getSubjects(model.listStatements(null, Vocabulary.match, (RDFNode) null));
        return allMatched;
    }

    public boolean containsExactConnection(Resource mention, Resource neuroterm) {
        return model.contains(new StatementImpl(mention, Vocabulary.string_match_ignorecase, neuroterm));
    }

    public boolean accepted(Resource mention, Resource neuroConcept) {
        return model.contains(new StatementImpl(mention, Vocabulary.evaluation_accept, neuroConcept));
    }

    public boolean rejected(String mentionString, Resource neuroConcept) {
        return rejected(makeMentionNode(mentionString), neuroConcept);
    }

    public Resource makeMentionNode(String mentionString) {
        return Vocabulary.makeMentionNode(mentionString, model);
    }

    public boolean rejected(Resource mention, Resource neuroConcept) {
        return model.contains(new StatementImpl(mention, Vocabulary.evaluation_reject, neuroConcept));
    }

    public boolean specToGen(Resource mention, Resource neuroConcept) {
        return model.contains(new StatementImpl(mention, Vocabulary.evaluation_specific_to_general, neuroConcept));
    }

    public Set<Resource> getLinkedResources(Resource subject) {
        return JenaUtil.getObjects(subject.listProperties());
    }

    /**
     * Really slow; a faster approach is to use getLinkedResources and then intersect that set with all terms.
     * 
     * @param subject
     * @return
     */
    public Set<Resource> getLinkedNeuroTerms(Resource subject) {
        Set<Resource> result = new HashSet<Resource>();
        StmtIterator iterator = subject.listProperties();
        while (iterator.hasNext()) {
            Statement s = iterator.nextStatement();
            if (s.getObject().isResource()) {
                Resource obj = (Resource) s.getObject().as(Resource.class);

                if (obj.hasProperty(RDF.type, Vocabulary.neuroterm)) {
                    result.add(obj);
                    // targets.add( JenaUtil.getLabel( obj ) );
                }
            }
        }
        return result;
    }

    /**
     * Return the concepts that were evaluated against the mention
     * 
     * @param mention
     * @return
     */
    public Set<Resource> getMentionEvaluations(Resource mention) {
        return JenaUtil.getObjects(model.listStatements(mention, Vocabulary.evaluation_result, (RDFNode) null));
    }

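    // Data model note: ontology concepts link to neuroterms (label nodes), and
    // neuromentions link to neuroterms via match predicates, so traversal
    // between concepts and mentions always goes through the term layer.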
    // convert a single concept to its terms
    public Set<Resource> getTermsFromConcepts(Resource concept) {
        Set<Resource> concepts = new HashSet<Resource>();
        concepts.add(concept);
        return getTermsFromConcepts(concepts);
    }

    // convert a concept set to its terms
    public Set<Resource> getTermsFromConcepts(Set<Resource> concepts) {
        Set<Resource> result = new HashSet<Resource>();
        for (Resource r : concepts) {
            result.addAll(JenaUtil.getObjects(model.listStatements(r, null, (RDFNode) null)));
        }
        // intersect with all neuroterms
        Set<Resource> terms = getTerms();
        result.retainAll(terms);
        return result;
    }

    // get concepts that have no matched terms
    public Set<Resource> getUnMatchedConceptsOld(Set<Resource> concepts) {
        Set<Resource> result = new HashSet<Resource>();
        for (Resource concept : concepts) {
            Set<Resource> terms = getTermsFromConcepts(concept);
            Set<Resource> unMatchedTerms = getUnMatchedTerms(terms);
            // if all terms are unmatched
            if (terms.equals(unMatchedTerms))
                result.add(concept);
        }
        return result;
    }

    // get concepts that have no accepted matched terms
    public Set<Resource> getUnMatchedConcepts(Set<Resource> concepts) {
        Set<Resource> result = new HashSet<Resource>();
        for (Resource concept : concepts) {
            // mentions that evaluators accepted for this concept
            Set<Resource> acceptedMentions = JenaUtil
                    .getSubjects(model.listStatements(null, Vocabulary.evaluation_accept, concept));
            // some may have been evaluated but are not actually in the model
            Set<Resource> terms = getTermsFromConcepts(concept);
            Set<Resource> mentions = getMentionsFromTerms(terms);

            // if there are no mentions that are accepted then
            if (Util.intersectSize(acceptedMentions, mentions) == 0) {
                result.add(concept);
            }
        }
        return result;
    }

    // get terms that have no matched mentions
    public Set<Resource> getUnMatchedTerms(Set<Resource> terms) {
        Set<Resource> result = new HashSet<Resource>();
        for (Resource term : terms) {
            Set<Resource> mentions = getMentionsFromTerms(term);
            if (mentions.isEmpty())
                result.add(term);
        }
        return result;
    }

    /**
     * Converts a single term to its linked mentions.
     */
    public Set<Resource> getMentionsFromTerms(Resource term) {
        Set<Resource> terms = new HashSet<Resource>();
        terms.add(term);
        return getMentionsFromTerms(terms);
    }

    public Set<Resource> getMentionsFromTerms(Set<Resource> terms) {
        Set<Resource> result = new HashSet<Resource>();
        for (Resource r : terms) {
            result.addAll(JenaUtil.getSubjects(model.listStatements(null, Vocabulary.match, r)));
        }
        return result;
    }

    public Set<Resource> getConceptsFromTerms(Resource parameterTerm) {
        Set<Resource> parameterTerms = new HashSet<Resource>();
        parameterTerms.add(parameterTerm);
        return getConceptsFromTerms(parameterTerms);
    }

    public Set<Resource> getConceptsFromTerms(Resource parameterTerm, Set<Resource> allConcepts) {
        Set<Resource> parameterTerms = new HashSet<Resource>(1);
        parameterTerms.add(parameterTerm);
        return getConceptsFromTerms(parameterTerms, allConcepts);
    }

    public Set<Resource> getConceptsFromTerms(Set<Resource> parameterTerms, Set<Resource> allConcepts) {
        Set<Resource> result = new HashSet<Resource>();

        if (parameterTerms.isEmpty())
            return result;

        if (allConcepts == null) {
            allConcepts = getConcepts();
        }

        GroupSelector selector = new GroupSelector(allConcepts, parameterTerms);
        result = JenaUtil.getSubjects(model.listStatements(selector));

        return result;
    }

    public Set<Resource> getConceptsFromTerms(Set<Resource> parameterTerms) {
        return getConceptsFromTerms(parameterTerms, null);
    }

    // @Deprecated -- refers to the commented-out createMatches() below
    // public void createMatches() {
    //
    // // get all neuromentions
    // // StmtIterator mentions = model.listStatements( null, RDF.type, Vocabulary.neuromention );
    // // Filter hasMatchFilter = new Filter() {
    // // public boolean accept( Object t ) {
    // // Statement s = ( Statement ) t;
    // //
    // // if ( s.getProperty( arg0 ) ) return true;
    // // }
    // // };
    //
    // // get all neuroterms
    // StmtIterator termStatements = model.listStatements( null, RDF.type, Vocabulary.neuroterm );
    // Set<Statement> terms = termStatements.toSet();
    //
    // List<Resolver> resolvers = new LinkedList<Resolver>();
    // resolvers.add( new SimpleExactMatcher() );
    // resolvers.add( new BagOfWordsResolver() );
    // resolvers.add( new StemResolver() );
    //
    // boolean hit;
    // int hitCount = 0;
    // // do the iterations, very complex, number of mentions * number of terms * number of matchers, ugh
    // outer: while ( mentions.hasNext() ) {
    // hit = false;
    //
    // Resource mention = mentions.nextStatement().getSubject();
    // String mentionText = JenaUtil.getLabel( mention );
    // // log.info( mentionText );
    // for ( Statement term : terms ) {
    // Resource termResource = term.getSubject();
    // String termText = JenaUtil.getLabel( termResource );
    // for ( Resolver resolver : resolvers ) {
    // if ( resolver.matches( mentionText, termText ) ) {
    // log.info( mentionText );
    // hit = true;
    // // make a link between this mention and the term it matched with
    // mention.addProperty( resolver.getProperty(), termResource );
    // log.info( "Match with " + resolver.getName() + ": " + mentionText + " = " + termText );
    // }
    // }
    // }
    // if ( hit ) hitCount++;
    // }
    // log.info( "Number of matched mentions: " + hitCount );
    // }
    public boolean hasNNID(String id) {
        return model.contains(model.createResource(Vocabulary.getNNURI() + id), RDF.type, Vocabulary.neuroname);
    }

    public Set<Resource> getPMIDs(Resource neuromention) {
        Set<Resource> result = new HashSet<Resource>();
        StmtIterator statements = neuromention.listProperties(Vocabulary.in_PMID);
        while (statements.hasNext()) {
            Statement s = statements.nextStatement();
            RDFNode n = s.getObject();
            result.add((Resource) n.as(Resource.class));
            // result.add( ( Resource ) statements.nextStatement().getLiteral().getString() );
        }
        return result;
    }

    public Set<Resource> getMentionsInPMID(Resource pmid) {
        Set<Resource> result = new HashSet<Resource>();
        ResIterator resIt = model.listResourcesWithProperty(Vocabulary.in_PMID, pmid);
        while (resIt.hasNext()) {
            Resource r = resIt.nextResource();
            result.add(r);
        }
        return result;
    }

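    /**
     * Builds an NCBI Entrez query URL listing the PMIDs attached to a mention,
     * for example (with made-up PMIDs):
     * http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=15032812,16606361&dispmax=100
     * The charLimit variants truncate the PMID list so the URL stays short.
     */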
    public String getNCBIPMIDLink(Resource neuroterm) {
        return getNCBIPMIDLink(neuroterm, Integer.MAX_VALUE);
    }

    public String getNCBIPMIDLink(Resource neuromention, int charLimit) {
        Set<Resource> pmids = getPMIDs(neuromention);
        return getNCBIPMIDLink(pmids, charLimit);
    }

    public String getNCBIPMIDLink(Set<Resource> pmids) {
        return getNCBIPMIDLink(pmids, Integer.MAX_VALUE);
    }

    public String getComaSepPMIDs(Set<Resource> pmids, int charLimit) {
        String result = "";
        for (Resource pmid : pmids) {
            String url = pmid.getURI();
            url = url.replace(Vocabulary.getpubmedURIPrefix(), "");
            if ((result.length() + url.length()) > (charLimit)) {
                break;
            }
            result += url + ",";
        }
        // trim the trailing comma, guarding against an empty result
        if (result.length() > 0) {
            result = result.substring(0, result.length() - 1);
        }
        return result;
    }

    public String getNCBIPMIDLink(Set<Resource> pmids, int charLimit) {
        String endfix = "&dispmax=100";// &dopt=citation -mesh terms
        String prefix = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=";
        return prefix + getComaSepPMIDs(pmids, charLimit - endfix.length() - prefix.length()) + endfix;
    }

    public void writeOut() throws Exception {
        writeOut(Config.config.getString("resolve.Lexicon.resolution.RDF"));
    }

    public void writeOut(String fileName) throws Exception {
        // model.write( new FileWriter( fileName ) );
        model.write(new FileOutputStream(fileName));
    }

    public void loadManualMatches() {
        LoadManualMappings mapper = new LoadManualMappings();
        mapper.addToModel(model);
        reason();
    }

    public void loadExactAutomaticEvaluations() throws Exception {
        LoadEvaluationSpreadSheets sheetExact = new LoadEvaluationSpreadSheets(model);
        sheetExact.loadAutomaticExact();
        log.info("Finished parsing spreadsheet");
        reason();
        log.info("Done reasoning");
    }

    public void loadAutomaticEvaluations() throws Exception {
        LoadEvaluationSpreadSheets sheet = new LoadEvaluationSpreadSheets(model);
        sheet.loadAutomatic();

        log.info("Finished parsing spreadsheet");
        reason();
        log.info("Done reasoning");
    }

    public void loadManualEvaluations() throws Exception {
        boolean createMentions = false;
        loadManualEvaluations(createMentions);
    }

    public void loadManualEvaluations(boolean createMentions) throws Exception {
        LoadEvaluationSpreadSheets sheet = new LoadEvaluationSpreadSheets(model);
        sheet.setCreateRDFMentions(createMentions);
        sheet.loadManual();
        log.info("Finished parsing spreadsheet");
        reason();
        log.info("Done reasoning");
    }

    public void countMatches() throws Exception {
        // load in sparql file
        File f = new File(Config.config.getString("whitetext.sparql.directory") + "CountMappedTerms.txt");

        byte[] data = new byte[(int) f.length()];
        DataInputStream input = new DataInputStream(new FileInputStream(f));
        input.readFully(data);
        input.close();
        String queryString = new String(data);
        // log.info( queryString );

        Query q = QueryFactory.create(queryString);
        QueryExecution qexec = QueryExecutionFactory.create(q, model);

        ResultSet results = qexec.execSelect();
        log.info("Query executed");
        // put it into a set
        int totalMatched = 0;
        int uniqueMatched = 0;

        while (results.hasNext()) {
            QuerySolution qTemp = results.nextSolution();
            int count = qTemp.getLiteral("$count").getInt();
            String mentionLabel = qTemp.getLiteral("?mentionlabel").toString();
            // log.info( mentionLabel + " -> " + count );
            uniqueMatched++;
            totalMatched += count;
        }
        log.info("Unique matched mentions = " + uniqueMatched);
        log.info("Total matched mentions = " + totalMatched);
    }

    public void getTermToConceptStats() {
        Set<Resource> terms = getTerms();
        int onlyOne = 0;
        int total = 0;
        int max = 0;
        Resource maxR = null;
        for (Resource term : terms) {
            int size = getConceptsFromTerms(term).size();
            total += size;
            if (size == 1)
                onlyOne++;
            if (size > max) {
                max = size;
                maxR = term;
            }
        }
        log.info("total term to concept pairs:" + total);
        log.info("only one concept:" + onlyOne);
        log.info("max:" + max + " " + getConceptsFromTerms(maxR));
    }

    /**
     * Print number of abstracts that have at least one matched mention
     * 
     * @throws Exception
     */
    public void printCorpusCoverage() throws Exception {
        Set<Resource> mentions = getMatchedMentions();
        // Set<Resource> mentions = JenaUtil.getSubjects( model.listStatements( null, Vocabulary.evaluation_accept,
        // ( RDFNode ) null ) );

        Set<Resource> allPMIDS = new HashSet<Resource>();
        for (Resource mention : mentions) {
            allPMIDS.addAll(getPMIDs(mention));
        }

        log.info("Size of pmids that have at least one resolved mention:" + allPMIDS.size());
    }

    public void printLexiconCoverageStats() throws Exception {
        log.info("Unmatched concepts:" + getUnMatchedConceptsOld(getConcepts()).size());
        log.info(" Unmatched terms:" + getUnMatchedTerms(getTerms()).size());
        log.info("  NeuroNames:" + getUnMatchedConceptsOld(getNNConcepts()).size());
        log.info("   linked terms:" + getUnMatchedTerms(getTermsFromConcepts(getNNConcepts())).size());
        log.info("  BIRNLex:" + getUnMatchedConceptsOld(getNIFSTDConcepts()).size());
        log.info("   linked terms:" + getUnMatchedTerms(getTermsFromConcepts(getNIFSTDConcepts())).size());
        log.info("  Brede:" + getUnMatchedConceptsOld(getBredeConcepts()).size());
        log.info("   linked terms:" + getUnMatchedTerms(getTermsFromConcepts(getBredeConcepts())).size());
        log.info("  BAMS:" + getUnMatchedConceptsOld(getBAMSConcepts()).size());
        log.info("   linked terms:" + getUnMatchedTerms(getTermsFromConcepts(getBAMSConcepts())).size());
        log.info("  ABA:" + getUnMatchedConceptsOld(getABAConcepts()).size());
        log.info("   linked terms:" + getUnMatchedTerms(getTermsFromConcepts(getABAConcepts())).size());
        log.info("  ABCD:" + getUnMatchedConceptsOld(getABCDConcepts()).size());
        log.info("   linked terms:" + getUnMatchedTerms(getTermsFromConcepts(getABCDConcepts())).size());

        log.info("Of the evaluated sets:");
        log.info("Unmatched concepts:" + getUnMatchedConcepts(getConcepts()).size());
        log.info("  NeuroNames:" + getUnMatchedConcepts(getNNConcepts()).size());
        log.info("  BIRNLex:" + getUnMatchedConcepts(getNIFSTDConcepts()).size());
        log.info("  Brede:" + getUnMatchedConcepts(getBredeConcepts()).size());
        log.info("  BAMS:" + getUnMatchedConcepts(getBAMSConcepts()).size());
        log.info("  ABA:" + getUnMatchedConcepts(getABAConcepts()).size());
        log.info("  ABCD:" + getUnMatchedConcepts(getABCDConcepts()).size());

    }

    public long getStatementSize() throws Exception {
        return model.size();
    }

    public void getStats() throws Exception {
        log.info("Model size: " + model.size() + " statements");
        Set<Resource> terms = getTerms();
        log.info("Neuroterm nodes:" + terms.size());
        log.info("Neuroconcept nodes:" + getConcepts().size());
        log.info(" linked terms:" + getTermsFromConcepts(getConcepts()).size());
        log.info("  NeuroNames:" + getNNConcepts().size());
        log.info("   linked terms:" + getTermsFromConcepts(getNNConcepts()).size());
        log.info("  BIRNLex:" + getNIFSTDConcepts().size());
        log.info("   linked terms:" + getTermsFromConcepts(getNIFSTDConcepts()).size());
        log.info("  Brede:" + getBredeConcepts().size());
        log.info("   linked terms:" + getTermsFromConcepts(getBredeConcepts()).size());
        log.info("  BAMS:" + getBAMSConcepts().size());
        log.info("   linked terms:" + getTermsFromConcepts(getBAMSConcepts()).size());
        log.info("  ABA:" + getABAConcepts().size());
        log.info("   linked terms:" + getTermsFromConcepts(getABAConcepts()).size());
        log.info("  ABCD:" + getABCDConcepts().size());
        log.info("   linked terms:" + getTermsFromConcepts(getABCDConcepts()).size());

        Set<Resource> mentions = getMentions();
        log.info("Neuromention nodes:" + mentions.size());
        log.info("  Matched mentions (mention to term):" + getMatchedMentions().size());
        log.info("        abstracts:" + sumAbstractFrequencies(getMatchedMentions()));
        log.info("        occurrences:" + sumMentionFrequencies(getMatchedMentions()));

        log.info("  Unmatched mentions:" + getUnMatchedMentions().size());
        log.info("          abstracts:" + sumAbstractFrequencies(getUnMatchedMentions()));
        log.info("          occurrences:" + sumMentionFrequencies(getUnMatchedMentions()));

    }

    private void createVennDiagram() throws Exception {
        File vennFile = new File("/grp/java/workspace/PubMedIDtoGate/LexiconVenn.txt");
        // clear old data
        vennFile.delete();
        Set<Resource> NNTerms = getTermsFromConcepts(getNNConcepts());
        Set<Resource> BIRNTerms = getTermsFromConcepts(getNIFSTDConcepts());
        Set<Resource> BredeTerms = getTermsFromConcepts(getBredeConcepts());
        Set<Resource> BAMSTerms = getTermsFromConcepts(getBAMSConcepts());
        Set<Resource> ABATerms = getTermsFromConcepts(getABAConcepts());
        // write to Venn Diagram
        Util.addToVennMasterFile(vennFile, NNTerms, "NeuroNames");
        Util.addToVennMasterFile(vennFile, BredeTerms, "Brede");
        Util.addToVennMasterFile(vennFile, BIRNTerms, "BIRNLex");
        Util.addToVennMasterFile(vennFile, BAMSTerms, "BAMS");
        Util.addToVennMasterFile(vennFile, ABATerms, "ABA");

        Collection NNminusBIRN = Util.subtract(NNTerms, BIRNTerms);
        Util.addToVennMasterFile(vennFile, NNminusBIRN, "Neuronames minus BIRNLex");

        Collection NNminusBIRNminusBAMS = Util.subtract(NNminusBIRN, BAMSTerms);
        Util.addToVennMasterFile(vennFile, NNminusBIRNminusBAMS, "Neuronames minus BIRNLex minus BAMS");

        Collection temp;
        temp = Util.subtract(BAMSTerms, NNTerms);
        Util.addToVennMasterFile(vennFile, temp, "BAMS minus neuronames");

        temp = Util.subtract(ABATerms, NNTerms);
        Util.addToVennMasterFile(vennFile, temp, "Allen minus neuronames");

        Collection BAMSminusAll = Util.subtract(BAMSTerms, NNTerms, BIRNTerms, BredeTerms, ABATerms);
        Util.addToVennMasterFile(vennFile, BAMSminusAll, "BAMS minus all");

        Collection intersectAll = Util.intersect(NNTerms, BIRNTerms, BredeTerms, BAMSTerms, ABATerms);
        Util.addToVennMasterFile(vennFile, intersectAll, "Intersect all");

    }

    public void getConnectedStats() {
        Set<Resource> mentions = getConnectionPartnerMentions();
        log.info("  Connection partner mentions:" + mentions.size());
        log.info("        occurances:" + sumMentionFrequencies(mentions));
        mentions.removeAll(getUnMatchedMentions());
        log.info("    Matched mentions:" + mentions.size());
        log.info("          occurances:" + sumMentionFrequencies(mentions));
    }

    public void runOnUnMatched(RDFResolver resolver) {
        int old = getUnMatchedMentions().size();
        Model resolvedStatements = resolver.resolve(getUnMatchedMentions());
        model = model.union(resolvedStatements);
        reason();
        log.info("Newly Matched mentions:" + (old - getUnMatchedMentions().size()));
    }

    public int sumAbstractFrequencies(Set<Resource> mentions) {
        int result = 0;
        for (Resource mention : mentions) {
            Statement s = mention.getProperty(Vocabulary.number_of_abstracts);
            if (s != null)
                result += s.getInt();
        }
        return result;
    }

    public int sumMentionFrequencies(Set<Resource> mentions) {
        int result = 0;

        for (Resource mention : mentions) {
            Statement s = mention.getProperty(Vocabulary.number_of_occurances);
            if (s != null)
                result += s.getInt();
            // int size = mention.listProperties(Vocabulary.number_of_occurances).toList().size();
            // if (size ==2 ) {
            // log.info( "Error Greater than two " + mention.getURI() );
            // }

        }
        return result;
    }

    // public void createMatches2() throws Exception {
    // Set<Resource> mentions = getMentions();
    // Set<Resource> terms = getTerms();
    //
    // List<RDFResolver> resolvers = new LinkedList<RDFResolver>();
    // resolvers.add( new SimpleExactRDFMatcher( terms ) );
    // resolvers.add( new BagOfWordsRDFMatcher( terms ) );
    // resolvers.add( new StemRDFMatcher( terms ) );
    // resolvers.add( new DirectionSplittingRDFMatcher( terms ) );
    // resolvers.add( new AdjectiveStripRDFResolver( terms ) );
    //
    // for ( RDFResolver resolver : resolvers ) {
    // Model resolvedStatements = resolver.resolve( mentions );
    // log.info( resolver.getName() + " added " + resolvedStatements.size() + " resolution statements" );
    // model = model.union( resolvedStatements );
    // }
    // reason();
    // }

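    /**
     * Registers a MentionEditor on every resolver, presumably so each resolver
     * also matches against the editor's rewritten form of a mention string.
     */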
    public void addMentionEditorToResolvers(Collection<RDFResolver> resolvers, MentionEditor me) {
        log.info("Adding mention editor: " + me.getName());
        for (RDFResolver resolver : resolvers) {
            resolver.addMentionEditor(me);
        }
    }

    public void runResolver(RDFResolver resolver, Set<Resource> mentions) {
        LinkedList<RDFResolver> resolvers = new LinkedList<RDFResolver>();
        resolvers.add(resolver);
        runResolvers(resolvers, mentions);
    }

    public void runResolvers(Collection<RDFResolver> resolvers, Set<Resource> mentions) {
        for (RDFResolver resolver : resolvers) {
            Model resolvedStatements = resolver.resolve(mentions);
            log.info(resolver.getName() + " added " + resolvedStatements.size() + " resolution statements");
            model = model.union(resolvedStatements);
        }
        reason();
        log.info("Resolution Matched mentions:" + getMatchedMentions().size());
        log.info("                 occurances:" + sumMentionFrequencies(getMatchedMentions()));
    }

    public void connectionPartnerMatches() throws Exception {
        Set<Resource> terms = getTerms();

        List<RDFResolver> resolvers = new LinkedList<RDFResolver>();
        resolvers.add(new SimpleExactRDFMatcher(terms));
        resolvers.add(new BagOfWordsRDFMatcher(terms));
        resolvers.add(new StemRDFMatcher(terms));
        resolvers.add(new BagOfStemsRDFMatcher(terms));
        // resolvers.add( new ThreeLetterRDFMatcher( terms ) );

        // run resolvers
        // runResolvers( resolvers, getConnectionPartnerMentions() );
        log.info("Done normal");
        log.info("First stage counts");
        getStats();

        // split direction conjunctions
        addMentionEditorToResolvers(resolvers, new DirectionSplittingMentionEditor());
        addMentionEditorToResolvers(resolvers, new HemisphereStripMentionEditor());
        addMentionEditorToResolvers(resolvers, new CytoPrefixMentionEditor());
        addMentionEditorToResolvers(resolvers, new BracketRemoverMentionEditor());
        addMentionEditorToResolvers(resolvers, new NDotExpanderMentionEditor());
        addMentionEditorToResolvers(resolvers, new OfTheRemoverMentionEditor());

        runResolvers(resolvers, getConnectionPartnerMentions());
        getStats();
        getConnectedStats();

    }

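    /**
     * Full resolution pipeline: run all resolvers on the raw mentions, then
     * repeatedly add a normalizing MentionEditor and re-run the resolvers on
     * whatever is still unmatched. Editors added after the "Losing
     * information" marker below may change a mention's meaning.
     */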
    public void createMatches3() throws Exception {

        List<RDFResolver> resolvers = getAllResolvers();

        // run resolvers
        runResolvers(resolvers, getMentions());
        log.info("Done normal");
        log.info("First stage counts");
        getStats();

        // split direction conjunctions
        addMentionEditorToResolvers(resolvers, new DirectionSplittingMentionEditor());
        log.info("1");
        runResolvers(resolvers, getUnMatchedMentions());

        // strip uninformative adjectives (left, right, ...)
        addMentionEditorToResolvers(resolvers, new HemisphereStripMentionEditor());
        log.info("2");
        runResolvers(resolvers, getUnMatchedMentions());

        // stage three (the cyto prefix editor) was moved to the lossy stages below

        addMentionEditorToResolvers(resolvers, new BracketRemoverMentionEditor());
        log.info("4");
        runResolvers(resolvers, getUnMatchedMentions());

        addMentionEditorToResolvers(resolvers, new NDotExpanderMentionEditor());
        log.info("5");
        runResolvers(resolvers, getUnMatchedMentions());

        addMentionEditorToResolvers(resolvers, new OfTheRemoverMentionEditor());
        log.info("6");
        runResolvers(resolvers, getUnMatchedMentions());

        // removed based on evaluation
        // addMentionEditorToResolvers( resolvers, new BroadmannPrefixAdderMentionEditor() );
        // log.info( "7" );
        // runResolvers( resolvers, getUnMatchedMentions() );

        // newer stage: strip region suffixes
        addMentionEditorToResolvers(resolvers, new RegionSuffixRemover());
        log.info("NEW");
        runResolvers(resolvers, getUnMatchedMentions());

        printMentions(getMatchedMentions(), 20);

        log.info("Losing information stages below");
        // simpleMatcher.setProperty( Vocabulary.simple_mapping_match_after_loss );

        addMentionEditorToResolvers(resolvers, new CytoPrefixMentionEditor());
        log.info("3");
        runResolvers(resolvers, getUnMatchedMentions());

        addMentionEditorToResolvers(resolvers, new DirectionRemoverMentionEditor());
        log.info("7");
        runResolvers(resolvers, getUnMatchedMentions());

        addMentionEditorToResolvers(resolvers, new NucleusOfTheRemoverMentionEditor());
        log.info("8");
        runResolvers(resolvers, getUnMatchedMentions());

        addMentionEditorToResolvers(resolvers, new DirectionRemoverMentionEditor());
        runResolvers(resolvers, getUnMatchedMentions());
        log.info("End stage");
        getStats();

    }

    protected void printMentions(Set<Resource> mentions, int number) {
        List<Resource> mentionList = new LinkedList<Resource>(mentions);
        Collections.shuffle(mentionList);
        // don't run past the end when fewer than number mentions are available
        int limit = Math.min(number, mentionList.size());
        for (int i = 0; i < limit; i++) {
            System.out.println(toStringNew(mentionList.get(i)));
        }
    }

    public void testMentionEditors(String testString) throws Exception {
        RDFResolverImpl resolver = new SimpleExactRDFMatcher(getTerms());
        resolver.addMentionEditor(new DirectionSplittingMentionEditor());
        resolver.addMentionEditor(new HemisphereStripMentionEditor());
        resolver.addMentionEditor(new BracketRemoverMentionEditor());
        resolver.addMentionEditor(new OfTheRemoverMentionEditor());
        // resolver.addMentionEditor( new BroadmannPrefixAdderMentionEditor() );
        resolver.addMentionEditor(new CytoPrefixMentionEditor());
        resolver.addMentionEditor(new RegionSuffixRemover());
        resolver.addMentionEditor(new DirectionRemoverMentionEditor());
        resolver.addMentionEditor(new NucleusOfTheRemoverMentionEditor());
        resolver.addMentionEditor(new DirectionRemoverMentionEditor());

        log.info("Original:" + testString);
        log.info(resolver.processMentionString(testString));
    }

    public Set<Resource> getConnectingPredicates(Resource mention, Resource neuroTerm) {
        Set<Resource> predicates = new HashSet<Resource>();
        StmtIterator predIterator = model.listStatements(mention, null, neuroTerm);
        while (predIterator.hasNext()) {
            Statement s = predIterator.nextStatement();
            predicates.add(s.getPredicate());
        }
        return predicates;
    }

    public Set<String> getConnectingPredicatesShortNames(Resource mention, Resource neuroTerm) {
        Set<String> predicates = new HashSet<String>();
        for (Resource pred : getConnectingPredicates(mention, neuroTerm)) {
            predicates.add(pred.getLocalName());
        }
        predicates.remove("match");
        return predicates;
    }

    public Map<String, String> getStatsToMap() throws Exception {
        Map<String, String> result = new HashMap<String, String>();
        result.put("concepts", "" + getConcepts().size());
        result.put("terms", "" + getTerms().size());

        Set<Resource> mentions = getMentions();
        result.put("mentions", "" + mentions.size());
        result.put("abstracts freq", "" + sumAbstractFrequencies(mentions));
        result.put("mentions freq", "" + sumMentionFrequencies(mentions));

        result.put("matched mentions", "" + getMatchedMentions().size());
        result.put("matched abstracts freq", "" + sumAbstractFrequencies(getMatchedMentions()));
        result.put("matched mentions freq", "" + sumMentionFrequencies(getMatchedMentions()));
        result.put("unmatched mentions", "" + getUnMatchedMentions().size());
        result.put("unmatched abstracts freq", "" + sumAbstractFrequencies(getUnMatchedMentions()));
        result.put("unmatched mentions freq", "" + sumMentionFrequencies(getUnMatchedMentions()));
        return result;
    }

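    /**
     * Runs each matcher in isolation on a freshly loaded model so per-matcher
     * results can be compared (despite the name, five matchers are tried).
     */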
    public static void tryAllFour() throws Exception {
        ResolutionRDFModel resolutionModel = new ResolutionRDFModel();
        resolutionModel.runResolver(new SimpleExactRDFMatcher(resolutionModel.getTerms()),
                resolutionModel.getMentions());

        resolutionModel = new ResolutionRDFModel();
        resolutionModel.runResolver(new StemRDFMatcher(resolutionModel.getTerms()), resolutionModel.getMentions());

        resolutionModel = new ResolutionRDFModel();
        resolutionModel.runResolver(new BagOfWordsRDFMatcher(resolutionModel.getTerms()),
                resolutionModel.getMentions());

        resolutionModel = new ResolutionRDFModel();
        resolutionModel.runResolver(new BagOfStemsRDFMatcher(resolutionModel.getTerms()),
                resolutionModel.getMentions());

        resolutionModel = new ResolutionRDFModel();
        resolutionModel.runResolver(new ThreeLetterRDFMatcher(resolutionModel.getTerms()),
                resolutionModel.getMentions());

    }

    /**
     * Get all mentions for a specific species - uses RDF not GATE.
     * 
     * @param species for example 9986
     * @return
     */
    public Set<Resource> getMentionsForSpecies(String species) {
        Set<Resource> mentions = new HashSet<Resource>();
        Resource taxon = model.createResource("http://bio2rdf.org/taxon:" + species);
        ResIterator resIt = model.listResourcesWithProperty(Vocabulary.mentions_species, taxon);
        while (resIt.hasNext()) {
            Resource r = resIt.nextResource();
            // log.info( r.getURI() );
            mentions.addAll(getMentionsInPMID(r));
        }
        return mentions;
    }

    public List<RDFResolver> getAllResolvers() {
        Set<Resource> terms = getTerms();
        List<RDFResolver> resolvers = new LinkedList<RDFResolver>();
        resolvers.add(new SimpleExactRDFMatcher(terms));
        resolvers.add(new BagOfWordsRDFMatcher(terms));
        resolvers.add(new StemRDFMatcher(terms));
        resolvers.add(new BagOfStemsRDFMatcher(terms));
        // resolvers.add( new ThreeLetterRDFMatcher( terms ) );
        resolvers.add(new SimpleMappingRDFMatcher(terms));
        return resolvers;
    }

    public Set<Resource> resolveToConcepts(String mention, RDFResolver resolver, Set<Resource> allTerms,
            Set<Resource> allConcepts) {
        Set<Resource> terms = resolveToTerms(mention, resolver, allTerms);

        Set<Resource> concepts = getConceptsFromTerms(terms, allConcepts);
        return concepts;
    }

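    /**
     * Resolves a free-text mention string: creates (or reuses) its mention
     * node, adds the resolver's match statements, and returns the matched
     * terms restricted to allTerms. Note that this mutates the model.
     */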
    public Set<Resource> resolveToTerms(String mention, RDFResolver resolver, Set<Resource> allTerms) {
        Resource r = makeMentionNode(mention);

        // r is now linked to the terms
        Model resolvedStatements = resolver.resolve(r);

        model = model.add(resolvedStatements);

        Set<Resource> terms = getLinkedResources(r);

        terms.retainAll(allTerms);
        return terms;
    }

    public Set<String> resolve(String mention, RDFResolver resolver, Set<Resource> allTerms,
            Set<Resource> allConcepts) {
        Set<String> result = new HashSet<String>();
        Set<Resource> concepts = resolveToConcepts(mention, resolver, allTerms, allConcepts);
        for (Resource concept : concepts) {
            result.add(JenaUtil.getLabel(concept));
        }

        return result;
    }

    public Set<Property> getAllProperties() {
        Set<Property> properties = new HashSet<Property>();
        for (RDFResolver r : getAllResolvers()) {
            Property p = r.getProperty();
            properties.add(p);
        }
        return properties;
    }

    public static void main(String[] args) throws Exception {
        String filename;
        filename = Config.config.getString("resolve.Lexicon.RDF");
        // filename = "/home/lfrench/WhiteText/rdf/LexiconRDF.allComp (only Mallet predictions on unseen corp).rdf";
        ResolutionRDFModel resolutionModel = new ResolutionRDFModel(filename);

        resolutionModel.createVennDiagram();
        resolutionModel.getMentionsForSpecies("9606");
        System.exit(1);
        // resolutionModel.getStats();

        // System.exit( 1 );
        // resolutionModel.testMentionEditors( "inferior and adjacent portion of the lateral pulvinar subdivisions" );
        // resolutionModel.testMentionEditors( "dorsal and lateral hippocampus" );
        // resolutionModel.testMentionEditors( "rostral part of the medial accessory olive" );
        // resolutionModel.testMentionEditors( "ventral lateral nucleus (vl) of the thalamus" );
        // resolutionModel.testMentionEditors( "areas th" );

        // log.info( resolutionModel.getNNNodeByLabel( "transverse occipital sulcus (H)" ) );
        // System.out.println( resolutionModel.toString( resolutionModel.getNNNodeByLabel( "frontal region" ) ) );
        // System.exit( 1 );

        // tryAllFour();
        // System.exit( 1 );

        resolutionModel.loadManualMatches();

        // loading in evaluations!
        // resolutionModel.loadManualEvaluations();
        // resolutionModel.loadAutomaticEvaluations();

        resolutionModel.getStats();

        resolutionModel.createMatches3();
        resolutionModel.getStats();
        // resolutionModel.writeOut( Config.config.getString( "resolve.Lexicon.resolution.RDF" ) );
        resolutionModel.writeOut(
                "/home/lfrench/WhiteText/rdf/ResolutionRDF.allComp (only Mallet predictions on unseen corp).allResolvers.rdf");
        // resolutionModel.printMentions( resolutionModel.getUnMatchedMentions(), 20 );

        // print top and bottom 20
        // Set<Resource> x = getMatchedMentions();
        // x.removeAll( newMatches2 );
        // log.info( "newsize: " + x.size() );
        // printMentions( x, 20 );
        //

        // resolutionModel.connectionPartnerMatches();

        log.info("Done creatematches 3 run");
        // MappingSpreadSheet sheet = new MappingSpreadSheet( "testForStatsEvalRemovedExactKept.xls" );
        // sheet.populate( resolutionModel );
        // sheet.save();

        System.exit(1);

        log.info("Tagl could start");
        Set<Resource> mentions = new HashSet<Resource>(resolutionModel.getUnMatchedMentions());
        boolean phrase = true;
        NamedEntityStats.makeTagCloud(mentions, phrase);
        log.info("phrase false");
        phrase = false;
        NamedEntityStats.makeTagCloud(mentions, phrase);

        // // tokenize and print first word
        // for ( Resource mention : mentions ) {
        // String mentionString = JenaUtil.getLabel( mention );
        // }
        // log.info( NamedEntityStats.formatPhraseForCloud( mentionString ) );
        // StringTokenizer tokens = new StringTokenizer( mentionString, BagOfWordsRDFMatcher.delims, false );
        // String lastToken = null;
        // while ( tokens.hasMoreTokens() ) {
        // lastToken = tokens.nextToken();
        // }
        // // log.info( tokens.nextToken() );
        // // log.info( lastToken );
        //
        // }
        // resolutionModel.getStats();
        // log.info( "Added:" + mentions.size() );
        // // NNmodel.writeOut();
    }
}

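/*
 * A Jena Selector that accepts a statement when its subject is in one given
 * set and its object is in the other; getConceptsFromTerms uses it to pull
 * concept-to-term statements in a single pass over the model.
 */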
class GroupSelector implements Selector {
    Set<Resource> objects;
    Set<Resource> subjects;

    public GroupSelector(Set<Resource> subjects, Set<Resource> objects) {
        this.objects = objects;
        this.subjects = subjects;
    }

    public boolean test(Statement s) {
        return subjects.contains(s.getSubject()) && objects.contains(s.getObject());
    }

    public RDFNode getObject() {
        return null;
    }

    public Property getPredicate() {
        return null;
    }

    public Resource getSubject() {
        return null;
    }

    public boolean isSimple() {
        return false;
    }
}