Java tutorial
// $Id: PsiInteractorExtractor.java,v 1.14 2009-07-01 14:21:57 cerami Exp $ //------------------------------------------------------------------------------ /** Copyright (c) 2006 Memorial Sloan-Kettering Cancer Center. ** ** Code written by: Ethan Cerami ** Authors: Ethan Cerami, Gary Bader, Chris Sander ** ** This library is free software; you can redistribute it and/or modify it ** under the terms of the GNU Lesser General Public License as published ** by the Free Software Foundation; either version 2.1 of the License, or ** any later version. ** ** This library is distributed in the hope that it will be useful, but ** WITHOUT ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF ** MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. The software and ** documentation provided hereunder is on an "as is" basis, and ** Memorial Sloan-Kettering Cancer Center ** has no obligations to provide maintenance, support, ** updates, enhancements or modifications. In no event shall ** Memorial Sloan-Kettering Cancer Center ** be liable to any party for direct, indirect, special, ** incidental or consequential damages, including lost profits, arising ** out of the use of this software and its documentation, even if ** Memorial Sloan-Kettering Cancer Center ** has been advised of the possibility of such damage. See ** the GNU Lesser General Public License for more details. ** ** You should have received a copy of the GNU Lesser General Public License ** along with this library; if not, write to the Free Software Foundation, ** Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. **/ package org.mskcc.pathdb.lucene; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Query; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.search.highlight.QueryScorer; import org.exolab.castor.xml.MarshalException; import org.exolab.castor.xml.ValidationException; import org.mskcc.dataservices.schemas.psi.Entry; import org.mskcc.dataservices.schemas.psi.EntrySet; import org.mskcc.dataservices.schemas.psi.InteractorList; import org.mskcc.dataservices.schemas.psi.ProteinInteractorType; import org.mskcc.pathdb.model.ProteinWithWeight; import org.mskcc.pathdb.util.xml.XmlStripper; import org.mskcc.pathdb.xdebug.XDebug; import java.io.IOException; import java.io.StringWriter; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.Set; /** * Extracts Matching Interactors from the PSI-MI Entry Set. * * @author Ethan Cerami */ public class PsiInteractorExtractor { private Set interactors; private Query query; private StandardAnalyzer analyzer; private IndexReader reader; private Highlighter highLighter; private EntrySet entrySet; private XDebug xdebug; private static final String START_SPAN_TAG = "<SPAN STYLE='background-color: "; private static final String END_TAG = "'>"; private static final String COLOR = "yellow"; private static final String END_SPAN_TAG = "</SPAN>"; /** * Constructor * * @param entrySet PSI-MI Entry Set Object. * @param queryStr Query String. * @param xdebug XDebug Object. * @throws IOException Input Output Exception. * @throws ParseException Parsing Exception. * @throws ValidationException Validation Exception. * @throws MarshalException Marshaling Exception. */ public PsiInteractorExtractor(EntrySet entrySet, String queryStr, XDebug xdebug) throws IOException, ParseException, ValidationException, MarshalException { try { this.xdebug = xdebug; this.entrySet = entrySet; interactors = new HashSet(); analyzer = new StandardAnalyzer(); reader = IndexReader.open(LuceneConfig.getLuceneDirectory()); if (queryStr != null) { QueryParser queryParser = new QueryParser(LuceneConfig.FIELD_ALL, analyzer); query = queryParser.parse(queryStr); query = query.rewrite(reader); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); highLighter = new Highlighter(htmlFormatter, new QueryScorer(query)); checkAllEntries(); } } finally { if (reader != null) { reader.close(); } } } /** * Checks all Entries for Matching Interactors. */ private void checkAllEntries() throws MarshalException, ValidationException, IOException { xdebug.logMsg(this, "Checking all interactors for matches: " + query.toString()); for (int i = 0; i < entrySet.getEntryCount(); i++) { Entry entry = entrySet.getEntry(i); InteractorList interactorList = entry.getInteractorList(); for (int j = 0; j < interactorList.getProteinInteractorCount(); j++) { checkInteractor(interactorList.getProteinInteractor(j)); } } } /** * Checks Specified Interactor. */ private void checkInteractor(ProteinInteractorType protein) throws IOException, ValidationException, MarshalException { // Extract Interactor Field Text (with XML Tags Stripped Out) StringWriter writer = new StringWriter(); protein.marshal(writer); String interactorText = XmlStripper.stripTags(writer.toString(), false, false); xdebug.logMsg(this, "Checking interactor: " + interactorText); if (interactorText != null) { String highLight = null; // If not null, we have a match if (highLight != null) { xdebug.logMsg(this, "... Match Found: " + highLight); ProteinWithWeight proteinWithWeight = new ProteinWithWeight(protein, 0); interactors.add(proteinWithWeight); } else { xdebug.logMsg(this, "... No Match Found"); } } } /** * Gets a Sorted List of Interactors. * * @return ArrayList of ProteinWithWeight Objects. */ public ArrayList getSortedInteractors() { ArrayList list = new ArrayList(interactors); Collections.sort(list, new ProteinWithWeight()); return list; } }