Java tutorial
/** * created by Michael Gerlich, Jan 31, 2013 - 3:48:39 PM * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ package de.ipbhalle.metfusion.main; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.rmi.RemoteException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; import org.apache.commons.io.IOUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.select.Elements; import org.openscience.cdk.DefaultChemObjectBuilder; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.exception.InvalidSmilesException; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.io.MDLV2000Writer; import org.openscience.cdk.isomorphism.UniversalIsomorphismTester; import org.openscience.cdk.smiles.SmilesParser; import org.openscience.cdk.smiles.smarts.SMARTSQueryTool; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import com.chemspider.www.CommonSearchOptions; import com.chemspider.www.EComplexity; import com.chemspider.www.EIsotopic; import com.chemspider.www.ERequestStatus; import com.chemspider.www.ExtendedCompoundInfo; import com.chemspider.www.MassSpecAPISoapProxy; import com.chemspider.www.OpenBabelLocator; import com.chemspider.www.OpenBabelSoap; import com.chemspider.www.SearchSoapProxy; import com.chemspider.www.SubstructureSearchOptions; import de.ipbhalle.metfusion.utilities.MassBank.MassBankUtilities; import de.ipbhalle.metfusion.utilities.output.SDFOutputHandler; import de.ipbhalle.metfusion.wrapper.Result; import de.ipbhalle.metfusion.wrapper.ResultSubstructure; import de.ipbhalle.metfusion.wrapper.SDFDatabase; public class SubstructureSearch implements Runnable { private List<String> includes; private List<String> excludes; private String token; private String molecularFormula; private MetFusionBatchFileHandler batchFileHandler; private boolean formulaFirst; private boolean sdfFirst; private String sdfFile; public final boolean substrucPresent = Boolean.TRUE; public final boolean substrucAbsent = Boolean.FALSE; private ExtendedCompoundInfo[] chemspiderInfo; private List<ResultSubstructure> resultsOriginal; private List<ResultSubstructure> resultsRemaining; public SubstructureSearch(List<String> includes, List<String> excludes, String token, String formula, MetFusionBatchFileHandler batchFileHandler) { this.includes = includes; this.excludes = excludes; this.setToken(token); this.setMolecularFormula(formula); this.batchFileHandler = batchFileHandler; this.formulaFirst = false; } public SubstructureSearch(List<String> includes, List<String> excludes, String token, String formula, MetFusionBatchFileHandler batchFileHandler, boolean queryDatabaseViaFormula, boolean sdfFirst, String sdfFile) { this(includes, excludes, token, formula, batchFileHandler); this.formulaFirst = formula.isEmpty() ? false : queryDatabaseViaFormula; this.sdfFirst = sdfFirst; this.sdfFile = sdfFile; } private void queryIncludes() { if (sdfFirst) { // read in SDF for both Original and remaining SDFDatabase sdf = new SDFDatabase("substrucSDF", sdfFile); sdf.run(); List<Result> results = sdf.getResults(); resultsOriginal = new ArrayList<ResultSubstructure>(); // all results from SDF are used for (Result result : results) { resultsOriginal.add(new ResultSubstructure(result, true)); } resultsRemaining = resultsOriginal; for (int i = 0; i < includes.size(); i++) { resultsRemaining = filterCandidates(resultsRemaining, includes.get(i), substrucPresent); System.out.println("includes == " + includes.size()); System.out.println("[" + i + "] -> remaining = " + resultsRemaining.size()); } } else if (formulaFirst) { resultsOriginal = queryDatabaseWithFormula(molecularFormula); resultsRemaining = skipNonUsed(resultsOriginal); System.out.println("includes == 1 \toriginal = " + resultsOriginal.size()); System.out.println("includes == 1 \tskipNonUsed = " + resultsRemaining.size()); System.out.println("includes == " + includes.size()); for (int i = 0; i < includes.size(); i++) { resultsRemaining = filterCandidates(resultsRemaining, includes.get(i), substrucPresent); System.out.println("[" + i + "] -> remaining = " + resultsRemaining.size()); } } else { if (includes.size() == 1) { // only one substructure filter resultsOriginal = queryDatabase(includes.get(0)); //resultsOriginal = queryDatabaseWithFormula(molecularFormula); resultsRemaining = skipNonUsed(resultsOriginal); System.out.println("includes == 1 \toriginal = " + resultsOriginal.size()); System.out.println("includes == 1 \tskipNonUsed = " + resultsRemaining.size()); } else if (includes.size() > 1) { for (int i = 0; i < includes.size(); i++) { if (i == 0) { resultsOriginal = queryDatabase(includes.get(0)); //resultsOriginal = queryDatabaseWithFormula(molecularFormula); resultsRemaining = skipNonUsed(resultsOriginal); System.out.println("\toriginal = " + resultsOriginal.size()); System.out.println("\tskipNonUsed = " + resultsRemaining.size()); } else { resultsRemaining = filterCandidates(resultsRemaining, includes.get(i), substrucPresent); } System.out.println("includes == " + includes.size()); System.out.println("[" + i + "] -> remaining = " + resultsRemaining.size()); } } else { System.err.println("Empty substructure!"); return; } } } private List<ResultSubstructure> skipNonUsed(List<ResultSubstructure> current) { List<ResultSubstructure> remaining = new ArrayList<ResultSubstructure>(); for (ResultSubstructure rs : current) { if (rs.isUsed()) remaining.add(rs); } return remaining; } private List<ResultSubstructure> queryDatabaseWithFormula(String formula) { List<ResultSubstructure> candidates = new ArrayList<ResultSubstructure>(); MassSpecAPISoapProxy msp = new MassSpecAPISoapProxy(); String[] ids = null; int[] CSIDs = null; try { ids = msp.searchByFormula2(formula); CSIDs = new int[ids.length]; for (int i = 0; i < CSIDs.length; i++) { CSIDs[i] = Integer.parseInt(ids[i]); } } catch (RemoteException e) { System.err.println("Error querying with formula [" + formula + "]!"); return candidates; } try { chemspiderInfo = msp.getExtendedCompoundInfoArray(CSIDs, token); } catch (RemoteException e) { System.err.println("Error retrieving compound info array!"); return candidates; } boolean writeSDF = true; String filename = batchFileHandler.getBatchFile().getName(); int idx = filename.lastIndexOf("."); String ending = ".sdf"; filename = filename.substring(0, idx) + "_original" + ending; File originalSDF = new File(batchFileHandler.getBatchFile().getParent(), filename); MDLV2000Writer writer = null; try { writer = new MDLV2000Writer(new FileOutputStream(originalSDF)); } catch (FileNotFoundException e1) { System.err.println("File [" + originalSDF.getAbsolutePath() + "] not found for original SDF writer!"); writeSDF = false; } SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance()); System.out.println("# matches -> " + chemspiderInfo.length); for (int i = 0; i < chemspiderInfo.length; i++) { System.out.println(chemspiderInfo[i].getCSID() + "\t" + chemspiderInfo[i].getSMILES()); IAtomContainer ac = null; boolean used = false; try { ac = sp.parseSmiles(chemspiderInfo[i].getSMILES()); used = true; } catch (InvalidSmilesException ise) { ac = null; System.err.println("skipping " + chemspiderInfo[i].getCSID()); } candidates.add(new ResultSubstructure(chemspiderInfo[i], ac, used)); if (used && writeSDF) { try { Map<Object, Object> props = ac.getProperties(); props.put("CSID", chemspiderInfo[i].getCSID()); props.put("SMILES", chemspiderInfo[i].getSMILES()); props.put("name", chemspiderInfo[i].getCommonName()); props.put("ALogP", chemspiderInfo[i].getALogP()); props.put("XLogP", chemspiderInfo[i].getXLogP()); props.put("InChI", chemspiderInfo[i].getInChI()); props.put("InChIKey", chemspiderInfo[i].getInChIKey()); props.put("MF", chemspiderInfo[i].getMF()); ac.setProperties(props); writer.write(ac); } catch (CDKException e) { System.err.println("Error writing " + chemspiderInfo[i].getCSID() + " to file [" + originalSDF.getAbsolutePath() + "]!"); } } } try { writer.close(); } catch (IOException e) { System.err.println("Error finalizing original SDF output file!"); } return candidates; } private List<ResultSubstructure> queryDatabase(String substrucPresent) { List<ResultSubstructure> candidates = new ArrayList<ResultSubstructure>(); // convert input SMILES to MOL format for ChemSpider service SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance()); // sp.setPreservingAromaticity(false); // String mol = ""; // String s = ""; // try { // IMolecule temp = sp.parseSmiles(substrucPresent); // System.out.println("aromatic Hueckel? -> " + CDKHueckelAromaticityDetector.detectAromaticity(temp)); // System.out.println("aromatic double bond? -> " + DoubleBondAcceptingAromaticityDetector.detectAromaticity(temp)); // // create coordinates // StructureDiagramGenerator sdg = new StructureDiagramGenerator(); // sdg.setMolecule(temp); // sdg.generateCoordinates(); // IMolecule layedOutMol = sdg.getMolecule(); // // // // byte[] b = null; // ByteArrayOutputStream bos = new ByteArrayOutputStream(); // MDLV2000Writer writer = new MDLV2000Writer(bos); // IOSetting[] ios = writer.getIOSettings(); // for (int i = 0; i < ios.length; i++) { // System.out.println(ios[i].getName() + "\t" + ios[i].getSetting()); // } // Properties customSettings = new Properties(); // customSettings.setProperty("ForceWriteAs2DCoordinates", "true"); // customSettings.setProperty("WriteAromaticBondTypes", "true"); // PropertiesListener listener = new PropertiesListener(customSettings); // writer.addChemObjectIOListener(listener); // // writer.write(layedOutMol); // writer.close(); // b = bos.toByteArray(); // mol = new String(b, "UTF-8"); // System.out.println(mol); // MassBankUtilities mbu = new MassBankUtilities(); // IAtomContainer test2 = mbu.getContainer(mol); // //IAtomContainer test2 = mbu.getContainerUnmodified("c1cccc2nnnc12", "/home/mgerlich/projects/metfusion_tp/BTs/"); // System.out.println("aromatic Hueckel? -> " + CDKHueckelAromaticityDetector.detectAromaticity(test2)); // System.out.println("aromatic? -> " + DoubleBondAcceptingAromaticityDetector.detectAromaticity(test2)); // SmilesGenerator sg = new SmilesGenerator(true); // s = sg.createSMILES(layedOutMol); // System.out.println("old smiles -> " + substrucPresent); // System.out.println("smiles -> " + s); // } catch (InvalidSmilesException e2) { // // TODO Auto-generated catch block // e2.printStackTrace(); // } catch (CDKException e) { // // TODO Auto-generated catch block // e.printStackTrace(); // } catch (IOException e) { // // TODO Auto-generated catch block // e.printStackTrace(); // } OpenBabelLocator obl = new OpenBabelLocator(); String obmol = ""; // try { // OpenBabelSoap obsoap = obl.getOpenBabelSoap(); // obmol = obsoap.convert(substrucPresent, "smi", "mol"); // System.out.println("obmol\n" + obmol); // } catch (ServiceException e2) { // // TODO Auto-generated catch block // e2.printStackTrace(); // } catch (RemoteException e) { // // TODO Auto-generated catch block // e.printStackTrace(); // } MassSpecAPISoapProxy chemSpiderProxy = new MassSpecAPISoapProxy(); SearchSoapProxy ssp = new SearchSoapProxy(); SubstructureSearchOptions sso = new SubstructureSearchOptions(substrucPresent, false); //sso.setMatchTautomers(false); //sso.setMolecule(substrucPresent); CommonSearchOptions cso = new CommonSearchOptions(EComplexity.Single, EIsotopic.NotLabeled, false, false); //cso.setComplexity(EComplexity.Single); //cso.setIsotopic(EIsotopic.NotLabeled); // NotLabeled when using Formula search // cso.setComplexity(EComplexity.Any); // cso.setIsotopic(EIsotopic.Any); //cso.setHasSpectra(false); //cso.setHasPatents(false); String transactionID = ""; ERequestStatus ers = null; try { transactionID = ssp.substructureSearch(sso, cso, token); System.out.println("transaction id -> " + transactionID); ers = ssp.getAsyncSearchStatus(transactionID, token); while (ers.equals(ERequestStatus.Processing)) { Thread.sleep(2000); ers = ssp.getAsyncSearchStatus(transactionID, token); } } catch (RemoteException e1) { e1.printStackTrace(); return candidates; } catch (InterruptedException e) { e.printStackTrace(); return candidates; } if (ers.equals(ERequestStatus.Failed)) { System.out.println("failed"); return candidates; } if (ers.equals(ERequestStatus.ResultReady)) { int[] CSIDs = null; System.out.println("woohoo"); try { CSIDs = ssp.getAsyncSearchResult(transactionID, token); } catch (RemoteException e) { System.err.println("Error retrieving information and parsing results."); String resultURL = "http://www.chemspider.com/Search.asmx/GetAsyncSearchResult?rid=%s&token=%s"; String format = String.format(resultURL, transactionID, token); try { URL u = new URL(format); URLConnection con = u.openConnection(); InputStream is = con.getInputStream(); String ids = IOUtils.toString(is); is.close(); Document doc = Jsoup.parse(ids); Elements elem = doc.getElementsByTag("int"); CSIDs = new int[elem.size()]; for (int i = 0; i < CSIDs.length; i++) { CSIDs[i] = Integer.parseInt(elem.get(i).text().trim()); } } catch (MalformedURLException e1) { System.err.println("Wrong URL for retrieving results!\n" + format); } catch (IOException e1) { System.err.println("Error parsing results!"); } } if (CSIDs == null || CSIDs.length == 0) return candidates; System.out.println("#CSIDs -> " + CSIDs.length); int arrLength = CSIDs.length; int splitLength = 1000; // if(CSIDs.length > splitLength) // CSIDs = Arrays.copyOf(CSIDs, splitLength); int[] temp = new int[1]; int numSplits = arrLength / splitLength; int remaining = arrLength % splitLength; if (numSplits == 0) { try { chemspiderInfo = chemSpiderProxy.getExtendedCompoundInfoArray(CSIDs, token); } catch (RemoteException e) { System.err.println("Error retrieving information and parsing results."); return candidates; } } else { int pos = 0; int current = 0; List<ExtendedCompoundInfo> eci = new ArrayList<ExtendedCompoundInfo>(); for (int i = 0; i < numSplits; i++) { System.out.println("split [" + i + "] from " + numSplits); temp = Arrays.copyOfRange(CSIDs, pos, pos + splitLength); ExtendedCompoundInfo[] part; try { part = chemSpiderProxy.getExtendedCompoundInfoArray(temp, token); } catch (RemoteException e1) { System.err .println("Error retrieving information and parsing results for split [" + i + "]."); pos = pos + splitLength; continue; } for (int j = 0; j < part.length; j++) { eci.add(part[j]); //chemspiderInfo[current] = part[j]; current++; } pos = pos + splitLength; try { Thread.sleep(5000); } catch (InterruptedException e) { System.err.println("Error while thread sleep!"); } } // add remaining stuff if (remaining > 0) { temp = Arrays.copyOfRange(CSIDs, pos, pos + remaining); ExtendedCompoundInfo[] part; try { part = chemSpiderProxy.getExtendedCompoundInfoArray(temp, token); } catch (RemoteException e) { System.err.println("Error retrieving information and parsing results."); return candidates; } for (int j = 0; j < part.length; j++) { eci.add(part[j]); //chemspiderInfo[current] = part[j]; current++; } } // copy list into array chemspiderInfo = new ExtendedCompoundInfo[eci.size()]; for (int i = 0; i < chemspiderInfo.length; i++) { chemspiderInfo[i] = eci.get(i); } } // chemspiderInfo = chemSpiderProxy.getExtendedCompoundInfoArray(CSIDs, token); // chemspiderInfo = new ExtendedCompoundInfo[CSIDs.length]; // for (int i = 0; i < chemspiderInfo.length; i++) { // chemspiderInfo[i] = chemSpiderProxy.getExtendedCompoundInfo(CSIDs[i], token); // } boolean writeSDF = true; String filename = batchFileHandler.getBatchFile().getName(); int idx = filename.lastIndexOf("."); String ending = ".sdf"; filename = filename.substring(0, idx) + "_original" + ending; File originalSDF = new File(batchFileHandler.getBatchFile().getParent(), filename); MDLV2000Writer writer = null; try { writer = new MDLV2000Writer(new FileOutputStream(originalSDF)); } catch (FileNotFoundException e1) { System.err .println("File [" + originalSDF.getAbsolutePath() + "] not found for original SDF writer!"); writeSDF = false; } if (writeSDF) { } System.out.println("# matches -> " + chemspiderInfo.length); for (int i = 0; i < chemspiderInfo.length; i++) { System.out.println(chemspiderInfo[i].getCSID() + "\t" + chemspiderInfo[i].getSMILES()); IAtomContainer ac = null; boolean used = false; try { // TODO check for kekule on new CDK SmilesParser to retain all candidates ac = sp.parseSmiles(chemspiderInfo[i].getSMILES()); used = true; } catch (InvalidSmilesException ise) { ac = null; used = false; System.err.println("skipping " + chemspiderInfo[i].getCSID()); } candidates.add(new ResultSubstructure(chemspiderInfo[i], ac, used)); if (used && writeSDF) { try { Map<Object, Object> props = ac.getProperties(); props.put("CSID", chemspiderInfo[i].getCSID()); props.put("SMILES", chemspiderInfo[i].getSMILES()); props.put("name", chemspiderInfo[i].getCommonName()); props.put("ALogP", chemspiderInfo[i].getALogP()); props.put("XLogP", chemspiderInfo[i].getXLogP()); props.put("InChI", chemspiderInfo[i].getInChI()); props.put("InChIKey", chemspiderInfo[i].getInChIKey()); props.put("MF", chemspiderInfo[i].getMF()); ac.setProperties(props); writer.write(ac); } catch (CDKException e) { System.err.println("Error writing " + chemspiderInfo[i].getCSID() + " to file [" + originalSDF.getAbsolutePath() + "]!"); } } } try { writer.close(); } catch (IOException e) { System.err.println("Error finalizing original SDF output file!"); } } return candidates; } private List<ResultSubstructure> filterCandidates(List<ResultSubstructure> candidates, String substructure, boolean include) { System.out.println("substructure filter -> " + substructure); System.out.println("include -> " + include); SMARTSQueryTool sqt = null; try { sqt = new SMARTSQueryTool(substructure); } catch (CDKException e) { System.err.println("Wrong smarts -> " + substructure); return candidates; } List<ResultSubstructure> remaining = new ArrayList<ResultSubstructure>(); // filter out container that contain strucAbsent boolean matches = false; for (ResultSubstructure rs : candidates) { if (!rs.isUsed()) continue; // SMARTS matching try { // TODO check SMILES with sp.kekulise(false); and match on sqt with toUpper(substructure) // thus removing aromaticity for nnn-cases matches = sqt.matches(rs.getMol()); //System.out.println("matches -> " + matches); if ((matches && include) || (!matches && !include)) { // keep container remaining.add(rs); System.out.println(rs.getId() + "\t" + rs.getSmiles()); } // else discard container } catch (CDKException e) { System.err.println("error while matching"); continue; } catch (NullPointerException e) { System.err.println("[" + rs.getId() + "] -> container is null?"); continue; } } System.out.println("#candidates -> " + candidates.size()); System.out.println("#remaining -> " + remaining.size()); return remaining; } private List<ResultSubstructure> filterCandidatesByMolecularFormula(List<ResultSubstructure> candidates) { if (molecularFormula.isEmpty()) // return unmodified candidate list if no molecular formula is present return candidates; IMolecularFormula filter = MolecularFormulaManipulator.getMolecularFormula(molecularFormula, DefaultChemObjectBuilder.getInstance()); List<ResultSubstructure> remaining = new ArrayList<ResultSubstructure>(); for (ResultSubstructure rs : candidates) { IAtomContainer ac = rs.getMol(); try { AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(ac); // CDKHydrogenAdder hAdder = CDKHydrogenAdder.getInstance(ac.getBuilder()); // hAdder.addImplicitHydrogens(ac); // AtomContainerManipulator.convertImplicitToExplicitHydrogens(ac); } catch (CDKException e) { // TODO Auto-generated catch block e.printStackTrace(); } IMolecularFormula toCheck = MolecularFormulaManipulator.getMolecularFormula(ac); // if(MolecularFormulaManipulator.compare(filter, toCheck)) // remaining.add(rs); // else { // System.out.println("Filter formula [" + molecularFormula + "] does not match candidate formula [" + // MolecularFormulaManipulator.getHillString(toCheck) + "]."); // } String csFormula = rs.getInfo().getMF(); if (csFormula == null) csFormula = ""; csFormula = csFormula.replaceAll("[_{}]+", ""); //if(MolecularFormulaManipulator.getHillString(filter).equals(MolecularFormulaManipulator.getHillString(toCheck))) { //if(molecularFormula.trim().equals(rs.getInfo().getMF().trim())) { if (molecularFormula.trim().compareTo(csFormula) == 0) { remaining.add(rs); // System.out.println("[" + rs.getId() + "] -> filter formula [" + molecularFormula + "] does match candidate formula [" + // MolecularFormulaManipulator.getHillString(toCheck) + "]."); } else { // System.err.println(rs.getId() + " formula " + MolecularFormulaManipulator.getHillString(toCheck) + // " does not match " + MolecularFormulaManipulator.getHillString(filter)); System.err.println(molecularFormula + " does not match " + csFormula); } // alternative: mit elements und getAtomCount/getElementCount prfen ob alle Elemente in filter // <= Elemente in toCheck sind } return remaining; } @Override public void run() { // retrieve candidates and filter candidates -> substrucPresent queryIncludes(); // filter remaining candidates -> substrucAbsent List<ResultSubstructure> current = resultsRemaining; System.out.println("begin exclude filtering"); System.out.println("original -> " + resultsOriginal.size()); System.out.println("remaining -> " + resultsRemaining.size()); if (excludes != null && excludes.size() > 0) { for (String ex : excludes) { current = filterCandidates(current, ex, substrucAbsent); } System.out.println("finally remaining filtered -> " + current.size()); } if (current.isEmpty()) { System.err.println("Nothing left!"); //return; } // filter remaining stuff for molecular formula, if present if (!molecularFormula.isEmpty()) { current = filterCandidatesByMolecularFormula(current); System.out.println("#remaining after molecular formula filter -> " + current.size()); } this.resultsRemaining = current; // after substrucPresent and substrucAbsent filtering System.out.println("resultsRemaining -> " + resultsRemaining.size()); // use remaining candidates as intermediate entry to MetFrag? // or create SDF and invoke MetFrag SDF fragmentation? /** * TODO: ChemSpider IDs als Input fr MetFrag, danach regulr MetFusionBatchMode */ } public static void main(String[] args) { String token = "eeca1d0f-4c03-4d81-aa96-328cdccf171a"; //String token = "a1004d0f-9d37-47e0-acdd-35e58e34f603"; //test(); //File file = new File("/home/mgerlich/projects/metfusion_tp/BTs/MetFusion_ChemSp_mfs/136m0498_MSMS.mf"); //File file = new File("/home/mgerlich/projects/metfusion_tp/BTs/MetFusion_ChemSp_mfs/148m0859_MSMS.mf"); // File file = new File("/home/mgerlich/projects/metfusion_tp/BTs/MetFusion_ChemSp_mfs/164m0445a_MSMS.mf"); //File file = new File("/home/mgerlich/projects/metfusion_tp/BTs/MetFusion_ChemSp_mfs/192m0757a_MSMS.mf"); //File file = new File("/home/mgerlich/projects/metfusion_tp/BTs/MetFusion_ChemSp_mfs/naringenin.mf"); //File file = new File("/home/mgerlich/projects/metfusion_tp/BTs/Known_BT_MSMS_ChemSp/1MeBT_MSMS.mf"); //File file = new File("/home/mgerlich/projects/metfusion_tp/BTs/Unknown_BT_MSMS_ChemSp/mf_with_substruct_formula/150m0655a_MSMS.mf"); File file = new File( "C:/Users/Michael/Dropbox/Eawag_IPB_Shared_MassBank/BTs/Unknown_BT_MSMS_ChemSp/mf_with_substruct_formula/192m0757b_MSMS.mf"); MetFusionBatchFileHandler mbf = new MetFusionBatchFileHandler(file); try { mbf.readFile(); } catch (IOException e) { //System.out.println(e.getMessage()); System.err.println( "Error reading from MetFusion settings file [" + file.getAbsolutePath() + "]. Aborting!"); System.exit(-1); } MetFusionBatchSettings settings = mbf.getBatchSettings(); List<String> absent = settings.getSubstrucAbsent(); List<String> present = settings.getSubstrucPresent(); for (String s : present) { System.out.println("present -> " + s); } for (String s : absent) { System.out.println("absent -> " + s); } String formula = settings.getMfFormula(); System.out.println("formula -> " + formula); boolean useFormulaAsQuery = true; boolean useSDF = false; String sdfFile = ""; if (useSDF) { sdfFile = "C:/Users/Michael/Dropbox/Eawag_IPB_Shared_MassBank/BTs/Unknown_BT_MSMS_ChemSp/mf_with_substruct_formula/results_afterFormulaQuery/192m0757b_MSMS.sdf"; if (sdfFile.isEmpty()) { // TODO alternatively use SDF file from query file? System.err.println("SDF file needs to be specified! Exiting."); System.exit(-1); } } SubstructureSearch ss = new SubstructureSearch(present, absent, token, formula, mbf, useFormulaAsQuery, useSDF, sdfFile); ss.run(); List<ResultSubstructure> remaining = ss.getResultsRemaining(); List<Result> resultsForSDF = new ArrayList<Result>(); StringBuilder sb = new StringBuilder(); String sep = ","; for (ResultSubstructure rs : remaining) { sb.append(rs.getId()).append(sep); Result r = new Result(rs.getPort(), rs.getId(), rs.getName(), rs.getScore()); r.setMol(rs.getMol()); r.setSmiles(rs.getSmiles()); r.setInchi(rs.getInchi()); r.setInchikey(rs.getInchikey()); resultsForSDF.add(r); } String ids = sb.toString(); String fileSep = System.getProperty("file.separator"); if (!ids.isEmpty()) { ids = ids.substring(0, ids.length() - 1); System.out.println("ids -> " + ids); settings.setMfDatabaseIDs(ids); String filename = file.getName(); String prefix = filename.substring(0, filename.lastIndexOf(".")); filename = filename.replace(prefix, prefix + "_ids"); String dir = file.getParent(); System.out.println("dir -> " + dir); if (!dir.endsWith(fileSep)) dir += fileSep; File output = new File(file.getParent(), filename); mbf.writeFile(output, settings); SDFOutputHandler so = new SDFOutputHandler(dir + prefix + ".sdf"); boolean writeOK = so.writeOriginalResults(resultsForSDF, false); if (!writeOK) System.err.println("Error writing SDF [" + so.getFilename()); } } public static void test() { String token = "eeca1d0f-4c03-4d81-aa96-328cdccf171a"; MassSpecAPISoapProxy chemSpiderProxy = new MassSpecAPISoapProxy(); try { ExtendedCompoundInfo cpdInfo = chemSpiderProxy.getExtendedCompoundInfo(905, token); System.out.println(cpdInfo.getCommonName()); chemSpiderProxy.searchByMass2(272.04d, 0.001d); } catch (RemoteException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } SearchSoapProxy ssp = new SearchSoapProxy(); SubstructureSearchOptions sso = new SubstructureSearchOptions(); sso.setMatchTautomers(false); //sso.setMolecule("CC(=O)Oc1ccccc1C(=O)O"); //sso.setMolecule("O=C(\\C1=C(/O)\\C(=C(\\O)C(C1=O)(C\\C=C(/C)C)C\\C=C(/C)C)C\\C=C(/C)C)C(C)C"); sso.setMolecule("Cc1cccc2nnnc12"); CommonSearchOptions cso = new CommonSearchOptions(); cso.setComplexity(EComplexity.Any); cso.setIsotopic(EIsotopic.Any); // NotLabeled when using Formula search cso.setHasSpectra(false); cso.setHasPatents(false); String transactionID = ""; ERequestStatus ers = null; try { transactionID = ssp.substructureSearch(sso, cso, token); System.out.println("transaction id -> " + transactionID); ers = ssp.getAsyncSearchStatus(transactionID, token); while (ers.equals(ERequestStatus.Processing)) { Thread.sleep(2000); ers = ssp.getAsyncSearchStatus(transactionID, token); } } catch (RemoteException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } if (ers.equals(ERequestStatus.Failed)) { System.out.println("failed"); System.exit(-1); } //String strucAbsent = "CC=C(C)C"; String strucAbsent = "O=CO"; // O=CO[H] SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance()); IAtomContainer out = null; try { out = sp.parseSmiles(strucAbsent); } catch (InvalidSmilesException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } if (ers.equals(ERequestStatus.ResultReady)) { System.out.println("woohoo"); try { int[] CSIDs = ssp.getAsyncSearchResult(transactionID, token); ExtendedCompoundInfo[] info = chemSpiderProxy.getExtendedCompoundInfoArray(CSIDs, token); System.out.println("# matches -> " + info.length); List<IAtomContainer> containersList = new ArrayList<IAtomContainer>(); for (int i = 0; i < info.length; i++) { System.out.println( info[i].getCSID() + "\t" + info[i].getCommonName() + "\t" + info[i].getSMILES()); IAtomContainer ac = null; try { ac = sp.parseSmiles(info[i].getSMILES()); } catch (InvalidSmilesException ise) { continue; } containersList.add(ac); } // String sdf = chemSpiderProxy.getRecordsSdf(transactionID, token); // InputStream in = IOUtils.toInputStream(sdf); // MDLReader reader = new MDLReader(in); // ChemFile chemFile = (ChemFile) reader.read((ChemObject) new ChemFile()); // List<IAtomContainer> containersList = ChemFileManipulator.getAllAtomContainers(chemFile); System.out.println("# mols -> " + containersList.size()); SMARTSQueryTool sqt = new SMARTSQueryTool(strucAbsent); // filter out container that contain strucAbsent for (IAtomContainer container : containersList) { // MCSS search List<IAtomContainer> mcsslist = UniversalIsomorphismTester.getOverlaps(container, out); int maxmcss = -9999999; IAtomContainer maxac = null; for (int j = 0; j < mcsslist.size(); j++) { IAtomContainer a = (IAtomContainer) mcsslist.get(j); if (a.getAtomCount() > maxmcss) { // TODO: leave out candidates that match the substructure !!! maxmcss = a.getAtomCount(); maxac = a; } if (a.getAtomCount() == out.getAtomCount()) { // matching number of atoms between MCSS and structure to leave out System.out.println("#atoms in MCSS matches substrucAbsent -> filter out"); break; } } System.out.println("maxac -> " + maxac.getAtomCount()); // SMARTS matching boolean matches = sqt.matches(container); // , true if (matches) { // leave out container System.out.println("matches"); int nmatch = sqt.countMatches(); List<List<Integer>> mappings = sqt.getMatchingAtoms(); for (int i = 0; i < nmatch; i++) { List<Integer> atomIndices = mappings.get(i); System.out.println("#atom indices -> " + atomIndices.size()); } } else { // keep container System.out.println("no match"); } } } catch (RemoteException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (CDKException e) { // TODO Auto-generated catch block e.printStackTrace(); } } else System.out.println("oh no"); } public List<String> getIncludes() { return includes; } public void setIncludes(List<String> includes) { this.includes = includes; } public List<String> getExcludes() { return excludes; } public void setExcludes(List<String> excludes) { this.excludes = excludes; } public String getToken() { return token; } public void setToken(String token) { this.token = token; } public ExtendedCompoundInfo[] getChemspiderInfo() { return chemspiderInfo; } public void setChemspiderInfo(ExtendedCompoundInfo[] chemspiderInfo) { this.chemspiderInfo = chemspiderInfo; } public List<ResultSubstructure> getResultsOriginal() { return resultsOriginal; } public void setResultsOriginal(List<ResultSubstructure> resultsOriginal) { this.resultsOriginal = resultsOriginal; } public List<ResultSubstructure> getResultsRemaining() { return resultsRemaining; } public void setResultsRemaining(List<ResultSubstructure> resultsRemaining) { this.resultsRemaining = resultsRemaining; } public String getMolecularFormula() { return molecularFormula; } public void setMolecularFormula(String molecularFormula) { this.molecularFormula = molecularFormula; } public MetFusionBatchFileHandler getBatchFileHandler() { return batchFileHandler; } public void setBatchFileHandler(MetFusionBatchFileHandler batchFileHandler) { this.batchFileHandler = batchFileHandler; } public boolean isFormulaFirst() { return formulaFirst; } public void setFormulaFirst(boolean formulaFirst) { this.formulaFirst = formulaFirst; } public boolean isSdfFirst() { return sdfFirst; } public void setSdfFirst(boolean sdfFirst) { this.sdfFirst = sdfFirst; } public String getSdfFile() { return sdfFile; } public void setSdfFile(String sdfFile) { this.sdfFile = sdfFile; } }