net.bioclipse.pubchem.business.PubChemManager.java Source code

Java tutorial

Introduction

Here is the source code for net.bioclipse.pubchem.business.PubChemManager.java

Source

/*******************************************************************************
 * Copyright (c) 2006-2009  Egon Willighagen <egonw@users.sf.net>
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contact: http://www.bioclipse.net/
 ******************************************************************************/
package net.bioclipse.pubchem.business;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.PatternSyntaxException;

import net.bioclipse.cdk.business.CDKManager;
import net.bioclipse.cdk.domain.ICDKMolecule;
import net.bioclipse.core.business.BioclipseException;
import net.bioclipse.core.domain.IMolecule;
import net.bioclipse.core.domain.RecordableList;
import net.bioclipse.managers.business.IBioclipseManager;
import net.bioclipse.rdf.business.IRDFStore;
import net.bioclipse.rdf.business.RDFManager;
import nu.xom.Builder;
import nu.xom.Document;
import nu.xom.Nodes;
import nu.xom.ParsingException;
import nu.xom.ValidityException;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.eclipse.core.resources.IFile;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.NullProgressMonitor;

public class PubChemManager implements IBioclipseManager {

    private final static String EUTILS_URL_BASE = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils";
    private final static String PUBCHEM_URL_BASE = "http://pubchem.ncbi.nlm.nih.gov/";
    private final static String PUBCHEMRDF_URL_BASE = "http://rdf.ncbi.nlm.nih.gov/pubchem/compound/";

    private final static String TOOL = "bioclipse.net";

    private static final CDKManager cdk = new CDKManager();
    private static final RDFManager rdf = new RDFManager();

    public String getManagerName() {
        return "pubchem";
    }

    public IFile loadCompound(int cid, IFile target, IProgressMonitor monitor)
            throws IOException, BioclipseException, CoreException {
        return loadCompoundAny(cid, target, monitor, "DisplayXML");
    }

    public IFile loadCompound3d(int cid, IFile target, IProgressMonitor monitor)
            throws IOException, BioclipseException, CoreException {
        return loadCompoundAny(cid, target, monitor, "3DDisplaySDF");
    }

    private IFile loadCompoundAny(int cid, IFile target, IProgressMonitor monitor, String type)
            throws IOException, BioclipseException, CoreException {
        if (monitor == null) {
            monitor = new NullProgressMonitor();
        }

        if (target == null) {
            throw new BioclipseException("Cannot save to a NULL file.");
        }

        String molString = downloadAsString(cid, type, monitor);

        if (target.exists()) {
            target.setContents(new ByteArrayInputStream(molString.getBytes()), true, false, null);
        } else {
            target.create(new ByteArrayInputStream(molString.getBytes()), false, null);
        }

        monitor.done();
        return target;
    }

    public IFile loadCompoundRDF(int cid, IFile target, IProgressMonitor monitor)
            throws IOException, BioclipseException, CoreException {
        if (target == null) {
            throw new BioclipseException("Cannot save to a NULL file.");
        }
        if (monitor == null)
            monitor = new NullProgressMonitor();

        String downloadURI = PUBCHEMRDF_URL_BASE + "CID" + cid;
        String rdfString = downloadAsString(downloadURI, "application/rdf+xml", monitor);

        if (target.exists()) {
            target.setContents(new ByteArrayInputStream(rdfString.getBytes()), true, false, null);
        } else {
            target.create(new ByteArrayInputStream(rdfString.getBytes()), false, null);
        }

        monitor.done();
        return target;
    }

    private String replaceSpaces(String molecule2) {
        StringBuffer result = new StringBuffer();
        for (int i = 0; i < molecule2.length(); i++) {
            if (Character.isWhitespace(molecule2.charAt(i))) {
                result.append("+");
            } else {
                result.append(molecule2.charAt(i));
            }
        }
        return result.toString();
    }

    public List<Integer> search(String query, IProgressMonitor monitor)
            throws IOException, BioclipseException, CoreException {
        if (monitor == null) {
            monitor = new NullProgressMonitor();
        }
        int max = 50;

        List<Integer> results = new ArrayList<Integer>();
        monitor.beginTask("Searching PubChem for '" + query + "'...", 1);

        String db = "pccompound";
        query = replaceSpaces(query);

        String esearch = EUTILS_URL_BASE + "/esearch.fcgi?" + "db=" + db + "&retmax=" + max + "&usehistory=y&tool="
                + TOOL + "&term=" + query;

        System.out.println("URL: " + esearch);
        URL queryURL = new URL(esearch);
        URLConnection connection = queryURL.openConnection();

        monitor.subTask("Processing search results...");
        Builder parser = new Builder();
        Document doc;
        try {
            doc = parser.build(connection.getInputStream());
            Nodes countNodes = doc.query("/eSearchResult/Count");
            if (countNodes.size() > 0) {
                System.out.println(countNodes.get(0).toString());
            } else {
                monitor.done();
                return results;
            }

            Nodes cidNodes = doc.query("/eSearchResult/IdList/Id");

            for (int cidCount = 0; cidCount < cidNodes.size(); cidCount++) {
                String cidStr = cidNodes.get(cidCount).getValue();
                int cid = Integer.parseInt(cidStr);
                results.add(cid);
            }
        } catch (ValidityException e) {
            e.printStackTrace();
        } catch (ParsingException e) {
            e.printStackTrace();
        } finally {
            monitor.done();
        }

        return results;
    }

    public IMolecule download(Integer cid, IProgressMonitor monitor)
            throws IOException, BioclipseException, CoreException {
        monitor.beginTask("Downloading Compound from PubChem...", 2);
        String molstring = downloadAsString(cid, monitor);
        if (monitor.isCanceled())
            return null;
        if (molstring == null || molstring.isEmpty()) {
            throw new BioclipseException("Could not read molecule from" + cid);
        }
        org.openscience.cdk.io.formats.IChemFormat format = cdk.getFormat("PubChemCompoundXMLFormat");
        ICDKMolecule molecule = cdk.loadMolecule(new ByteArrayInputStream(molstring.getBytes()), format, null,
                new NullProgressMonitor());
        // ICDKMolecule molecule = cdk.fromString(molstring);
        monitor.worked(1);
        return molecule;
    }

    public IMolecule download3d(Integer cid, IProgressMonitor monitor)
            throws IOException, BioclipseException, CoreException {
        monitor.beginTask("Downloading Compound 3D from PubChem...", 2);
        String molstring = download3dAsString(cid, monitor);
        if (monitor.isCanceled())
            return null;

        // convert the returned SD file into a MDL molfile by stripping the
        // $$$$ and beyond
        molstring = molstring.substring(0, molstring.indexOf("$$$$"));

        ICDKMolecule molecule = cdk.fromString(molstring);
        monitor.worked(1);
        return molecule;
    }

    private String downloadAsString(Integer cid, String type, IProgressMonitor monitor)
            throws IOException, BioclipseException, CoreException {
        if (monitor == null)
            monitor = new NullProgressMonitor();

        monitor.subTask("Downloading CID " + cid);
        String efetch = PUBCHEM_URL_BASE + "summary/summary.cgi?cid=" + cid + "&disopt=" + type;
        return downloadAsString(efetch, null, monitor);
    }

    private String downloadAsString(String URL, String accepts, IProgressMonitor monitor)
            throws IOException, BioclipseException, CoreException {
        if (monitor == null)
            monitor = new NullProgressMonitor();

        String fileContent = "";
        try {
            monitor.subTask("Downloading from " + URL);
            HttpClient client = new HttpClient();
            GetMethod method = new GetMethod(URL);
            if (accepts != null) {
                method.setRequestHeader("Accept", accepts);
                method.setRequestHeader("Content-Type", accepts);
            }
            client.executeMethod(method);

            InputStream responseStream = method.getResponseBodyAsStream();
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            int nRead;
            byte[] data = new byte[16384];
            while ((nRead = responseStream.read(data, 0, data.length)) != -1) {
                buffer.write(data, 0, nRead);
            }
            buffer.flush();
            responseStream.close();
            method.releaseConnection();
            fileContent = new String(buffer.toByteArray());
            monitor.worked(1);
        } catch (PatternSyntaxException exception) {
            exception.printStackTrace();
            throw new BioclipseException("Invalid Pattern.", exception);
        } catch (MalformedURLException exception) {
            exception.printStackTrace();
            throw new BioclipseException("Invalid URL.", exception);
        }
        return fileContent;
    }

    public String downloadAsString(Integer cid, IProgressMonitor monitor)
            throws IOException, BioclipseException, CoreException {
        return downloadAsString(cid, "DisplayXML", monitor);
    }

    public String download3dAsString(Integer cid, IProgressMonitor monitor)
            throws IOException, BioclipseException, CoreException {
        return downloadAsString(cid, "3DDisplaySDF", monitor);
    }

    public List<IMolecule> download(List<Integer> cids, IProgressMonitor monitor)
            throws IOException, BioclipseException, CoreException {
        if (monitor == null)
            monitor = new NullProgressMonitor();
        monitor.beginTask("Downloading Compounds from PubChem...", cids.size());
        List<IMolecule> results = new RecordableList<IMolecule>();
        for (Integer cid : cids) {
            if (monitor.isCanceled())
                return null;
            results.add(download(cid, monitor));
            monitor.worked(1);
        }
        return results;
    }

    public List<IMolecule> download3d(List<Integer> cids, IProgressMonitor monitor)
            throws IOException, BioclipseException, CoreException {
        if (monitor == null)
            monitor = new NullProgressMonitor();
        monitor.beginTask("Downloading Compounds 3D from PubChem...", cids.size());
        List<IMolecule> results = new RecordableList<IMolecule>();
        for (Integer cid : cids) {
            if (monitor.isCanceled())
                return null;
            results.add(download3d(cid, monitor));
            monitor.worked(1);
        }
        return results;
    }

    public IRDFStore downloadRDF(Integer cid, IRDFStore store, IProgressMonitor monitor)
            throws IOException, BioclipseException, CoreException {
        if (monitor == null)
            monitor = new NullProgressMonitor();

        String downloadURI = PUBCHEMRDF_URL_BASE + "CID" + cid;
        String rdfContent = downloadAsString(downloadURI, "application/rdf+xml", monitor);
        rdf.importFromString(store, rdfContent, "RDF/XML", monitor);
        return store;
    }
}