org.ut.biolab.medsavant.server.ontology.OBOParser.java Source code

Introduction

Here is the source code for org.ut.biolab.medsavant.server.ontology.OBOParser.java, the MedSavant server class that parses an OBO (Open Biomedical Ontology) file and returns its terms as a map of OntologyTerm objects keyed by term ID.
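The parser reads the file stanza by stanza; within each [Term] stanza it recognizes the keys id, alt_id, name, def, is_a and is_obsolete, and discards anything after an unescaped "!" comment character. An illustrative stanza of the kind it consumes (the XX prefix and all values are made up for this example) looks like this:

[Term]
id: XX:0000001
name: example term
alt_id: XX:0000002
def: "A short, quote-delimited definition." [XX:curator]
is_a: XX:0000000 ! a parent term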

Source

/**
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package org.ut.biolab.medsavant.server.ontology;

import java.io.*;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.ut.biolab.medsavant.shared.model.OntologyTerm;
import org.ut.biolab.medsavant.shared.model.OntologyType;
import org.ut.biolab.medsavant.shared.util.RemoteFileCache;

/**
 * Parses an OBO (Open Biomedical Ontology) file and loads it into a database table.
 *
 * @author tarkvara
 */
public class OBOParser {

    private static final Log LOG = LogFactory.getLog(OBOParser.class);

    private final OntologyType ontology;
    private int lineNum;

    public OBOParser(OntologyType ont) {
        this.ontology = ont;
    }

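    /**
     * Parses the OBO file fetched from the given URL (via the RemoteFileCache)
     * and returns the non-obsolete terms keyed by their ontology ID.
     */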
    Map<String, OntologyTerm> load(URL source) throws FileNotFoundException, IOException {
        BufferedReader reader = new BufferedReader(new FileReader(RemoteFileCache.getCacheFile(source)));

        Map<String, OntologyTerm> terms = new HashMap<String, OntologyTerm>();
        String line;
        while ((line = reader.readLine()) != null) {
            lineNum++;
            if (line.equals("[Term]")) {
                OntologyTerm t = parseNextTerm(reader);
                if (t != null) {
                    if (t.getID() == null || t.getName() == null) {
                        LOG.info(String.format("Defective ontology term at line %d: %s/%s", lineNum, t.getID(),
                                t.getName()));
                        break;
                    } else {
                        if (t.getID().startsWith("results")) {
                            LOG.info(String.format("Loading \"%s\" at line %d.", line, lineNum));
                        }
                        terms.put(t.getID(), t);
                    }
                }
            }
        }

        reader.close();
        return terms;
    }

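    /**
     * Parses the key/value lines of a single [Term] stanza, rewinding the reader
     * when the next stanza header is reached.  Returns null if the term is marked is_obsolete.
     */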
    private OntologyTerm parseNextTerm(BufferedReader reader) throws IOException {

        String line;
        String id = null, name = null, description = null;
        List<String> altIDs = new ArrayList<String>();
        List<String> parentIDs = new ArrayList<String>();
        while ((line = reader.readLine()) != null) {
            if (line.startsWith("[")) {
                // We've hit the next term (or typedef).  Rewind to the beginning of the line.
                reader.reset();
                break;
            } else {
                lineNum++;
                if (line.length() > 0 && line.charAt(0) != '!') {
                    int colonPos = line.indexOf(":");
                    if (colonPos > 0) {
                        String key = line.substring(0, colonPos);
                        String value = line.substring(colonPos + 1);
                        int commentPos = indexOf(value, '!');
                        if (commentPos > 0) {
                            value = value.substring(0, commentPos);
                        }
                        value = value.trim();
                        if (key.equals("id")) {
                            id = value;
                        } else if (key.equals("alt_id")) {
                            altIDs.add(value);
                        } else if (key.equals("name")) {
                            name = value;
                        } else if (key.equals("def")) {
                            // Def consists of a quote-delimited string followed by some (ignored) reference material.
                            int quotePos = indexOf(value, '\"');
                            if (quotePos >= 0) {
                                value = value.substring(quotePos + 1);
                                quotePos = indexOf(value, '\"');
                                if (quotePos >= 0) {
                                    value = value.substring(0, quotePos);
                                    description = value.replace("\\", "");
                                }
                            }
                        } else if (key.equals("is_a")) {
                            parentIDs.add(value);
                        } else if (key.equals("is_obsolete") && value.equals("true")) {
                            return null;
                        }
                    }
                }
                reader.mark(10); // So we can rewind if the line is the next [Term].
            }
        }
        return new OntologyTerm(ontology, id, name, description, altIDs.toArray(new String[0]),
                parentIDs.toArray(new String[0]));
    }

    /**
     * Like the normal String.indexOf method, but pays attention to escape characters.
     * @param s the string to be searched
     * @param c the character we're looking for
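     * @return the index of the first occurrence of c that is not preceded by a backslash, or -1 if there is none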
     */
    private static int indexOf(String s, char c) {
        int pos = s.indexOf(c);
        while (pos > 0 && s.charAt(pos - 1) == '\\') {
            pos = s.indexOf(c, pos + 1);
        }
        return pos;
    }
}
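
Example usage

A minimal usage sketch is shown below. It assumes the caller lives in the same package (load is package-private), that OntologyType is an enum whose constant name is passed on the command line, and that the URL points at a reachable .obo file; all names outside the listing above are illustrative and not part of MedSavant itself.

package org.ut.biolab.medsavant.server.ontology;

import java.net.URL;
import java.util.Map;

import org.ut.biolab.medsavant.shared.model.OntologyTerm;
import org.ut.biolab.medsavant.shared.model.OntologyType;

// Illustrative driver class, not part of the MedSavant sources.
public class OBOParserExample {
    public static void main(String[] args) throws Exception {
        // e.g. args[0] = "GO", args[1] = "http://example.org/ontology.obo" (both hypothetical)
        OntologyType ont = OntologyType.valueOf(args[0]);
        URL source = new URL(args[1]);

        // load() downloads/caches the file and returns the parsed terms keyed by ontology ID.
        Map<String, OntologyTerm> terms = new OBOParser(ont).load(source);

        System.out.println(terms.size() + " terms loaded");
        for (OntologyTerm t : terms.values()) {
            System.out.println(t.getID() + "\t" + t.getName());
        }
    }
}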