aml.util.MeSHParser.java Source code

Java tutorial

Introduction

Here is the source code for aml.util.MeSHParser.java

Source

/******************************************************************************
* Copyright 2013-2014 LASIGE                                                  *
*                                                                             *
* Licensed under the Apache License, Version 2.0 (the "License"); you may     *
* not use this file except in compliance with the License. You may obtain a   *
* copy of the License at http://www.apache.org/licenses/LICENSE-2.0           *
*                                                                             *
* Unless required by applicable law or agreed to in writing, software         *
* distributed under the License is distributed on an "AS IS" BASIS,           *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.    *
* See the License for the specific language governing permissions and         *
* limitations under the License.                                              *
*                                                                             *
*******************************************************************************
* Parses the MeSH xml file into a Lexicon.                                    *
* WARNING: Requires the MeSH xml and dtd files, which are not released with   *
* AgreementMakerLight                                                         * 
*                                                                             *
* @author Daniel Faria, Cláudia Duarte                                        *
* @date 12-08-2014                                                            *
* @version 2.1                                                                *
******************************************************************************/
package aml.util;

import java.io.File;
import java.util.Iterator;
import java.util.Vector;

import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import aml.ontology.Lexicon;
import aml.settings.LexicalType;

/**
 * Command-line utility that reads the MeSH XML file and stores the names
 * found in it (concept names, CAS names and terms) in a Lexicon file.
 */
public class MeSHParser {
    /**
     * Reads "store/knowledge/mesh.xml", walks every
     * DescriptorRecord/ConceptList/Concept element, adds the names of each
     * Concept to a new Lexicon and saves it to "store/knowledge/mesh.lexicon".
     * Each Concept that has a name receives its own consecutive index,
     * starting at 0.
     *
     * @param args not used
     * @throws Exception propagated from reading the XML file or from the
     *         Lexicon calls
     */
    public static void main(String[] args) throws Exception {
        Lexicon lexicon = new Lexicon();

        SAXReader reader = new SAXReader();
        Document doc = reader.read(new File("store/knowledge/mesh.xml"));
        Element root = doc.getRootElement();

        int index = 0;
        Iterator<?> records = root.elementIterator("DescriptorRecord");
        while (records.hasNext()) {
            Element concList = ((Element) records.next()).element("ConceptList");
            // A record without a ConceptList has nothing to contribute
            if (concList == null) {
                continue;
            }
            Iterator<?> conc = concList.elementIterator("Concept");
            while (conc.hasNext()) {
                // Only concepts that were actually added consume an index
                if (addConcept(lexicon, index, (Element) conc.next())) {
                    index++;
                }
            }
        }
        lexicon.save("store/knowledge/mesh.lexicon");
    }

    /**
     * Adds the names of a single Concept element to the lexicon under the
     * given index: the ConceptName as LABEL (weight 0.90), the CASN1Name, if
     * present, as FORMULA (weight 0.85) and every Term string that differs
     * from the concept name as EXACT_SYNONYM (weight 0.85).
     *
     * @param lexicon the lexicon to add the names to
     * @param index the index under which the names are added
     * @param concept the Concept element to read
     * @return true if the concept had a name and was added, false if it was
     *         skipped because no ConceptName/String text was found
     */
    private static boolean addConcept(Lexicon lexicon, int index, Element concept) {
        Element nameElement = concept.element("ConceptName");
        String conceptName = (nameElement == null) ? null : nameElement.elementText("String");
        // Without a name there is no label to register and nothing to
        // compare the terms against, so the concept is skipped
        if (conceptName == null) {
            return false;
        }
        lexicon.add(index, conceptName, LexicalType.LABEL, "", 0.90);

        String casN1Name = concept.elementText("CASN1Name");
        if (casN1Name != null) {
            lexicon.add(index, casN1Name, LexicalType.FORMULA, "", 0.85);
        }

        Element termList = concept.element("TermList");
        if (termList != null) {
            Iterator<?> terms = termList.elementIterator("Term");
            while (terms.hasNext()) {
                String termName = ((Element) terms.next()).elementText("String");
                // The term equal to the concept name is already stored as the label
                if (termName != null && !conceptName.equals(termName)) {
                    lexicon.add(index, termName, LexicalType.EXACT_SYNONYM, "", 0.85);
                }
            }
        }
        return true;
    }
}