Java tutorial
/*
 * Copyright: (c) 2004-2010 Mayo Foundation for Medical Education and
 * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
 * triple-shield Mayo logo are trademarks and service marks of MFMER.
 *
 * Except as contained in the copyright notice above, or as used to identify
 * MFMER as the author of this software, the trade names, trademarks, service
 * marks, or product names of the copyright holder shall not be used in
 * advertising, promotion or otherwise in connection with this software without
 * prior written authorization of the copyright holder.
 *
 * Licensed under the Eclipse Public License, Version 1.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.eclipse.org/legal/epl-v10.html
 *
 */
package edu.mayo.informatics.lexgrid.convert.directConversions;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.sql.Date;
import java.sql.SQLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.LexGrid.LexBIG.DataModel.NCIHistory.NCIChangeEvent;
import org.LexGrid.LexBIG.DataModel.NCIHistory.types.ChangeType;
import org.LexGrid.LexBIG.Utility.logging.LgMessageDirectorIF;
import org.LexGrid.util.SimpleMemUsageReporter;
import org.LexGrid.util.SimpleMemUsageReporter.Snapshot;
import org.LexGrid.versions.SystemRelease;
import org.apache.commons.lang.StringUtils;
import org.lexevs.locator.LexEvsServiceLocator;

import edu.mayo.informatics.lexgrid.convert.exceptions.ConnectionFailure;

/**
 * Loads UMLS concept history from the delimited files <code>MRCUI.RRF</code>
 * and <code>MRCONSO.RRF</code> found under a folder URI and stores it through
 * the NCI history service obtained from {@link LexEvsServiceLocator}.
 * <p>
 * Each data line of <code>MRCUI.RRF</code> yields (at most once per release
 * id) a {@link SystemRelease} and one {@link NCIChangeEvent}. Concept names
 * are looked up in a map built from <code>MRCONSO.RRF</code>.
 */
public class UMLSHistoryFileToSQL {

    /** Name of the history file, resolved against the folder URI. */
    private static final String MRCUI_FILE = "MRCUI.RRF";

    /** Name of the concept-name file, resolved against the folder URI. */
    private static final String MRCONSO_FILE = "MRCONSO.RRF";

    /** Charset used to decode the RRF files (UMLS distributes them as UTF-8). */
    private static final String RRF_CHARSET = "UTF-8";

    /** Number of delimited fields {@link #validateFile} requires per MRCUI line. */
    private static final int MRCUI_FIELD_COUNT = 7;

    /** Number of delimited fields {@link #validateFile} requires per MRCONSO line. */
    private static final int MRCONSO_FIELD_COUNT = 18;

    /** Number of data lines checked per file when only a quick validation is requested. */
    private static final int QUICK_VALIDATION_LINE_LIMIT = 10;

    /** Name stored when a concept code has no entry in the MRCONSO name map. */
    private static final String NAME_NOT_AVAILABLE = "Not Available.";

    /** Holds reference to message director. */
    private final LgMessageDirectorIF message_;

    /**
     * Holds the token which separates the fields in a flat file.
     * <p>
     * NOTE(review): this is static but is overwritten by the constructor, so
     * the value is shared by all instances and is also the default used by
     * the static {@link #validateFile}. Left as-is to preserve behavior.
     */
    private static String token_ = "|";

    /**
     * When true, a failing line is reported through
     * {@code fatalAndThrowException}; when false it is logged and skipped.
     */
    private final boolean failOnAllErrors_;

    /** Maps a concept code to the concept name read from MRCONSO.RRF. */
    private final Map<String, String> mrconsoConceptName_ = new HashMap<String, String>();

    /** Maps a release id to its release date; also records which releases were already inserted. */
    private final Map<String, Date> systemReleaseDates_ = new HashMap<String, Date>();

    /** Holds string constant "http://nlm.gov" */
    private static final String releaseAgency = "http://nlm.gov";

    /** Prefix of the release URI; the release id is appended after a ':'. */
    private static final String metaURN = "urn:oid:2.16.840.1.113883.3.26.1.2";

    /** Coding scheme URI passed to every history service call. */
    private final String codingSchemeUri;

    /**
     * UMLS History File to SQL Converter.
     *
     * @param codingSchemeUri
     *            URI of the coding scheme the history is stored for
     * @param failOnAllErrors
     *            true to treat every failing line as fatal, false to log the
     *            failure and continue with the next line
     * @param messageDirector
     *            log message output
     * @param token
     *            parsing token; if blank the current default ("|" unless
     *            changed by an earlier instance) is kept
     * @throws SQLException
     *             declared for callers; not thrown by the visible code
     */
    public UMLSHistoryFileToSQL(String codingSchemeUri, boolean failOnAllErrors, LgMessageDirectorIF messageDirector,
            String token) throws SQLException {
        message_ = messageDirector;
        failOnAllErrors_ = failOnAllErrors;
        if (StringUtils.isNotBlank(token)) {
            token_ = token;
        }
        this.codingSchemeUri = codingSchemeUri;
    }

    /**
     * Reads the history data from <code>MRCUI.RRF</code> under the given
     * folder and stores it through the history service. Concept names are
     * first loaded from <code>MRCONSO.RRF</code> in the same folder.
     * <p>
     * Lines that are empty or start with '#' are skipped. A line that fails
     * is either fatal or logged and skipped, depending on
     * {@code failOnAllErrors}.
     *
     * @param folderPath
     *            folder URI containing MRCUI.RRF and MRCONSO.RRF
     * @throws Exception
     *             if MRCUI.RRF cannot be opened or read, or a line fails
     *             while {@code failOnAllErrors} is set
     */
    public void loadUMLSHistory(URI folderPath) throws Exception {
        // Open MRCUI first so a missing history file fails before MRCONSO is read.
        BufferedReader reader = getReader(folderPath.resolve(MRCUI_FILE));
        try {
            readMrConso(folderPath);
            message_.info("Loading History info...");

            int lineNo = 0;
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                ++lineNo;
                if (isCommentOrBlank(line)) {
                    continue;
                }

                List<String> elements = deTokenizeString(line, token_);

                try {
                    loadSystemReleaseInfo(elements);
                } catch (Exception e) {
                    if (failOnAllErrors_) {
                        // this call is expected to rethrow the exception
                        message_.fatalAndThrowException("Exception while loading System Release Info @ line : "
                                + lineNo, e);
                    } else {
                        message_.error("Error occured in line: " + lineNo, e);
                        // go to next line
                        continue;
                    }
                }

                try {
                    loadUMLSHistoryInfo(elements);
                } catch (Exception e) {
                    if (failOnAllErrors_) {
                        // this call is expected to rethrow the exception
                        message_.fatalAndThrowException("Exception while loadUMLSHistoryInfo @ line : " + lineNo, e);
                    } else {
                        message_.error("Error occured in line: " + lineNo, e);
                        // go to next line
                        continue;
                    }
                }
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Converts a release id into the date of the first day of its release
     * month (midnight, default time zone).
     * <p>
     * The first four characters are the year. The remainder is either one of
     * the codes AA, AB, AC, AD (mapped to January, April, July, October) or a
     * month number from 1 to 12.
     *
     * @param releaseId
     *            release id, e.g. "2009AA" or "200904"
     * @return the release date
     * @throws Exception
     *             if the release id is not in the required format
     */
    private static Date getSystemReleaseDate(String releaseId) throws Exception {
        if (releaseId == null || releaseId.length() < 4) {
            throw new Exception("Release ID is not in required format: " + releaseId);
        }

        int year;
        try {
            year = Integer.parseInt(releaseId.substring(0, 4));
        } catch (NumberFormatException e) {
            throw new Exception("Release ID is not in required format: " + releaseId, e);
        }
        int month = parseReleaseMonth(releaseId.substring(4));

        Calendar cal = Calendar.getInstance();
        cal.set(year, month, 1, 0, 0, 0);
        cal.set(Calendar.MILLISECOND, 0);
        return new Date(cal.getTimeInMillis());
    }

    /**
     * Maps the month part of a release id to a {@link Calendar} month
     * constant.
     *
     * @param sMonth
     *            "AA".."AD" (case-insensitive) or a month number 1..12
     * @return the matching {@code Calendar} month constant
     * @throws Exception
     *             if the value is neither a known code nor a valid month
     */
    private static int parseReleaseMonth(String sMonth) throws Exception {
        if ("AA".equalsIgnoreCase(sMonth)) {
            return Calendar.JANUARY;
        }
        if ("AB".equalsIgnoreCase(sMonth)) {
            return Calendar.APRIL;
        }
        if ("AC".equalsIgnoreCase(sMonth)) {
            return Calendar.JULY;
        }
        if ("AD".equalsIgnoreCase(sMonth)) {
            return Calendar.OCTOBER;
        }

        int monthNumber;
        try {
            monthNumber = Integer.parseInt(sMonth);
        } catch (NumberFormatException e) {
            throw new Exception("Release ID is not in required format." + sMonth, e);
        }
        if (monthNumber < 1 || monthNumber > 12) {
            throw new Exception("Release ID is not in required format: " + sMonth);
        }
        // Calendar month constants are consecutive, starting at JANUARY.
        return Calendar.JANUARY + (monthNumber - 1);
    }

    /**
     * Inserts a {@link SystemRelease} for the release id found in the second
     * field of the line, unless that release id was already handled.
     *
     * @param elements
     *            fields of one MRCUI.RRF line
     * @throws Exception
     *             if the release id is missing or malformed, or the insert
     *             fails
     */
    private void loadSystemReleaseInfo(List<String> elements) throws Exception {
        String releaseId = elements.get(1);
        // Always parsed, so a malformed release id fails on every line that carries it.
        Date releaseDate = getSystemReleaseDate(releaseId);

        if (systemReleaseDates_.containsKey(releaseId)) {
            return;
        }
        systemReleaseDates_.put(releaseId, releaseDate);

        SystemRelease systemRelease = new SystemRelease();
        systemRelease.setReleaseURI(metaURN + ":" + releaseId);
        systemRelease.setReleaseId(releaseId);
        systemRelease.setReleaseDate(releaseDate);
        systemRelease.setReleaseAgency(releaseAgency);

        LexEvsServiceLocator.getInstance().getDatabaseServiceManager().getNciHistoryService()
                .insertSystemRelease(codingSchemeUri, systemRelease);
    }

    /**
     * Builds an {@link NCIChangeEvent} from one MRCUI.RRF line and inserts
     * it through the history service.
     * <p>
     * Field 0 is the concept code, field 1 the release id, field 2 the
     * relation and field 5 the referenced concept code.
     *
     * @param elements
     *            fields of one MRCUI.RRF line
     * @throws Exception
     *             if a field is missing, the relation is unknown, no release
     *             date is known for the release id, or the insert fails
     */
    private void loadUMLSHistoryInfo(List<String> elements) throws Exception {
        NCIChangeEvent nciChangeEvent = new NCIChangeEvent();

        String conceptCode = elements.get(0);
        nciChangeEvent.setConceptcode(conceptCode);
        nciChangeEvent.setConceptName(lookupConceptName(conceptCode));

        nciChangeEvent.setEditaction(toChangeType(elements.get(2)));

        // The date was registered by loadSystemReleaseInfo for this release id.
        Date editDate = systemReleaseDates_.get(elements.get(1));
        if (editDate == null) {
            throw new Exception("Couldn't find Edit Date for the concept'" + conceptCode + "'");
        }
        nciChangeEvent.setEditDate(editDate);

        String referenceCode = elements.get(5);
        nciChangeEvent.setReferencecode(referenceCode);
        nciChangeEvent.setReferencename(lookupConceptName(referenceCode));

        LexEvsServiceLocator.getInstance().getDatabaseServiceManager().getNciHistoryService()
                .insertNCIChangeEvent(codingSchemeUri, nciChangeEvent);
    }

    /**
     * Returns the name recorded for the concept code, or "Not Available."
     * when MRCONSO.RRF supplied none.
     */
    private String lookupConceptName(String conceptCode) {
        String name = mrconsoConceptName_.get(conceptCode);
        return name != null ? name : NAME_NOT_AVAILABLE;
    }

    /**
     * Maps the relation field of an MRCUI line to a change type: DEL, SUBX,
     * RB, RN and RO retire the concept, SY merges it.
     *
     * @throws Exception
     *             if the relation is none of the known values
     */
    private static ChangeType toChangeType(String relation) throws Exception {
        if ("DEL".equalsIgnoreCase(relation) || "SUBX".equalsIgnoreCase(relation)
                || "RB".equalsIgnoreCase(relation) || "RN".equalsIgnoreCase(relation)
                || "RO".equalsIgnoreCase(relation)) {
            return ChangeType.RETIRE;
        }
        if ("SY".equalsIgnoreCase(relation)) {
            return ChangeType.MERGE;
        }
        throw new Exception("Relation field is not in required format.");
    }

    /**
     * Returns a BufferedReader for the passed URI. "file" URIs are read from
     * the local file system, every other scheme through a URL connection.
     * Content is decoded as UTF-8 rather than with the platform default
     * charset.
     *
     * @param filePath
     *            absolute URI of the file to read
     * @return a reader positioned at the start of the file
     * @throws MalformedURLException
     *             if the URI cannot be converted to a URL
     * @throws IOException
     *             if the file or connection cannot be opened
     */
    private static BufferedReader getReader(URI filePath) throws MalformedURLException, IOException {
        // Constant-first comparison: a URI without a scheme must not cause a NullPointerException.
        if ("file".equals(filePath.getScheme())) {
            return new BufferedReader(new InputStreamReader(new FileInputStream(new File(filePath)), RRF_CHARSET));
        }
        return new BufferedReader(new InputStreamReader(filePath.toURL().openConnection().getInputStream(),
                RRF_CHARSET));
    }

    /** Tells whether a line carries no data: it is empty or starts with '#'. */
    private static boolean isCommentOrBlank(String line) {
        return line.length() == 0 || line.startsWith("#");
    }

    /**
     * Splits the string at every occurrence of the token.
     * <p>
     * Only text that is followed by a token becomes an element; whatever
     * comes after the last token is dropped. A line such as "a|b|" therefore
     * yields [a, b].
     *
     * @param str
     *            line to split
     * @param token
     *            non-empty delimiter, may be longer than one character
     * @return the elements in order of appearance
     * @throws IllegalArgumentException
     *             if the token is null or empty
     */
    private static List<String> deTokenizeString(String str, String token) {
        if (token == null || token.length() == 0) {
            throw new IllegalArgumentException("token must not be empty");
        }
        List<String> elementList = new ArrayList<String>();
        int beginIndex = 0;
        int endIndex = str.indexOf(token);
        while (endIndex > -1) {
            elementList.add(str.substring(beginIndex, endIndex));
            // Skip the whole delimiter, not just its first character.
            beginIndex = endIndex + token.length();
            endIndex = str.indexOf(token, beginIndex);
        }
        return elementList;
    }

    /**
     * Converts a date in string format to java.sql.Date format.
     *
     * @param sDate
     *            date text
     * @param format
     *            {@link SimpleDateFormat} pattern describing {@code sDate}
     * @return the parsed date
     * @throws Exception
     *             if the text cannot be parsed with the pattern; the
     *             {@link ParseException} is attached as cause
     */
    public static Date convertStringToDate(String sDate, String format) throws Exception {
        SimpleDateFormat dateformat = new SimpleDateFormat(format);
        try {
            return new Date(dateformat.parse(sDate).getTime());
        } catch (ParseException e) {
            throw new Exception("Exception while parsing the date: " + e.getMessage(), e);
        }
    }

    /**
     * Reads MRCONSO.RRF and fills the concept-code to concept-name map.
     * <p>
     * Only lines with more than 14 fields whose field 6 equals "y"
     * (case-insensitive) are used; the name is taken from field 14 and the
     * first name seen for a concept code wins. Read failures are logged and
     * leave the map partially filled.
     *
     * @param metaFolderPath
     *            folder URI containing MRCONSO.RRF
     * @throws Exception
     *             if the URI is null while {@code failOnAllErrors} is set, or
     *             closing the file fails
     */
    private void readMrConso(URI metaFolderPath) throws Exception {
        if (metaFolderPath == null) {
            if (failOnAllErrors_) {
                message_.fatalAndThrowException("URI unspecified for 'MRCONSO.RRF' file.");
            } else {
                message_.error("URI unspecified for 'MRCONSO.RRF' file.");
            }
            // Without a folder there is nothing to read; names stay unavailable.
            return;
        }

        Snapshot snap1 = SimpleMemUsageReporter.snapshot();
        message_.info("Reading 'MRCONSO.RRF'...");

        BufferedReader mrconsoFile = null;
        try {
            mrconsoFile = getReader(metaFolderPath.resolve(MRCONSO_FILE));
            for (String line = mrconsoFile.readLine(); line != null; line = mrconsoFile.readLine()) {
                if (isCommentOrBlank(line)) {
                    continue;
                }
                List<String> elements = deTokenizeString(line, token_);
                if (elements.size() > 14 && "y".equalsIgnoreCase(elements.get(6))) {
                    String conceptCode = elements.get(0);
                    if (!mrconsoConceptName_.containsKey(conceptCode)) {
                        mrconsoConceptName_.put(conceptCode, elements.get(14));
                    }
                }
            }
        } catch (IOException e) {
            // Covers MalformedURLException as well; loading continues without the missing names.
            message_.error("Exceptions while reading MRCONSO.RRF: " + e.getMessage(), e);
        } finally {
            // The reader is null when opening the file failed.
            if (mrconsoFile != null) {
                mrconsoFile.close();
            }
        }

        Snapshot snap2 = SimpleMemUsageReporter.snapshot();
        message_.info("Done reading 'MRCONSO.RRF': Time taken: "
                + SimpleMemUsageReporter.formatTimeDiff(snap2.getTimeDelta(snap1)));
    }

    /**
     * Checks that MRCUI.RRF and MRCONSO.RRF under the given location can be
     * read and have the expected layout.
     * <p>
     * Every MRCUI data line must have 7 fields, a concept code starting with
     * 'c' or 'C', and a release id that ends in AA, AB, AC or AD or whose
     * text after the fourth character is a number from 0 to 12. Every
     * MRCONSO data line must have 18 fields. Empty lines and lines starting
     * with '#' are ignored.
     *
     * @param fileLocation
     *            folder URI containing both files
     * @param token
     *            field delimiter; if blank the class default is used
     * @param validateLevel
     *            true to check only the first 10 data lines of each file,
     *            false to check every line
     * @throws Exception
     *             if a file cannot be opened or read, or a line is not in
     *             the required format
     */
    public static void validateFile(URI fileLocation, String token, boolean validateLevel) throws Exception {
        String delimiter = StringUtils.isNotBlank(token) ? token : token_;

        // test MRCUI.RRF
        URI mrCUIFile = fileLocation.resolve(MRCUI_FILE);
        if (mrCUIFile == null) {
            throw new ConnectionFailure("Did not find the expected MRCUI.RRF file in the location provided.");
        }
        validateMrCui(mrCUIFile, delimiter, validateLevel);

        // test MRCONSO.RRF
        URI mrCONSOFile = fileLocation.resolve(MRCONSO_FILE);
        if (mrCONSOFile == null) {
            throw new ConnectionFailure("Did not find the expected MRCONSO.RRF file in the location provided.");
        }
        validateMrConso(mrCONSOFile, delimiter, validateLevel);
    }

    /** Validates the layout of MRCUI.RRF; see {@link #validateFile}. */
    private static void validateMrCui(URI mrCuiFile, String delimiter, boolean quickCheck) throws Exception {
        BufferedReader reader = getReader(mrCuiFile);
        try {
            // Counts data lines only; skipped lines do not advance it.
            int lineNo = 1;
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                if (isCommentOrBlank(line)) {
                    continue;
                }
                if (quickCheck && lineNo > QUICK_VALIDATION_LINE_LIMIT) {
                    break;
                }

                List<String> elements = deTokenizeString(line, delimiter);
                if (elements.size() != MRCUI_FIELD_COUNT) {
                    throw new Exception("MRCUI.RRF " + "(" + "Line:" + lineNo + ")"
                            + " is not in the required format.");
                }

                String conceptCode = elements.get(0);
                if (conceptCode.length() == 0 || Character.toLowerCase(conceptCode.charAt(0)) != 'c') {
                    throw new Exception("MRCUI.RRF " + "(" + "Line:" + lineNo + "): " + "The concept("
                            + conceptCode + ") is not in the required format.");
                }

                String releaseId = elements.get(1);
                if (!isAcceptableReleaseId(releaseId)) {
                    throw new Exception("MRCUI.RRF " + "(" + "Line:" + lineNo + "): " + "The Release id ("
                            + releaseId + ") is not in the required format.");
                }
                lineNo++;
            }
        } finally {
            reader.close();
        }
    }

    /** Validates the layout of MRCONSO.RRF; see {@link #validateFile}. */
    private static void validateMrConso(URI mrConsoFile, String delimiter, boolean quickCheck) throws Exception {
        BufferedReader reader = getReader(mrConsoFile);
        try {
            // Counts data lines only; skipped lines do not advance it.
            int lineNo = 1;
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                if (isCommentOrBlank(line)) {
                    continue;
                }
                if (quickCheck && lineNo > QUICK_VALIDATION_LINE_LIMIT) {
                    break;
                }

                List<String> elements = deTokenizeString(line, delimiter);
                if (elements.size() != MRCONSO_FIELD_COUNT) {
                    throw new Exception("MRCONSO.RRF " + "(" + "Line:" + lineNo + ")"
                            + " is not in the required format.");
                }
                lineNo++;
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Tells whether a release id passes validation: it ends in AA, AB, AC or
     * AD, or the text after its fourth character is a number from 0 to 12.
     * <p>
     * NOTE(review): month 0 and lower-case codes are treated differently
     * here than in getSystemReleaseDate (which rejects 0 and ignores case);
     * kept as-is so validation accepts exactly what it accepted before.
     */
    private static boolean isAcceptableReleaseId(String releaseId) {
        if (releaseId.endsWith("AA") || releaseId.endsWith("AB") || releaseId.endsWith("AC")
                || releaseId.endsWith("AD")) {
            return true;
        }
        if (releaseId.length() < 4) {
            return false;
        }
        try {
            int month = Integer.parseInt(releaseId.substring(4));
            return month >= 0 && month <= 12;
        } catch (NumberFormatException e) {
            return false;
        }
    }
}