Java tutorial
/* Copyright 2015 Cardiovascular Research Grid * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * All rights reserved * * @author Stephen J Granite (Email: sgranite@jhu.edu) */ package org.cvrgrid.hl7.fileparse; /* * This is the main class to take PICU HL7 data, filter it based upon location, generate Global Unique * Identifiers (GUIDs) for the subjects, load the time series information into OpenTSDB and maintain a * lookup file for the GUIDs and a log file for HL7 files processed with the original HL7 files. * It uses information in a local server.properties file. This information tells the tool: 1) where * to get HL7 translation information from, 2) where to maintain the lookup and the log files and 3) w * here the OpenTSDB server is located, to store the time-series data with the GUID information in a tag. * * The tool itself is a Java command line tool, intended to be placed in a service job that runs at a * specific interval. The tool does not need to be co-located with the data, as the server.properties * specifies the path information for the data. The tool begins by loading values for HL7 translation. * Then the tool loads information for existing subjects from the lookup. Then the tool loads information * from the log file, to know what HL7 files to process. After all this pre-loading, the tool then scours * the root directory for HL7 data. During this process, the tool checks to see if the data was already * processing, through comparison to the log file's contents. If there are no new data, the tool stops. * Otherwise, the tool parses the new data, loading the time series into OpenTSDB with GUIDs stored in a * subjectId tag. While loading the time series, the tool appends information to the lookup and the log * datasets. Upon completion of processing, the tool generates new lookup and log files, to sit for the * next processing check cycle. * * The tool requires the Apache POI libraries to work with Excel files, the Apache Camel and HL7 API * libraries to work with HL7 and the CVRG OpenTSDB client to work with OpenTSDB. All these dependencies * are stored in the pom.xml. */ import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.io.PrintWriter; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Properties; import java.util.Set; import java.util.TreeSet; import org.apache.commons.lang.StringUtils; import org.apache.poi.xssf.usermodel.XSSFCell; import org.apache.poi.xssf.usermodel.XSSFRow; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.cvrgrid.hl7.fileparse.model.HL7Measurements; import edu.jhu.cvrg.timeseriesstore.model.IncomingDataPoint; import edu.jhu.cvrg.timeseriesstore.opentsdb.TimeSeriesStorer; import org.cvrgrid.hl7.fileparse.model.OpenTSDBConfiguration; import org.cvrgrid.hl7.fileparse.model.PatientInfo; import ca.uhn.hl7v2.model.Message; import ca.uhn.hl7v2.model.v23.group.ORU_R01_OBSERVATION; import ca.uhn.hl7v2.model.v23.message.ORU_R01; import ca.uhn.hl7v2.util.Hl7InputStreamMessageIterator; import ca.uhn.hl7v2.util.Terser; public class PicuDataLoader { private String configFilename = "/resources/server.properties"; private OpenTSDBConfiguration openTSDBConfiguration = new OpenTSDBConfiguration(); /** * Constructor for this code intended to set all the variables based upon the properties file. */ public PicuDataLoader() { try { OpenTSDBConfiguration openTSDBConfiguration = new OpenTSDBConfiguration(); Properties serverProperties = new Properties(); InputStream stream = PicuDataLoader.class.getResourceAsStream(this.getConfigFilename()); serverProperties.load(stream); openTSDBConfiguration.setOpenTSDBUrl(serverProperties.getProperty("openTSDBUrl")); openTSDBConfiguration.setApiPut(serverProperties.getProperty("apiPut")); openTSDBConfiguration.setApiQuery(serverProperties.getProperty("apiQuery")); openTSDBConfiguration.setAwareSupportedParams(serverProperties.getProperty("awareSupportedParams")); openTSDBConfiguration.setIdMatch(serverProperties.getProperty("idMatch")); openTSDBConfiguration.setIdMatchSheet(serverProperties.getProperty("idMatchSheet")); openTSDBConfiguration.setProcessedFile(serverProperties.getProperty("processedFile")); openTSDBConfiguration.setRootDir(serverProperties.getProperty("rootDir")); openTSDBConfiguration.setFolderPath(serverProperties.getProperty("folderPath")); openTSDBConfiguration.setStudyString(serverProperties.getProperty("studyString")); this.setOpenTSDBConfiguration(openTSDBConfiguration); } catch (IOException e) { e.printStackTrace(); } } public static void main(String[] args) throws Exception { PicuDataLoader picuDataLoader = new PicuDataLoader(); SimpleDateFormat fromUser = new SimpleDateFormat("yyyyMMddHHmmss"); SimpleDateFormat myFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); OpenTSDBConfiguration openTSDBConfiguration = picuDataLoader.getOpenTSDBConfiguration(); String urlString = openTSDBConfiguration.getOpenTSDBUrl(); HL7Measurements hl7Measurements = new HL7Measurements(); HashMap<String, String> measurementNames = hl7Measurements.getMeasurementNames(); XSSFWorkbook wb = readFile(openTSDBConfiguration.getAwareSupportedParams()); XSSFSheet sheet = wb.getSheetAt(0); for (int r = 1; r < 280; r++) { XSSFRow row = sheet.getRow(r); if (row == null) { continue; } String key = row.getCell(2).getStringCellValue(); String value = row.getCell(1).getStringCellValue(); value = value.replaceAll(":", "/"); measurementNames.put(key, value); } HashMap<String, PatientInfo> idMatch = new HashMap<String, PatientInfo>(); File f = new File(openTSDBConfiguration.getIdMatch()); if (f.exists()) { wb = readFile(openTSDBConfiguration.getIdMatch()); sheet = wb.getSheetAt(0); for (int r = 1; r < sheet.getLastRowNum() + 1; r++) { XSSFRow row = sheet.getRow(r); PatientInfo patInfo = new PatientInfo(); patInfo.setPicuSubject(row.getCell(1).getBooleanCellValue()); patInfo.setFirstName(row.getCell(3).getStringCellValue()); patInfo.setLastName(row.getCell(4).getStringCellValue()); patInfo.setBirthDateTime(row.getCell(5).getStringCellValue()); patInfo.setGender(row.getCell(6).getStringCellValue()); patInfo.setBirthplace(row.getCell(7).getStringCellValue()); patInfo.setEarliestDataPoint(row.getCell(8).getStringCellValue()); LinkedList<String> locations = new LinkedList<String>(); String lSet = row.getCell(10).getStringCellValue(); lSet = lSet.replaceAll("\\[", ""); lSet = lSet.replaceAll("\\]", ""); String[] locationSet = lSet.split(","); for (String location : locationSet) { locations.add(location.trim()); } patInfo.setLocations(locations); LinkedList<String> variables = new LinkedList<String>(); String vSet = row.getCell(12).getStringCellValue(); vSet = vSet.replaceAll("\\[", ""); vSet = vSet.replaceAll("\\]", ""); String[] variableSet = vSet.split(","); for (String variable : variableSet) { variables.add(variable.trim()); } patInfo.setVariables(variables); idMatch.put(patInfo.getHash(), patInfo); } } System.out.println("Existing Subject Count: " + idMatch.size()); String processedFile = openTSDBConfiguration.getProcessedFile(); String rootDir = openTSDBConfiguration.getRootDir(); ArrayList<String> processedFiles = new ArrayList<String>(); File processedFileContents = new File(processedFile); getProcessedFiles(processedFileContents, processedFiles); ArrayList<String> messageFiles = new ArrayList<String>(); File rootDirContents = new File(rootDir); getDirectoryContents(rootDirContents, processedFiles, messageFiles); XSSFWorkbook workbook; XSSFSheet sheetOut, sheetOut2; if (processedFiles.size() > 1) { workbook = readFile(openTSDBConfiguration.getIdMatch()); sheetOut = workbook.getSheetAt(0); sheetOut2 = workbook.getSheetAt(1); } else { workbook = new XSSFWorkbook(); sheetOut = workbook.createSheet("idMatch"); sheetOut2 = workbook.createSheet(openTSDBConfiguration.getIdMatchSheet()); } for (String filePath : messageFiles) { System.out.println(" File: " + filePath); FileReader reader = new FileReader(filePath); Hl7InputStreamMessageIterator iter = new Hl7InputStreamMessageIterator(reader); while (iter.hasNext()) { HashMap<String, String> tags = new HashMap<String, String>(); Message next = iter.next(); ORU_R01 oru = new ORU_R01(); oru.parse(next.encode()); PatientInfo patInfo = new PatientInfo(); if (Terser.get(oru.getRESPONSE().getPATIENT().getPID(), 5, 0, 2, 1) != null) patInfo.setFirstName(Terser.get(oru.getRESPONSE().getPATIENT().getPID(), 5, 0, 2, 1).trim()); if (Terser.get(oru.getRESPONSE().getPATIENT().getPID(), 5, 0, 1, 1) != null) patInfo.setLastName(Terser.get(oru.getRESPONSE().getPATIENT().getPID(), 5, 0, 1, 1).trim()); if (Terser.get(oru.getRESPONSE().getPATIENT().getPID(), 7, 0, 1, 1) != null) patInfo.setBirthDateTime( Terser.get(oru.getRESPONSE().getPATIENT().getPID(), 7, 0, 1, 1).trim()); if (Terser.get(oru.getRESPONSE().getPATIENT().getPID(), 8, 0, 1, 1) != null) patInfo.setGender(Terser.get(oru.getRESPONSE().getPATIENT().getPID(), 8, 0, 1, 1).trim()); if (Terser.get(oru.getRESPONSE().getPATIENT().getPID(), 23, 0, 1, 1) != null) patInfo.setBirthplace(Terser.get(oru.getRESPONSE().getPATIENT().getPID(), 23, 0, 1, 1).trim()); LinkedList<String> locations = new LinkedList<String>(); LinkedList<String> variables = new LinkedList<String>(); if (idMatch.get(patInfo.getHash()) != null) { patInfo = idMatch.get(patInfo.getHash()); locations = patInfo.getLocations(); variables = patInfo.getVariables(); } if (!locations .contains(Terser.get(oru.getRESPONSE().getPATIENT().getVISIT().getPV1(), 3, 0, 1, 1))) { locations.add(Terser.get(oru.getRESPONSE().getPATIENT().getVISIT().getPV1(), 3, 0, 1, 1)); if (locations.peekLast().startsWith("ZB04")) patInfo.setPicuSubject(true); } tags.put("subjectId", patInfo.getHash()); String time = Terser.get(oru.getRESPONSE().getORDER_OBSERVATION().getOBR(), 7, 0, 1, 1); Date timepoint = fromUser.parse(time); String reformattedTime = myFormat.format(timepoint); if (patInfo.getEarliestDataPoint().equalsIgnoreCase("")) { patInfo.setEarliestDataPoint(reformattedTime); } List<ORU_R01_OBSERVATION> observations = oru.getRESPONSE().getORDER_OBSERVATION() .getOBSERVATIONAll(); for (ORU_R01_OBSERVATION observation : observations) { String seriesName = Terser.get(observation.getOBX(), 3, 0, 1, 1); if (measurementNames.get(seriesName) != null) { seriesName = measurementNames.get(seriesName); } else { seriesName = seriesName.replaceFirst("\\d", "#"); seriesName = measurementNames.get(seriesName); } StringBuffer buff = new StringBuffer(); String[] tokens = seriesName.split(" "); for (String i : tokens) { i = i.replaceAll("\\(", ""); i = i.replaceAll("\\)", ""); buff.append(StringUtils.capitalize(i)); } String measurementValue = Terser.get(observation.getOBX(), 5, 0, 1, 1); String units = Terser.get(observation.getOBX(), 6, 0, 1, 1); if (units != null) { units = units.replaceAll(":", ""); units = units.replaceAll("cm_h2o", "cmH2O"); units = units.replaceAll("\\(min/m2\\)", "MinPerMeterSquared"); units = units.replaceAll("l", "liters"); units = units.replaceAll("mliters", "milliliters"); units = units.replaceAll("g.m", "gramMeters"); units = units.replaceAll("dyn.sec.cm-5", "dyneSecondsPerQuinticCentimeter"); units = units.replaceAll("dyneSecondsPerQuinticCentimeter.m2", "dyneSecondsPerQuinticCentimeterPerMeterSquared"); units = units.replaceAll("m2", "MeterSquared"); units = units.replaceAll("min", "Min"); units = units.replaceAll("/", "Per"); units = units.replaceAll("%", "percent"); units = units.replaceAll("#", "Count"); units = units.replaceAll("celiters", "Celsius"); units = units.replaceAll("mm\\(hg\\)", "mmHg"); } else { units = "percent"; } seriesName = "vitals." + StringUtils.uncapitalize(units); seriesName += "." + StringUtils.uncapitalize(buff.toString()); seriesName = seriesName.trim(); if (!variables.contains(StringUtils.uncapitalize(buff.toString()))) variables.add(StringUtils.uncapitalize(buff.toString())); IncomingDataPoint dataPoint = new IncomingDataPoint(seriesName, timepoint.getTime(), measurementValue, tags); TimeSeriesStorer.storeTimePoint(urlString, dataPoint); } patInfo.setLocations(locations); patInfo.setVariables(variables); idMatch.put(patInfo.getHash(), patInfo); } System.out.println(" Subject Count: " + idMatch.size()); int rowNum = 0; Set<String> keys = idMatch.keySet(); TreeSet<String> sortedKeys = new TreeSet<String>(keys); for (String key : sortedKeys) { XSSFRow row = sheetOut.createRow(rowNum); XSSFRow row2 = sheetOut2.createRow(rowNum); XSSFCell cell, cell2; if (rowNum == 0) { cell = row.createCell(0); cell.setCellValue("Count"); cell = row.createCell(1); cell.setCellValue("PICU Subject?"); cell = row.createCell(2); cell.setCellValue("Hash"); cell = row.createCell(3); cell.setCellValue("First Name"); cell = row.createCell(4); cell.setCellValue("Last Name"); cell = row.createCell(5); cell.setCellValue("Birth Date/Time"); cell = row.createCell(6); cell.setCellValue("Gender"); cell = row.createCell(7); cell.setCellValue("Birthplace"); cell = row.createCell(8); cell.setCellValue("First Time Point"); cell = row.createCell(9); cell.setCellValue("Location Count"); cell = row.createCell(10); cell.setCellValue("Locations"); cell = row.createCell(11); cell.setCellValue("Variable Count"); cell = row.createCell(12); cell.setCellValue("Variables"); cell2 = row2.createCell(0); cell2.setCellValue("Count"); cell2 = row2.createCell(1); cell2.setCellValue("PICU Subject?"); cell2 = row2.createCell(2); cell2.setCellValue("Hash"); cell2 = row2.createCell(3); cell2.setCellValue("First Name"); cell2 = row2.createCell(4); cell2.setCellValue("Last Name"); cell2 = row2.createCell(5); cell2.setCellValue("Birth Date/Time"); cell2 = row2.createCell(6); cell2.setCellValue("Gender"); cell2 = row2.createCell(7); cell2.setCellValue("Birthplace"); cell2 = row2.createCell(8); cell2.setCellValue("First Time Point"); cell2 = row2.createCell(9); cell2.setCellValue("Location Count"); cell2 = row2.createCell(10); cell2.setCellValue("Locations"); cell2 = row2.createCell(11); cell2.setCellValue("Variable Count"); cell2 = row2.createCell(12); cell2.setCellValue("Variables"); } else { cell = row.createCell(0); cell.setCellValue(rowNum); cell = row.createCell(1); cell.setCellValue(idMatch.get(key).isPicuSubject()); cell = row.createCell(2); cell.setCellValue(key); cell = row.createCell(3); cell.setCellValue(idMatch.get(key).getFirstName()); cell = row.createCell(4); cell.setCellValue(idMatch.get(key).getLastName()); cell = row.createCell(5); cell.setCellValue(idMatch.get(key).getBirthDateTime()); cell = row.createCell(6); cell.setCellValue(idMatch.get(key).getGender()); cell = row.createCell(7); cell.setCellValue(idMatch.get(key).getBirthplace()); cell = row.createCell(8); cell.setCellValue(idMatch.get(key).getEarliestDataPoint()); cell = row.createCell(9); cell.setCellValue(idMatch.get(key).getLocations().size()); cell = row.createCell(10); cell.setCellValue(idMatch.get(key).getLocations().toString()); cell = row.createCell(11); cell.setCellValue(idMatch.get(key).getVariables().size()); cell = row.createCell(12); cell.setCellValue(idMatch.get(key).getVariables().toString()); if (idMatch.get(key).isPicuSubject()) { cell2 = row2.createCell(0); cell2.setCellValue(rowNum); cell2 = row2.createCell(1); cell2.setCellValue(idMatch.get(key).isPicuSubject()); cell2 = row2.createCell(2); cell2.setCellValue(key); cell2 = row2.createCell(3); cell2.setCellValue(idMatch.get(key).getFirstName()); cell2 = row2.createCell(4); cell2.setCellValue(idMatch.get(key).getLastName()); cell2 = row2.createCell(5); cell2.setCellValue(idMatch.get(key).getBirthDateTime()); cell2 = row2.createCell(6); cell2.setCellValue(idMatch.get(key).getGender()); cell2 = row2.createCell(7); cell2.setCellValue(idMatch.get(key).getBirthplace()); cell2 = row2.createCell(8); cell2.setCellValue(idMatch.get(key).getEarliestDataPoint()); cell2 = row2.createCell(9); cell2.setCellValue(idMatch.get(key).getLocations().size()); cell2 = row2.createCell(10); cell2.setCellValue(idMatch.get(key).getLocations().toString()); cell2 = row2.createCell(11); cell2.setCellValue(idMatch.get(key).getVariables().size()); cell2 = row2.createCell(12); cell2.setCellValue(idMatch.get(key).getVariables().toString()); } } rowNum++; } } if (messageFiles.size() > 0) { try { FileOutputStream out = new FileOutputStream(new File(openTSDBConfiguration.getIdMatch())); workbook.write(out); out.close(); System.out.println("Excel written successfully..."); PrintWriter writer = new PrintWriter(rootDir + "done.txt", "UTF-8"); for (String filePath : processedFiles) { writer.println(filePath); } for (String filePath : messageFiles) { writer.println(filePath); } writer.close(); System.out.println("done.txt written successfully..."); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } else { System.out.println("Nothing new to process..."); } } private static ArrayList<String> getProcessedFiles(File processedFileRecord, ArrayList<String> processedFiles) { try { FileReader fr = new FileReader(processedFileRecord); BufferedReader br = new BufferedReader(fr); String filePath; while ((filePath = br.readLine()) != null) { processedFiles.add(filePath); } br.close(); fr.close(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return processedFiles; } private static ArrayList<String> getDirectoryContents(File dir, ArrayList<String> processedFiles, ArrayList<String> messageFiles) { try { File[] files = dir.listFiles(); for (File file : files) { if (file.isDirectory()) { messageFiles = getDirectoryContents(file, processedFiles, messageFiles); } else { if ((file.getCanonicalPath().endsWith(".txt")) || (file.getCanonicalPath().endsWith(".msg"))) if (!(processedFiles.contains(file.getCanonicalPath()))) messageFiles.add(file.getCanonicalPath()); } } } catch (IOException e) { e.printStackTrace(); } return messageFiles; } /** * creates an {@link HSSFWorkbook} the specified OS filename. */ private static XSSFWorkbook readFile(String filename) throws IOException { return new XSSFWorkbook(new FileInputStream(filename)); } /** * @return the configFilename */ public String getConfigFilename() { return configFilename; } /** * @return the openTSDBConfiguration */ public OpenTSDBConfiguration getOpenTSDBConfiguration() { return openTSDBConfiguration; } /** * @param openTSDBConfiguration the openTSDBConfiguration to set */ public void setOpenTSDBConfiguration(OpenTSDBConfiguration openTSDBConfiguration) { this.openTSDBConfiguration = openTSDBConfiguration; } }