org.nema.medical.mint.dcm2mint.ProcessImportDir.java Source code

Introduction

Here is the source code for org.nema.medical.mint.dcm2mint.ProcessImportDir.java, from the MINT (Medical Imaging Network Transport) reference implementation. The class scans an import directory for DICOM files, groups them by Study Instance UID, converts each study into MINT metadata plus binary items, uploads the studies to a MINT server, and polls the server for the status of the resulting jobs.
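
The class is driven externally by calling processDir(), handleSends(), and handleResponses() in a polling loop until sendingDone() reports completion. The driver below is a minimal sketch of such a loop; the server URL, import directory, and parameter values are illustrative assumptions, not part of the MINT sources.

import java.io.File;
import java.net.URI;

import org.nema.medical.mint.dcm2mint.ProcessImportDir;

public final class ImportDriver {
    public static void main(final String[] args) throws Exception {
        //Hypothetical server root and import directory; adjust for your deployment.
        final URI serverURI = URI.create("http://localhost:8080/MINTServer");
        final ProcessImportDir processor = new ProcessImportDir(
                new File("/data/dicom-import"), serverURI,
                true,   //useXMLNotGPB: send metadata as XML rather than GPB
                false,  //deletePhysicalInstanceFiles: keep source files after upload
                false,  //forceCreate: query for existing studies and update them
                256);   //binaryInlineThreshold: inline binary items up to 256 bytes
        do {
            processor.processDir();      //gather files, build and queue MINT studies
            processor.handleSends();     //upload at most one queued study per call
            processor.handleResponses(); //poll the status of outstanding server jobs
            Thread.sleep(2000);          //arbitrary poll interval
        } while (!processor.sendingDone());
    }
}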

Source

/*
 *   Copyright 2010 MINT Working Group
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 */

package org.nema.medical.mint.dcm2mint;

import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.entity.mime.MultipartEntity;
import org.apache.http.entity.mime.content.InputStreamBody;
import org.apache.http.entity.mime.content.StringBody;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.log4j.Logger;
import org.dcm4che2.data.DicomObject;
import org.dcm4che2.data.Tag;
import org.dcm4che2.data.TransferSyntax;
import org.dcm4che2.io.DicomInputStream;
import org.dcm4che2.io.StopTagInputHandler;
import org.nema.medical.mint.datadictionary.*;
import org.nema.medical.mint.jobs.HttpMessagePart;
import org.nema.medical.mint.metadata.StudyIO;
import org.nema.medical.mint.metadata.StudyMetadata;
import org.nema.medical.mint.utils.StudyTraversals;
import org.nema.medical.mint.utils.StudyValidation;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.*;
import java.net.URI;
import java.util.*;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ConcurrentSkipListSet;

import static org.nema.medical.mint.utils.Iter.iter;

/**
 * @author Uli Bubenheimer
 */
public final class ProcessImportDir {
    private static final Logger LOG = Logger.getLogger(ProcessImportDir.class);

    private final Map<String, JobInfo> jobIDInfo = Collections.synchronizedMap(new HashMap<String, JobInfo>());
    private final boolean deletePhysicalInstanceFiles;
    private final boolean forceCreate;
    private final int binaryInlineThreshold;
    private MetadataType dicomMetadataType = null;
    private LevelAttributes studyLevelTags;
    private LevelAttributes seriesLevelTags;
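    //Shared XML parsing machinery, configured once for all instances; external
    //DTD loading is disabled so that parsing server responses never triggers
    //network fetches.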
    private static final XPath xPath = XPathFactory.newInstance().newXPath();
    private static final DocumentBuilder documentBuilder;
    static {
        final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
        documentBuilderFactory.setCoalescing(true);
        documentBuilderFactory.setNamespaceAware(false);
        documentBuilderFactory.setValidating(false);
        try {
            documentBuilderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd",
                    false);
            documentBuilder = documentBuilderFactory.newDocumentBuilder();
        } catch (final ParserConfigurationException e) {
            throw new RuntimeException(e);
        }
    }

    public ProcessImportDir(final File importDir, final URI serverURI, final boolean useXMLNotGPB,
            final boolean deletePhysicalInstanceFiles, final boolean forceCreate, final int binaryInlineThreshold)
            throws IOException {
        this.importDir = importDir;
        this.createURI = URI.create(serverURI + "/jobs/createstudy");
        this.queryURI = URI.create(serverURI + "/studies");
        this.updateURI = URI.create(serverURI + "/jobs/updatestudy");
        this.jobStatusURI = URI.create(serverURI + "/jobs/status");
        this.dicomDatadictionaryURI = URI.create(serverURI + "/types/DICOM");
        this.useXMLNotGPB = useXMLNotGPB;
        this.deletePhysicalInstanceFiles = deletePhysicalInstanceFiles;
        this.forceCreate = forceCreate;
        this.binaryInlineThreshold = binaryInlineThreshold;
    }

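    /**
     * Scans the import directory for new DICOM files, groups them by Study
     * Instance UID, converts each study into MINT metadata plus binary items,
     * validates the result, and queues each valid study for upload. On the
     * first call this also fetches the DICOM data dictionary from the server,
     * which must therefore already be running.
     *
     * @throws IOException if the data dictionary cannot be retrieved or parsed
     */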
    public void processDir() throws IOException {
        //Initialize this only once. Initialization cannot happen in the
        //constructor because it requires the server to be up and running,
        //yet it is the server startup that creates this class: a catch-22.
        if (dicomMetadataType == null) {
            HttpGet httpGet = new HttpGet(dicomDatadictionaryURI);
            String response = httpClient.execute(httpGet, new BasicResponseHandler());
            InputStream in = new ByteArrayInputStream(response.getBytes());
            this.dicomMetadataType = DataDictionaryIO.parseFromXML(in);
            this.studyLevelTags = dicomMetadataType.getStudyAttributes();
            this.seriesLevelTags = dicomMetadataType.getSeriesAttributes();
        }
        LOG.info("Gathering files for allocation to studies.");
        final long fileGatherStart = System.currentTimeMillis();
        //As items may be removed from the set of handled files concurrently,
        //create a copy of a maximum set first; it won't hurt if it contains too many elements.
        final SortedSet<File> handledFilesCopy = new TreeSet<File>(handledFiles);
        final SortedSet<File> resultFiles = new TreeSet<File>();
        findPlainFilesRecursive(importDir, resultFiles, handledFilesCopy);
        handledFiles.addAll(resultFiles);
        int instancesProcessed = 0;
        final Map<String, Collection<File>> studyFileMap = new HashMap<String, Collection<File>>();
        for (final File plainFile : resultFiles) {
            try {
                final String studyUID;
                final DicomInputStream dcmStream = new DicomInputStream(plainFile);
                try {
                    //Only read & parse DICOM file up to StudyInstanceUID tag
                    dcmStream.setHandler(new StopTagInputHandler(Tag.StudyInstanceUID + 1));
                    final DicomObject dcmObj = dcmStream.readDicomObject();
                    studyUID = Dcm2MetaBuilder.extractStudyInstanceUID(dcmObj);
                    if (studyUID == null) {
                        throw new IOException(
                                "DICOM file without study instance UID - skipping: " + plainFile.getPath());
                    }
                } finally {
                    dcmStream.close();
                }

                Collection<File> dcmFileData = studyFileMap.get(studyUID);
                if (dcmFileData == null) {
                    dcmFileData = new ArrayList<File>();
                    studyFileMap.put(studyUID, dcmFileData);
                }
                dcmFileData.add(plainFile);
                ++instancesProcessed;
                if (instancesProcessed % 500 == 1) {
                    LOG.info("Inspected " + instancesProcessed + " instances.");
                }
            } catch (final IOException e) {
                //Not a valid DICOM file?!
                LOG.warn("Skipping file: " + plainFile, e);
            }
        }
        final long fileGatherEnd = System.currentTimeMillis();
        LOG.info("Gathering " + instancesProcessed + " files and study allocation completed in "
                + String.format("%.1f", (fileGatherEnd - fileGatherStart) / 1000.0f) + " seconds.");

        instancesProcessed = 0;

        outerLoop: for (final Map.Entry<String, Collection<File>> studyFiles : studyFileMap.entrySet()) {
            final String studyUID = studyFiles.getKey();
            assert studyUID != null;
            final Collection<File> instanceFiles = studyFiles.getValue();
            final int instanceFileCount = instanceFiles.size();
            LOG.info("Creating MINT metadata for " + instanceFileCount + " instances of study instance UID "
                    + studyUID + ".");
            final long mintConvertStart = System.currentTimeMillis();
            final BinaryData binaryData = new BinaryDcmData();
            final MetaBinaryPairImpl metaBinaryPair = new MetaBinaryPairImpl();
            metaBinaryPair.getMetadata().setType("DICOM");
            metaBinaryPair.setBinaryData(binaryData);
            //Constrain processing
            metaBinaryPair.getMetadata().setStudyInstanceUID(studyUID);
            final Dcm2MetaBuilder builder = new Dcm2MetaBuilder(studyLevelTags, seriesLevelTags, metaBinaryPair);
            builder.setBinaryInlineThreshold(binaryInlineThreshold);
            final Iterator<File> instanceFileIter = instanceFiles.iterator();
            while (instanceFileIter.hasNext()) {
                final File instanceFile = instanceFileIter.next();
                final TransferSyntax transferSyntax;
                final DicomObject dcmObj;
                try {
                    final DicomInputStream dcmStream = new DicomInputStream(instanceFile);
                    try {
                        dcmObj = dcmStream.readDicomObject();
                        transferSyntax = dcmStream.getTransferSyntax();
                    } finally {
                        dcmStream.close();
                    }
                } catch (final IOException e) {
                    //Not a valid DICOM file?!
                    LOG.warn("Skipping file: " + instanceFile, e);
                    instanceFileIter.remove();
                    continue;
                } catch (final RuntimeException e) {
                    //Some near-catastrophic error
                    LOG.error("Fatal error while processing file: " + instanceFile);
                    throw e;
                } catch (final Error e) {
                    //Some catastrophic error
                    LOG.fatal("Fatal error while processing file: " + instanceFile);
                    throw e;
                }
                try {
                    builder.accumulateFile(instanceFile, dcmObj, transferSyntax);
                } catch (final UnsupportedOperationException e) {
                    LOG.error("Skipping study " + studyUID + ": DICOM syntax error in file " + instanceFile + ":",
                            e);
                    continue outerLoop;
                }
            }
            builder.finish();

            try {
                try {
                    StudyValidation.validateStudyMetadata(metaBinaryPair.getMetadata(), dicomMetadataType);
                    addToSendQueue(metaBinaryPair, instanceFiles);
                } catch (final StudyTraversals.TraversalException e) {
                    LOG.error("Skipping study " + studyUID + ": validation error in study metadata", e);
                }
            } catch (final IOException e) {
                //Catastrophic error
                throw new RuntimeException(e);
            }

            final long mintConvertEnd = System.currentTimeMillis();
            LOG.info("MINT creation for study instance UID " + studyUID + " (" + instanceFileCount
                    + " instances) completed in "
                    + String.format("%.1f", (mintConvertEnd - mintConvertStart) / 1000.0f) + " seconds.");
        }
    }

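    /**
     * Serializes the study metadata to a temporary file, so that large studies
     * do not exhaust memory while further studies are processed, and places the
     * study in the send queue.
     */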
    private void addToSendQueue(final MetaBinaryPair studyData, final Collection<File> studyFiles)
            throws IOException {
        //Write metadata to disk so that we don't run out of memory while working on more studies
        final StudyMetadata study = studyData.getMetadata();
        final File studyMetaFile = File.createTempFile("metadata", useXMLNotGPB ? ".xml" : ".gpb");
        studyMetaFile.deleteOnExit();
        final OutputStream outStream = new BufferedOutputStream(new FileOutputStream(studyMetaFile));
        try {
            if (useXMLNotGPB) {
                StudyIO.writeToXML(study, outStream);
            } else {
                StudyIO.writeToGPB(study, outStream);
            }
        } finally {
            outStream.close();
        }

        final MetaBinaryFiles mbf = new MetaBinaryFiles();
        mbf.studyInstanceUID = study.getStudyInstanceUID();
        mbf.patientID = study.getValueForAttribute(Tag.PatientID);
        mbf.metadataFile = studyMetaFile;
        mbf.binaryData = studyData.getBinaryData();
        mbf.studyInstanceFiles = studyFiles;
        studySendQueue.add(mbf);
    }

    /**
     * Uploads the next queued study to the server, as a study create or a study
     * update depending on whether the study already exists. At most one study is
     * kept in flight at a time, to limit disk churn on the server.
     *
     * @throws Exception if querying the server for an existing study fails
     */
    public void handleSends() throws Exception {
        if (!jobIDInfo.isEmpty()) {
            //To prevent disk churn leading to slowness on the server side, have it handle just one study at a time
            return;
        }
        final Iterator<MetaBinaryFiles> studySendIter = studySendQueue.iterator();
        if (studySendIter.hasNext()) {
            final MetaBinaryFiles sendData = studySendIter.next();
            studySendIter.remove();

            //Determine whether study exists and we need to perform an update
            final String studyInstanceUID = sendData.studyInstanceUID;
            LOG.info("Uploading study instance UID " + studyInstanceUID);
            final StudyQueryInfo studyQueryInfo;
            if (forceCreate) {
                studyQueryInfo = null;
            } else {
                final long existQueryStart = System.currentTimeMillis();
                studyQueryInfo = doesStudyExist(studyInstanceUID, sendData.patientID);
                final long existQueryEnd = System.currentTimeMillis();
                final StringBuilder msg = new StringBuilder(
                        "Completed querying server for existing study for study instance UID " + studyInstanceUID
                                + " in " + String.format("%.1f", (existQueryEnd - existQueryStart) / 1000.0f)
                                + " seconds. ");
                if (studyQueryInfo == null) {
                    msg.append("Study does not exist yet.");
                } else {
                    msg.append("Study exists, Study ID " + studyQueryInfo.studyUUID);
                }
                LOG.info(msg);
            }

            try {
                final long uploadStart = System.currentTimeMillis();
                final JobInfo jobInfo = send(sendData.metadataFile, sendData.binaryData,
                        sendData.studyInstanceFiles, studyQueryInfo);
                final long uploadEnd = System.currentTimeMillis();
                assert studyQueryInfo == null || studyQueryInfo.studyUUID.equals(jobInfo.studyID);
                LOG.info("Completed uploading MINT to server for study instance UID " + studyInstanceUID + " ("
                        + sendData.studyInstanceFiles.size() + " instances) in "
                        + String.format("%.1f", (uploadEnd - uploadStart) / 1000.0f) + " seconds, Job ID "
                        + jobInfo.getJobID() + ", Study ID " + jobInfo.getStudyID());
            } catch (final IOException e) {
                LOG.error("Skipping study " + studyInstanceUID + ": I/O error while uploading study to server:", e);
            } catch (final SAXException e) {
                LOG.error("Skipping study " + studyInstanceUID
                        + ": error while parsing server response to study upload.", e);
            } finally {
                sendData.metadataFile.delete();
            }
        }
    }

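    /** @return true once no studies remain queued or awaiting server-side processing. */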
    public boolean sendingDone() {
        return studySendQueue.isEmpty() && jobIDInfo.isEmpty();
    }

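    /**
     * Queries the server for an existing study with the given Study Instance UID
     * and patient ID.
     *
     * @return the study's UUID and version, or null if the study does not exist yet
     * @throws Exception if the response cannot be parsed or multiple studies match
     */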
    private StudyQueryInfo doesStudyExist(final String studyInstanceUID, final String patientID) throws Exception {
        final List<NameValuePair> qparams = new ArrayList<NameValuePair>();
        qparams.add(new BasicNameValuePair("studyInstanceUID", studyInstanceUID));
        qparams.add(new BasicNameValuePair("patientID", patientID == null ? "" : patientID));
        final String fullURI = queryURI + "?" + URLEncodedUtils.format(qparams, "UTF-8");
        final HttpGet httpGet = new HttpGet(fullURI);
        final String response = httpClient.execute(httpGet, new BasicResponseHandler());
        final NodeList nodeList;
        try {
            final Document responseDoc = documentBuilder.parse(new ByteArrayInputStream(response.getBytes()));
            nodeList = (NodeList) xPath.evaluate("/studySearchResults/study", responseDoc, XPathConstants.NODESET);
        } catch (final Exception ex) {
            LOG.error("Querying for studyUID " + studyInstanceUID + ": unknown server response:\n" + response);
            throw ex;
        }
        final int uuidCount = nodeList.getLength();
        switch (uuidCount) {
        case 0:
            return null;
        case 1:
            final Node node = nodeList.item(0);
            final StudyQueryInfo studyQueryInfo = new StudyQueryInfo();
            final NamedNodeMap attrMap = node.getAttributes();
            studyQueryInfo.studyUUID = attrMap.getNamedItem("studyUUID").getNodeValue();
            studyQueryInfo.studyVersion = attrMap.getNamedItem("version").getNodeValue();
            return studyQueryInfo;
        default:
            throw new Exception("Multiple matches for study UID " + studyInstanceUID);
        }
    }

    /**
     * Polls the server for the status of all outstanding jobs, logging each
     * outcome and removing completed jobs from tracking; the instance files of
     * successfully processed studies are deleted if so configured.
     *
     * @throws IOException if a job status request fails
     */
    public void handleResponses() throws IOException {
        final ResponseHandler<String> responseHandler = new BasicResponseHandler();
        final Iterator<Entry<String, JobInfo>> studyIter = jobIDInfo.entrySet().iterator();
        while (studyIter.hasNext()) {
            final Entry<String, JobInfo> studyEntry = studyIter.next();
            final String jobID = studyEntry.getKey();
            final HttpGet httpGet = new HttpGet(jobStatusURI + "/" + jobID);
            final String response = httpClient.execute(httpGet, responseHandler);

            LOG.debug("Server job status response:\n" + response);

            final String statusStr;
            try {
                final Document responseDoc = documentBuilder.parse(new ByteArrayInputStream(response.getBytes()));
                statusStr = xPath.evaluate("/jobStatus/@jobStatus", responseDoc);
            } catch (final Exception ex) {
                LOG.error("Querying job " + jobID + ": unknown server response:\n" + response);
                studyIter.remove();
                continue;
            }

            final JobInfo jobInfo = studyEntry.getValue();
            final Collection<File> studyFiles = jobInfo.getFiles();
            if (statusStr.equals("IN_PROGRESS")) {
                continue;
            } else if (statusStr.equals("FAILED")) {
                LOG.error("Querying job " + jobID + ": server processing failed:\n" + response);
                //Do not delete the study's files in case of failure
                removeStudyFiles(studyFiles, false);
            } else if (statusStr.equals("SUCCESS")) {
                final long approxJobEndTime = System.currentTimeMillis();
                //Always round down the job processing time, as it's usually too high anyway
                LOG.info("Querying job " + jobID + ": server processing completed in approximately "
                        + ((approxJobEndTime - jobInfo.getJobStartTime()) / 1000) + " seconds for "
                        + studyFiles.size() + " instance files, Study ID " + jobInfo.getStudyID() + ".");
                removeStudyFiles(studyFiles, true);
            } else {
                LOG.error("Querying job " + jobID + ": unknown server response:\n" + response);
                //Do not delete the study's files in case of failure
                removeStudyFiles(studyFiles, false);
            }

            studyIter.remove();
        }
    }

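    /**
     * Deletes the given instance files (and forgets them as handled) when
     * deletion is requested and physical file deletion is enabled; otherwise
     * leaves the files in place.
     */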
    private void removeStudyFiles(final Collection<File> studyFiles, final boolean delete) {
        if (delete && deletePhysicalInstanceFiles) {
            for (final File undeletedStudyFile : studyFiles) {
                undeletedStudyFile.delete();
                handledFiles.remove(undeletedStudyFile);
            }
        }
    }

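    /**
     * Posts the study as a multipart request (existing study UUID and prior
     * version for updates, then the metadata, then one part per binary item)
     * to the create or update URI, and registers the resulting server job for
     * status polling.
     */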
    private JobInfo send(final File metadataFile, final BinaryData binaryData, final Collection<File> studyFiles,
            final StudyQueryInfo studyQueryInfo) throws IOException, SAXException {
        final HttpPost httpPost = new HttpPost(studyQueryInfo == null ? createURI : updateURI);
        final MultipartEntity entity = new MultipartEntity();

        if (studyQueryInfo != null) {
            entity.addPart(HttpMessagePart.STUDY_UUID.toString(), new StringBody(studyQueryInfo.studyUUID));
        }

        final StudyMetadata study = useXMLNotGPB ? StudyIO.parseFromXML(metadataFile)
                : StudyIO.parseFromGPB(metadataFile);
        if (studyQueryInfo != null) {
            //Specify current study version
            entity.addPart(HttpMessagePart.OLD_VERSION.toString(), new StringBody(studyQueryInfo.studyVersion));
        }

        //Pretty significant in-memory operations, so scoping to get references released ASAP
        {
            final byte[] metaInBuffer;
            {
                final ByteArrayOutputStream metaOut = new ByteArrayOutputStream(10000);
                if (useXMLNotGPB) {
                    StudyIO.writeToXML(study, metaOut);
                } else {
                    StudyIO.writeToGPB(study, metaOut);
                }
                metaInBuffer = metaOut.toByteArray();
            }
            final ByteArrayInputStream metaIn = new ByteArrayInputStream(metaInBuffer);
            //We must distinguish MIME types for GPB vs. XML so that the server can handle them properly
            entity.addPart(metadataFile.getName(), new InputStreamBody(metaIn,
                    useXMLNotGPB ? "text/xml" : "application/octet-stream", metadataFile.getName()));
        }

        //We support only one type
        assert binaryData instanceof BinaryDcmData;
        {
            int i = 0;
            for (final InputStream binaryStream : iter(((BinaryDcmData) binaryData).streamIterator())) {
                final String fileName = "binary" + i++;
                entity.addPart(fileName, new InputStreamBody(binaryStream, fileName));
            }
        }

        //Debugging only
        //        int i = 0;
        //        for (final InputStream binaryStream: Iter.iter(((BinaryDcmData) binaryData).streamIterator())) {
        //            final OutputStream testout = new BufferedOutputStream(new FileOutputStream("E:/testdata/" + i), 10000);
        //            for(;;) {
        //                final int val = binaryStream.read();
        //                if (val == -1) {
        //                    break;
        //                }
        //                testout.write(val);
        //            }
        //            testout.close();
        //            ++i;
        //        }

        httpPost.setEntity(entity);

        final String response = httpClient.execute(httpPost, new BasicResponseHandler());
        final long uploadEndTime = System.currentTimeMillis();

        LOG.debug("Server response:" + response);

        final String jobID;
        final String studyID;
        final Document responseDoc = documentBuilder.parse(new ByteArrayInputStream(response.getBytes()));
        try {
            jobID = xPath.evaluate("/jobStatus/@jobID", responseDoc).trim();
            studyID = xPath.evaluate("/jobStatus/@studyUUID", responseDoc).trim();
        } catch (final XPathExpressionException e) {
            //This shouldn't happen
            throw new RuntimeException(e);
        }
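        //The upload end time approximates the start of server-side job processing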
        final JobInfo jobInfo = new JobInfo(jobID, studyID, studyFiles, uploadEndTime);
        jobIDInfo.put(jobID, jobInfo);
        return jobInfo;
    }

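    /**
     * Recursively collects plain files under targetFile, skipping files that
     * have already been handled and files still being received by dcmrcv
     * (".part" suffix).
     */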
    private static void findPlainFilesRecursive(final File targetFile, final Collection<File> resultFiles,
            final Collection<File> handledFiles) {
        if (handledFiles.contains(targetFile)) {
            //Need to check whether file has already been handled right away, as otherwise the file may get
            //accessed or deleted by someone else while we're looking at it.
            return;
        }
        //Skip DICOM files which are not completely stored by dcmrcv yet.
        if (targetFile.getName().endsWith(".part")) {
            return;
        }
        if (targetFile.isFile()) {
            resultFiles.add(targetFile);
        } else if (targetFile.isDirectory()) {
            //listFiles() can return null if the directory vanishes concurrently
            //or an I/O error occurs
            final File[] subFiles = targetFile.listFiles();
            if (subFiles != null) {
                for (final File subFile : subFiles) {
                    findPlainFilesRecursive(subFile, resultFiles, handledFiles);
                }
            }
        } else {
            throw new RuntimeException(
                    "File " + targetFile.getAbsolutePath() + " is not a normal file and not a directory");
        }
    }

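    /** A queued study: its identifiers, serialized metadata file, binary data, and source instance files. */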
    private static class MetaBinaryFiles {
        public String studyInstanceUID;
        public String patientID;
        public File metadataFile;
        public BinaryData binaryData;
        public Collection<File> studyInstanceFiles;
    }

    private final HttpClient httpClient = new DefaultHttpClient();
    private final File importDir;
    private final SortedSet<File> handledFiles = new ConcurrentSkipListSet<File>();
    private final URI createURI;
    private final URI queryURI;
    private final URI updateURI;
    private final URI dicomDatadictionaryURI;
    private final URI jobStatusURI;
    private final boolean useXMLNotGPB;
    private final Collection<MetaBinaryFiles> studySendQueue = new ConcurrentLinkedQueue<MetaBinaryFiles>();

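    /** Tracks a server-side study-processing job and the instance files it covers. */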
    private static final class JobInfo {
        private final String jobID;
        private final String studyID;
        private final Collection<File> files;
        private final long jobStartTime;

        public JobInfo(final String jobID, final String studyID, final Collection<File> files,
                final long jobStartTime) {
            this.jobID = jobID;
            this.studyID = studyID;
            this.files = files;
            this.jobStartTime = jobStartTime;
        }

        public final String getJobID() {
            return jobID;
        }

        public final String getStudyID() {
            return studyID;
        }

        public final Collection<File> getFiles() {
            return files;
        }

        public final long getJobStartTime() {
            return jobStartTime;
        }
    }

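    /** UUID and version of an existing study, as returned by the server's study search. */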
    private static final class StudyQueryInfo {
        public String studyUUID;
        public String studyVersion;
    }
}