edu.lternet.pasta.portal.HarvesterServlet.java Source code

Java tutorial

Introduction

Here is the source code for edu.lternet.pasta.portal.HarvesterServlet.java

Source

/*
 *
 * $Date: 2012-04-02 11:10:19 -0700 (Mon, 02 Apr 2012) $
 * $Author: dcosta $
 * $Revision: $
 *
 * Copyright 2011,2012 the University of New Mexico.
 *
 * This work was supported by National Science Foundation Cooperative
 * Agreements #DEB-0832652 and #DEB-0936498.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific
 * language governing permissions and limitations under the License.
 *
 */

package edu.lternet.pasta.portal;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import javax.servlet.RequestDispatcher;
import javax.servlet.ServletContext;
import javax.servlet.ServletException;
import javax.servlet.annotation.MultipartConfig;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.http.HttpSession;
import javax.servlet.http.Part;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;

import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.apache.xpath.CachedXPathAPI;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import edu.lternet.pasta.client.PastaAuthenticationException;
import edu.lternet.pasta.common.UserErrorException;
import edu.lternet.pasta.common.XmlUtility;
import edu.lternet.pasta.common.eml.DataPackage;
import edu.lternet.pasta.common.eml.EMLParser;
import edu.lternet.pasta.common.eml.Entity;
import edu.ucsb.nceas.utilities.IOUtil;
import edu.ucsb.nceas.utilities.XMLUtilities;

/**
 * Servlet behind the harvester forms. It receives EML metadata (pasted text,
 * an uploaded file, a list of document URLs, a Metacat harvest list, or a
 * desktop upload with accompanying data files) and passes it to a
 * {@link Harvester} to be evaluated or uploaded.
 *
 * @author dcosta
 *
 */
@MultipartConfig
public class HarvesterServlet extends DataPortalServlet {

    /*
     * Class variables
     */

    // Both values are read from the portal configuration in init().
    private static String desktopUrlHead = null;
    private static String harvesterPath = null;
    private static final Logger logger = Logger.getLogger(edu.lternet.pasta.portal.HarvesterServlet.class);
    private static final long serialVersionUID = 1L;

    /** Name of the directory, resolved against the servlet context, that holds desktop data files. */
    public static final String DESKTOP_DATA_DIR = "data";

    /** Message shown when a multi-document harvest has been started in the background. */
    private static final String CHECK_BACK_LATER = "This may take a few minutes. Please check the "
            + "<a href=\"./harvestReport.jsp\">View Evaluate/Upload Results</a> page " + "for available reports.";

    /** File name used for the stored copy of the user's original desktop EML. */
    private static final String DESKTOP_FILE_NAME = "DESKTOP-EML.xml";

    /** File name used for the stored copy of the transformed, PASTA-ready EML. */
    private static final String PASTA_READY_FILE_NAME = "PASTA-READY-EML.xml";

    /*
     * Class methods
     */

    /**
     * Handles GET requests by delegating to
     * {@link #doPost(HttpServletRequest, HttpServletResponse)}.
     * 
     * @param request
     *          the request sent by the client to the server
     * @param response
     *          the response sent by the server to the client
     * @throws ServletException
     *           if an error occurred
     * @throws IOException
     *           if an error occurred
     */
    public void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        doPost(request, response);
    }

    /**
     * Handles a submission from one of the harvester forms.
     * <p>
     * The "metadataSource" request parameter selects the input: "emlText",
     * "emlFile", "urlList", "harvestList" or "desktopHarvester". The "submit"
     * request parameter selects evaluate mode when its value is "evaluate"
     * (case-insensitive); any other value means upload.
     * <p>
     * A single document (text or file) is processed synchronously. A list of
     * document URLs is handed to a background thread and the user is told to
     * check back later. A desktop upload of an EML file only stores state in
     * the session and forwards to the desktop harvester page; the follow-up
     * "desktopHarvester" request supplies the data files.
     * 
     * @param request
     *          the request sent by the client to the server
     * @param response
     *          the response sent by the server to the client
     * @throws ServletException
     *           if an error occurred
     * @throws IOException
     *           if an error occurred
     */
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        HttpSession httpSession = request.getSession();
        ServletContext servletContext = httpSession.getServletContext();
        ArrayList<String> documentURLs = null;
        File emlFile = null;
        String emlTextArea = null;
        String harvestReportId = null;
        boolean isDesktopUpload = false;
        boolean isEvaluate = false;
        String uid = (String) httpSession.getAttribute("uid");
        String warningMessage = "";

        try {
            if (uid == null) {
                throw new PastaAuthenticationException(LOGIN_WARNING);
            }

            /*
             * "metadataSource" is set as a hidden input field in each of the
             * harvester forms.
             */
            String metadataSource = request.getParameter("metadataSource");
            if (metadataSource == null) {
                throw new IllegalStateException("No value specified for request parameter 'metadataSource'");
            }

            /*
             * "mode" is the value of the submit button in each of the
             * harvester forms.
             */
            String mode = request.getParameter("submit");
            if ((mode != null) && mode.equalsIgnoreCase("evaluate")) {
                isEvaluate = true;
            }

            /*
             * Every source except "desktopHarvester" starts a new harvest and
             * so needs a new report id; "desktopHarvester" continues a harvest
             * whose report id is already stored in the session.
             */
            if (!metadataSource.equals("desktopHarvester")) {
                String harvestId = generateHarvestId();
                harvestReportId = uid + (isEvaluate ? "-evaluate-" : "-upload-") + harvestId;
            }

            if (metadataSource.equals("emlText")) {
                emlTextArea = request.getParameter("emlTextArea");
                if (emlTextArea == null || emlTextArea.trim().isEmpty()) {
                    warningMessage = "<p class=\"warning\">Please enter the text of an EML document into the text area.</p>";
                    // Nothing usable was entered, so make sure the blank text
                    // is not handed to the harvester below.
                    emlTextArea = null;
                }
            } else if (metadataSource.equals("emlFile")) {
                Collection<Part> parts = request.getParts();
                for (Part part : parts) {
                    if (part.getContentType() != null) {
                        // save EML file to disk
                        emlFile = processUploadedFile(part);
                    } else {
                        /*
                         * Parse the request parameters.
                         */
                        String fieldName = part.getName();
                        String fieldValue = request.getParameter(fieldName);
                        if (fieldName != null && fieldValue != null) {
                            if (fieldName.equals("submit") && fieldValue.equalsIgnoreCase("evaluate")) {
                                isEvaluate = true;
                            } else if (fieldName.equals("desktopUpload")
                                    && fieldValue.equalsIgnoreCase("1")) {
                                isDesktopUpload = true;
                            }
                        }
                    }
                }
            } else if (metadataSource.equals("urlList")) {
                String urlTextArea = request.getParameter("urlTextArea");
                if (urlTextArea == null || urlTextArea.trim().isEmpty()) {
                    warningMessage = "<p class=\"warning\">Please enter one or more EML document URLs into the text area.</p>";
                } else {
                    documentURLs = parseDocumentURLsFromTextArea(urlTextArea);
                    warningMessage = CHECK_BACK_LATER;
                }
            } else if (metadataSource.equals("harvestList")) {
                String harvestListURL = request.getParameter("harvestListURL");
                if (harvestListURL == null || harvestListURL.trim().isEmpty()) {
                    warningMessage = "<p class=\"warning\">Please enter the URL to a Metacat Harvest List.</p>";
                } else {
                    documentURLs = parseDocumentURLsFromHarvestList(harvestListURL);
                    warningMessage = CHECK_BACK_LATER;
                }
            }
            /*
             * If the metadata source is "desktopHarvester", we already have the
             * EML file stored in a session attribute. Now we need to retrieve
             * the data files from the browser's form fields and write the
             * data files to a URL accessible location.
             */
            else if (metadataSource.equals("desktopHarvester")) {
                emlFile = (File) httpSession.getAttribute("emlFile");
                String storedReportId = (String) httpSession.getAttribute("harvestReportId");
                if (emlFile == null || storedReportId == null) {
                    // Fail with a clear message instead of a NullPointerException
                    // or a ".../null" path when the session state is missing.
                    throw new IllegalStateException(
                            "Desktop harvest state is missing from the session; please upload the EML file again.");
                }
                ArrayList<Entity> entityList = parseEntityList(emlFile);
                harvestReportId = storedReportId;
                String dataPath = servletContext.getRealPath(DESKTOP_DATA_DIR);
                String harvestPath = String.format("%s/%s", dataPath, harvestReportId);

                Collection<Part> parts = request.getParts();
                String objectName = null;
                Part filePart = null;

                /*
                 * A data file and its "object-name-*" field arrive as separate
                 * parts; a file is written once both have been seen, after
                 * which the pair is reset for the next file.
                 */
                for (Part part : parts) {
                    if (part.getContentType() != null) {
                        filePart = part;
                    } else {
                        /*
                         * Parse the request parameters.
                         */
                        String fieldName = part.getName();
                        String fieldValue = request.getParameter(fieldName);
                        if (fieldName != null && fieldValue != null) {
                            if (fieldName.equals("submit") && fieldValue.equalsIgnoreCase("evaluate")) {
                                isEvaluate = true;
                            } else if (fieldName.startsWith("object-name-")) {
                                objectName = fieldValue;
                            }
                        }
                    }

                    if (filePart != null && objectName != null) {
                        processDataFile(filePart, harvestPath, objectName);
                        objectName = null;
                        filePart = null;
                    }
                }

                emlFile = transformDesktopEML(harvestPath, emlFile, harvestReportId, entityList);
            }

            Harvester harvester = new Harvester(harvesterPath, harvestReportId, uid, isEvaluate);

            if (emlTextArea != null) {
                harvester.processSingleDocument(emlTextArea);
            } else if (emlFile != null) {
                if (isDesktopUpload) {
                    /*
                     * Defer processing: remember the EML and report id so the
                     * follow-up "desktopHarvester" request can pick them up.
                     */
                    ArrayList<Entity> entityList = parseEntityList(emlFile);
                    httpSession.setAttribute("entityList", entityList);
                    httpSession.setAttribute("emlFile", emlFile);
                    httpSession.setAttribute("harvestReportId", harvestReportId);
                    httpSession.setAttribute("isEvaluate", Boolean.valueOf(isEvaluate));
                } else {
                    harvester.processSingleDocument(emlFile);
                }
            } else if (documentURLs != null) {
                // Run the multi-document harvest in the background; shutdown()
                // lets the submitted task finish and then releases the pool.
                harvester.setDocumentURLs(documentURLs);
                ExecutorService executorService = Executors.newCachedThreadPool();
                executorService.execute(harvester);
                executorService.shutdown();
            }
        } catch (Exception e) {
            handleDataPortalError(logger, e);
        }

        request.setAttribute("message", warningMessage);

        /*
         * If we have a new reportId, and either there is no warning message or
         * it's the "Check back later" message, set the harvestReportID session
         * attribute to the new reportId value.
         */
        if (harvestReportId != null && harvestReportId.length() > 0
                && (warningMessage.length() == 0 || warningMessage.equals(CHECK_BACK_LATER))) {
            httpSession.setAttribute("harvestReportID", harvestReportId);
        }

        if (isDesktopUpload) {
            RequestDispatcher requestDispatcher = request.getRequestDispatcher("./desktopHarvester.jsp");
            requestDispatcher.forward(request, response);
        } else if (warningMessage.length() == 0) {
            response.sendRedirect("./harvestReport.jsp");
        } else {
            RequestDispatcher requestDispatcher = request.getRequestDispatcher("./harvester.jsp");
            requestDispatcher.forward(request, response);
        }

    }

    /*
     * Parses the EML file and returns a list of Entity objects.
     *
     * NOTE(review): the file is read with the platform default charset;
     * confirm that this matches the encoding of the uploaded EML.
     */
    private ArrayList<Entity> parseEntityList(File emlFile) throws IOException {
        String eml = FileUtils.readFileToString(emlFile);
        DataPackage dataPackage = new EMLParser().parseDocument(eml);
        return dataPackage.getEntityList();
    }

    /*
     * Generates a harvest ID of the form "yyyy-MM-dd-<epoch milliseconds>"
     * from the current time.
     */
    private String generateHarvestId() {
        Date now = new Date();
        // A new formatter per call: SimpleDateFormat must not be shared
        // between request threads.
        String dateString = new SimpleDateFormat("yyyy-MM-dd").format(now);
        String harvestId = dateString + "-" + now.getTime();
        logger.debug("HARVESTID: " + harvestId);

        return harvestId;
    }

    /*
     * Extracts the uploaded file's name from the part's "content-disposition"
     * header, or returns null when the header or its filename element is
     * absent. Only the final path segment is returned, because the value is
     * supplied by the client and is later used to build a path on disk.
     */
    private String getFilename(Part part) {
        String contentDispositionHeader = part.getHeader("content-disposition");
        if (contentDispositionHeader == null) {
            return null;
        }

        for (String element : contentDispositionHeader.split(";")) {
            String trimmed = element.trim();
            if (trimmed.startsWith("filename")) {
                String value = trimmed.substring(trimmed.indexOf('=') + 1).trim().replace("\"", "");
                return stripDirectories(value);
            }
        }
        return null;
    }

    /*
     * Returns the text after the last '/' or '\' in the given name, or the
     * whole name when it contains neither separator.
     */
    private static String stripDirectories(String fileName) {
        int lastSeparator = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\'));
        return fileName.substring(lastSeparator + 1);
    }

    /**
     * Initialization of the servlet. Reads the "harvester.path" and
     * "dataportal.desktopUrlHead" settings from the portal configuration.
     * 
     * @throws ServletException
     *           if an error occurs
     */
    public void init() throws ServletException {
        PropertiesConfiguration options = ConfigurationListener.getOptions();
        harvesterPath = options.getString("harvester.path");
        desktopUrlHead = options.getString("dataportal.desktopUrlHead");
    }

    /**
     * Stores a local copy of EML metadata on the file system, creating the
     * target directory first if it does not exist.
     * 
     * @param   dirPath      the directory in which to store the file
     * @param   xmlContent   the XML string content
     * @param   isPastaReady true to store under the PASTA-ready file name,
     *                       false to store under the desktop file name
     * @return  the EML file that was created
     * @throws  IOException  if the directory cannot be created or the file
     *                       cannot be written
     */
    public File storeMetadata(String dirPath, String xmlContent, boolean isPastaReady) throws IOException {
        File dirFile = new File(dirPath);
        // The second isDirectory() check tolerates another thread creating the
        // directory between the first check and mkdirs().
        if (!dirFile.isDirectory() && !dirFile.mkdirs() && !dirFile.isDirectory()) {
            throw new IOException("Unable to create metadata directory: " + dirPath);
        }

        String filename = isPastaReady ? PASTA_READY_FILE_NAME : DESKTOP_FILE_NAME;
        File emlFile = new File(dirPath, filename);
        StringBuffer stringBuffer = new StringBuffer(xmlContent);
        FileWriter fileWriter = null;

        try {
            // NOTE(review): FileWriter encodes with the platform default
            // charset; confirm that this matches the encoding of the XML.
            fileWriter = new FileWriter(emlFile);
            IOUtil.writeToWriter(stringBuffer, fileWriter, true);
        } catch (IOException e) {
            // Log the exception itself so the stack trace reaches the log.
            logger.error("IOException storing metadata file " + emlFile.getPath(), e);
            throw e;
        } finally {
            if (fileWriter != null) {
                fileWriter.close();
            }
        }

        return emlFile;
    }

    /*
     * Transforms the user's desktop EML to PASTA-ready EML. The original
     * desktop EML and the transformed document are both stored in harvestPath;
     * the file holding the PASTA-ready document is returned.
     */
    private File transformDesktopEML(String harvestPath, File desktopEMLFile, String harvestReportId,
            ArrayList<Entity> entityList)
            throws IOException, TransformerException, SAXException, ParserConfigurationException {
        // Parse first, so a malformed document fails before anything is stored.
        Document desktopEMLDocument = XmlUtility.xmlFileToDocument(desktopEMLFile);

        // Keep a copy of the EML exactly as the user submitted it.
        String desktopEMLString = FileUtils.readFileToString(desktopEMLFile);
        storeMetadata(harvestPath, desktopEMLString, false);

        String harvestUrlHead = String.format("%s/%s", desktopUrlHead, harvestReportId);
        DesktopEMLFactory desktopEMLFactory = new DesktopEMLFactory(harvestUrlHead);
        Document pastaReadyEMLDocument = desktopEMLFactory.makePastaReady(desktopEMLDocument, entityList);
        Node documentElement = pastaReadyEMLDocument.getDocumentElement();
        String pastaReadyEMLString = XMLUtilities.getDOMTreeAsString(documentElement);

        return storeMetadata(harvestPath, pastaReadyEMLString, true);
    }

    /*
     * Parses a list of document URLs from a legacy Metacat Harvester
     * harvest list. The text of every "documentURL" element is trimmed and
     * added to the result; blank values are skipped.
     */
    private ArrayList<String> parseDocumentURLsFromHarvestList(String harvestListURL) throws Exception {
        ArrayList<String> urlList = new ArrayList<String>();

        if (harvestListURL != null) {
            /*
             * The harvest list is fetched from a user-supplied URL, so do not
             * let the parser resolve external entities declared in it.
             */
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
            factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            DocumentBuilder documentBuilder = factory.newDocumentBuilder();
            CachedXPathAPI xpathapi = new CachedXPathAPI();
            Document document = documentBuilder.parse(harvestListURL);

            if (document != null) {
                NodeList documentURLNodeList = xpathapi.selectNodeList(document, "//documentURL");
                for (int i = 0; i < documentURLNodeList.getLength(); i++) {
                    String url = documentURLNodeList.item(i).getTextContent();
                    if (url != null) {
                        url = url.trim();
                        if (!url.isEmpty()) {
                            urlList.add(url);
                        }
                    }
                }
            }
        }

        return urlList;
    }

    /*
     * Parses a list of document URLs from a user input text area. Each line
     * is trimmed; only lines starting with "http://", "https://" or "ftp://"
     * are kept.
     */
    private ArrayList<String> parseDocumentURLsFromTextArea(String textArea) {
        ArrayList<String> urlList = new ArrayList<String>();

        if (textArea != null) {
            for (String rawLine : textArea.split("\n")) {
                String line = rawLine.trim();
                if (line.startsWith("http://") || line.startsWith("https://") || line.startsWith("ftp://")) {
                    urlList.add(line);
                }
            }
        }

        return urlList;
    }

    /**
     * Process the uploaded EML file by writing it into a new, timestamp-named
     * directory under the "java.io.tmpdir" location.
     * 
     * @param part The multipart form file data.
     * 
     * @return The uploaded EML file as File object, or null if the part has
     *         no content type or no file name.
     * 
     * @throws Exception
     */
    private File processUploadedFile(Part part) throws Exception {
        File eml = null;

        if (part.getContentType() != null) {
            // save file Part to disk
            String fileName = getFilename(part);
            if (fileName != null && !fileName.isEmpty()) {
                long timestamp = new Date().getTime();
                String tmpDir = String.format("%s/%d", System.getProperty("java.io.tmpdir"), timestamp);
                Harvester.createDirectory(tmpDir);
                String tmpPath = String.format("%s/%s", tmpDir, fileName);
                part.write(tmpPath);
                eml = new File(tmpPath);
            }
        }

        return eml;
    }

    /**
     * Process the uploaded data file by writing it into the harvest
     * directory, after checking that its file name matches the expected
     * object name.
     * 
     * @param part       The multipart form file data.
     * @param harvestDir The directory in which to store the data file.
     * @param objectName The name that the uploaded file must have.
     * 
     * @return The name of the uploaded file, or null if the part has no
     *         content type or no file name header.
     * 
     * @throws Exception if the file name does not match objectName (a
     *                   UserErrorException) or the file cannot be written
     */
    private String processDataFile(Part part, String harvestDir, String objectName) throws Exception {
        String fileName = null;

        if (part.getContentType() != null) {
            // save the data file to disk where it can be harvested
            fileName = getFilename(part);

            if (fileName != null && !fileName.isEmpty()) {

                if (!fileName.equals(objectName)) {
                    String msg = String.format("Filename \"%s\" does not match objectName \"%s\".", fileName,
                            objectName);
                    throw new UserErrorException(msg);
                }

                Harvester.createDirectory(harvestDir);
                String filePath = String.format("%s/%s", harvestDir, fileName);
                part.write(filePath);
            }
        }

        return fileName;
    }

}