org.dspace.submit.step.UploadStep.java Source code

Java tutorial

Introduction

Here is the source code for org.dspace.submit.step.UploadStep.java

Source

/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.submit.step;

import java.io.*;
import java.sql.SQLException;
import java.util.Enumeration;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;

import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.dspace.app.util.SubmissionInfo;
import org.dspace.app.util.Util;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.BitstreamFormat;
import org.dspace.content.Bundle;
import org.dspace.content.FormatIdentifier;
import org.dspace.content.Item;
import org.dspace.core.Context;
import org.dspace.core.ConfigurationManager;
import org.dspace.curate.Curator;
import org.dspace.submit.AbstractProcessingStep;

/**
 * Upload step for DSpace. Processes the actual upload of files
 * for an item being submitted into DSpace.
 * <P>
 * This class performs all the behind-the-scenes processing that
 * this particular step requires.  This class's methods are utilized 
 * by both the JSP-UI and the Manakin XML-UI
 * 
 * @see org.dspace.app.util.SubmissionConfig
 * @see org.dspace.app.util.SubmissionStepConfig
 * @see org.dspace.submit.AbstractProcessingStep
 * 
 * @author Tim Donohue
 * @version $Revision$
 */
public class UploadStep extends AbstractProcessingStep {
    /** Name of the button pressed to upload a file. */
    public static final String SUBMIT_UPLOAD_BUTTON = "submit_upload";

    /** Name of the button pressed to skip uploading a file. */
    public static final String SUBMIT_SKIP_BUTTON = "submit_skip";

    /** Name of the button pressed to submit more files. */
    public static final String SUBMIT_MORE_BUTTON = "submit_more";

    /** Name of the button pressed to cancel editing of file information. */
    public static final String CANCEL_EDIT_BUTTON = "submit_edit_cancel";

    /***************************************************************************
     * STATUS / ERROR FLAGS (returned by doProcessing() if an error occurs or
     * additional user interaction may be required)
     * 
     * (Do NOT use status of 0, since it corresponds to STATUS_COMPLETE flag
     * defined in the JSPStepManager class)
     **************************************************************************/
    /** Status flag: an integrity error occurred (e.g. a malformed request). */
    public static final int STATUS_INTEGRITY_ERROR = 1;

    /** Status flag: an error occurred while uploading the file. */
    public static final int STATUS_UPLOAD_ERROR = 2;

    /** Status flag: a file is required but none has been uploaded. */
    public static final int STATUS_NO_FILES_ERROR = 5;

    /** Status flag: the format of the uploaded file is unknown. */
    public static final int STATUS_UNKNOWN_FORMAT = 10;

    /** Status flag: the virus scan could not be carried out. */
    public static final int STATUS_VIRUS_CHECKER_UNAVAILABLE = 14;

    /** Status flag: the uploaded file failed the virus check. */
    public static final int STATUS_CONTAINS_VIRUS = 16;

    /** Status flag: the user asked to edit the information of a file. */
    public static final int STATUS_EDIT_BITSTREAM = 20;

    /** Status flag: the user returned from editing file information. */
    public static final int STATUS_EDIT_COMPLETE = 25;

    /** log4j logger (shared by all instances, never reassigned) */
    private static final Logger log = Logger.getLogger(UploadStep.class);

    /**
     * Whether at least one uploaded file is required. Read from the
     * "webui.submit.upload.required" configuration property, with
     * <code>true</code> passed as the second argument of the lookup.
     */
    protected boolean fileRequired = ConfigurationManager.getBooleanProperty("webui.submit.upload.required", true);

    /**
     * Do any processing of the information input by the user, and/or perform
     * step processing (if no user interaction required)
     * <P>
     * It is this method's job to save any data to the underlying database, as
     * necessary, and return error messages (if any) which can then be processed
     * by the appropriate user interface (JSP-UI or XML-UI)
     * <P>
     * NOTE: If this step is a non-interactive step (i.e. requires no UI), then
     * it should perform *all* of its processing in this method!
     * <P>
     * The request is handled in this order: upload of new files (multipart
     * requests only), progress-bar / "previous" jumps, edit or cancel-edit of a
     * bitstream, removal of files, change of file description, change of file
     * format, change of primary bitstream, and finally the "file required"
     * check followed by a commit.
     * 
     * @param context
     *            current DSpace context
     * @param request
     *            current servlet request object
     * @param response
     *            current servlet response object
     * @param subInfo
     *            submission info object
     * @return Status or error flag which will be processed by
     *         doPostProcessing() below! (if STATUS_COMPLETE or 0 is returned,
     *         no errors occurred!). A removal request carrying a non-numeric
     *         bitstream ID yields STATUS_INTEGRITY_ERROR.
     */
    public int doProcessing(Context context, HttpServletRequest request, HttpServletResponse response,
            SubmissionInfo subInfo) throws ServletException, IOException, SQLException, AuthorizeException {
        final String editPrefix = "submit_edit_";
        final String removePrefix = "submit_remove_";

        // get button user pressed
        String buttonPressed = Util.getSubmitButton(request, NEXT_BUTTON);

        // get reference to item
        Item item = subInfo.getSubmissionItem().getItem();

        // -----------------------------------
        // Step #0: Upload new files (if any)
        // -----------------------------------
        String contentType = request.getContentType();

        // if multipart form, then we are uploading a file
        if ((contentType != null) && (contentType.indexOf("multipart/form-data") != -1)) {
            // This is a multipart request, so it's a file upload
            // (return any status messages or errors reported)
            int status = processUploadFile(context, request, response, subInfo);

            // if error occurred, return immediately
            if (status != STATUS_COMPLETE) {
                return status;
            }
        }

        // if user pressed jump-to button in process bar,
        // return success (so that jump will occur)
        if (buttonPressed.startsWith(PROGRESS_BAR_PREFIX) || buttonPressed.startsWith(PREVIOUS_BUTTON)) {
            // check if a file is required to be uploaded
            if (fileRequired && !item.hasUploadedFiles()) {
                return STATUS_NO_FILES_ERROR;
            } else {
                return STATUS_COMPLETE;
            }
        }

        // ---------------------------------------------
        // Step #1: Check if this was just a request to
        // edit file information.
        // (or canceled editing information)
        // ---------------------------------------------
        // check if we're already editing a specific bitstream
        if (request.getParameter("bitstream_id") != null) {
            if (buttonPressed.equals(CANCEL_EDIT_BUTTON)) {
                // canceled an edit bitstream request
                subInfo.setBitstream(null);

                // this flag will just return us to the normal upload screen
                return STATUS_EDIT_COMPLETE;
            } else {
                // load info for bitstream we are editing
                Bitstream b = Bitstream.find(context, Integer.parseInt(request.getParameter("bitstream_id")));

                // save bitstream to submission info
                subInfo.setBitstream(b);
            }
        } else if (buttonPressed.startsWith(editPrefix)) {
            // get ID of bitstream that was requested for editing
            String bitstreamID = buttonPressed.substring(editPrefix.length());

            Bitstream b = Bitstream.find(context, Integer.parseInt(bitstreamID));

            // save bitstream to submission info
            subInfo.setBitstream(b);

            // return appropriate status flag to say we are now editing the
            // bitstream
            return STATUS_EDIT_BITSTREAM;
        }

        // ---------------------------------------------
        // Step #2: Process any remove file request(s)
        // ---------------------------------------------
        // Remove-selected requests come from Manakin
        if (buttonPressed.equalsIgnoreCase("submit_remove_selected")) {
            // this is a remove multiple request!

            if (request.getParameter("remove") != null) {
                // get all files to be removed
                String[] removeIDs = request.getParameterValues("remove");

                // remove each file in the list
                for (int i = 0; i < removeIDs.length; i++) {
                    int status = removeFileByRawId(context, item, removeIDs[i]);

                    // if error occurred, return immediately
                    if (status != STATUS_COMPLETE) {
                        return status;
                    }
                }

                // remove current bitstream from Submission Info
                subInfo.setBitstream(null);
            }
        } else if (buttonPressed.startsWith(removePrefix)) {
            // A single file "remove" button must have been pressed;
            // the bitstream ID is whatever follows the prefix
            int status = removeFileByRawId(context, item, buttonPressed.substring(removePrefix.length()));

            // if error occurred, return immediately
            if (status != STATUS_COMPLETE) {
                return status;
            }

            // remove current bitstream from Submission Info
            subInfo.setBitstream(null);
        }

        // -------------------------------------------------
        // Step #3: Check for a change in file description
        // -------------------------------------------------
        String fileDescription = request.getParameter("description");
        if (fileDescription != null && fileDescription.length() > 0) {

            // No bitstream selected yet: fall back to the first bitstream of
            // the first ORIGINAL bundle, if there is one.
            if (subInfo.getBitstream() == null && item != null) {
                Bundle[] originals = item.getBundles("ORIGINAL");
                if (originals.length != 0) {
                    Bitstream[] bitstreams = originals[0].getBitstreams();
                    // the bundle may be empty, so check the length before
                    // touching the first element
                    if (bitstreams.length > 0 && bitstreams[0] != null) {
                        subInfo.setBitstream(bitstreams[0]);
                    }
                }
            }

            // save this file description
            int status = processSaveFileDescription(context, request, response, subInfo);

            // if error occurred, return immediately
            if (status != STATUS_COMPLETE) {
                return status;
            }
        }

        // ------------------------------------------
        // Step #4: Check for a file format change
        // (if user had to manually specify format)
        // ------------------------------------------
        int formatTypeID = Util.getIntParameter(request, "format");
        String formatDesc = request.getParameter("format_description");

        // if a format id or description was found, then save this format!
        if (formatTypeID >= 0 || (formatDesc != null && formatDesc.length() > 0)) {
            // save this specified format
            int status = processSaveFileFormat(context, request, response, subInfo);

            // if error occurred, return immediately
            if (status != STATUS_COMPLETE) {
                return status;
            }
        }

        // ---------------------------------------------------
        // Step #5: Check if primary bitstream has changed
        // -------------------------------------------------
        if (request.getParameter("primary_bitstream_id") != null) {
            Bundle[] bundles = item.getBundles("ORIGINAL");
            if (bundles.length > 0) {
                bundles[0].setPrimaryBitstreamID(Integer.parseInt(request.getParameter("primary_bitstream_id")));
                bundles[0].update();
            }
        }

        // ---------------------------------------------------
        // Step #6: Determine if there is an error because no
        // files have been uploaded.
        // ---------------------------------------------------
        //check if a file is required to be uploaded
        if (fileRequired && !item.hasUploadedFiles() && !buttonPressed.equals(SUBMIT_MORE_BUTTON)) {
            return STATUS_NO_FILES_ERROR;
        }

        // commit all changes to database
        context.commit();

        return STATUS_COMPLETE;
    }

    /**
     * Parse a bitstream ID received from the request and remove that file
     * from the item via processRemoveFile().
     * 
     * @param context
     *            current DSpace context
     * @param item
     *            Item the file should be removed from
     * @param rawId
     *            bitstream ID exactly as it arrived in the request
     * @return STATUS_INTEGRITY_ERROR if the ID is not a number, otherwise the
     *         status returned by processRemoveFile()
     */
    private int removeFileByRawId(Context context, Item item, String rawId)
            throws IOException, SQLException, AuthorizeException {
        int id;
        try {
            id = Integer.parseInt(rawId);
        } catch (NumberFormatException nfe) {
            // mangled ID: report it as an integrity problem instead of
            // letting the runtime exception escape to the container
            log.warn("Ignoring file removal request with a non-numeric bitstream ID");
            return STATUS_INTEGRITY_ERROR;
        }

        return processRemoveFile(context, item, id);
    }

    /**
     * Reports how many pages this "step" spans; the value is used to build
     * the progress bar.
     * <P>
     * Most steps consist of a single page and return 1. A step that spans
     * several HTML pages (for example the configurable "Describe" step, whose
     * page count comes from input-forms.xml) returns a larger number.
     * <P>
     * Non-interactive steps (no user interface) should also return 1, so
     * that they are processed only once.
     * 
     * @param request
     *            The HTTP Request
     * @param subInfo
     *            The current submission information object
     * 
     * @return the number of pages in this step (always 1 for the upload step)
     */
    public int getNumberOfPages(HttpServletRequest request, SubmissionInfo subInfo) throws ServletException {
        // The upload step is served by several JSPs, yet it shows up only
        // once in the Progress Bar, so it counts as a single page.
        final int pagesInProgressBar = 1;
        return pagesInProgressBar;
    }

    // ****************************************************************
    // ****************************************************************
    // METHODS FOR UPLOADING FILES (and associated information)
    // ****************************************************************
    // ****************************************************************

    /**
     * Remove a file from an item.
     * <P>
     * The bitstream is removed from the first bundle it belongs to; if that
     * bundle is left without bitstreams, the bundle is removed from the item
     * and the item is updated.
     * 
     * @param context
     *            current DSpace context
     * @param item
     *            Item where file should be removed from
     * @param bitstreamID
     *            The id of bitstream representing the file to remove
     * @return Status or error flag which will be processed by
     *         UI-related code! (if STATUS_COMPLETE or 0 is returned,
     *         no errors occurred!). STATUS_INTEGRITY_ERROR is returned when
     *         the bitstream cannot be found or is not part of any bundle.
     */
    protected int processRemoveFile(Context context, Item item, int bitstreamID)
            throws IOException, SQLException, AuthorizeException {
        Bitstream bitstream;

        // Try to find bitstream
        // NOTE(review): the ID is already an int here, so this catch looks
        // like a leftover from a String-based lookup; kept in case the
        // lookup can still raise it - confirm and remove if not.
        try {
            bitstream = Bitstream.find(context, bitstreamID);
        } catch (NumberFormatException nfe) {
            bitstream = null;
        }

        if (bitstream == null) {
            // Invalid or mangled bitstream ID
            // throw an error and return immediately
            return STATUS_INTEGRITY_ERROR;
        }

        Bundle[] bundles = bitstream.getBundles();

        if (bundles == null || bundles.length == 0) {
            // The bitstream exists but is not held by any bundle, so it is
            // not a file of this submission; report it rather than failing
            // with an ArrayIndexOutOfBoundsException below.
            return STATUS_INTEGRITY_ERROR;
        }

        // remove bitstream from bundle..
        Bundle owningBundle = bundles[0];
        owningBundle.removeBitstream(bitstream);

        // remove bundle if it's now empty
        if (owningBundle.getBitstreams().length < 1) {
            item.removeBundle(owningBundle);
            item.update();
        }

        // no errors occurred
        return STATUS_COMPLETE;
    }

    /** Maximum number of characters taken from a document for its text excerpt. */
    private static final int EXCERPT_LENGTH = 500;

    /**
     * Process the upload of a new file!
     * <P>
     * Every request attribute whose name ends with "-path" denotes one
     * uploaded file. For each of them a bitstream is created in the item's
     * ORIGINAL bundle, its name, source, description and format are set, and
     * the optional virus scan is run. Once the file has been accepted, a text
     * excerpt is stored in the item's "dc.textpart" metadata for PDF, TXT,
     * DOC and DOCX files (chosen by the file name's extension), and the
     * changes are committed.
     * <P>
     * Failing to extract the excerpt never fails the upload; it is only
     * logged.
     * 
     * @param context
     *            current DSpace context
     * @param request
     *            current servlet request object
     * @param response
     *            current servlet response object
     * @param subInfo
     *            submission info object
     * 
     * @return Status or error flag which will be processed by
     *         UI-related code! (if STATUS_COMPLETE or 0 is returned,
     *         no errors occurred!)
     */
    public int processUploadFile(Context context, HttpServletRequest request, HttpServletResponse response,
            SubmissionInfo subInfo) throws ServletException, IOException, SQLException, AuthorizeException {
        final String pathSuffix = "-path";

        //NOTE: File should already be uploaded. 
        //Manakin does this automatically via Cocoon.
        //For JSP-UI, the SubmissionController.uploadFiles() does the actual upload

        Enumeration attNames = request.getAttributeNames();

        //loop through our request attributes
        while (attNames.hasMoreElements()) {
            String attr = (String) attNames.nextElement();

            //only attributes ending with "-path" represent
            //a newly uploaded file
            if (!attr.endsWith(pathSuffix)) {
                continue;
            }

            //strip off the trailing -path (and only the trailing one) to get
            //the actual parameter that the file was uploaded as
            String param = attr.substring(0, attr.length() - pathSuffix.length());

            // Load the file's path and input stream and description
            String filePath = (String) request.getAttribute(attr);
            InputStream uploadedStream = (InputStream) request.getAttribute(param + "-inputstream");

            //attempt to get description from attribute first, then direct from a parameter
            String fileDescription = (String) request.getAttribute(param + "-description");
            if (fileDescription == null || fileDescription.length() == 0) {
                fileDescription = request.getParameter("description");
            }

            // if information wasn't passed by User Interface, we had a problem
            // with the upload
            if (filePath == null || uploadedStream == null) {
                return STATUS_UPLOAD_ERROR;
            }

            if (subInfo == null) {
                // In any event, if we don't have the submission info, the request
                // was malformed
                return STATUS_INTEGRITY_ERROR;
            }

            // Strip all but the last filename. It would be nice
            // to know which OS the file came from.
            String noPath = filePath.substring(filePath.lastIndexOf('/') + 1);
            noPath = noPath.substring(noPath.lastIndexOf('\\') + 1);

            // The excerpt needs a second pass over the data, so the upload is
            // buffered in memory - but only for the file types we can
            // actually extract text from.
            String extension = fileExtension(noPath);
            byte[] content = null;
            InputStream bitstreamData = uploadedStream;
            if (isExcerptSupported(extension)) {
                content = IOUtils.toByteArray(uploadedStream);
                bitstreamData = new ByteArrayInputStream(content);
            }

            // Create the bitstream
            Item item = subInfo.getSubmissionItem().getItem();

            // do we already have a bundle?
            Bundle[] bundles = item.getBundles("ORIGINAL");

            Bitstream b;
            if (bundles.length < 1) {
                // set bundle's name to ORIGINAL
                b = item.createSingleBitstream(bitstreamData, "ORIGINAL");
            } else {
                // we have a bundle already, just add bitstream
                b = bundles[0].createBitstream(bitstreamData);
            }

            b.setName(noPath);
            b.setSource(filePath);
            b.setDescription(fileDescription);

            // Identify the format
            BitstreamFormat bf = FormatIdentifier.guessFormat(context, b);
            b.setFormat(bf);

            // Update to DB
            b.update();
            item.update();

            if ((bf != null) && (bf.isInternal())) {
                log.warn("Attempt to upload file format marked as internal system use only");
                backoutBitstream(subInfo, b, item);
                return STATUS_UPLOAD_ERROR;
            }

            // Check for virus
            if (ConfigurationManager.getBooleanProperty("submission-curation", "virus-scan")) {
                Curator curator = new Curator();
                curator.addTask("vscan").curate(item);
                int status = curator.getStatus("vscan");
                if (status == Curator.CURATE_ERROR) {
                    backoutBitstream(subInfo, b, item);
                    return STATUS_VIRUS_CHECKER_UNAVAILABLE;
                } else if (status == Curator.CURATE_FAIL) {
                    backoutBitstream(subInfo, b, item);
                    return STATUS_CONTAINS_VIRUS;
                }
            }

            // If we got this far then everything is more or less ok.

            // Only now that the file has been accepted, record the text
            // excerpt, so a rejected file leaves no metadata behind.
            if (content != null) {
                String excerpt = buildExcerpt(extractText(extension, content), "pdf".equalsIgnoreCase(extension));
                if (excerpt != null) {
                    item.addMetadata("dc", "textpart", null, null, excerpt + "...");
                    item.update();
                }
            }

            // Comment - not sure if this is the right place for a commit here
            // but I'm not brave enough to remove it - Robin.
            context.commit();

            // save this bitstream to the submission info, as the
            // bitstream we're currently working with
            subInfo.setBitstream(b);

            //if format was not identified
            if (bf == null) {
                return STATUS_UNKNOWN_FORMAT;
            }
        } //end while

        return STATUS_COMPLETE;

    }

    /** Returns the part of the file name after its last '.', or "" if there is none. */
    private static String fileExtension(String fileName) {
        int dot = fileName.lastIndexOf('.');
        return (dot < 0) ? "" : fileName.substring(dot + 1);
    }

    /** Tells whether a text excerpt can be produced for files with this extension. */
    private static boolean isExcerptSupported(String extension) {
        return "pdf".equalsIgnoreCase(extension) || "txt".equalsIgnoreCase(extension)
                || "doc".equalsIgnoreCase(extension) || "docx".equalsIgnoreCase(extension);
    }

    /** Extracts the full text of the uploaded file, or returns null (after logging) if that fails. */
    private static String extractText(String extension, byte[] content) {
        try {
            if ("pdf".equalsIgnoreCase(extension)) {
                return extractPdfText(content);
            } else if ("txt".equalsIgnoreCase(extension)) {
                return decodeText(content);
            } else if ("doc".equalsIgnoreCase(extension)) {
                return new WordExtractor(new HWPFDocument(new ByteArrayInputStream(content))).getText();
            } else if ("docx".equalsIgnoreCase(extension)) {
                return new XWPFWordExtractor(new XWPFDocument(new ByteArrayInputStream(content))).getText();
            }
        } catch (Exception e) {
            // Deliberately broad: the document parsers signal broken or
            // mislabelled files with assorted runtime exceptions, and the
            // excerpt is optional - it must not abort the upload.
            log.warn("Could not extract a text excerpt from uploaded " + extension + " file", e);
        }
        return null;
    }

    /** Parses a PDF held in memory and returns its text, closing the document afterwards. */
    private static String extractPdfText(byte[] content) throws IOException {
        PDFParser parser = new PDFParser(new ByteArrayInputStream(content));
        parser.parse();
        COSDocument cosDoc = parser.getDocument();
        PDDocument document = new PDDocument(cosDoc);
        try {
            return new PDFTextStripper().getText(document);
        } finally {
            document.close();
        }
    }

    /** Decodes plain text as UTF-8 when the bytes are valid UTF-8, else as Cp1252; drops a leading BOM. */
    private static String decodeText(byte[] content) throws IOException {
        String text;
        try {
            text = java.nio.charset.Charset.forName("UTF-8").newDecoder()
                    .onMalformedInput(java.nio.charset.CodingErrorAction.REPORT)
                    .onUnmappableCharacter(java.nio.charset.CodingErrorAction.REPORT)
                    .decode(java.nio.ByteBuffer.wrap(content)).toString();
        } catch (java.nio.charset.CharacterCodingException notUtf8) {
            text = new String(content, "Cp1252");
        }

        if (text.startsWith("\uFEFF")) {
            text = text.substring(1);
        }
        return text;
    }

    /** Cuts up to EXCERPT_LENGTH characters starting at the middle of the text; null if nothing is left. */
    private static String buildExcerpt(String text, boolean skipToSentenceStart) {
        if (text == null || text.length() == 0) {
            return null;
        }

        int start = text.length() / 2;
        int end = Math.min(text.length(), start + EXCERPT_LENGTH);
        String excerpt = text.substring(start, end);

        if (skipToSentenceStart) {
            // begin right after the first full stop; indexOf yields -1 when
            // there is none, which keeps the excerpt unchanged
            excerpt = excerpt.substring(excerpt.indexOf('.') + 1);
        }

        return (excerpt.length() == 0) ? null : excerpt;
    }

    /**
     * If we created a new Bitstream but now realised there is a problem then
     * remove it: the bitstream is taken out of its first bundle, the bundle
     * is dropped from the item when it ends up empty, and the submission
     * info's current bitstream is cleared.
     * 
     * @param subInfo
     *            submission info object whose current bitstream is reset
     * @param b
     *            the bitstream to back out
     * @param item
     *            the item that holds the bitstream's bundle
     */
    protected void backoutBitstream(SubmissionInfo subInfo, Bitstream b, Item item)
            throws SQLException, AuthorizeException, IOException {
        // detach the bitstream from the (first) bundle that holds it
        Bundle owningBundle = b.getBundles()[0];
        owningBundle.removeBitstream(b);

        // a bundle without any bitstream left is removed from the item too
        boolean bundleNowEmpty = owningBundle.getBitstreams().length == 0;
        if (bundleNowEmpty) {
            item.removeBundle(owningBundle);
            item.update();
        }

        subInfo.setBitstream(null);
    }

    /**
     * Process input from get file type page.
     * <P>
     * When the "format" request parameter resolves to a known bitstream
     * format, that format is set on the current bitstream; otherwise the
     * "format_description" request parameter is stored as the user's own
     * format description. The bitstream is then updated.
     * 
     * @param context
     *            current DSpace context
     * @param request
     *            current servlet request object
     * @param response
     *            current servlet response object
     * @param subInfo
     *            submission info object
     * 
     * @return Status or error flag which will be processed by
     *         UI-related code! (if STATUS_COMPLETE or 0 is returned,
     *         no errors occurred!). STATUS_INTEGRITY_ERROR is returned when
     *         the submission info holds no current bitstream.
     */
    protected int processSaveFileFormat(Context context, HttpServletRequest request, HttpServletResponse response,
            SubmissionInfo subInfo) throws ServletException, IOException, SQLException, AuthorizeException {
        Bitstream current = subInfo.getBitstream();

        // nothing to apply the format to
        if (current == null) {
            return STATUS_INTEGRITY_ERROR;
        }

        // Did the user select a format?
        BitstreamFormat chosen = BitstreamFormat.find(context, Util.getIntParameter(request, "format"));

        if (chosen == null) {
            // no known format selected: keep the user's free-text description
            current.setUserFormatDescription(request.getParameter("format_description"));
        } else {
            current.setFormat(chosen);
        }

        // update database
        current.update();

        return STATUS_COMPLETE;
    }

    /**
     * Process input from the "change file description" page.
     * <P>
     * Stores the "description" request parameter on the current bitstream,
     * updates the bitstream and commits the context.
     * 
     * @param context
     *            current DSpace context
     * @param request
     *            current servlet request object
     * @param response
     *            current servlet response object
     * @param subInfo
     *            submission info object
     * 
     * @return Status or error flag which will be processed by
     *         UI-related code! (if STATUS_COMPLETE or 0 is returned,
     *         no errors occurred!). STATUS_INTEGRITY_ERROR is returned when
     *         the submission info holds no current bitstream.
     */
    protected int processSaveFileDescription(Context context, HttpServletRequest request,
            HttpServletResponse response, SubmissionInfo subInfo)
            throws ServletException, IOException, SQLException, AuthorizeException {
        Bitstream current = subInfo.getBitstream();

        // without a current bitstream there is nothing to describe
        if (current == null) {
            return STATUS_INTEGRITY_ERROR;
        }

        String newDescription = request.getParameter("description");
        current.setDescription(newDescription);
        current.update();

        context.commit();

        return STATUS_COMPLETE;
    }

}