Example usage for java.util.zip ZipInputStream closeEntry

List of usage examples for java.util.zip ZipInputStream closeEntry

Introduction

On this page you can find example usage of the java.util.zip.ZipInputStream.closeEntry() method.

Prototype

public void closeEntry() throws IOException 

Source Link

Document

Closes the current ZIP entry and positions the stream for reading the next entry.

Usage

From source file:edu.harvard.iq.dataverse.ingest.IngestServiceBean.java

/**
 * Saves an uploaded stream to a temporary file and creates one or more
 * {@link DataFile} objects from it.
 *
 * <p>The upload is first copied into the configured temp directory and its
 * mime type is determined (our own file utility may override the supplied
 * type, see the rules inside the method). Three types get special handling:
 * <ul>
 *   <li>gzipped FITS: the file is uncompressed and a single DataFile is
 *       created from the uncompressed content;</li>
 *   <li>ZIP: every regular entry becomes its own DataFile; if unpacking
 *       fails or the archive holds too many files, the archive is saved
 *       as is, with an ingest failure report attached;</li>
 *   <li>shapefile archives: handed to {@code IngestServiceShapefileHelper},
 *       one DataFile per re-zipped file it produces.</li>
 * </ul>
 * Anything else (or a failed special case) results in a single DataFile
 * made from the saved temp file.
 *
 * @param version             dataset version the new files are created for
 * @param inputStream         content of the uploaded file
 * @param fileName            name of the uploaded file
 * @param suppliedContentType mime type supplied by the uploader; may be
 *                            {@code null} or empty
 * @return the created DataFiles, or {@code null} when shapefile processing
 *         fails or yields no files, when the fallback single DataFile could
 *         not be created, or when the temp file could not be renamed to its
 *         storage identifier
 * @throws IOException if the temp directory is not configured, if the upload
 *                     cannot be saved there, or if reading a re-zipped
 *                     shapefile fails
 */
public List<DataFile> createDataFiles(DatasetVersion version, InputStream inputStream, String fileName,
        String suppliedContentType) throws IOException {
    List<DataFile> datafiles = new ArrayList<DataFile>();

    String warningMessage = null;

    // save the file, in the temporary location for now: 
    Path tempFile = null;

    if (getFilesTempDirectory() != null) {
        tempFile = Files.createTempFile(Paths.get(getFilesTempDirectory()), "tmp", "upload");
        // "temporary" location is the key here; this is why we are not using 
        // the DataStore framework for this - the assumption is that 
        // temp files will always be stored on the local filesystem. 
        //          -- L.A. Jul. 2014
        logger.fine("Will attempt to save the file as: " + tempFile.toString());
        Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING);
    } else {
        throw new IOException("Temp directory is not configured.");
    }
    logger.fine("mime type supplied: " + suppliedContentType);
    // Let's try our own utilities (Jhove, etc.) to determine the file type 
    // of the uploaded file. (We may already have a mime type supplied for this
    // file - maybe the type that the browser recognized on upload; or, if 
    // it's a harvest, maybe the remote server has already given us the type
    // for this file... with our own type utility we may or may not do better 
    // than the type supplied:
    //  -- L.A. 
    String recognizedType = null;
    String finalType = null;
    try {
        recognizedType = FileUtil.determineFileType(tempFile.toFile(), fileName);
        logger.fine("File utility recognized the file as " + recognizedType);
        if (recognizedType != null && !recognizedType.equals("")) {
            // is it any better than the type that was supplied to us,
            // if any?
            // This is not as trivial a task as one might expect... 
            // We may need a list of "good" mime types, that should always
            // be chosen over other choices available. Maybe it should 
            // even be a weighed list... as in, "application/foo" should 
            // be chosen over "application/foo-with-bells-and-whistles".

            // For now the logic will be as follows: 
            //
            // 1. If the contentType supplied (by the browser, most likely) 
            // is some form of "unknown", we always discard it in favor of 
            // whatever our own utilities have determined; 
            // 2. We should NEVER trust the browser when it comes to the 
            // following "ingestable" types: Stata, SPSS, R;
            // 2a. We are willing to TRUST the browser when it comes to
            //  the CSV and XSLX ingestable types.
            // 3. We should ALWAYS trust our utilities when it comes to 
            // ingestable types. 

            if (suppliedContentType == null || suppliedContentType.equals("")
                    || suppliedContentType.equalsIgnoreCase(MIME_TYPE_UNDETERMINED_DEFAULT)
                    || suppliedContentType.equalsIgnoreCase(MIME_TYPE_UNDETERMINED_BINARY)
                    || (ingestableAsTabular(suppliedContentType)
                            && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_CSV)
                            && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_CSV_ALT)
                            && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_XLSX))
                    || ingestableAsTabular(recognizedType) || recognizedType.equals("application/fits-gzipped")
                    || recognizedType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)
                    || recognizedType.equals(MIME_TYPE_ZIP)) {
                finalType = recognizedType;
            }
        }

    } catch (Exception ex) {
        // Type detection is best-effort; we fall back on the supplied type below.
        logger.warning("Failed to run the file utility mime type check on file " + fileName);
    }

    if (finalType == null) {
        finalType = (suppliedContentType == null || suppliedContentType.equals(""))
                ? MIME_TYPE_UNDETERMINED_DEFAULT
                : suppliedContentType;
    }

    // A few special cases: 

    // if this is a gzipped FITS file, we'll uncompress it, and ingest it as
    // a regular FITS file:

    if (finalType.equals("application/fits-gzipped")) {

        String finalFileName = fileName;
        // if the file name had the ".gz" extension, remove it, 
        // since we are going to uncompress it:
        if (fileName != null && fileName.matches(".*\\.gz$")) {
            finalFileName = fileName.replaceAll("\\.gz$", "");
        }

        DataFile datafile = null;
        // The raw file stream is a resource of its own so that it is closed
        // even when the GZIPInputStream constructor rejects the file header.
        try (InputStream rawIn = new FileInputStream(tempFile.toFile());
                InputStream uncompressedIn = new GZIPInputStream(rawIn)) {
            datafile = createSingleDataFile(version, uncompressedIn, finalFileName,
                    MIME_TYPE_UNDETERMINED_DEFAULT);
        } catch (IOException ioex) {
            // Non-fatal: if no DataFile was produced we fall through and save
            // the compressed file as is. (datafile is deliberately left
            // untouched here, so a failure that only happens while closing
            // the streams does not discard a successfully created DataFile.)
            logger.warning("I/O problem while uncompressing gzipped FITS file " + fileName + ": "
                    + ioex.getMessage());
        }

        // If we were able to produce an uncompressed file, we'll use it 
        // to create and return a final DataFile; if not, we're not going
        // to do anything - and then a new DataFile will be created further 
        // down, from the original, compressed file.
        if (datafile != null) {
            // remove the compressed temp file: 
            // (File.delete() reports failure through its return value, so 
            // that has to be checked in addition to the SecurityException)
            try {
                if (!tempFile.toFile().delete()) {
                    logger.warning("Failed to delete temporary file " + tempFile.toString());
                }
            } catch (SecurityException ex) {
                // (this is very non-fatal)
                logger.warning("Failed to delete temporary file " + tempFile.toString());
            }

            datafiles.add(datafile);
            return datafiles;
        }

        // If it's a ZIP file, we are going to unpack it and create multiple 
        // DataFile objects from its contents:
    } else if (finalType.equals("application/zip")) {

        ZipInputStream unZippedIn = null;
        ZipEntry zipEntry = null;

        int fileNumberLimit = systemConfig.getZipUploadFilesLimit();

        try {
            /*
            TODO: (?)
            We may want to investigate somehow letting the user specify 
            the charset for the filenames in the zip file...
            - otherwise, ZipInputStream bails out if it encounters a file 
            name that's not valid in the current charset (i.e., UTF-8, in 
            our case). It would be a bit trickier than what we're doing for 
            SPSS tabular ingests - with the lang. encoding pulldown menu - 
            because this encoding needs to be specified *before* we upload and
            attempt to unzip the file. 
                -- L.A. 4.0 beta12
            */
            unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile()));

            while (true) {
                try {
                    zipEntry = unZippedIn.getNextEntry();
                } catch (IllegalArgumentException iaex) {
                    // Note: 
                    // ZipInputStream documentation doesn't even mention that 
                    // getNextEntry() throws an IllegalArgumentException!
                    // but that's what happens if the file name of the next
                    // entry is not valid in the current CharSet. 
                    //      -- L.A.
                    warningMessage = "Failed to unpack Zip file. (Unknown Character Set used in a file name?) Saving the file as is.";
                    logger.warning(warningMessage);
                    // handled by the catch below, which rolls back to saving the archive as is
                    throw new IOException();
                }

                if (zipEntry == null) {
                    break;
                }
                // Note that some zip entries may be directories - we 
                // simply skip them:

                if (!zipEntry.isDirectory()) {
                    // NOTE(review): with ">" the archive is only rejected once 
                    // fileNumberLimit + 1 files have already been unpacked; 
                    // confirm whether ">=" was intended.
                    if (datafiles.size() > fileNumberLimit) {
                        logger.warning("Zip upload - too many files.");
                        warningMessage = "The number of files in the zip archive is over the limit ("
                                + fileNumberLimit
                                + "); please upload a zip archive with fewer files, if you want them to be ingested "
                                + "as individual DataFiles.";
                        // handled by the catch below, which rolls back to saving the archive as is
                        throw new IOException();
                    }

                    String fileEntryName = zipEntry.getName();
                    logger.fine("ZipEntry, file: " + fileEntryName);

                    if (fileEntryName != null && !fileEntryName.equals("")) {

                        // strip the folder part of the entry name:
                        String shortName = fileEntryName.replaceFirst("^.*[\\/]", "");

                        // Check if it's a "fake" file - a zip archive entry 
                        // created for a MacOS X filesystem element: (these 
                        // start with "._")
                        if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store")
                                && !"".equals(shortName)) {
                            // OK, this seems like an OK file entry - we'll try 
                            // to read it and create a DataFile with it:

                            DataFile datafile = createSingleDataFile(version, unZippedIn, shortName,
                                    MIME_TYPE_UNDETERMINED_DEFAULT, false);

                            if (datafile != null) {
                                // The folder part of the entry name, if any, 
                                // becomes the category of the file. (This must 
                                // stay inside the null check - the DataFile is 
                                // dereferenced here.)
                                if (!fileEntryName.equals(shortName)) {
                                    String categoryName = fileEntryName.replaceFirst("[\\/][^\\/]*$", "");
                                    if (!"".equals(categoryName)) {
                                        logger.fine("setting category to " + categoryName);
                                        datafile.getFileMetadata()
                                                .addCategoryByName(categoryName.replaceAll("[\\/]", "-"));
                                    }
                                }

                                // We have created this datafile with the mime type "unknown";
                                // Now that we have it saved in a temporary location, 
                                // let's try and determine its real type:

                                String tempFileName = getFilesTempDirectory() + "/"
                                        + datafile.getStorageIdentifier();

                                try {
                                    recognizedType = FileUtil.determineFileType(new File(tempFileName),
                                            shortName);
                                    logger.fine("File utility recognized unzipped file as " + recognizedType);
                                    if (recognizedType != null && !recognizedType.equals("")) {
                                        datafile.setContentType(recognizedType);
                                    }
                                } catch (Exception ex) {
                                    logger.warning("Failed to run the file utility mime type check on file "
                                            + shortName);
                                }

                                datafiles.add(datafile);
                            } else {
                                logger.warning("Could not create a DataFile for zip entry " + fileEntryName
                                        + "; skipping it.");
                            }
                        }
                    }
                }
                unZippedIn.closeEntry();

            }

        } catch (IOException ioex) {
            // just clear the datafiles list and let 
            // ingest default to creating a single DataFile out
            // of the zipped file. 
            logger.warning("Unzipping failed; rolling back to saving the file as is.");
            if (warningMessage == null) {
                warningMessage = "Failed to unzip the file. Saving the file as is.";
            }

            datafiles.clear();
        } finally {
            if (unZippedIn != null) {
                try {
                    unZippedIn.close();
                } catch (Exception ignored) {
                    // nothing useful can be done if closing the temp file stream fails
                }
            }
        }
        if (datafiles.size() > 0) {
            // link the data files to the dataset/version: 
            for (DataFile datafile : datafiles) {
                datafile.setOwner(version.getDataset());
                if (version.getFileMetadatas() == null) {
                    version.setFileMetadatas(new ArrayList());
                }
                version.getFileMetadatas().add(datafile.getFileMetadata());
                datafile.getFileMetadata().setDatasetVersion(version);

                version.getDataset().getFiles().add(datafile);
            }
            // remove the uploaded zip file: 
            try {
                Files.delete(tempFile);
            } catch (IOException ioex) {
                // do nothing - it's just a temp file.
                logger.warning("Could not remove temp file " + tempFile.getFileName().toString());
            }
            // and return:
            return datafiles;
        }

    } else if (finalType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)) {
        // Shape files may have to be split into multiple files, 
        // one zip archive per each complete set of shape files:

        File rezipFolder = this.getShapefileUnzipTempDirectory();

        IngestServiceShapefileHelper shpIngestHelper;
        shpIngestHelper = new IngestServiceShapefileHelper(tempFile.toFile(), rezipFolder);

        boolean didProcessWork = shpIngestHelper.processFile();
        if (!(didProcessWork)) {
            logger.severe("Processing of zipped shapefile failed.");
            return null;
        }
        for (File finalFile : shpIngestHelper.getFinalRezippedFiles()) {
            // try-with-resources: the stream is closed on every path out of 
            // the loop body, including the "continue" and any exception.
            try (FileInputStream finalFileInputStream = new FileInputStream(finalFile)) {
                finalType = this.getContentType(finalFile);
                if (finalType == null) {
                    logger.warning("Content type is null; but should default to 'MIME_TYPE_UNDETERMINED_DEFAULT'");
                    continue;
                }
                DataFile new_datafile = createSingleDataFile(version, finalFileInputStream,
                        finalFile.getName(), finalType);
                if (new_datafile != null) {
                    datafiles.add(new_datafile);
                } else {
                    logger.severe("Could not add part of rezipped shapefile. new_datafile was null: "
                            + finalFile.getName());
                }
            }
        }

        // TODO: the temp directory used for unzipping and the rezipped files 
        // are not deleted here.

        if (datafiles.size() > 0) {
            return datafiles;
        } else {
            logger.severe("No files added from directory of rezipped shapefiles");
        }
        return null;

    }

    // Finally, if none of the special cases above were applicable (or 
    // if we were unable to unpack an uploaded file, etc.), we'll just 
    // create and return a single DataFile:
    // (Note that we are passing null for the InputStream; that's because
    // we already have the file saved; we'll just need to rename it, below)

    DataFile datafile = createSingleDataFile(version, null, fileName, finalType);

    if (datafile != null) {
        fileService.generateStorageIdentifier(datafile);
        if (!tempFile.toFile()
                .renameTo(new File(getFilesTempDirectory() + "/" + datafile.getStorageIdentifier()))) {
            return null;
        }

        // MD5:
        MD5Checksum md5Checksum = new MD5Checksum();
        try {
            datafile.setmd5(
                    md5Checksum.CalculateMD5(getFilesTempDirectory() + "/" + datafile.getStorageIdentifier()));
        } catch (Exception md5ex) {
            logger.warning("Could not calculate MD5 signature for new file " + fileName);
        }

        // a warning left over from a failed special case (e.g. unzipping) 
        // is attached to the file as an ingest failure report:
        if (warningMessage != null) {
            createIngestFailureReport(datafile, warningMessage);
            datafile.SetIngestProblem();
        }
        datafiles.add(datafile);

        return datafiles;
    }

    return null;
}