List of usage examples for java.util.zip.ZipInputStream.closeEntry()
public void closeEntry() throws IOException
From source file: edu.harvard.iq.dataverse.ingest.IngestServiceBean.java
public List<DataFile> createDataFiles(DatasetVersion version, InputStream inputStream, String fileName, String suppliedContentType) throws IOException { List<DataFile> datafiles = new ArrayList<DataFile>(); String warningMessage = null; // save the file, in the temporary location for now: Path tempFile = null;/*from ww w. j a v a 2 s. co m*/ if (getFilesTempDirectory() != null) { tempFile = Files.createTempFile(Paths.get(getFilesTempDirectory()), "tmp", "upload"); // "temporary" location is the key here; this is why we are not using // the DataStore framework for this - the assumption is that // temp files will always be stored on the local filesystem. // -- L.A. Jul. 2014 logger.fine("Will attempt to save the file as: " + tempFile.toString()); Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING); } else { throw new IOException("Temp directory is not configured."); } logger.fine("mime type supplied: " + suppliedContentType); // Let's try our own utilities (Jhove, etc.) to determine the file type // of the uploaded file. (We may already have a mime type supplied for this // file - maybe the type that the browser recognized on upload; or, if // it's a harvest, maybe the remote server has already given us the type // for this file... with our own type utility we may or may not do better // than the type supplied: // -- L.A. String recognizedType = null; String finalType = null; try { recognizedType = FileUtil.determineFileType(tempFile.toFile(), fileName); logger.fine("File utility recognized the file as " + recognizedType); if (recognizedType != null && !recognizedType.equals("")) { // is it any better than the type that was supplied to us, // if any? // This is not as trivial a task as one might expect... // We may need a list of "good" mime types, that should always // be chosen over other choices available. Maybe it should // even be a weighed list... as in, "application/foo" should // be chosen over "application/foo-with-bells-and-whistles". 
// For now the logic will be as follows: // // 1. If the contentType supplied (by the browser, most likely) // is some form of "unknown", we always discard it in favor of // whatever our own utilities have determined; // 2. We should NEVER trust the browser when it comes to the // following "ingestable" types: Stata, SPSS, R; // 2a. We are willing to TRUST the browser when it comes to // the CSV and XSLX ingestable types. // 3. We should ALWAYS trust our utilities when it comes to // ingestable types. if (suppliedContentType == null || suppliedContentType.equals("") || suppliedContentType.equalsIgnoreCase(MIME_TYPE_UNDETERMINED_DEFAULT) || suppliedContentType.equalsIgnoreCase(MIME_TYPE_UNDETERMINED_BINARY) || (ingestableAsTabular(suppliedContentType) && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_CSV) && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_CSV_ALT) && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_XLSX)) || ingestableAsTabular(recognizedType) || recognizedType.equals("application/fits-gzipped") || recognizedType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE) || recognizedType.equals(MIME_TYPE_ZIP)) { finalType = recognizedType; } } } catch (Exception ex) { logger.warning("Failed to run the file utility mime type check on file " + fileName); } if (finalType == null) { finalType = (suppliedContentType == null || suppliedContentType.equals("")) ? 
MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; } // A few special cases: // if this is a gzipped FITS file, we'll uncompress it, and ingest it as // a regular FITS file: if (finalType.equals("application/fits-gzipped")) { InputStream uncompressedIn = null; String finalFileName = fileName; // if the file name had the ".gz" extension, remove it, // since we are going to uncompress it: if (fileName != null && fileName.matches(".*\\.gz$")) { finalFileName = fileName.replaceAll("\\.gz$", ""); } DataFile datafile = null; try { uncompressedIn = new GZIPInputStream(new FileInputStream(tempFile.toFile())); datafile = createSingleDataFile(version, uncompressedIn, finalFileName, MIME_TYPE_UNDETERMINED_DEFAULT); } catch (IOException ioex) { datafile = null; } finally { if (uncompressedIn != null) { try { uncompressedIn.close(); } catch (IOException e) { } } } // If we were able to produce an uncompressed file, we'll use it // to create and return a final DataFile; if not, we're not going // to do anything - and then a new DataFile will be created further // down, from the original, uncompressed file. if (datafile != null) { // remove the compressed temp file: try { tempFile.toFile().delete(); } catch (SecurityException ex) { // (this is very non-fatal) logger.warning("Failed to delete temporary file " + tempFile.toString()); } datafiles.add(datafile); return datafiles; } // If it's a ZIP file, we are going to unpack it and create multiple // DataFile objects from its contents: } else if (finalType.equals("application/zip")) { ZipInputStream unZippedIn = null; ZipEntry zipEntry = null; int fileNumberLimit = systemConfig.getZipUploadFilesLimit(); try { Charset charset = null; /* TODO: (?) We may want to investigate somehow letting the user specify the charset for the filenames in the zip file... - otherwise, ZipInputStream bails out if it encounteres a file name that's not valid in the current charest (i.e., UTF-8, in our case). 
It would be a bit trickier than what we're doing for SPSS tabular ingests - with the lang. encoding pulldown menu - because this encoding needs to be specified *before* we upload and attempt to unzip the file. -- L.A. 4.0 beta12 logger.info("default charset is "+Charset.defaultCharset().name()); if (Charset.isSupported("US-ASCII")) { logger.info("charset US-ASCII is supported."); charset = Charset.forName("US-ASCII"); if (charset != null) { logger.info("was able to obtain charset for US-ASCII"); } } */ if (charset != null) { unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile()), charset); } else { unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile())); } while (true) { try { zipEntry = unZippedIn.getNextEntry(); } catch (IllegalArgumentException iaex) { // Note: // ZipInputStream documentation doesn't even mention that // getNextEntry() throws an IllegalArgumentException! // but that's what happens if the file name of the next // entry is not valid in the current CharSet. // -- L.A. warningMessage = "Failed to unpack Zip file. (Unknown Character Set used in a file name?) 
Saving the file as is."; logger.warning(warningMessage); throw new IOException(); } if (zipEntry == null) { break; } // Note that some zip entries may be directories - we // simply skip them: if (!zipEntry.isDirectory()) { if (datafiles.size() > fileNumberLimit) { logger.warning("Zip upload - too many files."); warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit + "); please upload a zip archive with fewer files, if you want them to be ingested " + "as individual DataFiles."; throw new IOException(); } String fileEntryName = zipEntry.getName(); logger.fine("ZipEntry, file: " + fileEntryName); if (fileEntryName != null && !fileEntryName.equals("")) { String shortName = fileEntryName.replaceFirst("^.*[\\/]", ""); // Check if it's a "fake" file - a zip archive entry // created for a MacOS X filesystem element: (these // start with "._") if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) { // OK, this seems like an OK file entry - we'll try // to read it and create a DataFile with it: DataFile datafile = createSingleDataFile(version, unZippedIn, shortName, MIME_TYPE_UNDETERMINED_DEFAULT, false); if (!fileEntryName.equals(shortName)) { String categoryName = fileEntryName.replaceFirst("[\\/][^\\/]*$", ""); if (!"".equals(categoryName)) { logger.fine("setting category to " + categoryName); //datafile.getFileMetadata().setCategory(categoryName.replaceAll("[\\/]", "-")); datafile.getFileMetadata() .addCategoryByName(categoryName.replaceAll("[\\/]", "-")); } } if (datafile != null) { // We have created this datafile with the mime type "unknown"; // Now that we have it saved in a temporary location, // let's try and determine its real type: String tempFileName = getFilesTempDirectory() + "/" + datafile.getStorageIdentifier(); try { recognizedType = FileUtil.determineFileType(new File(tempFileName), shortName); logger.fine("File utility recognized unzipped file as " + recognizedType); if 
(recognizedType != null && !recognizedType.equals("")) { datafile.setContentType(recognizedType); } } catch (Exception ex) { logger.warning("Failed to run the file utility mime type check on file " + fileName); } datafiles.add(datafile); } } } } unZippedIn.closeEntry(); } } catch (IOException ioex) { // just clear the datafiles list and let // ingest default to creating a single DataFile out // of the unzipped file. logger.warning("Unzipping failed; rolling back to saving the file as is."); if (warningMessage == null) { warningMessage = "Failed to unzip the file. Saving the file as is."; } datafiles.clear(); } finally { if (unZippedIn != null) { try { unZippedIn.close(); } catch (Exception zEx) { } } } if (datafiles.size() > 0) { // link the data files to the dataset/version: Iterator<DataFile> itf = datafiles.iterator(); while (itf.hasNext()) { DataFile datafile = itf.next(); datafile.setOwner(version.getDataset()); if (version.getFileMetadatas() == null) { version.setFileMetadatas(new ArrayList()); } version.getFileMetadatas().add(datafile.getFileMetadata()); datafile.getFileMetadata().setDatasetVersion(version); /* TODO!! // re-implement this in some way that does not use the // deprecated .getCategory() on FileMeatadata: if (datafile.getFileMetadata().getCategory() != null) { datafile.getFileMetadata().addCategoryByName(datafile.getFileMetadata().getCategory()); datafile.getFileMetadata().setCategory(null); -- done? see above? } */ version.getDataset().getFiles().add(datafile); } // remove the uploaded zip file: try { Files.delete(tempFile); } catch (IOException ioex) { // do nothing - it's just a temp file. 
logger.warning("Could not remove temp file " + tempFile.getFileName().toString()); } // and return: return datafiles; } } else if (finalType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)) { // Shape files may have to be split into multiple files, // one zip archive per each complete set of shape files: //File rezipFolder = new File(this.getFilesTempDirectory()); File rezipFolder = this.getShapefileUnzipTempDirectory(); IngestServiceShapefileHelper shpIngestHelper; shpIngestHelper = new IngestServiceShapefileHelper(tempFile.toFile(), rezipFolder); boolean didProcessWork = shpIngestHelper.processFile(); if (!(didProcessWork)) { logger.severe("Processing of zipped shapefile failed."); return null; } for (File finalFile : shpIngestHelper.getFinalRezippedFiles()) { FileInputStream finalFileInputStream = new FileInputStream(finalFile); finalType = this.getContentType(finalFile); if (finalType == null) { logger.warning("Content type is null; but should default to 'MIME_TYPE_UNDETERMINED_DEFAULT'"); continue; } DataFile new_datafile = createSingleDataFile(version, finalFileInputStream, finalFile.getName(), finalType); if (new_datafile != null) { datafiles.add(new_datafile); } else { logger.severe("Could not add part of rezipped shapefile. 
new_datafile was null: " + finalFile.getName()); } finalFileInputStream.close(); } // Delete the temp directory used for unzipping /* logger.fine("Delete temp shapefile unzip directory: " + rezipFolder.getAbsolutePath()); FileUtils.deleteDirectory(rezipFolder); // Delete rezipped files for (File finalFile : shpIngestHelper.getFinalRezippedFiles()){ if (finalFile.isFile()){ finalFile.delete(); } } */ if (datafiles.size() > 0) { return datafiles; } else { logger.severe("No files added from directory of rezipped shapefiles"); } return null; } // Finally, if none of the special cases above were applicable (or // if we were unable to unpack an uploaded file, etc.), we'll just // create and return a single DataFile: // (Note that we are passing null for the InputStream; that's because // we already have the file saved; we'll just need to rename it, below) DataFile datafile = createSingleDataFile(version, null, fileName, finalType); if (datafile != null) { fileService.generateStorageIdentifier(datafile); if (!tempFile.toFile() .renameTo(new File(getFilesTempDirectory() + "/" + datafile.getStorageIdentifier()))) { return null; } // MD5: MD5Checksum md5Checksum = new MD5Checksum(); try { datafile.setmd5( md5Checksum.CalculateMD5(getFilesTempDirectory() + "/" + datafile.getStorageIdentifier())); } catch (Exception md5ex) { logger.warning("Could not calculate MD5 signature for new file " + fileName); } if (warningMessage != null) { createIngestFailureReport(datafile, warningMessage); datafile.SetIngestProblem(); } datafiles.add(datafile); return datafiles; } return null; }