List of usage examples for java.util.zip ZipFile getInputStream
public InputStream getInputStream(ZipEntry entry) throws IOException
From source file:de.tudarmstadt.ukp.clarin.webanno.webapp.remoteapi.RemoteApiController.java
/**
 * Create a new project from an uploaded ZIP file.
 *
 * To test, use the Linux "curl" command.
 *
 * curl -v -F 'file=@test.zip' -F 'name=Test' -F 'filetype=tcf'
 * 'http://USERNAME:PASSWORD@localhost:8080/de.tudarmstadt.ukp.clarin.webanno.webapp/api/project
 * '
 *
 * @param aName
 *            the name of the project to create.
 * @param aFileType
 *            the type of the files contained in the ZIP. The possible file types are configured
 *            in the formats.properties configuration file of WebAnno.
 * @param aFile
 *            a ZIP file containing the project data.
 * @throws Exception
 *             if there was an error, e.g. the upload is not a ZIP file or a project with the
 *             given name already exists.
 */
@RequestMapping(value = "/project", method = RequestMethod.POST, consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
public @ResponseStatus(HttpStatus.NO_CONTENT) void createProject(
        @RequestParam("file") MultipartFile aFile,
        @RequestParam("name") String aName,
        @RequestParam("filetype") String aFileType) throws Exception {
    LOG.info("Creating project [" + aName + "]");

    // Close the probe stream once the ZIP check is done instead of leaking it.
    try (InputStream probe = aFile.getInputStream()) {
        if (!ZipUtils.isZipStream(probe)) {
            throw new InvalidFileNameException("", "is an invalid Zip file");
        }
    }

    // Get current user
    String username = SecurityContextHolder.getContext().getAuthentication().getName();
    User user = userRepository.get(username);

    // Refuse to overwrite an existing project
    if (projectRepository.existsProject(aName)) {
        throw new IOException("The project with name [" + aName + "] exists");
    }

    // Configure project
    Project project = new Project();
    project.setName(aName);

    // Create the project and initialize tags
    projectRepository.createProject(project, user);
    annotationService.initializeTypesForProject(project, user, new String[] {}, new String[] {},
            new String[] {}, new String[] {}, new String[] {}, new String[] {}, new String[] {},
            new String[] {});

    // Create permissions for this user (ADMIN and USER level)
    ProjectPermission permission = new ProjectPermission();
    permission.setLevel(PermissionLevel.ADMIN);
    permission.setProject(project);
    permission.setUser(username);
    projectRepository.createProjectPermission(permission);

    permission = new ProjectPermission();
    permission.setLevel(PermissionLevel.USER);
    permission.setProject(project);
    permission.setUser(username);
    projectRepository.createProjectPermission(permission);

    // Iterate through all the files in the ZIP.
    // A fixed prefix is used for the temporary file: File.createTempFile rejects prefixes shorter
    // than three characters, so the client-supplied file name is not a safe prefix.
    File zimpFile = File.createTempFile("webanno-project", ".zip");
    try {
        aFile.transferTo(zimpFile);

        // Name of the folder entry that equals the uploaded ZIP's own name
        String zipNameFolder = FilenameUtils.removeExtension(aFile.getOriginalFilename()) + "/";
        String metaDataPath = (META_INF + "webanno/source-meta-data.properties").replace("/", "");

        try (ZipFile zip = new ZipFile(zimpFile)) {
            for (Enumeration<?> zipEnumerate = zip.entries(); zipEnumerate.hasMoreElements();) {
                // Get ZipEntry which is a file or a directory
                ZipEntry entry = (ZipEntry) zipEnumerate.nextElement();
                String entryName = entry.toString();

                // If it is the zip name, ignore it
                if (zipNameFolder.equals(entryName)) {
                    continue;
                }
                // If the current filename is META-INF/webanno/source-meta-data.properties store
                // it as project meta data
                else if (entryName.replace("/", "").equals(metaDataPath)) {
                    try (InputStream zipStream = zip.getInputStream(entry)) {
                        projectRepository.savePropertiesFile(project, zipStream, entryName);
                    }
                }
                // File not in the Zip's root folder OR not
                // META-INF/webanno/source-meta-data.properties
                else if (StringUtils.countMatches(entryName, "/") > 1) {
                    continue;
                }
                // If the current filename has an extension and is not ".", import it as a
                // source document
                else if (!FilenameUtils.getExtension(entryName).equals("")
                        && !FilenameUtils.getName(entryName).equals(".")) {
                    uploadSourceDocument(zip, entry, project, user, aFileType);
                }
            }
        }
    }
    finally {
        // The temporary copy of the upload is only needed while importing.
        if (!zimpFile.delete()) {
            zimpFile.deleteOnExit();
        }
    }

    LOG.info("Successfully created project [" + aName + "] for user [" + username + "]");
}
From source file:org.nebulaframework.deployment.classloading.GridArchiveClassLoader.java
/** * Internal method which does the search for class inside * the {@code GridArchive}. First attempts locate the file directly in * the {@code .nar} file. If this fails, it then looks in the * {@code .jar} libraries available in the {@code .nar} * file, and attempts to locate the class inside each of the {@code .jar} * file.//from ww w. j ava 2 s .co m * * @param fileName expected filename of Class file to be loaded * * @return the {@code byte[]} for the class file * * @throws IOException if IO errors occur during operation * @throws ClassNotFoundException if unable to locate the class */ protected byte[] findInArchive(String fileName) throws IOException, ClassNotFoundException { ZipFile archive = new ZipFile(archiveFile); ZipEntry entry = archive.getEntry(fileName); if (entry == null) { // Unable to find file in archive try { // Attempt to look in libraries Enumeration<? extends ZipEntry> enumeration = archive.entries(); while (enumeration.hasMoreElements()) { ZipEntry zipEntry = enumeration.nextElement(); if (zipEntry.getName().contains(GridArchive.NEBULA_INF) && zipEntry.getName().endsWith(".jar")) { // Look in Jar File byte[] bytes = findInJarStream(archive.getInputStream(zipEntry), fileName); // If Found if (bytes != null) { log.debug("[GridArchiveClassLoader] found class in JAR Library " + fileName); return bytes; } } } } catch (Exception e) { log.warn("[[GridArchiveClassLoader] Exception " + "while attempting class loading", e); } // Cannot Find Class throw new ClassNotFoundException("No such file as " + fileName); } else { // Entry not null, Found Class log.debug("[GridArchiveClassLoader] found class at " + fileName); // Get byte[] and return return IOSupport.readBytes(archive.getInputStream(entry)); } }
From source file:com.azurenight.maven.TroposphereMojo.java
/**
 * Extracts every non-directory entry of the given enumeration into
 * {@code outputDirectory}.
 * <p>
 * An entry is written only if {@code OVERRIDE} is set or the destination
 * file does not exist yet; the destination file is added to the result in
 * either case.
 *
 * @param outputDirectory directory the entries are extracted into
 * @param ja archive the entry contents are read from
 * @param en entries to extract
 * @return the destination files of all non-directory entries
 * @throws MojoExecutionException if reading an entry or writing its file fails
 */
private Collection<File> extractAllFiles(File outputDirectory, ZipFile ja, Enumeration<JarEntry> en)
        throws MojoExecutionException {
    List<File> files = new ArrayList<File>();
    while (en.hasMoreElements()) {
        JarEntry el = en.nextElement();
        if (el.isDirectory()) {
            continue;
        }

        // NOTE(review): el.getName() is used unchecked as a relative path; an entry name
        // containing ".." would be written outside outputDirectory. Confirm the archive is trusted.
        File destFile = new File(outputDirectory, el.getName());
        if (OVERRIDE || !destFile.exists()) {
            destFile.getParentFile().mkdirs();
            // Both streams are closed even when the copy fails.
            try (java.io.InputStream in = ja.getInputStream(el);
                    FileOutputStream fo = new FileOutputStream(destFile)) {
                IOUtils.copy(in, fo);
            } catch (IOException e) {
                throw new MojoExecutionException(
                        "extracting " + el.getName() + " from jython artifact jar failed", e);
            }
        }
        files.add(destFile);
    }
    return files;
}
From source file:io.frictionlessdata.datapackage.Package.java
/** * Load from String representation of JSON object or from a zip file path. * @param jsonStringSource//from w w w. j a v a 2 s . c om * @param strict * @throws IOException * @throws DataPackageException * @throws ValidationException */ public Package(String jsonStringSource, boolean strict) throws IOException, DataPackageException, ValidationException { this.strictValidation = strict; // If zip file is given. if (jsonStringSource.toLowerCase().endsWith(".zip")) { // Read in memory the file inside the zip. ZipFile zipFile = new ZipFile(jsonStringSource); ZipEntry entry = zipFile.getEntry(DATAPACKAGE_FILENAME); // Throw exception if expected datapackage.json file not found. if (entry == null) { throw new DataPackageException( "The zip file does not contain the expected file: " + DATAPACKAGE_FILENAME); } // Read the datapackage.json file inside the zip try (InputStream is = zipFile.getInputStream(entry)) { StringBuilder out = new StringBuilder(); try (BufferedReader reader = new BufferedReader(new InputStreamReader(is))) { String line = null; while ((line = reader.readLine()) != null) { out.append(line); } } // Create and set the JSONObject for the datapackage.json that was read from inside the zip file. this.setJson(new JSONObject(out.toString())); // Validate. this.validate(); } } else { // Create and set the JSONObject fpr the String representation of desriptor JSON object. this.setJson(new JSONObject(jsonStringSource)); // If String representation of desriptor JSON object is provided. this.validate(); } }
From source file:org.apache.taverna.scufl2.ucfpackage.TestUCFPackage.java
@Test public void manifestMimetype() throws Exception { UCFPackage container = new UCFPackage(); container.setPackageMediaType(UCFPackage.MIME_WORKFLOW_BUNDLE); container.save(tmpFile);/*from ww w . j a va2 s . c o m*/ ZipFile zipFile = new ZipFile(tmpFile); ZipEntry manifestEntry = zipFile.getEntry("META-INF/manifest.xml"); InputStream manifestStream = zipFile.getInputStream(manifestEntry); //System.out.println(IOUtils.toString(manifestStream, "UTF-8")); /* <?xml version="1.0" encoding="UTF-8"?> <manifest:manifest xmlns:manifest="urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"> </manifest:manifest> */ Document doc = parseXml(manifestStream); assertEquals(MANIFEST_NS, doc.getRootElement().getNamespace()); assertEquals("manifest", doc.getRootElement().getNamespacePrefix()); assertEquals("manifest:manifest", doc.getRootElement().getQualifiedName()); assertNull(xpathSelectElement(doc.getRootElement(), "/manifest:manifest/*")); }
From source file:edu.stanford.epadd.launcher.Splash.java
private static void copyResourcesRecursively(String sourceDirectory, String writeDirectory) throws IOException { final URL dirURL = ePADD.class.getClassLoader().getResource(sourceDirectory); //final String path = sourceDirectory.substring( 1 ); if ((dirURL != null) && dirURL.getProtocol().equals("jar")) { final JarURLConnection jarConnection = (JarURLConnection) dirURL.openConnection(); //System.out.println( "jarConnection is " + jarConnection ); final ZipFile jar = jarConnection.getJarFile(); final Enumeration<? extends ZipEntry> entries = jar.entries(); // gives ALL entries in jar while (entries.hasMoreElements()) { final ZipEntry entry = entries.nextElement(); final String name = entry.getName(); // System.out.println( name ); if (!name.startsWith(sourceDirectory)) { // entry in wrong subdir -- don't copy continue; }//from w w w . java 2 s . com final String entryTail = name.substring(sourceDirectory.length()); final File f = new File(writeDirectory + File.separator + entryTail); if (entry.isDirectory()) { // if its a directory, create it final boolean bMade = f.mkdir(); System.out.println((bMade ? " creating " : " unable to create ") + name); } else { System.out.println(" writing " + name); final InputStream is = jar.getInputStream(entry); final OutputStream os = new BufferedOutputStream(new FileOutputStream(f)); final byte buffer[] = new byte[4096]; int readCount; // write contents of 'is' to 'os' while ((readCount = is.read(buffer)) > 0) { os.write(buffer, 0, readCount); } os.close(); is.close(); } } } else if (dirURL == null) { throw new IllegalStateException("can't find " + sourceDirectory + " on the classpath"); } else { // not a "jar" protocol URL throw new IllegalStateException("don't know how to handle extracting from " + dirURL); } }
From source file:org.apache.taverna.scufl2.ucfpackage.TestUCFPackage.java
@Test public void fileEntryFromBytes() throws Exception { UCFPackage container = new UCFPackage(); container.setPackageMediaType(UCFPackage.MIME_WORKFLOW_BUNDLE); byte[] bytes = makeBytes(1024); container.addResource(bytes, "binary", UCFPackage.MIME_BINARY); container.save(tmpFile);//from ww w . j a va2s.c o m ZipFile zipFile = new ZipFile(tmpFile); ZipEntry manifestEntry = zipFile.getEntry("META-INF/manifest.xml"); InputStream manifestStream = zipFile.getInputStream(manifestEntry); //System.out.println(IOUtils.toString(manifestStream, "UTF-8")); /* <?xml version="1.0" encoding="UTF-8"?> <manifest:manifest xmlns:manifest="urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"> <manifest:file-entry manifest:media-type="application/octet-stream" manifest:full-path="binary" manifest:size="1024"/> </manifest:manifest> */ Document doc = parseXml(manifestStream); assertEquals(MANIFEST_NS, doc.getRootElement().getNamespace()); assertEquals("manifest", doc.getRootElement().getNamespacePrefix()); assertEquals("manifest:manifest", doc.getRootElement().getQualifiedName()); assertXpathEquals("application/octet-stream", doc.getRootElement(), "/manifest:manifest/manifest:file-entry/@manifest:media-type"); assertXpathEquals("binary", doc.getRootElement(), "/manifest:manifest/manifest:file-entry/@manifest:full-path"); assertXpathEquals("1024", doc.getRootElement(), "/manifest:manifest/manifest:file-entry/@manifest:size"); InputStream io = zipFile.getInputStream(zipFile.getEntry("binary")); assertArrayEquals(bytes, IOUtils.toByteArray(io)); }
From source file:org.apache.taverna.scufl2.ucfpackage.TestUCFPackage.java
@Test public void fileEntryFromString() throws Exception { UCFPackage container = new UCFPackage(); container.setPackageMediaType(UCFPackage.MIME_WORKFLOW_BUNDLE); container.addResource("Hello there ", "helloworld.txt", "text/plain"); container.save(tmpFile);/* ww w. ja va2 s .c om*/ ZipFile zipFile = new ZipFile(tmpFile); ZipEntry manifestEntry = zipFile.getEntry("META-INF/manifest.xml"); InputStream manifestStream = zipFile.getInputStream(manifestEntry); //System.out.println(IOUtils.toString(manifestStream, "UTF-8")); /* <?xml version="1.0" encoding="UTF-8"?> <manifest:manifest xmlns:manifest="urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"> <manifest:file-entry manifest:media-type="text/plain" manifest:full-path="helloworld.txt" manifest:size="18"/> </manifest:manifest> */ Document doc = parseXml(manifestStream); assertEquals(MANIFEST_NS, doc.getRootElement().getNamespace()); assertEquals("manifest", doc.getRootElement().getNamespacePrefix()); assertEquals("manifest:manifest", doc.getRootElement().getQualifiedName()); assertXpathEquals("text/plain", doc.getRootElement(), "/manifest:manifest/manifest:file-entry/@manifest:media-type"); assertXpathEquals("helloworld.txt", doc.getRootElement(), "/manifest:manifest/manifest:file-entry/@manifest:full-path"); /* * Different platforms encode UTF8 in different ways * assertXpathEquals("18", doc.getRootElement(), "/manifest:manifest/manifest:file-entry/@manifest:size"); */ InputStream io = zipFile.getInputStream(zipFile.getEntry("helloworld.txt")); assertEquals("Hello there ", IOUtils.toString(io, "UTF-8")); }
From source file:org.apache.taverna.scufl2.ucfpackage.TestUCFPackage.java
@Test public void setRootfileSaved() throws Exception { UCFPackage container = new UCFPackage(); container.setPackageMediaType(UCFPackage.MIME_WORKFLOW_BUNDLE); container.addResource("Hello there", "helloworld.txt", "text/plain"); container.addResource("Soup for everyone", "soup.txt", "text/plain"); container.setRootFile("helloworld.txt"); container.save(tmpFile);/* w w w . jav a 2 s. co m*/ ZipFile zipFile = new ZipFile(tmpFile); ZipEntry manifestEntry = zipFile.getEntry("META-INF/container.xml"); InputStream manifestStream = zipFile.getInputStream(manifestEntry); //System.out.println(IOUtils.toString(manifestStream, "UTF-8")); /* <?xml version="1.0" encoding="UTF-8" standalone="yes"?> <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" xmlns:ns2="http://www.w3.org/2000/09/xmldsig#" xmlns:ns3="http://www.w3.org/2001/04/xmlenc#"> <rootFiles> <rootFile full-path="helloworld.txt" media-type="text/plain"/> </rootFiles> </container> */ Document doc = parseXml(manifestStream); assertEquals(CONTAINER_NS, doc.getRootElement().getNamespace()); // Should work, but might still fail on Windows due to // TAVERNA-920. We'll avoid testing this to not break the build. // assertEquals("", doc.getRootElement().getNamespacePrefix()); // assertEquals("container", doc.getRootElement().getQualifiedName()); assertEquals("container", doc.getRootElement().getName()); assertXpathEquals("helloworld.txt", doc.getRootElement(), "/c:container/c:rootFiles/c:rootFile/@full-path"); assertXpathEquals("text/plain", doc.getRootElement(), "/c:container/c:rootFiles/c:rootFile/@media-type"); }
From source file:it.polito.tellmefirst.web.rest.clients.ClientEpub.java
private HashMap<String, String> parseEpub(File file) throws IOException, TMFVisibleException { LOG.debug("[parseEpub] - BEGIN"); ZipFile fi = new ZipFile(file); for (Enumeration e = fi.entries(); e.hasMoreElements();) { ZipEntry entry = (ZipEntry) e.nextElement(); if (entry.getName().endsWith("ncx")) { InputStream tocMaybeDirty = fi.getInputStream(entry); Scanner scanner = new Scanner(tocMaybeDirty, "UTF-8").useDelimiter("\\A"); String theString = scanner.hasNext() ? scanner.next() : ""; tocMaybeDirty.close();/* w w w. ja v a2 s . c o m*/ scanner.close(); String res = theString.replaceAll(">[\\s]*?<", "><"); InputStream toc = new ByteArrayInputStream(res.getBytes(StandardCharsets.UTF_8)); try { DocumentBuilder dBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); Document doc = dBuilder.parse(toc); toc.close(); if (doc.hasChildNodes()) { findNavMap(doc.getChildNodes()); } } catch (Exception ex) { LOG.error("Unable to navigate the TOC"); } removeEmptyTOC(epub); //search anchors in links and split Set set = epub.entrySet(); Iterator i = set.iterator(); while (i.hasNext()) { Map.Entry me = (Map.Entry) i.next(); if (me.getValue().toString().contains("#")) { String[] parts = me.getValue().toString().split("#"); String anchor = parts[1]; epub.put(me.getKey().toString(), anchor); } } } if (entry.getName().endsWith("opf")) { //manage files because order is important InputStream content = fi.getInputStream(entry); Scanner scanner = new Scanner(content, "UTF-8").useDelimiter("\\A"); String contentString = scanner.hasNext() ? 
scanner.next() : ""; content.close(); scanner.close(); String filenameRegex = "href=\"(.*.htm(|l))\".*media-type=\"application/xhtml"; Pattern pattern = Pattern.compile(filenameRegex); Matcher matcher = pattern.matcher(contentString); Integer count = 0; while (matcher.find()) { files.put(count, matcher.group(1)); count++; } } if (entry.getName().endsWith("html") || entry.getName().endsWith("htm") || entry.getName().endsWith("xhtml")) { InputStream htmlFile = fi.getInputStream(entry); Scanner scanner = new Scanner(htmlFile, "UTF-8").useDelimiter("\\A"); String htmlString = scanner.hasNext() ? scanner.next() : ""; String regex1 = htmlString.replaceAll("^[^_]*?<body>", ""); //remove head String regex2 = regex1.replaceAll("</body>.*$", ""); //remove tail String htmlCleaned = regex2.replaceAll("<a.*?/>", ""); //anchor with one tag String[] bits = entry.getName().split("/"); String fileName = bits[bits.length - 1]; htmls.put(fileName, htmlCleaned); } } fi.close(); Integer i; for (i = 0; i < files.size(); i++) { stringBuilder.append("<p id=\"" + files.get(i) + "\"></p>"); // "anchor" also the heads of each files stringBuilder.append(htmls.get(files.get(i))); } String htmlAll = stringBuilder.toString(); /* We have all needed files, start to split For each link -> made a chunk Start from the bottom */ Metadata metadata = new Metadata(); Parser parser = new HtmlParser(); ListIterator<Map.Entry<String, String>> iter = new ArrayList<>(epub.entrySet()).listIterator(epub.size()); while (iter.hasPrevious()) { Map.Entry<String, String> me = iter.previous(); try { ContentHandler contenthandler = new BodyContentHandler(10 * htmlAll.length()); Scanner sc = new Scanner(htmlAll); sc.useDelimiter("id=\"" + me.getValue().toString() + "\">"); htmlAll = sc.next(); InputStream stream = new ByteArrayInputStream(sc.next().getBytes(StandardCharsets.UTF_8)); parser.parse(stream, contenthandler, metadata, new ParseContext()); String chapterText = 
contenthandler.toString().toLowerCase().replaceAll("\\d+.*", ""); String chapterTextWithoutNo = chapterText.replaceAll("\\d+.*", ""); // Remove the Project Gutenberg meta information from the text String chapterTextCleaned = chapterTextWithoutNo.split("end of the project gutenberg ebook")[0]; epub.put(me.getKey().toString(), chapterTextCleaned); } catch (Exception ex) { LOG.error("Unable to parse content for index: " + me.getKey() + ", this chapter will be deleted"); removeChapter(epub, me.getKey().toString()); } } /* I remove the Project Gutenberg license chapter from the Map, because it is useless for the classification and it generates a Lucene Exception in case of the Italian language (the license text is always in English). You can use this method in order to remove each chapter that is useless for classifying your Epub document. */ removeChapter(epub, "A Word from Project Gutenberg"); removeEmptyItems(epub); //If the Epub file has a bad structure, I try to use the basic Epub extractor of Tika. if (epub.size() == 0) { LOG.info("The Epub file has a bad structure. Try to use the Tika extractor"); epub.put("All text", autoParseAll(file)); } removeEmptyItems(epub); if (epub.size() == 0) { LOG.error("Unable to extract text from this Epub"); throw new TMFVisibleException("Unable to extract any text from this Epub."); } removeDownloadedFile(TEMPORARY_PATH); LOG.debug("[parseEpub] - END"); return epub; }