Java tutorial
/* * Files.java * * Copyright (c) 1995-2012, The University of Sheffield. See the file * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt * * This file is part of GATE (see http://gate.ac.uk/), and is free * software, licenced under the GNU Library General Public License, * Version 2, June 1991 (in the distribution as file licence.html, * and also available at http://gate.ac.uk/gate/licence.html). * * $Id$ */ package gate.util; import gate.Gate; import gate.corpora.DocumentXmlUtils; import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.File; import java.io.FileFilter; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.io.IOUtils; /** Some utilities for use with Files and with resources. * <P> * <B>Note</B> that there is a terminology conflict between the use * of "resources" here and <TT>gate.Resource</TT> and its inheritors. * <P> * Java "resources" are files that live on the CLASSPATH or in a Jar * file that are <I>not</I> <TT>.class</TT> files. For example: a * <TT>.gif</TT> file that is used by a GUI, or one of the XML files * used for testing GATE's document format facilities. This class * allows you to access these files in various ways (as streams, as * byte arrays, etc.). * <P> * GATE resources are components (Java Beans) that provide all of the * natural language processing capabilities of a GATE-based system, and * the language data that such systems analsyse and produce. For * example: parsers, lexicons, generators, corpora. * <P> * Where we say "resource" in this class we mean Java resource; elsewhere * in the system we almost always mean GATE resource. */ public class Files { /** Debug flag */ private static final boolean DEBUG = false; /** Used to generate temporary resources names*/ static long resourceIndex = 0; /**Where on the classpath the gate resources are to be found*/ protected static final String resourcePath = "/gate/resources"; /**Gets the path for the gate resources within the classpath*/ public static String getResourcePath() { return resourcePath; } /** It returns the last component in a file path. * It takes E.g: d:/tmp/file.txt and returns file.txt */ public static String getLastPathComponent(String path) { if (path == null || path.length() == 0) return ""; //we should look both for "/" and "\" as on windows the file separator is"\" //but a path coming from an URL will be separated by "/" int index = path.lastIndexOf('/'); if (index == -1) index = path.lastIndexOf('\\'); if (index == -1) return path; else return path.substring(index + 1); }// getLastPathComponent() /** Get a string representing the contents of a text file. */ public static String getString(String fileName) throws IOException { return getString(new File(fileName)); } // getString(fileName) /** Get a string representing the contents of a text file. */ public static String getString(File textFile) throws IOException { FileInputStream fis = new FileInputStream(textFile); int len = (int) textFile.length(); byte[] textBytes = new byte[len]; fis.read(textBytes, 0, len); fis.close(); return new String(textBytes); } // getString(File) /** Get a byte array representing the contents of a binary file. */ public static byte[] getByteArray(File binaryFile) throws IOException { FileInputStream fis = new FileInputStream(binaryFile); int len = (int) binaryFile.length(); byte[] bytes = new byte[len]; fis.read(bytes, 0, len); fis.close(); return bytes; } // getByteArray(File) /** Get a resource from the GATE ClassLoader as a String. * @param resourceName The resource to input. */ public static String getResourceAsString(String resourceName) throws IOException { return getResourceAsString(resourceName, null); } /** Get a resource from the GATE ClassLoader as a String. * @param encoding The encoding of the reader used to input the file * (may be null in which case the default encoding is used). * @param resourceName The resource to input. */ public static String getResourceAsString(String resourceName, String encoding) throws IOException { InputStream resourceStream = getResourceAsStream(resourceName); if (resourceStream == null) return null; BufferedReader resourceReader; if (encoding == null) { resourceReader = new BomStrippingInputStreamReader(resourceStream); } else { resourceReader = new BomStrippingInputStreamReader(resourceStream, encoding); } StringBuffer resourceBuffer = new StringBuffer(); int i; int charsRead = 0; final int size = 1024; char[] charArray = new char[size]; while ((charsRead = resourceReader.read(charArray, 0, size)) != -1) resourceBuffer.append(charArray, 0, charsRead); while ((i = resourceReader.read()) != -1) resourceBuffer.append((char) i); resourceReader.close(); return resourceBuffer.toString(); } // getResourceAsString(String) /** Get a resource from the GATE resources directory as a String. * The resource name should be relative to <code>resourcePath</code> which * is equal with <TT>gate/resources</TT>; e.g. * for a resource stored as <TT>gate/resources/jape/Test11.jape</TT>, * this method should be passed the name <TT>jape/Test11.jape</TT>. */ public static String getGateResourceAsString(String resourceName) throws IOException { InputStream resourceStream = getGateResourceAsStream(resourceName); if (resourceStream == null) throw new IOException("No such resource on classpath: " + resourceName); try { return IOUtils.toString(resourceStream); } finally { resourceStream.close(); } } // getGateResourceAsString(String) /** * Writes a temporary file into the default temporary directory, * form an InputStream a unique ID is generated and associated automaticaly * with the file name... */ public static File writeTempFile(InputStream contentStream) throws IOException { File resourceFile = null; FileOutputStream resourceFileOutputStream = null; try { // create a temporary file name resourceFile = File.createTempFile("gateResource", ".tmp"); resourceFileOutputStream = new FileOutputStream(resourceFile); resourceFile.deleteOnExit(); if (contentStream == null) return resourceFile; int bytesRead = 0; final int readSize = 1024; byte[] bytes = new byte[readSize]; while ((bytesRead = contentStream.read(bytes, 0, readSize)) != -1) resourceFileOutputStream.write(bytes, 0, bytesRead); } finally { IOUtils.closeQuietly(resourceFileOutputStream); IOUtils.closeQuietly(contentStream); } return resourceFile; }// writeTempFile() /** * Writes aString into a temporary file located inside * the default temporary directory defined by JVM, using the specific * anEncoding. * An unique ID is generated and associated automaticaly with the file name. * @param aString the String to be written. If is null then the file will be * empty. * @param anEncoding the encoding to be used. If is null then the default * encoding will be used. * @return the tmp file containing the string. */ public static File writeTempFile(String aString, String anEncoding) throws UnsupportedEncodingException, IOException { File resourceFile = null; OutputStreamWriter writer = null; // Create a temporary file name resourceFile = File.createTempFile("gateResource", ".tmp"); resourceFile.deleteOnExit(); if (aString == null) return resourceFile; // Prepare the writer if (anEncoding == null) { // Use default encoding writer = new OutputStreamWriter(new FileOutputStream(resourceFile)); } else { // Use the specified encoding writer = new OutputStreamWriter(new FileOutputStream(resourceFile), anEncoding); } // End if // This Action is added only when a gate.Document is created. // So, is for sure that the resource is a gate.Document writer.write(aString); writer.flush(); writer.close(); return resourceFile; }// writeTempFile() /** * Writes aString into a temporary file located inside * the default temporary directory defined by JVM, using the default * encoding. * An unique ID is generated and associated automaticaly with the file name. * @param aString the String to be written. If is null then the file will be * empty. * @return the tmp file containing the string. */ public static File writeTempFile(String aString) throws IOException { return writeTempFile(aString, null); }// writeTempFile() /** Get a resource from the GATE ClassLoader as a byte array. */ public static byte[] getResourceAsByteArray(String resourceName) throws IOException, IndexOutOfBoundsException, ArrayStoreException { InputStream resourceInputStream = getResourceAsStream(resourceName); BufferedInputStream resourceStream = new BufferedInputStream(resourceInputStream); byte b; final int bufSize = 1024; byte[] buf = new byte[bufSize]; int i = 0; // get the whole resource into buf (expanding the array as needed) while ((b = (byte) resourceStream.read()) != -1) { if (i == buf.length) { byte[] newBuf = new byte[buf.length * 2]; System.arraycopy(buf, 0, newBuf, 0, i); buf = newBuf; } buf[i++] = b; } // close the resource stream resourceStream.close(); // copy the contents of buf to an array of the correct size byte[] bytes = new byte[i]; // copy from buf to bytes System.arraycopy(buf, 0, bytes, 0, i); return bytes; } // getResourceAsByteArray(String) /** Get a resource from the GATE resources directory as a byte array. * The resource name should be relative to <code>resourcePath</code> which * is equal with <TT>gate/resources</TT>; e.g. * for a resource stored as <TT>gate/resources/jape/Test11.jape</TT>, * this method should be passed the name <TT>jape/Test11.jape</TT>. */ public static byte[] getGateResourceAsByteArray(String resourceName) throws IOException, IndexOutOfBoundsException, ArrayStoreException { InputStream resourceInputStream = getGateResourceAsStream(resourceName); BufferedInputStream resourceStream = new BufferedInputStream(resourceInputStream); byte b; final int bufSize = 1024; byte[] buf = new byte[bufSize]; int i = 0; // get the whole resource into buf (expanding the array as needed) while ((b = (byte) resourceStream.read()) != -1) { if (i == buf.length) { byte[] newBuf = new byte[buf.length * 2]; System.arraycopy(buf, 0, newBuf, 0, i); buf = newBuf; } buf[i++] = b; } // close the resource stream resourceStream.close(); // copy the contents of buf to an array of the correct size byte[] bytes = new byte[i]; // copy from buf to bytes System.arraycopy(buf, 0, bytes, 0, i); return bytes; } // getResourceGateAsByteArray(String) /** Get a resource from the GATE ClassLoader as an InputStream. */ public static InputStream getResourceAsStream(String resourceName) throws IOException { // Strip any leading '/' if (resourceName.charAt(0) == '/') { resourceName = resourceName.substring(1); } ClassLoader gcl = Gate.getClassLoader(); if (gcl == null) { // if the GATE ClassLoader has not been initialised yet (i.e. this // method was called before Gate.init) then fall back to the current // classloader return Files.class.getClassLoader().getResourceAsStream(resourceName); } else { // if we can, get the resource through the GATE ClassLoader to allow // loading of resources from plugin JARs as well as gate.jar return gcl.getResourceAsStream(resourceName); } //return ClassLoader.getSystemResourceAsStream(resourceName); } // getResourceAsStream(String) /** Get a resource from the GATE resources directory as an InputStream. * The resource name should be relative to <code>resourcePath<code> which * is equal with <TT>gate/resources</TT>; e.g. * for a resource stored as <TT>gate/resources/jape/Test11.jape</TT>, * this method should be passed the name <TT>jape/Test11.jape</TT>. */ public static InputStream getGateResourceAsStream(String resourceName) throws IOException { if (resourceName.startsWith("/") || resourceName.startsWith("\\")) return getResourceAsStream(resourcePath + resourceName); else return getResourceAsStream(resourcePath + "/" + resourceName); } // getResourceAsStream(String) /** * Get a resource from the GATE ClassLoader. The return value is a * {@link java.net.URL} that can be used to retrieve the contents of the * resource. */ public static URL getResource(String resourceName) { // Strip any leading '/' if (resourceName.charAt(0) == '/') { resourceName = resourceName.substring(1); } ClassLoader gcl = Gate.getClassLoader(); if (gcl == null) { // if the GATE ClassLoader has not been initialised yet (i.e. this // method was called before Gate.init) then fall back to the current // classloader return Files.class.getClassLoader().getResource(resourceName); } else { // if we can, get the resource through the GATE ClassLoader to allow // loading of resources from plugin JARs as well as gate.jar return gcl.getResource(resourceName); } } /** * Get a resource from the GATE resources directory. The return value is a * {@link java.net.URL} that can be used to retrieve the contents of the * resource. * The resource name should be relative to <code>resourcePath<code> which * is equal with <TT>gate/resources</TT>; e.g. * for a resource stored as <TT>gate/resources/jape/Test11.jape</TT>, * this method should be passed the name <TT>jape/Test11.jape</TT>. */ public static URL getGateResource(String resourceName) { if (resourceName.startsWith("/") || resourceName.startsWith("\\")) return getResource(resourcePath + resourceName); else return getResource(resourcePath + "/" + resourceName); } /** * This method takes a regular expression and a directory name and returns * the set of Files that match the pattern under that directory. * * @param regex regular expression path that begins with <code>pathFile</code> * @param pathFile directory path where to search for files * @return set of file paths under <code>pathFile</code> that matches * <code>regex</code> */ public static Set<String> Find(String regex, String pathFile) { Set<String> regexfinal = new HashSet<String>(); String[] tab; File file = null; //open a file try { file = new File(pathFile); } catch (NullPointerException npe) { npe.printStackTrace(Err.getPrintWriter()); } Pattern pattern = Pattern.compile("^" + regex); if (file.isDirectory()) { tab = file.list(); for (int i = 0; i <= tab.length - 1; i++) { String finalPath = pathFile + "/" + tab[i]; Matcher matcher = pattern.matcher(finalPath); if (matcher.matches()) { regexfinal.add(finalPath); } } } else { if (file.isFile()) { Matcher matcher = pattern.matcher(pathFile); if (matcher.matches()) { regexfinal.add(pathFile); } } } return regexfinal; } //find /** Recursively remove a directory <B>even if it contains other files * or directories</B>. Returns true when the directory and all its * contents are successfully removed, else false. */ public static boolean rmdir(File dir) { if (dir == null || !dir.isDirectory()) // only delete directories return false; // list all the members of the dir String[] members = dir.list(); // return value indicating success or failure boolean succeeded = true; // for each member, if is dir then recursively delete; if file then delete for (int i = 0; i < members.length; i++) { File member = new File(dir, members[i]); if (member.isFile()) { if (!member.delete()) succeeded = false; } else { if (!Files.rmdir(member)) succeeded = false; } } // delete the directory itself dir.delete(); // return status value return succeeded; } // rmdir(File) /** * This method updates an XML element with a new set of attributes. * If the element is not found the XML is unchanged. The attributes * keys and values must all be Strings. * * @param xml A stream of the XML data. * @param elementName The name of the element to update. * @param newAttrs The new attributes to place on the element. * @return A string of the whole XML source, with the element updated. */ public static String updateXmlElement(BufferedReader xml, String elementName, Map<String, String> newAttrs) throws IOException { String line = null; String nl = Strings.getNl(); StringBuffer newXml = new StringBuffer(); // read the whole source while ((line = xml.readLine()) != null) { newXml.append(line); newXml.append(nl); } // find the location of the element int start = newXml.toString().indexOf("<" + elementName); if (start == -1) return newXml.toString(); int end = newXml.toString().indexOf(">", start); if (end == -1) return newXml.toString(); // check if the old element is empty (ends in "/>") or not boolean isEmpty = false; if (newXml.toString().charAt(end - 1) == '/') isEmpty = true; // create the new element string with the new attributes StringBuffer newElement = new StringBuffer(); newElement.append("<"); newElement.append(elementName); // add in the new attributes Iterator<Map.Entry<String, String>> iter = newAttrs.entrySet().iterator(); while (iter.hasNext()) { Map.Entry<String, String> entry = iter.next(); String key = entry.getKey(); String value = entry.getValue(); newElement.append(" "); newElement.append(DocumentXmlUtils.combinedNormalisation(key)); newElement.append("=\""); newElement.append(DocumentXmlUtils.combinedNormalisation(value)); newElement.append("\"" + nl); } // terminate the element if (isEmpty) newElement.append("/"); newElement.append(">"); // replace the old string newXml.replace(start, end + 1, newElement.toString()); return newXml.toString(); } // updateXmlElement(Reader...) /** * This method updates an XML element in an XML file * with a new set of attributes. If the element is not found the XML * file is unchanged. The attributes keys and values must all be Strings. * We first try to read the file using UTF-8 encoding. If an error occurs we * fall back to the platform default encoding (for backwards-compatibility * reasons) and try again. The file is written back in UTF-8, with an * updated encoding declaration. * * @param xmlFile An XML file. * @param elementName The name of the element to update. * @param newAttrs The new attributes to place on the element. * @return A string of the whole XML file, with the element updated (the * file is also overwritten). */ public static String updateXmlElement(File xmlFile, String elementName, Map<String, String> newAttrs) throws IOException { String newXml = null; BufferedReader utfFileReader = null; BufferedReader platformFileReader = null; Charset utfCharset = Charset.forName("UTF-8"); try { FileInputStream fis = new FileInputStream(xmlFile); // try reading with UTF-8, make sure any errors throw an exception CharsetDecoder decoder = utfCharset.newDecoder().onUnmappableCharacter(CodingErrorAction.REPORT) .onMalformedInput(CodingErrorAction.REPORT); utfFileReader = new BomStrippingInputStreamReader(fis, decoder); newXml = updateXmlElement(utfFileReader, elementName, newAttrs); } catch (CharacterCodingException cce) { // File not readable as UTF-8, so try the platform default encoding if (utfFileReader != null) { utfFileReader.close(); utfFileReader = null; } if (DEBUG) { Err.prln("updateXmlElement: could not read " + xmlFile + " as UTF-8, " + "trying platform default"); } platformFileReader = new BufferedReader(new FileReader(xmlFile)); newXml = updateXmlElement(platformFileReader, elementName, newAttrs); } finally { if (utfFileReader != null) { utfFileReader.close(); } if (platformFileReader != null) { platformFileReader.close(); } } // write the updated file in UTF-8, fixing the encoding declaration newXml = newXml.replaceFirst("\\A<\\?xml (.*)encoding=(?:\"[^\"]*\"|'[^']*')", "<?xml $1encoding=\"UTF-8\""); FileOutputStream fos = new FileOutputStream(xmlFile); OutputStreamWriter fileWriter = new OutputStreamWriter(fos, utfCharset); fileWriter.write(newXml); fileWriter.close(); return newXml; } // updateXmlElement(File...) /** * Convert a file: URL to a <code>java.io.File</code>. First tries to parse * the URL's toExternalForm as a URI and create the File object from that * URI. If this fails, just uses the path part of the URL. This handles * URLs that contain spaces or other unusual characters, both as literals and * when encoded as (e.g.) %20. * * @exception IllegalArgumentException if the URL is not convertable into a * File. */ public static File fileFromURL(URL theURL) throws IllegalArgumentException { try { URI uri = new URI(theURL.toExternalForm()); return new File(uri); } catch (URISyntaxException use) { try { URI uri = new URI(theURL.getProtocol(), null, theURL.getPath(), null, null); return new File(uri); } catch (URISyntaxException use2) { throw new IllegalArgumentException("Cannot convert " + theURL + " to a file path"); } } } /** * Same as {@link java.io.File#listFiles(java.io.FileFilter)} * but recursive on directories. * @param directory file path to start the search, will not be include * in the results * @param filter filter apply to the search * @return an array of files (including directories) contained inside * <code>directory</code>. The array will be empty if the directory is * empty. Returns null if this abstract pathname does not denote a * directory, or if an I/O error occurs. */ public static File[] listFilesRecursively(File directory, FileFilter filter) { List<File> filesList = new ArrayList<File>(); File[] filesRootArray = directory.listFiles(filter); if (filesRootArray == null) { return null; } for (File file : filesRootArray) { filesList.add(file); if (file.isDirectory()) { File[] filesDeepArray = listFilesRecursively(file, filter); if (filesDeepArray == null) { return null; } filesList.addAll(Arrays.asList(filesDeepArray)); } } return filesList.toArray(new File[filesList.size()]); } } // class Files