Java tutorial
/** * * Copyright 2012-2013 The MITRE Corporation. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. * * ************************************************************************** * NOTICE This software was produced for the U. S. Government under Contract No. * W15P7T-12-C-F600, and is subject to the Rights in Noncommercial Computer * Software and Noncommercial Computer Software Documentation Clause * 252.227-7014 (JUN 1995) * * (c) 2012 The MITRE Corporation. All Rights Reserved. * ************************************************************************** */ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~| // // _____ ____ __ __ ///\ __`\ /\ _`\ /\ \__ /\ \__ //\ \ \/\ \ _____ __ ___ \ \,\L\_\ __ __ _\ \ ,_\ __ ___ \ \ ,_\ // \ \ \ \ \ /\ '__`\ /'__`\ /' _ `\ \/_\__ \ /'__`\/\ \/'\\ \ \/ /'__`\ /' _ `\\ \ \/ // \ \ \_\ \\ \ \L\ \/\ __/ /\ \/\ \ /\ \L\ \ /\ __/\/> </ \ \ \_ /\ \L\.\_ /\ \/\ \\ \ \_ // \ \_____\\ \ ,__/\ \____\\ \_\ \_\ \ `\____\\ \____\/\_/\_\ \ \__\\ \__/.\_\\ \_\ \_\\ \__\ // \/_____/ \ \ \/ \/____/ \/_/\/_/ \/_____/ \/____/\//\/_/ \/__/ \/__/\/_/ \/_/\/_/ \/__/ // \ \_\ // \/_/ // // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~| // package org.opensextant.util; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FilenameFilter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.net.URL; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Set; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; import org.apache.commons.io.FilenameUtils; /** * * @author ubaldino */ public class FileUtility { /** * Write file, UTF-8 is default charset here. * * @param buffer * text to save * @param fname * name of file to save * @return status true if file was written * @throws IOException * if file had IO errors. */ public static boolean writeFile(String buffer, String fname) throws IOException { return writeFile(buffer, fname, "UTF-8", false); } /** * @param buffer * text to save * @param fname * name of file to save * @param enc * text encoding * @param append * if you wish to add to existing file. * @return status if written * @throws IOException * if file had IO errors. */ public static boolean writeFile(String buffer, String fname, String enc, boolean append) throws IOException { if (fname == null || enc == null || buffer == null) { throw new IOException("Null values cannot be used to write out file."); } final FileOutputStream file = new FileOutputStream(fname, append); // APPEND final OutputStreamWriter fout = new OutputStreamWriter(file, enc); fout.write(buffer, 0, buffer.length()); fout.flush(); fout.close(); return true; } /** * Caller is responsible for write flush, close, etc. * * @param fname * file path * @param enc * encoding * @param append * true = append data to existing file. * @return stream writer * @throws IOException * if stream could not be opened */ public static OutputStreamWriter getOutputStream(String fname, String enc, boolean append) throws IOException { return new OutputStreamWriter(new FileOutputStream(fname, append), enc); } /** * Caller is responsible for write flush, close, etc. * * @param fname * file name * @param enc * text encoding * @return stream writer * @throws IOException * if stream could not be openeed */ public static OutputStreamWriter getOutputStream(String fname, String enc) throws IOException { return getOutputStream(fname, enc, false); } /** * Getting an input stream from a file. * * @param fname * file name * @param enc * text encoding * @return reader the java.io reader * @throws IOException * if file could not be opened */ public static InputStreamReader getInputStream(String fname, String enc) throws IOException { return new InputStreamReader(new FileInputStream(fname), enc); } public static InputStreamReader getInputStream(File f, String enc) throws IOException { return new InputStreamReader(new FileInputStream(f), enc); } /** * Simple check if a file is typed as a Spreadsheet Tab-delimited .txt files * or .dat files may be valid spreadsheets, however this method does not * look inside files. * * @param filepath * path to file * @return true if file represents one of the various spreadsheet file formats */ public static boolean isSpreadsheet(String filepath) { final String testpath = filepath.toLowerCase(); return (testpath.endsWith(".csv") || testpath.endsWith(".xls") || testpath.endsWith(".xlsx")); } /** * Using Commons getExtension(), determine if the filename represents an image media type. * * @param filepath * path to file * @return if file represents any type of image */ public static boolean isImage(String filepath) { if (filepath == null) { return false; } final String ext = FilenameUtils.getExtension(filepath.toLowerCase()); return imageTypeMap.containsKey(ext); } /** * Checks file extension of given filepath to see if the format is a known video type. * * @param filepath * file name or path * @return true if file is likely an video file format. */ public static boolean isVideo(String filepath) { if (filepath == null) { return false; } final String ext = FilenameUtils.getExtension(filepath.toLowerCase()); return VID_MIMETYPE.equals(filetypeMap.get(ext)); } /** * Checks file extension of given filepath to see if the format is a known audio type. * * @param filepath * file name or path * @return true if file is likely an audio file format. */ public static boolean isAudio(String filepath) { if (filepath == null) { return false; } final String ext = FilenameUtils.getExtension(filepath.toLowerCase()); return AUD_MIMETYPE.equals(filetypeMap.get(ext)); } /** * Check if a file is an archive * * @param filepath * path to file * @return boolean true if file ends with .zip, .tar, .tgz, .gz (includes .tar.gz) */ public static boolean isArchiveFile(String filepath) { final String testpath = filepath.toLowerCase(); return testpath.endsWith(".zip") || testpath.endsWith(".tar") || testpath.endsWith(".tgz") || testpath.endsWith(".gz"); // || testpath.endsWith(".tar.gz"); } /** * Allow checking of a file extention; NO prefix "." * * @param ext * extension to test * @return boolean true if file ends with .zip, .tar, .tgz, .gz (includes .tar.gz) */ public static boolean isArchiveFileType(String ext) { final String x = ext.toLowerCase(); return x.equals("zip") || x.equals("tar") || x.equals("tgz") || x.equals("gz") || x.equals("tar.gz"); } /** * Test is a path or file extension ends with .txt * NPE if null is passed in. * * @param filepath * path or extension, including "." * * @return true if is .txt or .TXT */ public static boolean isPlainText(String filepath) { return filepath.toLowerCase().endsWith(".txt"); } /** * * @param filepath * path to file * @return buffer from file * @throws IOException * on error */ public static String readFile(String filepath) throws IOException { return readFile(new File(filepath), default_encoding); } /** * * @param filepath * path to file * @return buffer from file * @throws IOException * on error */ public static String readFile(File filepath) throws IOException { return readFile(filepath, default_encoding); } /** * */ public final static String default_encoding = "UTF-8"; /** * */ private final static int ioBufferSize = 0x800; /** * Slurps a text file into a string and returns the string. * * @param fileinput * file object * @param enc * text encoding * @return buffer from file * @throws IOException * on error */ public static String readFile(File fileinput, String enc) throws IOException { if (fileinput == null) { return null; } final FileInputStream instream = new FileInputStream(fileinput); final byte[] inputBytes = new byte[instream.available()]; instream.read(inputBytes); instream.close(); return new String(inputBytes, enc); } /** * Given a file get the byte array * * @param fileinput * file object * @return byte array * @throws IOException * on error */ public static byte[] readBytesFrom(File fileinput) throws IOException { if (fileinput == null) { return null; } final FileInputStream instream = new FileInputStream(fileinput); final byte[] inputBytes = new byte[instream.available()]; instream.read(inputBytes); instream.close(); return inputBytes; } /** * * @param filepath * path to file * @return text buffer, UTF-8 decoded * @throws IOException * on error */ public static String readGzipFile(String filepath) throws IOException { if (filepath == null) { return null; } final FileInputStream instream = new FileInputStream(filepath); final GZIPInputStream gzin = new GZIPInputStream(new BufferedInputStream(instream), ioBufferSize); final byte[] inputBytes = new byte[ioBufferSize]; final StringBuilder buf = new StringBuilder(); int readcount = 0; while ((readcount = gzin.read(inputBytes, 0, ioBufferSize)) != -1) { buf.append(new String(inputBytes, 0, readcount, default_encoding)); } instream.close(); gzin.close(); return buf.toString(); } /** * * @param text * buffer to write * @param filepath * path to file * @return status true if file was written * @throws IOException * on error */ public static boolean writeGzipFile(String text, String filepath) throws IOException { if (filepath == null || text == null) { return false; } final FileOutputStream outstream = new FileOutputStream(filepath); final GZIPOutputStream gzout = new GZIPOutputStream(new BufferedOutputStream(outstream), ioBufferSize); gzout.write(text.getBytes(default_encoding)); gzout.flush(); gzout.finish(); gzout.close(); outstream.close(); return true; } /** * Utility for making dirs * * @param testDir * dir to test * @return if directory was created or if it already exists * @throws IOException * if testDir was not created */ public static boolean makeDirectory(File testDir) throws IOException { if (testDir == null) { return false; } if (testDir.isDirectory() && testDir.exists()) { return true; } if (testDir.isFile() && testDir.exists()) { throw new IOException("Cannot overwrite existing file with a directory of the same name."); } return testDir.mkdirs(); } /** * Utility for making dirs * * @param dir * dirPath * @return if directory was created or if it already exists * @throws IOException * if testDir was not created */ public static boolean makeDirectory(String dir) throws IOException { if (dir == null) { return false; } return makeDirectory(new File(dir)); } /** * Java oddity - recursive removal of a directory * * @param directory * dir to remove * @return if all contents and dir itself was removed. * @author T. Allison, MITRE */ public static boolean removeDirectory(File directory) { //taken from http://www.java2s.com/Tutorial/Java/0180__File/Removeadirectoryandallofitscontents.htm if (directory == null) { return false; } if (!directory.exists()) { return true; } if (!directory.isDirectory()) { return false; } final String[] list = directory.list(); // Some JVMs return null for File.list() when the // directory is empty. if (list != null) { for (int i = 0; i < list.length; i++) { final File entry = new File(directory, list[i]); if (entry.isDirectory()) { if (!removeDirectory(entry)) { return false; } } else { if (!entry.delete()) { return false; } } } } return directory.delete(); } /** * Generate some path with a unique date/time stamp * * @param D * directory * @param F * filename * @param Ext * file extension * @return unique path */ public static String generateUniquePath(String D, String F, String Ext) { return D + File.separator + generateUniqueFilename(F, Ext); } /** * Generate some filename with a unique date/time stamp * * @param F * filename * @param Ext * file extension * @return unique filename */ public static String generateUniqueFilename(String F, String Ext) { final SimpleDateFormat fileDateFmt = new SimpleDateFormat("_yyyyMMdd,HHmmss,S"); return F + fileDateFmt.format(new Date()) + Ext; } /** * * @param f * the file in question. * @return the parent File of a given file. */ public static File getParent(File f) { return new File(f.getAbsolutePath()).getParentFile(); } /** * Simple filter * * @param ext * the extension to filter on * @return filename filter */ public static FilenameFilter getFilenameFilter(String ext) { return new AnyFilenameFilter(ext); } /** * get the base name of a file, given any file extension. This will find the * right-most instance of a file extension and return the left hand side of * that as the file basename. * * commons io FilenameUtils says nothing about arbitrarily long file * extensions, e.g., file.a.b.c.txt split into ("file" + "a.b.c.txt") * * @param p * path * @param ext * extension * @return basename of path, less the extension */ public static String getBasename(String p, String ext) { if (p == null) { return null; } final String fn = FilenameUtils.getBaseName(p); if (ext == null || ext.isEmpty()) { return fn; } if (fn.toLowerCase().endsWith(ext)) { final int lastidx = fn.length() - ext.length() - 1; return fn.substring(0, lastidx); } return fn; } /** * On occasion file path may contain unicode chars, however as the is * encoded, it may not be decodable by OS/FS. * * @param path * path to normalize * @return filename */ public static String getValidFilename(String path) { return TextUtils.normalizeUnicode(path); } /** * Another utility to deal with unicode in filenames * * @param fname * name to clean * @return cleaner filenname */ public static String filenameCleaner(String fname) { if (fname == null) { return null; } if (fname.length() == 0) { return null; } final char[] text = fname.toCharArray(); final StringBuilder cleaned_text = new StringBuilder(); for (final char c : text) { cleaned_text.append(normalizeFilenameChar(c)); } return cleaned_text.toString(); } /** * Get a directory that does not conflict with an existing directory. * Returns null if that is not possible within the maxDups. * * @param dir * directory * @param dupeMarker * incrementor * @param maxDups * max incrementor * @return file object * @author T. Allison NOT THREAD SAFE! */ public static File getSafeDir(File dir, String dupeMarker, int maxDups) { if (!dir.exists()) { return dir; } final String base = dir.getName(); for (int i = 1; i < maxDups; i++) { final File tmp = new File(dir.getParentFile(), base + dupeMarker + i); if (!tmp.isDirectory()) { return tmp; } } return null; } /** * @author T. Allison * @param f * file obj * @param dupeMarker * incrementor * @param maxDups * max incrementor * @return new file */ public static File getSafeFile(File f, String dupeMarker, int maxDups) { if (!f.exists()) { return f; } final int suffixInd = f.getName().lastIndexOf("."); final String base = f.getName().substring(0, suffixInd); final String suffix = (suffixInd + 1 <= f.getName().length()) ? f.getName().substring(suffixInd + 1) : ""; for (int i = 1; i < maxDups; i++) { final File tmp = new File(f.getParentFile(), base + dupeMarker + i + "." + suffix); if (!tmp.exists()) { return tmp; } } return null; } /** * Char to use in place of special chars when scrubbing filenames. */ public final static char FILENAME_REPLACE_CHAR = '_'; /** * Tests for valid filename chars for simple normalization * A-Z, a-z, _-, 0-9, * * @param c * character to allow * @return given character or replacement char */ protected static char normalizeFilenameChar(char c) { if (c >= 'A' && c <= 'Z') { return c; } if (c >= 'a' && c <= 'z') { return c; } if (c >= '0' && c <= '9') { return c; } if (c == '_' || c == '-') { return c; } else { return FILENAME_REPLACE_CHAR; } } /** * A way of determining OS * Beware, OS X has Darwin in its full OS name. * * @return if OS is windows-based */ public static boolean isWindowsSystem() { final String val = System.getProperty("os.name"); /** * if (val == null) { //log.warn("Could not verify OS name"); return * false; } else { //log.debug("Operating System is " + val); } */ return (val != null ? val.contains("Windows") : false); } /** * Char used in config files, dict files. */ public static final String COMMENT_CHAR = "#"; /** * A generic word list loader. * * @param resourcepath * classpath location of a resource * @param case_sensitive * if terms are loaded with case preserved or not. * @author ubaldino, MITRE Corp * @return Set containing unique words found in resourcepath * @throws IOException * on error, resource does not exist */ public static Set<String> loadDictionary(String resourcepath, boolean case_sensitive) throws IOException { return loadDict(FileUtility.class.getResourceAsStream(resourcepath), case_sensitive); } /** * A generic word list loader. * * @param resourcepath * classpath location of a resource * @param case_sensitive * if terms are loaded with case preserved or not. * @author ubaldino, MITRE Corp * @return Set containing unique words found in resourcepath * @throws IOException * on error, resource does not exist */ public static Set<String> loadDictionary(URL resourcepath, boolean case_sensitive) throws IOException { return loadDict(resourcepath.openStream(), case_sensitive); } /** * The do all method. Load the dictionary from stream * This closes the stream when done. * * @param io * @param case_sensitive * @return set of phrases from file. * @throws IOException */ public static Set<String> loadDict(InputStream io, boolean case_sensitive) throws IOException { try (BufferedReader reader = new BufferedReader(new InputStreamReader(io, default_encoding))) { final Set<String> dict = new HashSet<String>(); String newline = null; String test = null; while ((newline = reader.readLine()) != null) { test = newline.trim(); if (test.startsWith(COMMENT_CHAR) || test.length() == 0) { continue; } if (case_sensitive) { dict.add(test); } else { dict.add(test.toLowerCase()); } } return dict; } } /** * Load a word list from a file path. * * @param resourcepath * File object to load * @param case_sensitive * if dictionary is loaded with case or not. * @return a Set object containing distinct dictionary terms * @throws IOException * if load fails */ public static Set<String> loadDictionary(File resourcepath, boolean case_sensitive) throws IOException { InputStream io = null; try { io = new FileInputStream(resourcepath); return loadDict(io, case_sensitive); } finally { io.close(); } } // // // Working with file types // // private static final HashMap<String, String> filetypeMap = new HashMap<String, String>(); public static final String IMAGE_MIMETYPE = "image"; public static final String DOC_MIMETYPE = "document"; public static final String MESSAGE_MIMETYPE = "message"; public static final String APP_MIMETYPE = "application"; public static final String VID_MIMETYPE = "video"; public static final String AUD_MIMETYPE = "audio"; public static final String FOLDER_MIMETYPE = "folder"; public static final String FEED_MIMETYPE = "feed"; public static final String DATA_MIMETYPE = "data"; public static final String WEBARCHIVE_MIMETYPE = "web archive"; public static final String WEBPAGE_MIMETYPE = "web page"; public static final String SPREADSHEET_MIMETYPE = "spreadsheet"; public static final String NOT_AVAILABLE = "other"; public static final String GIS_MIMETYPE = "GIS data"; private static final HashMap<String, String> imageTypeMap = new HashMap<String, String>(); static { // Image imageTypeMap.put("jpg", IMAGE_MIMETYPE); imageTypeMap.put("jpeg", IMAGE_MIMETYPE); imageTypeMap.put("jp2", IMAGE_MIMETYPE); imageTypeMap.put("jpx", IMAGE_MIMETYPE); imageTypeMap.put("ico", IMAGE_MIMETYPE); imageTypeMap.put("bmp", IMAGE_MIMETYPE); imageTypeMap.put("gif", IMAGE_MIMETYPE); imageTypeMap.put("png", IMAGE_MIMETYPE); imageTypeMap.put("tif", IMAGE_MIMETYPE); imageTypeMap.put("tiff", IMAGE_MIMETYPE); filetypeMap.putAll(imageTypeMap); filetypeMap.put("", NOT_AVAILABLE); // GIS Data filetypeMap.put("gdb", GIS_MIMETYPE); filetypeMap.put("shp", GIS_MIMETYPE); filetypeMap.put("kml", GIS_MIMETYPE); filetypeMap.put("kmz", GIS_MIMETYPE); // Data filetypeMap.put("dat", DATA_MIMETYPE); filetypeMap.put("xml", DATA_MIMETYPE); filetypeMap.put("rdf", DATA_MIMETYPE); // Archive filetypeMap.put("mht", WEBARCHIVE_MIMETYPE); filetypeMap.put("mhtml", WEBARCHIVE_MIMETYPE); filetypeMap.put("csv", SPREADSHEET_MIMETYPE); filetypeMap.put("xls", SPREADSHEET_MIMETYPE); filetypeMap.put("xlsx", SPREADSHEET_MIMETYPE); filetypeMap.put("htm", WEBPAGE_MIMETYPE); filetypeMap.put("html", WEBPAGE_MIMETYPE); // Docs filetypeMap.put("odf", DOC_MIMETYPE); filetypeMap.put("doc", DOC_MIMETYPE); filetypeMap.put("ppt", DOC_MIMETYPE); filetypeMap.put("pdf", DOC_MIMETYPE); filetypeMap.put("ps", DOC_MIMETYPE); filetypeMap.put("vsd", DOC_MIMETYPE); filetypeMap.put("txt", DOC_MIMETYPE); filetypeMap.put("pptx", DOC_MIMETYPE); filetypeMap.put("docx", DOC_MIMETYPE); // Messages filetypeMap.put("eml", MESSAGE_MIMETYPE); filetypeMap.put("emlx", MESSAGE_MIMETYPE); filetypeMap.put("msg", MESSAGE_MIMETYPE); filetypeMap.put("sms", MESSAGE_MIMETYPE); //Apps filetypeMap.put("do", APP_MIMETYPE); filetypeMap.put("aspx", APP_MIMETYPE); filetypeMap.put("asp", APP_MIMETYPE); filetypeMap.put("axd", APP_MIMETYPE); filetypeMap.put("js", APP_MIMETYPE); filetypeMap.put("php", APP_MIMETYPE); filetypeMap.put("vbs", APP_MIMETYPE); filetypeMap.put("vb", APP_MIMETYPE); filetypeMap.put("vba", APP_MIMETYPE); // Video filetypeMap.put("mov", VID_MIMETYPE); filetypeMap.put("rm", VID_MIMETYPE); filetypeMap.put("wmv", VID_MIMETYPE); filetypeMap.put("mp4", VID_MIMETYPE); filetypeMap.put("mpeg", VID_MIMETYPE); filetypeMap.put("mpg", VID_MIMETYPE); // Audio filetypeMap.put("au", AUD_MIMETYPE); filetypeMap.put("wma", AUD_MIMETYPE); filetypeMap.put("mp3", AUD_MIMETYPE); filetypeMap.put("ra", AUD_MIMETYPE); // Data Feed filetypeMap.put("rss", FEED_MIMETYPE); } /** * Get a plain language name of the type of file. E.g., document, image, * spreadsheet, web page. Rather than the MIME type technical descriptor. * * @param url * item to describe * @return plain language description of the URL */ public static String getFileDescription(String url) { if (url == null) { return NOT_AVAILABLE; } //------------ /* path: http://a/b.htm * */ final String test = url.toLowerCase(); /* path: /a/b/ * */ if (url.endsWith("/") && !test.startsWith("http")) { return FOLDER_MIMETYPE; } final String urlTestExtension = FilenameUtils.getExtension(test); /* * Known file type. */ final String urlMimeType = filetypeMap.get(urlTestExtension); if (urlMimeType != null) { return urlMimeType; } /* * path: .../abc.rss */ if (test.contains("rss")) { return FEED_MIMETYPE; } if (test.startsWith("http:") || test.startsWith("https:")) { return WEBPAGE_MIMETYPE; } /* * path: /some/default/path */ if (url.contains("/")) { return FOLDER_MIMETYPE; } /* * Give up. */ return NOT_AVAILABLE; } /** * Check if path or URL is a webpage. This is helpful for looking at found URLs * in unstructured data. * * @param link * a URL * @return true if link looks like a URL (ie., if it starts with http: or https:) */ public static boolean isWebURL(String link) { if (link == null) { return false; } String test = link.toLowerCase(); if (test.startsWith("http:") || test.startsWith("https:")) { return true; } return false; } }