Java tutorial
/** See the file "LICENSE" for the full license governing this code. */ package au.org.ands.vocabs.toolkit.utils; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.lang.invoke.MethodHandles; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Enumeration; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; import java.util.zip.ZipOutputStream; import org.apache.commons.io.FileUtils; import org.apache.commons.lang.StringUtils; import org.glassfish.jersey.uri.UriComponent; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import au.org.ands.vocabs.toolkit.db.TaskUtils; import au.org.ands.vocabs.toolkit.db.VersionUtils; import au.org.ands.vocabs.toolkit.db.VocabularyUtils; import au.org.ands.vocabs.toolkit.db.model.Task; import au.org.ands.vocabs.toolkit.db.model.Version; import au.org.ands.vocabs.toolkit.db.model.Vocabulary; import au.org.ands.vocabs.toolkit.tasks.TaskInfo; /** Utility methods for working with files. */ public final class ToolkitFileUtils { /** Logger for this class. */ private static Logger logger; /** Maximum length of a slug component. All generated slug * components are truncated to this length. */ private static final int MAX_SLUG_COMPONENT_LENGTH = 50; /** Private constructor for utility class. */ private ToolkitFileUtils() { } static { logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); } /** Require the existence of a directory. Create it, if it * does not already exist. * @param dir The full pathname of the required directory. */ public static void requireDirectory(final String dir) { File oDir = new File(dir); if (!oDir.exists()) { oDir.mkdirs(); } } /** Require the existence of a directory, but clean it out if * it already exists. Create it, if it does not already exist. * @param dir The full pathname of the required directory. * @throws IOException If the directory already exists but can * not be cleaned. */ public static void requireEmptyDirectory(final String dir) throws IOException { File oDir = new File(dir); if (!oDir.exists()) { oDir.mkdirs(); } else { try { FileUtils.cleanDirectory(new File(dir)); } catch (IOException e) { logger.error("requireEmptyDirectory failed: ", e); throw e; } } } /** Save data to a file. * @param dirName The full directory name * @param fileName The base name of the file to create * @param format The format to use; a key in * ToolkitConfig.FORMAT_TO_FILEEXT_MAP. * @param data The data to be written * @return The complete, full path to the file. */ public static String saveFile(final String dirName, final String fileName, final String format, final String data) { String fileExtension = ToolkitConfig.FORMAT_TO_FILEEXT_MAP.get(format.toLowerCase()); String filePath = dirName + File.separator + fileName + fileExtension; FileWriter writer = null; try { requireDirectory(dirName); File oFile = new File(filePath); writer = new FileWriter(oFile); writer.write(data); writer.close(); } catch (IOException e) { return "Exception: " + e.toString(); } finally { if (writer != null) { try { writer.close(); } catch (IOException e) { return "Exception: " + e.toString(); } } } return filePath; } /** Construct a TaskInfo object based on a task id. * @param taskId The task's task id * @return The TaskInfo object */ public static TaskInfo getTaskInfo(final int taskId) { Task task = TaskUtils.getTaskById(taskId); if (task == null) { logger.error("getTaskInfo: getTaskById returned null; task id:" + taskId); return null; } Vocabulary vocab = VocabularyUtils.getVocabularyById(task.getVocabularyId()); if (vocab == null) { logger.error("getTaskInfo: getVocabularyById returned null; " + "task id:" + taskId + "; vocab id:" + task.getVocabularyId()); return null; } Version version = VersionUtils.getVersionById(task.getVersionId()); if (version == null) { logger.error("getTaskInfo: getVersionById returned null; " + "task id:" + taskId + "; version id:" + task.getVersionId()); return null; } if (!version.getVocabId().equals(task.getVocabularyId())) { logger.error("getTaskInfo: task's vocab id does not match" + " task's version's vocab id; " + "task id:" + taskId + "; vocab id:" + task.getVocabularyId() + "; version's vocab id:" + version.getVocabId()); return null; } if (vocab.getSlug() == null || vocab.getSlug().trim().isEmpty()) { logger.error("getTaskInfo: vocab's slug is empty; " + "task id:" + taskId + "; vocab id:" + task.getVocabularyId()); return null; } if (vocab.getOwner() == null || vocab.getOwner().trim().isEmpty()) { logger.error("getTaskInfo: vocab's owner is empty; " + "task id:" + taskId + "; vocab id:" + task.getVocabularyId()); return null; } if (version.getTitle() == null || version.getTitle().trim().isEmpty()) { logger.error("getTaskInfo: version's title is empty; " + "task id:" + taskId + "; version id:" + task.getVersionId()); return null; } TaskInfo taskInfo = new TaskInfo(task, vocab, version); return taskInfo; } /** Get the full path of the directory used to store all * the files referred to by the task. * @param taskInfo The TaskInfo object representing the task. * @param extraPath An optional additional path component to be added * at the end. If not required, pass in null or an empty string. * @return The full path of the directory used to store the * vocabulary data. */ public static String getTaskOutputPath(final TaskInfo taskInfo, final String extraPath) { // NB: We call makeSlug() on the vocabulary slug, which should // (as of ANDS-Registry-Core commit e365392831ae) // not really be necessary. Path path = Paths.get(ToolkitConfig.DATA_FILES_PATH).resolve(makeSlug(taskInfo.getVocabulary().getOwner())) .resolve(makeSlug(taskInfo.getVocabulary().getSlug())) .resolve(makeSlug(taskInfo.getVersion().getTitle())); if (extraPath != null && (!extraPath.isEmpty())) { path = path.resolve(extraPath); } return path.toString(); } /** Get the full path of the directory used to store all * harvested data referred to by the task. * @param taskInfo The TaskInfo object representing the task. * @return The full path of the directory used to store the * vocabulary data. */ public static String getTaskHarvestOutputPath(final TaskInfo taskInfo) { return getTaskOutputPath(taskInfo, ToolkitConfig.HARVEST_DATA_PATH); } /** Get the full path of (what will be) a new directory used to store * transformed data referred to by the task. This is intended * to be used as a temporary directory during the transform. * If the transform succeeds, call renameTransformTemporaryOutputPath() * to rename this directory to become the harvest directory. * @param taskInfo The TaskInfo object representing the task. * @param transformName The name of the transform being done. This is * used in the generation of the path. * @return The full path of the directory used to store the * transformed data. The directory does not yet exist; it must be * created by the caller. */ public static String getTaskTransformTemporaryOutputPath(final TaskInfo taskInfo, final String transformName) { return getTaskOutputPath(taskInfo, "after_" + transformName); } /** This method is used by transforms that produce new vocabulary * data to replace harvested data. If such a transform succeeds, * call this method. It renames the original harvest directory, and * then renames the temporary directory to become the harvest directory. * @param taskInfo The TaskInfo object representing the task. * @param transformName The name of the transform that has been done. * @return True iff the renaming succeeded. */ public static boolean renameTransformTemporaryOutputPath(final TaskInfo taskInfo, final String transformName) { Path transformOutputPath = Paths.get(getTaskOutputPath(taskInfo, "after_" + transformName)); Path harvestPath = Paths.get(getTaskHarvestOutputPath(taskInfo)); Path harvestPathDestination = Paths.get(getTaskOutputPath(taskInfo, "before_" + transformName)); try { // Remove any previous harvestPathDestination FileUtils.deleteQuietly(harvestPathDestination.toFile()); Files.move(harvestPath, harvestPathDestination); Files.move(transformOutputPath, harvestPath); } catch (IOException e) { logger.error("Exception in renameTransformTemporaryOutputPath", e); return false; } return true; } /** Get the full path of the temporary directory used to store all * harvested data for metadata extraction for a PoolParty vocabulary. * @param projectId The PoolParty projectId. * @return The full path of the directory used to store the * vocabulary data. */ public static String getMetadataOutputPath(final String projectId) { Path path = Paths.get(ToolkitConfig.METADATA_TEMP_FILES_PATH).resolve(makeSlug(projectId)); return path.toString(); } /** Get the full path of the backup directory used to store all * backup data for a project. * @param projectId The project ID. For now, this will be a PoolParty * project ID. * @return The full path of the directory used to store the * vocabulary data. */ public static String getBackupPath(final String projectId) { Path path = Paths.get(ToolkitConfig.BACKUP_FILES_PATH).resolve(makeSlug(projectId)); return path.toString(); } /** Apply slug conventions. In practice, this means * (a) replacing punctuation with hyphens, * (b) replacing whitespace with hyphen, * (c) converting to lowercase, * (d) encoding as a URL, * (e) replacing percents with hyphens, * (f) coalescing multiple consecutive hyphens into one, * (g) removing any leading and trailing hyphens, * (h) trimming the result to a maximum length of * MAX_SLUG_COMPONENT_LENGTH, * (i) removing any remaining trailing hyphen. * @param aString The string that is to be converted. * @return The value of aString with slug conventions applied. */ public static String makeSlug(final String aString) { String slug = StringUtils.strip( UriComponent.encode(aString.replaceAll("\\p{Punct}", "-").replaceAll("\\s", "-").toLowerCase(), UriComponent.Type.PATH_SEGMENT).replaceAll("%", "-").replaceAll("-+", "-"), "-"); return StringUtils.stripEnd(slug.substring(0, Math.min(MAX_SLUG_COMPONENT_LENGTH, slug.length())), "-"); } /** * Get the Sesame repository ID for a vocabulary's version * referred to by the task. * * @param taskInfo * The TaskInfo object representing the task. * @return The repository id for the vocabulary with this version. */ public static String getSesameRepositoryId(final TaskInfo taskInfo) { // As of ANDS-Registry-Core commit e365392831ae, // now use the vocabulary title slug directly from the database. return makeSlug(taskInfo.getVocabulary().getOwner()) + "_" + taskInfo.getVocabulary().getSlug() + "_" + makeSlug(taskInfo.getVersion().getTitle()); } /** * Get the SISSVoc repository ID for a vocabulary's version * referred to by the task. It neither begins nor ends with a slash. * * @param taskInfo * The TaskInfo object representing the task. * @return The repository id for the vocabulary with this version. */ public static String getSISSVocRepositoryPath(final TaskInfo taskInfo) { // As of ANDS-Registry-Core commit e365392831ae, // now use the vocabulary title slug directly from the database. return makeSlug(taskInfo.getVocabulary().getOwner()) + "/" + taskInfo.getVocabulary().getSlug() + "/" + makeSlug(taskInfo.getVersion().getTitle()); } /** Size of buffer to use when writing to a ZIP archive. */ private static final int BUFFER_SIZE = 4096; /** Add a file to a ZIP archive. * @param zos The ZipOutputStream representing the ZIP archive. * @param file The File which is to be added to the ZIP archive. * @return True if adding succeeded. * @throws IOException Any exception when reading/writing data. */ private static boolean zipFile(final ZipOutputStream zos, final File file) throws IOException { if (!file.canRead()) { logger.error("zipFile can not read " + file.getCanonicalPath()); return false; } zos.putNextEntry(new ZipEntry(file.getName())); FileInputStream fis = new FileInputStream(file); byte[] buffer = new byte[BUFFER_SIZE]; int byteCount = 0; while ((byteCount = fis.read(buffer)) != -1) { zos.write(buffer, 0, byteCount); } fis.close(); zos.closeEntry(); return true; } /** Compress the files in the backup folder for a project. * @param projectId The project ID * @throws IOException Any exception when reading/writing data. */ public static void compressBackupFolder(final String projectId) throws IOException { String backupPath = getBackupPath(projectId); if (!Files.isDirectory(Paths.get(backupPath))) { // No such directory, so nothing to do. return; } String projectSlug = makeSlug(projectId); // The name of the ZIP file that does/will contain all // backups for this project. Path zipFilePath = Paths.get(backupPath).resolve(projectSlug + ".zip"); // A temporary ZIP file. Any existing content in the zipFilePath // will be copied into this, followed by any other files in // the directory that have not yet been added. Path tempZipFilePath = Paths.get(backupPath).resolve("temp" + ".zip"); File tempZipFile = tempZipFilePath.toFile(); if (!tempZipFile.exists()) { tempZipFile.createNewFile(); } ZipOutputStream tempZipOut = new ZipOutputStream(new FileOutputStream(tempZipFile)); File existingZipFile = zipFilePath.toFile(); if (existingZipFile.exists()) { ZipFile zipIn = new ZipFile(existingZipFile); Enumeration<? extends ZipEntry> entries = zipIn.entries(); while (entries.hasMoreElements()) { ZipEntry e = entries.nextElement(); logger.debug("compressBackupFolder copying: " + e.getName()); tempZipOut.putNextEntry(e); if (!e.isDirectory()) { copy(zipIn.getInputStream(e), tempZipOut); } tempZipOut.closeEntry(); } zipIn.close(); } File dir = new File(backupPath); File[] files = dir.listFiles(); for (File source : files) { if (!source.getName().toLowerCase().endsWith(".zip")) { logger.debug("compressBackupFolder compressing and " + "deleting file: " + source.toString()); if (zipFile(tempZipOut, source)) { source.delete(); } } } tempZipOut.flush(); tempZipOut.close(); tempZipFile.renameTo(existingZipFile); } /** Size of buffer to use for copying files. */ private static final int COPY_BUFFER_SIZE = 4096 * 1024; /** Copy the contents of an InputStream into an OutputStream. * @param input The content to be copied. * @param output The destination of the content being copied. * @throws IOException Any IOException during read/write. */ public static void copy(final InputStream input, final OutputStream output) throws IOException { int bytesRead; byte[] buffer = new byte[COPY_BUFFER_SIZE]; while ((bytesRead = input.read(buffer)) != -1) { output.write(buffer, 0, bytesRead); } } }