// Java tutorial
/* File: $Id$ * Revision: $Revision$ * Date: $Date$ * Author: $Author$ * * The Netarchive Suite - Software to harvest and preserve websites * Copyright 2004-2012 The Royal Danish Library, the Danish State and * University Library, the National Library of France and the Austrian * National Library. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 * USA */ package dk.netarkivet.archive.arcrepository.bitpreservation; import java.io.File; import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import dk.netarkivet.archive.arcrepositoryadmin.AdminData; import dk.netarkivet.archive.arcrepositoryadmin.ArcRepositoryEntry; import dk.netarkivet.archive.arcrepositoryadmin.ReadOnlyAdminData; import dk.netarkivet.common.distribute.arcrepository.ArcRepositoryClientFactory; import dk.netarkivet.common.distribute.arcrepository.ReplicaStoreState; import dk.netarkivet.common.distribute.arcrepository.Replica; import dk.netarkivet.common.distribute.arcrepository.PreservationArcRepositoryClient; import dk.netarkivet.common.exceptions.ArgumentNotValid; import dk.netarkivet.common.exceptions.IOFailure; 
import dk.netarkivet.common.exceptions.IllegalState; import dk.netarkivet.common.exceptions.NetarkivetException; import dk.netarkivet.common.exceptions.NotImplementedException; import dk.netarkivet.common.exceptions.PermissionDenied; import dk.netarkivet.common.exceptions.UnknownID; import dk.netarkivet.common.utils.CleanupHook; import dk.netarkivet.common.utils.CleanupIF; import dk.netarkivet.common.utils.FileUtils; import dk.netarkivet.common.utils.StringUtils; import dk.netarkivet.common.utils.batch.ChecksumJob; /** * Class handling integrity check of the arcrepository. <p/> This class must * run on the same machine as the arcrepository, as it uses the same admin data * file (read-only). However, it still talks JMS with the arcrepository. * * @deprecated Use the DatabaseBasedActiveBitPreservation instead (define in * the setting: <b>settings.archive.admin.class</b>). */ @Deprecated public class FileBasedActiveBitPreservation implements ActiveBitPreservation, CleanupIF { /** The class log. */ private Log log = LogFactory.getLog(FileBasedActiveBitPreservation.class); /** * When replacing a broken file, the broken file is downloaded and stored in * a temporary directory under Settings.COMMON_TEMP_DIR with this name. * It can then be inspected at your leisure. */ private static final String REMOVED_FILES = "bitpreservation"; /** * The maximum size of logged collections. * This is used either when a subcollection is extracted, or when objects * are concatenated. * Default value = 10. */ private static final int MAX_LIST_SIZE = 10; /** * This should be updated at the entrance of each major use block, to ensure * it is reasonably in sync with the file. We cannot, however, guarantee * total sync, as the file can change at any time. We consider it good * enough that it is updated every time there is user interaction. 
*/ private ReadOnlyAdminData admin; /** * File preservation is done in a singleton, which means that any user using * the file preservation interface will update the same state. * * Nothing breaks by two users simultaneously do bit preservation actions, * but it may have undesirable consequences, such as two users * simultaneously starting checksum jobs of the full archive. */ private static FileBasedActiveBitPreservation instance; /** Hook to close down application. */ private CleanupHook closeHook; /** Initializes a FileBasedActiveBitPreservation instance. */ protected FileBasedActiveBitPreservation() { this.admin = AdminData.getReadOnlyInstance(); this.closeHook = new CleanupHook(this); Runtime.getRuntime().addShutdownHook(closeHook); } /** * Get singleton instance. * * @return the singleton. */ public static synchronized FileBasedActiveBitPreservation getInstance() { if (instance == null) { instance = new FileBasedActiveBitPreservation(); } return instance; } /** * Retrieve the preservation status for the files with the given filenames. * This will ask for a fresh checksum from the bitarchives and admin data. * * @param filenames List of filenames * * @return a map ([filename]-> [FilePreservationState]) of the preservation * status for the given files. * The preservationstate is null, if the file named does not exist * in admin data. * * @throws ArgumentNotValid If the list of filenames is null or contains * a null. */ public Map<String, PreservationState> getPreservationStateMap(String... filenames) throws ArgumentNotValid { ArgumentNotValid.checkNotNull(filenames, "String... filenames"); // check, that the files are not empty strings for (String file : filenames) { ArgumentNotValid.checkNotNullOrEmpty(file, "String file"); } // Start by retrieving the admin status admin.synchronize(); // Temporary datastructures: // adminInfo: A map ([filename]->[ArcRepositoryEntry]) to hold admindata // info. Holds one entry for each of the files // known by admin data. 
// missingInAdminData: Contains the names of files that admindata just // don't know. Map<String, ArcRepositoryEntry> adminInfo = new HashMap<String, ArcRepositoryEntry>(); Set<String> missingInAdmindata = new HashSet<String>(); for (String filename : filenames) { ArcRepositoryEntry ae = admin.getEntry(filename); if (ae != null) { adminInfo.put(filename, ae); } else { missingInAdmindata.add(filename); } } if (missingInAdmindata.size() > 0) { log.warn("The following " + missingInAdmindata.size() + " files are unknown to admindata: " + StringUtils.conjoin(",", new ArrayList<String>(missingInAdmindata).subList(0, Math.min(missingInAdmindata.size(), MAX_LIST_SIZE)))); } // filepreservationStates: map ([filename] -> [filepreservationstate]) // This is the datastructure returned from this method Map<String, PreservationState> filepreservationStates = new HashMap<String, PreservationState>(); // Phase 1: Add null FilePreservationState entries for the files // absent from admindata. for (String missing : missingInAdmindata) { filepreservationStates.put(missing, (FilePreservationState) null); } // Phase 2: For every filename present in admin data, // construct a map ([replica] -> [list of checksums]). // The resulting map: // map ([filename] -> map ([replica] -> [list of checksums])). // This takes a long time, as two batchjobs will be sent out to // to the bitarchives to compute checksums for the files with these // filenames. Map<String, Map<Replica, List<String>>> checksumMaps = getChecksumMaps(adminInfo.keySet()); // Phase 3: construct FilePreservationState objects for subset of // filenames known by admin data. The rest of the filenames are // represented with a null FilePreservationState object. 
for (Map.Entry<String, ArcRepositoryEntry> entry : adminInfo.entrySet()) { String filename = entry.getKey(); ArcRepositoryEntry adminFileInfo = entry.getValue(); filepreservationStates.put(filename, new FilePreservationState(filename, adminFileInfo, checksumMaps.get(filename))); } return filepreservationStates; } /** * Get the details of the state of the given file in the bitarchives * and admin data. * @param filename A given file * @return the FilePreservationState for the given file. This will be null, * if the filename is not found in admin data. */ public PreservationState getPreservationState(String filename) { ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename"); Map<String, PreservationState> filepreservationStates = getPreservationStateMap(filename); return filepreservationStates.get(filename); } /** * Generate a map of checksums for these filenames in the bitarchives ( * map ([filename] -> map ([replica] -> [list of checksums]))). * This takes a long time, as a batchjob will be sent out to * all the bitarchives to compute checksums for the files with these * filenames. * * @param filenames The filenames to get the checksums for. * * @return Map containing the output of checksum jobs from the bitarchives. */ private Map<String, Map<Replica, List<String>>> getChecksumMaps(Set<String> filenames) { //checksummaps: map ([filename] -> map ([replica] // -> [list of checksums])). // This datastructure will contain for each filename the computed // checksums for the file with this filename on all replicas // (bitarchives). Map<String, Map<Replica, List<String>>> checksummaps = new HashMap<String, Map<Replica, List<String>>>(); //Only make one checksum job for each replica for (Replica rep : Replica.getKnown()) { // Get the checksum information from Replica 'rep' as // a map ([filename]->[list of checksums]). 
Map<String, List<String>> checksums = getChecksums(rep, filenames); log.debug("Adding checksums for replica '" + rep + "' for filenames: " + StringUtils.conjoin(",", filenames, MAX_LIST_SIZE)); for (String filename : filenames) { // Update 'checksummaps' datastructure with the checksums // received from Replica 'rep'. // replicaMap: map ([replica] // -> [list of checksums for one filename]). Map<Replica, List<String>> replicaMap; // Get current map in 'checksummaps' datastructure for filename, // if it exists. Otherwise a new one is created, and // stored. if (checksummaps.containsKey(filename)) { replicaMap = checksummaps.get(filename); } else { replicaMap = new HashMap<Replica, List<String>>(); checksummaps.put(filename, replicaMap); } // Extract the list of checksums for the given filename from // the 'checksums' datastructure. List<String> checksumsForFileOnRep = checksums.get(filename); if (checksumsForFileOnRep == null) { // If no checksums for file was available on replica 'ba' // just add an empty list of checksums. checksumsForFileOnRep = new ArrayList<String>(); } // Add the list of checksums for the given file // on replica 'rep' to datastructure 'replicaMap'. replicaMap.put(rep, checksumsForFileOnRep); } } return checksummaps; } /** * Get the checksum of a list of files in a replica * (map ([filename] -> map ([replica] -> [list of checksums])). * * Note that this method runs a batch job on the bitarchives, and therefore * may take a long time, depending on network delays. * * @param rep The replica to ask for checksums. * @param filenames The names of the files to ask for checksums for. * @return The MD5 checksums of the files, or the empty string if the file * was not in the replica. * @see ChecksumJob#parseLine(String) */ private Map<String, List<String>> getChecksums(Replica rep, Set<String> filenames) { // initialise the resulting map. 
Map<String, List<String>> res = new HashMap<String, List<String>>(); try { PreservationArcRepositoryClient arcClient = ArcRepositoryClientFactory.getPreservationInstance(); // for each file extract the checksum through a checksum message // and then put it into the resulting map. for (String file : filenames) { // retrieve the checksum from the replica. String checksum = arcClient.getChecksum(rep.getId(), file); // put the checksum into a list, or make empty list if the // checksum was not retrieved. List<String> csList; if (checksum == null || checksum.isEmpty()) { log.warn("The checksum for file '" + file + "' from " + "replica '" + rep + "' was invalid. " + "Empty list returned"); csList = Collections.<String>emptyList(); } else { csList = new ArrayList<String>(); csList.add(checksum); } // put the filename and list into the map. res.put(file, csList); } log.debug("The map from a checksum archive: " + res.toString()); } catch (NetarkivetException e) { // This is not critical. Log and continue. log.warn("The retrieval of checksums from a checksum archive was " + "not successful.", e); } return res; } /** * Get a list of missing files in a given replica. * * @param replica A given replica. * @return A list of missing files in a given replica. * @throws IllegalState if the file with the list cannot be found. * @throws ArgumentNotValid If the replica is null. */ public Iterable<String> getMissingFiles(Replica replica) throws IllegalState, ArgumentNotValid { ArgumentNotValid.checkNotNull(replica, "Replica replica"); File missingOutput = WorkFiles.getFile(replica, WorkFiles.MISSING_FILES_BA); if (!missingOutput.exists()) { throw new IllegalState("Could not find the file: " + missingOutput.getAbsolutePath()); } return FileUtils.readListFromFile(missingOutput); } /** * This method takes as input the name of a replica for which we wish to * retrieve the list of files, either through a FileListJob or a * GetAllFilenamesMessage. 
It also reads in the known files in the * arcrepository from the AdminData directory specified in the Setting * DIRS_ARCREPOSITORY_ADMIN. The two file lists are compared and a * subdirectory missingFiles is created with two unsorted files: * 'missingba.txt' containing missing files, ie those registered in the * admin data, but not found in the replica, and 'missingadmindata.txt' * containing extra files, ie. those found in the replica but not in the * arcrepository admin data. * * TODO The second file is never used on the current implementation. * * FIXME: It is unclear if the decision if which files are missing isn't * better suited to be in getMissingFiles, so this method only runs the * batch job. * * @param replica the replica to search for missing files * * @throws ArgumentNotValid If the given directory does not contain a file * filelistOutput/sorted.txt, or the argument * replica is null. * @throws PermissionDenied If the output directory cannot be created. */ public void findMissingFiles(Replica replica) throws ArgumentNotValid, PermissionDenied { ArgumentNotValid.checkNotNull(replica, "Replica replica"); runFileListJob(replica); log.trace("Finding missing files in directory '" + WorkFiles.getPreservationDir(replica) + "'"); admin.synchronize(); // Create set of file names from replica data Set<String> filesInReplica = new HashSet<String>(WorkFiles.getLines(replica, WorkFiles.FILES_ON_BA)); // Get set of files in arcrepository Set<String> arcrepNameSet = admin.getAllFileNames(); // Find difference set 1 (the files missing from the replica). 
Set<String> extraFilesInAdminData = new HashSet<String>(arcrepNameSet); extraFilesInAdminData.removeAll(filesInReplica); // Log result if (extraFilesInAdminData.size() > 0) { log.warn("The " + extraFilesInAdminData.size() + " files '" + new ArrayList<String>(extraFilesInAdminData).subList(0, Math.min(extraFilesInAdminData.size(), MAX_LIST_SIZE)) + "' are not present in the replica listing in '" + WorkFiles.getPreservationDir(replica).getAbsolutePath() + "'"); } // Write output data WorkFiles.write(replica, WorkFiles.MISSING_FILES_BA, extraFilesInAdminData); // Find difference set 2 (the files missing in admin.data). Set<String> extraFilesInRep = new HashSet<String>(filesInReplica); extraFilesInRep.removeAll(arcrepNameSet); // Log result if (extraFilesInRep.size() > 0) { log.warn("The " + extraFilesInRep.size() + " files '" + new ArrayList<String>(extraFilesInRep).subList(0, Math.min(extraFilesInRep.size(), MAX_LIST_SIZE)) + "' have been found in the replica listing in '" + WorkFiles.getPreservationDir(replica).getAbsolutePath() + "' though they are not known by the " + "system."); } // Write output data WorkFiles.write(replica, WorkFiles.MISSING_FILES_ADMINDATA, extraFilesInRep); log.trace("Finished finding missing files."); } /** * Method to get a list of all files in a given bitarchive. The result is * stored (unsorted) in the area specified by WorkFiles.FILES_ON_BA. * * @param replica the replica where the given bitarchive lies * * @throws PermissionDenied if the output directories cannot be created * @throws IOFailure if there is a problem writing the output file, * or if the job fails for some reason * @throws UnknownID If the replica has an unknown replicaType. 
*/ private void runFileListJob(Replica replica) throws IOFailure, UnknownID, PermissionDenied { // Pick the right directory to output to File batchOutputFile = WorkFiles.getFile(replica, WorkFiles.FILES_ON_BA); log.trace("runFileListJob for replica '" + replica + "', output file '" + batchOutputFile + "'"); // Retrieve a file containing all the filenames of the replica through // a GetAllFilenamesMessage File filenames = ArcRepositoryClientFactory.getPreservationInstance().getAllFilenames(replica.getId()); // copy the list of filenames to the output file. FileUtils.copyFile(filenames, batchOutputFile); } /** * Get a list of corrupt files in a given bitarchive. * * @param bitarchive a bitarchive * * @return a list of wrong files in a given bitarchive. * * @throws IllegalState if the file with the list cannot be found. */ public Iterable<String> getChangedFiles(Replica bitarchive) throws IllegalState { ArgumentNotValid.checkNotNull(bitarchive, "Replica bitarchive"); File wrongFilesOutput = WorkFiles.getFile(bitarchive, WorkFiles.WRONG_FILES); if (!wrongFilesOutput.exists()) { throw new IllegalState("Could not find the file: " + wrongFilesOutput.getAbsolutePath()); } // Create set of file names from bitarchive data return FileUtils.readListFromFile(wrongFilesOutput); } /** * This method finds out which files in a given bitarchive are * misrepresented in the admin data: Either having the wrong checksum or not * being marked as uploaded when it actually is. <p/> It uses the admindata * file from the DIRS_ARCREPOSITORY_ADMIN directory, as well as the files * output by a runChecksumJob. The erroneous files are stored in files. * * FIXME: It is unclear if the decision if which files are changed isn't * better suited to be in getChangedFiles, so this method only runs the * batch job. * * @param replica the bitarchive replica the checksumjob came from * * @throws IOFailure On file or network trouble. 
* @throws PermissionDenied if the output directory cannot be created * @throws ArgumentNotValid if argument replica is null */ public void findChangedFiles(Replica replica) throws IOFailure, PermissionDenied, ArgumentNotValid { ArgumentNotValid.checkNotNull(replica, "Replica replica"); runChecksumJob(replica); admin.synchronize(); // Create set of checksums from bitarchive data Set<String> replicaChecksumSet = new HashSet<String>( WorkFiles.getLines(replica, WorkFiles.CHECKSUMS_ON_BA)); // Get set of files in arcrepository Set<String> arcrepChecksumSet = new HashSet<String>(); for (String fileName : admin.getAllFileNames()) { arcrepChecksumSet.add(ChecksumJob.makeLine(fileName, admin.getCheckSum(fileName))); } // Get set of completed files in arcrepository // Note that these files use the format <filename>##<checksum> to // conform to the checksum output. Set<String> arcrepCompletedChecksumSet = new HashSet<String>(); for (String fileName : admin.getAllFileNames(replica, ReplicaStoreState.UPLOAD_COMPLETED)) { arcrepCompletedChecksumSet.add(ChecksumJob.makeLine(fileName, admin.getCheckSum(fileName))); } // Find files where checksums differ Set<String> wrongChecksums = new HashSet<String>(replicaChecksumSet); wrongChecksums.removeAll(arcrepChecksumSet); // Find files where state is wrong Set<String> wrongStates = new HashSet<String>(replicaChecksumSet); wrongStates.removeAll(wrongChecksums); wrongStates.removeAll(arcrepCompletedChecksumSet); // Remove files unknown in admin data (note - these are not ignored, // they will be handled by missing files operations) for (String checksum : new ArrayList<String>(wrongChecksums)) { Map.Entry<String, String> entry = ChecksumJob.parseLine(checksum); if (!admin.hasEntry(entry.getKey())) { wrongChecksums.remove(checksum); wrongStates.remove(checksum); } } // Log result if (wrongChecksums.size() > 0) { log.warn("The " + wrongChecksums.size() + " files '" + new ArrayList<String>(wrongChecksums).subList(0, 
Math.min(wrongChecksums.size(), MAX_LIST_SIZE)) + "' have wrong checksum in the bitarchive listing in '" + WorkFiles.getPreservationDir(replica).getAbsolutePath() + "'"); } if (wrongStates.size() > 0) { log.warn("The " + wrongStates.size() + " files '" + new ArrayList<String>(wrongStates).subList(0, Math.min(wrongStates.size(), MAX_LIST_SIZE)) + "' have wrong states in the bitarchive listing in '" + WorkFiles.getPreservationDir(replica).getAbsolutePath() + "'"); } // Collect all names of files with the wrong checksum Set<String> wrongChecksumFilenames = new HashSet<String>(); for (String checksum : wrongChecksums) { Map.Entry<String, String> entry = ChecksumJob.parseLine(checksum); wrongChecksumFilenames.add(entry.getKey()); } // Collect all names of files with the wrong state Set<String> wrongStateFilenames = new HashSet<String>(); for (String checksum : wrongStates) { Map.Entry<String, String> entry = ChecksumJob.parseLine(checksum); wrongStateFilenames.add(entry.getKey()); } // Write output data to the files. WorkFiles.write(replica, WorkFiles.WRONG_FILES, wrongChecksumFilenames); WorkFiles.write(replica, WorkFiles.WRONG_STATES, wrongStateFilenames); } /** * Runs a checksum job on if the replica is a bitarchive replica and sends * a GetAllChecksumsMessage if the replica is a checksum replica. Output is * written to file returned by WorkFiles.getChecksumOutputFile(replica). * * @param replica One of the bitarchive replicas. * @throws IOFailure If unable to create output dirs or if unable to * write/read output to files. */ private void runChecksumJob(Replica replica) throws IOFailure { // Create directories for output File outputFile = WorkFiles.getFile(replica, WorkFiles.CHECKSUMS_ON_BA); // Retrieve a file containing the checksums of the replica through a // GetAllChecksumsMessage. File checksumFile = ArcRepositoryClientFactory.getPreservationInstance().getAllChecksums(replica.getId()); // copy the resulting file to the output file. 
FileUtils.copyFile(checksumFile, outputFile); } /** * Return the number of files found in the replica. If nothing is known * about the replica, -1 is returned. * * @param replica the bitarchive to check * * @return the number of files found in the bitarchive. If nothing is known * about the bitarchive replica, -1 is returned. * @throws ArgumentNotValid If the replica is null. */ public long getNumberOfFiles(Replica replica) throws ArgumentNotValid { ArgumentNotValid.checkNotNull(replica, "Replica replica"); File unsortedOutput = WorkFiles.getFile(replica, WorkFiles.FILES_ON_BA); if (!unsortedOutput.exists()) { return -1; } return FileUtils.countLines(unsortedOutput); } /** * Get the number of missing files in a given replica. If nothing is * known about the replica, -1 is returned. * * @param replica a given replica. * @return the number of missing files in the given replica. If nothing * is known about the replica, -1 is returned. * @throws ArgumentNotValid If the replica is null. */ public long getNumberOfMissingFiles(Replica replica) throws ArgumentNotValid { ArgumentNotValid.checkNotNull(replica, "Replica replica"); File missingOutput = WorkFiles.getFile(replica, WorkFiles.MISSING_FILES_BA); if (!missingOutput.exists()) { return -1; } return FileUtils.countLines(missingOutput); } /** * Get the number of wrong files for a replica. If nothing is known * about the replica, -1 is returned. * * @param replica a replica. * @return the number of wrong files for the replica. If nothing is known * about the replica, -1 is returned. * @throws ArgumentNotValid If the replica is null. 
*/ public long getNumberOfChangedFiles(Replica replica) throws ArgumentNotValid { ArgumentNotValid.checkNotNull(replica, "Replica bitarchive"); File wrongFileOutput = WorkFiles.getFile(replica, WorkFiles.WRONG_FILES); if (!wrongFileOutput.exists()) { return -1; } return FileUtils.countLines(wrongFileOutput); } /** * Get the date for last time the checksum information was updated for * this replica. * @param replica The replica to check last time for. * @return The date for last check. Will return 1970-01-01 for never. * @throws ArgumentNotValid If the replica is null. */ public Date getDateForChangedFiles(Replica replica) throws ArgumentNotValid { ArgumentNotValid.checkNotNull(replica, "Replica replica"); return WorkFiles.getLastUpdate(replica, WorkFiles.WRONG_FILES); } /** * Get the date for last time the missing files information was updated for * this replica. * @param replica The replica to check last time for. * @return The date for last check. Will return 1970-01-01 for never. * @throws ArgumentNotValid If the replica is null. */ public Date getDateForMissingFiles(Replica replica) throws ArgumentNotValid { ArgumentNotValid.checkNotNull(replica, "Replica replica"); return WorkFiles.getLastUpdate(replica, WorkFiles.FILES_ON_BA); } /** * Check that the files we want to restore are indeed missing on the * replica, and present in admin data and the reference bitarchive. * If so, upload missing files from reference replica to this replica. * * @param replica The replica to restore files to * @param filenames The names of the files. * @throws IllegalState If one of the files is unknown * (For all known files, there will be an attempt at udpload) * @throws IOFailure If some file cannot be reestablished. All files * will be attempted, though. * @throws ArgumentNotValid If the replica or the list of filenames are * null. */ public void uploadMissingFiles(Replica replica, String... 
filenames) throws IOFailure, IllegalState, ArgumentNotValid { ArgumentNotValid.checkNotNull(replica, "Replica replica"); ArgumentNotValid.checkNotNull(filenames, "String... filenames"); // Contains all files that we couldn't reestablish List<String> troubleNames = new ArrayList<String>(); // preservationStates: map [filename]->[filepreservationstate] // Initialized here to contain an entry for each filename in vargargs // 'filenames'. Map<String, PreservationState> preservationStates = getPreservationStateMap(filenames); // For each given filename, try to reestablish it on // Replica 'replica' for (String fn : filenames) { PreservationState fps = preservationStates.get(fn); try { if (fps == null) { throw new IllegalState("No state known about '" + fn + "'"); } if (!fps.isAdminDataOk()) { setAdminDataFailed(fn, replica); admin.synchronize(); fps = getPreservationState(fn); if (fps == null) { throw new IllegalState("No state known about '" + fn + "'"); } } reestablishMissingFile(fn, replica, fps); } catch (Exception e) { log.warn("Trouble reestablishing file '" + fn + "' on replica " + replica.getName(), e); troubleNames.add(fn); } } if (troubleNames.size() > 0) { throw new IOFailure("Could not reestablish all files. The following" + " files were not reestablished: " + troubleNames); } } /** * Reestablish a file missing in a replica. The following pre-conditions * for reestablishing the file are checked before changing anything:<p> * 1) the file is registered correctly in AdminData. <br> * 2) the file is missing in the given replica. <br> * 3) the file is present in another replica, which must be a bitarchive * replica (the reference archive).<br> * 4) admin data and the reference archive agree on the * checksum of the file. * * @param fileName Name of the file to reestablish. * @param damagedReplica Name of the replica missing the file. * @param fps The FilePreservationStatus of the file to fix. * @throws IOFailure On trouble updating the file. 
*/ private void reestablishMissingFile(String fileName, Replica damagedReplica, PreservationState fps) throws IOFailure { log.debug("Reestablishing missing file '" + fileName + "' in replica '" + damagedReplica + "'."); if (!satisfiesMissingFileConditions(fps, damagedReplica, fileName)) { throw new IOFailure( "Unable to reestablish missing file. '" + fileName + "'. " + "It is not in the right state."); } // Retrieve the file from the reference archive (must be a bitarchive) Replica referenceArchive = fps.getReferenceBitarchive(); try { PreservationArcRepositoryClient arcrep = ArcRepositoryClientFactory.getPreservationInstance(); File tmpDir = FileUtils.createUniqueTempDir(FileUtils.getTempDir(), REMOVED_FILES); File missingFile = new File(tmpDir, fileName); arcrep.getFile(fileName, referenceArchive, missingFile); arcrep.store(missingFile); tmpDir.delete(); } catch (IOFailure e) { String errmsg = "Failed to reestablish '" + fileName + "' in '" + damagedReplica.getName() + "' with copy from '" + referenceArchive + "'"; log.warn(errmsg, e); throw new IOFailure(errmsg, e); } log.info("Reestablished " + fileName + " in " + damagedReplica.getName() + " with copy from " + referenceArchive.getName()); FileUtils.removeLineFromFile(fileName, WorkFiles.getFile(damagedReplica, WorkFiles.MISSING_FILES_BA)); FileUtils.appendToFile(WorkFiles.getFile(damagedReplica, WorkFiles.FILES_ON_BA), fileName); } /** * Checks the conditions that must be true before reestablishing a missing * file. Returns true if and only if all of the below are true; returns * false otherwise.<p> * * 1) the file is registered correctly in AdminData.<br/> * 2) the file is missing in the given bitarchive.<br/> * 3) the file is present in another bitarchive (the reference archive). * <br/> * 4) admin data and the reference archive agree on the checksum. * * @param state the status for one file in the bitarchives. * @param damagedReplica the replica where the file is corrupt or missing. 
* @param fileName the name of the file being considered. * @return true if all conditions are true, false otherwise. */ private boolean satisfiesMissingFileConditions(PreservationState state, Replica damagedReplica, String fileName) { // condition 1 if (!state.isAdminDataOk()) { log.warn("Admin.data is not consistent regarding file '" + fileName + "'"); return false; } // condition 2 if (!state.fileIsMissing(damagedReplica)) { log.warn("File '" + fileName + "' is not missing in bitarchive on replica '" + damagedReplica.getName() + "'."); return false; } // conditions 3 and 4 Replica referenceArchive = state.getReferenceBitarchive(); if (referenceArchive == null) { log.warn("No correct version of file '" + fileName + "' exists in any archive"); return false; } return true; } /** * Calls upon the arcrepository to change the known state for the given * file in one replica. This method uses JMS and blocks until a reply is * sent. * We don't wait for an acknowledgement that admin data indeed has been * updated. * * @param filename The file to change state for * @param rep The replica to change state for the file for. * @throws ArgumentNotValid if arguments are null or empty strings */ private void setAdminDataFailed(String filename, Replica rep) throws ArgumentNotValid { ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename"); ArgumentNotValid.checkNotNull(rep, "Replica rep"); ArcRepositoryClientFactory.getPreservationInstance().updateAdminData(filename, rep.getId(), ReplicaStoreState.UPLOAD_FAILED); } /** * Check that file checksum is indeed different to admin data and reference * replica. If so, remove missing file and upload it from reference * replica to this replica. * * @param replica The replica to restore file to * @param filename The name of the file. * @param credentials The credentials used to perform this replace operation * @param checksum The expected checksum. 
* @throws IOFailure if the file cannot be reestablished * @throws PermissionDenied if the file is not in correct state * @throws ArgumentNotValid If the filename, the credentials or the checksum * either are null or contain the empty string, or if the replica is null. */ public void replaceChangedFile(Replica replica, String filename, String credentials, String checksum) throws ArgumentNotValid, IOFailure, PermissionDenied { ArgumentNotValid.checkNotNull(replica, "Replica replica"); ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename"); ArgumentNotValid.checkNotNullOrEmpty(checksum, "String checksum"); ArgumentNotValid.checkNotNullOrEmpty(credentials, "String credentials"); // Send a correct message to the archive. correctArchiveEntry(replica, filename, checksum, credentials); } /** * Method for correcting a corrupt entry in an archive. This message is * handled different for the different replicas * * @param replica The replica which contains the bad entry. * @param filename The name of the file. * @param checksum The checksum of the bad entry. * @param credentials The credentials for correcting the bad entry. */ private void correctArchiveEntry(Replica replica, String filename, String checksum, String credentials) { // get the preservation state. Map<String, PreservationState> preservationStates = getPreservationStateMap(filename); PreservationState fps = preservationStates.get(filename); // Use the preservation state to find a reference archive (bitarchive). 
Replica referenceArchive = fps.getReferenceBitarchive(); // Get the arc repository client and a temporary file PreservationArcRepositoryClient arcrepClient = ArcRepositoryClientFactory.getPreservationInstance(); File tmpDir = FileUtils.createUniqueTempDir(FileUtils.getTempDir(), REMOVED_FILES); File missingFile = new File(tmpDir, filename); // retrieve a good copy of the file arcrepClient.getFile(filename, referenceArchive, missingFile); // correct the bad entry in the archive with the retrieved good copy. arcrepClient.correct(replica.getId(), checksum, missingFile, credentials); // cleanup afterwards. tmpDir.delete(); } /** * Return a list of files present in bitarchive but missing in AdminData. * * @return A list of missing files. * @throws NotImplementedException Always, since this will not been * implemented. */ public Iterable<String> getMissingFilesForAdminData() throws NotImplementedException { throw new NotImplementedException("Not to be implemented"); } /** * Return a list of files with wrong checksum or status in admin data. * * @return A list of files with wrong checksum or status. * @throws NotImplementedException Always, since this will not been * implemented. */ public Iterable<String> getChangedFilesForAdminData() throws NotImplementedException { throw new NotImplementedException("Not to be implemented"); } /** * Reestablish admin data to match bitarchive states for files. * * @param filenames The files to reestablish state for. * @throws NotImplementedException Always, since this will not been * implemented. * @throws ArgumentNotValid If the list of filenames are null. */ public void addMissingFilesToAdminData(String... filenames) throws NotImplementedException, ArgumentNotValid { ArgumentNotValid.checkNotNull(filenames, "String... filenames"); //TODO implement method throw new NotImplementedException("Not to be implemented"); } /** * Reestablish admin data to match replica states for file. * * @param filename The file to reestablish state for. 
* @throws PermissionDenied if the file is not in correct state
     * @throws ArgumentNotValid If the filename is null or empty.
     */
    public void changeStateForAdminData(String filename)
            throws PermissionDenied, ArgumentNotValid {
        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");

        admin.synchronize();
        final PreservationState state = getPreservationState(filename);

        // A usable reference checksum is required before anything is touched.
        final String referenceChecksum = state.getReferenceCheckSum();
        final boolean hasReferenceChecksum =
                referenceChecksum != null && !referenceChecksum.isEmpty();
        if (!hasReferenceChecksum) {
            throw new PermissionDenied("No correct checksum for '" + filename
                    + "'");
        }

        // Ask for an admin checksum update only when it differs from the
        // reference checksum.
        final boolean adminAgrees =
                admin.getCheckSum(filename).equals(referenceChecksum);
        if (!adminAgrees) {
            PreservationArcRepositoryClient client =
                    ArcRepositoryClientFactory.getPreservationInstance();
            client.updateAdminChecksum(filename, referenceChecksum);
        }

        // For every known replica whose unique checksum equals the checksum
        // held by admin, drop the file from that replica's wrong-files list.
        for (Replica rep : Replica.getKnown()) {
            final boolean replicaAgrees = state.getUniqueChecksum(rep)
                    .equals(admin.getCheckSum(filename));
            if (!replicaAgrees) {
                continue;
            }
            FileUtils.removeLineFromFile(filename,
                    WorkFiles.getFile(rep, WorkFiles.WRONG_FILES));
        }
    }

    /**
     * Shut down cleanly: unregister the shutdown hook if one is set, forget
     * it, and run {@link #cleanup()}.
     */
    public void close() {
        if (closeHook != null) {
            Runtime.getRuntime().removeShutdownHook(closeHook);
            closeHook = null;
        }
        cleanup();
    }

    /**
     * Closes the preservation arc repository client and resets the
     * {@code instance} field to null.
     *
     * @see CleanupIF#cleanup()
     */
    public void cleanup() {
        // In case a listener was set up, remove it.
        PreservationArcRepositoryClient client =
                ArcRepositoryClientFactory.getPreservationInstance();
        client.close();
        instance = null;
    }
}