Java tutorial
/* * Copyright 2011, 2012 Delving BV * * Licensed under the EUPL, Version 1.0 or? as soon they * will be approved by the European Commission - subsequent * versions of the EUPL (the "Licence"); * you may not use this work except in compliance with the * Licence. * You may obtain a copy of the Licence at: * * http://ec.europa.eu/idabc/eupl * * Unless required by applicable law or agreed to in * writing, software distributed under the Licence is * distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. * See the Licence for the specific language governing * permissions and limitations under the Licence. */ package eu.delving.metadata; import org.apache.commons.io.FileUtils; import java.io.*; import java.security.DigestOutputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.zip.GZIPInputStream; /** * This class manages all aspects of using an MD5 hash as prefix in the naming of files in the data set, * so that files are not uploaded repeatedly. * * @author Gerald de Jong <gerald@delving.eu> */ public class Hasher { private static final String SEPARATOR = "__"; private static final int BLOCK_SIZE = 4096; private static final int QUICK_SAMPLES = 8; public static final int QUICK_SAMPLE_SIZE = 1024; private MessageDigest messageDigest; public static String extractFileName(File file) { String fileName = file.getName(); int hashSeparator = fileName.indexOf(SEPARATOR); if (hashSeparator > 0) { fileName = fileName.substring(hashSeparator + 2); } return fileName; } public static String extractHash(File file) { return extractHashFromFileName(file.getName()); } public static String extractHashFromFileName(String fileName) { int hashSeparator = fileName.indexOf(SEPARATOR); if (hashSeparator > 0) { return fileName.substring(0, hashSeparator); } else { return null; } } public static File ensureFileHashed(File file) throws IOException { if (file.getName().contains(SEPARATOR)) { return file; } else { Hasher hasher = new Hasher(); hasher.update(file); File hashedFile = new File(file.getParentFile(), hasher.prefixFileName(file.getName())); if (hashedFile.exists()) FileUtils.deleteQuietly(hashedFile); FileUtils.moveFile(file, hashedFile); return hashedFile; } } public static boolean checkHash(File file) throws IOException { String hash = extractHash(file); Hasher hasher = new Hasher(); hasher.update(file); return hash.equals(hasher.getHashString()); } public static String quickHash(File file) throws IOException { Hasher hasher = new Hasher(); RandomAccessFile raf = new RandomAccessFile(file, "r"); byte[] chunk = new byte[QUICK_SAMPLE_SIZE]; long length = raf.length() - chunk.length; long step = length / QUICK_SAMPLES; for (int walk = 0; walk < QUICK_SAMPLES; walk++) { raf.seek(step * walk); raf.readFully(chunk); hasher.update(chunk, chunk.length); } raf.close(); return hasher.getHashString().substring(4, 14); } public Hasher() { try { this.messageDigest = MessageDigest.getInstance("MD5"); } catch (NoSuchAlgorithmException e) { throw new RuntimeException("MD5 not available??"); } } public DigestOutputStream createDigestOutputStream(OutputStream outputStream) { messageDigest.reset(); return new DigestOutputStream(outputStream, messageDigest); } public void update(String string) { byte[] bytes = new byte[0]; try { bytes = string.getBytes("UTF-8"); } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); } update(bytes, bytes.length); } public void update(byte[] buffer, int bytes) { messageDigest.update(buffer, 0, bytes); } public void update(File inputFile) throws IOException { try { InputStream inputStream = new FileInputStream(inputFile); if (inputFile.getName().endsWith(".gz")) { inputStream = new GZIPInputStream(inputStream); } byte[] buffer = new byte[BLOCK_SIZE]; int bytesRead; while (-1 != (bytesRead = inputStream.read(buffer))) { update(buffer, bytesRead); } inputStream.close(); } catch (Exception e) { throw new IOException("Unable to get hash of " + inputFile.getAbsolutePath(), e); } } public byte[] getHash(String value) { try { return messageDigest.digest(value.getBytes("UTF-8")); } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); } } public String getHashString(String value) { try { return toHexadecimal(messageDigest.digest(value.getBytes("UTF-8"))); } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); } } public String getHashString() { return toHexadecimal(messageDigest.digest()); } public String prefixFileName(String fileName) { return getHashString() + SEPARATOR + fileName; } static final String HEXES = "0123456789ABCDEF"; private static String toHexadecimal(byte[] raw) { final StringBuilder hex = new StringBuilder(2 * raw.length); for (final byte b : raw) { hex.append(HEXES.charAt((b & 0xF0) >> 4)).append(HEXES.charAt((b & 0x0F))); } return hex.toString(); } }