List of usage examples for java.util.zip.CRC32.getValue()
Method signature: public long getValue(). Returns the CRC-32 checksum computed over all bytes passed to update() so far.
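Before the project examples below, a minimal self-contained sketch of the usual pattern: feed bytes to a CRC32 instance with update(), then read the checksum via getValue(). The class name Crc32Demo and the sample input are only for illustration.

import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;

public class Crc32Demo {
    public static void main(String[] args) {
        CRC32 crc = new CRC32();
        // Feed the bytes to be checksummed; update() may be called repeatedly.
        crc.update("hello".getBytes(StandardCharsets.UTF_8));
        // getValue() returns the CRC-32 of everything updated so far,
        // as an unsigned 32-bit value stored in the low bits of a long.
        System.out.println(Long.toHexString(crc.getValue()));
        // reset() clears the checksum so the instance can be reused.
        crc.reset();
    }
}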
From source file:org.commoncrawl.util.MultiFileMergeUtils.java
public static void main(String[] args) {
    Path testPath = new Path(args[0]);

    LOG.info("Initializing Hadoop Config");

    Configuration conf = new Configuration();

    conf.addResource("nutch-default.xml");
    conf.addResource("nutch-site.xml");
    conf.addResource("mapred-site.xml");
    conf.addResource("hdfs-site.xml");
    conf.addResource("commoncrawl-default.xml");
    conf.addResource("commoncrawl-site.xml");

    conf.setClass(MultiFileInputReader.MULTIFILE_COMPARATOR_CLASS, URLFPV2RawComparator.class,
            RawComparator.class);
    conf.setClass(MultiFileInputReader.MULTIFILE_KEY_CLASS, URLFPV2.class, WritableComparable.class);

    CrawlEnvironment.setHadoopConfig(conf);
    CrawlEnvironment.setDefaultHadoopFSURI("hdfs://ccn01:9000/");

    try {
        FileSystem fs = CrawlEnvironment.getDefaultFileSystem();

        Vector<Path> paths = new Vector<Path>();

        paths.add(new Path(testPath, "part-00000"));
        // paths.add(new Path(testPath,"part-00000"));
        paths.add(new Path(testPath, "part-00001"));

        TreeSet<URLFPV2> directReadSet = new TreeSet<URLFPV2>();
        TreeSet<URLFPV2> multiFileReadSet = new TreeSet<URLFPV2>();

        MultiFileInputReader<URLFPV2> inputReader = new MultiFileInputReader<URLFPV2>(fs, paths, conf);

        KeyAndValueData<URLFPV2> keyValueData = null;
        int multiFileKeyCount = 0;
        while ((keyValueData = inputReader.readNextItem()) != null) {
            LOG.info("Got Key Domain:" + keyValueData._keyObject.getDomainHash() + " URLHash:"
                    + keyValueData._keyObject.getUrlHash() + " Item Count:" + keyValueData._values.size()
                    + " Path[0]:" + keyValueData._values.get(0).source);

            if (keyValueData._values.size() > 1) {
                LOG.error("Got more than one item");
                for (int i = 0; i < keyValueData._values.size(); ++i) {
                    CRC32 crc = new CRC32();
                    crc.update(keyValueData._keyData.getData(), 0, keyValueData._keyData.getLength());
                    LOG.error("Item at[" + i + "] Path:" + keyValueData._values.get(i).source + " CRC:"
                            + crc.getValue());
                }
            }
            if (multiFileKeyCount++ < 1000)
                multiFileReadSet.add((URLFPV2) keyValueData._keyObject.clone());
        }
        inputReader.close();

        addFirstNFPItemsToSet(fs, new Path(testPath, "part-00000"), conf, directReadSet, 1000);
        addFirstNFPItemsToSet(fs, new Path(testPath, "part-00001"), conf, directReadSet, 1000);

        Iterator<URLFPV2> directReadIterator = directReadSet.iterator();
        Iterator<URLFPV2> multiFileReadIterator = multiFileReadSet.iterator();

        for (int i = 0; i < 1000; ++i) {
            URLFPV2 directReadFP = directReadIterator.next();
            URLFPV2 multiFileReadFP = multiFileReadIterator.next();

            if (directReadFP.compareTo(multiFileReadFP) != 0) {
                LOG.info("Mismatch at Index:" + i);
            }
        }
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    } catch (CloneNotSupportedException e) {
        e.printStackTrace();
    }
}
From source file:Main.java
public static long getStringCRC(String localData) {
    if (localData == null)
        return 0;
    CRC32 crc = new CRC32();
    crc.update(localData.getBytes());
    return crc.getValue();
}
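The helper above calls localData.getBytes() without a charset, so the checksum depends on the JVM's default encoding. A minimal sketch of a charset-explicit variant; the names StringCrcUtil and getStringCRCUtf8 are illustrative, not from the source project.

import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;

public final class StringCrcUtil {
    // Same idea as above, but the explicit UTF-8 encoding makes the
    // checksum stable across platforms and locales.
    public static long getStringCRCUtf8(String localData) {
        if (localData == null)
            return 0;
        CRC32 crc = new CRC32();
        crc.update(localData.getBytes(StandardCharsets.UTF_8));
        return crc.getValue();
    }
}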
From source file:com.esri.geoportal.harvester.api.base.SimpleScrambler.java
/**
 * Encodes string.
 * @param txt string to encode
 * @return encoded string or <code>null</code> if error encoding string
 */
public static String encode(String txt) {
    txt = StringUtils.defaultIfEmpty(txt, "");
    try {
        CRC32 crC32 = new CRC32();
        crC32.update(txt.getBytes("UTF-8"));
        long crc = crC32.getValue();
        String crctxt = String.format("%10d%s", crc, txt);
        Base64.Encoder encoder = Base64.getEncoder();
        return encoder.encodeToString(crctxt.getBytes("UTF-8"));
    } catch (UnsupportedEncodingException ex) {
        return null;
    }
}
From source file:com.esri.geoportal.harvester.api.base.SimpleScrambler.java
/**
 * Decodes string.
 * @param encoded encoded string to decode
 * @return decoded string or <code>null</code> if error decoding string
 */
public static String decode(String encoded) {
    try {
        encoded = StringUtils.defaultIfEmpty(encoded, "");
        Base64.Decoder decoder = Base64.getDecoder();
        String crctxt = new String(decoder.decode(encoded), "UTF-8");
        if (crctxt.length() < 10) {
            return null;
        }
        long crc = Long.parseLong(StringUtils.trimToEmpty(crctxt.substring(0, 10)));
        String txt = crctxt.substring(10);
        CRC32 crC32 = new CRC32();
        crC32.update(txt.getBytes("UTF-8"));
        if (crc != crC32.getValue()) {
            return null;
        }
        return txt;
    } catch (NumberFormatException | UnsupportedEncodingException ex) {
        return null;
    }
}
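Taken together, encode and decode form a simple integrity check: decode returns the original text only while the embedded CRC-32 still matches it. A hedged usage sketch, assuming the SimpleScrambler class above is on the classpath; the class name ScramblerRoundTrip is ours.

import com.esri.geoportal.harvester.api.base.SimpleScrambler;

public class ScramblerRoundTrip {
    public static void main(String[] args) {
        // Round trip: decode() recomputes the CRC-32 of the embedded text
        // and returns it only if the checksum prefix still matches.
        String encoded = SimpleScrambler.encode("secret");
        System.out.println(SimpleScrambler.decode(encoded)); // prints "secret"
    }
}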
From source file:org.apache.tika.server.writer.ZipWriter.java
private static void zipStoreBuffer(ZipArchiveOutputStream zip, String name, byte[] dataBuffer)
        throws IOException {
    ZipEntry zipEntry = new ZipEntry(name != null ? name : UUID.randomUUID().toString());
    zipEntry.setMethod(ZipOutputStream.STORED);
    zipEntry.setSize(dataBuffer.length);

    CRC32 crc32 = new CRC32();
    crc32.update(dataBuffer);
    zipEntry.setCrc(crc32.getValue());

    try {
        zip.putArchiveEntry(new ZipArchiveEntry(zipEntry));
    } catch (ZipException ex) {
        if (name != null) {
            zipStoreBuffer(zip, "x-" + name, dataBuffer);
            return;
        }
    }

    zip.write(dataBuffer);
    zip.closeArchiveEntry();
}
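The example above precomputes the CRC because STORED (uncompressed) zip entries must declare their size and CRC-32 before the entry is opened. A minimal sketch of the same pattern with the plain java.util.zip.ZipOutputStream; the output file name, entry name, and class name StoredEntryDemo are made up for illustration.

import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

public class StoredEntryDemo {
    public static void main(String[] args) throws IOException {
        byte[] data = "payload".getBytes(StandardCharsets.UTF_8);
        try (ZipOutputStream zip = new ZipOutputStream(new FileOutputStream("demo.zip"))) {
            ZipEntry entry = new ZipEntry("payload.txt");
            entry.setMethod(ZipEntry.STORED);
            // STORED entries require size and CRC-32 up front, since nothing
            // is computed during compression.
            entry.setSize(data.length);
            CRC32 crc = new CRC32();
            crc.update(data);
            entry.setCrc(crc.getValue());
            zip.putNextEntry(entry);
            zip.write(data);
            zip.closeEntry();
        }
    }
}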
From source file:brut.directory.ZipUtils.java
private static void processFolder(final File folder, final ZipOutputStream zipOutputStream,
        final int prefixLength) throws BrutException, IOException {
    for (final File file : folder.listFiles()) {
        if (file.isFile()) {
            final String cleanedPath = BrutIO.sanitizeUnknownFile(folder,
                    file.getPath().substring(prefixLength));
            final ZipEntry zipEntry = new ZipEntry(BrutIO.normalizePath(cleanedPath));

            // aapt binary by default takes in parameters via -0 arsc to list extensions that shouldn't be
            // compressed. We will replicate that behavior
            final String extension = FilenameUtils.getExtension(file.getAbsolutePath());
            if (mDoNotCompress != null
                    && (mDoNotCompress.contains(extension) || mDoNotCompress.contains(zipEntry.getName()))) {
                zipEntry.setMethod(ZipEntry.STORED);
                zipEntry.setSize(file.length());
                BufferedInputStream unknownFile = new BufferedInputStream(new FileInputStream(file));
                CRC32 crc = BrutIO.calculateCrc(unknownFile);
                zipEntry.setCrc(crc.getValue());
                unknownFile.close();
            } else {
                zipEntry.setMethod(ZipEntry.DEFLATED);
            }

            zipOutputStream.putNextEntry(zipEntry);
            try (FileInputStream inputStream = new FileInputStream(file)) {
                IOUtils.copy(inputStream, zipOutputStream);
            }
            zipOutputStream.closeEntry();
        } else if (file.isDirectory()) {
            processFolder(file, zipOutputStream, prefixLength);
        }
    }
}
From source file:com.haulmont.cuba.core.sys.logging.LogArchiver.java
private static ArchiveEntry newTailArchive(String name, byte[] tail) {
    ZipArchiveEntry zipEntry = new ZipArchiveEntry(name);
    zipEntry.setSize(tail.length);
    zipEntry.setCompressedSize(zipEntry.getSize());
    CRC32 crc32 = new CRC32();
    crc32.update(tail);
    zipEntry.setCrc(crc32.getValue());
    return zipEntry;
}
From source file:com.hortonworks.registries.storage.tool.shell.ShellMigrationResolver.java
/**
 * Calculates the checksum of these bytes.
 *
 * @param bytes The bytes to calculate the checksum for.
 * @return The crc-32 checksum of the bytes.
 */
private static int calculateChecksum(byte[] bytes) {
    final CRC32 crc32 = new CRC32();
    crc32.update(bytes);
    return (int) crc32.getValue();
}
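getValue() returns the 32-bit checksum in the low bits of a long, so the cast above keeps every bit but can produce a negative int. A small sketch of that round trip; the class name ChecksumWidthDemo and the sample bytes are illustrative.

import java.util.zip.CRC32;

public class ChecksumWidthDemo {
    public static void main(String[] args) {
        CRC32 crc32 = new CRC32();
        crc32.update(new byte[] { 1, 2, 3 });
        long asLong = crc32.getValue();   // always in [0, 2^32 - 1]
        int asInt = (int) asLong;         // same bit pattern, may be negative
        // Integer.toUnsignedLong recovers the original unsigned value.
        System.out.println(asLong == Integer.toUnsignedLong(asInt)); // true
    }
}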
From source file:org.trellisldp.rosid.file.FileUtils.java
/**
 * Partition an identifier into a directory structure
 * @param identifier the identifier
 * @return a string usable as a directory path
 */
public static String partition(final String identifier) {
    requireNonNull(identifier, "identifier must not be null!");

    final StringJoiner joiner = new StringJoiner(separator);
    final CRC32 hasher = new CRC32();
    hasher.update(identifier.getBytes(UTF_8));
    final String intermediate = Long.toHexString(hasher.getValue());

    range(0, intermediate.length() / LENGTH).limit(MAX)
            .forEach(i -> joiner.add(intermediate.substring(i * LENGTH, (i + 1) * LENGTH)));

    joiner.add(md5Hex(identifier));

    return joiner.toString();
}
From source file:org.bdval.util.ShortHash.java
/**
 * Return a short hash (String of 5 chars, A-Z) of the contents of toHash.
 * @param toHash the content to hash
 * @return the short hash
 */
public static String shortHash(final String toHash) {
    if (StringUtils.isBlank(toHash)) {
        return null;
    }
    // Get the CRC32 checksum of the string (CRC will clash less often than the Adler checksum for short strings)
    final CRC32 crc32 = new CRC32();
    crc32.update(toHash.getBytes());
    // Map it from a long to an int with mod
    final int checksum = (int) (crc32.getValue() % Integer.MAX_VALUE);

    final StringBuilder output = new StringBuilder();
    for (int i = 0; i < MASKS.length; i++) {
        // Mask the value, shift it to the right, and mod it to the output-able characters
        final int partial = ((checksum & MASKS[i]) >> MASK_SHIFTS[i]) % HASH_CHARS.length;
        final char asChar = HASH_CHARS[partial];
        output.append(asChar);
    }
    LOG.debug(String.format("hash=%s for string=%s", output.toString(), toHash));
    return output.toString();
}