Example usage for java.util.zip CRC32 getValue

Introduction

This page collects example usages of java.util.zip.CRC32.getValue(), drawn from open-source projects.

Prototype

@Override
public long getValue() 

Document

Returns the CRC-32 value.
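
Example

A minimal, self-contained sketch of the usual update()/getValue() pattern; the class name and input bytes here are illustrative only, not taken from the projects below.

import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;

public class Crc32GetValueExample {
    public static void main(String[] args) {
        byte[] data = "hello, world".getBytes(StandardCharsets.UTF_8);

        CRC32 crc = new CRC32();
        crc.update(data, 0, data.length);

        // getValue() returns the CRC-32 as an unsigned 32-bit quantity
        // widened to a long, i.e. in the range 0..0xFFFFFFFFL.
        long checksum = crc.getValue();
        System.out.printf("crc32: %08x%n", checksum);

        // reset() clears the state so the same instance can checksum new data.
        crc.reset();
    }
}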

Usage

From source file: com.redskyit.scriptDriver.RunTests.java
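This Selenium helper dumps element diagnostics; when an element's text spans multiple lines, it prints a compact crc32 checksum of the text (via crc.getValue()) instead of the text itself.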

private void info(WebElement element, String selector, boolean verify) throws Exception {
    do {
        try {
            Point loc = element.getLocation();
            Dimension size = element.getSize();
            String tag = element.getTagName();
            System.out
                    .print(null == selector ? "test-id \"" + element.getAttribute("test-id") + "\"" : selector);
            System.out.print(" info");
            System.out.print(" tag " + tag);
            System.out.print((element.isDisplayed() ? "" : " not") + " displayed");
            System.out.print(" at " + loc.x + "," + loc.y);
            System.out.print(" size " + size.width + "," + size.height);
            System.out.print((element.isEnabled() ? "" : " not") + " enabled");
            System.out.print((element.isSelected() ? "" : " not") + " selected");
            if (tag.equals("input") || tag.equals("select")) {
                System.out.print(" check \"" + element.getAttribute("value") + "\"");
            } else {
                String text = tag.equals("textarea") ? element.getAttribute("value") : element.getText();
                if (text.indexOf('\n') != -1) {
                    CRC32 crc = new CRC32();
                    crc.update(text.getBytes());
                    System.out.print(" checksum \"crc32:" + crc.getValue() + "\"");
                } else {
                    System.out.print(" check \"" + element.getText() + "\"");
                }
            }
            System.out.println();
            return;
        } catch (StaleElementReferenceException e) {
            // If element has gone stale during a dump, ignore it
            if (!verify)
                return;
            // element has gone stale, re-select it
            System.out.println("// EXCEPTION : StaleElementReference");
        } catch (Exception e) {
            if (verify)
                throw e;
            return;
        }
        sleepAndReselect(100);
    } while (_waitFor > 0 && (new Date()).getTime() < _waitFor);
}

From source file: org.exist.xquery.modules.compression.AbstractCompressFunction.java
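Here a CRC-32 is computed over the resource bytes so that a STORED (uncompressed) zip entry can be given the CRC and size that the zip format requires for that method.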

/**
 * Adds a document to an archive
 * 
 * @param os
 *            The Output Stream to add the document to
 * @param doc
 *            The document to add to the archive
 * @param useHierarchy
 *            Whether to use a folder hierarchy in the archive file that
 *            reflects the collection hierarchy
 * @param stripOffset
 *            Leading collection path to strip from archive entry names
 * @param method
 *            Compression method; "store" writes an uncompressed (STORED) zip entry
 * @param name
 *            Explicit entry name; overrides the document URI when non-null
 */
private void compressResource(OutputStream os, DocumentImpl doc, boolean useHierarchy, String stripOffset,
        String method, String name) throws IOException, SAXException {
    // create an entry in the archive for the document
    Object entry = null;
    byte[] value = new byte[0];
    CRC32 chksum = new CRC32();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();

    if (name != null) {
        entry = newEntry(name);
    } else if (useHierarchy) {
        String docCollection = doc.getCollection().getURI().toString();
        XmldbURI collection = XmldbURI.create(removeLeadingOffset(docCollection, stripOffset));
        entry = newEntry(collection.append(doc.getFileURI()).toString());
    } else {
        entry = newEntry(doc.getFileURI().toString());
    }

    if (doc.getResourceType() == DocumentImpl.XML_FILE) {
        // xml file
        Serializer serializer = context.getBroker().getSerializer();
        serializer.setUser(context.getUser());
        serializer.setProperty("omit-xml-declaration", "no");
        getDynamicSerializerOptions(serializer);
        String strDoc = serializer.serialize(doc);
        value = strDoc.getBytes();
    } else if (doc.getResourceType() == DocumentImpl.BINARY_FILE) {
        // binary file
        InputStream is = context.getBroker().getBinaryResource((BinaryDocument) doc);
        byte[] data = new byte[16384];
        int len = 0;
        while ((len = is.read(data, 0, data.length)) > 0) {
            baos.write(data, 0, len);
        }
        is.close();
        value = baos.toByteArray();
    }
    // close the entry
    if (entry instanceof ZipEntry && "store".equals(method)) {
        ((ZipEntry) entry).setMethod(ZipOutputStream.STORED);
        chksum.update(value);
        ((ZipEntry) entry).setCrc(chksum.getValue());
        ((ZipEntry) entry).setSize(value.length);
    }

    putEntry(os, entry);
    os.write(value);
    closeEntry(os);
}

From source file: org.commoncrawl.service.listcrawler.CrawlHistoryManager.java
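This method re-packs a crawl-history log and prefixes the cached file with crc.getValue(), written as a long header, so the file can be validated when read back.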

private void cacheCrawlHistoryLog(File localCacheDir, long timestamp) throws IOException {

    SequenceFile.Reader reader = null;
    Path mapFilePath = new Path(_remoteDataDirectory, CRAWL_HISTORY_HDFS_LOGFILE_PREFIX + timestamp);
    Path indexFilePath = new Path(mapFilePath, "index");
    Path dataFilePath = new Path(mapFilePath, "data");
    File cacheFilePath = new File(localCacheDir, CRAWL_HISTORY_HDFS_LOGFILE_PREFIX + timestamp);

    SequenceFile.Reader indexReader = new SequenceFile.Reader(_remoteFileSystem, dataFilePath,
            CrawlEnvironment.getHadoopConfig());

    ValueBytes valueBytes = indexReader.createValueBytes();
    DataOutputBuffer keyBytes = new DataOutputBuffer();
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataOutputBuffer finalOutputStream = new DataOutputBuffer();
    DataOutputBuffer uncompressedValueBytes = new DataOutputBuffer();
    URLFP fp = new URLFP();

    try {
        while (indexReader.nextRaw(keyBytes, valueBytes) != -1) {

            keyBuffer.reset(keyBytes.getData(), 0, keyBytes.getLength());
            // read fingerprint ...
            fp.readFields(keyBuffer);
            // write hash only
            finalOutputStream.writeLong(fp.getUrlHash());
            uncompressedValueBytes.reset();
            // write value bytes to intermediate buffer ...
            valueBytes.writeUncompressedBytes(uncompressedValueBytes);
            // write out uncompressed length
            WritableUtils.writeVInt(finalOutputStream, uncompressedValueBytes.getLength());
            // write out bytes
            finalOutputStream.write(uncompressedValueBytes.getData(), 0, uncompressedValueBytes.getLength());
        }
        // delete existing ...
        cacheFilePath.delete();
        // compute crc ...
        CRC32 crc = new CRC32();
        crc.update(finalOutputStream.getData(), 0, finalOutputStream.getLength());
        // open final output stream
        DataOutputStream fileOutputStream = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(cacheFilePath)));

        try {
            fileOutputStream.writeLong(crc.getValue());
            fileOutputStream.write(finalOutputStream.getData(), 0, finalOutputStream.getLength());
            fileOutputStream.flush();
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
            fileOutputStream.close();
            fileOutputStream = null;
            cacheFilePath.delete();
            throw e;
        } finally {
            if (fileOutputStream != null) {
                fileOutputStream.close();
            }
        }
    } finally {
        if (indexReader != null) {
            indexReader.close();
        }
    }
}

From source file: org.commoncrawl.service.crawler.CrawlList.java
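Each serialized crawl target is checksummed and crc.getValue() is written ahead of the record data; the 32-bit value is stored as a long to avoid sign-promotion trouble on read (see the TODO below).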

private static void appendTargetsToLogFile(File logFileName, IntrusiveList<CrawlTarget> list)
        throws IOException {

    LogFileHeader header = new LogFileHeader();

    boolean preExistingHeader = logFileName.exists();

    RandomAccessFile file = new RandomAccessFile(logFileName, "rw");

    try {
        long headerOffset = 0;

        if (preExistingHeader) {

            headerOffset = readLogFileHeader(file, header);

            if (header._writePos == 0) {
                file.seek(headerOffset);
            } else {
                // seek to the appropriate write position
                file.seek(header._writePos);
            }
        } else {
            headerOffset = writeLogFileHeader(file, header);
        }

        CustomByteArrayOutputStream bufferOutputStream = new CustomByteArrayOutputStream(1 << 17);
        DataOutputStream dataOutputStream = new DataOutputStream(bufferOutputStream);
        CRC32 crc = new CRC32();

        for (CrawlTarget target : list) {

            PersistentCrawlTarget persistentTarget = target.createPersistentTarget();

            bufferOutputStream.reset();
            // write to intermediate stream ... 
            persistentTarget.write(dataOutputStream);
            // and crc the data ... 
            crc.reset();
            crc.update(bufferOutputStream.getBuffer(), 0, bufferOutputStream.size());
            // write out length first 
            file.writeInt(bufferOutputStream.size());
            //crc next
            long computedValue = crc.getValue();
            //TODO: waste of space - write 32 bit values as long because having problems with java sign promotion rules during read...
            file.writeLong(computedValue);
            // and then the data 
            file.write(bufferOutputStream.getBuffer(), 0, bufferOutputStream.size());
        }

        // now update header ... 
        header._itemCount += list.size();
        header._writePos = file.getFilePointer();

        // now write out header anew ... 
        writeLogFileHeader(file, header);

    } finally {
        if (file != null) {
            file.close();
        }
    }
}

From source file: org.commoncrawl.service.listcrawler.CacheManager.java
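A CheckedInputStream accumulates the CRC while the record is deserialized; crc32.getValue() is then compared against the CRC stored on disk to detect corruption.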

/**
 * loadCacheItemFromDisk - load a single cache item from disk
 *
 * @param file the cache file stream, positioned at the start of the record
 * @param optTargetURL the expected URL, or an empty string to accept any record
 * @param location the record's file offset, used for log messages
 * @return the validated CacheItem, or null if sync, crc, or url validation failed
 * @throws IOException
 */
private CacheItem loadCacheItemFromDisk(FileInputStream file, String optTargetURL, long location)
        throws IOException {

    long timeStart = System.currentTimeMillis();

    // and read out the Item Header ...  
    CacheItemHeader itemHeader = new CacheItemHeader();
    itemHeader.readHeader(new DataInputStream(file));
    // see if it is valid ... 
    if (!Arrays.equals(itemHeader._sync, _header._sync)) {
        LOG.error("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                + " failed - corrupt sync bytes detected!!!");
    } else {
        CRC32 crc32 = new CRC32();
        // ok deserialize the bytes ... 
        CacheItem item = new CacheItem();
        CheckedInputStream checkedStream = new CheckedInputStream(file, crc32);
        DataInputStream itemStream = new DataInputStream(checkedStream);
        item.readFields(itemStream);
        // read the content buffer length 
        int contentBufferLen = itemStream.readInt();
        if (contentBufferLen != 0) {
            byte data[] = new byte[contentBufferLen];
            itemStream.readFully(data); // read(byte[]) may return short; readFully fills the buffer
            item.setContent(new Buffer(data));
        }

        // cache crc 
        long crcValueComputed = crc32.getValue();
        // read disk crc 
        long crcValueOnDisk = itemStream.readLong();
        // validate 
        if (crcValueComputed == crcValueOnDisk) {
            String canonicalURL = URLUtils.canonicalizeURL(item.getUrl(), true);
            if (optTargetURL.length() == 0 || optTargetURL.equals(canonicalURL)) {
                if (isValidCacheItem(item)) {
                    LOG.info("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                            + " completed in:" + (System.currentTimeMillis() - timeStart));
                    return item;
                } else {
                    LOG.info("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                            + " failed with invalid result code");
                }

            } else {
                LOG.info("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                        + " failed with url mismatch. record url:" + item.getUrl());
            }
        } else {
            LOG.error("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                    + " failed - crc mismatch!!!");
        }
    }
    return null;
}

From source file: org.commoncrawl.service.crawler.CrawlList.java
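The reader recomputes each record's CRC-32 and compares crc.getValue() with the stored value, treating any mismatch as log-file corruption.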

private static int readTargetsFromLogFile(CrawlList domain, File logFileName, int desiredReadAmount,
        IntrusiveList<CrawlTarget> targetsOut) throws IOException {

    int itemsRead = 0;

    if (logFileName.exists()) {

        RandomAccessFile file = new RandomAccessFile(logFileName, "rw");

        LogFileHeader header = new LogFileHeader();

        try {

            long headerOffset = readLogFileHeader(file, header);

            // seek to the appropriate read position
            if (header._readPos != 0)
                file.seek(header._readPos);

            int itemsToRead = Math.min(desiredReadAmount, header._itemCount);

            PersistentCrawlTarget persistentTarget = new PersistentCrawlTarget();
            CRC32 crc = new CRC32();
            CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 16);

            for (int i = 0; i < itemsToRead; ++i) {
                // read length ... 
                int urlDataLen = file.readInt();
                long urlDataCRC = file.readLong();

                buffer.reset();

                if (urlDataLen > buffer.getBuffer().length) {
                    buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                }
                file.readFully(buffer.getBuffer(), 0, urlDataLen); // read() may return short; readFully reads the whole record
                crc.reset();
                crc.update(buffer.getBuffer(), 0, urlDataLen);

                long computedValue = crc.getValue();

                // validate crc values ... 
                if (computedValue != urlDataCRC) {
                    throw new IOException("Crawl Target Log File Corrupt");
                } else {
                    //populate a persistentTarget from the (in memory) data stream
                    DataInputStream bufferReader = new DataInputStream(
                            new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));

                    persistentTarget.clear();
                    persistentTarget.readFields(bufferReader);

                    //populate a new crawl target structure ... 
                    CrawlTarget newTarget = new CrawlTarget(domain, persistentTarget);

                    targetsOut.addTail(newTarget);
                }
            }

            itemsRead = itemsToRead;

            // now update header ... 
            header._itemCount -= itemsRead;
            // now if item count is non zero ... 
            if (header._itemCount != 0) {
                // set read cursor to next record location 
                header._readPos = file.getFilePointer();
            }
            // otherwise ... 
            else {
                // reset both cursors ... 
                header._readPos = 0;
                header._writePos = 0;
            }

            // now write out header anew ... 
            writeLogFileHeader(file, header);
        } finally {
            if (file != null) {
                file.close();
            }
        }
    }
    return itemsRead;
}

From source file: org.commoncrawl.service.crawler.CrawlLog.java
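During transfer to HDFS, each log record's CRC-32 is recomputed; when crc.getValue() disagrees with the stored checksum, the record is skipped by reseeking just past the last sync position.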

private static void transferLocalCheckpointLog(File crawlLogPath, HDFSCrawlURLWriter writer, long checkpointId)
        throws IOException {

    // and open the crawl log file ...
    RandomAccessFile inputStream = null;

    IOException exception = null;

    CRC32 crc = new CRC32();
    CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 17);
    byte[] syncBytesBuffer = new byte[SYNC_BYTES_SIZE];

    // save position for potential debug output.
    long lastReadPosition = 0;

    try {
        inputStream = new RandomAccessFile(crawlLogPath, "rw");
        // and a data input stream ...
        RandomAccessFile reader = inputStream;
        // seek to zero
        reader.seek(0L);

        // read the header ...
        LogFileHeader header = readLogFileHeader(reader);

        // read a crawl url from the stream...

        while (inputStream.getFilePointer() < header._fileSize) {

            if (seekToNextSyncBytesPos(syncBytesBuffer, reader, header._fileSize)) {

                try {
                    lastReadPosition = inputStream.getFilePointer();

                    // skip sync
                    inputStream.skipBytes(SYNC_BYTES_SIZE);

                    // read length ...
                    int urlDataLen = reader.readInt();
                    long urlDataCRC = reader.readLong();

                    if (urlDataLen > buffer.getBuffer().length) {
                        buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                    }
                    reader.readFully(buffer.getBuffer(), 0, urlDataLen); // read() may return short; readFully reads the whole record
                    crc.reset();
                    crc.update(buffer.getBuffer(), 0, urlDataLen);

                    long computedValue = crc.getValue();

                    // validate crc values ...
                    if (computedValue != urlDataCRC) {
                        LOG.error("CRC Mismatch Detected during HDFS transfer in CrawlLog:"
                                + crawlLogPath.getAbsolutePath() + " Checkpoint Id:" + checkpointId
                                + " FilePosition:" + lastReadPosition);
                        inputStream.seek(lastReadPosition + 1);
                    } else {
                        // allocate a crawl url data structure
                        CrawlURL url = new CrawlURL();
                        DataInputStream bufferReader = new DataInputStream(
                                new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));
                        // populate it from the (in memory) data stream
                        url.readFields(bufferReader);
                        try {
                            // and write out appropriate sequence file entries ...
                            writer.writeCrawlURLItem(new Text(url.getUrl()), url);
                        } catch (IOException e) {
                            LOG.error("Failed to write CrawlURL to SequenceFileWriter with Exception:"
                                    + CCStringUtils.stringifyException(e));
                            throw new URLWriterException();
                        }
                    }
                } catch (URLWriterException e) {
                    LOG.error("Caught URLRewriter Exception! - Throwing to outer layer!");
                    throw e;
                } catch (Exception e) {
                    LOG.error("Ignoring Error Processing CrawlLog Entry at Position:" + lastReadPosition
                            + " Exception:" + CCStringUtils.stringifyException(e));
                }
            } else {
                break;
            }
        }
    } catch (EOFException e) {
        LOG.error("Caught EOF Exception during read of local CrawlLog:" + crawlLogPath.getAbsolutePath()
                + " Checkpoint Id:" + checkpointId + " FilePosition:" + lastReadPosition);
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        exception = e;
        throw e;
    } finally {
        if (inputStream != null)
            inputStream.close();
    }
}

From source file: ee.sk.digidoc.SignedDoc.java
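BDOC containers store some entries uncompressed, so each ZipArchiveEntry's CRC is computed manually and set with ze.setCrc(crc.getValue()).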

/**
 * Writes the SignedDoc to an output stream
 * and automatically calculates DataFile sizes
 * and digests.
 * @param os output stream to write the document to
 * @throws DigiDocException for all errors
 */
public void writeToStream(OutputStream os/*, File fTempSdoc*/) throws DigiDocException {
    DigiDocException ex1 = validateFormatAndVersion();
    if (ex1 != null)
        throw ex1;
    try {
        DigiDocXmlGenFactory genFac = new DigiDocXmlGenFactory(this);
        if (m_format.equals(SignedDoc.FORMAT_BDOC)) {
            ZipArchiveOutputStream zos = new ZipArchiveOutputStream(os);
            zos.setEncoding("UTF-8");
            if (m_logger.isDebugEnabled())
                m_logger.debug("OS: " + ((os != null) ? "OK" : "NULL"));
            // write mimetype
            if (m_logger.isDebugEnabled())
                m_logger.debug("Writing: " + MIMET_FILE_NAME);
            ZipArchiveEntry ze = new ZipArchiveEntry(MIMET_FILE_NAME);
            if (m_comment == null)
                m_comment = DigiDocGenFactory.getUserInfo(m_format, m_version);
            ze.setComment(m_comment);
            ze.setMethod(ZipArchiveEntry.STORED);
            java.util.zip.CRC32 crc = new java.util.zip.CRC32();
            if (m_version.equals(BDOC_VERSION_1_0)) {
                ze.setSize(SignedDoc.MIMET_FILE_CONTENT_10.getBytes().length);
                crc.update(SignedDoc.MIMET_FILE_CONTENT_10.getBytes());
            }
            if (m_version.equals(BDOC_VERSION_1_1)) {
                ze.setSize(SignedDoc.MIMET_FILE_CONTENT_11.getBytes().length);
                crc.update(SignedDoc.MIMET_FILE_CONTENT_11.getBytes());
            }
            if (m_version.equals(BDOC_VERSION_2_1)) {
                ze.setSize(SignedDoc.MIMET_FILE_CONTENT_20.getBytes().length);
                crc.update(SignedDoc.MIMET_FILE_CONTENT_20.getBytes());
            }
            ze.setCrc(crc.getValue());
            zos.putArchiveEntry(ze);
            if (m_version.equals(BDOC_VERSION_1_0)) {
                zos.write(SignedDoc.MIMET_FILE_CONTENT_10.getBytes());
            }
            if (m_version.equals(BDOC_VERSION_1_1)) {
                zos.write(SignedDoc.MIMET_FILE_CONTENT_11.getBytes());
            }
            if (m_version.equals(BDOC_VERSION_2_1)) {
                zos.write(SignedDoc.MIMET_FILE_CONTENT_20.getBytes());
            }
            zos.closeArchiveEntry();
            // write manifest.xml
            if (m_logger.isDebugEnabled())
                m_logger.debug("Writing: " + MANIF_FILE_NAME);
            ze = new ZipArchiveEntry(MANIF_DIR_META_INF);
            ze = new ZipArchiveEntry(MANIF_FILE_NAME);
            ze.setComment(DigiDocGenFactory.getUserInfo(m_format, m_version));
            zos.putArchiveEntry(ze);
            //if(m_logger.isDebugEnabled())
            //   m_logger.debug("Writing manif:\n" + m_manifest.toString());
            zos.write(m_manifest.toXML());
            zos.closeArchiveEntry();
            // write data files
            for (int i = 0; i < countDataFiles(); i++) {
                DataFile df = getDataFile(i);
                if (m_logger.isDebugEnabled())
                    m_logger.debug("Writing DF: " + df.getFileName() + " content: " + df.getContentType()
                            + " df-cache: "
                            + ((df.getDfCacheFile() != null) ? df.getDfCacheFile().getAbsolutePath() : "NONE"));
                InputStream is = null;
                if (df.hasAccessToDataFile())
                    is = df.getBodyAsStream();
                else
                    is = findDataFileAsStream(df.getFileName());
                if (is != null) {
                    File dfFile = new File(df.getFileName());
                    String fileName = dfFile.getName();
                    ze = new ZipArchiveEntry(fileName);
                    if (df.getComment() == null)
                        df.setComment(DigiDocGenFactory.getUserInfo(m_format, m_version));
                    ze.setComment(df.getComment());
                    ze.setSize(dfFile.length());
                    ze.setTime(
                            (df.getLastModDt() != null) ? df.getLastModDt().getTime() : dfFile.lastModified());
                    zos.putArchiveEntry(ze);
                    byte[] data = new byte[2048];
                    int nRead = 0, nTotal = 0;
                    crc = new java.util.zip.CRC32();
                    while ((nRead = is.read(data)) > 0) {
                        zos.write(data, 0, nRead);
                        nTotal += nRead;
                        crc.update(data, 0, nRead);
                    }
                    ze.setSize(nTotal);
                    ze.setCrc(crc.getValue());
                    zos.closeArchiveEntry();
                    is.close();
                }
            }
            for (int i = 0; i < countSignatures(); i++) {
                Signature sig = getSignature(i);
                String sFileName = sig.getPath();
                if (sFileName == null) {
                    if (m_version.equals(BDOC_VERSION_2_1))
                        sFileName = SIG_FILE_NAME_20 + (i + 1) + ".xml";
                    else
                        sFileName = SIG_FILE_NAME + (i + 1) + ".xml";
                }
                if (!sFileName.startsWith("META-INF"))
                    sFileName = "META-INF/" + sFileName;
                if (m_logger.isDebugEnabled())
                    m_logger.debug("Writing SIG: " + sFileName + " orig: "
                            + ((sig.getOrigContent() != null) ? "OK" : "NULL"));
                ze = new ZipArchiveEntry(sFileName);
                if (sig.getComment() == null)
                    sig.setComment(DigiDocGenFactory.getUserInfo(m_format, m_version));
                ze.setComment(sig.getComment());
                String sSig = null;
                if (sig.getOrigContent() != null)
                    sSig = new String(sig.getOrigContent(), "UTF-8");
                else
                    sSig = sig.toString();
                if (sSig != null && !sSig.startsWith("<?xml"))
                    sSig = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + sSig;
                byte[] sdata = sSig.getBytes("UTF-8");
                if (m_logger.isDebugEnabled())
                    m_logger.debug("Writing SIG: " + sFileName + " xml:\n---\n "
                            + ((sSig != null) ? sSig : "NULL") + "\n---\n ");
                ze.setSize(sdata.length);
                crc = new java.util.zip.CRC32();
                crc.update(sdata);
                ze.setCrc(crc.getValue());
                zos.putArchiveEntry(ze);
                zos.write(sdata);
                zos.closeArchiveEntry();
            }
            zos.close();
        } else if (m_format.equals(SignedDoc.FORMAT_DIGIDOC_XML)) { // ddoc format
            os.write(xmlHeader().getBytes());
            for (int i = 0; i < countDataFiles(); i++) {
                DataFile df = getDataFile(i);
                df.writeToFile(os);
                os.write("\n".getBytes());
            }
            for (int i = 0; i < countSignatures(); i++) {
                Signature sig = getSignature(i);
                if (sig.getOrigContent() != null)
                    os.write(sig.getOrigContent());
                else
                    os.write(genFac.signatureToXML(sig));
                os.write("\n".getBytes());
            }
            os.write(xmlTrailer().getBytes());
        }
    } catch (DigiDocException ex) {
        throw ex; // already handled
    } catch (Exception ex) {
        DigiDocException.handleException(ex, DigiDocException.ERR_WRITE_FILE);
    }
}

From source file: org.anarres.lzo.LzopInputStream.java
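The lzop header checksum is either CRC-32 or Adler-32 depending on the header flags, so the code picks between crc32.getValue() and adler.getValue().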

/**
 * Read and verify an lzo header, setting relevant block checksum options
 * and ignoring most everything else.
 */
protected int readHeader() throws IOException {
    byte[] buf = new byte[9];
    readBytes(buf, 0, 9);
    if (!Arrays.equals(buf, LzopConstants.LZOP_MAGIC))
        throw new IOException("Invalid LZO header");
    Arrays.fill(buf, (byte) 0);
    Adler32 adler = new Adler32();
    CRC32 crc32 = new CRC32();
    int hitem = readHeaderItem(buf, 2, adler, crc32); // lzop version
    if (hitem > LzopConstants.LZOP_VERSION) {
        LOG.debug("Compressed with later version of lzop: " + Integer.toHexString(hitem) + " (expected 0x"
                + Integer.toHexString(LzopConstants.LZOP_VERSION) + ")");
    }
    hitem = readHeaderItem(buf, 2, adler, crc32); // lzo library version
    if (hitem > LzoVersion.LZO_LIBRARY_VERSION) {
        throw new IOException("Compressed with incompatible lzo version: 0x" + Integer.toHexString(hitem)
                + " (expected 0x" + Integer.toHexString(LzoVersion.LZO_LIBRARY_VERSION) + ")");
    }
    hitem = readHeaderItem(buf, 2, adler, crc32); // lzop extract version
    if (hitem > LzopConstants.LZOP_VERSION) {
        throw new IOException("Compressed with incompatible lzop version: 0x" + Integer.toHexString(hitem)
                + " (expected 0x" + Integer.toHexString(LzopConstants.LZOP_VERSION) + ")");
    }
    hitem = readHeaderItem(buf, 1, adler, crc32); // method
    switch (hitem) {
    case LzopConstants.M_LZO1X_1:
    case LzopConstants.M_LZO1X_1_15:
    case LzopConstants.M_LZO1X_999:
        break;
    default:
        throw new IOException("Invalid strategy " + Integer.toHexString(hitem));
    }
    readHeaderItem(buf, 1, adler, crc32); // ignore level

    // flags
    int flags = readHeaderItem(buf, 4, adler, crc32);
    boolean useCRC32 = (flags & LzopConstants.F_H_CRC32) != 0;
    boolean extraField = (flags & LzopConstants.F_H_EXTRA_FIELD) != 0;
    if ((flags & LzopConstants.F_MULTIPART) != 0)
        throw new IOException("Multipart lzop not supported");
    if ((flags & LzopConstants.F_H_FILTER) != 0)
        throw new IOException("lzop filter not supported");
    if ((flags & LzopConstants.F_RESERVED) != 0)
        throw new IOException("Unknown flags in header");
    // known !F_H_FILTER, so no optional block

    readHeaderItem(buf, 4, adler, crc32); // ignore mode
    readHeaderItem(buf, 4, adler, crc32); // ignore mtime
    readHeaderItem(buf, 4, adler, crc32); // ignore gmtdiff
    hitem = readHeaderItem(buf, 1, adler, crc32); // fn len
    if (hitem > 0) {
        byte[] tmp = (hitem > buf.length) ? new byte[hitem] : buf;
        readHeaderItem(tmp, hitem, adler, crc32); // skip filename
    }
    int checksum = (int) (useCRC32 ? crc32.getValue() : adler.getValue());
    hitem = readHeaderItem(buf, 4, adler, crc32); // read checksum
    if (hitem != checksum) {
        throw new IOException("Invalid header checksum: " + Long.toHexString(checksum) + " (expected 0x"
                + Integer.toHexString(hitem) + ")");
    }
    if (extraField) { // lzop 1.08 ultimately ignores this
        LOG.debug("Extra header field not processed");
        adler.reset();
        crc32.reset();
        hitem = readHeaderItem(buf, 4, adler, crc32);
        readHeaderItem(new byte[hitem], hitem, adler, crc32);
        checksum = (int) (useCRC32 ? crc32.getValue() : adler.getValue());
        if (checksum != readHeaderItem(buf, 4, adler, crc32)) {
            throw new IOException("Invalid checksum for extra header field");
        }
    }

    return flags;
}

From source file: org.commoncrawl.service.crawler.CrawlLog.java
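A diagnostic walker over a crawl log: it recomputes each record's CRC-32, compares crc.getValue() with the stored value, and prints a sample of the valid records.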

public static void walkCrawlLogFile(File crawlLogPath, long startOffset) throws IOException {

    // and open the crawl log file ...
    RandomAccessFile inputStream = null;

    IOException exception = null;

    CRC32 crc = new CRC32();
    CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 17);
    byte[] syncBytesBuffer = new byte[SYNC_BYTES_SIZE];

    // save position for potential debug output.
    long lastReadPosition = 0;

    try {
        inputStream = new RandomAccessFile(crawlLogPath, "rw");

        // and a data input stream ...
        RandomAccessFile reader = inputStream;
        // seek to zero
        reader.seek(0L);

        // read the header ...
        LogFileHeader header = readLogFileHeader(reader);

        System.out.println("Header ItemCount:" + header._itemCount + " FileSize:" + header._fileSize);

        if (startOffset != 0L) {
            System.out.println("Preseeking to:" + startOffset);
            reader.seek(startOffset);
        }

        Configuration conf = new Configuration();

        // read a crawl url from the stream...

        long recordCount = 0;
        while (inputStream.getFilePointer() < header._fileSize) {

            // System.out.println("PRE-SYNC SeekPos:"+
            // inputStream.getFilePointer());
            if (seekToNextSyncBytesPos(syncBytesBuffer, reader, header._fileSize)) {

                // System.out.println("POST-SYNC SeekPos:"+
                // inputStream.getFilePointer());

                lastReadPosition = inputStream.getFilePointer();

                // skip sync
                inputStream.skipBytes(SYNC_BYTES_SIZE);

                // read length ...
                int urlDataLen = reader.readInt();
                long urlDataCRC = reader.readLong();

                if (urlDataLen > buffer.getBuffer().length) {
                    buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                }
                reader.readFully(buffer.getBuffer(), 0, urlDataLen); // read() may return short; readFully reads the whole record
                crc.reset();
                crc.update(buffer.getBuffer(), 0, urlDataLen);

                long computedValue = crc.getValue();

                // validate crc values ...
                if (computedValue != urlDataCRC) {
                    LOG.error("CRC Mismatch Detected during HDFS transfer in CrawlLog:"
                            + crawlLogPath.getAbsolutePath() + " FilePosition:" + lastReadPosition);
                    inputStream.seek(lastReadPosition + 1);
                } else {
                    if (recordCount++ % 10000 == 0) {
                        // allocate a crawl url data structure
                        CrawlURL url = new CrawlURL();
                        DataInputStream bufferReader = new DataInputStream(
                                new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));
                        // populate it from the (in memory) data stream
                        url.readFields(bufferReader);

                        System.out.println("Record:" + recordCount + " At:" + lastReadPosition + " URL:"
                                + url.getUrl() + " BuffSize:" + urlDataLen + " ContentLen:"
                                + url.getContentRaw().getCount() + " LastModified:"
                                + new Date(url.getLastAttemptTime()).toString());
                    }
                }
            } else {
                break;
            }
        }
    } catch (EOFException e) {
        LOG.error("Caught EOF Exception during read of local CrawlLog:" + crawlLogPath.getAbsolutePath()
                + " FilePosition:" + lastReadPosition);
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        exception = e;
        throw e;
    } finally {
        if (inputStream != null)
            inputStream.close();
    }
}