List of usage examples for java.io.RandomAccessFile.seek
public void seek(long pos) throws IOException
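Before the project examples, here is a minimal self-contained sketch of the seek contract: seek(pos) moves the file pointer to the absolute byte offset pos, the next read or write starts there, and the pointer may be set beyond the end of the file without changing the file length until a write occurs. The file name below is a hypothetical scratch file.

import java.io.IOException;
import java.io.RandomAccessFile;

public class SeekDemo {
    public static void main(String[] args) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile("demo.bin", "rw")) { // hypothetical file
            raf.writeInt(42);                   // occupies bytes 0-3
            raf.writeInt(7);                    // occupies bytes 4-7
            raf.seek(4);                        // jump back to the second int
            System.out.println(raf.readInt());  // prints 7
            raf.seek(0);                        // rewind and overwrite in place
            raf.writeInt(99);
        }
    }
}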
From source file: au.org.ala.layers.intersect.Grid.java

/** Buffered single-byte read on top of RandomAccessFile. */
private byte getByte(RandomAccessFile raf, byte[] buffer, Long bufferOffset, long seekTo) throws IOException {
    long relativePos = seekTo - bufferOffset;
    if (relativePos < 0) {
        // target lies before the buffered window: seek and refill
        raf.seek(seekTo);
        bufferOffset = seekTo;
        raf.read(buffer);
        return buffer[0];
    } else if (relativePos < buffer.length) {
        // target is inside the buffered window: serve it from memory
        return buffer[(int) relativePos];
    } else if (relativePos - buffer.length < buffer.length) {
        // target is in the window that follows: read it sequentially
        bufferOffset += buffer.length;
        raf.read(buffer);
        return buffer[(int) (relativePos - buffer.length)];
    } else {
        // target is far ahead: seek and refill
        raf.seek(seekTo);
        bufferOffset = seekTo;
        raf.read(buffer);
        return buffer[0];
    }
}
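The pattern above exists to amortize seeks: repeated reads that fall inside the current window are served from memory, and the file is only touched when the window misses. A minimal standalone sketch of the same idea, assuming requested offsets stay within the file length; the names are illustrative, not Grid's API:

// Refill the window only when the requested offset falls outside it.
// windowStart is a one-element array so the updated offset is visible
// to the caller (reassigning a boxed Long parameter would not be).
static byte bufferedByteAt(RandomAccessFile raf, byte[] window, long[] windowStart, long offset)
        throws IOException {
    long rel = offset - windowStart[0];
    if (rel < 0 || rel >= window.length) {
        raf.seek(offset);
        int len = (int) Math.min(window.length, raf.length() - offset);
        raf.readFully(window, 0, len);
        windowStart[0] = offset;
        rel = 0;
    }
    return window[(int) rel];
}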
From source file: org.commoncrawl.service.listcrawler.CrawlList.java

/**
 * Serialize metadata to disk.
 * @throws IOException
 */
void writeMetadataToDisk() throws IOException {
    synchronized (_metadata) {
        RandomAccessFile file = new RandomAccessFile(_listMetadataFile, "rw");
        try {
            file.seek(0);
            _metadata.serialize(file, new BinaryProtocol());
        } finally {
            file.close();
        }
    }
}
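Note that a newly opened RandomAccessFile already has its file pointer at offset 0, so the seek(0) above is defensive: it makes explicit that the metadata record is overwritten in place at the start of the file rather than appended.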
From source file: org.commoncrawl.service.listcrawler.CrawlList.java

private ProxyCrawlHistoryItem getHistoryItemFromOnDiskItem(OnDiskCrawlHistoryItem item) throws IOException {
    ProxyCrawlHistoryItem itemOut = new ProxyCrawlHistoryItem();
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS) != 0)
        itemOut.setCrawlStatus(item._crawlStatus);
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_ORIGINAL_RESULT_CODE) != 0)
        itemOut.setHttpResultCode(item._httpResultCode);
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS) != 0)
        itemOut.setRedirectStatus(item._redirectStatus);
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_RESULT_CODE) != 0)
        itemOut.setRedirectHttpResult(item._redirectHttpResult);
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_LASTMODIFIED_TIME) != 0)
        itemOut.setLastModifiedTime(item._updateTimestamp);
    // now attempt to get the string offset
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {
        // seek to string data
        stringDataReader.seek(item._stringsOffset);
        // and skip buffer length
        WritableUtils.readVInt(stringDataReader);
        // now populate original url ...
        itemOut.setOriginalURL(stringDataReader.readUTF());
        // now if redirect url is present
        if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_URL) != 0) {
            itemOut.setRedirectURL(stringDataReader.readUTF());
        }
    } finally {
        stringDataReader.close();
    }
    return itemOut;
}
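The method works because readUTF reads back exactly what a matching writeUTF call wrote at a recorded offset (here with a Hadoop VInt length prefix in between, skipped via WritableUtils.readVInt). A minimal round-trip of the seek-then-readUTF pattern, using a hypothetical file name:

try (RandomAccessFile raf = new RandomAccessFile("strings.dat", "rw")) { // hypothetical file
    long offset = raf.length();
    raf.seek(offset);                       // append the string at the end
    raf.writeUTF("http://example.com/page");
    // ... later, given the recorded offset:
    raf.seek(offset);
    String url = raf.readUTF();             // reads back the same string
}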
From source file: org.apache.jackrabbit.oak.plugins.segment.file.TarReader.java

/**
 * Scans through the tar file, looking for all segment entries.
 *
 * @throws IOException if the tar file could not be read
 */
private static void recoverEntries(File file, RandomAccessFile access, LinkedHashMap<UUID, byte[]> entries)
        throws IOException {
    byte[] header = new byte[BLOCK_SIZE];
    while (access.getFilePointer() + BLOCK_SIZE <= access.length()) {
        // read the tar header block
        access.readFully(header);

        // compute the header checksum
        int sum = 0;
        for (int i = 0; i < BLOCK_SIZE; i++) {
            sum += header[i] & 0xff;
        }

        // identify possible zero block
        if (sum == 0 && access.getFilePointer() + 2 * BLOCK_SIZE == access.length()) {
            return; // found the zero blocks at the end of the file
        }

        // replace the actual stored checksum with spaces for comparison
        for (int i = 148; i < 148 + 8; i++) {
            sum -= header[i] & 0xff;
            sum += ' ';
        }
        byte[] checkbytes = String.format("%06o\0 ", sum).getBytes(UTF_8);
        for (int i = 0; i < checkbytes.length; i++) {
            if (checkbytes[i] != header[148 + i]) {
                log.warn("Invalid entry checksum at offset {} in tar file {}, skipping...",
                        access.getFilePointer() - BLOCK_SIZE, file);
            }
        }

        // the header checksum passes, so read the entry name and size
        ByteBuffer buffer = ByteBuffer.wrap(header);
        String name = readString(buffer, 100);
        buffer.position(124);
        int size = readNumber(buffer, 12);
        if (access.getFilePointer() + size > access.length()) {
            // checksum was correct, so the size field should be accurate
            log.warn("Partial entry {} in tar file {}, ignoring...", name, file);
            return;
        }

        Matcher matcher = NAME_PATTERN.matcher(name);
        if (matcher.matches()) {
            UUID id = UUID.fromString(matcher.group(1));
            String checksum = matcher.group(3);
            if (checksum != null || !entries.containsKey(id)) {
                byte[] data = new byte[size];
                access.readFully(data);

                // skip possible padding to stay at block boundaries
                long position = access.getFilePointer();
                long remainder = position % BLOCK_SIZE;
                if (remainder != 0) {
                    access.seek(position + (BLOCK_SIZE - remainder));
                }

                if (checksum != null) {
                    CRC32 crc = new CRC32();
                    crc.update(data);
                    if (crc.getValue() != Long.parseLong(checksum, 16)) {
                        log.warn("Checksum mismatch in entry {} of tar file {}, skipping...", name, file);
                        continue;
                    }
                }
                entries.put(id, data);
            }
        } else if (!name.equals(file.getName() + ".idx")) {
            log.warn("Unexpected entry {} in tar file {}, skipping...", name, file);
            long position = access.getFilePointer() + size;
            long remainder = position % BLOCK_SIZE;
            if (remainder != 0) {
                position += BLOCK_SIZE - remainder;
            }
            access.seek(position);
        }
    }
}
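Both seek calls above serve the same purpose: tar entries are padded to 512-byte blocks, so after reading an entry's data (or deciding to skip it) the reader rounds the file pointer up to the next block boundary. The alignment step in isolation:

// Round the file pointer up to the next block boundary (512 bytes for tar).
static void alignToBlock(RandomAccessFile access, int blockSize) throws IOException {
    long position = access.getFilePointer();
    long remainder = position % blockSize;
    if (remainder != 0) {
        access.seek(position + (blockSize - remainder)); // skip the padding bytes
    }
}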
From source file: au.org.ala.layers.intersect.Grid.java

/** Buffered multi-byte read on top of RandomAccessFile. */
private Long getBytes(RandomAccessFile raf, byte[] buffer, Long bufferOffset, long seekTo, byte[] dest)
        throws IOException {
    long relativePos = seekTo - bufferOffset;
    if (relativePos < 0) {
        // target lies before the buffered window: clamp, seek and refill
        if (seekTo < 0) {
            seekTo = 0;
        }
        raf.seek(seekTo);
        bufferOffset = seekTo;
        raf.read(buffer);
        System.arraycopy(buffer, 0, dest, 0, dest.length);
    } else if (relativePos < buffer.length) {
        // target is inside the buffered window: copy from memory
        System.arraycopy(buffer, (int) relativePos, dest, 0, dest.length);
    } else if (relativePos - buffer.length < buffer.length) {
        // target is in the window that follows: read it sequentially
        bufferOffset += buffer.length;
        raf.read(buffer);
        int offset = (int) (relativePos - buffer.length);
        System.arraycopy(buffer, offset, dest, 0, dest.length);
    } else {
        // target is far ahead: seek and refill
        raf.seek(seekTo);
        bufferOffset = seekTo;
        raf.read(buffer);
        System.arraycopy(buffer, 0, dest, 0, dest.length);
    }
    return bufferOffset;
}
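Unlike getByte above, getBytes returns the possibly updated bufferOffset. Java passes object references by value, so reassigning the boxed Long parameter inside the method is invisible to the caller; returning the new offset is what keeps the caller's window position in sync across calls.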
From source file: big.BigZip.java

/**
 * Given a position inside our knowledge base, retrieve the data up to
 * the next file indicator.
 * @param targetFile The new file that will be created
 * @param startPosition The position from where we start to read the data
 * @param endPosition The position where the data ends
 * @return true if the extraction succeeded, false otherwise
 */
public boolean extractBytes(final File targetFile, final long startPosition, final Long endPosition) {
    /**
     * This is a tricky method. We will be extracting data from the BIG
     * archive onto a new file somewhere on disk. The biggest challenge here
     * is to find exactly where the data for the file ends and still do the
     * file copy with good performance.
     */
    try {
        // enable random access to the BIG file (fast as heck)
        RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r");
        // if the target file exists, try to delete it
        if (targetFile.exists()) {
            targetFile.delete();
            if (targetFile.exists()) {
                // we failed completely
                System.out.println("BIG405 - Failed to delete: " + targetFile.getAbsolutePath());
                return false;
            }
        }
        // we need to create a temporary zip file holder
        File fileZip = new File("temp.zip");
        // delete the zip file if it already exists
        if (fileZip.exists()) {
            fileZip.delete();
            if (fileZip.exists()) {
                // we failed completely
                System.out.println("BIG416 - Failed to delete: " + fileZip.getAbsolutePath());
                return false;
            }
        }
        // create a new file
        RandomAccessFile dataNew = new RandomAccessFile(fileZip, "rw");
        // jump directly to the position where the file is positioned
        dataBIG.seek(startPosition);
        // now we start reading bytes during the mentioned interval
        while (dataBIG.getFilePointer() < endPosition) {
            // read a byte from our BIG archive
            int data = dataBIG.read();
            // write the same byte on the target file
            dataNew.write(data);
        }
        // close the file streams
        dataBIG.close();
        dataNew.close();
        // extract the file
        zip.extract(fileZip, new File("."));
        // delete the temp zip file
        fileZip.delete();
    } catch (FileNotFoundException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    } catch (IOException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    }
    return true;
}
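Copying with the no-argument read() issues one I/O call per byte. The same seek-bounded copy is usually done in chunks for speed; a hedged sketch under the same start/end semantics, with illustrative names:

static void copyRange(RandomAccessFile source, RandomAccessFile target,
        long startPosition, long endPosition) throws IOException {
    byte[] chunk = new byte[64 * 1024];
    source.seek(startPosition);
    long remaining = endPosition - startPosition;
    while (remaining > 0) {
        // never read past endPosition
        int read = source.read(chunk, 0, (int) Math.min(chunk.length, remaining));
        if (read < 0) {
            break; // unexpected end of file
        }
        target.write(chunk, 0, read);
        remaining -= read;
    }
}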
From source file: org.apache.jackrabbit.oak.segment.file.TarReader.java

The recoverEntries method in this newer TarReader is identical to the org.apache.jackrabbit.oak.plugins.segment.file.TarReader version shown above, except that it statically imports ByteBuffer.wrap and calls wrap(header) directly.
From source file: FileBaseDataMap.java

/**
 * put Method.<br>
 *
 * @param key
 * @param value
 * @param hashCode This is a key value hash code
 */
public void put(String key, String value, int hashCode) {
    try {
        File file = dataFileList[hashCode % numberOfDataFiles];
        StringBuffer buf = new StringBuffer(this.fillCharacter(key, keyDataLength));
        buf.append(this.fillCharacter(value, oneDataLength));

        CacheContainer accessor = (CacheContainer) innerCache.get(file.getAbsolutePath());
        RandomAccessFile raf = null;
        BufferedWriter wr = null;

        if (accessor == null || accessor.isClosed == true) {
            raf = new RandomAccessFile(file, "rwd");
            wr = new BufferedWriter(new FileWriter(file, true));
            accessor = new CacheContainer();
            accessor.raf = raf;
            accessor.wr = wr;
            accessor.file = file;
            innerCache.put(file.getAbsolutePath(), accessor);
        } else {
            raf = accessor.raf;
            wr = accessor.wr;
        }

        // KeyData Write File
        for (int tryIdx = 0; tryIdx < 2; tryIdx++) {
            try {
                // look up the line number of an existing record for this key
                long dataLineNo = this.getLinePoint(key, raf);
                if (dataLineNo == -1) {
                    // new key: append the record
                    wr.write(buf.toString());
                    wr.flush();
                    // the size of an increment
                    this.totalSize.getAndIncrement();
                } else {
                    // existing record: increment the count only if the key had no live value
                    boolean increMentFlg = false;
                    if (this.get(key, hashCode) == null)
                        increMentFlg = true;
                    // overwrite the fixed-length record in place
                    raf.seek(dataLineNo * (lineDataSize));
                    raf.write(buf.toString().getBytes(), 0, lineDataSize);
                    if (increMentFlg)
                        this.totalSize.getAndIncrement();
                }
                break;
            } catch (IOException ie) {
                // on IOException, reopen the file handles and retry once
                if (tryIdx == 1)
                    throw ie;
                try {
                    if (raf != null)
                        raf.close();
                    if (wr != null)
                        wr.close();
                    raf = new RandomAccessFile(file, "rwd");
                    wr = new BufferedWriter(new FileWriter(file, true));
                    accessor = new CacheContainer();
                    accessor.raf = raf;
                    accessor.wr = wr;
                    accessor.file = file;
                    innerCache.put(file.getAbsolutePath(), accessor);
                } catch (Exception e) {
                    throw e;
                }
            }
        }
    } catch (Exception e2) {
        e2.printStackTrace();
    }
}
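The overwrite branch works because the file stores fixed-length records: record n starts at byte n * lineDataSize, so an existing entry can be replaced in place without rewriting the rest of the file. The core step in isolation, with illustrative names:

// Overwrite record recordIndex in a file of fixed-size records.
static void overwriteRecord(RandomAccessFile raf, long recordIndex, byte[] record, int recordSize)
        throws IOException {
    raf.seek(recordIndex * recordSize); // fixed-width records make the offset a simple product
    raf.write(record, 0, recordSize);
}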
From source file: org.commoncrawl.service.listcrawler.CrawlList.java

/**
 * Serialize subdomain metadata to disk.
 * @throws IOException
 */
void writeSubDomainMetadataToDisk(CrawlListMetadata subDomainData) throws IOException {
    DataOutputBuffer outputBuffer = new DataOutputBuffer(CrawlListMetadata.Constants.FixedDataSize);

    subDomainData.serialize(outputBuffer, new BinaryProtocol());

    if (outputBuffer.getLength() > CrawlListMetadata.Constants.FixedDataSize) {
        LOG.error("ListMetadata Serialize for List:" + subDomainData.getDomainName() + " > FixedDataSize!!!");
        outputBuffer.reset();
        subDomainData.setDomainName("<<CORRUPT>>");
        subDomainData.serialize(outputBuffer, new BinaryProtocol());
    }

    synchronized (_subDomainMetadataFile) {
        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        try {
            if (subDomainData.getSubDomainDataOffset() == 0) {
                throw new IOException("Data Offset Zero during write!");
            }
            file.seek(subDomainData.getSubDomainDataOffset());
            file.write(outputBuffer.getData(), 0, outputBuffer.getLength());
        } finally {
            file.close();
        }
    }
}
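The size check before the write exists because each subdomain record occupies a fixed-size slot of CrawlListMetadata.Constants.FixedDataSize bytes at its recorded offset; writing a longer serialization there would spill into the next slot, so an oversized record is replaced with a marker that fits.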
From source file: org.commoncrawl.service.listcrawler.CrawlList.java

/**
 * Update the list state of a recently crawled item.
 *
 * @param fingerprint - the fingerprint of the updated item
 * @param newData - the updated crawl history data for the given item
 * @throws IOException
 */
@Override
public void updateItemState(URLFP fingerprint, ProxyCrawlHistoryItem newData) throws IOException {
    if (_listState == LoadState.LOADED) {
        // check for membership ...
        if (_bloomFilter.isPresent(fingerprint)) {
            // extract existing item from disk
            OnDiskCrawlHistoryItem originalItem = loadOnDiskItemForURLFP(fingerprint);
            // if present (null if false cache hit)
            if (originalItem != null) {
                // build an on disk item data structure for any potential changes ...
                OnDiskCrawlHistoryItem newItem = onDiskItemFromHistoryItem(fingerprint, newData);
                // set initial offset information
                newItem._fileOffset = originalItem._fileOffset;
                newItem._stringsOffset = originalItem._stringsOffset;
                // compare the two items ...
                if (!newItem.equals(originalItem)) {
                    // ok items do not match ... figure out if strings are different ...
                    if (newItem._stringsCRC != originalItem._stringsCRC) {
                        RandomAccessFile stringsFile = new RandomAccessFile(_variableDataFile, "rw");
                        try {
                            // seek to end
                            stringsFile.seek(stringsFile.length());
                            // update offset info
                            newItem._stringsOffset = stringsFile.length();
                            // write out string data length
                            WritableUtils.writeVInt(stringsFile, _stringBuffer1.getLength());
                            // write strings to log file
                            stringsFile.write(_stringBuffer1.getData(), 0, _stringBuffer1.getLength());
                        } finally {
                            stringsFile.close();
                        }
                    }
                    // otherwise take the offset from old item
                    else {
                        newItem._stringsOffset = originalItem._stringsOffset;
                    }
                    // ok, different paths depending on whether this is an in memory update or not ...
                    boolean wroteToMemory = false;
                    synchronized (this) {
                        if (_tempFixedDataBuffer != null) {
                            wroteToMemory = true;
                            // reset output buffer
                            _tempOutputBuffer.reset();
                            // serialize to output buffer
                            newItem.serialize(_tempOutputBuffer);
                            // copy to appropriate location
                            System.arraycopy(_tempOutputBuffer.getData(), 0, _tempFixedDataBuffer,
                                    (int) originalItem._fileOffset, OnDiskCrawlHistoryItem.ON_DISK_SIZE);
                        }
                    }
                    if (!wroteToMemory) {
                        // write to disk
                        RandomAccessFile file = new RandomAccessFile(_fixedDataFile, "rw");
                        try {
                            while (true) {
                                try {
                                    // lock just this record's byte range before writing
                                    FileLock lock = file.getChannel().tryLock(originalItem._fileOffset,
                                            OnDiskCrawlHistoryItem.ON_DISK_SIZE, false);
                                    try {
                                        file.seek(originalItem._fileOffset);
                                        newItem.serialize(file);
                                        break;
                                    } finally {
                                        lock.release();
                                    }
                                } catch (OverlappingFileLockException e) {
                                    LOG.error("###LockConflict(RETRY):" + CCStringUtils.stringifyException(e));
                                }
                            }
                        } finally {
                            file.close();
                        }
                    }
                    // ok now update metadata ...
                    synchronized (_metadata) {
                        int updateFlags = calculateUpdateFlags(originalItem, newItem);
                        if (updateFlags != 0) {
                            int metadataDirtyFlags = updateMetadata(newItem, _metadata, 0);
                            // only write metadata to disk if temp data buffer is null
                            if (metadataDirtyFlags != 0 && !wroteToMemory) {
                                if ((metadataDirtyFlags & MetadataUpdateFlag_ModifiedCrawlStatus) != 0) {
                                    _metadata.setQueuedItemCount(_metadata.getQueuedItemCount() - 1);
                                }
                                writeMetadataToDisk();
                            }
                            // if not writing to memory then update subdomain metadata
                            if (!wroteToMemory) {
                                synchronized (_subDomainMetadataFile) {
                                    CrawlListMetadata subDomainMetadata = getSubDomainMetadataByURL(
                                            newData.getOriginalURL());
                                    int subDomainMetadataDirtyFlags = updateMetadata(newItem, subDomainMetadata,
                                            processFileOffsets);
                                    if (subDomainMetadataDirtyFlags != 0 && !wroteToMemory) {
                                        if ((subDomainMetadataDirtyFlags & MetadataUpdateFlag_ModifiedCrawlStatus) != 0) {
                                            subDomainMetadata.setQueuedItemCount(
                                                    subDomainMetadata.getQueuedItemCount() - 1);
                                        }
                                        writeSubDomainMetadataToDisk(subDomainMetadata);
                                    }
                                }
                            }
                        }
                    }
                    synchronized (this) {
                        if (_eventListener != null) {
                            _eventListener.itemUpdated(fingerprint);
                        }
                    }
                }
            }
        }
    }
}
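The disk path above pairs seek with a region lock so that concurrent updaters of the same fixed-size record cannot interleave their writes. A minimal sketch of the lock-seek-write pattern with illustrative names; note that FileLock coordinates between processes, while OverlappingFileLockException signals a conflicting lock held within the same JVM:

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileLock;
import java.nio.channels.OverlappingFileLockException;

static void lockedWrite(RandomAccessFile file, long offset, byte[] record) throws IOException {
    while (true) {
        try {
            // exclusive lock over just this record's byte range
            FileLock lock = file.getChannel().tryLock(offset, record.length, false);
            if (lock == null) {
                continue; // another process holds the lock; retry
            }
            try {
                file.seek(offset);
                file.write(record);
                return;
            } finally {
                lock.release();
            }
        } catch (OverlappingFileLockException e) {
            // an overlapping lock is held in this JVM; retry
        }
    }
}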