List of usage examples for java.io RandomAccessFile readUTF
public final String readUTF() throws IOException
From source file: Main.java
public static void main(String[] args) { try {// ww w .j ava 2 s. c o m // create a new RandomAccessFile with filename Example RandomAccessFile raf = new RandomAccessFile("c:/test.txt", "rw"); // write something in the file raf.writeUTF("java2s.com Hello World"); // set the file pointer at 0 position raf.seek(0); // read and print the contents of the file System.out.println(raf.readUTF()); // return the file pointer System.out.println(raf.getFilePointer()); // change the position of the file pointer raf.seek(5); // return the file pointer System.out.println(raf.getFilePointer()); // close the strea and release resources raf.close(); } catch (IOException ex) { ex.printStackTrace(); } }
From source file: Main.java
public static void main(String[] args) throws Exception { RandomAccessFile raf = new RandomAccessFile("books.dat", "rw"); String books[] = new String[5]; books[0] = "A"; books[1] = "B"; books[2] = "C"; books[3] = "D"; books[4] = "E"; for (int i = 0; i < books.length; i++) { raf.writeUTF(books[i]);//from w ww.j a v a2 s .c om } raf.seek(raf.length()); raf.writeUTF("Servlet & JSP Programming"); raf.seek(0); while (raf.getFilePointer() < raf.length()) { System.out.println(raf.readUTF()); } }
From source file: Main.java
/**
 * Reads an int counter followed by a UTF string from {@code fileName},
 * prints both, then calls {@code incrementReadCounter} to bump the stored
 * counter.
 *
 * @param fileName path of the file to read (must start with an int then a
 *                 modified-UTF-8 string as written by writeInt/writeUTF)
 * @throws IOException on any read failure
 */
public static void readFile(String fileName) throws IOException {
    // try-with-resources: the original only closed the file on the success
    // path and leaked the handle if a read threw.
    // "rw" (not "r") is required because incrementReadCounter writes back.
    try (RandomAccessFile raf = new RandomAccessFile(fileName, "rw")) {
        int counter = raf.readInt();
        String msg = raf.readUTF();
        System.out.println(counter);
        System.out.println(msg);
        incrementReadCounter(raf);
    }
}
From source file: net.ontopia.infoset.content.FileContentStore.java
/**
 * Allocates the next block of KEY_BLOCK_SIZE content-store keys by reading
 * the current high-water mark from the shared key file, advancing it, and
 * writing it back under an exclusive file lock. On success sets
 * {@code last_key} / {@code end_of_key_block}; spins up to MAX_SPINS times
 * waiting for the lock before giving up.
 *
 * @throws ContentStoreException if the key file is corrupt, the lock cannot
 *         be acquired in time, the lock cannot be released, or closing fails
 */
private void allocateNewBlock() throws ContentStoreException {
    RandomAccessFile out = null;
    // tracks whether we are already unwinding with an exception, so a
    // secondary close() failure does not mask the original error
    boolean exception_thrown = false;
    try {
        // "rws": writes to content and metadata are flushed synchronously,
        // so a crash cannot lose the updated key
        out = new RandomAccessFile(key_file, "rws");
        for (int i = 0; i < MAX_SPINS; i++) {
            // acquire exclusive lock (non-blocking; null means contended)
            FileLock l = out.getChannel().tryLock();
            if (l == null) {
                // wait a little before trying again
                try {
                    Thread.sleep(SPIN_TIMEOUT);
                } catch (InterruptedException e) {
                    // NOTE(review): interrupt is swallowed and the interrupt
                    // flag is not restored — confirm this is intentional
                }
                continue;
            } else {
                try {
                    // allocate new key range [old_key, new_key)
                    int old_key;
                    int new_key;
                    String content = null;
                    if (out.length() == 0) {
                        // fresh key file: start from zero
                        old_key = 0;
                        new_key = old_key + KEY_BLOCK_SIZE;
                    } else {
                        try {
                            content = out.readUTF();
                            old_key = Integer.parseInt(content);
                            new_key = old_key + KEY_BLOCK_SIZE;
                        } catch (NumberFormatException e) {
                            // truncate the bad content for the error message
                            if (content.length() > 100)
                                content = content.substring(0, 100) + "...";
                            throw new ContentStoreException(
                                    "Content store key file corrupted. Contained: '" + content + "'");
                        }
                    }
                    // truncate key file and write out new key
                    out.seek(0);
                    out.writeUTF(Integer.toString(new_key));
                    end_of_key_block = new_key;
                    last_key = old_key;
                    return;
                } finally {
                    // release file lock before closing / rethrowing
                    try {
                        l.release();
                    } catch (Throwable t) {
                        throw new ContentStoreException("Could not release key file lock.", t);
                    }
                }
            }
        }
        // exhausted MAX_SPINS attempts without acquiring the lock
        throw new ContentStoreException("Block allocation timed out.");
    } catch (ContentStoreException e) {
        exception_thrown = true;
        throw e;
    } catch (Throwable t) {
        // wrap anything unexpected (I/O, runtime) in the store's exception type
        exception_thrown = true;
        throw new ContentStoreException(t);
    } finally {
        if (out != null) {
            try {
                out.close();
            } catch (IOException e) {
                // only surface a close failure if no earlier exception is in flight
                if (!exception_thrown)
                    throw new ContentStoreException("Problems occurred when closing content store.", e);
            }
        }
    }
}
From source file: org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * Queues uncrawled URLs via the CrawlQueueLoader.
 *
 * Walks the fixed-size on-disk item records, reading each under a
 * per-record shared file lock, resolves the item's URL from the variable
 * string file, and queues any item that has never been crawled — or, for
 * list metadata version >= 1, any item whose last update is older than the
 * refresh interval. Queue state transitions QUEUEING -> QUEUED (or ERROR).
 *
 * @param loader sink that receives (fingerprint, url) pairs to crawl
 * @throws IOException declared, though I/O failures inside the main loop
 *         are caught and reflected in {@code _queueState} instead
 */
public void queueUnCrawledItems(CrawlQueueLoader loader) throws IOException {
    _queueState = QueueState.QUEUEING;
    int metadataVersion = getMetadata().getVersion();
    synchronized (_metadata) {
        // reset metadata but preserve the url count across the clear
        int urlCount = _metadata.getUrlCount();
        _metadata.clear();
        _metadata.setUrlCount(urlCount);
    }
    RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {
        OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
        URLFP fingerprint = new URLFP();
        // iterate fixed-size records until EOF
        while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
            long position = fixedDataReader.getFilePointer();
            //LOG.info("*** TRYING READ LOCK FOR OFFSET:" + position);
            while (true) {
                // get read (shared) lock on this record's byte range;
                // retry forever on overlap contention
                try {
                    FileLock lock = fixedDataReader.getChannel().tryLock(position,
                            OnDiskCrawlHistoryItem.ON_DISK_SIZE, false);
                    try {
                        //LOG.info("*** GOT READ LOCK FOR OFFSET:" + position);
                        item.deserialize(fixedDataReader);
                        break;
                    } finally {
                        lock.release();
                        //LOG.info("*** RELEASED READ LOCK FOR OFFSET:" + position);
                    }
                } catch (OverlappingFileLockException e) {
                    LOG.error("*** LOCK CONTENTION AT:" + position + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
            }
            // seek to string data
            stringDataReader.seek(item._stringsOffset);
            // and skip buffer length
            WritableUtils.readVInt(stringDataReader);
            // and read primary string
            String url = stringDataReader.readUTF();
            // setup fingerprint
            fingerprint.setDomainHash(item._domainHash);
            fingerprint.setUrlHash(item._urlFingerprint);
            // first, if it has not been crawled ever, crawl it no matter what ...
            boolean crawlItem = !item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS);
            // if it has been crawled ... check list metadata version ...
            if (!crawlItem && metadataVersion >= 1) {
                // newer list version: re-crawl when past the refresh interval
                int refreshIntervalInSeconds = DEFAULT_REFRESH_INTERVAL_IN_SECS;
                if (getMetadata().getRefreshInterval() != 0) {
                    refreshIntervalInSeconds = getMetadata().getRefreshInterval();
                }
                if (item._updateTimestamp > 0) {
                    long timeSinceLastCrawl = item._updateTimestamp;
                    // NOTE(review): refreshIntervalInSeconds * 1000 is int
                    // arithmetic — overflows for intervals > ~24.8 days;
                    // consider refreshIntervalInSeconds * 1000L
                    if (System.currentTimeMillis() - timeSinceLastCrawl >= (refreshIntervalInSeconds * 1000)) {
                        crawlItem = true;
                    }
                }
            }
            if (crawlItem) {
                loader.queueURL(fingerprint, url);
                synchronized (_metadata) {
                    // update queued item count
                    _metadata.setQueuedItemCount(_metadata.getQueuedItemCount() + 1);
                }
            } else {
                // not re-queued: fold its stats into the list metadata
                updateMetadata(item, _metadata, 0);
            }
            // update per-subdomain stats for this item
            updateSubDomainMetadataForItemDuringLoad(item, url, fingerprint, crawlItem);
        }
        flushCachedSubDomainMetadata();
        loader.flush();
        _queueState = QueueState.QUEUED;
    } catch (IOException e) {
        LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                + CCStringUtils.stringifyException(e));
        _queueState = QueueState.ERROR;
    } finally {
        fixedDataReader.close();
        stringDataReader.close();
    }
}
From source file: org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * Resubmits failed items to the loader.
 *
 * Scans every on-disk history record and re-queues items whose crawl (or
 * redirect) attempt did not end in an HTTP 200 or 404 — i.e. anything that
 * failed outright or returned an unexpected status. Items never crawled at
 * all are skipped here (handled by queueUnCrawledItems).
 *
 * @param loader sink that receives (fingerprint, url) pairs to retry
 * @throws IOException declared, though loop-level I/O failures are caught
 *         and logged, leaving {@code _queueState} as QUEUED
 */
public void requeueFailedItems(CrawlQueueLoader loader) throws IOException {
    synchronized (this) {
        _queueState = QueueState.QUEUEING;
    }
    RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {
        OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
        URLFP fingerprint = new URLFP();
        // iterate fixed-size records until EOF
        while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
            item.deserialize(fixedDataReader);
            boolean queueItem = false;
            // only items that have a crawl status are retry candidates
            if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS)) {
                if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS)) {
                    // redirect path: retry on failure status or on any HTTP
                    // result other than 200/404
                    queueItem = (item._redirectStatus != 0);
                    if (!queueItem) {
                        if (item._redirectHttpResult != 200 && item._redirectHttpResult != 404) {
                            queueItem = true;
                        }
                    }
                } else {
                    // direct-crawl path: same rule on the original result
                    queueItem = (item._crawlStatus != 0);
                    if (!queueItem) {
                        if (item._httpResultCode != 200 && item._httpResultCode != 404) {
                            queueItem = true;
                        }
                    }
                }
                if (queueItem) {
                    // seek to string data
                    stringDataReader.seek(item._stringsOffset);
                    // and skip buffer length
                    WritableUtils.readVInt(stringDataReader);
                    // and read primary string
                    String url = stringDataReader.readUTF();
                    // and spill to the loader
                    fingerprint.setDomainHash(item._domainHash);
                    fingerprint.setUrlHash(item._urlFingerprint);
                    loader.queueURL(fingerprint, url);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                + CCStringUtils.stringifyException(e));
        _queueState = QueueState.QUEUED;
    } finally {
        fixedDataReader.close();
        stringDataReader.close();
    }
}
From source file: org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * Converts an on-disk history record into a ProxyCrawlHistoryItem,
 * copying only the fields whose presence flags are set and resolving the
 * original (and optional redirect) URL from the variable-length string file.
 *
 * @param item the deserialized fixed-size record
 * @return a populated ProxyCrawlHistoryItem
 * @throws IOException if the string file cannot be read
 */
private ProxyCrawlHistoryItem getHistoryItemFromOnDiskItem(OnDiskCrawlHistoryItem item) throws IOException {
    ProxyCrawlHistoryItem itemOut = new ProxyCrawlHistoryItem();
    // copy each optional field only when its presence flag is set
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS) != 0)
        itemOut.setCrawlStatus(item._crawlStatus);
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_ORIGINAL_RESULT_CODE) != 0)
        itemOut.setHttpResultCode(item._httpResultCode);
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS) != 0)
        itemOut.setRedirectStatus(item._redirectStatus);
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_RESULT_CODE) != 0)
        itemOut.setRedirectHttpResult(item._redirectHttpResult);
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_LASTMODIFIED_TIME) != 0)
        itemOut.setLastModifiedTime(item._updateTimestamp);
    // now attempt to get the string data at the item's recorded offset
    // NOTE(review): opened "rw" although this method only reads — confirm
    // whether read-only ("r") access would suffice here
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {
        // seek to string data
        stringDataReader.seek(item._stringsOffset);
        // and skip buffer length
        WritableUtils.readVInt(stringDataReader);
        // now populate original url ...
        itemOut.setOriginalURL(stringDataReader.readUTF());
        // redirect url, when present, is the next UTF record
        if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_URL) != 0) {
            itemOut.setRedirectURL(stringDataReader.readUTF());
        }
    } finally {
        stringDataReader.close();
    }
    return itemOut;
}
From source file: org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * Loads per-subdomain metadata for this list.
 *
 * Two paths: if the subdomain metadata file exists, read its fixed-size
 * records and build the in-memory domainHash -> file-offset lookup table;
 * otherwise rebuild both the top-level list metadata and the subdomain
 * metadata from scratch by scanning every item in the fixed/variable data
 * files, then persist the rebuilt metadata to disk.
 *
 * @throws IOException if the existing metadata file cannot be read
 */
void loadSubDomainMetadataFromDisk() throws IOException {
    LOG.info("*** LIST:" + getListId() + " LOAD SUBDOMAIN METADATA FROM DISK ... ");
    if (_subDomainMetadataFile.exists()) {
        // fast path: metadata file already on disk — index it
        LOG.info("*** LIST:" + getListId() + " FILE EXISTS LOADING SUBDOMAIN DATA FROM DISK.");
        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        DataInputBuffer inputBuffer = new DataInputBuffer();
        byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize];
        try {
            // skip version byte
            file.read();
            // read item count
            int itemCount = file.readInt();
            LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);
            CrawlListMetadata newMetadata = new CrawlListMetadata();
            // TreeMap keeps the lookup table sorted by domain hash
            TreeMap<Long, Integer> idToOffsetMap = new TreeMap<Long, Integer>();
            for (int i = 0; i < itemCount; ++i) {
                // remember where this record starts before reading it
                long orignalPos = file.getFilePointer();
                file.readFully(fixedDataBlock, 0, fixedDataBlock.length);
                inputBuffer.reset(fixedDataBlock, fixedDataBlock.length);
                try {
                    newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                } catch (Exception e) {
                    // log and continue — a bad record maps its (stale)
                    // domain hash to this offset below
                    LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
                idToOffsetMap.put(newMetadata.getDomainHash(), (int) orignalPos);
            }
            // write (domainHash, offset) pairs into the lookup table buffer
            _offsetLookupTable = new DataOutputBuffer(idToOffsetMap.size() * OFFSET_TABLE_ENTRY_SIZE);
            for (Map.Entry<Long, Integer> entry : idToOffsetMap.entrySet()) {
                _offsetLookupTable.writeLong(entry.getKey());
                _offsetLookupTable.writeInt(entry.getValue());
            }
        } finally {
            file.close();
        }
        LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK");
    } else {
        // slow path: no metadata file — rebuild everything from the list data
        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA DOES NOT EXIST! LOADING FROM SCRATCH");
        RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
        RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
        try {
            // rebuild top level metadata as well
            _metadata.clear();
            OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
            int processedCount = 0;
            while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
                long position = fixedDataReader.getFilePointer();
                // store offset in item
                item._fileOffset = position;
                // load from disk
                item.deserialize(fixedDataReader);
                try {
                    // seek to string data
                    stringDataReader.seek(item._stringsOffset);
                    // and skip buffer length
                    WritableUtils.readVInt(stringDataReader);
                    // and read primary string
                    String url = stringDataReader.readUTF();
                    // get metadata object for subdomain
                    CrawlListMetadata subDomainMetadata = getTransientSubDomainMetadata(url);
                    // increment url count
                    subDomainMetadata.setUrlCount(subDomainMetadata.getUrlCount() + 1);
                    // increment top level metadata count
                    _metadata.setUrlCount(_metadata.getUrlCount() + 1);
                    // update top level metadata ..
                    updateMetadata(item, _metadata, 0);
                    // update sub-domain metadata object from item data
                    updateMetadata(item, subDomainMetadata, 0);
                    ++processedCount;
                } catch (IOException e) {
                    // per-item failure: log with file positions and keep going
                    LOG.error("Exception Reading String Data For Item:" + (processedCount + 1));
                    LOG.error("Exception:" + CCStringUtils.stringifyException(e));
                    LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:"
                            + stringDataReader.getFilePointer());
                }
                if (processedCount % 10000 == 0) {
                    LOG.info("*** LIST:" + getListId() + " Processed:" + processedCount + " Items");
                }
            }
            // commit top level metadata to disk as well
            writeMetadataToDisk();
        } catch (IOException e) {
            LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                    + CCStringUtils.stringifyException(e));
            LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:"
                    + stringDataReader.getFilePointer());
            _queueState = QueueState.QUEUED;
        } finally {
            fixedDataReader.close();
            stringDataReader.close();
        }
        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA . WRITING TO DISK");
        // write metadata to disk
        writeInitialSubDomainMetadataToDisk();
        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA . WRITE COMPLETE");
    }
}
From source file: org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * Dumps, as a JSON document, every item in the given list that is
 * uncrawled or whose crawl/redirect did not end in HTTP 200 or 404.
 * Output shape: {"urls": [{url, redirected, lastStatus, updateTime}, ...]}.
 *
 * @param dataDir directory holding the list's fixed and variable data files
 * @param listId list identifier used to locate the data files
 * @param outputFilePath destination for the JSON dump
 * @param includeRobotsExcludedItems NOTE(review): unused in this method —
 *        confirm whether robots-excluded filtering was meant to happen here
 * @throws IOException on setup failure or if the JSON writer fails
 */
public static void dumpUnCrawledItems(File dataDir, long listId, File outputFilePath,
        boolean includeRobotsExcludedItems) throws IOException {
    File fixedDataFile = new File(dataDir, LIST_VALUE_MAP_PREFIX + Long.toString(listId));
    File variableDataFile = new File(dataDir, LIST_STRING_MAP_PREFIX + Long.toString(listId));
    LOG.info("FixedDataFile is:" + fixedDataFile);
    LOG.info("VariableDataFile is:" + variableDataFile);
    RandomAccessFile fixedDataReader = new RandomAccessFile(fixedDataFile, "r");
    RandomAccessFile stringDataReader = new RandomAccessFile(variableDataFile, "r");
    // 10MB buffered writer for the JSON output
    JsonWriter writer = new JsonWriter(new BufferedWriter(new FileWriter(outputFilePath), 1024 * 1024 * 10));
    writer.setIndent(" ");
    try {
        writer.beginObject();
        writer.name("urls");
        writer.beginArray();
        try {
            OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
            URLFP fingerprint = new URLFP();
            // iterate fixed-size records until EOF
            while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
                // NOTE(review): position is never used in this loop
                long position = fixedDataReader.getFilePointer();
                item.deserialize(fixedDataReader);
                // seek to string data
                stringDataReader.seek(item._stringsOffset);
                // and skip buffer length
                WritableUtils.readVInt(stringDataReader);
                // and read primary string
                String url = stringDataReader.readUTF();
                // setup fingerprint
                fingerprint.setDomainHash(item._domainHash);
                fingerprint.setUrlHash(item._urlFingerprint);
                // any item that has not been crawled needs to be queued
                boolean queueItem = !item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS);
                // if item is not queued, check to see if we need to retry the item
                if (!queueItem && item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS)) {
                    if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS)) {
                        // redirect path: dump on failure or non-200/404 result
                        queueItem = (item._redirectStatus != 0);
                        if (!queueItem) {
                            if (item._redirectHttpResult != 200 && item._redirectHttpResult != 404) {
                                queueItem = true;
                            }
                        }
                    } else {
                        // direct-crawl path: same rule on the original result
                        queueItem = (item._crawlStatus != 0);
                        if (!queueItem) {
                            if (item._httpResultCode != 200 && item._httpResultCode != 404) {
                                queueItem = true;
                            }
                        }
                    }
                }
                if (queueItem) {
                    // emit one JSON object per dumped item
                    writer.beginObject();
                    writer.name("url");
                    writer.value(url);
                    writer.name("redirected");
                    writer.value((boolean) item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS));
                    writer.name("lastStatus");
                    if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS)) {
                        // status 0 means an HTTP result; otherwise a failure code
                        if (item._redirectStatus == 0) {
                            writer.value("HTTP-" + item._redirectHttpResult);
                        } else {
                            writer.value(CrawlURL.FailureReason.toString(item._redirectHttpResult));
                        }
                    } else {
                        if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS)) {
                            if (item._crawlStatus == 0) {
                                writer.value("HTTP-" + item._httpResultCode);
                            } else {
                                writer.value(CrawlURL.FailureReason.toString(item._crawlStatus));
                            }
                        } else {
                            writer.value("UNCRAWLED");
                        }
                    }
                    writer.name("updateTime");
                    writer.value(item._updateTimestamp);
                    writer.endObject();
                }
            }
        } catch (IOException e) {
            LOG.error("Encountered Exception Queueing Items for List:" + listId + " Exception:"
                    + CCStringUtils.stringifyException(e));
        } finally {
            fixedDataReader.close();
            stringDataReader.close();
        }
        writer.endArray();
        writer.endObject();
    } catch (Exception e) {
        LOG.error(CCStringUtils.stringifyException(e));
        throw new IOException(e);
    } finally {
        writer.flush();
        writer.close();
    }
}