List of usage examples for java.io.RandomAccessFile.length()
public native long length() throws IOException;
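For orientation before the longer examples, here is a minimal sketch of the two most common uses of length(): sizing a read and detecting the end of the file while scanning. The file name is a placeholder for this sketch, not taken from any example below.

import java.io.IOException;
import java.io.RandomAccessFile;

public class LengthExample {
    public static void main(String[] args) throws IOException {
        // "data.bin" is a placeholder path for this sketch
        try (RandomAccessFile raf = new RandomAccessFile("data.bin", "r")) {
            long size = raf.length(); // current file size in bytes
            System.out.println("size=" + size);

            // scan until the file pointer reaches length()
            long nonZero = 0;
            while (raf.getFilePointer() < size) {
                if (raf.read() != 0) {
                    nonZero++;
                }
            }
            System.out.println("non-zero bytes=" + nonZero);
        }
    }
}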
From source file:com.siblinks.ws.service.impl.UserServiceImpl.java
/**
 * {@inheritDoc}
 */
@SuppressWarnings("resource")
@Override
@RequestMapping(value = "/getAvatar/{path}", method = RequestMethod.GET, produces = MediaType.IMAGE_JPEG_VALUE)
public ResponseEntity<byte[]> getAvatar(@PathVariable(value = "path") final String path) {
    logger.info("Call service get avatar");
    RandomAccessFile randomAccessFile = null;
    ResponseEntity<byte[]> responseEntity = null;
    try {
        if (!StringUtil.isNull(path)) {
            // Read the avatar file into a buffer sized by its length
            randomAccessFile = new RandomAccessFile(path, "r");
            byte[] r = new byte[(int) randomAccessFile.length()];
            randomAccessFile.readFully(r);
            responseEntity = new ResponseEntity<byte[]>(r, new HttpHeaders(), HttpStatus.OK);
        } else {
            responseEntity = new ResponseEntity<byte[]>(HttpStatus.NO_CONTENT);
        }
    } catch (Exception e) {
        logger.debug("File not found");
        responseEntity = new ResponseEntity<byte[]>(HttpStatus.NOT_FOUND);
    } finally {
        try {
            if (randomAccessFile != null) {
                randomAccessFile.close();
            }
        } catch (IOException io) {
            // Ignore close failures
        }
    }
    return responseEntity;
}
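The example above casts randomAccessFile.length() straight to int, which silently truncates for files larger than 2 GB. A minimal variant of the same read, written as a hypothetical helper (not part of UserServiceImpl), that guards the cast and uses try-with-resources instead of a manual finally block:

import java.io.IOException;
import java.io.RandomAccessFile;

final class FileBytes {
    // Hypothetical helper: read a whole file into memory, refusing files over 2 GB
    static byte[] readAll(String path) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile(path, "r")) {
            long size = raf.length();
            if (size > Integer.MAX_VALUE) {
                throw new IOException("File too large to buffer in a byte[]: " + size);
            }
            byte[] data = new byte[(int) size];
            raf.readFully(data);
            return data;
        }
    }
}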
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
private void writeInitialOnDiskItem(URLFP fp, ProxyCrawlHistoryItem historyItem, DataOutputStream valueStreamOut,
        RandomAccessFile stringStream) throws IOException {

    OnDiskCrawlHistoryItem itemOut = onDiskItemFromHistoryItem(fp, historyItem);

    // update string offset ...
    itemOut._stringsOffset = stringStream.length();
    // write out string data length
    WritableUtils.writeVInt(stringStream, _stringBuffer1.getLength());
    // write strings to log file
    stringStream.write(_stringBuffer1.getData(), 0, _stringBuffer1.getLength());
    // update timestamp ...
    itemOut._updateTimestamp = -1;
    // and write to disk
    itemOut.serialize(valueStreamOut);
}
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
private void dumpFixedDataFile() {
    try {
        RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
        try {
            OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
            int index = 0;
            while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
                item.deserialize(fixedDataReader);
                LOG.info("Item at Index:" + index++ + " Domain:" + item._domainHash + " URLFP:"
                        + item._urlFingerprint);
            }
        } finally {
            fixedDataReader.close();
        }
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    }
}
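The loop condition fixedDataReader.getFilePointer() != fixedDataReader.length() is the recurring idiom in these CrawlList examples: fixed-size records are packed back to back, so the file pointer reaching length() marks the end of the data. A stripped-down sketch of the same scan, with a hypothetical record size:

import java.io.IOException;
import java.io.RandomAccessFile;

class RecordScan {
    static final int RECORD_SIZE = 16; // hypothetical fixed record width in bytes

    static void dump(String path) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile(path, "r")) {
            byte[] record = new byte[RECORD_SIZE];
            // stop once the file pointer has advanced to length()
            while (raf.getFilePointer() < raf.length()) {
                raf.readFully(record);
                // ... decode the record here ...
            }
        }
    }
}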
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * update list state of a recently crawled item
 *
 * @param fingerprint - the fingerprint of the updated item
 * @param newData - the updated crawl history data for the given item
 * @throws IOException
 */
@Override
public void updateItemState(URLFP fingerprint, ProxyCrawlHistoryItem newData) throws IOException {
    if (_listState == LoadState.LOADED) {
        // check for membership ...
        if (_bloomFilter.isPresent(fingerprint)) {
            //LOG.info("UpdateItemState Called for URL:" + newData.getOriginalURL() + " List:" + getListId());
            //LOG.info("UpdateItemState Loading OnDisk Item for URL:" + newData.getOriginalURL() + " List:" + getListId());
            // extract existing item from disk
            OnDiskCrawlHistoryItem originalItem = loadOnDiskItemForURLFP(fingerprint);
            // if present (null if false cache hit)
            if (originalItem != null) {
                // build an on disk item data structure for any potential changes ...
                OnDiskCrawlHistoryItem newItem = onDiskItemFromHistoryItem(fingerprint, newData);
                // set initial offset information
                newItem._fileOffset = originalItem._fileOffset;
                newItem._stringsOffset = originalItem._stringsOffset;
                // LOG.info("UpdateItemState Comparing OnDisk Item to New Item for URL:" + newData.getOriginalURL() + " List:" + getListId());
                // compare the two items ...
                if (!newItem.equals(originalItem)) {
                    //LOG.info("UpdateItemState Items Don't Match for URL:" + newData.getOriginalURL() + " List:" + getListId());
                    // ok items do not match ... figure out if strings are different ...
                    if (newItem._stringsCRC != originalItem._stringsCRC) {
                        RandomAccessFile stringsFile = new RandomAccessFile(_variableDataFile, "rw");
                        try {
                            // seek to end
                            stringsFile.seek(stringsFile.length());
                            // update offset info
                            newItem._stringsOffset = stringsFile.length();
                            // write out string data length
                            WritableUtils.writeVInt(stringsFile, _stringBuffer1.getLength());
                            // write strings to log file
                            stringsFile.write(_stringBuffer1.getData(), 0, _stringBuffer1.getLength());
                        } finally {
                            stringsFile.close();
                        }
                    }
                    // otherwise take the offset from old item
                    else {
                        newItem._stringsOffset = originalItem._stringsOffset;
                    }
                    //LOG.info("Opening Data File for OnDiskItem load for Fingerprint:" + newItem._urlFingerprint);
                    // ok, different paths depending on whether this is an in memory update or not ...
                    boolean wroteToMemory = false;
                    synchronized (this) {
                        if (_tempFixedDataBuffer != null) {
                            wroteToMemory = true;
                            // reset output buffer
                            _tempOutputBuffer.reset();
                            // serialize to output buffer
                            newItem.serialize(_tempOutputBuffer);
                            // copy to appropriate location
                            System.arraycopy(_tempOutputBuffer.getData(), 0, _tempFixedDataBuffer,
                                    (int) originalItem._fileOffset, OnDiskCrawlHistoryItem.ON_DISK_SIZE);
                        }
                    }
                    if (!wroteToMemory) {
                        // write to disk
                        RandomAccessFile file = new RandomAccessFile(_fixedDataFile, "rw");
                        try {
                            while (true) {
                                try {
                                    //LOG.info("*** TRYING UPDATE LOCK FOR OFFSET:" + originalItem._fileOffset);
                                    FileLock lock = file.getChannel().tryLock(originalItem._fileOffset,
                                            OnDiskCrawlHistoryItem.ON_DISK_SIZE, false);
                                    try {
                                        //LOG.info("*** GOT UPDATE LOCK FOR OFFSET:" + originalItem._fileOffset);
                                        file.seek(originalItem._fileOffset);
                                        newItem.serialize(file);
                                        //LOG.info("Updated Data File for OnDiskItem for Fingerprint:" + originalItem._urlFingerprint);
                                        break;
                                    } finally {
                                        //LOG.info("*** RELEASED UPDATE LOCK FOR OFFSET:" + originalItem._fileOffset);
                                        lock.release();
                                    }
                                } catch (OverlappingFileLockException e) {
                                    LOG.error("###LockConflict(RETRY):" + CCStringUtils.stringifyException(e));
                                }
                            }
                        } finally {
                            file.close();
                        }
                    }
                    // ok now update metadata ...
                    synchronized (_metadata) {
                        int updateFlags = calculateUpdateFlags(originalItem, newItem);
                        if (updateFlags != 0) {
                            int metadataDirtyFlags = updateMetadata(newItem, _metadata, 0);
                            // only write metadata to disk if temp data buffer is null
                            if (metadataDirtyFlags != 0 && !wroteToMemory) {
                                if ((metadataDirtyFlags & MetadataUpdateFlag_ModifiedCrawlStatus) != 0) {
                                    _metadata.setQueuedItemCount(_metadata.getQueuedItemCount() - 1);
                                }
                                writeMetadataToDisk();
                            }
                            // if not writing to memory then update subdomain metadata
                            if (!wroteToMemory) {
                                synchronized (_subDomainMetadataFile) {
                                    CrawlListMetadata subDomainMetadata = getSubDomainMetadataByURL(
                                            newData.getOriginalURL());
                                    int subDomainMetadataDirtyFlags = updateMetadata(newItem, subDomainMetadata,
                                            processFileOffsets);
                                    if (subDomainMetadataDirtyFlags != 0 && !wroteToMemory) {
                                        if ((subDomainMetadataDirtyFlags & MetadataUpdateFlag_ModifiedCrawlStatus) != 0) {
                                            subDomainMetadata.setQueuedItemCount(
                                                    subDomainMetadata.getQueuedItemCount() - 1);
                                        }
                                        writeSubDomainMetadataToDisk(subDomainMetadata);
                                    }
                                }
                            }
                        }
                    }
                    synchronized (this) {
                        if (_eventListener != null) {
                            _eventListener.itemUpdated(fingerprint);
                        }
                    }
                }
            }
        }
    }
}
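updateItemState appends new string data by seeking to stringsFile.length() and recording that offset in the record that points at it. A compact sketch of that append-and-remember-the-offset idiom, with illustrative names and a plain int length prefix in place of the example's WritableUtils.writeVInt:

import java.io.IOException;
import java.io.RandomAccessFile;

class AppendLog {
    // Append a payload at the end of the file and return the offset it was written at
    static long append(RandomAccessFile log, byte[] payload) throws IOException {
        long offset = log.length();   // end of file == next free offset
        log.seek(offset);
        log.writeInt(payload.length); // length prefix, as the examples do with writeVInt
        log.write(payload);
        return offset;                // caller stores this in its index record
    }
}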
From source file:au.org.ala.layers.intersect.Grid.java
public void mergeMissingValues(Grid sourceOfMissingValues, boolean hideMissing) {
    float[] cells = sourceOfMissingValues.getGrid();
    float[] actual = getGrid();
    int length = actual.length;
    int i;
    RandomAccessFile afile = null;
    File f2 = new File(filename + ".GRI");
    try { // read of random access file can throw an exception
        if (!f2.exists()) {
            afile = new RandomAccessFile(filename + ".gri", "rw");
        } else {
            afile = new RandomAccessFile(filename + ".GRI", "rw");
        }
        byte[] b = new byte[(int) afile.length()];
        ByteBuffer bb = ByteBuffer.wrap(b);
        if (byteorderLSB) {
            bb.order(ByteOrder.LITTLE_ENDIAN);
        }
        afile.seek(0);
        if (datatype.equalsIgnoreCase("UBYTE")) {
            for (i = 0; i < length; i++) {
                if (hideMissing == Float.isNaN(cells[i])) {
                    if (nodatavalue >= 128) {
                        bb.put((byte) (nodatavalue - 256));
                    } else {
                        bb.put((byte) nodatavalue);
                    }
                } else {
                    if (actual[i] >= 128) {
                        bb.put((byte) (actual[i] - 256));
                    } else {
                        bb.put((byte) actual[i]);
                    }
                }
            }
        } else if (datatype.equalsIgnoreCase("BYTE")) {
            for (i = 0; i < length; i++) {
                bb.put((byte) actual[i]);
            }
        } else if (datatype.equalsIgnoreCase("SHORT")) {
            for (i = 0; i < length; i++) {
                if (hideMissing == Float.isNaN(cells[i])) {
                    bb.putShort((short) nodatavalue);
                } else {
                    bb.putShort((short) actual[i]);
                }
            }
        } else if (datatype.equalsIgnoreCase("INT")) {
            for (i = 0; i < length; i++) {
                if (hideMissing == Float.isNaN(cells[i])) {
                    bb.putInt((int) nodatavalue);
                } else {
                    bb.putInt((int) actual[i]);
                }
            }
        } else if (datatype.equalsIgnoreCase("LONG")) {
            for (i = 0; i < length; i++) {
                if (hideMissing == Float.isNaN(cells[i])) {
                    bb.putLong((long) nodatavalue);
                } else {
                    bb.putLong((long) actual[i]);
                }
            }
        } else if (datatype.equalsIgnoreCase("FLOAT")) {
            for (i = 0; i < length; i++) {
                if (hideMissing == Float.isNaN(cells[i])) {
                    bb.putFloat((float) nodatavalue);
                } else {
                    bb.putFloat(actual[i]);
                }
            }
        } else if (datatype.equalsIgnoreCase("DOUBLE")) {
            for (i = 0; i < length; i++) {
                if (hideMissing == Float.isNaN(cells[i])) {
                    bb.putDouble((double) nodatavalue);
                } else {
                    bb.putDouble((double) actual[i]);
                }
            }
        } else {
            // should not happen
            logger.error("unsupported grid data type: " + datatype);
        }
        afile.write(bb.array());
    } catch (Exception e) {
        logger.error("error getting grid file values", e);
    } finally {
        if (afile != null) {
            try {
                afile.close();
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }
}
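mergeMissingValues sizes a ByteBuffer from afile.length(), regenerates every cell into it, then writes the buffer back over the file from offset 0. A reduced sketch of that length()-sized read-modify-write pattern, with a hypothetical per-byte transform standing in for the cell logic:

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;

class RewriteInPlace {
    // Hypothetical transform: clear the high bit of every byte in the file
    static void zeroHighBit(String path) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile(path, "rw")) {
            // output buffer sized from length(); assumes the file fits in memory, as the Grid example does
            byte[] data = new byte[(int) raf.length()];
            raf.readFully(data);
            ByteBuffer bb = ByteBuffer.wrap(new byte[data.length]);
            for (byte b : data) {
                bb.put((byte) (b & 0x7F));
            }
            raf.seek(0); // overwrite the file from the start
            raf.write(bb.array());
        }
    }
}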
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * resubmit failed items
 *
 * @param loader
 */
public void requeueFailedItems(CrawlQueueLoader loader) throws IOException {
    synchronized (this) {
        _queueState = QueueState.QUEUEING;
    }
    RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {
        OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
        URLFP fingerprint = new URLFP();
        while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
            item.deserialize(fixedDataReader);
            boolean queueItem = false;
            if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS)) {
                if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS)) {
                    queueItem = (item._redirectStatus != 0);
                    if (!queueItem) {
                        if (item._redirectHttpResult != 200 && item._redirectHttpResult != 404) {
                            queueItem = true;
                        }
                    }
                } else {
                    queueItem = (item._crawlStatus != 0);
                    if (!queueItem) {
                        if (item._httpResultCode != 200 && item._httpResultCode != 404) {
                            queueItem = true;
                        }
                    }
                }
                if (queueItem) {
                    // seek to string data
                    stringDataReader.seek(item._stringsOffset);
                    // and skip buffer length
                    WritableUtils.readVInt(stringDataReader);
                    // and read primary string
                    String url = stringDataReader.readUTF();
                    // and spill
                    fingerprint.setDomainHash(item._domainHash);
                    fingerprint.setUrlHash(item._urlFingerprint);
                    loader.queueURL(fingerprint, url);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                + CCStringUtils.stringifyException(e));
        _queueState = QueueState.QUEUED;
    } finally {
        fixedDataReader.close();
        stringDataReader.close();
    }
}
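requeueFailedItems is the read side of the append idiom sketched earlier: each fixed record carries _stringsOffset, and the variable-length data is recovered by seeking there and reading past the length prefix. A matching read sketch for the hypothetical AppendLog layout shown above:

import java.io.IOException;
import java.io.RandomAccessFile;

class AppendLogReader {
    // Read back a payload previously written by AppendLog.append (same hypothetical layout)
    static byte[] read(RandomAccessFile log, long offset) throws IOException {
        log.seek(offset);
        int len = log.readInt();     // length prefix written at append time
        byte[] payload = new byte[len];
        log.readFully(payload);
        return payload;
    }
}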
From source file:au.org.ala.layers.intersect.Grid.java
/**
 * Increase sampleEveryNthPoint to return a smaller grid.
 *
 * Grid max and min values may be skipped.
 *
 * This does not use previously cached data.
 *
 * @param sampleEveryNthPoint
 * @return
 */
public float[] getGrid(int sampleEveryNthPoint) {
    int maxArrayLength = Integer.MAX_VALUE - 10;

    if (subgrids != null) {
        // sample points
        int size = 1000;
        double[][] points = new double[size * size][2];
        int pos = 0;
        for (int i = 0; i < 1000; i++) {
            for (int j = 0; j < 1000; j++) {
                points[pos][0] = xmin + (xmax - xmin) * j / (double) size;
                points[pos][1] = ymax - (ymax - ymin) * i / (double) size;
                pos++;
            }
        }
        return getValues3(points, 64);
    }

    int length = (nrows / sampleEveryNthPoint) * (ncols);
    float[] ret = new float[length];

    RandomAccessFile afile = null;
    File f2 = new File(filename + ".GRI");
    try { // read of random access file can throw an exception
        if (!f2.exists()) {
            afile = new RandomAccessFile(filename + ".gri", "r");
        } else {
            afile = new RandomAccessFile(filename + ".GRI", "r");
        }
        int sz = (int) Math.min(afile.length() / sampleEveryNthPoint / sampleEveryNthPoint, maxArrayLength);
        sz += 8 - sz % 8;
        byte[] b = new byte[sz];
        long i = 0;
        long max = 0;
        int len;
        while ((len = afile.read(b)) > 0) {
            ByteBuffer bb = ByteBuffer.wrap(b);
            if (byteorderLSB) {
                bb.order(ByteOrder.LITTLE_ENDIAN);
            }
            if (datatype.equalsIgnoreCase("UBYTE")) {
                max += len;
                max = Math.min(max, ret.length * (long) sampleEveryNthPoint);
                for (; i < max; i++) {
                    ret[(int) (i / sampleEveryNthPoint)] = bb.get();
                    if (ret[(int) (i / sampleEveryNthPoint)] < 0) {
                        ret[(int) (i / sampleEveryNthPoint)] += 256;
                    }
                }
            } else if (datatype.equalsIgnoreCase("BYTE")) {
                max += len;
                max = Math.min(max, ret.length * (long) sampleEveryNthPoint);
                for (; i < max; i++) {
                    ret[(int) (i / sampleEveryNthPoint)] = bb.get();
                }
            } else if (datatype.equalsIgnoreCase("SHORT")) {
                max += len / 2;
                max = Math.min(max, ret.length * (long) sampleEveryNthPoint);
                for (; i < max; i++) {
                    ret[(int) (i / sampleEveryNthPoint)] = bb.getShort();
                }
            } else if (datatype.equalsIgnoreCase("INT")) {
                max += len / 4;
                max = Math.min(max, ret.length * (long) sampleEveryNthPoint);
                for (; i < max; i++) {
                    ret[(int) (i / sampleEveryNthPoint)] = bb.getInt();
                }
            } else if (datatype.equalsIgnoreCase("LONG")) {
                max += len / 8;
                max = Math.min(max, ret.length * (long) sampleEveryNthPoint);
                for (; i < max; i++) {
                    ret[(int) (i / sampleEveryNthPoint)] = bb.getLong();
                }
            } else if (datatype.equalsIgnoreCase("FLOAT")) {
                max += len / 4;
                max = Math.min(max, ret.length * (long) sampleEveryNthPoint);
                for (; i < max; i++) {
                    ret[(int) (i / sampleEveryNthPoint)] = bb.getFloat();
                }
            } else if (datatype.equalsIgnoreCase("DOUBLE")) {
                max += len / 8;
                max = Math.min(max, ret.length * (long) sampleEveryNthPoint);
                for (; i < max; i++) {
                    ret[(int) (i / (long) sampleEveryNthPoint)] = (float) bb.getDouble();
                }
            } else {
                // should not happen; catch anyway...
                max += len / 4;
                for (; i < max; i++) {
                    ret[(int) (i / (long) sampleEveryNthPoint)] = Float.NaN;
                }
            }
        }
        // replace not a number
        for (i = 0; i < length; i++) {
            if ((float) ret[(int) i] == (float) nodatavalue) {
                ret[(int) i] = Float.NaN;
            } else {
                ret[(int) i] *= rescale;
            }
        }
    } catch (Exception e) {
        logger.error("An error has occurred - probably a file error", e);
    } finally {
        if (afile != null) {
            try {
                afile.close();
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }
    grid_data = ret;
    return ret;
}
From source file:au.org.ala.layers.intersect.Grid.java
public float[] getGrid() {
    int maxArrayLength = Integer.MAX_VALUE - 10;

    if (grid_data != null) {
        return grid_data;
    }

    Grid loadedAlready = getLoadedGrid(filename);
    if (loadedAlready != null && loadedAlready.grid_data != null) {
        return loadedAlready.grid_data;
    }

    int length = nrows * ncols;
    float[] ret = new float[length];

    RandomAccessFile afile = null;
    File f2 = new File(filename + ".GRI");
    try { // read of random access file can throw an exception
        if (!f2.exists()) {
            afile = new RandomAccessFile(filename + ".gri", "r");
        } else {
            afile = new RandomAccessFile(filename + ".GRI", "r");
        }
        byte[] b = new byte[(int) Math.min(afile.length(), maxArrayLength)];
        int i = 0;
        int max = 0;
        int len;
        while ((len = afile.read(b)) > 0) {
            ByteBuffer bb = ByteBuffer.wrap(b);
            if (byteorderLSB) {
                bb.order(ByteOrder.LITTLE_ENDIAN);
            }
            if (datatype.equalsIgnoreCase("UBYTE")) {
                max += len;
                max = Math.min(max, ret.length);
                for (; i < max; i++) {
                    ret[i] = bb.get();
                    if (ret[i] < 0) {
                        ret[i] += 256;
                    }
                }
            } else if (datatype.equalsIgnoreCase("BYTE")) {
                max += len;
                max = Math.min(max, ret.length);
                for (; i < max; i++) {
                    ret[i] = bb.get();
                }
            } else if (datatype.equalsIgnoreCase("SHORT")) {
                max += len / 2;
                max = Math.min(max, ret.length);
                for (; i < max; i++) {
                    ret[i] = bb.getShort();
                }
            } else if (datatype.equalsIgnoreCase("INT")) {
                max += len / 4;
                max = Math.min(max, ret.length);
                for (; i < max; i++) {
                    ret[i] = bb.getInt();
                }
            } else if (datatype.equalsIgnoreCase("LONG")) {
                max += len / 8;
                max = Math.min(max, ret.length);
                for (; i < max; i++) {
                    ret[i] = bb.getLong();
                }
            } else if (datatype.equalsIgnoreCase("FLOAT")) {
                max += len / 4;
                max = Math.min(max, ret.length);
                for (; i < max; i++) {
                    ret[i] = bb.getFloat();
                }
            } else if (datatype.equalsIgnoreCase("DOUBLE")) {
                max += len / 8;
                max = Math.min(max, ret.length);
                for (; i < max; i++) {
                    ret[i] = (float) bb.getDouble();
                }
            } else {
                // should not happen; catch anyway...
                max += len / 4;
                for (; i < max; i++) {
                    ret[i] = Float.NaN;
                }
            }
        }
        // replace not a number
        for (i = 0; i < length; i++) {
            if ((float) ret[i] == (float) nodatavalue) {
                ret[i] = Float.NaN;
            } else {
                ret[i] *= rescale;
            }
        }
    } catch (Exception e) {
        logger.error("An error has occurred - probably a file error", e);
    } finally {
        if (afile != null) {
            try {
                afile.close();
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }
    grid_data = ret;
    return ret;
}
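Both Grid.getGrid variants cap their buffer at Math.min(afile.length(), Integer.MAX_VALUE - 10) and then loop on read(), so a file larger than one buffer is still consumed in full. A reduced sketch of that length()-bounded chunked read; the 4 MB cap is arbitrary for the sketch:

import java.io.IOException;
import java.io.RandomAccessFile;

class ChunkedRead {
    static long countBytes(String path) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile(path, "r")) {
            // buffer no larger than the file itself, capped for this sketch
            int bufSize = (int) Math.min(raf.length(), 4 * 1024 * 1024);
            byte[] buf = new byte[Math.max(bufSize, 1)];
            long total = 0;
            int len;
            while ((len = raf.read(buf)) > 0) {
                total += len; // process buf[0..len) here
            }
            return total;
        }
    }
}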
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * queue uncrawled urls via the CrawlQueueLoader
 *
 * @param loader
 */
public void queueUnCrawledItems(CrawlQueueLoader loader) throws IOException {
    _queueState = QueueState.QUEUEING;

    int metadataVersion = getMetadata().getVersion();

    synchronized (_metadata) {
        // reset metadata PERIOD
        int urlCount = _metadata.getUrlCount();
        _metadata.clear();
        _metadata.setUrlCount(urlCount);
    }

    RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {
        OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
        URLFP fingerprint = new URLFP();
        while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
            long position = fixedDataReader.getFilePointer();
            //LOG.info("*** TRYING READ LOCK FOR OFFSET:" + position);
            while (true) {
                // get read lock on position ...
                try {
                    FileLock lock = fixedDataReader.getChannel().tryLock(position,
                            OnDiskCrawlHistoryItem.ON_DISK_SIZE, false);
                    try {
                        //LOG.info("*** GOT READ LOCK FOR OFFSET:" + position);
                        item.deserialize(fixedDataReader);
                        break;
                    } finally {
                        lock.release();
                        //LOG.info("*** RELEASED READ LOCK FOR OFFSET:" + position);
                    }
                } catch (OverlappingFileLockException e) {
                    LOG.error("*** LOCK CONTENTION AT:" + position + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
            }

            // seek to string data
            stringDataReader.seek(item._stringsOffset);
            // and skip buffer length
            WritableUtils.readVInt(stringDataReader);
            // and read primary string
            String url = stringDataReader.readUTF();

            // setup fingerprint
            fingerprint.setDomainHash(item._domainHash);
            fingerprint.setUrlHash(item._urlFingerprint);

            // first, if it has not been crawled ever, crawl it no matter what ...
            boolean crawlItem = !item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS);

            // if it has been crawled ... check list metadata version ...
            if (!crawlItem && metadataVersion >= 1) {
                // ok this is a newer version of the list ...
                // check refresh time if specified ...
                int refreshIntervalInSeconds = DEFAULT_REFRESH_INTERVAL_IN_SECS;
                if (getMetadata().getRefreshInterval() != 0) {
                    refreshIntervalInSeconds = getMetadata().getRefreshInterval();
                }
                if (item._updateTimestamp > 0) {
                    long timeSinceLastCrawl = item._updateTimestamp;
                    if (System.currentTimeMillis() - timeSinceLastCrawl >= (refreshIntervalInSeconds * 1000)) {
                        crawlItem = true;
                    }
                }
            }

            if (crawlItem) {
                loader.queueURL(fingerprint, url);
                synchronized (_metadata) {
                    // update queued item count
                    _metadata.setQueuedItemCount(_metadata.getQueuedItemCount() + 1);
                }
            } else {
                updateMetadata(item, _metadata, 0);
            }
            // ok update subdomain stats
            updateSubDomainMetadataForItemDuringLoad(item, url, fingerprint, crawlItem);
        }

        flushCachedSubDomainMetadata();

        loader.flush();

        _queueState = QueueState.QUEUED;
    } catch (IOException e) {
        LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                + CCStringUtils.stringifyException(e));
        _queueState = QueueState.ERROR;
    } finally {
        fixedDataReader.close();
        stringDataReader.close();
    }
}
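queueUnCrawledItems guards each record read with FileChannel.tryLock over exactly ON_DISK_SIZE bytes and retries on OverlappingFileLockException. A minimal sketch of that lock-a-region-then-read idiom with illustrative names; unlike the example it also rechecks a null return from tryLock, which signals that the region is held by another process:

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileLock;
import java.nio.channels.OverlappingFileLockException;

class LockedRecordRead {
    static void readRecord(RandomAccessFile file, long offset, int recordSize, byte[] out) throws IOException {
        while (true) {
            try {
                // lock just the record's byte range, not the whole file
                FileLock lock = file.getChannel().tryLock(offset, recordSize, false);
                if (lock == null) {
                    continue; // held by another process; retry
                }
                try {
                    file.seek(offset);
                    file.readFully(out, 0, recordSize);
                    return;
                } finally {
                    lock.release();
                }
            } catch (OverlappingFileLockException e) {
                // another thread in this JVM holds an overlapping lock; retry
            }
        }
    }
}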