List of usage examples for java.io RandomAccessFile seek
public void seek(long pos) throws IOException
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
public ArrayList<CrawlListDomainItem> getSubDomainList(int offset, int count) { synchronized (_metadata) { ArrayList<CrawlListDomainItem> itemsOut = new ArrayList<CrawlListDomainItem>(); try {//from w w w .j av a 2 s.c o m synchronized (_subDomainMetadataFile) { RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw"); DataInputBuffer inputBuffer = new DataInputBuffer(); byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize]; try { // skip version file.read(); // read item count int itemCount = file.readInt(); int i = offset; int end = Math.min(i + count, itemCount); LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount); if (i < itemCount) { file.seek(5 + (CrawlListMetadata.Constants.FixedDataSize * offset)); CrawlListMetadata newMetadata = new CrawlListMetadata(); for (; i < end; ++i) { long orignalPos = file.getFilePointer(); file.readFully(fixedDataBlock, 0, fixedDataBlock.length); inputBuffer.reset(fixedDataBlock, fixedDataBlock.length); newMetadata.deserialize(inputBuffer, new BinaryProtocol()); itemsOut.add(buildSubDomainSummary(newMetadata.getDomainName(), newMetadata)); } } } finally { file.close(); } } } catch (IOException e) { LOG.error(CCStringUtils.stringifyException(e)); } LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK"); return itemsOut; } }
From source file:okuyama.imdst.util.FileBaseDataMap.java
private long[] getLinePoint(String key, RandomAccessFile raf) throws Exception { long[] ret = { -1, 0 }; long line = -1; long lineCount = 0L; byte[] keyBytes = key.getBytes(); byte[] equalKeyBytes = new byte[keyBytes.length + 1]; byte[] lineBufs = new byte[this.getDataSize]; boolean matchFlg = true; // ???/*from w w w . j av a 2 s. c o m*/ for (int idx = 0; idx < keyBytes.length; idx++) { equalKeyBytes[idx] = keyBytes[idx]; } equalKeyBytes[equalKeyBytes.length - 1] = new Integer(FileBaseDataMap.paddingSymbol).byteValue(); try { raf.seek(0); int readLen = -1; while ((readLen = SystemUtil.diskAccessSync(raf, lineBufs)) != -1) { matchFlg = true; int loop = readLen / lineDataSize; for (int loopIdx = 0; loopIdx < loop; loopIdx++) { int assist = (lineDataSize * loopIdx); matchFlg = true; if (equalKeyBytes[equalKeyBytes.length - 1] == lineBufs[assist + (equalKeyBytes.length - 1)]) { for (int i = 0; i < equalKeyBytes.length; i++) { if (equalKeyBytes[i] != lineBufs[assist + i]) { matchFlg = false; break; } } } else { matchFlg = false; } // ??????? if (matchFlg) { line = lineCount; // ??? if (lineBufs[assist + keyDataLength] == FileBaseDataMap.paddingSymbol) ret[1] = -1; break; } lineCount++; } if (matchFlg) break; } } catch (IOException ie) { throw ie; } catch (Exception e) { throw e; } ret[0] = line; return ret; }
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
void loadSubDomainMetadataFromDisk() throws IOException { LOG.info("*** LIST:" + getListId() + " LOAD SUBDOMAIN METADATA FROM DISK ... "); if (_subDomainMetadataFile.exists()) { LOG.info("*** LIST:" + getListId() + " FILE EXISTS LOADING SUBDOMAIN DATA FROM DISK."); RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw"); DataInputBuffer inputBuffer = new DataInputBuffer(); byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize]; try {// ww w . ja v a 2 s . co m // skip version file.read(); // read item count int itemCount = file.readInt(); LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount); CrawlListMetadata newMetadata = new CrawlListMetadata(); TreeMap<Long, Integer> idToOffsetMap = new TreeMap<Long, Integer>(); for (int i = 0; i < itemCount; ++i) { long orignalPos = file.getFilePointer(); file.readFully(fixedDataBlock, 0, fixedDataBlock.length); inputBuffer.reset(fixedDataBlock, fixedDataBlock.length); try { newMetadata.deserialize(inputBuffer, new BinaryProtocol()); } catch (Exception e) { LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:" + CCStringUtils.stringifyException(e)); } idToOffsetMap.put(newMetadata.getDomainHash(), (int) orignalPos); } // write lookup table _offsetLookupTable = new DataOutputBuffer(idToOffsetMap.size() * OFFSET_TABLE_ENTRY_SIZE); for (Map.Entry<Long, Integer> entry : idToOffsetMap.entrySet()) { _offsetLookupTable.writeLong(entry.getKey()); _offsetLookupTable.writeInt(entry.getValue()); } } finally { file.close(); } LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK"); } else { LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA DOES NOT EXIST! 
LOADING FROM SCRATCH"); RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw"); RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw"); try { //ok rebuild top level metadata as well _metadata.clear(); OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem(); int processedCount = 0; while (fixedDataReader.getFilePointer() != fixedDataReader.length()) { long position = fixedDataReader.getFilePointer(); // store offset in item item._fileOffset = position; // load from disk item.deserialize(fixedDataReader); try { // seek to string data stringDataReader.seek(item._stringsOffset); // and skip buffer length WritableUtils.readVInt(stringDataReader); // and read primary string String url = stringDataReader.readUTF(); // get metadata object for subdomain CrawlListMetadata subDomainMetadata = getTransientSubDomainMetadata(url); // increment url count subDomainMetadata.setUrlCount(subDomainMetadata.getUrlCount() + 1); // increment top level metadata count _metadata.setUrlCount(_metadata.getUrlCount() + 1); // update top level metadata .. 
updateMetadata(item, _metadata, 0); // update sub-domain metadata object from item data updateMetadata(item, subDomainMetadata, 0); ++processedCount; } catch (IOException e) { LOG.error("Exception Reading String Data For Item:" + (processedCount + 1)); LOG.error("Exception:" + CCStringUtils.stringifyException(e)); LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:" + stringDataReader.getFilePointer()); } if (processedCount % 10000 == 0) { LOG.info("*** LIST:" + getListId() + " Processed:" + processedCount + " Items"); } } // ok commit top level metadata to disk as well writeMetadataToDisk(); } catch (IOException e) { LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:" + CCStringUtils.stringifyException(e)); LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:" + stringDataReader.getFilePointer()); _queueState = QueueState.QUEUED; } finally { fixedDataReader.close(); stringDataReader.close(); } LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA . WRITING TO DISK"); // write metadat to disk writeInitialSubDomainMetadataToDisk(); LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA . WRITE COMPLETE"); } }
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
private OnDiskCrawlHistoryItem loadOnDiskItemForURLFP(URLFP fingerprint) throws IOException { // see if state is cached in memory ... boolean loadedFromMemory = false; synchronized (this) { if (_tempFixedDataBuffer != null) { loadedFromMemory = true;//from w w w .j a va2s. c o m int low = 0; int high = (int) (_tempFixedDataBufferSize / OnDiskCrawlHistoryItem.ON_DISK_SIZE) - 1; OnDiskCrawlHistoryItem itemOut = new OnDiskCrawlHistoryItem(); DataInputBuffer inputBuffer = new DataInputBuffer(); int iterationNumber = 0; while (low <= high) { ++iterationNumber; int mid = low + ((high - low) / 2); inputBuffer.reset(_tempFixedDataBuffer, 0, _tempFixedDataBufferSize); inputBuffer.skip(mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE); // deserialize itemOut.deserialize(inputBuffer); // now compare it against desired hash value ... int comparisonResult = itemOut.compareFingerprints(fingerprint); if (comparisonResult > 0) high = mid - 1; else if (comparisonResult < 0) low = mid + 1; else { // cache offset itemOut._fileOffset = mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE; // LOG.info("Found Match. Took:"+ iterationNumber + " iterations"); // and return item return itemOut; } } //LOG.error("Did Not Find Match For Domain:" + fingerprint.getDomainHash() + " URLFP:" + fingerprint.getUrlHash() + " Took:" + iterationNumber + " iterations"); } } if (!loadedFromMemory) { //load from disk //LOG.info("Opening Data File for OnDiskItem load for Fingerprint:" + fingerprint.getUrlHash()); RandomAccessFile file = new RandomAccessFile(_fixedDataFile, "rw"); // allocate buffer upfront byte[] onDiskItemBuffer = new byte[OnDiskCrawlHistoryItem.ON_DISK_SIZE]; DataInputBuffer inputStream = new DataInputBuffer(); //LOG.info("Opened Data File. 
Searching for match"); try { int low = 0; int high = (int) (file.length() / OnDiskCrawlHistoryItem.ON_DISK_SIZE) - 1; OnDiskCrawlHistoryItem itemOut = new OnDiskCrawlHistoryItem(); int iterationNumber = 0; while (low <= high) { ++iterationNumber; int mid = low + ((high - low) / 2); // seek to proper location file.seek(mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE); // read the data structure file.readFully(onDiskItemBuffer, 0, onDiskItemBuffer.length); // map location in file //MappedByteBuffer memoryBuffer = file.getChannel().map(MapMode.READ_ONLY,mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE,OnDiskCrawlHistoryItem.ON_DISK_SIZE); //DataInputStream inputStream = new DataInputStream(new ByteBufferInputStream(memoryBuffer)); inputStream.reset(onDiskItemBuffer, 0, OnDiskCrawlHistoryItem.ON_DISK_SIZE); // deserialize itemOut.deserialize(inputStream); // memoryBuffer = null; //inputStream = null; // now compare it against desired hash value ... int comparisonResult = itemOut.compareFingerprints(fingerprint); if (comparisonResult > 0) high = mid - 1; else if (comparisonResult < 0) low = mid + 1; else { // cache offset itemOut._fileOffset = mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE; // LOG.info("Found Match. Took:"+ iterationNumber + " iterations"); // and return item return itemOut; } } //LOG.error("******Did Not Find Match For Domain:" + fingerprint.getDomainHash() + " URLFP:" + fingerprint.getUrlHash() + " Took:" + iterationNumber + " iterations"); //DEBUG ONLY ! // dumpFixedDataFile(); } finally { file.close(); } } return null; }
From source file:au.org.ala.layers.intersect.Grid.java
float[] getGrid(double xmin, double ymin, double xmax, double ymax) { //expects largest y at the top //expects input ranges inside of grid ranges int width = (int) ((xmax - xmin) / xres); int height = (int) ((ymax - ymin) / yres); int startx = (int) ((xmin - this.xmin) / xres); int endx = startx + width; int starty = (int) ((ymin - this.ymin) / yres); //int endy = starty + height; int length = width * height; float[] ret = new float[length]; int pos = 0;/*from w w w . j a v a 2s . c o m*/ int i; RandomAccessFile afile = null; File f2 = new File(filename + ".GRI"); int size = 4; if (datatype.equals("BYTE") || datatype.equals("UBYTE")) { size = 1; } else if (datatype.equals("SHORT")) { size = 2; } else if (datatype.equals("INT")) { size = 4; } else if (datatype.equals("LONG")) { size = 8; } else if (datatype.equals("FLOAT")) { size = 4; } else if (datatype.equals("DOUBLE")) { size = 8; } try { //read of random access file can throw an exception if (!f2.exists()) { afile = new RandomAccessFile(filename + ".gri", "r"); } else { afile = new RandomAccessFile(filename + ".GRI", "r"); } //seek to first raster afile.seek(((long) this.ncols) * starty * size); //read relevant rasters int readSize = this.ncols * height * size; int readLen = this.ncols * height; byte[] b = new byte[readSize]; afile.read(b); ByteBuffer bb = ByteBuffer.wrap(b); if (byteorderLSB) { bb.order(ByteOrder.LITTLE_ENDIAN); } if (datatype.equalsIgnoreCase("BYTE")) { for (i = 0; i < readLen; i++) { int x = i % this.ncols; if (x < startx || x >= endx) { bb.get(); } else { ret[pos++] = bb.get(); } } } else if (datatype.equalsIgnoreCase("UBYTE")) { for (i = 0; i < readLen; i++) { int x = i % this.ncols; if (x < startx || x >= endx) { bb.get(); } else { ret[pos] = bb.get(); if (ret[pos] < 0) { ret[pos] += 256; } pos++; } } } else if (datatype.equalsIgnoreCase("SHORT")) { for (i = 0; i < readLen; i++) { int x = i % this.ncols; if (x < startx || x >= endx) { bb.getShort(); } else { ret[pos++] = bb.getShort(); } 
} } else if (datatype.equalsIgnoreCase("INT")) { for (i = 0; i < readLen; i++) { int x = i % this.ncols; if (x < startx || x >= endx) { bb.getInt(); } else { ret[pos++] = bb.getInt(); } } } else if (datatype.equalsIgnoreCase("LONG")) { for (i = 0; i < readLen; i++) { int x = i % this.ncols; if (x < startx || x >= endx) { bb.getLong(); } else { ret[pos++] = bb.getLong(); } } } else if (datatype.equalsIgnoreCase("FLOAT")) { for (i = 0; i < readLen; i++) { int x = i % this.ncols; if (x < startx || x >= endx) { bb.getFloat(); } else { ret[pos++] = bb.getFloat(); } } } else if (datatype.equalsIgnoreCase("DOUBLE")) { for (i = 0; i < readLen; i++) { int x = i % this.ncols; if (x < startx || x >= endx) { bb.getDouble(); } else { ret[pos++] = (float) bb.getDouble(); } } } else { // / should not happen; catch anyway... for (i = 0; i < length; i++) { ret[i] = Float.NaN; } } //replace not a number for (i = 0; i < length; i++) { if ((float) ret[i] == (float) nodatavalue) { ret[i] = Float.NaN; } else { ret[i] *= rescale; } } } catch (Exception e) { logger.error("GRID: " + e.toString(), e); } finally { if (afile != null) { try { afile.close(); } catch (Exception e) { logger.error(e.getMessage(), e); } } } grid_data = ret; return ret; }
From source file:io.minio.MinioClient.java
/**
 * Skips exactly the given number of bytes in the given input.
 *
 * <p>For a {@link RandomAccessFile} the file pointer is moved forward by {@code n}. For a
 * {@link BufferedInputStream} bytes are skipped until {@code n} have been consumed; a
 * non-positive {@code n} skips nothing.
 *
 * @param inputStream Input stream which is an instance of {@link RandomAccessFile} or
 *                    {@link BufferedInputStream}.
 * @param n           Number of bytes to skip.
 * @throws IllegalArgumentException   if {@code inputStream} is of any other type
 * @throws InsufficientDataException  if the stream ends before {@code n} bytes were skipped
 * @throws IOException                if the underlying seek, skip or read fails
 */
private void skipStream(Object inputStream, long n) throws IOException, InsufficientDataException {
    if (inputStream instanceof RandomAccessFile) {
        RandomAccessFile file = (RandomAccessFile) inputStream;
        file.seek(file.getFilePointer() + n);
        return;
    }

    if (!(inputStream instanceof BufferedInputStream)) {
        throw new IllegalArgumentException("unsupported input stream object");
    }
    BufferedInputStream stream = (BufferedInputStream) inputStream;

    long totalBytesSkipped = 0;
    while (totalBytesSkipped < n) {
        long bytesSkipped = stream.skip(n - totalBytesSkipped);
        if (bytesSkipped > 0) {
            totalBytesSkipped += bytesSkipped;
            continue;
        }

        // skip() returns 0 both when it made no progress and at end of stream, never -1.
        // Consume a single byte instead: read() is the only call that reports end of stream,
        // and without this check the loop would spin forever on a short stream.
        if (stream.read() < 0) {
            throw new InsufficientDataException(
                    "Insufficient data.  bytes skipped " + totalBytesSkipped + " expected " + n);
        }
        totalBytesSkipped++;
    }
}
From source file:org.opencb.cellbase.lib.db.VariantAnnotationCalculatorTest.java
private int getVepAnnotationBatch(RandomAccessFile raf, int nVariantsToRead, Set<AnnotationComparisonObject> vepAnnotationSet) throws IOException { /**//from w w w. j a va2 s . c o m * Loads VEP annotation */ String newLine; int nNonRegulatoryAnnotations = 0; int nReadVariants = 0; String previousChr = ""; String previousPosition = ""; String previousAlt = ""; String alt; long filePointer = 0; if (nVariantsToRead > 0) { while (((newLine = raf.readLine()) != null) && nReadVariants <= nVariantsToRead) { String[] lineFields = newLine.split("\t"); String[] coordinatesParts = lineFields[1].split(":"); if (lineFields[2].equals("deletion")) { alt = "-"; } else { alt = lineFields[2]; } // TODO: Remove this if as refactoring implements consequence types for other variant types // if(!alt.equals("-") && coordinatesParts[1].split("-").length==1) { if (!previousChr.equals(coordinatesParts[0]) || !previousPosition.equals(coordinatesParts[1]) || !previousAlt.equals(alt)) { nReadVariants++; } if (nReadVariants <= nVariantsToRead) { for (String SOname : lineFields[6].split(",")) { if (SOname.equals("nc_transcript_variant")) { SOname = "non_coding_transcript_variant"; } if (!SOname.equals("regulatory_region_variant")) { nNonRegulatoryAnnotations++; } vepAnnotationSet.add(new AnnotationComparisonObject(coordinatesParts[0], coordinatesParts[1], alt, lineFields[3], lineFields[4], SOname)); } previousChr = coordinatesParts[0]; previousPosition = coordinatesParts[1]; previousAlt = alt; filePointer = raf.getFilePointer(); } // } } raf.seek(filePointer); } return nNonRegulatoryAnnotations; }
From source file:okuyama.imdst.util.FileBaseDataMap.java
/** * ??value??.<br>//from w w w .j a va2s . c om * * @param key * @param hashCode This is a key value hash code * @return * @throws */ public String get(String key, int hashCode) { byte[] tmpBytes = null; String ret = null; byte[] keyBytes = key.getBytes(); byte[] equalKeyBytes = new byte[keyBytes.length + 1]; byte[] lineBufs = new byte[this.getDataSize]; boolean matchFlg = true; // ??? for (int idx = 0; idx < keyBytes.length; idx++) { equalKeyBytes[idx] = keyBytes[idx]; } equalKeyBytes[equalKeyBytes.length - 1] = new Integer(FileBaseDataMap.paddingSymbol).byteValue(); try { File file = dataFileList[hashCode % numberOfDataFiles]; CacheContainer accessor = (CacheContainer) innerCache.get(file.getAbsolutePath()); RandomAccessFile raf = null; BufferedWriter wr = null; if (accessor == null || accessor.isClosed) { raf = new RandomAccessFile(file, "rwd"); wr = new BufferedWriter(new FileWriter(file, true)); accessor = new CacheContainer(); accessor.raf = raf; accessor.wr = wr; accessor.file = file; innerCache.put(file.getAbsolutePath(), accessor); } else { raf = accessor.raf; } for (int tryIdx = 0; tryIdx < 2; tryIdx++) { try { raf.seek(0); int readLen = -1; while ((readLen = SystemUtil.diskAccessSync(raf, lineBufs)) != -1) { matchFlg = true; int loop = readLen / lineDataSize; for (int loopIdx = 0; loopIdx < loop; loopIdx++) { int assist = (lineDataSize * loopIdx); matchFlg = true; if (equalKeyBytes[equalKeyBytes.length - 1] == lineBufs[assist + (equalKeyBytes.length - 1)]) { for (int i = 0; i < equalKeyBytes.length; i++) { if (equalKeyBytes[i] != lineBufs[assist + i]) { matchFlg = false; break; } } } else { matchFlg = false; } // ??????? if (matchFlg) { tmpBytes = new byte[lineDataSize]; for (int i = 0; i < lineDataSize; i++) { tmpBytes[i] = lineBufs[assist + i]; } break; } } if (matchFlg) break; } break; } catch (IOException ie) { // IOException???1???? 
if (tryIdx == 1) throw ie; try { if (raf != null) raf.close(); if (wr != null) wr.close(); raf = new RandomAccessFile(file, "rwd"); wr = new BufferedWriter(new FileWriter(file, true)); accessor = new CacheContainer(); accessor.raf = raf; accessor.wr = wr; accessor.file = file; innerCache.put(file.getAbsolutePath(), accessor); } catch (Exception e) { throw e; } } } // ? if (tmpBytes != null) { if (tmpBytes[keyDataLength] != FileBaseDataMap.paddingSymbol) { int i = keyDataLength; int counter = 0; for (; i < tmpBytes.length; i++) { if (tmpBytes[i] == FileBaseDataMap.paddingSymbol) break; counter++; } ret = new String(tmpBytes, keyDataLength, counter, "UTF-8"); } } } catch (Exception e) { e.printStackTrace(); } return ret; }
From source file:org.apache.hadoop.hive.service.HSSessionItem.java
/**
 * Stores, replaces or deletes a user's module file under {@code <home>/pl/lib/<user>/}.
 *
 * <p>The user directory and an {@code __init__.py} marker file are created when missing.
 * A zero-length {@code module} deletes the module file; otherwise the file is truncated and
 * rewritten with the given bytes.
 *
 * <p>NOTE(review): {@code user} and {@code moduleName} are concatenated into the path
 * unchecked, so values containing {@code ..} or {@code /} can reach outside the library
 * directory. If these come from remote clients they should be validated by the caller or here.
 *
 * @param user       owner of the module; names the sub-directory
 * @param moduleName file name of the module
 * @param module     new content, or an empty array to delete the module
 * @return {@code true} on success; {@code false} if the file could not be created, opened
 *         or written
 * @throws HiveServerException if the user directory or the module file cannot be created,
 *                             or the module file cannot be deleted
 */
public boolean uploadModule(String user, String moduleName, byte[] module) throws HiveServerException {
    boolean res = true;
    String fname = getHome() + "/pl/lib/" + user + "/" + moduleName;
    RandomAccessFile raf;
    File f;

    String dname = getHome() + "/pl/lib/" + user;
    File d = new File(dname);

    if (!d.exists()) {
        if (!d.mkdir()) {
            l4j.error(getSessionName() + " try to mkdir " + dname + " failed.");
            throw new HiveServerException("Create user library failed.");
        }
    }

    File i = new File(getHome() + "/pl/lib/" + user + "/__init__.py");

    if (!i.exists()) {
        try {
            i.createNewFile();
        } catch (java.io.IOException oe) {
            // Best effort: the upload itself can still succeed, but leave a trace of the failure.
            l4j.error(getSessionName() + " try to create file " + i.getPath() + " failed w/ " + oe);
        }
    }

    try {
        f = new File(fname);
        if (!f.exists()) {
            if (!f.createNewFile()) {
                l4j.error("Try to create file " + fname + " failed.");
                throw new HiveServerException("Create user package failed.");
            }
        }
    } catch (java.io.IOException ex) {
        l4j.error(getSessionName() + " try to create file " + fname + " failed w/ " + ex);
        return false;
    }

    // An empty payload means "remove this module".
    if (module.length == 0) {
        if (!f.delete()) {
            l4j.error("Try to delete file " + fname + " failed.");
            throw new HiveServerException("Delete user package " + fname + " failed.");
        } else {
            return true;
        }
    }

    try {
        raf = new RandomAccessFile(f, "rw");
    } catch (java.io.FileNotFoundException ex) {
        l4j.error(getSessionName() + " try to open file " + fname + " failed, not found.");
        return false;
    }

    try {
        raf.setLength(0);
        raf.seek(0);
        raf.write(module);
    } catch (java.io.IOException ex) {
        l4j.error(getSessionName() + " try to truncate/write file " + fname + "failed w/ " + ex);
        return false;
    } finally {
        // The handle was previously never closed, leaking one descriptor per upload.
        try {
            raf.close();
        } catch (java.io.IOException ex) {
            l4j.error(getSessionName() + " try to close file " + fname + " failed w/ " + ex);
        }
    }
    return res;
}