List of usage examples for java.io.RandomAccessFile.readInt()
public final int readInt() throws IOException
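readInt() reads four bytes starting at the current file pointer and returns them as a big-endian int, throwing EOFException if fewer than four bytes remain. A minimal, self-contained sketch of the call (the file name data.bin and the value written are only illustrative):

import java.io.IOException;
import java.io.RandomAccessFile;

public class ReadIntExample {
    public static void main(String[] args) throws IOException {
        // hypothetical file used only for illustration
        try (RandomAccessFile raf = new RandomAccessFile("data.bin", "rw")) {
            raf.writeInt(42);          // writes 4 bytes, big-endian
            raf.seek(0);               // move the file pointer back to the start
            int value = raf.readInt(); // reads those 4 bytes back as an int
            System.out.println(value); // prints 42
        }
    }
}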
From source file: org.apache.hadoop.hdfs.server.namenode.TestEditLog.java

@Test
public void testEditChecksum() throws Exception {
    // start a cluster
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    FileSystem fileSys = null;
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
    cluster.waitActive();
    fileSys = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();

    FSImage fsimage = namesystem.getFSImage();
    final FSEditLog editLog = fsimage.getEditLog();
    fileSys.mkdirs(new Path("/tmp"));

    Iterator<StorageDirectory> iter = fsimage.getStorage().dirIterator(NameNodeDirType.EDITS);
    LinkedList<StorageDirectory> sds = new LinkedList<StorageDirectory>();
    while (iter.hasNext()) {
        sds.add(iter.next());
    }
    editLog.close();
    cluster.shutdown();

    for (StorageDirectory sd : sds) {
        File editFile = NNStorage.getFinalizedEditsFile(sd, 1, 3);
        assertTrue(editFile.exists());

        long fileLen = editFile.length();
        LOG.debug("Corrupting Log File: " + editFile + " len: " + fileLen);
        RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
        rwf.seek(fileLen - 4); // seek to checksum bytes
        int b = rwf.readInt();
        rwf.seek(fileLen - 4);
        rwf.writeInt(b + 1);
        rwf.close();
    }

    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).format(false).build();
        fail("should not be able to start");
    } catch (IOException e) {
        // expected
        assertNotNull("Cause of exception should be ChecksumException", e.getCause());
        assertEquals("Cause of exception should be ChecksumException", ChecksumException.class,
                e.getCause().getClass());
    }
}
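The test above flips the trailing 4-byte checksum of each edits file in place. Stripped of the MiniDFSCluster setup, the essential seek/readInt/seek/writeInt round trip looks roughly like this sketch (the file path is hypothetical):

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;

// Minimal sketch of the corrupt-in-place pattern from the test above;
// the path is hypothetical and stands in for a finalized edits file.
public class CorruptTrailingChecksum {
    public static void main(String[] args) throws IOException {
        File editFile = new File("/tmp/edits_0000001-0000003");
        long fileLen = editFile.length();
        try (RandomAccessFile rwf = new RandomAccessFile(editFile, "rw")) {
            rwf.seek(fileLen - 4);        // position on the trailing 4-byte checksum
            int checksum = rwf.readInt(); // read it (advances the file pointer by 4)
            rwf.seek(fileLen - 4);        // rewind to the same offset
            rwf.writeInt(checksum + 1);   // overwrite with a corrupted value
        }
    }
}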
From source file: dk.statsbiblioteket.util.LineReaderTest.java

public void dumpSpeed2Helper(LineReader lr, RandomAccessFile ra, boolean warmup) throws Exception {
    int seeks = 10000;
    Profiler profiler = new Profiler();
    profiler.setExpectedTotal(seeks);
    profiler.setBpsSpan(1000);
    long size = lr.length();
    Random random = new Random();

    profiler.reset();
    for (int i = 0; i < seeks; i++) {
        long pos = Math.round(Math.floor(random.nextDouble() * (size - 6)));
        try {
            lr.seek(pos);
        } catch (EOFException e) {
            fail("Reached EOF at position " + pos);
        }
        lr.readInt();
        profiler.beat();
    }
    if (!warmup) {
        System.out.println("Seeked and read an int " + seeks + " times with LR "
                + "on a file of size " + size + " at "
                + Math.round(profiler.getBps(true)) + " seeks/second");
    }

    profiler.reset();
    for (int i = 0; i < seeks; i++) {
        long pos = Math.round(Math.floor(random.nextDouble() * (size - 6)));
        try {
            ra.seek(pos);
        } catch (EOFException e) {
            fail("Reached EOF at position " + pos);
        }
        ra.readInt();
        profiler.beat();
    }
    if (!warmup) {
        System.out.println("Seeked and read an int " + seeks + " times with RA "
                + "on a file of size " + size + " at "
                + Math.round(profiler.getBps(true)) + " seeks/second");
    }
}
From source file: org.commoncrawl.service.listcrawler.CrawlHistoryManager.java

private ProxyCrawlHistoryItem readItem(RandomAccessFile fileStream) throws IOException {
    try {
        // read sync bytes ...
        fileStream.read(_syncByteBuffer);
        // validate ...
        if (!Arrays.equals(_header._sync, _syncByteBuffer)) {
            throw new IOException("Error Reading Sync Bytes for Item In Checkpoint");
        }
        int checksum = fileStream.readInt();
        int payloadSize = fileStream.readShort();
        if (payloadSize == 0) {
            throw new IOException("Invalid Payload Size Reading Item In Checkpoint");
        }
        // read the payload
        _payloadBuffer.setCapacity(payloadSize);
        fileStream.read(_payloadBuffer.get(), 0, payloadSize);

        _crc16in.reset();
        _crc16in.update(_payloadBuffer.get(), 0, payloadSize);

        // if computed checksum does not match file checksum !!!
        if (_crc16in.getValue() != (long) checksum) {
            throw new IOException("Checksum Mismatch Expected:" + checksum + " got:" + _crc16in.getValue()
                    + " while Reading Item");
        }

        _payloadInputStream.reset(_payloadBuffer.get(), 0, payloadSize);

        ProxyCrawlHistoryItem itemOut = new ProxyCrawlHistoryItem();
        itemOut.deserialize(_payloadInputStream, new BinaryProtocol());

        return itemOut;
    } catch (Exception e) {
        LOG.error(CCStringUtils.stringifyException(e));
        throw new IOException(e);
    }
}
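The record format above stores a checksum (read with readInt), a payload length (readShort), and then the payload bytes, verifying the checksum after the read. A simplified sketch of the same verify-after-read idea, using java.util.zip.CRC32 in place of the project's CRC16 helper (the class and method names here are illustrative, not part of the original code):

import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.zip.CRC32;

// Simplified sketch of the checksum-then-payload record layout above.
// CRC32 stands in for the project's CRC16; nothing here is the original API.
public class ChecksummedRecordReader {
    public static byte[] readRecord(RandomAccessFile in) throws IOException {
        long storedChecksum = in.readInt() & 0xFFFFFFFFL; // 4-byte checksum, read unsigned
        int payloadSize = in.readShort();                 // payload length follows
        if (payloadSize <= 0) {
            throw new IOException("Invalid payload size: " + payloadSize);
        }
        byte[] payload = new byte[payloadSize];
        in.readFully(payload);                            // read exactly payloadSize bytes

        CRC32 crc = new CRC32();
        crc.update(payload, 0, payloadSize);
        if (crc.getValue() != storedChecksum) {
            throw new IOException("Checksum mismatch: expected " + storedChecksum
                    + " got " + crc.getValue());
        }
        return payload;
    }
}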
From source file: org.commoncrawl.service.crawler.CrawlLog.java

public static void walkCrawlLogFile(File crawlLogPath, long startOffset) throws IOException {

    // and open the crawl log file ...
    RandomAccessFile inputStream = null;

    IOException exception = null;

    CRC32 crc = new CRC32();
    CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 17);
    byte[] syncBytesBuffer = new byte[SYNC_BYTES_SIZE];

    // save position for potential debug output.
    long lastReadPosition = 0;

    try {
        inputStream = new RandomAccessFile(crawlLogPath, "rw");

        // and a data input stream ...
        RandomAccessFile reader = inputStream;
        // seek to zero
        reader.seek(0L);

        // read the header ...
        LogFileHeader header = readLogFileHeader(reader);

        System.out.println("Header ItemCount:" + header._itemCount + " FileSize:" + header._fileSize);

        if (startOffset != 0L) {
            System.out.println("Preseeking to:" + startOffset);
            reader.seek(startOffset);
        }

        Configuration conf = new Configuration();

        // read a crawl url from the stream...
        long recordCount = 0;
        while (inputStream.getFilePointer() < header._fileSize) {
            // System.out.println("PRE-SYNC SeekPos:" + inputStream.getFilePointer());
            if (seekToNextSyncBytesPos(syncBytesBuffer, reader, header._fileSize)) {
                // System.out.println("POST-SYNC SeekPos:" + inputStream.getFilePointer());
                lastReadPosition = inputStream.getFilePointer();

                // skip sync
                inputStream.skipBytes(SYNC_BYTES_SIZE);

                // read length ...
                int urlDataLen = reader.readInt();
                long urlDataCRC = reader.readLong();

                if (urlDataLen > buffer.getBuffer().length) {
                    buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                }
                reader.read(buffer.getBuffer(), 0, urlDataLen);
                crc.reset();
                crc.update(buffer.getBuffer(), 0, urlDataLen);

                long computedValue = crc.getValue();

                // validate crc values ...
                if (computedValue != urlDataCRC) {
                    LOG.error("CRC Mismatch Detected during HDFS transfer in CrawlLog:"
                            + crawlLogPath.getAbsolutePath() + " FilePosition:" + lastReadPosition);
                    inputStream.seek(lastReadPosition + 1);
                } else {
                    if (recordCount++ % 10000 == 0) {
                        // allocate a crawl url data structure
                        CrawlURL url = new CrawlURL();
                        DataInputStream bufferReader = new DataInputStream(
                                new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));
                        // populate it from the (in memory) data stream
                        url.readFields(bufferReader);

                        System.out.println("Record:" + recordCount + " At:" + lastReadPosition + " URL:"
                                + url.getUrl() + " BuffSize:" + urlDataLen + " ContentLen:"
                                + url.getContentRaw().getCount() + " LastModified:"
                                + new Date(url.getLastAttemptTime()).toString());
                    }
                }
            } else {
                break;
            }
        }
    } catch (EOFException e) {
        LOG.error("Caught EOF Exception during read of local CrawlLog:" + crawlLogPath.getAbsolutePath()
                + " FilePosition:" + lastReadPosition);
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        exception = e;
        throw e;
    } finally {
        if (inputStream != null)
            inputStream.close();
    }
}
From source file: au.org.ala.layers.dao.ObjectDAOImpl.java

@Override
public List<Objects> getObjectsById(String id, int start, int pageSize) {
    logger.info("Getting object info for fid = " + id);

    String limit_offset = " limit " + (pageSize < 0 ? "all" : pageSize) + " offset " + start;
    String sql = "select o.pid as pid, o.id as id, o.name as name, o.desc as description, "
            + "o.fid as fid, f.name as fieldname, o.bbox, o.area_km, "
            + "ST_AsText(ST_Centroid(o.the_geom)) as centroid,"
            + "GeometryType(o.the_geom) as featureType from objects o, fields f "
            + "where o.fid = ? and o.fid = f.id order by o.pid " + limit_offset;
    List<Objects> objects = jdbcTemplate.query(sql,
            ParameterizedBeanPropertyRowMapper.newInstance(Objects.class), id);

    updateObjectWms(objects);

    // get grid classes
    if (objects == null || objects.isEmpty()) {
        objects = new ArrayList<Objects>();
        IntersectionFile f = layerIntersectDao.getConfig().getIntersectionFile(id);
        if (f != null && f.getClasses() != null) {
            // shape position
            int pos = 0;
            for (Entry<Integer, GridClass> c : f.getClasses().entrySet()) {
                File file = new File(f.getFilePath() + File.separator + c.getKey() + ".wkt.index.dat");
                if (f.getType().equals("a") || !file.exists()) {
                    // class pid
                    if (pageSize == -1 || (pos >= start && pos - start < pageSize)) {
                        Objects o = new Objects();
                        o.setPid(f.getLayerPid() + ":" + c.getKey());
                        o.setId(f.getLayerPid() + ":" + c.getKey());
                        o.setName(c.getValue().getName());
                        o.setFid(f.getFieldId());
                        o.setFieldname(f.getFieldName());
                        o.setBbox(c.getValue().getBbox());
                        o.setArea_km(c.getValue().getArea_km());
                        o.setWmsurl(getGridClassWms(f.getLayerName(), c.getValue()));
                        objects.add(o);
                    }
                    pos++;
                    if (pageSize != -1 && pos >= start + pageSize) {
                        break;
                    }
                } else {
                    // polygon pid
                    RandomAccessFile raf = null;
                    try {
                        raf = new RandomAccessFile(file, "r");
                        long itemSize = (4 + 4 + 4 * 4 + 4);
                        long len = raf.length() / itemSize;
                        // group
                        if (pageSize != -1 && pos + len < start) {
                            pos += len;
                        } else {
                            // record fields: number, character offset, minx, miny, maxx, maxy, area sq km
                            int i = 0;
                            if (pageSize != -1 && pos < start) {
                                // the first object requested is in this file, seek to the start
                                i = start - pos;
                                pos += i;
                                raf.seek(i * itemSize);
                            }
                            for (; i < len; i++) {
                                int n = raf.readInt();
                                /* int charoffset = */ raf.readInt();
                                float minx = raf.readFloat();
                                float miny = raf.readFloat();
                                float maxx = raf.readFloat();
                                float maxy = raf.readFloat();
                                float area = raf.readFloat();
                                if (pageSize == -1 || (pos >= start && pos - start < pageSize)) {
                                    Objects o = new Objects();
                                    o.setPid(f.getLayerPid() + ":" + c.getKey() + ":" + n);
                                    o.setId(f.getLayerPid() + ":" + c.getKey() + ":" + n);
                                    o.setName(c.getValue().getName());
                                    o.setFid(f.getFieldId());
                                    o.setFieldname(f.getFieldName());
                                    o.setBbox("POLYGON((" + minx + " " + miny + "," + minx + " " + maxy + ","
                                            + maxx + " " + maxy + "," + maxx + " " + miny + ","
                                            + minx + " " + miny + "))");
                                    o.setArea_km(1.0 * area);
                                    o.setWmsurl(getGridPolygonWms(f.getLayerName(), n));
                                    objects.add(o);
                                }
                                pos++;
                                if (pageSize != -1 && pos >= start + pageSize) {
                                    break;
                                }
                            }
                        }
                    } catch (Exception e) {
                        logger.error(e.getMessage(), e);
                    } finally {
                        if (raf != null) {
                            try {
                                raf.close();
                            } catch (Exception e) {
                                logger.error(e.getMessage(), e);
                            }
                        }
                    }
                    if (pageSize != -1 && pos >= start + pageSize) {
                        break;
                    }
                }
            }
        }
    }

    return objects;
}
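The .wkt.index.dat files above are read as fixed-width 28-byte records (an int id, an int character offset, four floats for the bounding box, and a float area), which is what makes the raf.seek(i * itemSize) random access possible. A stripped-down sketch of that record walk, keeping only the layout repeated from the code above (the file name and everything else is hypothetical):

import java.io.IOException;
import java.io.RandomAccessFile;

// Stripped-down sketch of walking the fixed-width index records read above.
// The file name is a placeholder; the 28-byte layout mirrors the code above.
public class GridIndexWalker {
    private static final long ITEM_SIZE = 4 + 4 + 4 * 4 + 4; // id, charoffset, minx, miny, maxx, maxy, area

    public static void main(String[] args) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile("1.wkt.index.dat", "r")) {
            long records = raf.length() / ITEM_SIZE;
            for (long i = 0; i < records; i++) {
                raf.seek(i * ITEM_SIZE);      // jump straight to record i
                int id = raf.readInt();       // polygon number
                raf.readInt();                // character offset (unused here)
                float minx = raf.readFloat();
                float miny = raf.readFloat();
                float maxx = raf.readFloat();
                float maxy = raf.readFloat();
                float area = raf.readFloat(); // area in sq km
                System.out.println(id + " bbox=(" + minx + "," + miny + ","
                        + maxx + "," + maxy + ") area=" + area);
            }
        }
    }
}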
From source file: org.commoncrawl.service.crawler.CrawlList.java

private static int readTargetsFromLogFile(CrawlList domain, File logFileName, int desiredReadAmount,
        IntrusiveList<CrawlTarget> targetsOut) throws IOException {

    int itemsRead = 0;

    if (logFileName.exists()) {

        RandomAccessFile file = new RandomAccessFile(logFileName, "rw");

        LogFileHeader header = new LogFileHeader();

        try {
            long headerOffset = readLogFileHeader(file, header);

            // seek to appropriate read position
            if (header._readPos != 0)
                file.seek(header._readPos);

            int itemsToRead = Math.min(desiredReadAmount, header._itemCount);

            PersistentCrawlTarget persistentTarget = new PersistentCrawlTarget();
            CRC32 crc = new CRC32();
            CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 16);

            for (int i = 0; i < itemsToRead; ++i) {
                // read length ...
                int urlDataLen = file.readInt();
                long urlDataCRC = file.readLong();

                buffer.reset();

                if (urlDataLen > buffer.getBuffer().length) {
                    buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                }
                file.read(buffer.getBuffer(), 0, urlDataLen);
                crc.reset();
                crc.update(buffer.getBuffer(), 0, urlDataLen);

                long computedValue = crc.getValue();

                // validate crc values ...
                if (computedValue != urlDataCRC) {
                    throw new IOException("Crawl Target Log File Corrupt");
                } else {
                    // populate a persistentTarget from the (in memory) data stream
                    DataInputStream bufferReader = new DataInputStream(
                            new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));

                    persistentTarget.clear();
                    persistentTarget.readFields(bufferReader);

                    // populate a new crawl target structure ...
                    CrawlTarget newTarget = new CrawlTarget(domain, persistentTarget);

                    targetsOut.addTail(newTarget);
                }
            }

            itemsRead = itemsToRead;

            // now update header ...
            header._itemCount -= itemsRead;
            // now if item count is non zero ...
            if (header._itemCount != 0) {
                // set read cursor to next record location
                header._readPos = file.getFilePointer();
            }
            // otherwise ...
            else {
                // reset both cursors ...
                header._readPos = 0;
                header._writePos = 0;
            }

            // now write out header anew ...
            writeLogFileHeader(file, header);

        } finally {
            if (file != null) {
                file.close();
            }
        }
    }

    return itemsRead;
}
From source file: org.commoncrawl.service.listcrawler.CrawlList.java

void resetSubDomainCounts() throws IOException {

    LOG.info("*** LIST:" + getListId() + " Reset SubDomain Queued Counts.");

    if (_subDomainMetadataFile.exists()) {

        LOG.info("*** LIST:" + getListId() + " FILE EXISTS .");

        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        DataInputBuffer inputBuffer = new DataInputBuffer();
        DataOutputBuffer outputBuffer = new DataOutputBuffer(CrawlListMetadata.Constants.FixedDataSize);

        try {
            // skip version
            file.read();
            // read item count
            int itemCount = file.readInt();

            LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

            CrawlListMetadata newMetadata = new CrawlListMetadata();

            for (int i = 0; i < itemCount; ++i) {

                long orignalPos = file.getFilePointer();
                file.readFully(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
                inputBuffer.reset(outputBuffer.getData(), CrawlListMetadata.Constants.FixedDataSize);
                try {
                    newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                } catch (Exception e) {
                    LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }

                // ok reset everything except hashes and first/last url pointers
                int urlCount = newMetadata.getUrlCount();
                long firstRecordOffset = newMetadata.getFirstRecordOffset();
                long lastRecordOffset = newMetadata.getLastRecordOffset();
                String domainName = newMetadata.getDomainName();
                long domainHash = newMetadata.getDomainHash();

                // reset
                newMetadata.clear();
                // restore
                newMetadata.setUrlCount(urlCount);
                newMetadata.setFirstRecordOffset(firstRecordOffset);
                newMetadata.setLastRecordOffset(lastRecordOffset);
                newMetadata.setDomainName(domainName);
                newMetadata.setDomainHash(domainHash);

                // serialize it ...
                outputBuffer.reset();
                newMetadata.serialize(outputBuffer, new BinaryProtocol());

                // write it back to disk
                file.seek(orignalPos);
                // and rewrite it ...
                file.write(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
            }
        } finally {
            file.close();
        }

        LOG.info("*** LIST:" + getListId() + " DONE RESETTING SUBDOMAIN METADATA QUEUE COUNTS");
    }
}
From source file: org.commoncrawl.service.listcrawler.CrawlList.java

public ArrayList<CrawlListDomainItem> getSubDomainList(int offset, int count) {
    synchronized (_metadata) {

        ArrayList<CrawlListDomainItem> itemsOut = new ArrayList<CrawlListDomainItem>();

        try {
            synchronized (_subDomainMetadataFile) {
                RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
                DataInputBuffer inputBuffer = new DataInputBuffer();
                byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize];

                try {
                    // skip version
                    file.read();
                    // read item count
                    int itemCount = file.readInt();

                    int i = offset;
                    int end = Math.min(i + count, itemCount);

                    LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

                    if (i < itemCount) {

                        file.seek(5 + (CrawlListMetadata.Constants.FixedDataSize * offset));

                        CrawlListMetadata newMetadata = new CrawlListMetadata();

                        for (; i < end; ++i) {
                            long orignalPos = file.getFilePointer();
                            file.readFully(fixedDataBlock, 0, fixedDataBlock.length);
                            inputBuffer.reset(fixedDataBlock, fixedDataBlock.length);
                            newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                            itemsOut.add(buildSubDomainSummary(newMetadata.getDomainName(), newMetadata));
                        }
                    }
                } finally {
                    file.close();
                }
            }
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
        }

        LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK");

        return itemsOut;
    }
}
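The sub-domain metadata file read above evidently starts with a one-byte version and a four-byte item count (hence the single file.read() followed by readInt(), and the seek to 5 + FixedDataSize * offset). A small sketch of reading such a header and jumping to a record by index; the record size, file name, and class name are placeholders, not part of the CommonCrawl code:

import java.io.IOException;
import java.io.RandomAccessFile;

// Sketch of the header layout implied above: 1-byte version, 4-byte item count,
// then fixed-size records. Record size and file path are placeholder assumptions.
public class FixedRecordIndex {
    private static final int HEADER_SIZE = 1 + 4; // version byte + item-count int
    private static final int RECORD_SIZE = 64;    // placeholder for FixedDataSize

    public static byte[] readRecord(String path, int index) throws IOException {
        try (RandomAccessFile file = new RandomAccessFile(path, "r")) {
            int version = file.read();       // skip (or inspect) the 1-byte version
            int itemCount = file.readInt();  // 4-byte record count
            if (index < 0 || index >= itemCount) {
                throw new IOException("Record index out of range: " + index);
            }
            file.seek(HEADER_SIZE + (long) RECORD_SIZE * index); // jump to the record
            byte[] record = new byte[RECORD_SIZE];
            file.readFully(record);
            return record;
        }
    }
}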
From source file: org.commoncrawl.service.listcrawler.CrawlList.java

void loadSubDomainMetadataFromDisk() throws IOException {
    LOG.info("*** LIST:" + getListId() + " LOAD SUBDOMAIN METADATA FROM DISK ... ");
    if (_subDomainMetadataFile.exists()) {

        LOG.info("*** LIST:" + getListId() + " FILE EXISTS LOADING SUBDOMAIN DATA FROM DISK.");

        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        DataInputBuffer inputBuffer = new DataInputBuffer();
        byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize];

        try {
            // skip version
            file.read();
            // read item count
            int itemCount = file.readInt();

            LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

            CrawlListMetadata newMetadata = new CrawlListMetadata();

            TreeMap<Long, Integer> idToOffsetMap = new TreeMap<Long, Integer>();
            for (int i = 0; i < itemCount; ++i) {

                long orignalPos = file.getFilePointer();
                file.readFully(fixedDataBlock, 0, fixedDataBlock.length);
                inputBuffer.reset(fixedDataBlock, fixedDataBlock.length);
                try {
                    newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                } catch (Exception e) {
                    LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
                idToOffsetMap.put(newMetadata.getDomainHash(), (int) orignalPos);
            }

            // write lookup table
            _offsetLookupTable = new DataOutputBuffer(idToOffsetMap.size() * OFFSET_TABLE_ENTRY_SIZE);

            for (Map.Entry<Long, Integer> entry : idToOffsetMap.entrySet()) {
                _offsetLookupTable.writeLong(entry.getKey());
                _offsetLookupTable.writeInt(entry.getValue());
            }
        } finally {
            file.close();
        }
        LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK");
    } else {

        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA DOES NOT EXIST! LOADING FROM SCRATCH");

        RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
        RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");

        try {
            // ok rebuild top level metadata as well
            _metadata.clear();

            OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();

            int processedCount = 0;
            while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {

                long position = fixedDataReader.getFilePointer();

                // store offset in item
                item._fileOffset = position;
                // load from disk
                item.deserialize(fixedDataReader);
                try {
                    // seek to string data
                    stringDataReader.seek(item._stringsOffset);
                    // and skip buffer length
                    WritableUtils.readVInt(stringDataReader);
                    // and read primary string
                    String url = stringDataReader.readUTF();

                    // get metadata object for subdomain
                    CrawlListMetadata subDomainMetadata = getTransientSubDomainMetadata(url);

                    // increment url count
                    subDomainMetadata.setUrlCount(subDomainMetadata.getUrlCount() + 1);

                    // increment top level metadata count
                    _metadata.setUrlCount(_metadata.getUrlCount() + 1);

                    // update top level metadata ..
                    updateMetadata(item, _metadata, 0);

                    // update sub-domain metadata object from item data
                    updateMetadata(item, subDomainMetadata, 0);

                    ++processedCount;
                } catch (IOException e) {
                    LOG.error("Exception Reading String Data For Item:" + (processedCount + 1));
                    LOG.error("Exception:" + CCStringUtils.stringifyException(e));
                    LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:"
                            + stringDataReader.getFilePointer());
                }

                if (processedCount % 10000 == 0) {
                    LOG.info("*** LIST:" + getListId() + " Processed:" + processedCount + " Items");
                }
            }

            // ok commit top level metadata to disk as well
            writeMetadataToDisk();

        } catch (IOException e) {
            LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                    + CCStringUtils.stringifyException(e));
            LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:"
                    + stringDataReader.getFilePointer());
            _queueState = QueueState.QUEUED;
        } finally {
            fixedDataReader.close();
            stringDataReader.close();
        }

        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA . WRITING TO DISK");

        // write metadata to disk
        writeInitialSubDomainMetadataToDisk();

        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA . WRITE COMPLETE");
    }
}