List of usage examples for java.io.RandomAccessFile.close()
public void close() throws IOException
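RandomAccessFile implements Closeable, so close() releases the underlying file descriptor; in the examples below it is almost always called from a finally block. On Java 7 and later the same guarantee is available through try-with-resources; a minimal standalone sketch (the file name is a placeholder):

import java.io.IOException;
import java.io.RandomAccessFile;

public class CloseExample {
    public static void main(String[] args) throws IOException {
        // try-with-resources invokes close() automatically, even if an exception is thrown
        try (RandomAccessFile raf = new RandomAccessFile("data.bin", "r")) {
            System.out.println("file length: " + raf.length());
        } // raf.close() has already run at this point
    }
}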
From source file:org.commoncrawl.service.crawler.CrawlList.java
private static int readTargetsFromLogFile(CrawlList domain, File logFileName, int desiredReadAmount,
        IntrusiveList<CrawlTarget> targetsOut) throws IOException {
    int itemsRead = 0;
    if (logFileName.exists()) {
        RandomAccessFile file = new RandomAccessFile(logFileName, "rw");
        LogFileHeader header = new LogFileHeader();
        try {
            long headerOffset = readLogFileHeader(file, header);
            // seek to the appropriate read position
            if (header._readPos != 0)
                file.seek(header._readPos);
            int itemsToRead = Math.min(desiredReadAmount, header._itemCount);
            PersistentCrawlTarget persistentTarget = new PersistentCrawlTarget();
            CRC32 crc = new CRC32();
            CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 16);
            for (int i = 0; i < itemsToRead; ++i) {
                // read length ...
                int urlDataLen = file.readInt();
                long urlDataCRC = file.readLong();
                buffer.reset();
                if (urlDataLen > buffer.getBuffer().length) {
                    buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                }
                file.read(buffer.getBuffer(), 0, urlDataLen);
                crc.reset();
                crc.update(buffer.getBuffer(), 0, urlDataLen);
                long computedValue = crc.getValue();
                // validate crc values ...
                if (computedValue != urlDataCRC) {
                    throw new IOException("Crawl Target Log File Corrupt");
                } else {
                    // populate a persistentTarget from the (in memory) data stream
                    DataInputStream bufferReader = new DataInputStream(
                            new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));
                    persistentTarget.clear();
                    persistentTarget.readFields(bufferReader);
                    // populate a new crawl target structure ...
                    CrawlTarget newTarget = new CrawlTarget(domain, persistentTarget);
                    targetsOut.addTail(newTarget);
                }
            }
            itemsRead = itemsToRead;
            // now update header ...
            header._itemCount -= itemsRead;
            // if the item count is non-zero, set the read cursor to the next record location
            if (header._itemCount != 0) {
                header._readPos = file.getFilePointer();
            } else {
                // otherwise reset both cursors ...
                header._readPos = 0;
                header._writePos = 0;
            }
            // now write out the header anew ...
            writeLogFileHeader(file, header);
        } finally {
            if (file != null) {
                file.close();
            }
        }
    }
    return itemsRead;
}
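One caveat worth noting in the example above: RandomAccessFile.read(byte[], int, int) may return fewer bytes than requested, which would make the CRC check fail on a valid file. A minimal sketch of the same length/CRC record read using readFully, which blocks until the buffer is filled (the record layout, an int length followed by a long CRC32 and the payload, is taken from the example):

import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.zip.CRC32;

// Read one length-prefixed, CRC-checked record at the file's current position.
static byte[] readCheckedRecord(RandomAccessFile file) throws IOException {
    int urlDataLen = file.readInt();   // payload length
    long urlDataCRC = file.readLong(); // expected CRC32 of the payload
    byte[] data = new byte[urlDataLen];
    file.readFully(data);              // unlike read(), keeps reading until all bytes arrive
    CRC32 crc = new CRC32();
    crc.update(data, 0, urlDataLen);
    if (crc.getValue() != urlDataCRC) {
        throw new IOException("Crawl Target Log File Corrupt");
    }
    return data;
}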
From source file:gate.util.reporting.DocTimeReporter.java
/**
 * Provides the functionality to separate out pipeline specific benchmark
 * entries into separate temporary benchmark files in a temporary folder in
 * the current working directory.
 *
 * @param benchmarkFile
 *          An object of type File representing the input benchmark file.
 * @param report
 *          A file handle to the report file to be written.
 * @throws BenchmarkReportFileAccessException
 *           if any error occurs while accessing the input benchmark file or
 *           while splitting it.
 * @throws BenchmarkReportInputFileFormatException
 *           if the given input benchmark file is invalid or is modified
 *           while generating the report.
 */
private void splitBenchmarkFile(File benchmarkFile, File report)
        throws BenchmarkReportFileAccessException, BenchmarkReportInputFileFormatException {
    File dir = temporaryDirectory;
    // Folder already exists; then delete all files in the temporary folder
    if (dir.isDirectory()) {
        File files[] = dir.listFiles();
        for (int count = 0; count < files.length; count++) {
            if (!files[count].delete()) {
                throw new BenchmarkReportFileAccessException(
                        "Could not delete files in the folder \"" + temporaryDirectory + "\"");
            }
        }
    } else if (!dir.mkdir()) {
        throw new BenchmarkReportFileAccessException(
                "Could not create temporary folder \"" + temporaryDirectory + "\"");
    }
    // delete the previous report from the filesystem
    if (getPrintMedia().equalsIgnoreCase(MEDIA_TEXT)) {
        deleteFile(new File(report.getAbsolutePath() + ".txt"));
    } else if (getPrintMedia().equalsIgnoreCase(MEDIA_HTML)) {
        deleteFile(new File(report.getAbsolutePath() + ".html"));
    }
    RandomAccessFile in = null;
    BufferedWriter out = null;
    try {
        String logEntry = "";
        long fromPos = 0;
        if (getLogicalStart() != null) {
            fromPos = tail(benchmarkFile, FILE_CHUNK_SIZE);
        }
        in = new RandomAccessFile(benchmarkFile, "r");
        if (getLogicalStart() != null) {
            in.seek(fromPos);
        }
        ArrayList<String> startTokens = new ArrayList<String>();
        String lastStart = "";
        Pattern pattern = Pattern.compile("(\\d+) (\\d+) (.*) (.*) \\{(.*)\\}");
        Matcher matcher = null;
        File benchmarkFileName = null;
        while ((logEntry = in.readLine()) != null) {
            matcher = pattern.matcher(logEntry);
            String startToken = "";
            if (logEntry.matches(".*START.*")) {
                String[] splittedStartEntry = logEntry.split("\\s");
                if (splittedStartEntry.length > 2) {
                    startToken = splittedStartEntry[2];
                } else {
                    throw new BenchmarkReportInputFileFormatException(getBenchmarkFile() + " is invalid.");
                }
                if (startToken.endsWith("Start")) {
                    continue;
                }
                if (!startTokens.contains(startToken)) {
                    // create a new file for the new pipeline
                    startTokens.add(startToken);
                    benchmarkFileName = new File(temporaryDirectory, startToken + "_benchmark.txt");
                    if (!benchmarkFileName.createNewFile()) {
                        throw new BenchmarkReportFileAccessException("Could not create \"" + startToken
                                + "_benchmark.txt" + "\" in directory named \"" + temporaryDirectory + "\"");
                    }
                    out = new BufferedWriter(new FileWriter(benchmarkFileName));
                    out.write(logEntry);
                    out.newLine();
                }
            }
            // if a valid benchmark entry then write it to the pipeline specific file
            if (matcher != null && matcher.matches()
                    && (validateLogEntry(matcher.group(3), startTokens)
                            || logEntry.matches(".*documentLoaded.*"))) {
                startToken = matcher.group(3).split("\\.")[0];
                if (!(lastStart.equals(startToken))) {
                    if (out != null) {
                        out.close();
                    }
                    benchmarkFileName = new File(temporaryDirectory, startToken + "_benchmark.txt");
                    out = new BufferedWriter(new FileWriter(benchmarkFileName, true));
                }
                if (out != null) {
                    out.write(logEntry);
                    out.newLine();
                }
                lastStart = startToken;
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            if (in != null) {
                in.close();
            }
            if (out != null) {
                out.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
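Note that RandomAccessFile.readLine(), used above, converts each byte to a char by zero-extension, so it cannot decode multi-byte encodings. When seeking to a start offset is the only reason for using RandomAccessFile, one alternative is to position a FileChannel and wrap the stream in a charset-aware reader; a minimal sketch, assuming UTF-8 input:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

// Read lines starting at fromPos, with proper UTF-8 decoding.
static void readLinesFrom(File file, long fromPos) throws IOException {
    FileInputStream fis = new FileInputStream(file);
    fis.getChannel().position(fromPos); // seek, like RandomAccessFile.seek()
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            System.out.println(line);
        }
    } // closing the reader also closes the underlying stream
}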
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
/**
 * Resubmit failed items.
 *
 * @param loader
 */
public void requeueFailedItems(CrawlQueueLoader loader) throws IOException {
    synchronized (this) {
        _queueState = QueueState.QUEUEING;
    }
    RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {
        OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
        URLFP fingerprint = new URLFP();
        while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
            item.deserialize(fixedDataReader);
            boolean queueItem = false;
            if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS)) {
                if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS)) {
                    queueItem = (item._redirectStatus != 0);
                    if (!queueItem) {
                        if (item._redirectHttpResult != 200 && item._redirectHttpResult != 404) {
                            queueItem = true;
                        }
                    }
                } else {
                    queueItem = (item._crawlStatus != 0);
                    if (!queueItem) {
                        if (item._httpResultCode != 200 && item._httpResultCode != 404) {
                            queueItem = true;
                        }
                    }
                }
                if (queueItem) {
                    // seek to string data
                    stringDataReader.seek(item._stringsOffset);
                    // and skip buffer length
                    WritableUtils.readVInt(stringDataReader);
                    // and read primary string
                    String url = stringDataReader.readUTF();
                    // and spill
                    fingerprint.setDomainHash(item._domainHash);
                    fingerprint.setUrlHash(item._urlFingerprint);
                    loader.queueURL(fingerprint, url);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                + CCStringUtils.stringifyException(e));
        _queueState = QueueState.QUEUED;
    } finally {
        fixedDataReader.close();
        stringDataReader.close();
    }
}
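The finally block above closes the two readers sequentially; if the first close() throws, the second file stays open. A minimal sketch of the same two-file pattern using try-with-resources, which closes both in reverse order even when one close() fails (file names and the loop body are placeholders):

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;

static void scanBothFiles(File fixedDataFile, File variableDataFile) throws IOException {
    try (RandomAccessFile fixedDataReader = new RandomAccessFile(fixedDataFile, "r");
            RandomAccessFile stringDataReader = new RandomAccessFile(variableDataFile, "r")) {
        // iterate fixedDataReader and seek into stringDataReader here, as in the example above
        System.out.println("fixed data bytes: " + fixedDataReader.length());
    } // both close() calls run here, stringDataReader first
}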
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
void writeInitialSubDomainMetadataToDisk() throws IOException {
    RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
    try {
        file.writeByte(0); // version
        file.writeInt(_transientSubDomainStats.size());
        ArrayList<CrawlListMetadata> sortedMetadata = new ArrayList<CrawlListMetadata>();
        sortedMetadata.addAll(_transientSubDomainStats.values());
        _transientSubDomainStats = null;
        CrawlListMetadata metadataArray[] = sortedMetadata.toArray(new CrawlListMetadata[0]);
        Arrays.sort(metadataArray, new Comparator<CrawlListMetadata>() {
            @Override
            public int compare(CrawlListMetadata o1, CrawlListMetadata o2) {
                int result = ((Integer) o2.getUrlCount()).compareTo(o1.getUrlCount());
                if (result == 0) {
                    result = o1.getDomainName().compareTo(o2.getDomainName());
                }
                return result;
            }
        });
        DataOutputBuffer outputBuffer = new DataOutputBuffer(CrawlListMetadata.Constants.FixedDataSize);
        TreeMap<Long, Integer> idToOffsetMap = new TreeMap<Long, Integer>();
        for (CrawlListMetadata entry : metadataArray) {
            // reset output buffer
            outputBuffer.reset();
            // write item to disk
            entry.serialize(outputBuffer, new BinaryProtocol());
            if (outputBuffer.getLength() > CrawlListMetadata.Constants.FixedDataSize) {
                LOG.fatal("Metadata Serialization for List:" + getListId() + " SubDomain:"
                        + entry.getDomainName());
                System.out.println("Metadata Serialization for List:" + getListId() + " SubDomain:"
                        + entry.getDomainName());
            }
            // save offset
            idToOffsetMap.put(entry.getDomainHash(), (int) file.getFilePointer());
            // write out fixed data size
            file.write(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
        }
        // write lookup table
        _offsetLookupTable = new DataOutputBuffer(idToOffsetMap.size() * OFFSET_TABLE_ENTRY_SIZE);
        for (Map.Entry<Long, Integer> entry : idToOffsetMap.entrySet()) {
            _offsetLookupTable.writeLong(entry.getKey());
            _offsetLookupTable.writeInt(entry.getValue());
        }
    } finally {
        file.close();
    }
    _transientSubDomainStats = null;
}
From source file:edu.umass.cs.gigapaxos.SQLPaxosLogger.java
private ArrayList<byte[]> getJournaledMessage(FileOffsetLength[] fols) throws IOException {
    ArrayList<byte[]> logStrings = new ArrayList<byte[]>();
    RandomAccessFile raf = null;
    String filename = null;
    for (FileOffsetLength fol : fols) {
        try {
            if (raf == null) {
                raf = new RandomAccessFile(filename = fol.file, "r");
            } else if (!filename.equals(fol.file)) {
                raf.close();
                raf = new RandomAccessFile(filename = fol.file, "r");
            }
            logStrings.add(this.getJournaledMessage(fol.file, fol.offset, fol.length, raf));
        } catch (IOException e) {
            if (raf != null)
                raf.close();
            raf = null;
        }
    }
    return logStrings;
}
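One subtlety in the example above: the handle is only closed when the target file changes or an IOException occurs, so the file opened for the final group of entries is never closed when the loop completes normally. A minimal sketch of the same reuse-across-entries pattern with an unconditional close at the end (FileOffsetLength and the single-message getJournaledMessage overload are assumed from the surrounding class; per-entry error handling is omitted for brevity):

private ArrayList<byte[]> getJournaledMessages(FileOffsetLength[] fols) throws IOException {
    ArrayList<byte[]> logStrings = new ArrayList<byte[]>();
    RandomAccessFile raf = null;
    String filename = null;
    try {
        for (FileOffsetLength fol : fols) {
            // reopen only when the entry points into a different file
            if (raf == null || !filename.equals(fol.file)) {
                if (raf != null)
                    raf.close();
                raf = new RandomAccessFile(filename = fol.file, "r");
            }
            logStrings.add(this.getJournaledMessage(fol.file, fol.offset, fol.length, raf));
        }
    } finally {
        if (raf != null)
            raf.close(); // release the handle left open by the last iteration
    }
    return logStrings;
}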
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
void resetSubDomainCounts() throws IOException {
    LOG.info("*** LIST:" + getListId() + " Reset SubDomain Queued Counts.");
    if (_subDomainMetadataFile.exists()) {
        LOG.info("*** LIST:" + getListId() + " FILE EXISTS.");
        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        DataInputBuffer inputBuffer = new DataInputBuffer();
        DataOutputBuffer outputBuffer = new DataOutputBuffer(CrawlListMetadata.Constants.FixedDataSize);
        try {
            // skip version
            file.read();
            // read item count
            int itemCount = file.readInt();
            LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);
            CrawlListMetadata newMetadata = new CrawlListMetadata();
            for (int i = 0; i < itemCount; ++i) {
                long originalPos = file.getFilePointer();
                file.readFully(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
                inputBuffer.reset(outputBuffer.getData(), CrawlListMetadata.Constants.FixedDataSize);
                try {
                    newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                } catch (Exception e) {
                    LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
                // reset everything except hashes and first/last url pointers
                int urlCount = newMetadata.getUrlCount();
                long firstRecordOffset = newMetadata.getFirstRecordOffset();
                long lastRecordOffset = newMetadata.getLastRecordOffset();
                String domainName = newMetadata.getDomainName();
                long domainHash = newMetadata.getDomainHash();
                // reset
                newMetadata.clear();
                // restore
                newMetadata.setUrlCount(urlCount);
                newMetadata.setFirstRecordOffset(firstRecordOffset);
                newMetadata.setLastRecordOffset(lastRecordOffset);
                newMetadata.setDomainName(domainName);
                newMetadata.setDomainHash(domainHash);
                // serialize it ...
                outputBuffer.reset();
                newMetadata.serialize(outputBuffer, new BinaryProtocol());
                // seek back to the record's original position ...
                file.seek(originalPos);
                // and rewrite it ...
                file.write(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
            }
        } finally {
            file.close();
        }
        LOG.info("*** LIST:" + getListId() + " DONE RESETTING SUBDOMAIN METADATA QUEUE COUNTS");
    }
}
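The loop above records getFilePointer() before each read so the modified record can be written back over the original bytes. The same seek-back pattern in isolation, as a minimal sketch (the fixed record size and the transform step are placeholders):

import java.io.IOException;
import java.io.RandomAccessFile;

// Rewrite each fixed-size record in place: remember where it started,
// read it, transform it, seek back, and overwrite the same bytes.
static void rewriteRecords(RandomAccessFile file, int recordSize, int recordCount) throws IOException {
    byte[] record = new byte[recordSize];
    for (int i = 0; i < recordCount; ++i) {
        long originalPos = file.getFilePointer();
        file.readFully(record);
        // ... transform the record bytes here ...
        file.seek(originalPos);
        file.write(record, 0, recordSize);
    }
}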
From source file:org.commoncrawl.service.crawler.CrawlLog.java
public static void walkCrawlLogFile(File crawlLogPath, long startOffset) throws IOException {
    // and open the crawl log file ...
    RandomAccessFile inputStream = null;
    IOException exception = null;
    CRC32 crc = new CRC32();
    CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 17);
    byte[] syncBytesBuffer = new byte[SYNC_BYTES_SIZE];
    // save position for potential debug output.
    long lastReadPosition = 0;
    try {
        inputStream = new RandomAccessFile(crawlLogPath, "rw");
        // and a data input stream ...
        RandomAccessFile reader = inputStream;
        // seek to zero
        reader.seek(0L);
        // read the header ...
        LogFileHeader header = readLogFileHeader(reader);
        System.out.println("Header ItemCount:" + header._itemCount + " FileSize:" + header._fileSize);
        if (startOffset != 0L) {
            System.out.println("Preseeking to:" + startOffset);
            reader.seek(startOffset);
        }
        Configuration conf = new Configuration();
        // read a crawl url from the stream...
        long recordCount = 0;
        while (inputStream.getFilePointer() < header._fileSize) {
            if (seekToNextSyncBytesPos(syncBytesBuffer, reader, header._fileSize)) {
                lastReadPosition = inputStream.getFilePointer();
                // skip sync
                inputStream.skipBytes(SYNC_BYTES_SIZE);
                // read length ...
                int urlDataLen = reader.readInt();
                long urlDataCRC = reader.readLong();
                if (urlDataLen > buffer.getBuffer().length) {
                    buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                }
                reader.read(buffer.getBuffer(), 0, urlDataLen);
                crc.reset();
                crc.update(buffer.getBuffer(), 0, urlDataLen);
                long computedValue = crc.getValue();
                // validate crc values ...
                if (computedValue != urlDataCRC) {
                    LOG.error("CRC Mismatch Detected during HDFS transfer in CrawlLog:"
                            + crawlLogPath.getAbsolutePath() + " FilePosition:" + lastReadPosition);
                    inputStream.seek(lastReadPosition + 1);
                } else {
                    if (recordCount++ % 10000 == 0) {
                        // allocate a crawl url data structure
                        CrawlURL url = new CrawlURL();
                        DataInputStream bufferReader = new DataInputStream(
                                new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));
                        // populate it from the (in memory) data stream
                        url.readFields(bufferReader);
                        System.out.println("Record:" + recordCount + " At:" + lastReadPosition + " URL:"
                                + url.getUrl() + " BuffSize:" + urlDataLen + " ContentLen:"
                                + url.getContentRaw().getCount() + " LastModified:"
                                + new Date(url.getLastAttemptTime()).toString());
                    }
                }
            } else {
                break;
            }
        }
    } catch (EOFException e) {
        LOG.error("Caught EOF Exception during read of local CrawlLog:" + crawlLogPath.getAbsolutePath()
                + " FilePosition:" + lastReadPosition);
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        exception = e;
        throw e;
    } finally {
        if (inputStream != null)
            inputStream.close();
    }
}
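The walk recovers from a CRC mismatch by seeking one byte past the bad position and scanning forward for the next sync marker. seekToNextSyncBytesPos itself is not shown in this example; a minimal sketch of what such a forward scan might look like (the marker bytes are whatever the log format defines; this is an illustration, not the CommonCrawl implementation):

import java.io.IOException;
import java.io.RandomAccessFile;

// Scan forward from the current position until the sync marker is found.
// Leaves the file positioned at the start of the marker; returns false at EOF.
static boolean seekToNextSync(RandomAccessFile file, byte[] syncBytes, long fileSize) throws IOException {
    long pos = file.getFilePointer();
    int matched = 0;
    while (pos < fileSize) {
        file.seek(pos);
        int b = file.read();
        if (b == -1)
            return false;
        if ((byte) b == syncBytes[matched]) {
            if (++matched == syncBytes.length) {
                file.seek(pos - syncBytes.length + 1); // rewind to the marker start
                return true;
            }
        } else {
            pos -= matched; // restart the match one byte after the failed start
            matched = 0;
        }
        pos++;
    }
    return false;
}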
From source file:org.commoncrawl.service.listcrawler.CrawlList.java
private OnDiskCrawlHistoryItem loadOnDiskItemForURLFP(URLFP fingerprint) throws IOException {
    // see if state is cached in memory ...
    boolean loadedFromMemory = false;
    synchronized (this) {
        if (_tempFixedDataBuffer != null) {
            loadedFromMemory = true;
            int low = 0;
            int high = (int) (_tempFixedDataBufferSize / OnDiskCrawlHistoryItem.ON_DISK_SIZE) - 1;
            OnDiskCrawlHistoryItem itemOut = new OnDiskCrawlHistoryItem();
            DataInputBuffer inputBuffer = new DataInputBuffer();
            int iterationNumber = 0;
            while (low <= high) {
                ++iterationNumber;
                int mid = low + ((high - low) / 2);
                inputBuffer.reset(_tempFixedDataBuffer, 0, _tempFixedDataBufferSize);
                inputBuffer.skip(mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE);
                // deserialize
                itemOut.deserialize(inputBuffer);
                // now compare it against the desired hash value ...
                int comparisonResult = itemOut.compareFingerprints(fingerprint);
                if (comparisonResult > 0)
                    high = mid - 1;
                else if (comparisonResult < 0)
                    low = mid + 1;
                else {
                    // cache offset
                    itemOut._fileOffset = mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE;
                    // LOG.info("Found Match. Took:" + iterationNumber + " iterations");
                    // and return item
                    return itemOut;
                }
            }
        }
    }
    if (!loadedFromMemory) {
        // load from disk
        RandomAccessFile file = new RandomAccessFile(_fixedDataFile, "rw");
        // allocate buffer upfront
        byte[] onDiskItemBuffer = new byte[OnDiskCrawlHistoryItem.ON_DISK_SIZE];
        DataInputBuffer inputStream = new DataInputBuffer();
        try {
            int low = 0;
            int high = (int) (file.length() / OnDiskCrawlHistoryItem.ON_DISK_SIZE) - 1;
            OnDiskCrawlHistoryItem itemOut = new OnDiskCrawlHistoryItem();
            int iterationNumber = 0;
            while (low <= high) {
                ++iterationNumber;
                int mid = low + ((high - low) / 2);
                // seek to the proper location
                file.seek(mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE);
                // read the data structure
                file.readFully(onDiskItemBuffer, 0, onDiskItemBuffer.length);
                inputStream.reset(onDiskItemBuffer, 0, OnDiskCrawlHistoryItem.ON_DISK_SIZE);
                // deserialize
                itemOut.deserialize(inputStream);
                // now compare it against the desired hash value ...
                int comparisonResult = itemOut.compareFingerprints(fingerprint);
                if (comparisonResult > 0)
                    high = mid - 1;
                else if (comparisonResult < 0)
                    low = mid + 1;
                else {
                    // cache offset
                    itemOut._fileOffset = mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE;
                    // LOG.info("Found Match. Took:" + iterationNumber + " iterations");
                    // and return item
                    return itemOut;
                }
            }
        } finally {
            file.close();
        }
    }
    return null;
}
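Both branches above perform the same binary search over sorted fixed-size records, once against an in-memory buffer and once against the file itself. The on-disk variant reduced to its essentials, as a minimal sketch (the record size and the assumption that each record begins with its 8-byte sort key are placeholders):

import java.io.IOException;
import java.io.RandomAccessFile;

// Binary-search a file of sorted, fixed-size records for one matching targetKey.
// Returns the record's byte offset, or -1 if absent.
static long findRecord(RandomAccessFile file, int recordSize, long targetKey) throws IOException {
    long low = 0;
    long high = file.length() / recordSize - 1;
    while (low <= high) {
        long mid = low + (high - low) / 2;
        file.seek(mid * recordSize);
        long key = file.readLong(); // assume each record starts with its sort key
        if (key > targetKey)
            high = mid - 1;
        else if (key < targetKey)
            low = mid + 1;
        else
            return mid * recordSize;
    }
    return -1;
}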
From source file:com.portfolio.rest.RestServicePortfolio.java
@Path("/portfolios/portfolio/{portfolio-id}") @GET//w ww . ja v a 2 s . c o m @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML, "application/zip", MediaType.APPLICATION_OCTET_STREAM }) public Object getPortfolio(@CookieParam("user") String user, @CookieParam("credential") String token, @QueryParam("group") int groupId, @PathParam("portfolio-id") String portfolioUuid, @Context ServletConfig sc, @Context HttpServletRequest httpServletRequest, @HeaderParam("Accept") String accept, @QueryParam("user") Integer userId, @QueryParam("group") Integer group, @QueryParam("resources") String resource, @QueryParam("files") String files, @QueryParam("export") String export, @QueryParam("lang") String lang) { UserInfo ui = checkCredential(httpServletRequest, user, token, null); Response response = null; try { String portfolio = dataProvider.getPortfolio(new MimeType("text/xml"), portfolioUuid, ui.userId, 0, this.label, resource, "", ui.subId).toString(); if ("faux".equals(portfolio)) { response = Response.status(403).build(); } if (response == null) { Date time = new Date(); Document doc = DomUtils.xmlString2Document(portfolio, new StringBuffer()); NodeList codes = doc.getDocumentElement().getElementsByTagName("code"); // Le premier c'est celui du root Node codenode = codes.item(0); String code = ""; if (codenode != null) code = codenode.getTextContent(); if (export != null) { response = Response.ok(portfolio).header("content-disposition", "attachment; filename = \"" + code + "-" + time + ".xml\"").build(); } else if (resource != null && files != null) { //// Cas du renvoi d'un ZIP /// Temp file in temp directory File tempDir = new File(System.getProperty("java.io.tmpdir", null)); File tempZip = File.createTempFile(portfolioUuid, ".zip", tempDir); FileOutputStream fos = new FileOutputStream(tempZip); ZipOutputStream zos = new ZipOutputStream(fos); // BufferedOutputStream bos = new BufferedOutputStream(zos); /// zos.setComment("Some comment"); /// Write xml file to zip ZipEntry ze = new ZipEntry(portfolioUuid + ".xml"); zos.putNextEntry(ze); byte[] bytes = portfolio.getBytes("UTF-8"); zos.write(bytes); zos.closeEntry(); /// Find all fileid/filename XPath xPath = XPathFactory.newInstance().newXPath(); String filterRes = "//asmResource/fileid"; NodeList nodelist = (NodeList) xPath.compile(filterRes).evaluate(doc, XPathConstants.NODESET); /// Direct link to data // String urlTarget = "http://"+ server + "/user/" + user +"/file/" + uuid +"/"+ lang+ "/ptype/fs"; /* String langatt = ""; if( lang != null ) langatt = "?lang="+lang; else langatt = "?lang=fr"; //*/ /// Fetch all files for (int i = 0; i < nodelist.getLength(); ++i) { Node res = nodelist.item(i); Node p = res.getParentNode(); // resource -> container Node gp = p.getParentNode(); // container -> context Node uuidNode = gp.getAttributes().getNamedItem("id"); String uuid = uuidNode.getTextContent(); String filterName = "./filename[@lang and text()]"; NodeList textList = (NodeList) xPath.compile(filterName).evaluate(p, XPathConstants.NODESET); String filename = ""; if (textList.getLength() != 0) { Element fileNode = (Element) textList.item(0); filename = fileNode.getTextContent(); lang = fileNode.getAttribute("lang"); if ("".equals(lang)) lang = "fr"; } String servlet = httpServletRequest.getRequestURI(); servlet = servlet.substring(0, servlet.indexOf("/", 7)); String server = httpServletRequest.getServerName(); int port = httpServletRequest.getServerPort(); // "http://"+ server + /resources/resource/file/ uuid ? 
lang= size= // String urlTarget = "http://"+ server + "/user/" + user +"/file/" + uuid +"/"+ lang+ "/ptype/fs"; String url = "http://" + server + ":" + port + servlet + "/resources/resource/file/" + uuid + "?lang=" + lang; HttpGet get = new HttpGet(url); // Transfer sessionid so that local request still get security checked HttpSession session = httpServletRequest.getSession(true); get.addHeader("Cookie", "JSESSIONID=" + session.getId()); // Send request CloseableHttpClient client = HttpClients.createDefault(); CloseableHttpResponse ret = client.execute(get); HttpEntity entity = ret.getEntity(); // Put specific name for later recovery if ("".equals(filename)) continue; int lastDot = filename.lastIndexOf("."); if (lastDot < 0) lastDot = 0; String filenameext = filename.substring(0); /// find extension int extindex = filenameext.lastIndexOf("."); filenameext = uuid + "_" + lang + filenameext.substring(extindex); // Save it to zip file // int length = (int) entity.getContentLength(); InputStream content = entity.getContent(); // BufferedInputStream bis = new BufferedInputStream(entity.getContent()); ze = new ZipEntry(filenameext); try { int totalread = 0; zos.putNextEntry(ze); int inByte; byte[] buf = new byte[4096]; // zos.write(bytes,0,inByte); while ((inByte = content.read(buf)) != -1) { totalread += inByte; zos.write(buf, 0, inByte); } System.out.println("FILE: " + filenameext + " -> " + totalread); content.close(); // bis.close(); zos.closeEntry(); } catch (Exception e) { e.printStackTrace(); } EntityUtils.consume(entity); ret.close(); client.close(); } zos.close(); fos.close(); /// Return zip file RandomAccessFile f = new RandomAccessFile(tempZip.getAbsoluteFile(), "r"); byte[] b = new byte[(int) f.length()]; f.read(b); f.close(); response = Response.ok(b, MediaType.APPLICATION_OCTET_STREAM) .header("content-disposition", "attachment; filename = \"" + code + "-" + time + ".zip") .build(); // Temp file cleanup tempZip.delete(); } else { //try { this.userId = userId; } catch(Exception ex) { this.userId = -1; }; // String returnValue = dataProvider.getPortfolio(new MimeType("text/xml"),portfolioUuid,this.userId, this.groupId, this.label, resource, files).toString(); if (portfolio.equals("faux")) { throw new RestWebApplicationException(Status.FORBIDDEN, "Vous n'avez pas les droits necessaires"); } if (accept.equals(MediaType.APPLICATION_JSON)) { portfolio = XML.toJSONObject(portfolio).toString(); response = Response.ok(portfolio).type(MediaType.APPLICATION_JSON).build(); } else response = Response.ok(portfolio).type(MediaType.APPLICATION_XML).build(); logRestRequest(httpServletRequest, null, portfolio, Status.OK.getStatusCode()); } } } catch (RestWebApplicationException ex) { throw new RestWebApplicationException(Status.FORBIDDEN, ex.getResponse().getEntity().toString()); } catch (SQLException ex) { logRestRequest(httpServletRequest, null, "Portfolio " + portfolioUuid + " not found", Status.NOT_FOUND.getStatusCode()); throw new RestWebApplicationException(Status.NOT_FOUND, "Portfolio " + portfolioUuid + " not found"); } catch (Exception ex) { ex.printStackTrace(); logRestRequest(httpServletRequest, null, ex.getMessage() + "\n\n" + ex.getStackTrace(), Status.INTERNAL_SERVER_ERROR.getStatusCode()); throw new RestWebApplicationException(Status.INTERNAL_SERVER_ERROR, ex.getMessage()); } finally { if (dataProvider != null) dataProvider.disconnect(); } return response; }
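A caveat in the zip-returning branch above: the single f.read(b) call is not guaranteed to fill the array, so a large zip could be returned truncated. A minimal sketch of a safer whole-file read using readFully, which loops internally until the buffer is full:

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;

// Read an entire (small) file into memory before returning it as a response body.
static byte[] readWholeFile(File file) throws IOException {
    RandomAccessFile raf = new RandomAccessFile(file, "r");
    try {
        byte[] data = new byte[(int) raf.length()];
        raf.readFully(data); // unlike read(), readFully() blocks until every byte is read
        return data;
    } finally {
        raf.close(); // always release the file handle, even on failure
    }
}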
From source file:org.commoncrawl.service.crawler.CrawlLog.java
private static void transferLocalCheckpointLog(File crawlLogPath, HDFSCrawlURLWriter writer,
        long checkpointId) throws IOException {
    // and open the crawl log file ...
    RandomAccessFile inputStream = null;
    IOException exception = null;
    CRC32 crc = new CRC32();
    CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 17);
    byte[] syncBytesBuffer = new byte[SYNC_BYTES_SIZE];
    // save position for potential debug output.
    long lastReadPosition = 0;
    try {
        inputStream = new RandomAccessFile(crawlLogPath, "rw");
        // and a data input stream ...
        RandomAccessFile reader = inputStream;
        // seek to zero
        reader.seek(0L);
        // read the header ...
        LogFileHeader header = readLogFileHeader(reader);
        // read a crawl url from the stream...
        while (inputStream.getFilePointer() < header._fileSize) {
            if (seekToNextSyncBytesPos(syncBytesBuffer, reader, header._fileSize)) {
                try {
                    lastReadPosition = inputStream.getFilePointer();
                    // skip sync
                    inputStream.skipBytes(SYNC_BYTES_SIZE);
                    // read length ...
                    int urlDataLen = reader.readInt();
                    long urlDataCRC = reader.readLong();
                    if (urlDataLen > buffer.getBuffer().length) {
                        buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                    }
                    reader.read(buffer.getBuffer(), 0, urlDataLen);
                    crc.reset();
                    crc.update(buffer.getBuffer(), 0, urlDataLen);
                    long computedValue = crc.getValue();
                    // validate crc values ...
                    if (computedValue != urlDataCRC) {
                        LOG.error("CRC Mismatch Detected during HDFS transfer in CrawlLog:"
                                + crawlLogPath.getAbsolutePath() + " Checkpoint Id:" + checkpointId
                                + " FilePosition:" + lastReadPosition);
                        inputStream.seek(lastReadPosition + 1);
                    } else {
                        // allocate a crawl url data structure
                        CrawlURL url = new CrawlURL();
                        DataInputStream bufferReader = new DataInputStream(
                                new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));
                        // populate it from the (in memory) data stream
                        url.readFields(bufferReader);
                        try {
                            // and write out appropriate sequence file entries ...
                            writer.writeCrawlURLItem(new Text(url.getUrl()), url);
                        } catch (IOException e) {
                            LOG.error("Failed to write CrawlURL to SequenceFileWriter with Exception:"
                                    + CCStringUtils.stringifyException(e));
                            throw new URLWriterException();
                        }
                    }
                } catch (URLWriterException e) {
                    LOG.error("Caught URLWriter Exception! - Throwing to outer layer!");
                    throw e;
                } catch (Exception e) {
                    LOG.error("Ignoring Error Processing CrawlLog Entry at Position:" + lastReadPosition
                            + " Exception:" + CCStringUtils.stringifyException(e));
                }
            } else {
                break;
            }
        }
    } catch (EOFException e) {
        LOG.error("Caught EOF Exception during read of local CrawlLog:" + crawlLogPath.getAbsolutePath()
                + " Checkpoint Id:" + checkpointId + " FilePosition:" + lastReadPosition);
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        exception = e;
        throw e;
    } finally {
        if (inputStream != null)
            inputStream.close();
    }
}