Usage examples for javax.management.AttributeNotFoundException#getLocalizedMessage()
public String getLocalizedMessage()
From source file:com.cyberway.issue.crawler.prefetch.PreconditionEnforcer.java
/** Get the maximum time a robots.txt is valid. * * @param curi//from ww w . jav a 2 s. c o m * @return the time a robots.txt is valid in milliseconds. */ public long getRobotsValidityDuration(CrawlURI curi) { Integer d; try { d = (Integer) getAttribute(ATTR_ROBOTS_VALIDITY_DURATION, curi); } catch (AttributeNotFoundException e) { // This should never happen, but if it does, return default logger.severe(e.getLocalizedMessage()); d = DEFAULT_ROBOTS_VALIDITY_DURATION; } // convert from seconds to milliseconds return d.longValue() * 1000; }
From source file:com.aiwoapp.crawler.writer.DBWriterProcessor.java
@Override protected void innerProcess(CrawlURI curi) { UURI uuri = curi.getUURI(); // Current URI. // Only http and https schemes are supported. String scheme = uuri.getScheme(); if (!"http".equalsIgnoreCase(scheme) && !"https".equalsIgnoreCase(scheme)) { return;/* ww w . j a v a 2 s.c om*/ } RecordingInputStream recis = curi.getRecorder().getRecordedInput(); if (0L == recis.getResponseContentLength()) { return; } String baseDir = getPath().getFile().getAbsolutePath(); // Already have a path for this URI. boolean reCrawl = curi.getData().containsKey(A_MIRROR_PATH); /* The file system path, relative to the value of ATTR_PATH, where this resource should be written. The intent is to add later a persistent mapping from URI to path. This will allow a URI to be re-crawled and updated if it has changed. If the resource has already been fetched and written to a file before, the path to that file has already been obtained from the persistent mapping and placed on the AList by some other module, such as the frontier. */ String mps = null; File destFile = null; // Write resource contents to this file. try { if (reCrawl) { mps = (String) curi.getData().get(A_MIRROR_PATH); destFile = new File(baseDir + File.separator + mps); File parent = destFile.getParentFile(); if (null != parent) { FileUtils.ensureWriteableDirectory(parent); } } else { URIToFileReturn r = null; // Return from uriToFile(). try { r = uriToFile(baseDir, curi); } catch (AttributeNotFoundException e) { logger.warning(e.getLocalizedMessage()); return; } destFile = r.getFile(); mps = r.getRelativePath(); } logger.info(uuri.toString() + " -> " + destFile.getPath()); writeToPath(recis, destFile); if (!reCrawl) { curi.getData().put(A_MIRROR_PATH, mps); } } catch (IOException e) { curi.getNonFatalFailures().add(e); } }
From source file:com.cyberway.issue.crawler.writer.MirrorWriterProcessor.java
protected void innerProcess(CrawlURI curi) { if (!curi.isSuccess()) { return;//from ww w .ja va 2 s . c o m } UURI uuri = curi.getUURI(); // Current URI. System.out.println("URL-------------" + uuri); // Only http and https schemes are supported. String scheme = uuri.getScheme(); if (!"http".equalsIgnoreCase(scheme) && !"https".equalsIgnoreCase(scheme)) { return; } RecordingInputStream recis = curi.getHttpRecorder().getRecordedInput(); if (0L == recis.getResponseContentLength()) { return; } String baseDir = null; // Base directory. String baseSeg = null; // ATTR_PATH value. try { baseSeg = (String) getAttribute(ATTR_PATH, curi); } catch (AttributeNotFoundException e) { logger.warning(e.getLocalizedMessage()); return; } // Trim any trailing File.separatorChar characters from baseSeg. while ((baseSeg.length() > 1) && baseSeg.endsWith(File.separator)) { baseSeg = baseSeg.substring(0, baseSeg.length() - 1); } if (0 == baseSeg.length()) { baseDir = getController().getDisk().getPath(); } else if ((new File(baseSeg)).isAbsolute()) { baseDir = baseSeg; } else { baseDir = getController().getDisk().getPath() + File.separator + baseSeg; } // Already have a path for this URI. boolean reCrawl = curi.containsKey(A_MIRROR_PATH); String mps = null; File destFile = null; // Write resource contents to this file. try { if (reCrawl) { mps = curi.getString(A_MIRROR_PATH); destFile = new File(baseDir + File.separator + mps); File parent = destFile.getParentFile(); if (null != parent) { IoUtils.ensureWriteableDirectory(parent); } } else { URIToFileReturn r = null; // Return from uriToFile(). 
try { r = uriToFile(baseDir, curi); } catch (AttributeNotFoundException e) { logger.warning(e.getLocalizedMessage()); return; } destFile = r.getFile(); mps = r.getRelativePath(); } String mimeType = curi.getContentType(); String filePath = ""; boolean isRepleaceURL = false; if (mimeType != null && (mimeType.toLowerCase().indexOf("css") != -1) && (curi.toString().toLowerCase().endsWith(".css"))) { filePath = "css"; } else if (curi.toString().toLowerCase().indexOf("/images") != -1 && ((curi.toString().toLowerCase().endsWith(".jpg")) || (curi.toString().toLowerCase().endsWith(".gif")) || (curi.toString().toLowerCase().endsWith(".png")) || (curi.toString().toLowerCase().endsWith(".swf")))) { filePath = "images"; } else if (curi.toString().indexOf("docInfo!list.action") > 0 && curi.toString().indexOf("&docid=") == -1) { filePath = "summary_template"; isRepleaceURL = true; } else if (curi.toString().indexOf("docInfo!view.action") > 0 || curi.toString().indexOf("&docid=") > 0) { filePath = "details_template"; isRepleaceURL = true; } else if (curi.toString().indexOf("index.action") > 0) { filePath = "\\"; mps = mps.substring(0, mps.lastIndexOf('\\') + 1) + "index.html"; isRepleaceURL = true; } System.out.println(curi.toString() + "-----------"); if (StringUtils.isNotBlank(filePath)) { if (curi.toString().indexOf("&url=") > 0) { mps = mps.substring(0, mps.lastIndexOf('/')) + mps.substring(mps.lastIndexOf('/') + 1, mps.length()); } mps = mps.replace(".action", ".html"); File file = new File(baseDir + File.separator + mps.substring(0, mps.indexOf("\\")) + File.separator + filePath + File.separator + mps.substring(mps.lastIndexOf("\\", mps.length()))); File parent = file.getParentFile(); if (null != parent) { IoUtils.ensureWriteableDirectory(parent); } /* * if(!file.getParentFile().isDirectory()) * file.getParentFile().mkdir(); */ destFile = file; try { if (isRepleaceURL) { Replace[] replace = new Replace[1]; replace[0] = new ReplaceURL(); String content = 
replace[0].Replace(recis, uuri); createFile(destFile.getPath(), content); } else { writeToPath(recis, destFile); } } catch (Exception e) { } } else { writeToPath(recis, destFile); } logger.info(uuri.toString() + " -> " + destFile.getPath()); /* * ReplayInputStream inputStream = * recis.getContentReplayInputStream(); byte[] bytes = new * byte[(int)inputStream.getSize()]; inputStream.read(bytes); * System.out.println(new String(bytes,"GB2312")); */ if (!reCrawl) { curi.putString(A_MIRROR_PATH, mps); } } catch (IOException e) { curi.addLocalizedError(this.getName(), e, "Mirror"); } }
From source file:org.archive.modules.writer.TcWriterProcessor.java
@Override protected void innerProcess(CrawlURI curi) { UURI uuri = curi.getUURI(); // Current URI. // Only http and https schemes are supported. String scheme = uuri.getScheme(); if (!"http".equalsIgnoreCase(scheme) && !"https".equalsIgnoreCase(scheme)) { return;//from w ww. j av a 2 s.c om } Recorder recorder = curi.getRecorder(); // recorder.setContentEncoding("utf-8"); RecordingInputStream recis = recorder.getRecordedInput(); if (0L == recis.getResponseContentLength()) { return; } String baseDir = getPath().getFile().getAbsolutePath(); // Already have a path for this URI. boolean reCrawl = curi.getData().containsKey(A_MIRROR_PATH); /* The file system path, relative to the value of ATTR_PATH, where this resource should be written. The intent is to add later a persistent mapping from URI to path. This will allow a URI to be re-crawled and updated if it has changed. If the resource has already been fetched and written to a file before, the path to that file has already been obtained from the persistent mapping and placed on the AList by some other module, such as the frontier. */ String mps = null; File destFile = null; // Write resource contents to this file. try { if (reCrawl) { mps = (String) curi.getData().get(A_MIRROR_PATH); destFile = new File(baseDir + File.separator + mps); File parent = destFile.getParentFile(); if (null != parent) { FileUtils.ensureWriteableDirectory(parent); } } else { URIToFileReturn r = null; // Return from uriToFile(). 
try { r = uriToFile(baseDir, curi); } catch (AttributeNotFoundException e) { logger.warning(e.getLocalizedMessage()); return; } destFile = r.getFile(); mps = r.getRelativePath(); } /*************************************************************/ System.out.println("url = " + curi.getUURI()); // System.out.println("outlink size = " + curi.getOutLinks().size() + "\toutlink = " + curi.getOutLinks()); /*************************************************************/ /** * */ writeToPath(recis, destFile); if (!reCrawl) { curi.getData().put(A_MIRROR_PATH, mps); } } catch (IOException e) { curi.getNonFatalFailures().add(e); } }