List of usage examples for java.lang.Throwable.getMessage()
public String getMessage()
From source file:com.appeligo.responsetest.ServerResponseChecker.java
/** * @param args/*from www .j a v a 2 s . c o m*/ */ public static void main(String[] args) { PatternLayout pattern = new PatternLayout("%d{ISO8601} %-5p [%-c{1} - %t] - %m%n"); ConsoleAppender consoleAppender = new ConsoleAppender(pattern); LevelRangeFilter infoFilter = new LevelRangeFilter(); infoFilter.setLevelMin(Level.INFO); consoleAppender.addFilter(infoFilter); BasicConfigurator.configure(consoleAppender); String configFile = "/etc/flip.tv/responsetest.xml"; if (args.length > 0) { if (args.length == 2 && args[0].equals("-config")) { configFile = args[1]; } else { log.error("Usage: java " + ServerResponseChecker.class.getName() + " [-config <xmlfile>]"); System.exit(1); } } try { XMLConfiguration config = new XMLConfiguration(configFile); logFile = config.getString("logFile", logFile); servlet = config.getString("servlet", servlet); timeoutSeconds = config.getLong("timeoutSeconds", timeoutSeconds); responseTimeThresholdSeconds = config.getLong("responseTimeThresholdSeconds", responseTimeThresholdSeconds); reporter = config.getString("reporter", reporter); smtpServer = config.getString("smtpServer", smtpServer); smtpUsername = config.getString("smtpUsername", smtpUsername); smtpPassword = config.getString("smtpPassword", smtpPassword); smtpDebug = config.getBoolean("smtpDebug", smtpDebug); mailTo = config.getString("mailTo", mailTo); } catch (ConfigurationException e) { e.printStackTrace(); } marker = logFile + ".mailed"; try { BasicConfigurator.configure(new RollingFileAppender(pattern, logFile, true)); } catch (IOException e1) { e1.printStackTrace(); } // Add email appender SMTPAppender mailme = new SMTPAppender(); LevelRangeFilter warnFilter = new LevelRangeFilter(); warnFilter.setLevelMin(Level.WARN); mailme.addFilter(warnFilter); mailme.setSMTPDebug(smtpDebug); mailme.setSMTPHost(smtpServer); mailme.setTo(mailTo); mailme.setFrom(reporter + " <" + smtpUsername + ">"); mailme.setBufferSize(1); mailme.setSubject(servlet + " Not Responding!"); 
mailme.setSMTPUsername(smtpUsername); mailme.setSMTPPassword(smtpPassword); mailme.setLayout(new SimpleLayout()); mailme.activateOptions(); mailme.setLayout(pattern); BasicConfigurator.configure(mailme); long before; ConnectionThread connectionThread = new ConnectionThread(); connectionThread.start(); synchronized (connectionThread) { connectionThread.setOkToGo(true); connectionThread.notifyAll(); before = System.currentTimeMillis(); long delay = timeoutSeconds * 1000; while (!done && delay > 0) { try { connectionThread.wait(delay); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } delay -= (System.currentTimeMillis() - before); } } long after = System.currentTimeMillis(); responseMillis = after - before; String reportStatus = "Could not report"; try { StringBuilder sb = new StringBuilder(); sb.append(servlet + "/responsetest/report.action"); sb.append("?reporter=" + URLEncoder.encode(reporter)); sb.append("&status=" + URLEncoder.encode(status)); sb.append("&bytesRead=" + bytesRead); sb.append("&timedOut=" + (!done)); if (throwable == null) { sb.append("&exception=none"); } else { sb.append("&exception=" + URLEncoder.encode(throwable.getClass().getName() + "-" + throwable.getMessage())); } sb.append("&responseMillis=" + responseMillis); URL reportURL = new URL(sb.toString()); connection = (HttpURLConnection) reportURL.openConnection(); connection.connect(); reportStatus = connection.getResponseCode() + " - " + connection.getResponseMessage(); } catch (Throwable t) { reportStatus = t.getClass().getName() + "-" + t.getMessage(); } StringBuilder sb = new StringBuilder(); sb.append(servlet + ": "); sb.append(status + ", " + bytesRead + " bytes, "); if (done) { sb.append("DONE, "); } else { sb.append("TIMED OUT, "); } sb.append(responseMillis + " millisecond response, "); sb.append(" report status=" + reportStatus); File markerFile = new File(marker); if (done && status.startsWith("200") && (throwable == null)) { if 
((responseMillis / 1000) < responseTimeThresholdSeconds) { if (markerFile.exists()) { markerFile.delete(); } log.debug(sb.toString()); } else { if (markerFile.exists()) { log.info(sb.toString()); } else { try { new FileOutputStream(marker).close(); log.warn(sb.toString()); } catch (IOException e) { log.info(sb.toString()); log.info("Can't send email alert because could not write marker file: " + marker + ". " + e.getMessage()); } } } } else { if (throwable != null) { StringWriter sw = new StringWriter(); PrintWriter pw = new PrintWriter(sw); throwable.printStackTrace(pw); sb.append(sw.toString()); } if (markerFile.exists()) { log.info(sb.toString()); } else { try { new FileOutputStream(marker).close(); log.fatal(sb.toString()); // chosen appender layout ignoresThrowable() } catch (IOException e) { log.info(sb.toString()); log.info("Can't send email alert because could not write marker file: " + marker + ". " + e.getMessage()); } } } }
From source file:com.aol.advertising.qiao.bootstrap.Bootstrap.java
/**
 * Entry point: registers the shutdown hook, creates the class-path XML application
 * context from {@code springContext}, then looks up the {@code Bootstrap} bean and
 * calls {@code init()} followed by {@code start()} on it.
 *
 * <p>Any failure while obtaining, initialising or starting the bean is reported on
 * {@code System.err} (message first, then the stack trace); the method then returns
 * normally.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
    registerShutdownHook();
    appCtx = new ClassPathXmlApplicationContext(springContext);

    try {
        bootstrap = ContextUtils.getBean(Bootstrap.class);
        bootstrap.init();
        bootstrap.start();
    } catch (Throwable startupFailure) {
        final String reason = startupFailure.getMessage();
        System.err.println("Failed to start:" + reason);
        startupFailure.printStackTrace();
    }
}
From source file:mitm.application.djigzo.tools.CertManager.java
public static void main(String[] args) throws Exception { CertManager monitor = new CertManager(); try {//www . j a va2s . c o m monitor.handleCommandline(args); } catch (CLIRuntimeException e) { System.err.println(e.getMessage()); System.exit(2); } catch (MissingArgumentException e) { System.err.println("Not all required parameters are specified. " + e); System.exit(3); } catch (ParseException e) { System.err.println("Command line parsing error. " + e); System.exit(4); } catch (WebServiceException e) { Throwable cause = ExceptionUtils.getRootCause(e); if (cause instanceof ConnectException) { System.err.println("Unable to connect to backend. Cause: " + cause.getMessage()); } else { e.printStackTrace(); } System.exit(5); } catch (WSProxyFactoryException e) { e.printStackTrace(); System.exit(6); } catch (WebServiceCheckedException e) { e.printStackTrace(); System.exit(7); } catch (Exception e) { e.printStackTrace(); System.exit(8); } }
From source file:esg.common.shell.ESGFShell.java
public static void main(String[] args) throws IOException { if ((args.length > 0) && (args[0].equals("--help"))) { usage();/* ww w . j ava2 s . co m*/ return; } String hostname = "<?>"; try { hostname = java.net.InetAddress.getLocalHost().getHostName().split("\\.", 2)[0]; } catch (java.net.UnknownHostException e) { log.error(e); } ConsoleReader reader = new ConsoleReader(); reader.setBellEnabled(false); //String debugFile = System.getProperty("java.io.tmpdir")+File.separator+"writer.debug"; //log.trace("("+debugFile+")"); //reader.setDebug(new PrintWriter(new FileWriter(debugFile, true))); PrintWriter writer = new PrintWriter(System.out); ESGFProperties esgfProperties = null; try { esgfProperties = new ESGFProperties(); } catch (Throwable t) { System.out.println(t.getMessage()); } ESGFEnv env = new ESGFEnv(reader, writer, esgfProperties); ESGFShell shell = new ESGFShell(env); String mode = null; String line = null; while ((line = reader.readLine(yellow(shell.getUserName(env) + "@" + hostname) + ":[" + red("esgf-sh") + "]" + (((mode = shell.getMode(env)) == null) ? "" : ":[" + green(mode) + "]") + white_b("> "))) != null) { try { shell.eval(line.trim().split(SEMI_RE), env); } catch (Throwable t) { System.out.println(t.getMessage()); //t.printStackTrace(); env.getWriter().flush(); } } }
From source file:dk.dma.ais.utils.filter.AisFilter.java
/**
 * Command-line entry point for {@code AisFilter}.
 *
 * <p>Installs a default uncaught-exception handler that reports the failing thread and
 * exception on {@code System.err} and exits with status -1. It also registers a
 * shutdown hook that calls {@code aisFilter.shutdown()}, then runs
 * {@code aisFilter.execute(args)}.
 *
 * @param args command-line arguments, passed through to {@code execute}
 * @throws Exception whatever {@code execute} throws
 */
public static void main(String[] args) throws Exception {
    Thread.setDefaultUncaughtExceptionHandler(new UncaughtExceptionHandler() {
        @Override
        public void uncaughtException(Thread t, Throwable e) {
            // Identify the thread by name; t.getClass() only gives the Thread class.
            // Include the exception type because getMessage() is null for many exceptions.
            System.err.println("Uncaught exception in thread " + t.getName() + ": "
                    + e.getClass().getName() + ": " + e.getMessage());
            System.exit(-1);
        }
    });

    final AisFilter aisFilter = new AisFilter();

    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            aisFilter.shutdown();
        }
    });

    aisFilter.execute(args);
}
From source file:eu.planets_project.tb.gui.backing.admin.wsclient.util.WSClient.java
/** * Invokes an operation using SAAJ// w w w. j av a 2 s.c o m * * @param operation The operation to invoke */ public static void main(String[] args) { try { /*OperationInfo operation = new OperationInfo(); operation.setEncodingStyle("http://schemas.xmlsoap.org/soap/encoding/"); operation.setInputMessageName("HelloWorld_sayHello"); operation.setInputMessageText("<urn:sayHello xmlns:urn='urn:jbosstest'><arg0>Markus</arg0></urn:sayHello>"); operation.setNamespaceURI("urn:jbosstest"); operation.setOutputMessageName("HelloWorld_sayHelloResponse"); operation.setOutputMessageText("<sayHello><return>0</return></sayHello>"); operation.setSoapActionURI(""); operation.setStyle("document"); operation.setTargetMethodName("sayHello"); operation.setTargetObjectURI(null); operation.setTargetURL("http://localhost:8080/HelloWorld/HelloWorld");*/ OperationInfo operation = new OperationInfo(); operation.setEncodingStyle("http://schemas.xmlsoap.org/soap/encoding/"); operation.setInputMessageName("ConversionRateSoapIn"); operation.setInputMessageText( "<ns5:ConversionRate xmlns:ns5='http://www.webserviceX.NET/'><ns5:FromCurrency>EUR</ns5:FromCurrency><ns5:ToCurrency>SKK</ns5:ToCurrency></ns5:ConversionRate>"); operation.setNamespaceURI("http://www.webserviceX.NET/"); operation.setOutputMessageName("ConversionRateSoapOut"); operation.setOutputMessageText( "<ConversionRate><ConversionRateResult>0</ConversionRateResult></ConversionRate>"); operation.setSoapActionURI("http://www.webserviceX.NET/ConversionRate"); operation.setStyle("document"); operation.setTargetMethodName("ConversionRate"); operation.setTargetObjectURI(null); operation.setTargetURL("http://www.webservicex.net/CurrencyConvertor.asmx"); // Determine if the operation style is RPC boolean isRPC = operation.getStyle().equalsIgnoreCase("rpc"); // All connections are created by using a connection factory SOAPConnectionFactory conFactory = SOAPConnectionFactory.newInstance(); // Now we can create a SOAPConnection object using 
the connection factory SOAPConnection connection = conFactory.createConnection(); // All SOAP messages are created by using a message factory MessageFactory msgFactory = MessageFactory.newInstance(); // Now we can create the SOAP message object SOAPMessage msg = msgFactory.createMessage(); // Get the SOAP part from the SOAP message object SOAPPart soapPart = msg.getSOAPPart(); // The SOAP part object will automatically contain the SOAP envelope SOAPEnvelope envelope = soapPart.getEnvelope(); //envelope.addNamespaceDeclaration("", operation.getNamespaceURI()); if (isRPC) { // Add namespace declarations to the envelope, usually only required for RPC/encoded envelope.addNamespaceDeclaration(XSI_NAMESPACE_PREFIX, XSI_NAMESPACE_URI); envelope.addNamespaceDeclaration(XSD_NAMESPACE_PREFIX, XSD_NAMESPACE_URI); } // Get the SOAP header from the envelope SOAPHeader header = envelope.getHeader(); // The client does not yet support SOAP headers header.detachNode(); // Get the SOAP body from the envelope and populate it SOAPBody body = envelope.getBody(); // Create the default namespace for the SOAP body //body.addNamespaceDeclaration("", operation.getNamespaceURI()); // Add the service information String targetObjectURI = operation.getTargetObjectURI(); if (targetObjectURI == null) { // The target object URI should not be null targetObjectURI = ""; } // Add the service information //Name svcInfo = envelope.createName(operation.getTargetMethodName(), "", targetObjectURI); Name svcInfo = envelope.createName(operation.getTargetMethodName(), "ns2", operation.getNamespaceURI()); SOAPElement svcElem = body.addChildElement(svcInfo); if (isRPC) { // Set the encoding style of the service element svcElem.setEncodingStyle(operation.getEncodingStyle()); } // Add the message contents to the SOAP body Document doc = XMLSupport.readXML(operation.getInputMessageText()); if (doc.hasRootElement()) { // Begin building content buildSoapElement(envelope, svcElem, doc.getRootElement(), isRPC); } 
//svcElem.addTextNode(operation.getInputMessageText()); //svcElem. // Check for a SOAPAction String soapActionURI = operation.getSoapActionURI(); if (soapActionURI != null && soapActionURI.length() > 0) { // Add the SOAPAction value as a MIME header MimeHeaders mimeHeaders = msg.getMimeHeaders(); mimeHeaders.setHeader("SOAPAction", "\"" + operation.getSoapActionURI() + "\""); } // Save changes to the message we just populated msg.saveChanges(); // Get ready for the invocation URLEndpoint endpoint = new URLEndpoint(operation.getTargetURL()); // Show the URL endpoint message in the log ByteArrayOutputStream msgStream = new ByteArrayOutputStream(); msg.writeTo(msgStream); log.debug("SOAP Message MeasurementTarget URL: " + endpoint.getURL()); log.debug("SOAP Request: " + msgStream.toString()); // Make the call SOAPMessage response = connection.call(msg, endpoint); // Close the connection, we are done with it connection.close(); // Get the content of the SOAP response Source responseContent = response.getSOAPPart().getContent(); // Convert the SOAP response into a JDOM TransformerFactory tFact = TransformerFactory.newInstance(); Transformer transformer = tFact.newTransformer(); JDOMResult jdomResult = new JDOMResult(); transformer.transform(responseContent, jdomResult); // Get the document created by the transform operation Document responseDoc = jdomResult.getDocument(); // Send the response to the Log String strResponse = XMLSupport.outputString(responseDoc); log.debug("SOAP Response from: " + operation.getTargetMethodName() + ": " + strResponse); // Set the response as the output message operation.setOutputMessageText(strResponse); // Return the response generated //return strResponse; } catch (Throwable ex) { log.error("Error invoking operation:"); log.error(ex.getMessage()); } //return ""; }
From source file:esg.node.util.migrate.UserMigrationTool.java
public static void main(String[] args) { try {//from w ww. ja va 2 s. c o m //Enter the connection URI information //setup source connection Properties props = new Properties(); if (args.length >= 4) { for (int i = 0; i < (args.length - 1); i++) { System.out.println(); if ("-U".equals(args[i])) { i++; System.out.print("user = "); if (args[i].startsWith("-")) { --i; continue; } props.setProperty("db.user", args[i]); System.out.print(args[i]); continue; } if ("-h".equals(args[i])) { i++; System.out.print("host = "); if (args[i].startsWith("-")) { --i; continue; } props.setProperty("db.host", args[i]); System.out.print(args[i]); continue; } if ("-p".equals(args[i])) { i++; System.out.print("port = "); if (args[i].startsWith("-")) { --i; continue; } props.setProperty("db.port", args[i]); System.out.print(args[i]); continue; } if ("-d".equals(args[i])) { i++; System.out.print("database = "); if (args[i].startsWith("-")) { --i; continue; } props.setProperty("db.database", args[i]); System.out.print(args[i]); continue; } } System.out.println(); } else { System.out.println("\nUsage:"); System.out.println( " java -jar esgf-security-user-migration-x.x.x.jar -U <username> -h <host> -p <port> -d <database>"); System.out.println(" (hit return and then enter your password)\n"); System.exit(1); } char password[] = null; try { password = PasswordField.getPassword(System.in, "Enter source database password: "); } catch (IOException ioe) { System.err.println("Ooops sumthin' ain't right with the input... 
:-("); System.exit(1); ioe.printStackTrace(); } if (password == null) { System.out.println("No password entered"); System.exit(1); } props.setProperty("db.password", String.valueOf(password)); System.out.println(); (new UserMigrationTool()).init(props).migrate(); } catch (Throwable t) { System.out.println(t.getMessage()); System.out.println( "\n Sorry, please check your database connection information again, was not able to migrate users :-(\n"); System.exit(1); } System.out.println("\ndone :-)\n"); System.out.println(" Thank you for migrating to the ESGF P2P Node"); System.out.println(" http://esgf.org\n"); }
From source file:mitm.application.djigzo.tools.Monitor.java
/**
 * Command-line entry point for {@code Monitor}.
 *
 * <p>Delegates to {@code handleCommandline(args)} and maps each handled failure type to
 * its own process exit status: 2 CLI runtime error, 3 missing argument, 4 parse error,
 * 5 web-service failure, 6 proxy-factory failure, 7 checked web-service failure.
 * On success the method returns without calling {@code System.exit}.
 *
 * @param args command-line arguments, passed through unchanged
 * @throws ParseException declared by the signature; parse errors from the command
 *                        handler are caught here
 * @throws IOException    if the command handler raises one
 */
public static void main(String[] args) throws ParseException, IOException {
    final Monitor monitor = new Monitor();

    // 0 means "no failure": in that case we fall off the end without exiting explicitly.
    int exitStatus = 0;

    try {
        monitor.handleCommandline(args);
    } catch (CLIRuntimeException e) {
        System.err.println(e.getMessage());
        exitStatus = 2;
    } catch (MissingArgumentException e) {
        System.err.println("Not all required parameters are specified. " + e);
        exitStatus = 3;
    } catch (ParseException e) {
        System.err.println("Command line parsing error. " + e);
        exitStatus = 4;
    } catch (WebServiceException e) {
        final Throwable rootCause = ExceptionUtils.getRootCause(e);
        final boolean backendUnreachable = rootCause instanceof ConnectException;
        if (!backendUnreachable) {
            e.printStackTrace();
        } else {
            System.err.println("Unable to connect to backend. Cause: " + rootCause.getMessage());
        }
        exitStatus = 5;
    } catch (WSProxyFactoryException e) {
        e.printStackTrace();
        exitStatus = 6;
    } catch (WebServiceCheckedException e) {
        e.printStackTrace();
        exitStatus = 7;
    }

    if (exitStatus != 0) {
        System.exit(exitStatus);
    }
}
From source file:com.finderbots.miner2.pinterest.PinterestCrawlAndMinerTool.java
public static void main(String[] args) { Options options = new Options(); CmdLineParser parser = new CmdLineParser(options); try {//from w w w. j a va 2 s . co m parser.parseArgument(args); } catch (CmdLineException e) { System.err.println(e.getMessage()); printUsageAndExit(parser); } // Before we get too far along, see if the domain looks valid. String domain = options.getDomain(); String urlsFile = options.getUrlsFile(); if (domain != null) { validateDomain(domain, parser); } else { if (urlsFile == null) { System.err.println( "Either a target domain should be specified or a file with a list of urls needs to be provided"); printUsageAndExit(parser); } } if (domain != null && urlsFile != null) { System.out.println("Warning: Both domain and urls file list provided - using domain"); } String outputDirName = options.getOutputDir(); if (options.isDebugLogging()) { System.setProperty("bixo.root.level", "DEBUG"); } else { System.setProperty("bixo.root.level", "INFO"); } if (options.getLoggingAppender() != null) { // Set console vs. DRFA vs. something else System.setProperty("bixo.appender", options.getLoggingAppender()); } String logsDir = options.getLogsDir(); if (!logsDir.endsWith("/")) { logsDir = logsDir + "/"; } try { JobConf conf = new JobConf(); Path outputPath = new Path(outputDirName); FileSystem fs = outputPath.getFileSystem(conf); // First check if the user wants to clean if (options.isCleanOutputDir()) { if (fs.exists(outputPath)) { fs.delete(outputPath, true); } } // See if the user isn't starting from scratch then set up the // output directory and create an initial urls subdir. if (!fs.exists(outputPath)) { fs.mkdirs(outputPath); // Create a "0-<timestamp>" sub-directory with just a /crawldb subdir // In the /crawldb dir the input file will have a single URL for the target domain. 
Path curLoopDir = CrawlDirUtils.makeLoopDir(fs, outputPath, 0); String curLoopDirName = curLoopDir.getName(); setLoopLoggerFile(logsDir + curLoopDirName, 0); Path crawlDbPath = new Path(curLoopDir, CrawlConfig.CRAWLDB_SUBDIR_NAME); if (domain != null) { importOneDomain(domain, crawlDbPath, conf); } else { importUrls(urlsFile, crawlDbPath); } } Path latestDirPath = CrawlDirUtils.findLatestLoopDir(fs, outputPath); if (latestDirPath == null) { System.err.println("No previous cycle output dirs exist in " + outputDirName); printUsageAndExit(parser); } Path crawlDbPath = new Path(latestDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME); // Set up the start and end loop counts. int startLoop = CrawlDirUtils.extractLoopNumber(latestDirPath); int endLoop = startLoop + options.getNumLoops(); // Set up the UserAgent for the fetcher. UserAgent userAgent = new UserAgent(options.getAgentName(), CrawlConfig.EMAIL_ADDRESS, CrawlConfig.WEB_ADDRESS); // You also get to customize the FetcherPolicy FetcherPolicy defaultPolicy; if (options.getCrawlDuration() != 0) { defaultPolicy = new AdaptiveFetcherPolicy(options.getEndCrawlTime(), options.getCrawlDelay()); } else { defaultPolicy = new FetcherPolicy(); } defaultPolicy.setMaxContentSize(CrawlConfig.MAX_CONTENT_SIZE); defaultPolicy.setRequestTimeout(10L * 1000L);//10 seconds // COMPLETE for crawling a single site, EFFICIENT for many sites if (options.getCrawlPolicy().equals(Options.IMPOLITE_CRAWL_POLICY)) { defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.IMPOLITE); } else if (options.getCrawlPolicy().equals(Options.EFFICIENT_CRAWL_POLICY)) { defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.EFFICIENT); } else if (options.getCrawlPolicy().equals(Options.COMPLETE_CRAWL_POLICY)) { defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.COMPLETE); } // It is a good idea to set up a crawl duration when running long crawls as you may // end up in situations where the fetch slows down due to a 'long tail' and by // specifying a crawl 
duration you know exactly when the crawl will end. int crawlDurationInMinutes = options.getCrawlDuration(); boolean hasEndTime = crawlDurationInMinutes != Options.NO_CRAWL_DURATION; long targetEndTime = hasEndTime ? System.currentTimeMillis() + (crawlDurationInMinutes * CrawlConfig.MILLISECONDS_PER_MINUTE) : FetcherPolicy.NO_CRAWL_END_TIME; // By setting up a url filter we only deal with urls that we want to // instead of all the urls that we extract. BaseUrlFilter urlFilter = null; List<String> patterns = null; String regexUrlFiltersFile = options.getRegexUrlFiltersFile(); if (regexUrlFiltersFile != null) { patterns = RegexUrlDatumFilter.getUrlFilterPatterns(regexUrlFiltersFile); } else { patterns = RegexUrlDatumFilter.getDefaultUrlFilterPatterns(); if (domain != null) { String domainPatterStr = "+(?i)^(http|https)://([a-z0-9]*\\.)*" + domain; patterns.add(domainPatterStr); } else { String protocolPatterStr = "+(?i)^(http|https)://*"; patterns.add(protocolPatterStr); //Log.warn("Defaulting to basic url regex filtering (just suffix and protocol"); } } urlFilter = new RegexUrlDatumFilter(patterns.toArray(new String[patterns.size()])); // get a list of patterns which tell the miner which URLs to include or exclude. patterns.clear(); RegexUrlStringFilter urlsToMineFilter = null; String regexUrlsToMineFiltersFile = options.getRegexUrlToMineFile(); AnalyzeHtml analyzer = null; if (regexUrlsToMineFiltersFile != null) { patterns = RegexUrlDatumFilter.getUrlFilterPatterns(regexUrlsToMineFiltersFile); urlsToMineFilter = new RegexUrlStringFilter(patterns.toArray(new String[patterns.size()])); analyzer = new AnalyzeHtml(urlsToMineFilter); } // OK, now we're ready to start looping, since we've got our current // settings for (int curLoop = startLoop + 1; curLoop <= endLoop; curLoop++) { // Adjust target end time, if appropriate. 
if (hasEndTime) { int remainingLoops = (endLoop - curLoop) + 1; long now = System.currentTimeMillis(); long perLoopTime = (targetEndTime - now) / remainingLoops; defaultPolicy.setCrawlEndTime(now + perLoopTime); } Path curLoopDirPath = CrawlDirUtils.makeLoopDir(fs, outputPath, curLoop); String curLoopDirName = curLoopDirPath.getName(); setLoopLoggerFile(logsDir + curLoopDirName, curLoop); Flow flow = PinterestCrawlAndMinerWorkflow.createFlow(curLoopDirPath, crawlDbPath, defaultPolicy, userAgent, urlFilter, analyzer, options); flow.complete(); // Writing out .dot files is a good way to verify your flows. flow.writeDOT("valid-flow.dot"); // Update crawlDbPath to point to the latest crawl db crawlDbPath = new Path(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME); } } catch (PlannerException e) { e.writeDOT("failed-flow.dot"); System.err.println("PlannerException: " + e.getMessage()); e.printStackTrace(System.err); System.exit(-1); } catch (Throwable t) { System.err.println("Exception running tool: " + t.getMessage()); t.printStackTrace(System.err); System.exit(-1); } }
From source file:com.finderbots.miner2.tomatoes.RTCriticsCrawlAndMinerTool.java
public static void main(String[] args) { Options options = new Options(); CmdLineParser parser = new CmdLineParser(options); try {/*from w ww.j a v a 2 s . co m*/ parser.parseArgument(args); } catch (CmdLineException e) { System.err.println(e.getMessage()); printUsageAndExit(parser); } // Before we get too far along, see if the domain looks valid. String domain = options.getDomain(); String urlsFile = options.getUrlsFile(); if (domain != null) { validateDomain(domain, parser); } else { if (urlsFile == null) { System.err.println( "Either a target domain should be specified or a file with a list of urls needs to be provided"); printUsageAndExit(parser); } } if (domain != null && urlsFile != null) { System.out.println("Warning: Both domain and urls file list provided - using domain"); } String outputDirName = options.getOutputDir(); if (options.isDebugLogging()) { System.setProperty("bixo.root.level", "DEBUG"); } else { System.setProperty("bixo.root.level", "INFO"); } if (options.getLoggingAppender() != null) { // Set console vs. DRFA vs. something else System.setProperty("bixo.appender", options.getLoggingAppender()); } String logsDir = options.getLogsDir(); if (!logsDir.endsWith("/")) { logsDir = logsDir + "/"; } try { JobConf conf = new JobConf(); Path outputPath = new Path(outputDirName); FileSystem fs = outputPath.getFileSystem(conf); // First check if the user wants to clean if (options.isCleanOutputDir()) { if (fs.exists(outputPath)) { fs.delete(outputPath, true); } } // See if the user isn't starting from scratch then set up the // output directory and create an initial urls subdir. if (!fs.exists(outputPath)) { fs.mkdirs(outputPath); // Create a "0-<timestamp>" sub-directory with just a /crawldb subdir // In the /crawldb dir the input file will have a single URL for the target domain. 
Path curLoopDir = CrawlDirUtils.makeLoopDir(fs, outputPath, 0); String curLoopDirName = curLoopDir.getName(); setLoopLoggerFile(logsDir + curLoopDirName, 0); Path crawlDbPath = new Path(curLoopDir, CrawlConfig.CRAWLDB_SUBDIR_NAME); if (domain != null) { importOneDomain(domain, crawlDbPath, conf); } else { importUrls(urlsFile, crawlDbPath); } } Path latestDirPath = CrawlDirUtils.findLatestLoopDir(fs, outputPath); if (latestDirPath == null) { System.err.println("No previous cycle output dirs exist in " + outputDirName); printUsageAndExit(parser); } Path crawlDbPath = new Path(latestDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME); // Set up the start and end loop counts. int startLoop = CrawlDirUtils.extractLoopNumber(latestDirPath); int endLoop = startLoop + options.getNumLoops(); // Set up the UserAgent for the fetcher. UserAgent userAgent = new UserAgent(options.getAgentName(), CrawlConfig.EMAIL_ADDRESS, CrawlConfig.WEB_ADDRESS); // You also get to customize the FetcherPolicy FetcherPolicy defaultPolicy; if (options.getCrawlDuration() != 0) { defaultPolicy = new AdaptiveFetcherPolicy(options.getEndCrawlTime(), options.getCrawlDelay()); } else { defaultPolicy = new FetcherPolicy(); } defaultPolicy.setMaxContentSize(CrawlConfig.MAX_CONTENT_SIZE); defaultPolicy.setRequestTimeout(10L * 1000L);//10 seconds // COMPLETE for crawling a single site, EFFICIENT for many sites if (options.getCrawlPolicy().equals(Options.IMPOLITE_CRAWL_POLICY)) { defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.IMPOLITE); } else if (options.getCrawlPolicy().equals(Options.EFFICIENT_CRAWL_POLICY)) { defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.EFFICIENT); } else if (options.getCrawlPolicy().equals(Options.COMPLETE_CRAWL_POLICY)) { defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.COMPLETE); } // It is a good idea to set up a crawl duration when running long crawls as you may // end up in situations where the fetch slows down due to a 'long tail' and by // specifying a crawl 
duration you know exactly when the crawl will end. int crawlDurationInMinutes = options.getCrawlDuration(); boolean hasEndTime = crawlDurationInMinutes != Options.NO_CRAWL_DURATION; long targetEndTime = hasEndTime ? System.currentTimeMillis() + (crawlDurationInMinutes * CrawlConfig.MILLISECONDS_PER_MINUTE) : FetcherPolicy.NO_CRAWL_END_TIME; // By setting up a url filter we only deal with urls that we want to // instead of all the urls that we extract. BaseUrlFilter urlFilter = null; List<String> patterns = null; String regexUrlFiltersFile = options.getRegexUrlFiltersFile(); if (regexUrlFiltersFile != null) { patterns = RegexUrlDatumFilter.getUrlFilterPatterns(regexUrlFiltersFile); } else { patterns = RegexUrlDatumFilter.getDefaultUrlFilterPatterns(); if (domain != null) { String domainPatterStr = "+(?i)^(http|https)://([a-z0-9]*\\.)*" + domain; patterns.add(domainPatterStr); } else { String protocolPatterStr = "+(?i)^(http|https)://*"; patterns.add(protocolPatterStr); //Log.warn("Defaulting to basic url regex filtering (just suffix and protocol"); } } urlFilter = new RegexUrlDatumFilter(patterns.toArray(new String[patterns.size()])); // get a list of patterns which tell the miner which URLs to include or exclude. patterns.clear(); RegexUrlStringFilter urlsToMineFilter = null; String regexUrlsToMineFiltersFile = options.getRegexUrlToMineFile(); MineRTCriticsPreferences prefsAnalyzer = null; if (regexUrlsToMineFiltersFile != null) { patterns = RegexUrlDatumFilter.getUrlFilterPatterns(regexUrlsToMineFiltersFile); urlsToMineFilter = new RegexUrlStringFilter(patterns.toArray(new String[patterns.size()])); prefsAnalyzer = new MineRTCriticsPreferences(urlsToMineFilter); } // OK, now we're ready to start looping, since we've got our current // settings for (int curLoop = startLoop + 1; curLoop <= endLoop; curLoop++) { // Adjust target end time, if appropriate. 
if (hasEndTime) { int remainingLoops = (endLoop - curLoop) + 1; long now = System.currentTimeMillis(); long perLoopTime = (targetEndTime - now) / remainingLoops; defaultPolicy.setCrawlEndTime(now + perLoopTime); } Path curLoopDirPath = CrawlDirUtils.makeLoopDir(fs, outputPath, curLoop); String curLoopDirName = curLoopDirPath.getName(); setLoopLoggerFile(logsDir + curLoopDirName, curLoop); Flow flow = RTCriticsCrawlAndMinerWorkflow.createFlow(curLoopDirPath, crawlDbPath, defaultPolicy, userAgent, urlFilter, prefsAnalyzer, options); flow.complete(); // Writing out .dot files is a good way to verify your flows. flow.writeDOT("valid-flow.dot"); // Update crawlDbPath to point to the latest crawl db crawlDbPath = new Path(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME); } } catch (PlannerException e) { e.writeDOT("failed-flow.dot"); System.err.println("PlannerException: " + e.getMessage()); e.printStackTrace(System.err); System.exit(-1); } catch (Throwable t) { System.err.println("Exception running tool: " + t.getMessage()); t.printStackTrace(System.err); System.exit(-1); } }