List of usage examples for java.util.regex.Pattern.compile
public static Pattern compile(String regex)
From source file:LogExample.java
/**
 * Matches the sample log line against a combined-log-format regular
 * expression and prints the captured fields.
 *
 * <p>If the line does not match the whole expression, or the expression does
 * not contain exactly {@code NUM_FIELDS} capture groups, an error is written
 * to standard error and the method returns without printing any field.</p>
 *
 * @param argv command-line arguments (not read)
 */
public static void main(String argv[]) {
    String logEntryPattern = "^([\\d.]+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(.+?)\" (\\d{3}) (\\d+) \"([^\"]+)\" \"([^\"]+)\"";

    System.out.println("Using RE Pattern:");
    System.out.println(logEntryPattern);
    System.out.println("Input line is:");
    System.out.println(logEntryLine);

    Matcher fields = Pattern.compile(logEntryPattern).matcher(logEntryLine);

    // The entry is usable only when the whole line matches and the pattern
    // exposes the expected number of groups.
    boolean wellFormed = fields.matches() && NUM_FIELDS == fields.groupCount();
    if (!wellFormed) {
        System.err.println("Bad log entry (or problem with RE?):");
        System.err.println(logEntryLine);
        return;
    }

    System.out.println("IP Address: " + fields.group(1));
    System.out.println("Date&Time: " + fields.group(4));
    System.out.println("Request: " + fields.group(5));
    System.out.println("Response: " + fields.group(6));
    System.out.println("Bytes Sent: " + fields.group(7));

    // A referer of "-" is not printed.
    String referer = fields.group(8);
    if (!referer.equals("-")) {
        System.out.println("Referer: " + referer);
    }
    System.out.println("Browser: " + fields.group(9));
}
From source file:com.github.aliteralmind.codelet.examples.non_xbn.PrintJDBlocksStartStopLineNumsXmpl.java
/** <p>The main function.</p> *//*from ww w. ja va 2 s. com*/ public static final void main(String[] as_1RqdJavaSourcePath) { //Read command-line parameter String sJPath = null; try { sJPath = as_1RqdJavaSourcePath[0]; } catch (ArrayIndexOutOfBoundsException aibx) { throw new NullPointerException( "Missing one-and-only required parameter: Path to java source-code file."); } System.out.println("Java source: " + sJPath); //Establish line-iterator Iterator<String> lineItr = null; try { lineItr = FileUtils.lineIterator(new File(sJPath)); //Throws npx if null } catch (IOException iox) { throw new RTIOException("PrintJDBlocksStartStopLinesXmpl", iox); } Pattern pTrmdJDBlockStart = Pattern.compile("^[\\t ]*/\\*\\*"); String sDD = ".."; int lineNum = 1; boolean bInJDBlock = false; while (lineItr.hasNext()) { String sLn = lineItr.next(); if (!bInJDBlock) { if (pTrmdJDBlockStart.matcher(sLn).matches()) { bInJDBlock = true; System.out.print(lineNum + sDD); } } else if (sLn.indexOf("*/") != -1) { bInJDBlock = false; System.out.println(lineNum); } lineNum++; } if (bInJDBlock) { throw new IllegalStateException("Reach end of file. JavaDoc not closed."); } }
From source file:de.citec.csra.elancsv.parser.SimpleParser.java
public static void main(String[] args) throws IOException, ParseException { Options opts = new Options(); opts.addOption("file", true, "Tab-separated ELAN export file to load."); opts.addOption("tier", true, "Tier to analyze. Optional: Append ::num to interpret annotations numerically."); opts.addOption("format", true, "How to read information from the file name. %V -> participant, %A -> annoatator, %C -> condition, e.g. \"%V - %A\""); opts.addOption("help", false, "Print this help and exit"); CommandLineParser parser = new BasicParser(); CommandLine cmd = parser.parse(opts, args); if (cmd.hasOption("help")) { helpExit(opts, "where OPTION includes:"); }//from ww w. ja v a 2 s . c o m String infile = cmd.getOptionValue("file"); if (infile == null) { helpExit(opts, "Error: no file given."); } String format = cmd.getOptionValue("format"); if (format == null) { helpExit(opts, "Error: no format given."); } String tier = cmd.getOptionValue("tier"); if (tier == null) { helpExit(opts, "Error: no tier given."); } // TODO count values in annotations (e.g. 
search all robot occurrences) String[] tn = tier.split("::"); boolean numeric = false; if (tn.length == 2 && tn[1].equals("num")) { numeric = true; tier = tn[0]; } format = "^" + format + "$"; format = format.replaceFirst("%V", "(?<V>.*?)"); format = format.replaceFirst("%A", "(?<A>.*?)"); format = format.replaceFirst("%C", "(?<C>.*?)"); Pattern pa = Pattern.compile(format); Map<String, Participant> participants = new HashMap<>(); BufferedReader br = new BufferedReader(new FileReader(infile)); String line; int lineno = 0; while ((line = br.readLine()) != null) { String[] parts = line.split("\t"); lineno++; if (parts.length < 5) { System.err.println("WARNING: line '" + lineno + "' too short '" + line + "'"); continue; } Annotation a = new Annotation(Long.valueOf(parts[ElanFormat.START.field]), Long.valueOf(parts[ElanFormat.STOP.field]), Long.valueOf(parts[ElanFormat.DURATION.field]), parts[ElanFormat.VALUE.field]); String tname = parts[ElanFormat.TIER.field]; String file = parts[ElanFormat.FILE.field].replaceAll(".eaf", ""); Matcher m = pa.matcher(file); String vp = file; String condition = "?"; String annotator = "?"; String participantID = vp; if (m.find()) { vp = m.group("V"); if (format.indexOf("<A>") > 0) { annotator = m.group("A"); } if (format.indexOf("<C>") > 0) { condition = m.group("C"); } } participantID = vp + ";" + annotator; if (!participants.containsKey(participantID)) { participants.put(participantID, new Participant(vp, condition, annotator)); } Participant p = participants.get(participantID); if (!p.tiers.containsKey(tname)) { p.tiers.put(tname, new Tier(tname)); } p.tiers.get(tname).annotations.add(a); } Map<String, Map<String, Number>> values = new HashMap<>(); Set<String> rownames = new HashSet<>(); String allCountKey = "c: all values"; String allDurationKey = "d: all values"; String allMeanKey = "m: all values"; for (Map.Entry<String, Participant> e : participants.entrySet()) { // System.out.println(e); Tier t = e.getValue().tiers.get(tier); 
String participantID = e.getKey(); if (!values.containsKey(participantID)) { values.put(participantID, new HashMap<String, Number>()); } Map<String, Number> row = values.get(participantID); //participant id if (t != null) { row.put(allCountKey, 0l); row.put(allDurationKey, 0l); row.put(allMeanKey, 0l); for (Annotation a : t.annotations) { long countAll = (long) row.get(allCountKey) + 1; long durationAll = (long) row.get(allDurationKey) + a.duration; long meanAll = durationAll / countAll; row.put(allCountKey, countAll); row.put(allDurationKey, durationAll); row.put(allMeanKey, meanAll); if (!numeric) { String countKey = "c: " + a.value; String durationKey = "d: " + a.value; String meanKey = "m: " + a.value; if (!row.containsKey(countKey)) { row.put(countKey, 0l); } if (!row.containsKey(durationKey)) { row.put(durationKey, 0l); } if (!row.containsKey(meanKey)) { row.put(meanKey, 0d); } long count = (long) row.get(countKey) + 1; long duration = (long) row.get(durationKey) + a.duration; double mean = duration * 1.0 / count; row.put(countKey, count); row.put(durationKey, duration); row.put(meanKey, mean); rownames.add(countKey); rownames.add(durationKey); rownames.add(meanKey); } else { String countKey = "c: " + t.name; String sumKey = "s: " + t.name; String meanKey = "m: " + t.name; if (!row.containsKey(countKey)) { row.put(countKey, 0l); } if (!row.containsKey(sumKey)) { row.put(sumKey, 0d); } if (!row.containsKey(meanKey)) { row.put(meanKey, 0d); } double d = 0; try { d = Double.valueOf(a.value); } catch (NumberFormatException ex) { } long count = (long) row.get(countKey) + 1; double sum = (double) row.get(sumKey) + d; double mean = sum / count; row.put(countKey, count); row.put(sumKey, sum); row.put(meanKey, mean); rownames.add(countKey); rownames.add(sumKey); rownames.add(meanKey); } } } } ArrayList<String> list = new ArrayList(rownames); Collections.sort(list); StringBuilder header = new StringBuilder("ID;Annotator;"); header.append(allCountKey); 
header.append(";"); header.append(allDurationKey); header.append(";"); header.append(allMeanKey); header.append(";"); for (String l : list) { header.append(l); header.append(";"); } System.out.println(header); for (Map.Entry<String, Map<String, Number>> e : values.entrySet()) { StringBuilder row = new StringBuilder(e.getKey()); row.append(";"); if (e.getValue().containsKey(allCountKey)) { row.append(e.getValue().get(allCountKey)); } else { row.append("0"); } row.append(";"); if (e.getValue().containsKey(allDurationKey)) { row.append(e.getValue().get(allDurationKey)); } else { row.append("0"); } row.append(";"); if (e.getValue().containsKey(allMeanKey)) { row.append(e.getValue().get(allMeanKey)); } else { row.append("0"); } row.append(";"); for (String l : list) { if (e.getValue().containsKey(l)) { row.append(e.getValue().get(l)); } else { row.append("0"); } row.append(";"); } System.out.println(row); } }
From source file:org.roda.core.common.SeleniumUtils.java
/** * /* w w w .j av a 2s. c om*/ * @param args: * the first argument is the RODA base url and the second argument is * the driver path * @throws InterruptedException * @throws IOException */ public static void main(String[] args) throws InterruptedException, IOException { if (args.length != 2) { System.err.println("Number of arguments not correct since it is only needed two arguments. " + "The first argument is the RODA base url and the second argument is the driver path"); commandHelp(); System.exit(0); } url = args[0]; driverPath = args[1]; ChromeDriverService service = new ChromeDriverService.Builder().usingDriverExecutable(new File(driverPath)) .usingAnyFreePort().build(); service.start(); driver = new RemoteWebDriver(service.getUrl(), DesiredCapabilities.chrome()); driver.get(url); // welcome page saveHTML(); savePublicPages(); saveLoginPages(); saveHelpPages(); savePlanningPages(); saveAdminPages(); saveIngestPages(); saveSearchPages(); saveBrowsePages(); driver.quit(); service.stop(); for (Entry<String, String> entry : locations.entrySet()) { String location = entry.getKey(); String html = getHTMLSource(location); Pattern expression = Pattern.compile("<div id=\"webaxscore\".*?<span>(.*?)</span>"); Matcher matcher = expression.matcher(html); if (matcher.find()) { System.out.println(location + " | " + locations.get(location) + " | " + matcher.group(1)); } } }
From source file:com.xue777hua.emails.test.Test.java
public static void main(String[] args) { String text = "<div id=\"frag_1\" class=\"page_fragment auth_frag\" data-first=\"true\" data-fid=\"1\"><div class=\"module_topic_paths\"></div><h1 class=\"svTitle\" id=\"tm005\">Effect of inulin and pectin on rheological and thermal properties of potato starch paste and gel</h1><ul class=\"authorGroup noCollab\"><li><a href=\"#\" class=\"authorName\" id=\"authname_N41d730a0N3ee493d4\" data-t=\"a\" data-fn=\"Teresa\" data-ln=\"Witczak\" data-pos=\"1\" data-tb=\"\">Teresa Witczak</a><a title=\"Affiliation: a\" href=\"#af005\" class=\"intra_ref auth_aff\" id=\"baf005\"><sup>a</sup></a><sup>, </sup><a title=\"Corresponding author contact information\" href=\"#cor1\" id=\"bcor1\" class=\"intra_ref auth_corr\"><img class=\"imgLazyJSB\" alt=\"Corresponding author contact information\" src=\"/sd/grey_pxl.gif\" data-inlimg=\"/entities/REcor.gif\"><noscript><img alt=\"Corresponding author contact information\" src=\"http://origin-cdn.els-cdn.com/sd/entities/REcor.gif\"></noscript></a><sup>, </sup><a href=\"mailto:t.witczak@ur.krakow.pl\" class=\"auth_mail\"><img class=\"imgLazyJSB\" src=\"/sd/grey_pxl.gif\" alt=\"E-mail the corresponding author\" data-inlimg=\"/entities/REemail.gif\"><noscript><img src=\"http://origin-cdn.els-cdn.com/sd/entities/REemail.gif\" alt=\"E-mail the corresponding author\"></noscript></a>, </li><li><a href=\"#\" class=\"authorName\" id=\"authname_N41d730a0N3ee4953c\" data-t=\"a\" data-fn=\"Mariusz\" data-ln=\"Witczak\" data-pos=\"2\" data-tb=\"\">Mariusz Witczak</a><a title=\"Affiliation: a\" href=\"#af005\" class=\"intra_ref auth_aff\" id=\"baf005\"><sup>a</sup></a>, </li><li><a href=\"#\" class=\"authorName\" id=\"authname_N41d730a0N3ee495f0\" data-t=\"a\" data-fn=\"Rafał\" data-ln=\"Ziobro\" data-pos=\"3\" data-tb=\"\">Rafa Ziobro</a><a title=\"Affiliation: b\" href=\"#af010\" class=\"intra_ref auth_aff\" id=\"baf010\"><sup>b</sup></a></li></ul><!--VALIDHTML--><ul class=\"affiliation\"><li 
id=\"af005\"><sup>a</sup> <span id=\"\">Department of Engineering and Machinery for Food Industry, University of Agriculture in Krakow, Balicka 122 Str., 30-149 Krakow, Poland</span></li><li id=\"af010\"><sup>b</sup> <span id=\"\">Department of Carbohydrates Technology, University of Agriculture in Krakow, Balicka 122 Str., 30-149 Krakow, Poland</span></li></ul><!--VALIDHTML--><!--VALIDHTML--><dl class=\"articleDates\"><dd>Received 24 May 2013, Revised 1 October 2013, Accepted 1 October 2013, Available online 11 October 2013</dd></dl><!--VALIDHTML--><div class=\"moreInformation\"></div><div id=\"ppvPlaceHolder\" class=\"hidden\"></div><!--VALIDHTML--><div id=\"showMoreButtons\"></div><dl class=\"extLinks\"><dd class=\"doiLink\"></dd><dd class=\"rightsLink\"></dd></dl><div class=\"articleOAlabelForced\"></div><div id=\"refersToAndreferredToBy\"><dl id=\"referredToBy\" class=\"documentThread\"><!--Referred To By--></dl></div><!--FRAGMENTEND--><div class=\"page_fragment_ind auth_frag\" data-id=\"frag_2\"></div></div>"; String authorList = ""; String articleTitle = ""; // ???title//from w ww .j ava 2 s. c o m Pattern articleTitlePattern = Pattern.compile("<h1.+?svTitle.+?>(.+?)</h1>"); Matcher articleTitleMatcher = articleTitlePattern.matcher(text); while (articleTitleMatcher.find()) { articleTitle = articleTitleMatcher.group(1); articleTitle = StringEscapeUtils.unescapeHtml(articleTitle); articleTitle = StringUtils.stripTags(articleTitle); System.out.println("" + articleTitle); } // ??? 
Pattern p = Pattern.compile("<ul.+?authorGroup.+?>(.+?)</ul>"); Matcher m = p.matcher(text); while (m.find()) { authorList = m.group(1); } p = Pattern.compile("<li>(.+?)</li>"); m = p.matcher(authorList); while (m.find()) { String authorItem = m.group(1); if (authorItem.contains("mailto")) { Pattern nameEmailPattern = Pattern .compile("data-tb=\"[\\d]{0,}\">(.+?)</a>.*href=\"mailto:(.+?)\" class=\"auth_mail\">"); Matcher nameEmailMatcher = nameEmailPattern.matcher(authorItem); if (nameEmailMatcher.find()) { String name = nameEmailMatcher.group(1); String email = nameEmailMatcher.group(2); name = StringEscapeUtils.unescapeHtml(name); // ???new ArrayList List<String> fieldList = new ArrayList<String>(); fieldList.add(name); fieldList.add(email); fieldList.add(articleTitle); int hashKey = (name + email + articleTitle).hashCode(); System.out.println("?????TitlehashKey:" + name + "|" + email + "|" + articleTitle + "|" + hashKey + ", nameEmailsList"); } } } }
From source file:com.dataartisans.timeoutmonitoring.TimeoutMonitoring.java
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); String filePath = params.get("filePath"); String delayStr = params.get("eventDelay"); String sessionTimeoutStr = params.get("sessionTimeout"); if (filePath == null || delayStr == null || sessionTimeoutStr == null) { System.out.println(//from ww w. j av a 2s . com "Job requires the --filePath, --sessionTimeout and --eventDelay option to be specified."); } else { int delay = Integer.parseInt(delayStr); int sessionTimeout = Integer.parseInt(sessionTimeoutStr); StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); final String[] inputKeys = { "_context_request_id", "payload:instance_type_id", "timestamp", "event_type", "publisher_id", "_context_user_name", "_context_project_name", "_context_tenant", "_context_project_id" }; final String key = "_context_request_id"; final String[] resultFields = { "_context_request_id", "timestamp" }; final String errorKey = "event_type"; final String errorRegex = "trove.+error"; final String timestampPattern = "yyyy-MM-dd HH:mm:ss.SSSSSS"; final Pattern errorPattern = Pattern.compile(errorRegex); DataStream<String> input = env.readTextFile(filePath); DataStream<JSONObject> jsonObjects = input.map(new MapFunction<String, JSONObject>() { @Override public JSONObject map(String s) throws Exception { return new JSONObject(s); } }); Function<JSONObject, Long> timestampExtractor = new TimestampExtractorFunction("timestamp", timestampPattern); @SuppressWarnings("unchecked") DataStream<JSONObject> sessionMonitoring = JSONSessionMonitoring.createSessionMonitoring(jsonObjects, // input data set inputKeys, // json elements to keep from the input key, // key to group on new JSONObjectPredicateAnd( // session start element new JSONObjectPredicateMatchRegex("publisher_id", Pattern.compile("api.*novactl.*")), new 
JSONObjectPredicateMatchEquals<>("event_type", "compute.instance.update")), new JSONObjectPredicateMatchEquals<>("event_type", "compute.instance.create.end"), // session end element timestampExtractor, delay, // maximum delay of events sessionTimeout, // session timeout new LatencyWindowFunction(resultFields), // create the latency from the first and last element of the session new LatencyTimeoutFunction(resultFields, sessionTimeout)); TypeInformation<JSONObject> jsonObjectTypeInformation = TypeExtractor.getForClass(JSONObject.class); DataStream<JSONObject> sessionTimeouts = sessionMonitoring.filter(new FilterFunction<JSONObject>() { @Override public boolean filter(JSONObject jsonObject) throws Exception { return jsonObject.has("sessionTimeout"); // we only want to keep the session timeouts } }); DataStream<JSONObject> sessionAlerts = Alert.createAlert(sessionTimeouts, // filtered input for session timeouts "SessionAlerts", // name of operator 5, // number of trigger events 300000, // interval length in which the trigger events have to occur (milliseconds) new JSONObjectAlertFunction("alert", // alert key "sessionTimeout", // alert value "timestamp", // timestamp key timestampPattern // timestamp pattern to generate ), jsonObjectTypeInformation // output type information ); DataStream<JSONObject> troveEvents = jsonObjects.filter(new FilterFunction<JSONObject>() { @Override public boolean filter(JSONObject jsonObject) throws Exception { return errorPattern.matcher(jsonObject.optString(errorKey)).matches(); } }); DataStream<JSONObject> troveAlerts = Alert.createAlert(troveEvents, "TroveAlerts", 3, 50000, new JSONObjectAlertFunction("alert", "troveAlert", "timestamp", timestampPattern), jsonObjectTypeInformation); sessionAlerts.print(); troveAlerts.print(); env.execute("Execute timeout monitoring"); } }
From source file:BookRank.java
/**
 * Grab the sales rank off the web page and log it, then regenerate the
 * graph of all logged ranks with gnuplot.
 *
 * <p>Each successful match appends one line to {@code DATA_FILE} made of six
 * space-separated date fields followed by the rank (column 7, which is what
 * the gnuplot script plots).</p>
 *
 * @param args command-line arguments (not read)
 * @throws Exception if the URL cannot be opened, the data file cannot be
 *                   written, or gnuplot cannot be started
 */
public static void main(String[] args) throws Exception {
    Properties p = new Properties();
    String title = p.getProperty("title", "NO TITLE IN PROPERTIES");
    // The url must have the "isbn=" at the very end, or otherwise
    // be amenable to being string-catted to, like the default.
    String url = p.getProperty("url", "http://test.ing/test.cgi?isbn=");
    // The 10-digit ISBN for the book.
    String isbn = p.getProperty("isbn", "0000000000");
    // The RE pattern (MUST have ONE capture group for the number)
    String pattern = p.getProperty("pattern", "Rank: (\\d+)");

    // Looking for something like this in the input:
    //     <b>QuickBookShop.web Sales Rank: </b>
    //     26,252
    //     </font><br>
    Pattern r = Pattern.compile(pattern);

    // Open the URL and get a Reader from it.
    BufferedReader is = new BufferedReader(new InputStreamReader(new URL(url + isbn).openStream()));
    String input;
    try {
        // Read the URL looking for the rank information, as
        // a single long string, so can match RE across multi-lines.
        // NOTE(review): this version uses a placeholder and never reads from
        // the reader; the reader is now at least closed instead of leaked.
        input = "input from console";
    } finally {
        is.close();
    }

    // If found, append to sales data file.
    Matcher m = r.matcher(input);
    if (m.find()) {
        PrintWriter pw = new PrintWriter(new FileWriter(DATA_FILE, true));
        try {
            // Equivalent of `date +'%m %d %H %M %S %Y'`. "HH" (0-23) is required:
            // the gnuplot timefmt below parses the hour with %H, so the former
            // 12-hour "hh" misplaced every afternoon sample.
            String date = new SimpleDateFormat("MM dd HH mm ss yyyy ").format(new Date());
            // Paren 1 is the digits (and maybe ','s) that matched; remove comma
            pw.println(date + m.group(1).replace(",", ""));
        } finally {
            pw.close();
        }
    } else {
        System.err.println("WARNING: pattern `" + pattern + "' did not match in `" + url + isbn + "'!");
    }

    // Whether current data found or not, draw the graph, using
    // external plotting program against all historical data.
    // Could use gnuplot, R, any other math/graph program.
    // Better yet: use one of the Java plotting APIs.
    String gnuplot_cmd = "set term png\n"
            + "set output \"" + GRAPH_FILE + "\"\n"
            + "set xdata time\n"
            + "set ylabel \"Book sales rank\"\n"
            + "set bmargin 3\n"
            + "set logscale y\n"
            + "set yrange [1:60000] reverse\n"
            + "set timefmt \"%m %d %H %M %S %Y\"\n"
            + "plot \"" + DATA_FILE + "\" using 1:7 title \"" + title + "\" with lines\n";

    // Feed the script to gnuplot on its standard input.
    Process proc = Runtime.getRuntime().exec("/usr/local/bin/gnuplot");
    PrintWriter gp = new PrintWriter(proc.getOutputStream());
    try {
        gp.print(gnuplot_cmd);
    } finally {
        gp.close();
    }
}
From source file:com.hygenics.parser.MainApp.java
/** * The entire programs main method./*from w ww .ja v a 2 s . com*/ * * @param args */ public static void main(String[] args) { final Logger log = LoggerFactory.getLogger(MainApp.class); log.info("Starting Parse @ " + Calendar.getInstance().getTime().toString()); ClassPathXmlApplicationContext context = new ClassPathXmlApplicationContext( ("file:" + System.getProperty("beansfile").trim())); log.info("Found beans @ " + System.getProperty("beansfile").trim()); log.info("Starting"); ArrayList<String> results = null; String activity = null; log.info("Obtaining Activities"); ActivitiesStack stack = (ActivitiesStack) context.getBean("ActivitiesStack"); // integers keeping track of bean number to pull (e.g. DumpToText0 or // DumpToText1) // in keeping with the spirit of dumping out data int n = 0, srn = 0, mv = 0, cen = 0, pps = 0, sb = 0, kv = 0, cf = 0, zip = 0, bm = 0, dump = 0, kettle = 0, execute = 0, transfer = 0, sqls = 0, job = 0, parsepages = 0, getpages = 0, map = 0, js = 0, sdump = 0, transforms = 0, execs = 0, gim = 0, ems = 0, jd = 0; Pattern p = Pattern.compile("[A-Za-z]+[0-9]+"); Matcher m; // start stack init log.info("Stack Initialized with Size of " + stack.getSize() + " @ " + Calendar.getInstance().getTime().toString()); while (stack.getSize() > 0) { // pop activity form stack activity = stack.Pop(); log.info("Activities Remaining " + stack.getSize()); m = p.matcher(activity); log.info("\n\nACTIVITY: " + activity + "\n\n"); if (activity.toLowerCase().contains("manualrepconn")) { log.info("Manual Transformation Started @ " + Calendar.getInstance().getTime().toString()); ManualReplacement mrp = (ManualReplacement) context.getBean("ManualRepConn"); mrp.run(); log.info("Manual Transformation Finished @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().contains("cleanfolder")) { log.info("Folder Cleanup Started @ " + Calendar.getInstance().getTime().toString()); CleanFolder c; if (cf == 0 || 
context.containsBean("CleanFolder")) { c = (CleanFolder) ((context.containsBean("CleanFolder")) ? context.getBean("CleanFolder") : context.getBean("CleanFolder" + cf)); } else { c = (CleanFolder) context.getBean("CleanFolder" + cf); } c.run(); cf++; log.info("File Cleanup Complete @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().contains("zip")) { log.info("Zip File Creation Started @ " + Calendar.getInstance().getTime().toString()); Archiver a; if (zip == 0 || context.containsBean("Zip")) { a = (Archiver) ((context.containsBean("Zip")) ? context.getBean("Zip") : context.getBean("Zip" + zip)); } else { a = (Archiver) context.getBean("Zip" + zip); } a.run(); zip++; log.info("Zip File Creation Complete @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().contains("transfer")) { log.info("File Transfer Started @ " + Calendar.getInstance().getTime().toString()); Upload u; if (transfer == 0 || context.containsBean("FileTransfer")) { u = (Upload) ((context.containsBean("FileTransfer")) ? 
context.getBean("FileTransfer") : context.getBean("FileTransfer" + transfer)); } else { u = (Upload) context.getBean("FileTransfer" + transfer); } u.run(); transfer++; log.info("File Transfer Complete @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().contains("droptables")) { // drop tables log.info("Dropping Tables @ " + Calendar.getInstance().getTime().toString()); DropTables droptables = (DropTables) context.getBean("DropTables"); droptables.run(); droptables = null; log.info("Done Dropping Tables @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().equals("createtables")) { // create tables log.info("Creating Tables @ " + Calendar.getInstance().getTime().toString()); CreateTable create = (CreateTable) context.getBean("CreateTables"); create.run(); create = null; log.info("Done Creating Tables @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().contains("createtableswithreference")) { // create tables log.info("Creating Tables @ " + Calendar.getInstance().getTime().toString()); CreateTablesWithReference create = (CreateTablesWithReference) context .getBean("CreateTablesWithReference"); create.run(); create = null; log.info("Done Creating Tables @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().contains("truncate")) { // truncate table log.info("Truncating @ " + Calendar.getInstance().getTime().toString()); Truncate truncate = (Truncate) context.getBean("Truncate"); truncate.truncate(); truncate = null; log.info("Truncated @ " + Calendar.getInstance().getTime().toString()); Runtime.getRuntime().gc(); log.info("Free Memory: " + Runtime.getRuntime().freeMemory()); } else if (activity.toLowerCase().equals("enforce")) { // enforce schema log.info("Enforcing Schema @" + Calendar.getInstance().getTime().toString()); ForceConformity ef = (ForceConformity) context.getBean("EnforceStandards"); ef.run(); log.info("Done Enforcing Schema 
@" + Calendar.getInstance().getTime().toString()); Runtime.getRuntime().gc(); log.info("Free Memory: " + Runtime.getRuntime().freeMemory()); } else if (activity.toLowerCase().contains("enforcewithreference")) { // enforce schema ForceConformityWithReference ef = (ForceConformityWithReference) context .getBean("EnforceStandardsWithReference"); log.info("Enforcing Schema By Reference @" + Calendar.getInstance().getTime().toString()); ef.run(); log.info("Done Enforcing Schema @" + Calendar.getInstance().getTime().toString()); Runtime.getRuntime().gc(); log.info("Free Memory: " + Runtime.getRuntime().freeMemory()); } else if (activity.toLowerCase().trim().equals("repconn")) { log.info("Replacing Transformation Connection Information @" + Calendar.getInstance().getTime().toString()); RepConn rep = (RepConn) context.getBean("repconn"); rep.run(); log.info("Finished Replacing Connection Information @" + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().contains("job")) { // run a Pentaho job as opposed to a Pentaho Transformation log.info("Run Job kjb file @" + Calendar.getInstance().getTime().toString()); RunJob kjb = null; if (m.find()) { kjb = (RunJob) context.getBean(activity); } else { kjb = (RunJob) context.getBean("Job" + job); } kjb.run(); kjb = null; log.info("Pentaho Job Complete @" + Calendar.getInstance().getTime().toString()); Runtime.getRuntime().gc(); } else if (activity.toLowerCase().compareTo("execute") == 0) { // Execute a process log.info("Executing Process @" + Calendar.getInstance().getTime().toString()); if (context.containsBean("Execute") && execs == 0) { ExecuteProcess proc = (ExecuteProcess) context.getBean(("Execute")); proc.Execute(); } else { ExecuteProcess proc = (ExecuteProcess) context.getBean(("Execute" + execute)); proc.Execute(); } execs++; log.info("Pages Obtained @" + Calendar.getInstance().getTime().toString()); Runtime.getRuntime().gc(); log.info("Free Memory: " + Runtime.getRuntime().freeMemory()); } 
else if (activity.toLowerCase().contains("parsepageswithscala") || activity.toLowerCase().contains("parsepagesscala")) { //parse pages with scala log.info("Parsing Pages with Scala @ " + Calendar.getInstance().getTime().toString()); ScalaParseDispatcher pds = null; if (context.containsBean("ParsePagesScala" + pps)) { pds = (ScalaParseDispatcher) context.getBean("ParsePagesScala" + pps); } else if (context.containsBean("ParsePagesScala") && pps > 0) { pds = (ScalaParseDispatcher) context.getBean("ParsePagesScala"); } pps++; pds.run(); Runtime.getRuntime().gc(); log.info("Finished Parsing Pages with Scala @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().contains("parsepages")) { // Parse Pages with java log.info("Parsing Individual Pages @" + Calendar.getInstance().getTime().toString()); if (context.containsBean("ParsePages") && parsepages == 0) { ParseDispatcher pd = (ParseDispatcher) context.getBean("ParsePages"); pd.run(); pd = null; } else { ParseDispatcher pd = (ParseDispatcher) context.getBean("ParsePages" + parsepages); pd.run(); pd = null; } parsepages++; log.info("Finished Parsing @" + Calendar.getInstance().getTime().toString()); Runtime.getRuntime().gc(); log.info("Free Memory: " + Runtime.getRuntime().freeMemory()); } else if (activity.toLowerCase().contains("kvparser")) { // Parse Pages using the KV Parser log.info("Parsing Individual Pages with Key Value Pairs @" + Calendar.getInstance().getTime().toString()); if (context.containsBean("KVParser") && parsepages == 0) { KVParser pd = (KVParser) context.getBean("KVParser"); pd.run(); pd = null; } else { KVParser pd = (KVParser) context.getBean("KVParser" + kv); pd.run(); pd = null; } parsepages++; log.info("Finished Parsing @" + Calendar.getInstance().getTime().toString()); Runtime.getRuntime().gc(); log.info("Free Memory: " + Runtime.getRuntime().freeMemory()); } else if (activity.toLowerCase().contains("parsewithsoup")) { // Parse Pages with Jsoup log.info("Parsing 
Pages with JSoup @ " + Calendar.getInstance().getTime().toString()); if (context.containsBean("ParseJSoup") && js == 0) { ParseJSoup psj = (ParseJSoup) context.getBean("ParseJSoup"); psj.run(); } else { ParseJSoup psj = (ParseJSoup) context.getBean("ParseJSoup" + Integer.toString(js)); psj.run(); } js++; log.info("Finished Parsing @" + Calendar.getInstance().getTime().toString()); Runtime.getRuntime().gc(); log.info("Finished Parsing with JSoup @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().contains("breakmultiplescala") || activity.toLowerCase().contains("breakmultiplewithscala")) { log.info("Breaking Records"); BreakMultipleScala bms = null; if (context.containsBean("BreakMultipleScala" + ems)) { bms = (BreakMultipleScala) context.getBean("BreakMultipleScala" + sb); } else { bms = (BreakMultipleScala) context.getBean("BreakMultipleScala"); } bms.run(); bms = null; sb++; Runtime.getRuntime().gc(); System.gc(); log.info("Free Memory: " + Runtime.getRuntime().freeMemory()); log.info("Completed Breaking Tasks"); } else if (activity.toLowerCase().contains("breakmultiple")) { // break apart multi-part records log.info("Breaking apart Records (BreakMultiple) @" + Calendar.getInstance().getTime().toString()); if (context.containsBean("BreakMultiple") && bm == 0) { BreakMultiple br = (BreakMultiple) context.getBean(("BreakMultiple")); br.run(); br = null; } else { BreakMultiple br = (BreakMultiple) context.getBean(("BreakMultiple" + Integer.toString(bm))); br.run(); br = null; } bm++; log.info("Finished Breaking Apart Records @" + Calendar.getInstance().getTime().toString()); log.info("Free Memory: " + Runtime.getRuntime().freeMemory()); } else if (activity.toLowerCase().compareTo("mapper") == 0) { // remap data log.info("Mapping Records @" + Calendar.getInstance().getTime().toString()); if (context.containsBean("Mapper") && map == 0) { Mapper mp = (Mapper) context.getBean("Mapper"); mp.run(); mp = null; } else { Mapper mp = (Mapper) 
context.getBean("Mapper" + Integer.toString(map)); mp.run(); mp = null; } map++; log.info("Completed Mapping Records @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().contains("getimages")) { // Get Images in a Separate Step log.info("Beggining Image Pull @ " + Calendar.getInstance().getTime().toString()); if (context.containsBean("getImages") && gim == 0) { GetImages gi = (GetImages) context.getBean("getImages"); gi.run(); log.info("Image Pull Complete @ " + Calendar.getInstance().getTime().toString()); gi = null; } else { GetImages gi = (GetImages) context.getBean("getImages"); gi.run(); log.info("Image Pull Complete @ " + Calendar.getInstance().getTime().toString()); gi = null; } gim++; Runtime.getRuntime().gc(); System.gc(); log.info("Free Memory: " + Runtime.getRuntime().freeMemory()); } else if (activity.toLowerCase().compareTo("sql") == 0) { // execute a sql command log.info("Executing SQL Query @ " + Calendar.getInstance().getTime().toString()); if (context.containsBean("SQL") && sqls == 0) { ExecuteSQL sql; if (m.find()) { sql = (ExecuteSQL) context.getBean(activity); } else { sql = (ExecuteSQL) context.getBean("SQL"); } sql.execute(); sql = null; } else { ExecuteSQL sql; if (m.find()) { sql = (ExecuteSQL) context.getBean(activity); } else { sql = (ExecuteSQL) context.getBean("SQL" + sqls); } sql.execute(); sql = null; } sqls++; log.info("Finished SQL Query @ " + Calendar.getInstance().getTime().toString()); Runtime.getRuntime().gc(); System.gc(); log.info("Free Memory: " + Runtime.getRuntime().freeMemory()); } else if (activity.toLowerCase().compareTo("kettle") == 0) { // run one or more kettle transformation(s) log.info("Beginning Kettle Transformation @ " + Calendar.getInstance().getTime().toString()); RunTransformation rt = null; if (context.containsBean("kettle") && transforms == 0) { if (m.find()) { rt = (RunTransformation) context.getBean(activity); } else { rt = (RunTransformation) context.getBean(("kettle")); } 
rt.run(); rt = null; } else { if (m.find()) { rt = (RunTransformation) context.getBean(activity); } else { rt = (RunTransformation) context.getBean(("kettle" + kettle)); } rt.run(); rt = null; } transforms++; log.info("Ending Kettle Transformation @ " + Calendar.getInstance().getTime().toString()); Runtime.getRuntime().gc(); System.gc(); log.info("Free Memory: " + Runtime.getRuntime().freeMemory()); kettle++; } else if (activity.toLowerCase().contains("dumptotext")) { // dump to a text file via java log.info("Dumping to Text @ " + Calendar.getInstance().getTime().toString()); DumptoText dtt = null; if (m.find()) { dtt = (DumptoText) context.getBean(activity); } else { dtt = (DumptoText) context.getBean("DumpToText" + dump); } dtt.run(); dump++; log.info("Completed Dump @ " + Calendar.getInstance().getTime().toString()); dtt = null; Runtime.getRuntime().gc(); System.gc(); log.info("Free Memory: " + Runtime.getRuntime().freeMemory()); } else if (activity.toLowerCase().equals("jdump")) { log.info("Dumping via JDump @ " + Calendar.getInstance().getTime().toString()); if (jd == 0 && context.containsBean("JDump")) { JDump j = (JDump) context.getBean("JDump"); jd++; j.run(); } else { JDump j = (JDump) context.getBean("JDump" + jd); jd++; j.run(); } Runtime.getRuntime().gc(); System.gc(); log.info("Finished Dumping via JDump @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().contains("jdumpwithreference")) { log.info("Dumping via JDump @ " + Calendar.getInstance().getTime().toString()); if (jd == 0 && context.containsBean("JDumpWithReference")) { JDumpWithReference j = (JDumpWithReference) context.getBean("JDumpWithReference"); jd++; j.run(); } else { JDumpWithReference j = (JDumpWithReference) context.getBean("JDumpWithReference" + jd); jd++; j.run(); } Runtime.getRuntime().gc(); System.gc(); log.info("Finished Dumping via JDump @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().compareTo("commanddump") 
== 0) { // dump to text using a client side sql COPY TO command log.info("Dumping via SQL @ " + Calendar.getInstance().getTime().toString()); CommandDump d = (CommandDump) context.getBean("dump"); d.run(); d = null; log.info("Completed Dump @ " + Calendar.getInstance().getTime().toString()); // most likely not needed by satisfies my paranoia Runtime.getRuntime().gc(); System.gc(); } else if (activity.toLowerCase().equals("specdump")) { // Specified Dump log.info("Dumping via Specified Tables, Files, and Attributes @ " + Calendar.getInstance().getTime().toString()); if (context.containsBean("SpecDump") && sdump == 0) { sdump++; SpecifiedDump sd = (SpecifiedDump) context.getBean("SpecDump"); sd.run(); sd = null; } else if (context.containsBean("SpecDump" + Integer.toString(sdump))) { SpecifiedDump sd = (SpecifiedDump) context.getBean("SpecDump" + Integer.toString(sdump)); sd.run(); sd = null; } sdump++; log.info("Completed Dumping via Specified Tables, Files, and Attributes @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().contains("specdumpwithreference")) { // Specified Dump log.info("Dumping via Specified Tables, Files, and Attributes by Reference @ " + Calendar.getInstance().getTime().toString()); if (context.containsBean("SpecDumpWithReference") && sdump == 0) { sdump++; SpecDumpWithReference sd = (SpecDumpWithReference) context.getBean("SpecDumpWithReference"); sd.run(); sd = null; } else if (context.containsBean("SpecDumpWithReference" + Integer.toString(sdump))) { SpecDumpWithReference sd = (SpecDumpWithReference) context .getBean("SpecDumpWithReference" + Integer.toString(sdump)); sd.run(); sd = null; } else { log.info("Bean Not Found For " + activity); } sdump++; log.info("Completed Dumping via Specified Tables, Files, and Attributes @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().compareTo("email") == 0) { // email completion notice log.info("Sending Notice of Completion @ " + 
Calendar.getInstance().getTime().toString()); if (context.containsBean("Email") && ems == 0) { Send s = (Send) context.getBean("Email"); s.run(); s = null; } else { Send s = (Send) context.getBean("Email" + Integer.toString(ems)); s.run(); s = null; } ems++; Runtime.getRuntime().gc(); System.gc(); log.info("Completed Email @ " + Calendar.getInstance().getTime().toString()); log.info("Free Memory: " + Runtime.getRuntime().freeMemory()); } else if (activity.toLowerCase().equals("qa")) { // perform qa log.info("Performing Quality Assurance @ " + Calendar.getInstance().getTime().toString()); if (context.containsBean("QA")) { QualityAssurer qa = (QualityAssurer) context.getBean("QA"); qa.run(); qa = null; } // attempt to hint --> all tasks are really intense so anything // is nice Runtime.getRuntime().gc(); System.gc(); log.info("Completed QA @ " + Calendar.getInstance().getTime().toString()); log.info("Free Memory: " + Runtime.getRuntime().freeMemory()); } else if (activity.toLowerCase().equals("notify")) { log.info("Running Notification Tasks"); Notification nt = null; if (context.containsBean("Notify" + Integer.toString(n))) { nt = (Notification) context.getBean("Notify" + Integer.toString(n)); } else { nt = (Notification) context.getBean("Notify"); } nt.run(); nt = null; n++; if (context.containsBean("Email") && ems == 0) { Send s = (Send) context.getBean("Email"); s.run(); s = null; } else if (context.containsBean("Email" + ems)) { Send s = (Send) context.getBean("Email" + Integer.toString(ems)); s.run(); s = null; } ems++; Runtime.getRuntime().gc(); System.gc(); log.info("Free Memory: " + Runtime.getRuntime().freeMemory()); log.info("Completed Notification Tasks"); } else if (activity.toLowerCase().contains("move")) { log.info("Moving Files @ " + Calendar.getInstance().getTime().toString()); MoveFile mf = null; if (context.containsBean("Move" + Integer.toString(mv))) { mf = (MoveFile) context.getBean("Move" + Integer.toString(mv)); } else { mf = (MoveFile) 
context.getBean("Move"); } mf.run(); mv++; Runtime.getRuntime().gc(); log.info("Finished Moving Files @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().contains("numericalcheck")) { log.info("Checking Counts"); NumericalChecker nc = (NumericalChecker) context.getBean("NumericalChecker"); nc.run(); Runtime.getRuntime().gc(); log.info("Finished Checking Counts @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().contains("runscript")) { log.info("Running Script @ " + Calendar.getInstance().getTime().toString()); RunScript runner = null; if (context.containsBean("RunScript" + srn)) { runner = (RunScript) context.getBean("RunScript" + srn); } else { runner = (RunScript) context.getBean("RunScript"); } runner.run(); srn++; Runtime.getRuntime().gc(); log.info("Finished Running SCript @ " + Calendar.getInstance().getTime().toString()); } else if (activity.toLowerCase().contains("checkexistance")) { log.info("Checking Existance @ " + Calendar.getInstance().getTime().toString()); ExistanceChecker runner = null; if (context.containsBean("CheckExistance" + srn)) { runner = (ExistanceChecker) context.getBean("CheckExistance" + cen); } else { runner = (ExistanceChecker) context.getBean("CheckExistance"); } runner.run(); cen++; Runtime.getRuntime().gc(); log.info("Finished Checking Existance @ " + Calendar.getInstance().getTime().toString()); } else { log.info("Activity " + activity + " does not exist!"); } } log.info("Completed Parse @ " + Calendar.getInstance().getTime().toString()); context.destroy(); context.close(); }
From source file:com.mmj.app.common.util.SpiderHtmlUtils.java
/**
 * Demonstrates matching an HTTP(S) URL with a compiled regular expression
 * and prints whether the sample URL matches in full.
 *
 * <p>The pattern requires a literal {@code http://} or {@code https://}
 * prefix, followed by any run of ASCII letters, digits and the characters
 * {@code : / _ # ? = . & -}.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // The earlier form "[http|https]+[://]+..." used character classes where
    // literals/alternation were intended, so it accepted any mix of the
    // letters h, t, p, s and '|' followed by any mix of ':' and '/'.
    Pattern pattern = Pattern.compile("https?://[0-9A-Za-z:/_#?=.&\\-]*");
    Matcher matcher = pattern.matcher("http://haitao.smzdm.com/youhui/307563");
    // matches() succeeds only when the entire input is consumed by the pattern.
    System.out.println(matcher.matches());
}
From source file:PopClean.java
public static void main(String args[]) { try {//from ww w .ja v a2 s . co m String hostname = null, username = null, password = null; int port = 110; int sizelimit = -1; String subjectPattern = null; Pattern pattern = null; Matcher matcher = null; boolean delete = false; boolean confirm = true; // Handle command-line arguments for (int i = 0; i < args.length; i++) { if (args[i].equals("-user")) username = args[++i]; else if (args[i].equals("-pass")) password = args[++i]; else if (args[i].equals("-host")) hostname = args[++i]; else if (args[i].equals("-port")) port = Integer.parseInt(args[++i]); else if (args[i].equals("-size")) sizelimit = Integer.parseInt(args[++i]); else if (args[i].equals("-subject")) subjectPattern = args[++i]; else if (args[i].equals("-debug")) debug = true; else if (args[i].equals("-delete")) delete = true; else if (args[i].equals("-force")) // don't confirm confirm = false; } // Verify them if (hostname == null || username == null || password == null || sizelimit == -1) usage(); // Make sure the pattern is a valid regexp if (subjectPattern != null) { pattern = Pattern.compile(subjectPattern); matcher = pattern.matcher(""); } // Say what we are going to do System.out .println("Connecting to " + hostname + " on port " + port + " with username " + username + "."); if (delete) { System.out.println("Will delete all messages longer than " + sizelimit + " bytes"); if (subjectPattern != null) System.out.println("that have a subject matching: [" + subjectPattern + "]"); } else { System.out.println("Will list subject lines for messages " + "longer than " + sizelimit + " bytes"); if (subjectPattern != null) System.out.println("that have a subject matching: [" + subjectPattern + "]"); } // If asked to delete, ask for confirmation unless -force is given if (delete && confirm) { System.out.println(); System.out.print("Do you want to proceed (y/n) [n]: "); System.out.flush(); BufferedReader console = new BufferedReader(new InputStreamReader(System.in)); 
String response = console.readLine(); if (!response.equals("y")) { System.out.println("No messages deleted."); System.exit(0); } } // Connect to the server, and set up streams s = new Socket(hostname, port); in = new BufferedReader(new InputStreamReader(s.getInputStream())); out = new PrintWriter(new OutputStreamWriter(s.getOutputStream())); // Read the welcome message from the server, confirming it is OK. System.out.println("Connected: " + checkResponse()); // Now log in send("USER " + username); // Send username, wait for response send("PASS " + password); // Send password, wait for response System.out.println("Logged in"); // Check how many messages are waiting, and report it String stat = send("STAT"); StringTokenizer t = new StringTokenizer(stat); System.out.println(t.nextToken() + " messages in mailbox."); System.out.println("Total size: " + t.nextToken()); // Get a list of message numbers and sizes send("LIST"); // Send LIST command, wait for OK response. // Now read lines from the server until we get . by itself List msgs = new ArrayList(); String line; for (;;) { line = in.readLine(); if (line == null) throw new IOException("Unexpected EOF"); if (line.equals(".")) break; msgs.add(line); } // Now loop through the lines we read one at a time. // Each line should specify the message number and its size. int nummsgs = msgs.size(); for (int i = 0; i < nummsgs; i++) { String m = (String) msgs.get(i); StringTokenizer st = new StringTokenizer(m); int msgnum = Integer.parseInt(st.nextToken()); int msgsize = Integer.parseInt(st.nextToken()); // If the message is too small, ignore it. 
if (msgsize <= sizelimit) continue; // If we're listing messages, or matching subject lines // find the subject line for this message String subject = null; if (!delete || pattern != null) { subject = getSubject(msgnum); // get the subject line // If we couldn't find a subject, skip the message if (subject == null) continue; // If this subject does not match the pattern, then // skip the message if (pattern != null) { matcher.reset(subject); if (!matcher.matches()) continue; } // If we are listing, list this message if (!delete) { System.out.println("Subject " + msgnum + ": " + subject); continue; // so we never delete it } } // If we were asked to delete, then delete the message if (delete) { send("DELE " + msgnum); if (pattern == null) System.out.println("Deleted message " + msgnum); else System.out.println("Deleted message " + msgnum + ": " + subject); } } // When we're done, log out and shutdown the connection shutdown(); } catch (Exception e) { // If anything goes wrong print exception and show usage System.err.println(e); usage(); // Always try to shutdown nicely so the server doesn't hang on us shutdown(); } }