List of usage examples for java.util.regex Pattern DOTALL
int DOTALL
To view the source code for java.util.regex Pattern DOTALL, click the Source Link.
From source file:com.android.dialer.lookup.yellowpages.YellowPagesApi.java
private String[] parseNameWebsiteUnitedStates() { Pattern regexNameAndWebsite = Pattern.compile("<a href=\"([^>]+?)\"[^>]+?class=\"url[^>]+?>([^<]+)</a>", Pattern.DOTALL); String name = null;//w w w . j a va 2 s. c o m String website = null; Matcher m = regexNameAndWebsite.matcher(mOutput); if (m.find()) { website = m.group(1).trim(); name = m.group(2).trim(); } return new String[] { name, website }; }
From source file:org.opennms.netmgt.provision.adapters.link.config.linkadapter.LinkPattern.java
/** * <p>setPattern</p>/*w ww .j a v a 2 s. co m*/ * * @param pattern a {@link java.lang.String} object. */ public void setPattern(final String pattern) { if (pattern != null) { m_compiledPattern = Pattern.compile(pattern, Pattern.CANON_EQ | Pattern.DOTALL); m_pattern = pattern; } }
From source file:com.formulasearchengine.mathosphere.mlp.text.PlaintextDocumentBuilder.java
/**
 * Turns the text accumulated in {@code writer} into plain text and stores it in
 * {@code result}.
 * <p>
 * The text is first passed through {@code WikiTextUtils.subsup} and then through a
 * fixed sequence of regular-expression replacements. Each step works on the output
 * of the previous one, so the order of the statements matters.
 */
@Override
public void endDocument() {
    String doc = WikiTextUtils.subsup(writer.toString());

    // remove remaining/undetected templates
    doc = Pattern.compile("\\{\\{[^\\{]*?\\}\\}").matcher(doc).replaceAll("");
    doc = Pattern.compile("\\u2016[^\\u2016]*?\\u2016").matcher(doc).replaceAll("");

    // remove dangling lines
    // NOTE(review): "(:?" is a capturing group that starts with an optional colon;
    // it looks like a typo for the non-capturing "(?:" - confirm.
    doc = Pattern.compile("(:?\\A|\\n)\\s*[\\*\\|:].*").matcher(doc).replaceAll("");
    doc = Pattern.compile("\\}\\}\\s*").matcher(doc).replaceAll("");

    // remove undetected emphasis tags
    doc = Pattern.compile("'{2,}").matcher(doc).replaceAll("");

    // comments (DOTALL lets a comment span several lines)
    doc = Pattern.compile("<!--.*?-->", Pattern.DOTALL).matcher(doc).replaceAll("");

    // headings
    doc = Pattern.compile("([=]{2,4})[^\\n]*?\\1", Pattern.DOTALL).matcher(doc).replaceAll("");

    // references
    doc = Pattern.compile("<references>.*?</references>", Pattern.DOTALL).matcher(doc).replaceAll("");
    doc = Pattern.compile("<ref[^>/]*>.*?</ref>", Pattern.DOTALL).matcher(doc).replaceAll("");
    doc = Pattern.compile("<ref[^>]*>").matcher(doc).replaceAll("");
    doc = Pattern.compile("</ref[^>]*>").matcher(doc).replaceAll("");

    // empty/unknown inline tags and non inline tags
    doc = Pattern.compile("<([^ >]+)[^>]*>(.*?)</\\1>").matcher(doc).replaceAll("$2");
    doc = Pattern.compile("<([^ >]+)[^>]*/?>").matcher(doc).replaceAll(" ");

    // fix for undetected links
    // NOTE(review): the "|" between the two groups is not escaped, so the pattern is
    // an alternation of two halves rather than "[[target|label]]"; an escaped pipe
    // may have been intended - confirm.
    doc = Pattern.compile("\\[\\[([^\\|]*)|([^\\]]*)]]").matcher(doc).replaceAll("$2");
    doc = Pattern.compile("\\[\\[[^\\[\\]]*]]").matcher(doc).replaceAll("");

    // strip unneeded linebreaks, etc.
    doc = Pattern.compile("\\n+").matcher(doc).replaceAll(" ");
    doc = Pattern.compile("\\s+").matcher(doc).replaceAll(" ");

    // remove language links
    // NOTE(review): the final "?" follows an already-reluctant ".*?" and is probably
    // a character lost to an encoding problem; Pattern.compile is expected to reject
    // a stacked quantifier like this - verify against the upstream source.
    doc = Pattern.compile("[a-z]{2,3}:.*??").matcher(doc).replaceAll("");

    // remove misc quotation symbols
    doc = Pattern.compile("'|\\\"").matcher(doc).replaceAll("");

    // reposition plurals into links
    // NOTE(review): the leading "?" (and the "?" in the replacement) is probably a
    // character lost to an encoding problem; a pattern that begins with "?" is
    // expected to fail in Pattern.compile - verify against the upstream source.
    doc = Pattern.compile("?(\\w)").matcher(doc).replaceAll("$1?");

    // good hackers trim!
    doc = doc.trim();
    this.result = doc;
}
From source file:org.rifidi.emulator.reader.thingmagic.commandobjects.SetCommand.java
public SetCommand(String command, ThingMagicReaderSharedResources tmsr) throws CommandCreationException { // TODO Auto-generated constructor stub this.command = command; this.tmsr = tmsr; List<String> tokens = new ArrayList<String>(); logger.debug("Parsing command: " + command); Pattern tokenizer = Pattern.compile( // anything less... "[^\\s\\w,<>=\\(\\)\\u0027]|" + // groups we are looking for... "\\w+|" + "\\u0027|" + "\\s*<>\\*|" + "\\s*>=\\s*|" + "\\s*<=\\s*|" + "\\s*=\\s*|" + "\\s*,\\s*|" + "\\s*>\\s*|" + "\\s*<\\s*|" + "\\s?+|" + "\\(|" + "\\)|", Pattern.CASE_INSENSITIVE | Pattern.DOTALL); Matcher tokenFinder = tokenizer.matcher(command.toLowerCase().trim()); while (tokenFinder.find()) { String temp = tokenFinder.group(); /*// w w w. ja va 2 s .c o m * no need to add empty strings at tokens. */ // TODO: Figure out why we are getting empty stings as tokens. if (temp.equals("")) continue; tokens.add(temp); } ListIterator<String> tokenIterator = tokens.listIterator(); String token = tokenIterator.next(); if (!token.equals("set")) throw new CommandCreationException("Error 0100: syntax error at '" + token + "'"); try { token = tokenIterator.next(); if (!token.matches(WHITE_SPACE)) { throw new CommandCreationException("Error 0100: syntax error at '" + token + "'"); } /* * here possible syntax can diverge into two possible directions... 
*/ token = tokenIterator.next(); if (token.matches("auto")) { logger.debug("Expecting to turn on or off AutoMode"); /* * here possible syntax can diverge into two possible directions */ token = tokenIterator.next(); if (token.matches(EQUALS_WITH_WS)) { logger.debug("Expecting to turn of AutoMode"); setUpStop(tokenIterator); } else if (token.matches(WHITE_SPACE)) { logger.debug("Expecting to turn of AutoMode"); setUpStart(tokenIterator); } else { throw new CommandCreationException("Error 0100: syntax error at '" + token + "'"); } } else if (token.matches("repeat")) { logger.debug("Expecting to set cursorListRepeatDelay"); setUpCursorListDelay(tokenIterator); } else { throw new CommandCreationException("Error 0100: syntax error at '" + token + "'"); } // check if the command correctly ends in a semicolon if (tokenIterator.hasNext()) { token = tokenIterator.next(); if (token.matches(WHITE_SPACE)) { token = tokenIterator.next(); } if (!token.equals(";")) { throw new CommandCreationException("Error 0100: syntax error at '" + token + "'"); } } else { throw new CommandCreationException("Error 0100: syntax error at '\n'"); } } catch (NoSuchElementException e) { /* * if we get here... we run out of tokens prematurely... Our job now * is to walk backwards to find the last non space tokens and throw * an exception saying that there is an syntax error at that point. */ /* * look for the last offending command block that is not a series of * whitespaces. */ token = tokenIterator.previous(); while (token.matches(WHITE_SPACE)) { token = tokenIterator.previous(); } logger.debug("Premature end of token list detected."); throw new CommandCreationException("Error 0100: syntax error at '" + token + "'"); } }
From source file:org.fourthline.cling.transport.impl.RecoverGENAEventProcessor.java
private String fixXMLEncodedLastChange(String xml) { Pattern pattern = Pattern.compile("<LastChange>(.*)</LastChange>", Pattern.DOTALL); Matcher matcher = pattern.matcher(xml); if (matcher.find() && matcher.groupCount() == 1) { String xmlEncodedLastChange = matcher.group(1); if (XmlPullParserUtils.isNullOrEmpty(xmlEncodedLastChange)) return xml; xmlEncodedLastChange = xmlEncodedLastChange.trim(); String fixedXmlEncodedLastChange; // first look if LastChange text is XML encoded (some renderers will sent it not XML encoded) if (xmlEncodedLastChange.charAt(0) == '<') { fixedXmlEncodedLastChange = StringEscapeUtils.escapeXml(xmlEncodedLastChange); log.warning("fixed LastChange that was not XML encoded"); } else {/* w w w . j a v a 2 s. c om*/ // delete potential funky characters (at least found in the Philips NP2900 that inserts garbage HTML) fixedXmlEncodedLastChange = StringUtils.replaceChars(xmlEncodedLastChange, "<>", null); if (fixedXmlEncodedLastChange.equals(xmlEncodedLastChange)) { // no change return xml; } log.warning("deleted invalid characters in LastChange"); } String fixedXml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>" + "<e:propertyset xmlns:e=\"urn:schemas-upnp-org:event-1-0\">" + "<e:property>" + "<LastChange>" + fixedXmlEncodedLastChange + "</LastChange>" + "</e:property>" + "</e:propertyset>"; return fixedXml; } return xml; }
From source file:org.echocat.jomon.runtime.jaxb.PatternAdapter.java
/**
 * Renders a {@link Pattern} flag bit set as a string of flag letters.
 * <p>
 * Letters are emitted in the fixed order {@code i} (CASE_INSENSITIVE),
 * {@code m} (MULTILINE), {@code s} (DOTALL), {@code x} (COMMENTS). Any other bits
 * in {@code flags} are ignored.
 *
 * @param flags the flag bits to render
 * @return the letters for the recognised flags, or an empty string if none is set
 */
@Nonnull
protected String toFlagsAsString(@Nonnegative int flags) {
    // Parallel tables: masks[k] is rendered as letters[k].
    final int[] masks = { Pattern.CASE_INSENSITIVE, Pattern.MULTILINE, Pattern.DOTALL, Pattern.COMMENTS };
    final char[] letters = { 'i', 'm', 's', 'x' };

    final StringBuilder rendered = new StringBuilder();
    for (int k = 0; k < masks.length; k++) {
        if ((flags & masks[k]) != 0) {
            rendered.append(letters[k]);
        }
    }
    return rendered.toString();
}
From source file:com.gorillalogic.fonemonkey.PropertyUtil.java
/** * A better wildcard pattern matcher (written by Justin). * // ww w.j av a 2 s . c om * @param src * the string to search for the pattern * @param pattern * the wildcard pattern (containing * and ?) * @return true if matches, otherwise false */ private static boolean matchWildcard(String src, String pattern) { // first, escape everything in the pattern that's not a wildcard char (either * or ?) StringBuilder sb = new StringBuilder(); for (char c : pattern.toCharArray()) { if ("*?".indexOf(c) != -1) { sb.append(c); } else { // not wildcard char, so escape it sb.append("\\Q").append(c).append("\\E"); } } // 1. replace * (or repeated *'s) with .* // 2. replace ? with . pattern = sb.toString().replaceAll("\\*+", ".*").replaceAll("\\?", "."); Pattern p = Pattern.compile(pattern, Pattern.DOTALL | Pattern.MULTILINE); return p.matcher(src).matches(); }
From source file:org.apache.pdfbox.pdfparser.VisualSignatureParser.java
private void skipToNextObj() throws IOException { byte[] b = new byte[16]; Pattern p = Pattern.compile("\\d+\\s+\\d+\\s+obj.*", Pattern.DOTALL); /* Read a buffer of data each time to see if it starts with a * known keyword. This is not the most efficient design, but we should * rarely be needing this function. We could update this to use the * circular buffer, like in readUntilEndStream(). *///from w w w .jav a 2s . c o m while (!pdfSource.isEOF()) { int l = pdfSource.read(b); if (l < 1) { break; } String s = new String(b, "US-ASCII"); if (s.startsWith("trailer") || s.startsWith("xref") || s.startsWith("startxref") || s.startsWith("stream") || p.matcher(s).matches()) { pdfSource.unread(b); break; } else { pdfSource.unread(b, 1, l - 1); } } }
From source file:com.globalsight.util.edit.SegmentUtil.java
/**
 * Collects the internal-text fragments found in a segment.
 * <p>
 * For every id returned by {@code InternalTextUtil.getInternalIndex(src)} the two
 * templates {@code REGEX_ALL} and {@code REGEX_ALL_2} are formatted with that id
 * and searched in {@code src}, in that order. Group 1 of each match, with nested
 * tag pairs removed, is added to the result.
 *
 * @param src the segment text to scan
 * @return the extracted fragments in the order they were found; never {@code null}
 */
public List<String> getInternalWords(String src) {
    List<String> words = new ArrayList<String>();

    List<String> ids = new ArrayList<String>();
    ids.addAll(InternalTextUtil.getInternalIndex(src));

    for (String id : ids) {
        Object[] formatArgs = { id };

        // Same extraction for both templates; REGEX_ALL is always handled first.
        for (String template : new String[] { REGEX_ALL, REGEX_ALL_2 }) {
            String regex = MessageFormat.format(template, formatArgs);
            Matcher hits = Pattern.compile(regex, Pattern.DOTALL).matcher(src);
            while (hits.find()) {
                String fragment = hits.group(1);
                words.add(fragment.replaceAll("<[^/]*>.*?</.*?>", ""));
            }
        }
    }

    return words;
}
From source file:org.apache.hadoop.hbase.filter.RegexStringComparator.java
/**
 * Constructor.
 * Adds Pattern.DOTALL to the underlying Pattern by delegating to the
 * two-argument constructor with {@code Pattern.DOTALL} as the second argument.
 *
 * @param expr a valid regular expression
 */
public RegexStringComparator(String expr) {
    this(expr, Pattern.DOTALL);
}