Example usage for java.util.regex Pattern DOTALL

List of usage examples for java.util.regex Pattern DOTALL

Introduction

In this page you can find the example usage for java.util.regex Pattern DOTALL.

Prototype

int DOTALL

To view the source code for java.util.regex Pattern DOTALL, click the Source Link.

Document

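Enables dotall mode. In dotall mode, the expression . matches any character, including a line terminator; by default it does not match line terminators.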

Usage

From source file:com.android.dialer.lookup.yellowpages.YellowPagesApi.java

/**
 * Extracts the name and website from the first anchor in {@code mOutput}
 * whose tag carries a {@code class} attribute value starting with "url".
 *
 * @return a two-element array {@code { name, website }}; both entries are
 *         {@code null} when no such anchor is found
 */
private String[] parseNameWebsiteUnitedStates() {
    final Matcher anchor = Pattern
            .compile("<a href=\"([^>]+?)\"[^>]+?class=\"url[^>]+?>([^<]+)</a>", Pattern.DOTALL)
            .matcher(mOutput);

    // Nothing matched: report both values as absent.
    if (!anchor.find()) {
        return new String[] { null, null };
    }

    // Group 1 is the href value, group 2 the visible link text.
    final String website = anchor.group(1).trim();
    final String name = anchor.group(2).trim();
    return new String[] { name, website };
}

From source file:org.opennms.netmgt.provision.adapters.link.config.linkadapter.LinkPattern.java

/**
 * <p>setPattern</p>/*w ww  .j a  v  a  2 s. co m*/
 *
 * @param pattern a {@link java.lang.String} object.
 */
public void setPattern(final String pattern) {
    if (pattern != null) {
        m_compiledPattern = Pattern.compile(pattern, Pattern.CANON_EQ | Pattern.DOTALL);
        m_pattern = pattern;
    }
}

From source file:com.formulasearchengine.mathosphere.mlp.text.PlaintextDocumentBuilder.java

/**
 * Post-processes the text accumulated in {@code writer} by applying a fixed
 * sequence of regex replacements and stores the trimmed outcome in
 * {@code this.result}. The replacements are applied in order, each one
 * operating on the output of the previous one.
 */
@Override
public void endDocument() {
    // Start from the writer's text after it has been passed through WikiTextUtils.subsup.
    String doc = WikiTextUtils.subsup(writer.toString());

    // remove remaining/undetected templates
    // {{...}} spans that contain no further '{'
    doc = Pattern.compile("\\{\\{[^\\{]*?\\}\\}").matcher(doc).replaceAll("");
    // text enclosed between two U+2016 characters
    doc = Pattern.compile("\\u2016[^\\u2016]*?\\u2016").matcher(doc).replaceAll("");

    // remove dangling lines
    // lines whose first non-whitespace character is '*', '|' or ':'
    // NOTE(review): "(:?" is an optional literal colon in a capturing group; a
    // non-capturing group "(?:" may have been intended - confirm.
    doc = Pattern.compile("(:?\\A|\\n)\\s*[\\*\\|:].*").matcher(doc).replaceAll("");
    // left-over closing template braces plus trailing whitespace
    doc = Pattern.compile("\\}\\}\\s*").matcher(doc).replaceAll("");

    // remove undetected emphasis tags
    // runs of two or more apostrophes
    doc = Pattern.compile("'{2,}").matcher(doc).replaceAll("");

    // comments
    // DOTALL lets a single comment span several lines
    doc = Pattern.compile("<!--.*?-->", Pattern.DOTALL).matcher(doc).replaceAll("");

    // headings
    // 2-4 '=' characters, text without a newline, then the same run of '=' again
    doc = Pattern.compile("([=]{2,4})[^\\n]*?\\1", Pattern.DOTALL).matcher(doc).replaceAll("");

    // references
    // whole <references> and <ref> elements including their content, then any stray ref tags
    doc = Pattern.compile("<references>.*?</references>", Pattern.DOTALL).matcher(doc).replaceAll("");
    doc = Pattern.compile("<ref[^>/]*>.*?</ref>", Pattern.DOTALL).matcher(doc).replaceAll("");
    doc = Pattern.compile("<ref[^>]*>").matcher(doc).replaceAll("");
    doc = Pattern.compile("</ref[^>]*>").matcher(doc).replaceAll("");

    // empty/unknown inline tags and non inline tags
    // paired tags are replaced by their content; compiled without DOTALL, so the
    // content may not cross a line terminator
    doc = Pattern.compile("<([^ >]+)[^>]*>(.*?)</\\1>").matcher(doc).replaceAll("$2");
    // any remaining tag becomes a single space
    doc = Pattern.compile("<([^ >]+)[^>]*/?>").matcher(doc).replaceAll(" ");

    // fix for undetected links
    // NOTE(review): the '|' between the two groups is unescaped and therefore acts as
    // alternation ("[[text" OR "text]]") rather than matching a literal pipe inside
    // [[target|label]] - presumably "\\|" was intended; confirm before changing.
    doc = Pattern.compile("\\[\\[([^\\|]*)|([^\\]]*)]]").matcher(doc).replaceAll("$2");
    // remaining [[...]] links without nested brackets are dropped
    doc = Pattern.compile("\\[\\[[^\\[\\]]*]]").matcher(doc).replaceAll("");

    // strip unneeded linebreaks, etc.
    doc = Pattern.compile("\\n+").matcher(doc).replaceAll(" ");
    doc = Pattern.compile("\\s+").matcher(doc).replaceAll(" ");

    // remove language links
    // NOTE(review): ".*??" puts a second '?' after an already reluctant quantifier,
    // which java.util.regex rejects as a dangling meta character. The trailing '?'
    // looks like a non-ASCII character lost in an encoding round-trip - TODO restore
    // the original character from the upstream source.
    doc = Pattern.compile("[a-z]{2,3}:.*??").matcher(doc).replaceAll("");

    // remove misc quotation symbols
    // single and double quote characters
    doc = Pattern.compile("'|\\\"").matcher(doc).replaceAll("");
    // reposition plurals into links
    // NOTE(review): this pattern begins with a bare '?', which is not a valid regex;
    // as above, it presumably stood for a non-ASCII marker character (the same one
    // appears in the replacement) - TODO restore from the upstream source.
    doc = Pattern.compile("?(\\w)").matcher(doc).replaceAll("$1?");

    // good hackers trim!
    doc = doc.trim();

    this.result = doc;
}

From source file:org.rifidi.emulator.reader.thingmagic.commandobjects.SetCommand.java

/**
 * Tokenizes the given command text and validates it as a {@code set} command.
 *
 * <p>The token sequence accepted by this constructor is:
 * {@code set}, a whitespace token, then either
 * <ul>
 * <li>{@code auto} followed by an equals token (handled by {@code setUpStop}) or by a
 * whitespace token (handled by {@code setUpStart}), or</li>
 * <li>{@code repeat} (handled by {@code setUpCursorListDelay}),</li>
 * </ul>
 * and finally an optional whitespace token followed by {@code ;}.
 * The command is lower-cased and trimmed before it is tokenized.
 *
 * @param command the raw command text
 * @param tmsr the shared resources stored on this command
 * @throws CommandCreationException if the command is empty, does not follow the
 *         sequence above, or ends before the terminating semicolon
 */
public SetCommand(String command, ThingMagicReaderSharedResources tmsr) throws CommandCreationException {
    this.command = command;
    this.tmsr = tmsr;

    List<String> tokens = new ArrayList<String>();

    logger.debug("Parsing command: " + command);

    Pattern tokenizer = Pattern.compile(
            // anything less...
            "[^\\s\\w,<>=\\(\\)\\u0027]|" +
            // groups we are looking for...
                    "\\w+|" + "\\u0027|" + "\\s*<>\\*|" + "\\s*>=\\s*|" + "\\s*<=\\s*|" + "\\s*=\\s*|"
                    + "\\s*,\\s*|" + "\\s*>\\s*|" + "\\s*<\\s*|" + "\\s?+|" + "\\(|" + "\\)|",
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
    Matcher tokenFinder = tokenizer.matcher(command.toLowerCase().trim());

    while (tokenFinder.find()) {
        String temp = tokenFinder.group();
        /*
         * The tokenizer can produce zero-length matches: "\\s?+" matches without
         * consuming anything and the expression ends with an empty alternative
         * (trailing '|'). Such empty tokens carry no information and are dropped.
         */
        if (temp.isEmpty())
            continue;
        tokens.add(temp);
    }

    ListIterator<String> tokenIterator = tokens.listIterator();

    /*
     * An empty or blank command yields no tokens at all. Report it as a syntax
     * error instead of letting next() fail with an unchecked
     * NoSuchElementException outside of the try block below.
     */
    if (!tokenIterator.hasNext()) {
        throw new CommandCreationException("Error 0100:     syntax error at '\n'");
    }

    String token = tokenIterator.next();

    if (!token.equals("set"))
        throw new CommandCreationException("Error 0100:     syntax error at '" + token + "'");

    try {

        token = tokenIterator.next();

        if (!token.matches(WHITE_SPACE)) {
            throw new CommandCreationException("Error 0100:     syntax error at '" + token + "'");
        }

        /*
         * here possible syntax can diverge into two possible directions...
         */
        token = tokenIterator.next();
        if (token.matches("auto")) {
            logger.debug("Expecting to turn on or off AutoMode");

            /*
             * here possible syntax can diverge into two possible directions
             */
            token = tokenIterator.next();
            if (token.matches(EQUALS_WITH_WS)) {
                logger.debug("Expecting to turn of AutoMode");
                setUpStop(tokenIterator);
            } else if (token.matches(WHITE_SPACE)) {
                logger.debug("Expecting to turn of AutoMode");
                setUpStart(tokenIterator);
            } else {
                throw new CommandCreationException("Error 0100:     syntax error at '" + token + "'");
            }

        } else if (token.matches("repeat")) {
            logger.debug("Expecting to set cursorListRepeatDelay");
            setUpCursorListDelay(tokenIterator);
        } else {
            throw new CommandCreationException("Error 0100:     syntax error at '" + token + "'");
        }

        // check if the command correctly ends in a semicolon
        if (tokenIterator.hasNext()) {
            token = tokenIterator.next();

            // a single whitespace token is tolerated in front of the semicolon
            if (token.matches(WHITE_SPACE)) {
                token = tokenIterator.next();
            }

            if (!token.equals(";")) {
                throw new CommandCreationException("Error 0100:     syntax error at '" + token + "'");
            }
        } else {
            throw new CommandCreationException("Error 0100:     syntax error at '\n'");
        }

    } catch (NoSuchElementException e) {
        /*
         * if we get here... we run out of tokens prematurely... Our job now
         * is to walk backwards to find the last non space tokens and throw
         * an exception saying that there is an syntax error at that point.
         */

        /*
         * look for the last offending command block that is not a series of
         * whitespaces.
         */

        token = tokenIterator.previous();
        while (token.matches(WHITE_SPACE)) {
            token = tokenIterator.previous();
        }
        logger.debug("Premature end of token list detected.");
        throw new CommandCreationException("Error 0100:     syntax error at '" + token + "'");

    }

}

From source file:org.fourthline.cling.transport.impl.RecoverGENAEventProcessor.java

/**
 * Repairs the content of the {@code <LastChange>} element of an event message.
 *
 * <p>If the element's text starts with {@code '<'} it is XML-escaped; otherwise any
 * {@code '<'} and {@code '>'} characters are removed from it. When a repair was made,
 * a freshly built property-set document containing only the fixed
 * {@code <LastChange>} element is returned.
 *
 * @param xml the event message text
 * @return the rebuilt document, or {@code xml} unchanged when there is no
 *         {@code <LastChange>} element, its content is empty or blank, or no
 *         character had to be removed
 */
private String fixXMLEncodedLastChange(String xml) {
    Pattern pattern = Pattern.compile("<LastChange>(.*)</LastChange>", Pattern.DOTALL);
    Matcher matcher = pattern.matcher(xml);

    // The pattern has exactly one capturing group, so a successful find() is sufficient.
    if (!matcher.find()) {
        return xml;
    }

    String xmlEncodedLastChange = matcher.group(1);

    if (XmlPullParserUtils.isNullOrEmpty(xmlEncodedLastChange))
        return xml;

    xmlEncodedLastChange = xmlEncodedLastChange.trim();

    // Whitespace-only content is empty after trim(); charAt(0) below would throw.
    if (xmlEncodedLastChange.isEmpty())
        return xml;

    String fixedXmlEncodedLastChange;

    // first look if LastChange text is XML encoded (some renderers will sent it not XML encoded)
    if (xmlEncodedLastChange.charAt(0) == '<') {
        fixedXmlEncodedLastChange = StringEscapeUtils.escapeXml(xmlEncodedLastChange);
        log.warning("fixed LastChange that was not XML encoded");
    } else {
        // delete potential funky characters (at least found in the Philips NP2900 that inserts garbage HTML)
        fixedXmlEncodedLastChange = StringUtils.replaceChars(xmlEncodedLastChange, "<>", null);
        if (fixedXmlEncodedLastChange.equals(xmlEncodedLastChange)) {
            // no change
            return xml;
        }

        log.warning("deleted invalid characters in LastChange");
    }

    // Rebuild a minimal property set around the repaired LastChange text.
    String fixedXml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
            + "<e:propertyset xmlns:e=\"urn:schemas-upnp-org:event-1-0\">" + "<e:property>" + "<LastChange>"
            + fixedXmlEncodedLastChange + "</LastChange>" + "</e:property>" + "</e:propertyset>";

    return fixedXml;
}

From source file:org.echocat.jomon.runtime.jaxb.PatternAdapter.java

/**
 * Renders a {@link Pattern} flag bit mask as a string of flag letters.
 *
 * <p>Letters are emitted in the fixed order {@code i} (CASE_INSENSITIVE),
 * {@code m} (MULTILINE), {@code s} (DOTALL), {@code x} (COMMENTS); any other
 * bits in {@code flags} are ignored.
 *
 * @param flags the bit mask to render
 * @return the letters of the flags that are set; empty if none of the four is set
 */
@Nonnull
protected String toFlagsAsString(@Nonnegative int flags) {
    // Parallel tables: masks[k] is rendered as letters[k].
    final int[] masks = { Pattern.CASE_INSENSITIVE, Pattern.MULTILINE, Pattern.DOTALL, Pattern.COMMENTS };
    final char[] letters = { 'i', 'm', 's', 'x' };

    final StringBuilder rendered = new StringBuilder();
    for (int k = 0; k < masks.length; k++) {
        if ((flags & masks[k]) != 0) {
            rendered.append(letters[k]);
        }
    }
    return rendered.toString();
}

From source file:com.gorillalogic.fonemonkey.PropertyUtil.java

/**
 * Tests whether a string matches a wildcard pattern in its entirety.
 *
 * <p>In the pattern, {@code *} (or a run of several {@code *}) stands for any
 * sequence of characters, including none, and {@code ?} stands for exactly one
 * character. Every other character is matched literally. Wildcards also match
 * line terminators.
 *
 * @param src
 *            the string to search for the pattern
 * @param pattern
 *            the wildcard pattern (containing * and ?)
 * @return true if matches, otherwise false
 */
private static boolean matchWildcard(String src, String pattern) {
    // Translate the wildcard pattern into a regular expression in one pass.
    final StringBuilder regex = new StringBuilder();
    boolean insideStarRun = false;
    for (int i = 0; i < pattern.length(); i++) {
        final char ch = pattern.charAt(i);
        if (ch == '*') {
            // a run of consecutive '*' collapses into a single ".*"
            if (!insideStarRun) {
                regex.append(".*");
            }
            insideStarRun = true;
            continue;
        }
        insideStarRun = false;
        if (ch == '?') {
            regex.append('.');
        } else {
            // literal character: quote it so regex metacharacters lose their meaning
            regex.append("\\Q").append(ch).append("\\E");
        }
    }

    return Pattern.compile(regex.toString(), Pattern.DOTALL | Pattern.MULTILINE).matcher(src).matches();
}

From source file:org.apache.pdfbox.pdfparser.VisualSignatureParser.java

/**
 * Advances {@code pdfSource} until the upcoming bytes start with one of the
 * keywords {@code trailer}, {@code xref}, {@code startxref} or {@code stream},
 * or look like an object header ({@code <digits> <digits> obj}), or until no
 * more data can be read. On a hit, the bytes just read are pushed back so the
 * source is left positioned at the start of the keyword.
 *
 * @throws IOException if reading from or pushing back into the source fails
 */
private void skipToNextObj() throws IOException {
    byte[] b = new byte[16];
    Pattern p = Pattern.compile("\\d+\\s+\\d+\\s+obj.*", Pattern.DOTALL);
    /* Read a buffer of data each time to see if it starts with a
     * known keyword. This is not the most efficient design, but we should
     * rarely be needing this function. We could update this to use the
     * circular buffer, like in readUntilEndStream().
     */
    while (!pdfSource.isEOF()) {
        int l = pdfSource.read(b);
        if (l < 1) {
            break;
        }
        // Decode only the l bytes filled by this read; the remainder of the
        // buffer may still hold bytes from an earlier iteration.
        String s = new String(b, 0, l, "US-ASCII");
        if (s.startsWith("trailer") || s.startsWith("xref") || s.startsWith("startxref")
                || s.startsWith("stream") || p.matcher(s).matches()) {
            // Push back exactly what was read so no stale bytes enter the stream.
            pdfSource.unread(b, 0, l);
            break;
        } else {
            // No keyword here: drop the first byte and retry one position further on.
            pdfSource.unread(b, 1, l - 1);
        }
    }
}

From source file:com.globalsight.util.edit.SegmentUtil.java

/**
 * Collects the text captured for every internal id found in {@code src}.
 *
 * <p>For each id returned by {@code InternalTextUtil.getInternalIndex(src)}, the
 * templates {@code REGEX_ALL} and then {@code REGEX_ALL_2} are formatted with that
 * id and searched in {@code src}. From group 1 of every match, spans matching
 * {@code <[^/]*>.*?</.*?>} are removed and the remainder is added to the result.
 *
 * @param src the text to scan
 * @return the collected strings, in the order they were found
 */
public List<String> getInternalWords(String src) {
    final List<String> internalIds = new ArrayList<String>(InternalTextUtil.getInternalIndex(src));
    final List<String> collected = new ArrayList<String>();
    final String[] templates = { REGEX_ALL, REGEX_ALL_2 };

    for (String internalId : internalIds) {
        final Object[] formatArgs = { internalId };
        // First template is exhausted before the second one is tried.
        for (String template : templates) {
            final String expression = MessageFormat.format(template, formatArgs);
            final Matcher hits = Pattern.compile(expression, Pattern.DOTALL).matcher(src);
            while (hits.find()) {
                collected.add(hits.group(1).replaceAll("<[^/]*>.*?</.*?>", ""));
            }
        }
    }

    return collected;
}

From source file:org.apache.hadoop.hbase.filter.RegexStringComparator.java

/**
 * Constructor
 * Adds Pattern.DOTALL to the underlying Pattern
 * by delegating to the two-argument constructor with {@link Pattern#DOTALL}
 * as the flags.
 * @param expr a valid regular expression
 */
public RegexStringComparator(String expr) {
    this(expr, Pattern.DOTALL);
}