Example usage for org.apache.commons.lang3 StringUtils countMatches

List of usage examples for org.apache.commons.lang3 StringUtils countMatches

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringUtils countMatches.

Prototype

public static int countMatches(final CharSequence str, final char ch) 

Source Link

Document

Counts how many times the char appears in the given string.

A null or empty ("") String input returns 0.

 StringUtils.countMatches(null, *)       = 0
 StringUtils.countMatches("", *)         = 0
 StringUtils.countMatches("abba", 0)     = 0
 StringUtils.countMatches("abba", 'a')   = 2
 StringUtils.countMatches("abba", 'b')   = 2
 StringUtils.countMatches("abba", 'x')   = 0

Usage

From source file:org.totschnig.myexpenses.export.CategoryInfo.java

/**
 * Builds a shortened category name from {@code getName()}.
 *
 * <p>Everything from the first {@code '/'} onwards is dropped (the original note calls this
 * "getting rid of class"), and if more than one {@code ':'} remains only the first two
 * colon-separated levels are kept.
 *
 * @return the name without the part after '/', reduced to at most two levels
 */
private String extractCategoryName() {
    String result = getName();
    int i = result.indexOf('/');
    if (i != -1) {
        result = result.substring(0, i);
    }
    if (StringUtils.countMatches(result, ':') > 1) {
        // A positive limit keeps empty segments, so names such as "a::" or ":::" still
        // produce at least two parts instead of failing with an index error.
        String[] parts = result.split(":", 3);
        result = parts[0] + ":" + parts[1];
    }
    return result;
}

From source file:org.trimou.engine.interpolation.BracketDotKeySplitter.java

/**
 * Splits a key that may contain bracket-quoted literal segments.
 *
 * <p>When the key contains no {@code ["} sequence the work is delegated to the parent
 * implementation unchanged. Otherwise every match of {@code pattern} is replaced by
 * {@code "." + PREFIX + n}, the rewritten key is split by the parent implementation, and each
 * resulting part that is one of those placeholders is mapped back to the second capture
 * group of the corresponding match.
 *
 * @param key the key to split
 * @return an iterator over the key parts, with placeholders resolved to their literals
 */
@Override
public Iterator<String> split(final String key) {

    final int literalCount = StringUtils.countMatches(key, "[\"");
    if (literalCount == 0) {
        // Nothing bracket-quoted in this key: the parent splitter can handle it directly.
        return super.split(key);
    }

    final Matcher literalMatcher = pattern.matcher(key);
    final StringBuffer rewrittenKey = new StringBuffer();
    final Map<String, String> placeholders;

    if (literalCount > 1) {
        placeholders = new HashMap<String, String>(4);
        for (int n = 0; literalMatcher.find(); n++) {
            final String placeholder = PREFIX + n;
            placeholders.put(placeholder, literalMatcher.group(2));
            literalMatcher.appendReplacement(rewrittenKey, "." + placeholder);
        }
    } else if (literalMatcher.find()) {
        // Exactly one candidate: a singleton map avoids allocating a HashMap.
        final String placeholder = PREFIX + 0;
        placeholders = Collections.singletonMap(placeholder, literalMatcher.group(2));
        literalMatcher.appendReplacement(rewrittenKey, "." + placeholder);
    } else {
        // The marker was present but the pattern did not match; nothing to substitute.
        placeholders = Collections.emptyMap();
    }
    literalMatcher.appendTail(rewrittenKey);

    final Iterator<String> parts = super.split(rewrittenKey.toString());
    return Iterators.transform(parts, new Function<String, String>() {
        @Override
        public String apply(String part) {
            if (placeholders.containsKey(part)) {
                return placeholders.get(part);
            }
            return part;
        }
    });
}

From source file:org.voltdb.HsqlBackend.java

/**
 * Expands the statement's {@code ?} placeholders with literal renderings of the supplied
 * parameters and passes the resulting SQL text to {@code runDML}.
 *
 * <p>Placeholders are located with a plain left-to-right search for {@code '?'}; a question
 * mark inside a quoted SQL literal is therefore treated as a placeholder as well. If the SQL
 * text contains no {@code '?'} at all, the supplied parameters and types are discarded.
 *
 * <p>Rendering per parameter: null values become {@code NULL}; {@code TimestampType} values
 * and other values declared as TIMESTAMP become a quoted {@link Timestamp} string; strings and
 * byte arrays are passed through {@code sqlEscape} (byte arrays declared as VARBINARY are
 * hex-encoded first); anything else is appended via {@code toString()}.
 *
 * @param stmt statement whose text is expanded
 * @param params parameter values, consumed in placeholder order
 * @param paramJavaTypes one type byte per parameter, resolved with {@code VoltType.get};
 *        must not be null
 * @return the result of {@code runDML} for the expanded SQL
 * @throws RuntimeException if there are more parameters than {@code '?'} placeholders, or a
 *         parameter value does not fit its declared type
 */
VoltTable runSQLWithSubstitutions(final SQLStmt stmt, ParameterSet params, byte[] paramJavaTypes) {
    //HSQLProcedureWrapper does nothing smart. it just implements this interface with runStatement()
    assert (paramJavaTypes != null);

    String sql = stmt.getText();
    StringBuilder sqlOut = new StringBuilder(sql.length() * 2);

    int lastIndex = 0;

    // if there's no ? in the statement, then zero out any auto-parameterization
    int paramCount = StringUtils.countMatches(sql, "?");
    if (paramCount == 0) {
        params = ParameterSet.emptyParameterSet();
        paramJavaTypes = new byte[0];
    }

    Object[] paramObjs = params.toArray();
    for (int i = 0; i < paramObjs.length; i++) {
        int nextIndex = sql.indexOf('?', lastIndex);
        if (nextIndex == -1) {
            // Reached when a parameter has no remaining '?' to fill.
            throw new RuntimeException("SQL Statement has more arguments than params.");
        }
        // Copy the SQL between the previous placeholder and this one, then skip the '?'.
        sqlOut.append(sql, lastIndex, nextIndex);
        lastIndex = nextIndex + 1;

        VoltType type = VoltType.get(paramJavaTypes[i]);

        if (VoltType.isNullVoltType(paramObjs[i])) {
            sqlOut.append("NULL");
        } else if (paramObjs[i] instanceof TimestampType) {
            if (type != VoltType.TIMESTAMP) {
                throw new RuntimeException("Inserting date into mismatched column type in HSQL.");
            }
            TimestampType d = (TimestampType) paramObjs[i];
            // convert VoltDB's microsecond granularity to millis.
            Timestamp t = new Timestamp(d.getTime() / 1000);
            sqlOut.append('\'').append(t.toString()).append('\'');
        } else if (paramObjs[i] instanceof byte[]) {
            if (type == VoltType.STRING) {
                // Convert from byte[] -> String; escape single quotes
                try {
                    sqlOut.append(sqlEscape(new String((byte[]) paramObjs[i], "UTF-8")));
                } catch (UnsupportedEncodingException e) {
                    // should NEVER HAPPEN
                    System.err.println("FATAL: Your JVM doens't support UTF-&");
                    System.exit(-1);
                }
            } else if (type == VoltType.VARBINARY) {
                // Convert from byte[] -> String; using hex
                sqlOut.append(sqlEscape(Encoder.hexEncode((byte[]) paramObjs[i])));
            } else {
                throw new RuntimeException(
                        "Inserting string/varbinary (bytes) into mismatched column type in HSQL.");
            }
        } else if (paramObjs[i] instanceof String) {
            if (type != VoltType.STRING) {
                throw new RuntimeException("Inserting string into mismatched column type in HSQL.");
            }
            // Escape single quotes
            sqlOut.append(sqlEscape((String) paramObjs[i]));
        } else {
            if (type == VoltType.TIMESTAMP) {
                long t = Long.parseLong(paramObjs[i].toString());
                TimestampType d = new TimestampType(t);
                // convert VoltDB's microsecond granularity to millis, the same way the
                // TimestampType branch above does; java.sql.Timestamp expects milliseconds,
                // so the value has to be divided, not multiplied.
                Timestamp ts = new Timestamp(d.getTime() / 1000);
                sqlOut.append('\'').append(ts.toString()).append('\'');
            } else {
                sqlOut.append(paramObjs[i].toString());
            }
        }
    }
    // Append whatever follows the last consumed placeholder.
    sqlOut.append(sql, lastIndex, sql.length());

    return runDML(sqlOut.toString());
}

From source file:org.voltdb.planner.PlannerTestCase.java

/**
 * Test helper that lets a junit test assert successful planning: derives the parameter count
 * from the number of {@code ?} characters in the SQL text and forwards to the overload that
 * takes an explicit count.
 */
private List<AbstractPlanNode> compileWithJoinOrderToFragments(String sql, boolean planForSinglePartition,
        String joinOrder) {
    // Every '?' counts as a parameter; test queries are assumed not to contain quoted question marks.
    return compileWithJoinOrderToFragments(sql, StringUtils.countMatches(sql, "?"), planForSinglePartition,
            joinOrder);
}

From source file:org.voltdb.planner.PlannerTestCase.java

/**
 * Compiles {@code sql} with the given join order via {@code compileSPWithJoinOrder}, using the
 * number of {@code ?} characters in the SQL text as the parameter count.
 */
private AbstractPlanNode compileWithCountedParamsAndJoinOrder(String sql, String joinOrder) throws Exception {
    // Every '?' counts as a parameter; test queries are assumed not to contain quoted question marks.
    final int questionMarks = StringUtils.countMatches(sql, "?");
    final AbstractPlanNode plan = compileSPWithJoinOrder(sql, questionMarks, joinOrder);
    return plan;
}

From source file:org.voltdb.planner.PlannerTestCase.java

/**
 * Test helper that lets a junit test assert successful planning: compiles {@code sql} through
 * {@code compileSPWithJoinOrder} with no join order, using the number of {@code ?} characters
 * in the SQL text as the parameter count.
 */
protected AbstractPlanNode compile(String sql) {
    // Every '?' counts as a parameter; test queries are assumed not to contain quoted question marks.
    return compileSPWithJoinOrder(sql, StringUtils.countMatches(sql, "?"), null);
}

From source file:org.voltdb.planner.PlannerTestCase.java

/**
 * Test helper that lets a junit test assert successful planning of a single-partition query.
 *
 * <p>Temporarily turns partitioning inference off and single-partition planning on, compiles
 * {@code sql} through {@code compileSPWithJoinOrder} with no join order, and then puts both
 * defaults back to the values they had on entry, even if compilation throws.
 *
 * @param sql the query text; each {@code ?} is counted as one parameter
 * @return the plan produced by {@code compileSPWithJoinOrder}
 */
protected AbstractPlanNode compileForSinglePartition(String sql) {
    // Yes, we ARE assuming that test queries don't contain quoted question marks.
    int paramCount = StringUtils.countMatches(sql, "?");

    // Save each default from its own field so the restore below writes back the right value.
    final boolean savedInferPartitioning = m_byDefaultInferPartitioning;
    final boolean savedPlanForSinglePartition = m_byDefaultPlanForSinglePartition;
    m_byDefaultInferPartitioning = false;
    m_byDefaultPlanForSinglePartition = true;
    try {
        return compileSPWithJoinOrder(sql, paramCount, null);
    } finally {
        // Restore in finally so a failed compile cannot leak the overrides into later tests.
        m_byDefaultInferPartitioning = savedInferPartitioning;
        m_byDefaultPlanForSinglePartition = savedPlanForSinglePartition;
    }
}

From source file:org.voltdb.regressionsuites.TestInsertIntoSelectSuite.java

/**
 * Returns the number of {@code ?} characters in the first generated statement registered
 * for the given procedure name.
 */
private static int numberOfParametersNeeded(String procName) {
    final String firstStatement = mapOfAllGeneratedStatements().get(procName).get(0);
    return StringUtils.countMatches(firstStatement, "?");
}

From source file:org.voyanttools.trombone.input.extract.TikaExtractorTest.java

/**
 * Runs the stored-document extractor over the sample files under {@code formats/} (plain
 * text, Pages, DOC, DOCX, RTF, PDF, HTML and XLSX) and checks the extracted metadata and
 * that the reference line taken from {@code chars_utf8.txt} appears in each extracted text.
 *
 * <p>The test storage is destroyed in a {@code finally} block so it is released even when an
 * assertion fails.
 */
@Test
public void testFormats() throws IOException, URISyntaxException {
    Storage storage = TestHelper.getDefaultTestStorage();
    try {
        StoredDocumentSourceStorage storeDocumentSourceStorage = storage.getStoredDocumentSourceStorage();
        FlexibleParameters parameters = new FlexibleParameters();
        StoredDocumentSourceExtractor extractor = new StoredDocumentSourceExtractor(storeDocumentSourceStorage,
                parameters);

        InputSource inputSource;
        StoredDocumentSource storedDocumentSource;
        StoredDocumentSource extractedStoredDocumentSource;
        DocumentMetadata metadata;
        String contents;

        String line;

        // Reference line: first line of the UTF-8 text fixture, starting at its first "I".
        line = FileUtils.readLines(TestHelper.getResource("formats/chars_utf8.txt")).get(0).trim();
        line = line.substring(line.indexOf("I"));

        inputSource = new FileInputSource(TestHelper.getResource("formats/chars_utf8.txt"));
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        assertEquals("chars_utf8", metadata.getTitle());
        contents = IOUtils.toString(storeDocumentSourceStorage
                .getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
        // assertEquals reports the actual count on failure, unlike assertTrue(count == 2).
        assertEquals("ensure we have two paragraphs in text", 2, StringUtils.countMatches(contents, "<p>"));
        assertTrue("ensure we've escaped & in text", contents.contains("&amp;"));
        assertTrue("ensure we have some content in text", contents.contains(line));

        // NOTE(review): the expected author "Stfan Sinclair" used below looks like it may have
        // lost an accented character somewhere - confirm against the fixture files.
        inputSource = new FileInputSource(TestHelper.getResource("formats/chars.pages"));
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        assertEquals("title for Pages document", "Titre du document test de Pages", metadata.getTitle());
        assertEquals("author for Pages document", "Stfan Sinclair", metadata.getAuthor());
        assertEquals("keywords for Pages document", "test, Pages", metadata.getKeywords());
        contents = IOUtils.toString(storeDocumentSourceStorage
                .getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
        assertTrue("ensure we have some content in Pages", contents.contains(line));

        inputSource = new FileInputSource(TestHelper.getResource("formats/chars.doc"));
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        assertEquals("title for MSWord (.doc) document", "Titre du document test de MSWord", metadata.getTitle());
        assertEquals("author for MSWord (.doc) document", "Stfan Sinclair", metadata.getAuthor());
        assertEquals("keywords for MSWord (.doc) document", "test, MSWord", metadata.getKeywords());
        contents = IOUtils.toString(storeDocumentSourceStorage
                .getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
        assertTrue("ensure we have some content in MSWord (.doc)", contents.contains(line));

        inputSource = new FileInputSource(TestHelper.getResource("formats/chars.docx"));
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        assertEquals("title for MSWord (.docx) document", "Titre du document test de MSWord", metadata.getTitle());
        assertEquals("author for MSWord (.docx) document", "Stfan Sinclair", metadata.getAuthor());
        assertEquals("keywords for MSWord (.docx) document", "test, MSWord", metadata.getKeywords());
        contents = IOUtils.toString(storeDocumentSourceStorage
                .getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
        assertTrue("ensure we have some content in MSWord (.docx)", contents.contains(line));

        inputSource = new FileInputSource(TestHelper.getResource("formats/chars.rtf"));
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        assertEquals("title for RTF document", "Titre du document test de RTF", metadata.getTitle());
        assertEquals("author for RTF document", "Stfan Sinclair", metadata.getAuthor());
        assertEquals("keywords for RTF document", "test, RTF", metadata.getKeywords());
        contents = IOUtils.toString(storeDocumentSourceStorage
                .getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
        assertTrue("ensure we have some content in RTF", contents.contains(line));

        inputSource = new FileInputSource(TestHelper.getResource("formats/chars.pdf"));
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        assertEquals("title for PDF document", "Titre du document test de PDF", metadata.getTitle());
        assertEquals("author for PDF document", "Stfan Sinclair", metadata.getAuthor());
        assertEquals("keywords for PDF document", "test, PDF", metadata.getKeywords());
        contents = IOUtils.toString(storeDocumentSourceStorage
                .getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
        assertTrue("ensure we have some content in PDF", contents.contains(line));

        inputSource = new FileInputSource(TestHelper.getResource("formats/chars_utf8.htm"));
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        assertEquals("title for HTML document", "Titre du document test de HTML", metadata.getTitle());
        assertEquals("author for HTML document", "Stfan Sinclair", metadata.getAuthor());
        assertEquals("keywords for HTML document", "test, HTML", metadata.getKeywords());
        contents = IOUtils.toString(storeDocumentSourceStorage
                .getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
        assertTrue("strip out script tag from html", !contents.contains("script"));
        assertTrue("strip out style tag from html", !contents.contains("style"));
        assertTrue("ensure we have some content in html", contents.contains(line));

        inputSource = new FileInputSource(TestHelper.getResource("formats/chars.xlsx"));
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        assertEquals("title for XLSX document", "chars", metadata.getTitle());
        contents = IOUtils.toString(storeDocumentSourceStorage
                .getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
        // Messages name XLSX so a failure here is not mistaken for the HTML stanza above.
        assertTrue("ensure there is no script tag in XLSX output", !contents.contains("script"));
        assertTrue("ensure there is no style tag in XLSX output", !contents.contains("style"));
        assertTrue("ensure we have some content in XLSX", contents.contains(line));
    } finally {
        // Release the test storage even when an assertion above fails.
        storage.destroy();
    }
}

From source file:org.voyanttools.trombone.input.extract.XmlExtractorTest.java

/**
 * Exercises XML extraction through {@code StoredDocumentSourceExtractor}: generic XML with
 * and without {@code xmlContentXpath}, RSS input detected implicitly and requested
 * explicitly, RSS forced to generic XML, {@code xmlTitleXpath} variants, and format detection
 * plus XPath selection for XML/HTML supplied as strings.
 *
 * <p>The test storage is destroyed in a {@code finally} block so it is released even when an
 * assertion fails.
 */
@Test
public void test() throws IOException {
    Storage storage = TestHelper.getDefaultTestStorage();
    try {
        StoredDocumentSourceStorage storeDocumentSourceStorage = storage.getStoredDocumentSourceStorage();
        FlexibleParameters parameters = new FlexibleParameters();
        StoredDocumentSourceExtractor extractor = new StoredDocumentSourceExtractor(storeDocumentSourceStorage,
                parameters);

        InputSource inputSource;
        StoredDocumentSource storedDocumentSource;
        StoredDocumentSource extractedStoredDocumentSource;
        DocumentMetadata metadata;
        String contents;

        // Reference line: first line of the UTF-8 text fixture, starting at its first "I".
        String line = FileUtils.readLines(TestHelper.getResource("formats/chars_utf8.txt")).get(0).trim();
        line = line.substring(line.indexOf("I"));

        inputSource = new FileInputSource(TestHelper.getResource("formats/chars_utf8.xml"));
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        // this should be blank rather than the title tag (for generic XML)
        assertEquals("", metadata.getTitle());
        contents = IOUtils.toString(storeDocumentSourceStorage
                .getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
        assertTrue("ensure we have some content in XML", contents.contains(line));

        // try with xmlContentXpath parameter and multiple nodes
        extractor = new StoredDocumentSourceExtractor(storeDocumentSourceStorage,
                new FlexibleParameters(new String[] { "xmlContentXpath=//p" }));
        inputSource = new FileInputSource(TestHelper.getResource("formats/chars_utf8.xml"));
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        // this should be blank rather than the title tag (for generic XML)
        assertEquals("title for XML document", "", metadata.getTitle());
        contents = IOUtils.toString(storeDocumentSourceStorage
                .getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
        assertFalse("ensure we have stripped out other content", contents.contains("<body>"));
        assertTrue("ensure we have some content in XML with multiple nodes for the xmlContentXPath parameter",
                contents.contains(line));

        // try with xmlContentXpath parameter and single node
        extractor = new StoredDocumentSourceExtractor(storeDocumentSourceStorage,
                new FlexibleParameters(new String[] { "xmlContentXpath=//body" }));
        inputSource = new FileInputSource(TestHelper.getResource("formats/chars_utf8.xml"));
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        // this should be blank rather than the title tag (for generic XML)
        assertEquals("title for XML document", "", metadata.getTitle());
        contents = IOUtils.toString(storeDocumentSourceStorage
                .getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
        assertFalse("ensure we have stripped out other content", contents.contains("<head>"));
        assertTrue("ensure we have some content in XML with a single node xmlContentXpath parameter",
                contents.contains(line));

        // try with RSS input format implicit (no inputFormat)
        extractor = new StoredDocumentSourceExtractor(storeDocumentSourceStorage, new FlexibleParameters());
        inputSource = new FileInputSource(TestHelper.getResource("xml/rss.xml"));
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        // for RSS the title is expected to be the feed title, not blank
        assertEquals("title for RSS feed", "Website Feed", metadata.getTitle());
        //      assertEquals("author for RSS feed", "Me (me@example.com)", metadata.getAuthor());
        contents = IOUtils.toString(storeDocumentSourceStorage
                .getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
        assertFalse(contents.contains("<!--")); // make sure we've stripped out XML comments during extraction
        assertFalse("ensure we have stripped out other content in RSS feed", contents.contains("<link>"));
        // NOTE(review): the message used to say "three" while the check expected 2; the message
        // now follows the asserted value - confirm 2 is the intended count for xml/rss.xml.
        assertEquals("ensure we have two lines of description in RSS feed", 2,
                StringUtils.countMatches(contents, "<description>"));

        // try with RSS input format (explicit)
        extractor = new StoredDocumentSourceExtractor(storeDocumentSourceStorage,
                new FlexibleParameters(new String[] { "inputFormat=RSS" }));
        inputSource = new FileInputSource(TestHelper.getResource("xml/rss.xml"));
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        // for RSS the title is expected to be the feed title, not blank
        assertEquals("title for RSS feed", "Website Feed", metadata.getTitle());
        //      assertEquals("author for RSS feed", "Me (me@example.com)", metadata.getAuthor());
        contents = IOUtils.toString(storeDocumentSourceStorage
                .getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
        assertFalse(contents.contains("<!--")); // make sure we've stripped out XML comments during extraction
        assertFalse("ensure we have stripped out other content in RSS feed", contents.contains("<link>"));
        assertEquals("ensure we have two lines of description in RSS feed", 2,
                StringUtils.countMatches(contents, "<description>"));

        // try with XML
        extractor = new StoredDocumentSourceExtractor(storeDocumentSourceStorage,
                new FlexibleParameters(new String[] { "inputFormat=XML" }));
        inputSource = new FileInputSource(TestHelper.getResource("xml/rss.xml"));
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        // this should be blank rather than the title tag (for generic XML)
        assertEquals(0, metadata.getTitle().length());
        //      assertEquals("author for RSS feed", "Me (me@example.com)", metadata.getAuthor());
        contents = IOUtils.toString(storeDocumentSourceStorage
                .getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
        assertFalse(contents.contains("<!--")); // make sure we've stripped out XML comments during extraction

        // make sure that we can keep multiple values for metadata
        extractor = new StoredDocumentSourceExtractor(storeDocumentSourceStorage,
                new FlexibleParameters(new String[] { "xmlTitleXpath=//title" }));
        inputSource = new FileInputSource(TestHelper.getResource("xml/rss.xml"));
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        assertEquals("title for RSS feed", "Website Feed", metadata.getTitle());

        // make sure we can join string values
        extractor = new StoredDocumentSourceExtractor(storeDocumentSourceStorage,
                new FlexibleParameters(new String[] { "xmlTitleXpath=string-join(//title,'--')" }));
        inputSource = new FileInputSource(TestHelper.getResource("xml/rss.xml"));
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        // the title is expected to be every <title> value joined with "--"
        assertEquals("Website Feed--A Special Event--Announcing new Products", metadata.getTitle());

        // make sure we recognize XML in a string
        extractor = new StoredDocumentSourceExtractor(storeDocumentSourceStorage, new FlexibleParameters());
        inputSource = new StringInputSource("<a><b>c</b><b>d</b></a>");
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        assertEquals(DocumentFormat.XML, metadata.getDocumentFormat());

        // make sure we recognize HTML in a string
        extractor = new StoredDocumentSourceExtractor(storeDocumentSourceStorage, new FlexibleParameters());
        inputSource = new StringInputSource("<html><body><div>c</div><div>d</div></body></html>");
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        assertEquals(DocumentFormat.HTML, metadata.getDocumentFormat());

        // make sure we find XPath in string XML
        extractor = new StoredDocumentSourceExtractor(storeDocumentSourceStorage,
                new FlexibleParameters(new String[] { "xmlContentXpath=//b", "xmlTitleXpath=//b[1]" }));
        inputSource = new StringInputSource("<a><b>c</b><b>d &amp; e</b><z>x</z></a>");
        storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
        extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
        metadata = extractedStoredDocumentSource.getMetadata();
        assertEquals(DocumentFormat.XML, metadata.getDocumentFormat());
        // the title comes from the first <b> element selected by xmlTitleXpath
        assertEquals("c", metadata.getTitle());
        contents = IOUtils.toString(storeDocumentSourceStorage
                .getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
        // only the <b> nodes selected by xmlContentXpath should remain; <z> must be gone
        assertTrue(contents.contains("<a>") && contents.contains("<b>") && !contents.contains("<z>"));
    } finally {
        // Release the test storage even when an assertion above fails.
        storage.destroy();
    }

}