Example usage for org.apache.commons.lang3 StringUtils countMatches

List of usage examples for org.apache.commons.lang3 StringUtils countMatches

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringUtils countMatches.

Prototype

public static int countMatches(final CharSequence str, final char ch) 

Document

Counts how many times the char appears in the given string.

A null or empty ("") String input returns 0.

 StringUtils.countMatches(null, *)       = 0
 StringUtils.countMatches("", *)         = 0
 StringUtils.countMatches("abba", 0)     = 0
 StringUtils.countMatches("abba", 'a')   = 2
 StringUtils.countMatches("abba", 'b')   = 2
 StringUtils.countMatches("abba", 'x')   = 0
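
A minimal, self-contained sketch of the method (the demo class and input strings are ours, not taken from the usages below). Note that besides the char overload shown in the prototype, several examples on this page use the CharSequence overload, countMatches(CharSequence, CharSequence), which counts non-overlapping substring occurrences:

import org.apache.commons.lang3.StringUtils;

public class CountMatchesDemo {
    public static void main(String[] args) {
        // char overload: count occurrences of a single character
        System.out.println(StringUtils.countMatches("abba", 'a'));      // 2
        // CharSequence overload: count non-overlapping substring occurrences
        System.out.println(StringUtils.countMatches("abbabba", "bb"));  // 2
        // null-safe: a null input returns 0 instead of throwing
        System.out.println(StringUtils.countMatches(null, 'a'));        // 0
    }
}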

Usage

From source file:org.languagetool.dev.wikipedia.SuggestionReplacerTest.java

@Test
public void testCompleteText() throws Exception {
    InputStream stream = SuggestionReplacerTest.class
            .getResourceAsStream("/org/languagetool/dev/wikipedia/wikipedia.txt");
    String origMarkup = IOUtils.toString(stream, "utf-8");
    JLanguageTool langTool = new JLanguageTool(new GermanyGerman() {
        @Override
        protected synchronized List<AbstractPatternRule> getPatternRules() {
            return Collections.emptyList();
        }
    });
    langTool.disableRule(GermanSpellerRule.RULE_ID);
    langTool.disableRule("DE_AGREEMENT");
    langTool.disableRule("GERMAN_WORD_REPEAT_BEGINNING_RULE");
    langTool.disableRule("COMMA_PARENTHESIS_WHITESPACE");
    langTool.disableRule("DE_CASE");
    langTool.disableRule("ABKUERZUNG_LEERZEICHEN");
    langTool.disableRule("TYPOGRAFISCHE_ANFUEHRUNGSZEICHEN");
    langTool.disableRule("OLD_SPELLING");
    langTool.disableRule("DE_TOO_LONG_SENTENCE_40");
    langTool.disableRule("PUNCTUATION_PARAGRAPH_END");
    PlainTextMapping mapping = filter.filter(origMarkup);
    List<RuleMatch> matches = langTool.check(mapping.getPlainText());
    assertThat("Expected 3 matches, got: " + matches, matches.size(), is(3));
    int oldPos = 0;
    for (RuleMatch match : matches) {
        SuggestionReplacer replacer = new SuggestionReplacer(mapping, origMarkup,
                new ErrorMarker("<s>", "</s>"));
        List<RuleMatchApplication> ruleMatchApplications = replacer.applySuggestionsToOriginalText(match);
        assertThat(ruleMatchApplications.size(), is(1));
        RuleMatchApplication ruleMatchApplication = ruleMatchApplications.get(0);
        assertThat(StringUtils.countMatches(ruleMatchApplication.getTextWithCorrection(),
                "absichtlicher absichtlicher"), is(2));
        int pos = ruleMatchApplication.getTextWithCorrection().indexOf("<s>absichtlicher</s> Fehler");
        if (pos == -1) {
            // markup area varies because our mapping is sometimes a bit off:
            pos = ruleMatchApplication.getTextWithCorrection().indexOf("<s>absichtlicher Fehler</s>");
        }
        assertTrue("Found correction at: " + pos, pos > oldPos);
        oldPos = pos;
    }
}

From source file:org.languagetool.dev.wikipedia.SuggestionReplacerTest.java

@Test
public void testCompleteText2() throws Exception {
    InputStream stream = SuggestionReplacerTest.class
            .getResourceAsStream("/org/languagetool/dev/wikipedia/wikipedia2.txt");
    String origMarkup = IOUtils.toString(stream, "utf-8");
    JLanguageTool langTool = new JLanguageTool(germanyGerman);
    PlainTextMapping mapping = filter.filter(origMarkup);
    langTool.disableRule("PUNCTUATION_PARAGRAPH_END"); //  added to prevent crash; TODO: check if needed
    List<RuleMatch> matches = langTool.check(mapping.getPlainText());
    assertTrue("Expected >= 30 matches, got: " + matches, matches.size() >= 30);
    for (RuleMatch match : matches) {
        SuggestionReplacer replacer = new SuggestionReplacer(mapping, origMarkup,
                new ErrorMarker("<s>", "</s>"));
        List<RuleMatchApplication> ruleMatchApplications = replacer.applySuggestionsToOriginalText(match);
        if (ruleMatchApplications.isEmpty()) {
            continue;
        }
        RuleMatchApplication ruleMatchApplication = ruleMatchApplications.get(0);
        assertThat(StringUtils.countMatches(ruleMatchApplication.getTextWithCorrection(), "<s>"), is(1));
    }
}

From source file:org.languagetool.server.ApiV2Test.java

@Test
public void testLanguages() throws IOException {
    String json = new ApiV2(null, null).getLanguages();
    assertTrue(json.contains("\"German (Germany)\""));
    assertTrue(json.contains("\"de\""));
    assertTrue(json.contains("\"de-DE\""));
    assertTrue(StringUtils.countMatches(json, "\"name\"") >= 43);
}

From source file:org.languagetool.server.UserDictTest.java

@Test
public void testHTTPServer() throws Exception {
    HTTPServerConfig config = new HTTPServerConfig(HTTPTools.getDefaultPort());
    config.setDatabaseDriver("org.hsqldb.jdbcDriver");
    config.setDatabaseUrl("jdbc:hsqldb:mem:testdb");
    config.setDatabaseUsername("");
    config.setDatabasePassword("");
    config.setSecretTokenKey("myfoo");
    config.setCacheSize(100);
    DatabaseAccess.init(config);
    // no need to also create test tables for logging
    DatabaseLogger.getInstance().disableLogging();
    try {
        DatabaseAccess.createAndFillTestTables();
        HTTPServer server = new HTTPServer(config);
        try {
            server.run();
            Language enUS = Languages.getLanguageForShortCode("en-US");
            runTests(enUS, "This is Mysurname.", "This is Mxsurname.", "Mysurname", "MORFOLOGIK_RULE_EN_US");
            runTests(enUS, "Mysurname is my name.", "Mxsurname is my name.", "Mysurname",
                    "MORFOLOGIK_RULE_EN_US");
            Language deDE = Languages.getLanguageForShortCode("de-DE");
            runTests(deDE, "Das ist Meinname.", "Das ist Mxinname.", "Meinname", "GERMAN_SPELLER_RULE");
            runTests(deDE, "Meinname steht hier.", "Mxinname steht hier.", "Meinname", "GERMAN_SPELLER_RULE");
            runTests(deDE, "Hier steht Schckl.", "Das ist Schckl.", "Schckl", "GERMAN_SPELLER_RULE");
            String res = check(deDE, "Hier steht Schockl", USERNAME1, API_KEY1);
            assertThat(StringUtils.countMatches(res, "GERMAN_SPELLER_RULE"), is(1)); // 'Schckl' accepted, but not 'Schockl' (NOTE: depends on encoding/collation of database) 
            try {
                System.out.println("=== Testing multi word insertion now, ignore stack trace: ===");
                addWord("multi word", USERNAME1, API_KEY1);
                fail("Should not be able to insert multi words");
            } catch (IOException ignore) {
            }
        } finally {
            server.stop();
        }
    } finally {
        DatabaseAccess.deleteTestTables();
    }
}

From source file:org.languagetool.server.UserDictTest.java

private String assertRuleMatch(int expectedTypoCount, String input, Language lang, String errorRuleId,
        String username, String apiKey) throws IOException {
    String json = check(lang, input, username, apiKey);
    int realTypoCount = StringUtils.countMatches(json, errorRuleId);
    //System.out.println(json);
    assertThat("Expected " + expectedTypoCount + " rule matches (id " + errorRuleId + ") for '" + input
            + "', got " + realTypoCount, realTypoCount, is(expectedTypoCount));
    return json;
}
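
The helper above skips JSON parsing entirely: it counts occurrences of the rule id substring in the raw server response. A sketch of the same trick, assuming StringUtils is imported; the JSON fragment is invented and far smaller than a real LanguageTool response:

String json = "{\"matches\":[{\"rule\":{\"id\":\"MORFOLOGIK_RULE_EN_US\"}},"
        + "{\"rule\":{\"id\":\"MORFOLOGIK_RULE_EN_US\"}}]}";
// Each match object contains the rule id exactly once, so the count equals the match count.
int typoCount = StringUtils.countMatches(json, "MORFOLOGIK_RULE_EN_US"); // 2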

From source file:org.languagetool.tagging.uk.CompoundTagger.java

@Nullable
private List<AnalyzedToken> doGuessCompoundTag(String word) {
    int dashIdx = word.lastIndexOf('-');
    if (dashIdx == word.length() - 1)
        return null;

    int firstDashIdx = word.indexOf('-');
    if (firstDashIdx == 0)
        return null;

    boolean startsWithDigit = Character.isDigit(word.charAt(0));

    if (!startsWithDigit && dashIdx != firstDashIdx) {
        int dashCount = StringUtils.countMatches(word, "-");

        if (dashCount >= 2 && dashIdx > firstDashIdx + 1) {
            List<AnalyzedToken> tokens = doGuessMultiHyphens(word, firstDashIdx, dashIdx);
            if (tokens != null)
                return tokens;
        }

        if (dashCount == 2 && dashIdx > firstDashIdx + 1) {
            return doGuessTwoHyphens(word, firstDashIdx, dashIdx);
        }

        return null;
    }

    String leftWord = word.substring(0, dashIdx);
    String rightWord = word.substring(dashIdx + 1);

    boolean dashPrefixMatch = dashPrefixes.contains(leftWord) || dashPrefixes.contains(leftWord.toLowerCase())
            || DASH_PREFIX_LAT_PATTERN.matcher(leftWord).matches();

    if (!dashPrefixMatch && (startsWithDigit || word.matches("[XLIV]+-.*"))) {
        return matchDigitCompound(word, leftWord, rightWord);
    }

    if (Character.isDigit(rightWord.charAt(0))) {
        return matchNumberedProperNoun(word, leftWord, rightWord);
    }

    // ..., ... ?? 
    //TODO:   : -?
    if (LEFT_INVALID.contains(leftWord.toLowerCase())) {
        List<TaggedWord> rightWdList = tagEitherCase(rightWord);

        rightWdList = PosTagHelper.filter2(rightWdList, Pattern.compile("(noun|adj)(?!.*pron).*"));

        if (rightWdList.isEmpty())
            return null;

        String lemma = leftWord + "-" + rightWdList.get(0).getLemma();
        String extraTag = StringTools.isCapitalizedWord(rightWord) ? "" : ":bad";
        rightWdList = PosTagHelper.addIfNotContains(rightWdList, extraTag, lemma);
        return ukrainianTagger.asAnalyzedTokenListForTaggedWordsInternal(word, rightWdList);
    }

    // wrong: -
    if (leftWord.equalsIgnoreCase("") && Character.isLowerCase(rightWord.charAt(0)))
        return null;

    List<TaggedWord> leftWdList = tagAsIsAndWithLowerCase(leftWord);

    // ?-, -, -, -, -

    if (rightPartsWithLeftTagMap.containsKey(rightWord) && !PosTagHelper.hasPosTagPart2(leftWdList, "abbr")) {

        if (leftWdList.isEmpty())
            return null;

        Pattern leftTagRegex = rightPartsWithLeftTagMap.get(rightWord);

        List<AnalyzedToken> leftAnalyzedTokens = ukrainianTagger
                .asAnalyzedTokenListForTaggedWordsInternal(leftWord, leftWdList);
        List<AnalyzedToken> newAnalyzedTokens = new ArrayList<>(leftAnalyzedTokens.size());

        // ignore -
        if (rightWord.equals("")
                && LemmaHelper.hasLemma(leftAnalyzedTokens, Arrays.asList("", "", "")))
            return null;

        for (AnalyzedToken analyzedToken : leftAnalyzedTokens) {
            String posTag = analyzedToken.getPOSTag();
            if (posTag != null && (leftWord.equals("") && posTag.contains("adv"))
                    || (leftTagRegex.matcher(posTag).matches())) {
                newAnalyzedTokens.add(new AnalyzedToken(word, posTag, analyzedToken.getLemma()));
            }
        }

        return newAnalyzedTokens.isEmpty() ? null : newAnalyzedTokens;
    }

    // -?, -?

    if (leftWord.equalsIgnoreCase("") && rightWord.endsWith("?")) {
        rightWord += "";
    }

    // ??-

    if (Character.isUpperCase(leftWord.charAt(0)) && LemmaHelper.CITY_AVENU.contains(rightWord)) {
        return PosTagHelper.generateTokensForNv(word, "f", ":prop");
    }

    List<TaggedWord> rightWdList = tagEitherCase(rightWord);

    if (rightWdList.isEmpty()) {

        if (word.startsWith("")) {
            // ?-?
            Matcher napivMatcher = Pattern.compile("(.+?)-(.+)").matcher(word);
            if (napivMatcher.matches()) {
                List<TaggedWord> napivLeftWdList = tagAsIsAndWithLowerCase(napivMatcher.group(1));
                List<TaggedWord> napivRightWdList = tagAsIsAndWithLowerCase(napivMatcher.group(2));

                List<AnalyzedToken> napivLeftAnalyzedTokens = ukrainianTagger
                        .asAnalyzedTokenListForTaggedWordsInternal(napivMatcher.group(1), napivLeftWdList);
                List<AnalyzedToken> napivRightAnalyzedTokens = ukrainianTagger
                        .asAnalyzedTokenListForTaggedWordsInternal(napivMatcher.group(2), napivRightWdList);

                List<AnalyzedToken> tagMatch = tagMatch(word, napivLeftAnalyzedTokens,
                        napivRightAnalyzedTokens);
                if (tagMatch != null) {
                    return tagMatch;
                }
            }
        }

        return null;
    }

    List<AnalyzedToken> rightAnalyzedTokens = ukrainianTagger
            .asAnalyzedTokenListForTaggedWordsInternal(rightWord, rightWdList);

    // -
    if (leftWord.length() == 1 && Character.isUpperCase(leftWord.charAt(0))
            && LemmaHelper.hasLemma(rightAnalyzedTokens, Arrays.asList(""))) {

        return generateTokensWithRighInflected(word, leftWord, rightAnalyzedTokens, IPOSTag.adj.getText());
    }

    if (leftWord.equalsIgnoreCase("")) {
        if (rightWord.endsWith("")) {
            return poAdvMatch(word, rightAnalyzedTokens, ADJ_TAG_FOR_PO_ADV_MIS);
        } else if (rightWord.endsWith("?")) {
            return poAdvMatch(word, rightAnalyzedTokens, ADJ_TAG_FOR_PO_ADV_NAZ);
        }
        return null;
    }

    // exclude: -, ?-

    List<AnalyzedToken> leftAnalyzedTokens = ukrainianTagger.asAnalyzedTokenListForTaggedWordsInternal(leftWord,
            leftWdList);

    if (PosTagHelper.hasPosTagPart(leftAnalyzedTokens, "&pron")
            && !PosTagHelper.hasPosTagPart(leftAnalyzedTokens, "numr"))
        return null;

    if (!leftWord.equalsIgnoreCase(rightWord)
            && PosTagHelper.hasPosTag(rightAnalyzedTokens, "(part|conj).*|.*?:&pron.*")
            && !(PosTagHelper.hasPosTag(leftAnalyzedTokens, "numr.*")
                    && PosTagHelper.hasPosTag(rightAnalyzedTokens, "numr.*")))
        return null;

    // -

    if (Character.isUpperCase(rightWord.charAt(0))) {
        if (word.startsWith("-")) {
            List<AnalyzedToken> newAnalyzedTokens = new ArrayList<>(rightAnalyzedTokens.size());

            for (AnalyzedToken rightAnalyzedToken : rightAnalyzedTokens) {
                String rightPosTag = rightAnalyzedToken.getPOSTag();

                if (rightPosTag == null)
                    continue;

                if (NOUN_SING_V_ROD_REGEX.matcher(rightPosTag).matches()) {
                    for (String vid : PosTagHelper.VIDMINKY_MAP.keySet()) {
                        if (vid.equals("v_kly"))
                            continue;
                        String posTag = rightPosTag.replace("v_rod", vid) + ":ua_1992";
                        newAnalyzedTokens.add(new AnalyzedToken(word, posTag, word));
                    }
                }
            }

            return newAnalyzedTokens;
        } else {
            // we don't want ?- but want ???-?
            if (StringTools.isCapitalizedWord(rightWord) || leftWord.endsWith("")
                    || PosTagHelper.hasPosTag(rightAnalyzedTokens, Pattern.compile("adj.*"))) {

                // tag ?/noun  ? adj
                List<TaggedWord> rightWdList2 = tagAsIsAndWithLowerCase(rightWord);
                List<AnalyzedToken> rightAnalyzedTokens2 = ukrainianTagger
                        .asAnalyzedTokenListForTaggedWordsInternal(rightWord, rightWdList2);

                List<AnalyzedToken> match = tryOWithAdj(word, leftWord, rightAnalyzedTokens2);
                if (match != null)
                    return match;
            }

            return null;
        }
    }

    // TODO: ua_2019
    // ?-?

    if (dashPrefixMatch) {
        List<AnalyzedToken> newTokens = new ArrayList<>();
        if (leftWord.length() == 1 && leftWord.matches("[a-zA-Z--]")) {
            List<AnalyzedToken> newTokensAdj = getNvPrefixLatWithAdjMatch(word, rightAnalyzedTokens, leftWord);
            if (newTokensAdj != null) {
                newTokens.addAll(newTokensAdj);
            }
        }
        List<AnalyzedToken> newTokensNoun = getNvPrefixNounMatch(word, rightAnalyzedTokens, leftWord);
        if (newTokensNoun != null) {
            newTokens.addAll(newTokensNoun);
        }
        return newTokens;
    }

    // don't allow: -, -, ?-

    // allow -!

    if (!PosTagHelper.hasPosTag(leftAnalyzedTokens, "intj.*")) {
        String noDashWord = word.replace("-", "");
        List<TaggedWord> noDashWordList = tagAsIsAndWithLowerCase(noDashWord);
        List<AnalyzedToken> noDashAnalyzedTokens = ukrainianTagger
                .asAnalyzedTokenListForTaggedWordsInternal(noDashWord, noDashWordList);

        if (!noDashAnalyzedTokens.isEmpty())
            return null;
    }

    // -, -, -

    if (!leftWdList.isEmpty() && leftWord.length() > 2) {
        List<AnalyzedToken> tagMatch = tagMatch(word, leftAnalyzedTokens, rightAnalyzedTokens);
        if (tagMatch != null) {
            return tagMatch;
        }
    }

    List<AnalyzedToken> match = tryOWithAdj(word, leftWord, rightAnalyzedTokens);
    if (match != null)
        return match;

    compoundDebugLogger.logUnknownCompound(word);

    return null;
}
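
In the tagger above, StringUtils.countMatches(word, "-") selects between the single-hyphen split and the multi-hyphen helpers. A minimal illustration with placeholder words (the real inputs are Ukrainian compound words), assuming StringUtils is imported:

int one = StringUtils.countMatches("foo-bar", "-");     // 1 -> split into leftWord/rightWord
int two = StringUtils.countMatches("foo-bar-baz", "-"); // 2 -> doGuessMultiHyphens/doGuessTwoHyphens path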

From source file:org.languagetool.tools.RuleMatchAsXmlSerializerTest.java

@Test
public void testLanguageAttributes() throws IOException {
    String xml1 = SERIALIZER.ruleMatchesToXml(Collections.<RuleMatch>emptyList(), "Fake", 5, NORMAL_API, LANG,
            Collections.<String>emptyList());
    assertTrue(xml1.contains("shortname=\"xx-XX\""));
    assertTrue(xml1.contains("name=\"Testlanguage\""));
    String xml2 = SERIALIZER.ruleMatchesToXml(Collections.<RuleMatch>emptyList(), "Fake", 5, LANG,
            new FakeLanguage());
    assertTrue(xml2.contains("shortname=\"xx-XX\""));
    assertTrue(xml2.contains("name=\"Testlanguage\""));
    assertTrue(xml2.contains("shortname=\"yy\""));
    assertTrue(xml2.contains("name=\"FakeLanguage\""));
    assertThat(StringUtils.countMatches(xml2, "<matches"), is(1));
    assertThat(StringUtils.countMatches(xml2, "</matches>"), is(1));
}

From source file:org.languagetool.tools.RuleMatchAsXmlSerializerTest.java

@Test
public void testApiModes() throws IOException {
    String xmlStart = SERIALIZER.ruleMatchesToXml(Collections.<RuleMatch>emptyList(), "Fake", 5, START_API,
            LANG, Collections.<String>emptyList());
    assertThat(StringUtils.countMatches(xmlStart, "<matches"), is(1));
    assertThat(StringUtils.countMatches(xmlStart, "</matches>"), is(0));
    String xmlMiddle = SERIALIZER.ruleMatchesToXml(Collections.<RuleMatch>emptyList(), "Fake", 5, CONTINUE_API,
            LANG, Collections.<String>emptyList());
    assertThat(StringUtils.countMatches(xmlMiddle, "<matches"), is(0));
    assertThat(StringUtils.countMatches(xmlMiddle, "</matches>"), is(0));
    String xmlEnd = SERIALIZER.ruleMatchesToXml(Collections.<RuleMatch>emptyList(), "Fake", 5, END_API, LANG,
            Collections.<String>emptyList());
    assertThat(StringUtils.countMatches(xmlEnd, "<matches"), is(0));
    assertThat(StringUtils.countMatches(xmlEnd, "</matches>"), is(1));
    String xml = SERIALIZER.ruleMatchesToXml(Collections.<RuleMatch>emptyList(), "Fake", 5, NORMAL_API, LANG,
            Collections.<String>emptyList());
    assertThat(StringUtils.countMatches(xml, "<matches"), is(1));
    assertThat(StringUtils.countMatches(xml, "</matches>"), is(1));
}

From source file:org.ligoj.app.plugin.prov.aws.in.ProvAwsPriceImportResource.java

private void instalS3Price(final UpdateContext context, final AwsS3Price csv, final ProvLocation location) {
    // Resolve the type
    final String name = mapStorageToApi.get(csv.getVolumeType());
    if (name == null) {
        log.warn("Unknown storage type {}, ignored", csv.getVolumeType());
        return;
    }

    final ProvStorageType type = context.getStorageTypesMerged().computeIfAbsent(name, n -> {
        final ProvStorageType t = context.getStorageTypes().computeIfAbsent(name, n2 -> {
            // New storage type
            final ProvStorageType newType = new ProvStorageType();
            newType.setName(n2);
            newType.setNode(context.getNode());
            return newType;
        });

        // Update storage details
        t.setAvailability(toPercent(csv.getAvailability()));
        t.setDurability9(StringUtils.countMatches(StringUtils.defaultString(csv.getDurability()), '9'));
        t.setOptimized(ProvStorageOptimized.DURABILITY);
        t.setLatency(name.equals("glacier") ? Rate.WORST : Rate.MEDIUM);
        t.setDescription(
                "{\"class\":\"" + csv.getStorageClass() + "\",\"type\":\"" + csv.getVolumeType() + "\"}");
        stRepository.saveAndFlush(t);
        return t;
    });

    // Update the price as needed
    saveAsNeeded(context.getPreviousStorage().computeIfAbsent(location.getName() + name, r -> {
        final ProvStoragePrice p = new ProvStoragePrice();
        p.setLocation(location);
        p.setType(type);
        p.setCode(csv.getSku());
        return p;
    }), csv.getPricePerUnit(), p -> spRepository.save(p));
}
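
The durability9 line above turns a human-readable durability figure into a count of nines with the char overload. A sketch, under the assumption that the CSV carries AWS's published S3 durability string:

// "99.999999999%" is AWS's advertised "eleven nines" durability for S3.
String durability = "99.999999999%";
// defaultString() guards against a missing CSV cell (null becomes "").
int nines = StringUtils.countMatches(StringUtils.defaultString(durability), '9'); // 11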

From source file:org.ligoj.app.resource.plugin.LigojPluginListener.java

/**
 * Determine the plug-in type and check it regarding the contract and the convention.
 *
 * @param plugin
 *            The plug-in resource.
 * @return The checked {@link PluginType}
 */
protected PluginType determinePluginType(final ServicePlugin plugin) {
    // Determine the type from the key by convention
    final PluginType result = PluginType.values()[StringUtils.countMatches(plugin.getKey(), ':')];

    // Double check the convention with related interface
    final PluginType interfaceType;
    if (plugin instanceof ToolPlugin) {
        interfaceType = PluginType.TOOL;
    } else {
        interfaceType = PluginType.SERVICE;
    }
    if (interfaceType != result) {
        throw new TechnicalException(
                String.format("Incompatible type from the key (%s -> %s) vs type from the interface (%s)",
                        plugin.getKey(), result, interfaceType));
    }
    return result;
}
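
The lookup PluginType.values()[StringUtils.countMatches(plugin.getKey(), ':')] encodes the key convention: each nesting level adds one ':'-separated segment, so the colon count indexes the enum directly. An illustration with typical keys (the key strings are examples, and this assumes the enum declares its constants in colon-count order, e.g. FEATURE, SERVICE, TOOL):

int serviceDepth = StringUtils.countMatches("service:scm", ':');     // 1 -> PluginType.values()[1], a service
int toolDepth    = StringUtils.countMatches("service:scm:git", ':'); // 2 -> PluginType.values()[2], a tool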