List of usage examples for org.apache.commons.lang3 StringUtils countMatches
public static int countMatches(final CharSequence str, final char ch)
Counts how many times the char appears in the given string.
A null or empty ("") String input returns 0.
StringUtils.countMatches(null, *) = 0 StringUtils.countMatches("", *) = 0 StringUtils.countMatches("abba", 0) = 0 StringUtils.countMatches("abba", 'a') = 2 StringUtils.countMatches("abba", 'b') = 2 StringUtils.countMatches("abba", 'x') = 0
From source file:org.languagetool.dev.wikipedia.SuggestionReplacerTest.java
@Test public void testCompleteText() throws Exception { InputStream stream = SuggestionReplacerTest.class .getResourceAsStream("/org/languagetool/dev/wikipedia/wikipedia.txt"); String origMarkup = IOUtils.toString(stream, "utf-8"); JLanguageTool langTool = new JLanguageTool(new GermanyGerman() { @Override// w w w. j a v a 2 s.c o m protected synchronized List<AbstractPatternRule> getPatternRules() { return Collections.emptyList(); } }); langTool.disableRule(GermanSpellerRule.RULE_ID); langTool.disableRule("DE_AGREEMENT"); langTool.disableRule("GERMAN_WORD_REPEAT_BEGINNING_RULE"); langTool.disableRule("COMMA_PARENTHESIS_WHITESPACE"); langTool.disableRule("DE_CASE"); langTool.disableRule("ABKUERZUNG_LEERZEICHEN"); langTool.disableRule("TYPOGRAFISCHE_ANFUEHRUNGSZEICHEN"); langTool.disableRule("OLD_SPELLING"); langTool.disableRule("DE_TOO_LONG_SENTENCE_40"); langTool.disableRule("PUNCTUATION_PARAGRAPH_END"); PlainTextMapping mapping = filter.filter(origMarkup); List<RuleMatch> matches = langTool.check(mapping.getPlainText()); assertThat("Expected 3 matches, got: " + matches, matches.size(), is(3)); int oldPos = 0; for (RuleMatch match : matches) { SuggestionReplacer replacer = new SuggestionReplacer(mapping, origMarkup, new ErrorMarker("<s>", "</s>")); List<RuleMatchApplication> ruleMatchApplications = replacer.applySuggestionsToOriginalText(match); assertThat(ruleMatchApplications.size(), is(1)); RuleMatchApplication ruleMatchApplication = ruleMatchApplications.get(0); assertThat(StringUtils.countMatches(ruleMatchApplication.getTextWithCorrection(), "absichtlicher absichtlicher"), is(2)); int pos = ruleMatchApplication.getTextWithCorrection().indexOf("<s>absichtlicher</s> Fehler"); if (pos == -1) { // markup area varies because our mapping is sometimes a bit off: pos = ruleMatchApplication.getTextWithCorrection().indexOf("<s>absichtlicher Fehler</s>"); } assertTrue("Found correction at: " + pos, pos > oldPos); oldPos = pos; } }
From source file:org.languagetool.dev.wikipedia.SuggestionReplacerTest.java
@Test public void testCompleteText2() throws Exception { InputStream stream = SuggestionReplacerTest.class .getResourceAsStream("/org/languagetool/dev/wikipedia/wikipedia2.txt"); String origMarkup = IOUtils.toString(stream, "utf-8"); JLanguageTool langTool = new JLanguageTool(germanyGerman); PlainTextMapping mapping = filter.filter(origMarkup); langTool.disableRule("PUNCTUATION_PARAGRAPH_END"); // added to prevent crash; TODO: check if needed List<RuleMatch> matches = langTool.check(mapping.getPlainText()); assertTrue("Expected >= 30 matches, got: " + matches, matches.size() >= 30); for (RuleMatch match : matches) { SuggestionReplacer replacer = new SuggestionReplacer(mapping, origMarkup, new ErrorMarker("<s>", "</s>")); List<RuleMatchApplication> ruleMatchApplications = replacer.applySuggestionsToOriginalText(match); if (ruleMatchApplications.isEmpty()) { continue; }/*from ww w . j a v a2 s .c o m*/ RuleMatchApplication ruleMatchApplication = ruleMatchApplications.get(0); assertThat(StringUtils.countMatches(ruleMatchApplication.getTextWithCorrection(), "<s>"), is(1)); } }
From source file:org.languagetool.server.ApiV2Test.java
/**
 * The languages endpoint must list known languages; German with both its plain
 * code and its country variant must be present.
 */
@Test
public void testLanguages() throws IOException {
    final String languagesJson = new ApiV2(null, null).getLanguages();
    assertTrue(languagesJson.contains("\"German (Germany)\""));
    assertTrue(languagesJson.contains("\"de\""));
    assertTrue(languagesJson.contains("\"de-DE\""));
    // Each listed language contributes one "name" entry; at least 43 are expected.
    assertTrue(StringUtils.countMatches(languagesJson, "\"name\"") >= 43);
}
From source file:org.languagetool.server.UserDictTest.java
/**
 * End-to-end test of the user-dictionary feature over the HTTP server backed by an
 * in-memory HSQLDB: user-added words must stop being flagged by the English and
 * German spellers for the owning user, and multi-word entries must be rejected.
 */
@Test
public void testHTTPServer() throws Exception {
    HTTPServerConfig config = new HTTPServerConfig(HTTPTools.getDefaultPort());
    // In-memory database so the test leaves no persistent state behind:
    config.setDatabaseDriver("org.hsqldb.jdbcDriver");
    config.setDatabaseUrl("jdbc:hsqldb:mem:testdb");
    config.setDatabaseUsername("");
    config.setDatabasePassword("");
    config.setSecretTokenKey("myfoo");
    config.setCacheSize(100);
    DatabaseAccess.init(config);
    // no need to also create test tables for logging
    DatabaseLogger.getInstance().disableLogging();
    try {
        DatabaseAccess.createAndFillTestTables();
        HTTPServer server = new HTTPServer(config);
        try {
            server.run();
            Language enUS = Languages.getLanguageForShortCode("en-US");
            runTests(enUS, "This is Mysurname.", "This is Mxsurname.", "Mysurname", "MORFOLOGIK_RULE_EN_US");
            runTests(enUS, "Mysurname is my name.", "Mxsurname is my name.", "Mysurname", "MORFOLOGIK_RULE_EN_US");
            Language deDE = Languages.getLanguageForShortCode("de-DE");
            runTests(deDE, "Das ist Meinname.", "Das ist Mxinname.", "Meinname", "GERMAN_SPELLER_RULE");
            runTests(deDE, "Meinname steht hier.", "Mxinname steht hier.", "Meinname", "GERMAN_SPELLER_RULE");
            runTests(deDE, "Hier steht Schckl.", "Das ist Schckl.", "Schckl", "GERMAN_SPELLER_RULE");
            String res = check(deDE, "Hier steht Schockl", USERNAME1, API_KEY1);
            assertThat(StringUtils.countMatches(res, "GERMAN_SPELLER_RULE"), is(1)); // 'Schckl' accepted, but not 'Schockl' (NOTE: depends on encoding/collation of database)
            try {
                System.out.println("=== Testing multi word insertion now, ignore stack trace: ===");
                addWord("multi word", USERNAME1, API_KEY1);
                fail("Should not be able to insert multi words");
            } catch (IOException ignore) {
                // expected: the server rejects multi-word dictionary entries
            }
        } finally {
            server.stop();
        }
    } finally {
        DatabaseAccess.deleteTestTables();
    }
}
From source file:org.languagetool.server.UserDictTest.java
private String assertRuleMatch(int expectedTypoCount, String input, Language lang, String errorRuleId, String username, String apiKey) throws IOException { String json = check(lang, input, username, apiKey); int realTypoCount = StringUtils.countMatches(json, errorRuleId); //System.out.println(json); assertThat("Expected " + expectedTypoCount + " rule matches (id " + errorRuleId + ") for '" + input + "', got " + realTypoCount, realTypoCount, is(expectedTypoCount)); return json;/*from www .j a va2 s. com*/ }
From source file:org.languagetool.tagging.uk.CompoundTagger.java
/**
 * Tries to tag a hyphenated compound word by splitting it at the last hyphen and
 * combining the analyses of the left and right parts (with special handling for
 * digit-initial words, multi-hyphen words, known dash prefixes, and capitalized
 * proper-noun parts). Returns {@code null} when no compound reading applies.
 *
 * NOTE(review): many string literals and comments in this method appear garbled or
 * emptied by text extraction (e.g. {@code leftWord.equalsIgnoreCase("")}); they
 * originally held Ukrainian words — verify against the upstream source before
 * editing this logic.
 *
 * @param word the hyphenated surface form to analyze
 * @return the analyzed tokens for the compound, or {@code null} if unknown
 */
@Nullable private List<AnalyzedToken> doGuessCompoundTag(String word) { int dashIdx = word.lastIndexOf('-'); if (dashIdx == word.length() - 1) return null; int firstDashIdx = word.indexOf('-'); if (firstDashIdx == 0) return null; boolean startsWithDigit = Character.isDigit(word.charAt(0)); if (!startsWithDigit && dashIdx != firstDashIdx) { int dashCount = StringUtils.countMatches(word, "-"); if (dashCount >= 2 && dashIdx > firstDashIdx + 1) { List<AnalyzedToken> tokens = doGuessMultiHyphens(word, firstDashIdx, dashIdx); if (tokens != null) return tokens; }/*w w w . j av a2s . c om*/ if (dashCount == 2 && dashIdx > firstDashIdx + 1) { return doGuessTwoHyphens(word, firstDashIdx, dashIdx); } return null; } String leftWord = word.substring(0, dashIdx); String rightWord = word.substring(dashIdx + 1); boolean dashPrefixMatch = dashPrefixes.contains(leftWord) || dashPrefixes.contains(leftWord.toLowerCase()) || DASH_PREFIX_LAT_PATTERN.matcher(leftWord).matches(); if (!dashPrefixMatch && (startsWithDigit || word.matches("[XLIV]+-.*"))) { return matchDigitCompound(word, leftWord, rightWord); } if (Character.isDigit(rightWord.charAt(0))) { return matchNumberedProperNoun(word, leftWord, rightWord); } // ..., ... ?? //TODO: : -? if (LEFT_INVALID.contains(leftWord.toLowerCase())) { List<TaggedWord> rightWdList = tagEitherCase(rightWord); rightWdList = PosTagHelper.filter2(rightWdList, Pattern.compile("(noun|adj)(?!.*pron).*")); if (rightWdList.isEmpty()) return null; String lemma = leftWord + "-" + rightWdList.get(0).getLemma(); String extraTag = StringTools.isCapitalizedWord(rightWord) ? 
"" : ":bad"; rightWdList = PosTagHelper.addIfNotContains(rightWdList, extraTag, lemma); return ukrainianTagger.asAnalyzedTokenListForTaggedWordsInternal(word, rightWdList); } // wrong: - if (leftWord.equalsIgnoreCase("") && Character.isLowerCase(rightWord.charAt(0))) return null; List<TaggedWord> leftWdList = tagAsIsAndWithLowerCase(leftWord); // ?-, -, -, -, - if (rightPartsWithLeftTagMap.containsKey(rightWord) && !PosTagHelper.hasPosTagPart2(leftWdList, "abbr")) { if (leftWdList.isEmpty()) return null; Pattern leftTagRegex = rightPartsWithLeftTagMap.get(rightWord); List<AnalyzedToken> leftAnalyzedTokens = ukrainianTagger .asAnalyzedTokenListForTaggedWordsInternal(leftWord, leftWdList); List<AnalyzedToken> newAnalyzedTokens = new ArrayList<>(leftAnalyzedTokens.size()); // ignore - if (rightWord.equals("") && LemmaHelper.hasLemma(leftAnalyzedTokens, Arrays.asList("", "", ""))) return null; for (AnalyzedToken analyzedToken : leftAnalyzedTokens) { String posTag = analyzedToken.getPOSTag(); if (posTag != null && (leftWord.equals("") && posTag.contains("adv")) || (leftTagRegex.matcher(posTag).matches())) { newAnalyzedTokens.add(new AnalyzedToken(word, posTag, analyzedToken.getLemma())); } } return newAnalyzedTokens.isEmpty() ? null : newAnalyzedTokens; } // -?, -? if (leftWord.equalsIgnoreCase("") && rightWord.endsWith("?")) { rightWord += ""; } // ??- if (Character.isUpperCase(leftWord.charAt(0)) && LemmaHelper.CITY_AVENU.contains(rightWord)) { return PosTagHelper.generateTokensForNv(word, "f", ":prop"); } List<TaggedWord> rightWdList = tagEitherCase(rightWord); if (rightWdList.isEmpty()) { if (word.startsWith("")) { // ?-? 
// If the right part alone is untaggable, try splitting the prefixed word into
// two independently tagged halves and combining their readings:
Matcher napivMatcher = Pattern.compile("(.+?)-(.+)").matcher(word); if (napivMatcher.matches()) { List<TaggedWord> napivLeftWdList = tagAsIsAndWithLowerCase(napivMatcher.group(1)); List<TaggedWord> napivRightWdList = tagAsIsAndWithLowerCase(napivMatcher.group(2)); List<AnalyzedToken> napivLeftAnalyzedTokens = ukrainianTagger .asAnalyzedTokenListForTaggedWordsInternal(napivMatcher.group(1), napivLeftWdList); List<AnalyzedToken> napivRightAnalyzedTokens = ukrainianTagger .asAnalyzedTokenListForTaggedWordsInternal(napivMatcher.group(2), napivRightWdList); List<AnalyzedToken> tagMatch = tagMatch(word, napivLeftAnalyzedTokens, napivRightAnalyzedTokens); if (tagMatch != null) { return tagMatch; } } } return null; } List<AnalyzedToken> rightAnalyzedTokens = ukrainianTagger .asAnalyzedTokenListForTaggedWordsInternal(rightWord, rightWdList); // - if (leftWord.length() == 1 && Character.isUpperCase(leftWord.charAt(0)) && LemmaHelper.hasLemma(rightAnalyzedTokens, Arrays.asList(""))) { return generateTokensWithRighInflected(word, leftWord, rightAnalyzedTokens, IPOSTag.adj.getText()); } if (leftWord.equalsIgnoreCase("")) { if (rightWord.endsWith("")) { return poAdvMatch(word, rightAnalyzedTokens, ADJ_TAG_FOR_PO_ADV_MIS); } else if (rightWord.endsWith("?")) { return poAdvMatch(word, rightAnalyzedTokens, ADJ_TAG_FOR_PO_ADV_NAZ); } return null; } // exclude: -, ?- List<AnalyzedToken> leftAnalyzedTokens = ukrainianTagger.asAnalyzedTokenListForTaggedWordsInternal(leftWord, leftWdList); if (PosTagHelper.hasPosTagPart(leftAnalyzedTokens, "&pron") && !PosTagHelper.hasPosTagPart(leftAnalyzedTokens, "numr")) return null; if (!leftWord.equalsIgnoreCase(rightWord) && PosTagHelper.hasPosTag(rightAnalyzedTokens, "(part|conj).*|.*?:&pron.*") && !(PosTagHelper.hasPosTag(leftAnalyzedTokens, "numr.*") && PosTagHelper.hasPosTag(rightAnalyzedTokens, "numr.*"))) return null; // - if (Character.isUpperCase(rightWord.charAt(0))) { if (word.startsWith("-")) { List<AnalyzedToken> newAnalyzedTokens = 
new ArrayList<>(rightAnalyzedTokens.size()); for (AnalyzedToken rightAnalyzedToken : rightAnalyzedTokens) { String rightPosTag = rightAnalyzedToken.getPOSTag(); if (rightPosTag == null) continue; if (NOUN_SING_V_ROD_REGEX.matcher(rightPosTag).matches()) { for (String vid : PosTagHelper.VIDMINKY_MAP.keySet()) { if (vid.equals("v_kly")) continue; String posTag = rightPosTag.replace("v_rod", vid) + ":ua_1992"; newAnalyzedTokens.add(new AnalyzedToken(word, posTag, word)); } } } return newAnalyzedTokens; } else { // we don't want ?- but want ???-? if (StringTools.isCapitalizedWord(rightWord) || leftWord.endsWith("") || PosTagHelper.hasPosTag(rightAnalyzedTokens, Pattern.compile("adj.*"))) { // tag ?/noun ? adj List<TaggedWord> rightWdList2 = tagAsIsAndWithLowerCase(rightWord); List<AnalyzedToken> rightAnalyzedTokens2 = ukrainianTagger .asAnalyzedTokenListForTaggedWordsInternal(rightWord, rightWdList2); List<AnalyzedToken> match = tryOWithAdj(word, leftWord, rightAnalyzedTokens2); if (match != null) return match; } return null; } } // TODO: ua_2019 // ?-? if (dashPrefixMatch) { List<AnalyzedToken> newTokens = new ArrayList<>(); if (leftWord.length() == 1 && leftWord.matches("[a-zA-Z--]")) { List<AnalyzedToken> newTokensAdj = getNvPrefixLatWithAdjMatch(word, rightAnalyzedTokens, leftWord); if (newTokensAdj != null) { newTokens.addAll(newTokensAdj); } } List<AnalyzedToken> newTokensNoun = getNvPrefixNounMatch(word, rightAnalyzedTokens, leftWord); if (newTokensNoun != null) { newTokens.addAll(newTokensNoun); } return newTokens; } // don't allow: -, -, ?- // allow -! 
// Reject the hyphenated reading when the word without the hyphen is itself a known word:
if (!PosTagHelper.hasPosTag(leftAnalyzedTokens, "intj.*")) { String noDashWord = word.replace("-", ""); List<TaggedWord> noDashWordList = tagAsIsAndWithLowerCase(noDashWord); List<AnalyzedToken> noDashAnalyzedTokens = ukrainianTagger .asAnalyzedTokenListForTaggedWordsInternal(noDashWord, noDashWordList); if (!noDashAnalyzedTokens.isEmpty()) return null; } // -, -, - if (!leftWdList.isEmpty() && leftWord.length() > 2) { List<AnalyzedToken> tagMatch = tagMatch(word, leftAnalyzedTokens, rightAnalyzedTokens); if (tagMatch != null) { return tagMatch; } } List<AnalyzedToken> match = tryOWithAdj(word, leftWord, rightAnalyzedTokens); if (match != null) return match; compoundDebugLogger.logUnknownCompound(word); return null; }
From source file:org.languagetool.tools.RuleMatchAsXmlSerializerTest.java
/**
 * Both serializer overloads must emit the language's name/shortname attributes;
 * the mother-tongue overload must additionally list the mother tongue.
 */
@Test
public void testLanguageAttributes() throws IOException {
    final String plainXml = SERIALIZER.ruleMatchesToXml(Collections.<RuleMatch>emptyList(), "Fake", 5,
            NORMAL_API, LANG, Collections.<String>emptyList());
    assertTrue(plainXml.contains("shortname=\"xx-XX\""));
    assertTrue(plainXml.contains("name=\"Testlanguage\""));
    final String motherTongueXml = SERIALIZER.ruleMatchesToXml(Collections.<RuleMatch>emptyList(), "Fake", 5,
            LANG, new FakeLanguage());
    assertTrue(motherTongueXml.contains("shortname=\"xx-XX\""));
    assertTrue(motherTongueXml.contains("name=\"Testlanguage\""));
    assertTrue(motherTongueXml.contains("shortname=\"yy\""));
    assertTrue(motherTongueXml.contains("name=\"FakeLanguage\""));
    // Exactly one complete <matches>...</matches> wrapper is expected here:
    assertThat(StringUtils.countMatches(motherTongueXml, "<matches"), is(1));
    assertThat(StringUtils.countMatches(motherTongueXml, "</matches>"), is(1));
}
From source file:org.languagetool.tools.RuleMatchAsXmlSerializerTest.java
/**
 * The API mode controls whether the XML output opens and/or closes the
 * {@code matches} element: START opens only, CONTINUE neither, END closes only,
 * NORMAL produces a complete document.
 */
@Test
public void testApiModes() throws IOException {
    final String xmlStart = SERIALIZER.ruleMatchesToXml(Collections.<RuleMatch>emptyList(), "Fake", 5,
            START_API, LANG, Collections.<String>emptyList());
    assertMatchesTagCounts(xmlStart, 1, 0);
    final String xmlMiddle = SERIALIZER.ruleMatchesToXml(Collections.<RuleMatch>emptyList(), "Fake", 5,
            CONTINUE_API, LANG, Collections.<String>emptyList());
    assertMatchesTagCounts(xmlMiddle, 0, 0);
    final String xmlEnd = SERIALIZER.ruleMatchesToXml(Collections.<RuleMatch>emptyList(), "Fake", 5,
            END_API, LANG, Collections.<String>emptyList());
    assertMatchesTagCounts(xmlEnd, 0, 1);
    final String xml = SERIALIZER.ruleMatchesToXml(Collections.<RuleMatch>emptyList(), "Fake", 5,
            NORMAL_API, LANG, Collections.<String>emptyList());
    assertMatchesTagCounts(xml, 1, 1);
}

/** Asserts how often the opening and closing {@code matches} tags occur in {@code xml}. */
private void assertMatchesTagCounts(String xml, int expectedOpening, int expectedClosing) {
    assertThat(StringUtils.countMatches(xml, "<matches"), is(expectedOpening));
    assertThat(StringUtils.countMatches(xml, "</matches>"), is(expectedClosing));
}
From source file:org.ligoj.app.plugin.prov.aws.in.ProvAwsPriceImportResource.java
private void instalS3Price(final UpdateContext context, final AwsS3Price csv, final ProvLocation location) { // Resolve the type final String name = mapStorageToApi.get(csv.getVolumeType()); if (name == null) { log.warn("Unknown storage type {}, ignored", csv.getVolumeType()); return;//from w w w . j a v a 2s . co m } final ProvStorageType type = context.getStorageTypesMerged().computeIfAbsent(name, n -> { final ProvStorageType t = context.getStorageTypes().computeIfAbsent(name, n2 -> { // New storage type final ProvStorageType newType = new ProvStorageType(); newType.setName(n2); newType.setNode(context.getNode()); return newType; }); // Update storage details t.setAvailability(toPercent(csv.getAvailability())); t.setDurability9(StringUtils.countMatches(StringUtils.defaultString(csv.getDurability()), '9')); t.setOptimized(ProvStorageOptimized.DURABILITY); t.setLatency(name.equals("glacier") ? Rate.WORST : Rate.MEDIUM); t.setDescription( "{\"class\":\"" + csv.getStorageClass() + "\",\"type\":\"" + csv.getVolumeType() + "\"}"); stRepository.saveAndFlush(t); return t; }); // Update the price as needed saveAsNeeded(context.getPreviousStorage().computeIfAbsent(location.getName() + name, r -> { final ProvStoragePrice p = new ProvStoragePrice(); p.setLocation(location); p.setType(type); p.setCode(csv.getSku()); return p; }), csv.getPricePerUnit(), p -> spRepository.save(p)); }
From source file:org.ligoj.app.resource.plugin.LigojPluginListener.java
/** * Determine the plug-in type and check it regarding the contact and the convention. * * @param plugin//w w w. j a v a2s. co m * The plug-in resource. * @return The checked {@link PluginType} */ protected PluginType determinePluginType(final ServicePlugin plugin) { // Determine the type from the key by convention final PluginType result = PluginType.values()[StringUtils.countMatches(plugin.getKey(), ':')]; // Double check the convention with related interface final PluginType interfaceType; if (plugin instanceof ToolPlugin) { interfaceType = PluginType.TOOL; } else { interfaceType = PluginType.SERVICE; } if (interfaceType != result) { throw new TechnicalException( String.format("Incompatible type from the key (%s -> %s) vs type from the interface (%s)", plugin.getKey(), result, interfaceType)); } return result; }