List of usage examples for org.apache.commons.lang3 StringUtils isNumeric
public static boolean isNumeric(final CharSequence cs)
Checks if the CharSequence contains only Unicode digits.
From source file:org.languagetool.rules.de.AgreementRule.java
/** * Search for modifiers (such as "sehr", "1,4 Meter") which can expand a * determiner - adjective - noun group ("ein hohes Haus" -> "ein sehr hohes Haus", * "ein 500 Meter hohes Haus") and return the index of the first non-modifier token ("Haus") * @param startAt index of array where to start searching for modifier * @return index of first non-modifier token *///from w ww .j a v a2s .c o m private int getPosAfterModifier(int startAt, AnalyzedTokenReadings[] tokens) { if ((startAt + 1) < tokens.length && MODIFIERS.contains(tokens[startAt].getToken())) { startAt++; } if ((startAt + 1) < tokens.length && (StringUtils.isNumeric(tokens[startAt].getToken()) || tokens[startAt].hasPosTag("ZAL"))) { int posAfterModifier = startAt + 1; if ((startAt + 3) < tokens.length && ",".equals(tokens[startAt + 1].getToken()) && StringUtils.isNumeric(tokens[startAt + 2].getToken())) { posAfterModifier = startAt + 3; } if (StringUtils.endsWithAny(tokens[posAfterModifier].getToken(), "gramm", "Gramm", "Meter", "meter")) { return posAfterModifier + 1; } } return startAt; }
From source file:org.languagetool.rules.de.CaseRule.java
private boolean isNominalization(int i, AnalyzedTokenReadings[] tokens, String token, AnalyzedTokenReadings lowercaseReadings) { AnalyzedTokenReadings nextReadings = i < tokens.length - 1 ? tokens[i + 1] : null; // TODO: "vor Schlimmerem", "Er hatte Schlimmes zu befrchten" // TODO: wir finden den Fehler in "Die moderne Wissenschaftlich" nicht, weil nicht alle // Substantivierungen in den Morphy-Daten stehen (z.B. "Grte" fehlt) und wir deshalb nur // eine Abfrage machen, ob der erste Buchstabe gro ist. if (StringTools.startsWithUppercase(token) && !isNumber(token) && !(hasNounReading(nextReadings) || (nextReadings != null && StringUtils.isNumeric(nextReadings.getToken()))) && !token.matches("Alle[nm]")) { if (lowercaseReadings != null && lowercaseReadings.hasPosTag("PRP:LOK+TMP+CAU:DAT+AKK")) { return false; }/*from www . jav a 2s . c o m*/ // Ignore "das Dmmste, was je..." but not "das Dmmste Kind" AnalyzedTokenReadings prevToken = i > 0 ? tokens[i - 1] : null; AnalyzedTokenReadings prevPrevToken = i >= 2 ? tokens[i - 2] : null; AnalyzedTokenReadings prevPrevPrevToken = i >= 3 ? tokens[i - 3] : null; String prevTokenStr = prevToken != null ? prevToken.getToken() : ""; if (StringUtils.equalsAny(prevTokenStr, "und", "oder", "beziehungsweise") && prevPrevToken != null && (tokens[i].hasPartialPosTag("SUB") && tokens[i].hasPartialPosTag(":ADJ")) || //"das dabei Erlernte und Erlebte ist ..." 
-> 'Erlebte' is correct here (prevPrevToken.hasPartialPosTag("SUB") && !hasNounReading(nextReadings) && // "die Ausgaben fr Umweltschutz und Soziales" lowercaseReadings != null && lowercaseReadings.hasPartialPosTag("ADJ"))) { return true; } if (lowercaseReadings != null && lowercaseReadings.hasPosTag("PA1:PRD:GRU:VER")) { // "aus sechs berwiegend muslimischen Lndern" return false; } return (prevToken != null && ("irgendwas".equals(prevTokenStr) || "aufs".equals(prevTokenStr) || isNumber(prevTokenStr))) || (hasPartialTag(prevToken, "ART", "PRO:") && !(((i < 4 && tokens.length > 4) || prevToken.getReadings().size() == 1 || prevPrevToken.hasLemma("sein")) && prevToken.hasPosTagStartingWith("PRO:PER:NOM:")) && !prevToken.hasPartialPosTag(":STD")) || // "die Verurteilten", "etwas Verrcktes", "ihr Bestes" (hasPartialTag(prevPrevPrevToken, "ART") && hasPartialTag(prevPrevToken, "PRP") && hasPartialTag(prevToken, "SUB")) || // "die zum Tode Verurteilten" (hasPartialTag(prevPrevToken, "PRO:", "PRP") && hasPartialTag(prevToken, "ADJ", "ADV", "PA2", "PA1")) || // "etwas schn Verrcktes", "mit aufgewhltem Innerem" (hasPartialTag(prevPrevPrevToken, "PRO:", "PRP") && hasPartialTag(prevPrevToken, "ADJ", "ADV") && hasPartialTag(prevToken, "ADJ", "ADV", "PA2")) || // "etwas ganz schn Verrcktes" (tokens[i].hasPosTagStartingWith("SUB:") && hasPartialTag(prevToken, "GEN") && !hasPartialTag(nextReadings, "PKT")); // "Parks Vertraute Choi Soon Sil ist zu drei Jahren Haft verurteilt worden." } return false; }
From source file:org.languagetool.rules.de.CaseRule.java
private boolean isNumber(String token) { if (StringUtils.isNumeric(token)) { return true; }//from w ww . j a va2s . c o m AnalyzedTokenReadings lookup = lookup(StringTools.lowercaseFirstChar(token)); return lookup != null && lookup.hasPosTag("ZAL"); }
From source file:org.languagetool.rules.de.CaseRule.java
/**
 * Checks, based on the POS tags of the token at {@code i} and of its neighbours,
 * whether that token is an adjective used as a noun (as the method name says).
 *
 * @param i index of the token under inspection in {@code tokens}
 * @param tokens the tokens of the sentence
 * @param lowercaseReadings readings consulted for the lowercase variant of the token
 *        (presumably the result of a lowercase lookup - verify against the caller);
 *        may be {@code null}
 * @return {@code true} if a reading of the token has no POS tag or a tag containing "ADJ",
 *         the next token has no noun reading and is not numeric, and none of the
 *         exclusions below applies
 */
private boolean isAdjectiveAsNoun(int i, AnalyzedTokenReadings[] tokens, AnalyzedTokenReadings lowercaseReadings) {
  AnalyzedTokenReadings prevToken = i > 0 ? tokens[i - 1] : null;
  AnalyzedTokenReadings nextReadings = i < tokens.length - 1 ? tokens[i + 1] : null;
  AnalyzedTokenReadings prevLowercaseReadings = null;
  // If the token two positions back is one of sentenceStartExceptions, the previous
  // token is additionally looked up in its lowercase form (prevToken is non-null here as i > 1).
  if (i > 1 && sentenceStartExceptions.contains(tokens[i - 2].getToken())) {
    prevLowercaseReadings = lookup(prevToken.getToken().toLowerCase());
  }
  // ignore "Der Versuch, Neues zu lernen / Gutes zu tun / Spannendes auszuprobieren"
  boolean isPossiblyFollowedByInfinitive = nextReadings != null && nextReadings.getToken().equals("zu");
  boolean isFollowedByInfinitive = nextReadings != null && !isPossiblyFollowedByInfinitive && nextReadings.hasPartialPosTag("EIZ");
  boolean isFollowedByPossessiveIndicator = nextReadings != null && POSSESSIVE_INDICATORS.contains(nextReadings.getToken());
  boolean isUndefQuantifier = prevToken != null && UNDEFINED_QUANTIFIERS.contains(prevToken.getToken().toLowerCase());
  boolean isPrevDeterminer = prevToken != null
      && (hasPartialTag(prevToken, "ART", "PRP", "ZAL") || hasPartialTag(prevLowercaseReadings, "ART", "PRP", "ZAL"))
      && !prevToken.hasPartialPosTag(":STD");
  boolean isPrecededByVerb = prevToken != null && prevToken.matchesPosTagRegex("VER:(MOD:|AUX:)?[1-3]:.*") && !prevToken.hasLemma("sein");
  // This branch is entered only when none of the context flags computed above applies;
  // it either returns directly or falls through to the reading loop at the end.
  if (!isPrevDeterminer && !isUndefQuantifier && !(isPossiblyFollowedByInfinitive || isFollowedByInfinitive)
      && !(isPrecededByVerb && lowercaseReadings != null && hasPartialTag(lowercaseReadings, "ADJ:", "PA") && nextReadings != null && !StringUtils.equalsAny(nextReadings.getToken(), "und", "oder", ","))
      && !(isFollowedByPossessiveIndicator && hasPartialTag(lowercaseReadings, "ADJ", "VER")) // "Wacht auf, Verdammte dieser Welt!"
      && !(prevToken != null && prevToken.hasPosTag("KON:UNT") && !hasNounReading(nextReadings) && nextReadings != null && !nextReadings.hasPosTag("KON:NEB"))) {
    // Only set when the previous token has an "ADJ" tag; otherwise stays null.
    AnalyzedTokenReadings prevPrevToken = i > 1 && prevToken != null && prevToken.hasPartialPosTag("ADJ") ? tokens[i - 2] : null;
    // Another check to avoid false alarms for "eine Gruppe Aufständischer starb"
    if (!isPrecededByVerb && lowercaseReadings != null && prevToken != null) {
      if (prevToken.hasPartialPosTag("SUB:") && lowercaseReadings.matchesPosTagRegex("(ADJ|PA2):GEN:PLU:MAS:GRU:SOL.*")) {
        return nextReadings != null && !nextReadings.hasPartialPosTag("SUB:");
      } else if (nextReadings != null && nextReadings.getReadingsLength() == 1 && prevToken.hasPosTagStartingWith("PRO:PER:NOM:") && nextReadings.hasPosTag("ADJ:PRD:GRU")) {
        // avoid false alarm "Weil er Unmündige sexuell missbraucht haben soll,..."
        return true;
      }
    }
    // Another check to avoid false alarms for "ein politischer Revolutionär"
    if (!hasPartialTag(prevPrevToken, "ART", "PRP", "ZAL")) {
      return false;
    }
  }
  // ignore "die Ausgewählten" but not "die Ausgewählten Leute":
  for (AnalyzedToken reading : tokens[i].getReadings()) {
    String posTag = reading.getPOSTag();
    if ((posTag == null || posTag.contains("ADJ")) && !hasNounReading(nextReadings) && !StringUtils.isNumeric(nextReadings != null ? nextReadings.getToken() : "")) {
      if (posTag == null && hasPartialTag(lowercaseReadings, "PRP:LOK", "PA2:PRD:GRU:VER", "PA1:PRD:GRU:VER", "ADJ:PRD:KOM", "ADV:TMP")) {
        // skip to avoid a false true for, e.g. "Die Zahl ging auf über 1.000 zurück."/ "Dies gilt schon lange als überholt." / "Bis Bald!"
        // but not for "Er versuchte, Neues zu wagen."
      } else {
        return true;
      }
    }
  }
  return false;
}
From source file:org.languagetool.rules.de.CompoundCoherencyRule.java
@Override public RuleMatch[] match(List<AnalyzedSentence> sentences) throws IOException { List<RuleMatch> ruleMatches = new ArrayList<>(); Map<String, List<String>> normToTextOccurrences = new HashMap<>(); int pos = 0;/*w w w. j a v a 2 s . com*/ for (AnalyzedSentence sentence : sentences) { AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace(); for (AnalyzedTokenReadings atr : tokens) { String lemmaOrNull = getLemma(atr); String token = atr.getToken(); if (token.isEmpty()) { continue; } // The whole implementation could be simpler, but this way we also catch cases where // the word (and this its lemma) isn't known. String lemma = lemmaOrNull != null ? lemmaOrNull : token; String normToken = lemma.replace("-", "").toLowerCase(); if (StringUtils.isNumeric(normToken)) { // avoid messages about "2-3" and "23" both being used break; } List<String> textOcc = normToTextOccurrences.get(normToken); if (textOcc != null) { if (textOcc.stream().noneMatch(f -> f.equalsIgnoreCase(lemma))) { String other = textOcc.get(0); if (containsHyphenInside(other) || containsHyphenInside(token)) { String msg = "Uneinheitliche Verwendung von Bindestrichen. Der Text enthlt sowohl '" + token + "' als auch '" + other + "'."; RuleMatch ruleMatch = new RuleMatch(this, sentence, pos + atr.getStartPos(), pos + atr.getEndPos(), msg); if (token.replace("-", "").equalsIgnoreCase(other.replace("-", ""))) { // might be different inflected forms, so only suggest if really just the hyphen is different: ruleMatch.setSuggestedReplacement(other); } ruleMatches.add(ruleMatch); } } } else { List<String> l = new ArrayList<>(); l.add(lemma); normToTextOccurrences.putIfAbsent(normToken, l); } } pos += sentence.getText().length(); } return toRuleMatchArray(ruleMatches); }
From source file:org.languagetool.rules.SentenceWhitespaceRule.java
@Override public RuleMatch[] match(List<AnalyzedSentence> sentences) throws IOException { boolean isFirstSentence = true; boolean prevSentenceEndsWithWhitespace = false; boolean prevSentenceEndsWithNumber = false; List<RuleMatch> ruleMatches = new ArrayList<>(); int pos = 0;//from ww w. j av a 2 s . c o m for (AnalyzedSentence sentence : sentences) { AnalyzedTokenReadings[] tokens = sentence.getTokens(); if (isFirstSentence) { isFirstSentence = false; } else { if (!prevSentenceEndsWithWhitespace && tokens.length > 1) { int startPos = 0; String firstToken = tokens[1].getToken(); int endPos = firstToken.length(); RuleMatch ruleMatch = new RuleMatch(this, sentence, pos + startPos, pos + endPos, getMessage(prevSentenceEndsWithNumber)); ruleMatch.setSuggestedReplacement(" " + firstToken); ruleMatches.add(ruleMatch); } } if (tokens.length > 0) { String lastToken = tokens[tokens.length - 1].getToken(); prevSentenceEndsWithWhitespace = lastToken.trim().isEmpty() && lastToken.length() == 1; } if (tokens.length > 1) { String prevLastToken = tokens[tokens.length - 2].getToken(); prevSentenceEndsWithNumber = StringUtils.isNumeric(prevLastToken); } pos += sentence.getText().length(); } return toRuleMatchArray(ruleMatches); }
From source file:org.lockss.hasher.SimpleHasher.java
/**
 * Handles the specification of the type of hashing operation to be performed.
 *
 * <p>An empty hash type selects {@code DEFAULT_HASH_TYPE}; an all-digit value is
 * used as an index into {@code hashTypeCompat} (and the parameter is rewritten to
 * the resolved type's name); anything else is resolved with {@code HashType.valueOf}.
 *
 * @param params
 *          A HasherParams with the parameters that define the hashing
 *          operation.
 * @param result
 *          A HasherResult where to store the result of the hashing operation.
 * @return a String with any error message, or {@code null} if the hash type was resolved.
 */
public String processHashTypeParam(HasherParams params, HasherResult result) {
  final String DEBUG_HEADER = "processHashTypeParam(): ";
  if (log.isDebug2()) {
    log.debug2(DEBUG_HEADER + "Starting...");
  }

  HashType hashType;

  if (StringUtil.isNullString(params.getHashType())) {
    hashType = DEFAULT_HASH_TYPE;
  } else if (StringUtils.isNumeric(params.getHashType())) {
    try {
      // isNumeric() also accepts digit strings too long for an int; parseInt then
      // throws NumberFormatException, which is reported by the catch block below.
      int hashTypeInt = Integer.parseInt(params.getHashType());
      if (hashTypeInt < 0 || hashTypeInt >= hashTypeCompat.length
          || hashTypeCompat[hashTypeInt] == null) {
        return recordHashTypeError(result, "Unknown hash type: " + params.getHashType());
      }
      hashType = hashTypeCompat[hashTypeInt];
      params.setHashType(hashType.toString());
    } catch (RuntimeException re) {
      return recordHashTypeError(result,
          "Can't parse hash type: " + params.getHashType() + " - " + re.getMessage());
    }
  } else {
    try {
      hashType = HashType.valueOf(params.getHashType());
    } catch (IllegalArgumentException iae) {
      log.warning(DEBUG_HEADER, iae);
      return recordHashTypeError(result,
          "Unknown hash type: " + params.getHashType() + " - " + iae.getMessage());
    }
  }

  result.setHashType(hashType);
  return null;
}

/** Marks the result as failed with the given message and returns that message. */
private static String recordHashTypeError(HasherResult result, String message) {
  result.setRunnerStatus(HasherStatus.Error);
  result.setRunnerError(message);
  return message;
}
From source file:org.mqnaas.network.api.request.DateAdapter.java
/**
 * Builds a {@link Date} object from the given UNIX timestamp.
 *
 * @param v the UNIX timestamp in seconds, written with digits only
 * @return the {@link Date} for that instant
 * @throws IllegalArgumentException if {@code v} is null, empty, contains non-digit
 *         characters, or is too large to be expressed in milliseconds
 */
@Override
public Date unmarshal(String v) throws Exception {
    if (!StringUtils.isNumeric(v)) {
        throw new IllegalArgumentException("Wrong date format. UNIX timestamp expected.");
    }
    // parseLong throws NumberFormatException (an IllegalArgumentException) for digit
    // strings that do not fit into a long.
    long seconds = Long.parseLong(v);
    // seconds is non-negative here (digits only), so one upper bound is enough to
    // keep the conversion to milliseconds from silently wrapping around.
    if (seconds > Long.MAX_VALUE / 1000L) {
        throw new IllegalArgumentException("Wrong date format. UNIX timestamp out of range: " + v);
    }
    return new Date(seconds * 1000L);
}
From source file:org.mskcc.cbio.oncokb.util.NcbiEUtils.java
private static void purifyInput(Set<String> pmids) { for (String pmid : pmids) { if (pmid != null) { pmid = pmid.trim();// w ww. jav a2 s.com } if (!StringUtils.isNumeric(pmid)) { System.out.println("pmid has to be a numeric string, but the input is '" + pmid + "'"); } } }
From source file:org.omnaest.utils.beans.adapter.source.SourcePropertyAccessorDecoratorPropertyNameTemplate.java
private static String[] processPropertyNameWithTemplate(String propertyName, PropertyMetaInformation propertyMetaInformation) { ////from w w w . ja va2 s . c o m String[] retval = new String[] { propertyName }; if (propertyMetaInformation != null) { // PropertyNameTemplate propertyNameTemplate = propertyMetaInformation .getPropertyAnnotationAutowiredContainer().getValue(PropertyNameTemplate.class); if (propertyNameTemplate == null) { propertyNameTemplate = propertyMetaInformation.getClassAnnotationAutowiredContainer() .getValue(PropertyNameTemplate.class); } // if (propertyNameTemplate != null) { // final Class<? extends ElementConverter<?, String>>[] additionalArgumentConverterTypes = propertyNameTemplate .additionalArgumentConverterTypes(); final String primaryTemplate = propertyNameTemplate.value(); final String[] alternativeTemplateValues = propertyNameTemplate.alternativeValues(); final String[] templates = ArrayUtils.add(alternativeTemplateValues, 0, primaryTemplate); if (primaryTemplate != null) { retval = new String[0]; } for (String template : templates) { if (template != null) { // final String TAG_PROPERTYNAME = "\\{(?iu)propertyname(?-iu)\\}"; final String TAG_PARAMETER = "\\{(\\d)\\}"; Assert.isTrue( Pattern.matches("(" + TAG_PROPERTYNAME + "|" + TAG_PARAMETER + "|[^\\{\\}])+", template), "PropertyNameTemplate of property " + propertyName + " has an invalid format."); // String templateWithValues = template.replaceAll(TAG_PROPERTYNAME, propertyName); // StringBuffer stringBuffer = new StringBuffer(); Matcher matcher = Pattern.compile(TAG_PARAMETER).matcher(templateWithValues); while (matcher.find()) { // String group = matcher.group(1); // Assert.isTrue(StringUtils.isNumeric(group), "Parameter index position within PropertyNameTemplate of property " + propertyName + " has to be a valid number. 
Found: " + group); int additionalArgumentIndexPosition = Integer.valueOf(group); // Object[] additionalArguments = propertyMetaInformation.getAdditionalArguments(); int parameterIndexPositionMax = additionalArguments.length - 1; Assert.isTrue( additionalArgumentIndexPosition >= 0 && additionalArgumentIndexPosition <= parameterIndexPositionMax, "Parameter index position within PropertyNameTemplate of property " + propertyName + " has to be between 0 and " + parameterIndexPositionMax); // final Object additionalArgument = determineAdditionalArgument( additionalArgumentConverterTypes, additionalArgumentIndexPosition, additionalArguments); final String additionalArgumentString = String.valueOf(additionalArgument); matcher.appendReplacement(stringBuffer, additionalArgumentString); } matcher.appendTail(stringBuffer); // retval = ArrayUtils.add(retval, stringBuffer.toString()); } } } } return retval; }