Usage examples for org.apache.commons.lang3.StringUtils.isAlpha
public static boolean isAlpha(final CharSequence cs)
Checks if the CharSequence contains only Unicode letters.
null will return false.
From source file:org.ahp.commons.validator.ValidatorUtil.java
/**
 * Tells whether the given string consists solely of letters.
 *
 * <p>This is a pure delegation to {@code StringUtils.isAlpha}; no additional
 * validation is performed here.
 *
 * @param pInputString the string to inspect
 * @return whatever {@code StringUtils.isAlpha(pInputString)} reports
 */
public static boolean isAlpha(String pInputString) {
    final boolean lettersOnly = StringUtils.isAlpha(pInputString);
    return lettersOnly;
}
From source file:org.apache.flink.table.runtime.functions.SqlFunctionUtils.java
/**
 * Tells whether {@code obj} is a non-empty {@link String} accepted by
 * {@code StringUtils.isAlpha}.
 *
 * @param obj the value to inspect; may be {@code null} or of any type
 * @return {@code false} when {@code obj} is {@code null}, is not a {@code String},
 *         or is the empty string; otherwise the result of {@code StringUtils.isAlpha}
 */
public static boolean isAlpha(Object obj) {
    // instanceof is false for null, so a single guard covers both the null and the type check.
    if (obj instanceof String) {
        final String text = (String) obj;
        return !text.isEmpty() && StringUtils.isAlpha(text);
    }
    return false;
}
From source file:org.apache.lucene.index.collocations.CollocationExtractor.java
public void extract(CollocationIndexer logger) throws IOException { // TermEnum te = reader.terms(new Term(fieldName, "")); // http://stackoverflow.com/questions/19208523/how-to-get-all-terms-in-index-directory-created-by-lucene-4-4-0 Terms terms = MultiFields.getTerms(this.reader, this.fieldName); TermsEnum te = terms.iterator(null); BytesRef bytesRef = null;/* w w w . j a v a 2s .co m*/ while (te.next() != null) { // iterate item A bytesRef = te.term(); if (!StringUtils.isAlpha(bytesRef.utf8ToString())) { continue; } // only process non-numbers /* if (!fieldName.equals(bytesRef.field())) { break; } */ processTerm(bytesRef, logger, slopSize); } }
From source file:org.apache.lucene.index.collocations.CollocationExtractor.java
/** * Called for every term in the index//from www . j a va 2 s . c om * docsAndPositions, possible speed up by http://lucene.apache.org/core/4_2_0/core/org/apache/lucene/index/TermsEnum.html * http://stackoverflow.com/questions/15771843/get-word-position-in-document-with-lucene * Migration Guide: http://lucene.apache.org/core/4_8_1/MIGRATE.html * http://stackoverflow.com/questions/15370652/retrieving-all-term-positions-from-docsandpositionsenum * @param bytesRef * @param logger * @param slop * @throws IOException */ void processTerm(BytesRef bytesRef, CollocationIndexer logger, int slop) throws IOException { Term term = new Term(this.fieldName, bytesRef); if (!filter.processTerm(term.text())) { return; } System.out.println("Processing term: " + term); // TermEnum te = reader.terms(term); // int numDocsForTerm = Math.min(te.docFreq(), maxNumDocsToAnalyze); int numDocsForTerm = Math.min(this.reader.docFreq(term), maxNumDocsToAnalyze); int totalNumDocs = reader.numDocs(); float percent = (float) numDocsForTerm / (float) totalNumDocs; isTermTooPopularOrNotPopularEnough(term, percent); // get a list of all the docs with this term // Apache Lucene Migration Guide // TermDocs td = reader.termDocs(term); // get dpe in first hand DocsAndPositionsEnum dpe = MultiFields.getTermPositionsEnum(this.reader, null, this.fieldName, bytesRef); HashMap<String, CollocationScorer> phraseTerms = new HashMap<String, CollocationScorer>(); int MAX_TERMS_PER_DOC = 100000; BitSet termPos = new BitSet(MAX_TERMS_PER_DOC); int numDocsAnalyzed = 0; // for all docs that contain this term int docSeq; while ((docSeq = dpe.nextDoc()) != DocsEnum.NO_MORE_DOCS) { int docId = dpe.docID(); // System.out.println("Processing docId: "+docId); numDocsAnalyzed++; if (numDocsAnalyzed > maxNumDocsToAnalyze) { break; } // get TermPositions for matching doc // TermPositionVector tpv = (TermPositionVector) reader.getTermFreqVector(docId, fieldName); // String[] terms_str = tpv.getTerms(); Terms tv = 
this.reader.getTermVector(docId, this.fieldName); TermsEnum te = tv.iterator(null); // TODO refactor iteration List<String> terms_list = new ArrayList<>(); while (te.next() != null) { terms_list.add(te.term().utf8ToString()); } String[] terms_str = terms_list.toArray(new String[terms_list.size()]); // System.out.println("terms_str: "+Arrays.toString(terms_str)); termPos.clear(); int index = recordAllPositionsOfTheTermInCurrentDocumentBitset(docSeq, term, termPos, tv, terms_str); // now look at all OTHER terms_str in this doc and see if they are // positioned in a pre-defined sized window around the current term /* for (int j = 0; j < terms_str.length; j++) { if (j == index) { // (item A) continue; } if (!filter.processTerm(terms_str[j])) { continue; } if (!StringUtils.isAlpha(terms_str[j])) { continue; } // sequential code boolean matchFound = false; for (int k = 0; ((k < dpe.freq()) && (!matchFound)); k++) { try { // inefficient // iterate through all other items (item B) Integer position = dpe.nextPosition(); Integer startpos = Math.max(0, position - slop); Integer endpos = position + slop; matchFound = populateHashMapWithPhraseTerms(term, numDocsForTerm, totalNumDocs, phraseTerms, termPos, terms_str, j, matchFound, startpos, endpos); } catch (ArrayIndexOutOfBoundsException e) { e.printStackTrace(); break; } catch (IOException e) { e.printStackTrace(); break; } } } */ /// boolean[] matchFound = new boolean[terms_str.length]; // single match is sufficient, no duplicate process for (int j = 0; j < matchFound.length; j++) matchFound[j] = false; for (int k = 0; (k < dpe.freq()); k++) { Integer position = dpe.nextPosition(); Integer startpos = Math.max(0, position - slop); Integer endpos = position + slop; for (int j = 0; j < terms_str.length && !matchFound[j]; j++) { if (j == index) { // (item A) continue; } if (!filter.processTerm(terms_str[j])) { continue; } if (!StringUtils.isAlpha(terms_str[j])) { continue; } // inefficient // iterate through all other items 
(item B) populateHashMapWithPhraseTerms(term, numDocsForTerm, totalNumDocs, phraseTerms, termPos, terms_str, j, matchFound, startpos, endpos); } } } // end docs loop sortTopTermsAndAddToCollocationsIndexForThisTerm(logger, phraseTerms); }
From source file:org.xwiki.rendering.util.IdGenerator.java
/** * Generate a unique id attribute using the passed text as the seed value. The generated id complies with the XHTML * specification. Extract from <a href="http://www.w3.org/TR/xhtml1/#C_8">XHTML RFC</a>: * <p>/* w ww . j a v a2 s . c o m*/ * <code> When defining fragment identifiers to be backward-compatible, only strings matching the pattern * [A-Za-z][A-Za-z0-9:_.-]* should be used.</code> * </p> * * @param prefix the prefix of the identifier. Has to match [a-zA-Z]. * @param text the text used to generate the unique id * @return the unique id. For example "Hello world" will generate prefix + "Helloworld". */ public String generateUniqueId(String prefix, String text) { // Verify that the passed prefix contains only alpha characters since the generated id must be a valid HTML id. if (StringUtils.isEmpty(prefix) || !StringUtils.isAlpha(prefix)) { throw new IllegalArgumentException( "The prefix [" + prefix + "] should only contain alphanumerical characters and not be empty."); } String idPrefix = (prefix != null ? prefix : "") + normalizeId(text); int occurence = 0; String id = idPrefix; while (this.generatedIds.contains(id)) { occurence++; id = idPrefix + "-" + occurence; } // Save the generated id so that the next call to this method will not generate the same id. this.generatedIds.add(id); return id; }
From source file:tech.sirwellington.alchemy.test.junit.runners.GenerateStringTest.java
@Test public void testValues() { System.out.println("testValues"); AlchemyGenerator<String> result = GenerateString.Values.createGeneratorFor(annotation); assertThat(result, notNullValue());/*from w w w . ja v a2s.c o m*/ String string = result.get(); assertThat(string, not(isEmptyOrNullString())); if (type == UUID) { int uuidLength = java.util.UUID.randomUUID().toString().length(); assertThat(string.length(), is(uuidLength)); } else { assertThat(string.length(), is(length)); } switch (type) { case ALPHABETIC: assertThat(StringUtils.isAlpha(string), is(true)); break; case ALPHANUMERIC: assertThat(StringUtils.isAlphanumeric(string), is(true)); break; case HEXADECIMAL: assertThat(string.matches("[A-Fa-f0-9]+"), is(true)); break; //No additional assertions } }
From source file:TextCleaning.SpellCheckingMethods.java
public String repeatedCharacters(String currTerm) { String toReturn = currTerm;/*from www . jav a2 s . c o m*/ Integer index = null; Set<RepeatedLetters> setRL = new HashSet(); int count = 1; char[] chars = currTerm.toCharArray(); char currChar; char previousChar = 0; for (int i = 0; i < chars.length; i++) { currChar = chars[i]; if (i > 0) { previousChar = chars[i - 1]; } if (previousChar == currChar && StringUtils.isAlpha(String.valueOf(previousChar))) { if (index == null) { index = i - 1; } count++; } else { if (count > 1) { setRL.add(new RepeatedLetters(previousChar, index, count)); count = 1; } index = null; } if (i == (chars.length - 1) && count > 1) { setRL.add(new RepeatedLetters(previousChar, index, count)); } } boolean loop = true; int loopsCounter = 0; while (loop) { loopsCounter++; if (loopsCounter > 5) { break; } for (RepeatedLetters rl : setRL) { String letter = String.valueOf(rl.getCurrChar()); String toReplace; String subs; String toBeReplaced; //if two same letters are found if (rl.getCount() > 1) { toBeReplaced = currTerm.substring(rl.getIndex(), rl.getIndex() + rl.getCount()); ///if these are actually 3 letters or more, test if by replacing them by 2 letters we have a match in the heuristics if (rl.getCount() > 2) { toReplace = letter + letter; subs = StringUtils.replace(toReturn, toBeReplaced, toReplace); if (HLoader.getMapHeuristics().containsKey(subs.toLowerCase())) { toReturn = subs; loop = false; break; } else if (toReturn.endsWith(toReplace) && !toReturn.contains(" ")) { toReturn = StringUtils.replace(toReturn, toBeReplaced, letter); loop = true; break; } } // and maybe that if they are just one, this is a match too? (as in "boredd" meaning "bored") // toReplace = letter; // subs = StringUtils.replace(toReturn, toBeReplaced, toReplace); // if (HLoader.getMapHeuristics().containsKey(subs.toLowerCase())) { // toReturn = subs; // loop = false; // break; // } } else { loop = false; } } } return toReturn; }
From source file:ubic.gemma.web.controller.common.auditAndSecurity.SecurityControllerImpl.java
@Override public String createGroup(String groupName) { if (StringUtils.isBlank(groupName) || groupName.length() < 3 || !StringUtils.isAlpha(groupName)) { throw new IllegalArgumentException( "Group name must contain only letters and must be at least 3 letters long."); }//w w w .ja va 2 s. c o m securityService.createGroup(groupName); return groupName; }