Example usage for org.apache.commons.lang3 StringUtils isAlpha

List of usage examples for org.apache.commons.lang3 StringUtils isAlpha

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringUtils.isAlpha.

Prototype

public static boolean isAlpha(final CharSequence cs) 

Source Link

Document

Checks if the CharSequence contains only Unicode letters.

A null input will return false.

Usage

From source file:org.ahp.commons.validator.ValidatorUtil.java

/**
 * Checks whether the given string consists solely of Unicode letters.
 *
 * @param pInputString the string to test; {@code null} or empty yields {@code false}
 * @return {@code true} if every character is a Unicode letter, {@code false} otherwise
 */
public static boolean isAlpha(String pInputString) {
    final boolean lettersOnly = StringUtils.isAlpha(pInputString);
    return lettersOnly;
}

From source file:org.apache.flink.table.runtime.functions.SqlFunctionUtils.java

/**
 * Tests whether {@code obj} is a non-empty {@link String} made up entirely of
 * Unicode letters.
 *
 * @param obj candidate value; {@code null} and non-String values return {@code false}
 * @return {@code true} only for a non-empty, purely alphabetic String
 */
public static boolean isAlpha(Object obj) {
    // A null reference fails the instanceof test, so one check covers both cases.
    if (!(obj instanceof String)) {
        return false;
    }
    final String candidate = obj.toString();
    // Explicitly reject the empty string before delegating to commons-lang3.
    return !"".equals(candidate) && StringUtils.isAlpha(candidate);
}

From source file:org.apache.lucene.index.collocations.CollocationExtractor.java

/**
 * Walks every term stored in {@code this.fieldName} across the whole index and
 * hands each purely alphabetic term to {@link #processTerm} for collocation
 * scoring with the configured {@code slopSize} window.
 *
 * @param logger sink that receives the extracted collocations
 * @throws IOException on index read failure
 */
public void extract(CollocationIndexer logger) throws IOException {
    // Pre-Lucene-4 equivalent: TermEnum te = reader.terms(new Term(fieldName, ""));
    // Term enumeration per
    // http://stackoverflow.com/questions/19208523/how-to-get-all-terms-in-index-directory-created-by-lucene-4-4-0
    // NOTE(review): MultiFields.getTerms returns null when the field has no
    // terms — the iterator() call below would then NPE; confirm the field is
    // always populated before this runs.
    Terms terms = MultiFields.getTerms(this.reader, this.fieldName);
    TermsEnum te = terms.iterator(null);

    BytesRef bytesRef = null;
    while (te.next() != null) { // iterate over each candidate head term (item A)
        bytesRef = te.term();
        // Skip any term containing non-letter characters (digits, punctuation).
        if (!StringUtils.isAlpha(bytesRef.utf8ToString())) {
            continue;
        }
        // only process non-numbers
        /*
        if (!fieldName.equals(bytesRef.field())) {
            break;
        }
        */
        processTerm(bytesRef, logger, slopSize);
    }
}

From source file:org.apache.lucene.index.collocations.CollocationExtractor.java

/**
 * Called for every term in the index//from   www  .  j  a  va 2 s .  c  om
 * docsAndPositions, possible speed up by http://lucene.apache.org/core/4_2_0/core/org/apache/lucene/index/TermsEnum.html
 * http://stackoverflow.com/questions/15771843/get-word-position-in-document-with-lucene
 * Migration Guide: http://lucene.apache.org/core/4_8_1/MIGRATE.html
 * http://stackoverflow.com/questions/15370652/retrieving-all-term-positions-from-docsandpositionsenum
 * @param bytesRef
 * @param logger
 * @param slop
 * @throws IOException
 */
/**
 * Scores collocations for a single head term: for every document containing
 * {@code bytesRef} (up to {@code maxNumDocsToAnalyze}), finds other alphabetic
 * terms positioned within {@code slop} positions of an occurrence of the head
 * term and accumulates them into per-phrase {@link CollocationScorer}s, which
 * are finally sorted and written through {@code logger}.
 *
 * Migration references:
 * http://lucene.apache.org/core/4_2_0/core/org/apache/lucene/index/TermsEnum.html
 * http://stackoverflow.com/questions/15771843/get-word-position-in-document-with-lucene
 * http://lucene.apache.org/core/4_8_1/MIGRATE.html
 * http://stackoverflow.com/questions/15370652/retrieving-all-term-positions-from-docsandpositionsenum
 *
 * @param bytesRef the head term's bytes (item A)
 * @param logger sink for the scored collocations
 * @param slop half-width of the positional window around each occurrence
 * @throws IOException on index read failure
 */
void processTerm(BytesRef bytesRef, CollocationIndexer logger, int slop) throws IOException {
    Term term = new Term(this.fieldName, bytesRef);
    // Delegate vocabulary filtering (e.g. stopwords) to the configured filter.
    if (!filter.processTerm(term.text())) {
        return;
    }
    System.out.println("Processing term: " + term);
    // Pre-4.x equivalent: TermEnum te = reader.terms(term);
    // int numDocsForTerm = Math.min(te.docFreq(), maxNumDocsToAnalyze);
    int numDocsForTerm = Math.min(this.reader.docFreq(term), maxNumDocsToAnalyze);
    int totalNumDocs = reader.numDocs();
    // Fraction of the corpus containing this term, used for popularity scoring.
    float percent = (float) numDocsForTerm / (float) totalNumDocs;

    // NOTE(review): the result of this popularity check is discarded — if it
    // returns a boolean meant to gate processing, this term is never skipped;
    // confirm intended behavior against the method's definition.
    isTermTooPopularOrNotPopularEnough(term, percent);

    // get a list of all the docs with this term
    // (replaces pre-4.x: TermDocs td = reader.termDocs(term);)
    // NOTE(review): getTermPositionsEnum can return null if the term/field is
    // missing — the nextDoc() call below would then NPE.
    DocsAndPositionsEnum dpe = MultiFields.getTermPositionsEnum(this.reader, null, this.fieldName, bytesRef);
    HashMap<String, CollocationScorer> phraseTerms = new HashMap<String, CollocationScorer>();
    int MAX_TERMS_PER_DOC = 100000;
    // Bitset of positions (within the current doc) occupied by the head term.
    BitSet termPos = new BitSet(MAX_TERMS_PER_DOC);

    int numDocsAnalyzed = 0;
    // for all docs that contain this term
    int docSeq;
    while ((docSeq = dpe.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
        int docId = dpe.docID();
        // System.out.println("Processing docId: "+docId);
        numDocsAnalyzed++;
        if (numDocsAnalyzed > maxNumDocsToAnalyze) {
            break;
        }
        // Collect this document's term vector for the field.
        // Pre-4.x equivalent:
        // TermPositionVector tpv = (TermPositionVector) reader.getTermFreqVector(docId, fieldName);
        // String[] terms_str = tpv.getTerms();
        // NOTE(review): getTermVector returns null when the doc stores no
        // vector for this field — would NPE below; requires vectors enabled.
        Terms tv = this.reader.getTermVector(docId, this.fieldName);
        TermsEnum te = tv.iterator(null);
        // TODO refactor iteration

        // Materialize all distinct terms of this document into an array.
        List<String> terms_list = new ArrayList<>();
        while (te.next() != null) {
            terms_list.add(te.term().utf8ToString());
        }
        String[] terms_str = terms_list.toArray(new String[terms_list.size()]);
        // System.out.println("terms_str: "+Arrays.toString(terms_str));
        termPos.clear();
        // Mark every position of the head term in this doc; index = its slot
        // in terms_str so the neighbor loop can skip it.
        int index = recordAllPositionsOfTheTermInCurrentDocumentBitset(docSeq, term, termPos, tv, terms_str);

        // now look at all OTHER terms_str in this doc and see if they are
        // positioned in a pre-defined sized window around the current term
        // (superseded sequential version kept for reference):
        /*
        for (int j = 0; j < terms_str.length; j++) {
        if (j == index) { // (item A)
           continue;
        }
        if (!filter.processTerm(terms_str[j])) {
           continue;
        }
        if (!StringUtils.isAlpha(terms_str[j])) {
            continue;
        }
        // sequential code
        boolean matchFound = false;
        for (int k = 0; ((k < dpe.freq()) && (!matchFound)); k++) {
            try {
                // inefficient
                // iterate through all other items (item B)
                Integer position = dpe.nextPosition();
                Integer startpos = Math.max(0, position - slop);
                Integer endpos = position + slop;
                matchFound = populateHashMapWithPhraseTerms(term,
                        numDocsForTerm, totalNumDocs, phraseTerms, termPos,
                        terms_str, j, matchFound, startpos, endpos);
            }
            catch (ArrayIndexOutOfBoundsException e) {
                e.printStackTrace();
                break;
            }
            catch (IOException e) {
                e.printStackTrace();
                break;
            }
                
        }
        }
        */

        ///
        boolean[] matchFound = new boolean[terms_str.length]; // single match is sufficient, no duplicate process
        for (int j = 0; j < matchFound.length; j++)
            matchFound[j] = false;

        // For each occurrence of the head term, scan neighbor terms within
        // [position - slop, position + slop].
        for (int k = 0; (k < dpe.freq()); k++) {
            Integer position = dpe.nextPosition();
            Integer startpos = Math.max(0, position - slop);
            Integer endpos = position + slop;
            // NOTE(review): `&& !matchFound[j]` in the loop CONDITION stops the
            // whole scan at the first already-matched j rather than skipping
            // it — terms after that index are never considered for this
            // occurrence. If skipping was intended, the check belongs inside
            // the body as `continue`; confirm against the sequential version.
            for (int j = 0; j < terms_str.length && !matchFound[j]; j++) {
                if (j == index) { // skip the head term itself (item A)
                    continue;
                }
                if (!filter.processTerm(terms_str[j])) {
                    continue;
                }
                // Only alphabetic neighbor terms participate in collocations.
                if (!StringUtils.isAlpha(terms_str[j])) {
                    continue;
                }
                // inefficient
                // iterate through all other items (item B)
                populateHashMapWithPhraseTerms(term, numDocsForTerm, totalNumDocs, phraseTerms, termPos,
                        terms_str, j, matchFound, startpos, endpos);
            }

        }
    } // end docs loop

    sortTopTermsAndAddToCollocationsIndexForThisTerm(logger, phraseTerms);
}

From source file:org.xwiki.rendering.util.IdGenerator.java

/**
 * Generate a unique id attribute using the passed text as the seed value. The generated id complies with the XHTML
 * specification. Extract from <a href="http://www.w3.org/TR/xhtml1/#C_8">XHTML RFC</a>:
 * <p>/*  w ww .  j a  v  a2  s  .  c o  m*/
 * <code> When defining fragment identifiers to be backward-compatible, only strings matching the pattern
 * [A-Za-z][A-Za-z0-9:_.-]* should be used.</code>
 * </p>
 * 
 * @param prefix the prefix of the identifier. Has to match [a-zA-Z].
 * @param text the text used to generate the unique id
 * @return the unique id. For example "Hello world" will generate prefix + "Helloworld".
 */
public String generateUniqueId(String prefix, String text) {
    // Verify that the passed prefix contains only alpha characters since the generated id must be a valid HTML id.
    if (StringUtils.isEmpty(prefix) || !StringUtils.isAlpha(prefix)) {
        throw new IllegalArgumentException(
                "The prefix [" + prefix + "] should only contain alphanumerical characters and not be empty.");
    }

    String idPrefix = (prefix != null ? prefix : "") + normalizeId(text);

    int occurence = 0;
    String id = idPrefix;
    while (this.generatedIds.contains(id)) {
        occurence++;
        id = idPrefix + "-" + occurence;
    }

    // Save the generated id so that the next call to this method will not generate the same id.
    this.generatedIds.add(id);

    return id;
}

From source file:tech.sirwellington.alchemy.test.junit.runners.GenerateStringTest.java

@Test
public void testValues() {
    System.out.println("testValues");

    // Build a generator from the @GenerateString annotation under test.
    AlchemyGenerator<String> result = GenerateString.Values.createGeneratorFor(annotation);
    assertThat(result, notNullValue());

    String string = result.get();
    assertThat(string, not(isEmptyOrNullString()));

    if (type == UUID) {
        // UUID strings have a fixed canonical length; derive it rather than hard-coding 36.
        int uuidLength = java.util.UUID.randomUUID().toString().length();
        assertThat(string.length(), is(uuidLength));
    } else {
        // All other types honor the requested length from the annotation.
        assertThat(string.length(), is(length));
    }

    // Per-type character-class assertions.
    switch (type) {
    case ALPHABETIC:
        assertThat(StringUtils.isAlpha(string), is(true));
        break;
    case ALPHANUMERIC:
        assertThat(StringUtils.isAlphanumeric(string), is(true));
        break;
    case HEXADECIMAL:
        assertThat(string.matches("[A-Fa-f0-9]+"), is(true));
        break;
    //No additional assertions for remaining types (e.g. UUID already checked above).
    }

}

From source file:TextCleaning.SpellCheckingMethods.java

/**
 * Attempts to normalize elongated words ("cooool" -> "cool" or "col") by
 * detecting runs of repeated letters and testing shortened variants against
 * the heuristics dictionary.
 *
 * Pass 1 scans the term and records every run of 2+ identical letters as a
 * {@link RepeatedLetters} (char, start index, run length). Pass 2 repeatedly
 * tries collapsing each run — first to two letters, checking the heuristics
 * map; then, for a trailing run in a single word, down to one letter —
 * bounded to 5 iterations to avoid looping forever.
 *
 * @param currTerm the raw token to de-elongate
 * @return the (possibly) shortened term; the input unchanged if no
 *         substitution matched
 */
public String repeatedCharacters(String currTerm) {
    String toReturn = currTerm;
    Integer index = null; // start index of the current run; null = not in a run
    Set<RepeatedLetters> setRL = new HashSet();
    int count = 1; // length of the current run of identical letters
    char[] chars = currTerm.toCharArray();
    char currChar;
    char previousChar = 0;
    for (int i = 0; i < chars.length; i++) {
        currChar = chars[i];
        if (i > 0) {
            previousChar = chars[i - 1];
        }
        // Extend the run only for letters, so "111" or "--" are ignored.
        if (previousChar == currChar && StringUtils.isAlpha(String.valueOf(previousChar))) {
            if (index == null) {
                index = i - 1;
            }
            count++;

        } else {
            // Run ended: record it if it had at least two characters.
            if (count > 1) {
                setRL.add(new RepeatedLetters(previousChar, index, count));
                count = 1;
            }
            index = null;

        }
        // Flush a run that extends to the end of the term.
        // NOTE(review): this uses previousChar (= chars[length-2]) as the run
        // character; correct only because a live run implies the last two
        // chars are equal — confirm RepeatedLetters isn't affected otherwise.
        if (i == (chars.length - 1) && count > 1) {
            setRL.add(new RepeatedLetters(previousChar, index, count));

        }
    }

    boolean loop = true;
    int loopsCounter = 0; // hard cap on retries to guarantee termination
    while (loop) {
        loopsCounter++;
        if (loopsCounter > 5) {
            break;
        }
        for (RepeatedLetters rl : setRL) {
            String letter = String.valueOf(rl.getCurrChar());
            String toReplace;
            String subs;
            String toBeReplaced;

            //if two same letters are found
            if (rl.getCount() > 1) {
                // NOTE(review): indices refer to currTerm, but replacements are
                // applied to toReturn — safe only while earlier substitutions
                // don't shift this run's text; verify with overlapping runs.
                toBeReplaced = currTerm.substring(rl.getIndex(), rl.getIndex() + rl.getCount());

                ///if these are actually 3 letters or more, test if by replacing them by 2 letters we have a match in the heuristics
                if (rl.getCount() > 2) {
                    toReplace = letter + letter;
                    subs = StringUtils.replace(toReturn, toBeReplaced, toReplace);
                    if (HLoader.getMapHeuristics().containsKey(subs.toLowerCase())) {
                        // Double-letter form is a known word: accept and stop.
                        toReturn = subs;
                        loop = false;
                        break;
                    } else if (toReturn.endsWith(toReplace) && !toReturn.contains(" ")) {
                        // Trailing run in a single word: collapse to one letter
                        // and retry the whole substitution loop.
                        toReturn = StringUtils.replace(toReturn, toBeReplaced, letter);
                        loop = true;
                        break;
                    }
                }

                // and maybe that if they are just one, this is a match too? (as in "boredd" meaning "bored")
                //                    toReplace = letter;
                //                    subs = StringUtils.replace(toReturn, toBeReplaced, toReplace);
                //                    if (HLoader.getMapHeuristics().containsKey(subs.toLowerCase())) {
                //                        toReturn = subs;
                //                        loop = false;
                //                        break;
                //                    }
            } else {
                loop = false;
            }
        }
    }
    return toReturn;
}

From source file:ubic.gemma.web.controller.common.auditAndSecurity.SecurityControllerImpl.java

/**
 * Creates a new security group after validating its name.
 *
 * @param groupName desired group name; must be purely alphabetic and at
 *        least 3 characters long
 * @return the created group's name (echoes {@code groupName})
 * @throws IllegalArgumentException if the name is blank, too short, or
 *         contains non-letter characters
 */
@Override
public String createGroup(String groupName) {

    // isBlank(null) is true, so the length/isAlpha checks are never reached
    // with a null name.
    final boolean validName = !StringUtils.isBlank(groupName)
            && groupName.length() >= 3
            && StringUtils.isAlpha(groupName);
    if (!validName) {
        throw new IllegalArgumentException(
                "Group name must contain only letters and must be at least 3 letters long.");
    }

    securityService.createGroup(groupName);
    return groupName;
}