Example usage for org.apache.commons.lang3 StringUtils isAllUpperCase

List of usage examples for org.apache.commons.lang3 StringUtils isAllUpperCase

Introduction

This page collects example usages of org.apache.commons.lang3 StringUtils isAllUpperCase.

Prototype

public static boolean isAllUpperCase(final CharSequence cs) 

Source Link

Document

Checks if the CharSequence contains only uppercase characters.

A null input returns false.

Usage

From source file:edu.illinois.cs.cogcomp.wikifier.utils.spelling.SurfaceFormSpellChecker.java

/**
 * Returns a corrected form of the given surface string.
 *
 * <p>Two steps are tried in order:
 * <ol>
 *   <li>Upper-case normalization: when the text, ignoring everything that is not an ASCII
 *       letter or digit, consists of more than three characters that are all upper case, it is
 *       lower-cased and the first letter of every word is capitalized
 *       (e.g. {@code HONG KONG => Hong Kong}).</li>
 *   <li>Spell check: a previously cached correction is returned if present; otherwise, when
 *       caching is enabled, a correction is obtained from {@code getGoogleCorrection}, stored
 *       in the cache and returned.</li>
 * </ol>
 * If neither step applies the text is returned unchanged.
 *
 * @param text the surface form to correct; must not be {@code null}
 * @return the normalized or corrected text, or {@code text} itself when nothing applies
 */
public static String getCorrection(String text) {

    // All uppercase normalization HONG KONG => Hong Kong.
    // Only punctuation/whitespace is stripped here. The previous pattern "[^A-Z0-9]*" also
    // removed lower-case letters, so mixed-case input such as "McDonald's Big Mac USA" was
    // reduced to its capitals, mistaken for an all-caps string and re-cased.
    // Digits are kept on purpose: as before, text containing digits is not normalized because
    // the all-upper-case check fails on them.
    String noPunc = text.replaceAll("[^A-Za-z0-9]", "");

    if (noPunc.length() > 3 && StringUtils.isAllUpperCase(noPunc)) {
        char[] letters = text.toLowerCase().toCharArray();
        for (int i = 0; i < letters.length; i++) {
            // Capitalize the very first character and every letter that follows a non-letter.
            boolean startsWord = i == 0
                    || (!Character.isLetter(letters[i - 1]) && Character.isLetter(letters[i]));
            if (startsWord) {
                letters[i] = Character.toUpperCase(letters[i]);
            }
        }
        return new String(letters);
    }

    // Spell check: look the text up once and reuse the result.
    Object cached = correctionCache.get(text);
    if (cached != null) {
        return String.valueOf(cached);
    }
    if (caching) {
        String correction = getGoogleCorrection(text);
        correctionCache.put(text, correction);
        return correction;
    }

    return text;
}

From source file:de.micromata.tpsb.doc.TpsbEnvUtils.java

/**
 * Tells whether the given type name consists solely of upper-case characters, which this
 * utility treats as the mark of a generic type parameter.
 *
 * @param type the type name to inspect
 * @return the result of {@code StringUtils.isAllUpperCase(type)}
 */
public static boolean isGenericType(String type) {
    // A former additional restriction (type.length() < 3) is intentionally not applied.
    final boolean upperCaseOnly = StringUtils.isAllUpperCase(type);
    return upperCaseOnly;
}

From source file:de.micromata.tpsb.doc.ParserContext.java

/**
 * Resolves a class name to a fully qualified name using the current imports and package.
 *
 * <p>Empty names are returned as they are. After generic arguments have been removed, names
 * that are all upper case (treated as generic type parameters) or that already contain a dot
 * are returned without further lookup. Otherwise the name is looked up in the current
 * imports, falling back to {@code currentPackage + "." + name}.
 *
 * @param className the simple or qualified class name, possibly with generics
 * @return the resolved name as described above
 */
public String getFullQualifiedNameFromImports(String className) {
    if (StringUtils.isEmpty(className)) {
        return className;
    }

    // Strip generic arguments, if any.
    final String rawName = removeGenerics(className);

    // A generic type parameter, or a name that is already fully qualified, is used as is.
    if (StringUtils.isAllUpperCase(rawName) || rawName.contains(".")) {
        return rawName;
    }

    final String fromImports = currentImports.getKey(rawName);
    return StringUtils.isNotEmpty(fromImports) ? fromImports : currentPackage + "." + rawName;
}

From source file:FunctionParser.MathExp2SPARQL.java

/**
 * Translates a textual price function into a SPARQL SELECT query stored in
 * {@code SPARQLQuery}.
 *
 * <p>Two input shapes are handled:
 * <ul>
 *   <li>Conditional functions (the text contains {@code "IF"}): the text is split on
 *       {@code '~'} into partitions, each of the form {@code condition;formula}. Every
 *       partition becomes a {@code BIND(IF(...), ..., 0) AS ?resultN} clause and a final
 *       {@code BIND} adds all {@code ?resultN} up into {@code ?result}.</li>
 *   <li>Plain formulas: the whole text is parsed and bound to {@code ?result}.</li>
 * </ul>
 * For every variable collected in {@code usageVariables} that is not all upper case, triple
 * patterns binding {@code ?<name>_value} are added to the query.
 *
 * @param StringFunction the function text to translate
 * @param prov not read by this constructor
 * @param usage not read by this constructor
 */
public MathExp2SPARQL(String StringFunction, List<Provider> prov, List<Usage> usage) {
    Parser p = new Parser(true);
    Map<String, Operator> temp = p.getFactory().getIdentifier2OperatorMap();

    // Copy the parser's identifier -> operator-string table into opmap.
    Iterator<String> it = temp.keySet().iterator();
    while (it.hasNext()) {
        String key = it.next().toString();
        String val = temp.get(key).getOperatorString();
        //System.out.println(key + "  " +val); //print the key-value entry
        opmap.put(key, val);
    }

    if (StringFunction.contains("IF")) {
        SPARQLQuery = "SELECT ?result\n" + "WHERE {\n";
        String[] partitions = StringFunction.split("~");
        ArrayList<String> parcels = new ArrayList<String>();
        int counter = 0;
        for (String part : partitions) {
            // data[0] is the condition (with its keyword), data[1] the formula.
            String[] data = part.split(";");
            // NOTE(review): removing "IF" first already turns "ELSEIF" into "ELSE", so the
            // second replace never matches and the third one removes the remainder. Any
            // identifier containing "IF" or "ELSE" is altered as well.
            String cond = data[0].replace("IF", "");
            cond = cond.replace("ELSEIF", "");
            cond = cond.replace("ELSE", "");
            String form = data[1];
            //System.out.println("Cond:  "+cond+"\nForm:  "+form);

            // Reset the shared conversion state before handling the condition.
            stack.clear();
            infixstack.clear();
            usageVariables.clear();

            ASTNode objcond = p.parse(cond);
            convert(objcond);
            // Presumably renders the converted expression in infix form; brackets are removed.
            String fa = prefixToInfix(stack, infixstack);
            fa = fa.replace("[", "");
            fa = fa.replace("]", "");

            // Here we can detect the type of the variable by matching the variable's name with
            // the Java object. Through the Java object we can see whether it is a qualitative
            // or quantitative value and treat it as such.
            for (String s : usageVariables)
            {
                // Skip names that already occur anywhere in the query text built so far.
                if (!SPARQLQuery.contains(s)) {
                    if (!StringUtils.isAllUpperCase(s))
                        SPARQLQuery = SPARQLQuery + "\n:" + s + " price:hasValue ?" + s + "_instance .\n" + "?"
                                + s + "_instance gr:hasValue ?" + s + "_value .\n";
                    else {
                        //add syntax to deal with constant qualitative attributes like, WINDOWS will probably be 'windows' in the SPARQL Query
                    }
                }
            }

            // Reset the shared conversion state again before handling the formula.
            stack.clear();
            infixstack.clear();
            usageVariables.clear();

            ASTNode objform = p.parse(form);
            convert(objform);
            String fb = prefixToInfix(stack, infixstack);
            fb = fb.replace("[", "");
            fb = fb.replace("]", "");

            // Same variable handling as for the condition above.
            for (String s : usageVariables)
            {
                if (!SPARQLQuery.contains(s)) {
                    if (!StringUtils.isAllUpperCase(s))
                        SPARQLQuery = SPARQLQuery + "\n:" + s + " price:hasValue ?" + s + "_instance .\n" + "?"
                                + s + "_instance gr:hasValue ?" + s + "_value .\n";
                    else {
                        //add syntax to deal with constant qualitative attributes like, WINDOWS will probably be 'windows' in the SPARQL Query
                    }
                }
            }

            // Example of the intended shape:
            //BIND (IF(((?gbs > 1) && (?gbs <= (10 * 1024))), ((?gbs - 1) * ?price10), 0) AS ?priceA) .
            String parcel = "BIND(IF(( " + fa + " ),(" + fb + "),0) AS ?result" + counter++ + " ).";
            parcels.add(parcel);
            //System.out.println("Cond : "+fa+"\nForm:  "+fb+"\n***************************");

        }
        // Example of the intended shape:
        //  BIND ((((?priceA + ?priceB) + ?priceC) + ?priceD) AS ?price) .

        // Build "?resultN+...+?result1+?result0", counting down from the last parcel index.
        String sum = "";
        counter--;
        for (; counter >= 0; counter--) {
            if (counter == 0)
                sum = sum + "?result" + counter;
            else
                sum = sum + "?result" + counter + "+";
        }

        String lastParcel = "BIND((" + sum + ") AS ?result ) .";

        // Append the per-partition BIND clauses, then the summing BIND, then close the WHERE block.
        for (String s : parcels)
            SPARQLQuery = SPARQLQuery + s + "\n";

        SPARQLQuery = SPARQLQuery + lastParcel + "\n";
        SPARQLQuery = SPARQLQuery + "\n}\n";

        //System.out.println(SPARQLQuery);
    } else {
        ASTNode obj = p.parse(StringFunction);//insert the mathematical formula here
        convert(obj);

        SPARQLQuery = "SELECT ?result\n" + "WHERE {\n";
        // Here we can detect the type of the variable by matching the variable's name with the
        // Java object. Through the Java object we can see whether it is a qualitative or
        // quantitative value and treat it as such.
        // Unlike the conditional branch, no duplicate check against the query text is made here.
        for (String s : usageVariables)
        {
            if (!StringUtils.isAllUpperCase(s))
                SPARQLQuery = SPARQLQuery + "\n:" + s + " price:hasValue ?" + s + "_instance .\n" + "?" + s
                        + "_instance gr:hasValue ?" + s + "_value .\n";
            else {

            }
            //add syntax to deal with constant qualitative attributes like, WINDOWS will probably be 'windows' in the SPARQL Query
        }

        String f = prefixToInfix(stack, infixstack);
        f = f.replace("[", "");
        f = f.replace("]", "");
        SPARQLQuery = SPARQLQuery + "\nBIND((" + f + ") AS ?result  ) .\n" + "}";
    }
}

From source file:Heuristics.TermLevelHeuristics.java

/**
 * Checks whether the first character of the term is upper case.
 *
 * @param termOrig the term to inspect
 * @return the result of {@code StringUtils.isAllUpperCase} applied to the leftmost character
 *         of {@code termOrig}
 */
public boolean isFirstLetterCapitalized(String termOrig) {
    final String firstCharacter = StringUtils.left(termOrig, 1);
    return StringUtils.isAllUpperCase(firstCharacter);
}

From source file:edu.toronto.cs.cidb.ncbieutils.NCBIEUtilsAccessService.java

/**
 * Converts text written entirely in capitals to sentence case.
 *
 * <p>Only the ASCII letters of the text are considered for the all-capitals check; when they
 * are all upper case the whole text is lower-cased and its first character capitalized.
 *
 * @param text the text to fix; may be {@code null}
 * @return an empty string for {@code null} or empty input, the re-cased text when its letters
 *         are all upper case, otherwise {@code text} unchanged
 */
private static String fixCase(String text) {
    if (text == null || text.isEmpty()) {
        return "";
    }
    final String lettersOnly = text.replaceAll("[^a-zA-Z]", "");
    return StringUtils.isAllUpperCase(lettersOnly)
            ? StringUtils.capitalize(text.toLowerCase())
            : text;
}

From source file:FunctionParser.MathExp2SPARQL.java

/**
 * Converts a parser AST node into an expression while recording, as a side effect, the
 * textual pieces of the expression on {@code stack} and the variable names encountered in
 * {@code usageVariables}.
 *
 * <p>Per node kind:
 * <ul>
 *   <li>function nodes: head and arguments are converted recursively into an AST;</li>
 *   <li>fraction nodes: the parenthesized node text is pushed; no early return happens, so the
 *       remaining checks still run;</li>
 *   <li>pattern nodes: converted from their symbol and constraint;</li>
 *   <li>symbol nodes: known symbols push their operator string from {@code opmap}; other names
 *       are pushed as is when all upper case, otherwise as {@code ?<name>_value}, and are
 *       recorded as usage variables;</li>
 *   <li>integer and float nodes: their textual value is pushed;</li>
 *   <li>string nodes: converted without touching the stack.</li>
 * </ul>
 *
 * @param node the node to convert; may be {@code null}
 * @return the converted expression, or {@code null} for a {@code null} node
 * @throws ConversionException declared by the signature; not thrown directly in this method
 */
public IExpr convert(ASTNode node) throws ConversionException {
    if (node == null) {
        return null;
    }

    if (node instanceof FunctionNode) {
        final FunctionNode fn = (FunctionNode) node;
        // Element 0 is the head; the remaining elements are appended in order.
        final IAST result = F.ast(convert((ASTNode) fn.get(0)));
        int position = 1;
        while (position < fn.size()) {
            result.add(convert((ASTNode) fn.get(position)));
            position++;
        }
        return result;
    }

    if (node instanceof FractionNode) {
        // Only the fraction's text is needed; processing deliberately continues below.
        stack.add("(" + node.toString() + ")");
    }

    if (node instanceof PatternNode) {
        final PatternNode pattern = (PatternNode) node;
        return F.pattern((ISymbol) convert(pattern.getSymbol()), convert(pattern.getConstraint()));
    }

    if (node instanceof SymbolNode) {
        final String name = node.getString();
        if (SYMBOLS_MAP.containsKey(name.toLowerCase())) {
            // Operator: push its operator string.
            stack.add(opmap.get(node.toString()));
        } else {
            // Variable: all-upper-case names are pushed verbatim, others as ?<name>_value.
            stack.add(StringUtils.isAllUpperCase(name) ? name : "?" + name.concat("_value"));
            usageVariables.add(name);
        }
        return F.symbol(name);
    }

    if (node instanceof IntegerNode) {
        final IntegerNode intNode = (IntegerNode) node;
        final String digits = intNode.getString();
        if (digits == null) {
            stack.add("" + intNode.getIntValue());
            return F.integer(intNode.getIntValue());
        }
        stack.add("" + F.integer(digits, intNode.getNumberFormat()));
        return F.integer(digits, intNode.getNumberFormat());
    }

    if (node instanceof StringNode) {
        return F.stringx(node.getString());
    }

    if (node instanceof FloatNode) {
        stack.add(node.getString());
        return F.num(node.getString());
    }

    // Anything else (including fraction nodes that fell through) becomes a symbol.
    return F.symbol(node.toString());
}

From source file:Heuristics.TermLevelHeuristics.java

/**
 * Checks whether the term, with all space characters removed, is entirely upper case.
 *
 * @param termOrig the term to inspect; must not be {@code null}
 * @return the result of {@code StringUtils.isAllUpperCase} on the space-free, trimmed term
 */
public boolean isAllCaps(String termOrig) {
    final String withoutSpaces = termOrig.replace(" ", "").trim();
    return StringUtils.isAllUpperCase(withoutSpaces);
}

From source file:it.cnr.isti.hpc.dexter.disambiguation.TurkishEntityDisambiguator.java

/**
 * Scores every candidate entity of the given spots and selects the best candidate per spot.
 *
 * <p>The method works in four passes over {@code sml.getEntities()}:
 * <ol>
 *   <li>collect type/domain frequencies, shingle texts and description tokens of each distinct
 *       entity id;</li>
 *   <li>compute the individual feature scores of each candidate (window-based similarity to
 *       candidates of neighbouring spots, type/domain, suffix, letter case, name, popularity,
 *       Lesk overlap) and store or merge them in {@code entityScoreMap}, tracking the maximum
 *       of each normalizable feature;</li>
 *   <li>normalize the stored scores by those maxima, once per entity id;</li>
 *   <li>combine the features into a total score (weighted sum, or the RankLib score when
 *       {@code ranklib} is set), halve it for pages whose URL title contains {@code "("}, log
 *       it and set it on the match.</li>
 * </ol>
 * Finally the candidates of each spot are sorted and the first one is returned and recorded in
 * {@code selectedEntities}.
 *
 * <p>Ids are distinguished by their prefix: several features only apply to ids starting with
 * {@code "w"}, others to ids starting with {@code "t"} (presumably Wikipedia vs. dictionary
 * entries; confirm against the index).
 *
 * @param localParams request parameters; the value of {@code "text"} is the input text
 * @param sml the spots with their candidate entities
 * @return one entity match per spot that has at least one candidate
 */
@Override
public EntityMatchList disambiguate(DexterLocalParams localParams, SpotMatchList sml) {
    entityScoreMap = new HashMap<String, EntityScores>();
    selectedEntities = new HashSet<String>();
    Multiset<String> entityFrequencyMultiset = HashMultiset.create();

    EntityMatchList entities = sml.getEntities();
    String inputText = localParams.getParams().get("text");

    // Constant-first comparison: a missing property means "not ambiguous" instead of an NPE.
    final boolean allowAmbiguous = "true".equals(Property.getInstance().get("algorithm.ambigious"));

    List<Token> inputTokens = Zemberek.getInstance().disambiguateFindTokens(inputText, false, true);
    List<Double> documentVector = DescriptionEmbeddingAverage.getAverageVectorList(inputText);
    Multiset<String> inputTokensMultiset = HashMultiset.create();
    for (Token token : inputTokens) {
        inputTokensMultiset.add(token.getMorphText());
    }

    Multiset<String> domainMultiset = HashMultiset.create();
    Multiset<String> typeMultiset = HashMultiset.create();
    HashMap<String, Double> entitySimMap = new HashMap<String, Double>();
    HashSet<String> words = new HashSet<String>();
    Multiset<String> leskWords = HashMultiset.create();

    // First pass: count types and domains and gather words of every distinct entity id.
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        String id = em.getId();
        boolean firstOccurrence = !entityFrequencyMultiset.contains(id);
        entityFrequencyMultiset.add(id);
        if (!firstOccurrence) {
            continue;
        }

        Entity entity = em.getEntity();
        words.add(entity.getShingle().getText());
        String type = entity.getPage().getType();
        if (type != null && type.length() > 0) {
            typeMultiset.add(type);
        }
        String domain = entity.getPage().getDomain();
        if (domain != null && domain.length() > 0) {
            domainMultiset.add(domain);
        }

        // Same null guard as in the second pass, which already allows a missing description.
        String desc = entity.getPage().getDescription();
        if (desc != null) {
            List<Token> tokens = Zemberek.getInstance().disambiguateFindTokens(desc, false, true);
            for (Token token : tokens) {
                leskWords.add(token.getMorphText());
            }
        }
    }

    // The first element of the highest-count-first copy carries the maximum count.
    int maxDomainCount = 0;
    for (String domain : Multisets.copyHighestCountFirst(domainMultiset).elementSet()) {
        maxDomainCount = domainMultiset.count(domain);
        break;
    }
    int maxTypeCount = 0;
    for (String type : Multisets.copyHighestCountFirst(typeMultiset).elementSet()) {
        maxTypeCount = typeMultiset.count(type);
        break;
    }

    // The word set no longer changes, so render it once for the type-content check.
    final String wordsText = words.toString();

    double maxSuffixScore = 0, maxLeskScore = 0, maxSimpleLeskScore = 0, maxLinkScore = 0,
            maxHashInfoboxScore = 0, maxwordvecDescriptionLocalScore = 0, maxHashDescriptionScore = 0,
            maxPopularityScore = 0, maxWordvectorAverage = 0, maxWordvecLinksScore = 0;

    // Second pass: compute the feature scores of every candidate.
    int currentSpotIndex = -1;
    SpotMatch currentSpot = null;
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        SpotMatch spot = em.getSpot();
        // Identity comparison, as before: a new spot object means the next spot index.
        if (currentSpot == null || spot != currentSpot) {
            currentSpotIndex++;
            currentSpot = spot;
        }

        String id = em.getId();
        Entity entity = em.getEntity();
        EntityPage page = entity.getPage();
        String domain = page.getDomain();
        String type = page.getType();
        final boolean wikiEntity = id.startsWith("w");

        /* windowing algorithm starts */
        // Window of spots [left, right) around the current spot, shifted to stay in range.
        int left = currentSpotIndex - window;
        int right = currentSpotIndex + window;
        if (left < 0) {
            right -= left;
            left = 0;
        }
        if (right > sml.size()) {
            left += (sml.size()) - right;
            right = sml.size();
            if (left < 0) {
                left = 0;
            }
        }

        double linkScore = 0, hashInfoboxScore = 0, wordvecDescriptionLocalScore = 0, hashDescriptionScore = 0,
                wordvecLinksScore = 0;
        for (int j = left; j < right; j++) {
            EntityMatchList entities2 = sml.get(j).getEntities();

            // Unless ambiguous spots are allowed, a neighbouring spot only contributes when it
            // has exactly one "w" candidate. This depends on the spot alone, so it is computed
            // once per spot rather than once per candidate.
            if (!allowAmbiguous) {
                int wikiCandidates = 0;
                for (EntityMatch candidate : entities2) {
                    if (candidate.getId().startsWith("w")) {
                        wikiCandidates++;
                    }
                }
                if (wikiCandidates != 1) {
                    continue;
                }
            }

            for (EntityMatch em2 : entities2) {
                String id2 = em2.getId();
                // Skip candidates of the same spot and the entity itself.
                if (em.getSpot() == em2.getSpot() || id.equals(id2)) {
                    continue;
                }
                EntityPage page2 = em2.getEntity().getPage();

                if (wikiEntity && id2.startsWith("w")) {
                    // Link similarity (Jaccard over the pages' link sets), cached per id pair.
                    String linkKey = "link" + id + id2;
                    double linkSim;
                    if (entitySimMap.containsKey(linkKey)) {
                        linkSim = entitySimMap.get(linkKey);
                    } else {
                        HashSet<String> set1 = Sets.newHashSet(page.getLinks().split(" "));
                        HashSet<String> set2 = Sets.newHashSet(page2.getLinks().split(" "));
                        linkSim = JaccardCalculator.calculateSimilarity(set1, set2);
                        entitySimMap.put(linkKey, linkSim);
                    }
                    linkScore += linkSim;

                    // Entity embedding similarity, cached per id pair.
                    String eeKey = "ee" + id + id2;
                    double eeSim;
                    if (entitySimMap.containsKey(eeKey)) {
                        eeSim = entitySimMap.get(eeKey);
                    } else {
                        eeSim = EntityEmbeddingSimilarity.getInstance().getSimilarity(page, page2);
                        entitySimMap.put(eeKey, eeSim);
                    }
                    hashInfoboxScore += eeSim;

                    // word2vec similarity of the pages' link vectors, cached per id pair.
                    String wlKey = "wl" + id + id2;
                    double w2veclinksSim;
                    if (entitySimMap.containsKey(wlKey)) {
                        w2veclinksSim = entitySimMap.get(wlKey);
                    } else {
                        w2veclinksSim = AveragePooling.getInstance().getSimilarity(page.getWord2vec(),
                                page2.getWord2vec());
                        entitySimMap.put(wlKey, w2veclinksSim);
                    }
                    wordvecLinksScore += w2veclinksSim;
                }

                // Description word2vec similarity, cached per id pair.
                String w2vKey = "w2v" + id + id2;
                double word2vecSim;
                if (entitySimMap.containsKey(w2vKey)) {
                    word2vecSim = entitySimMap.get(w2vKey);
                } else {
                    word2vecSim = AveragePooling.getInstance().getSimilarity(page2.getDword2vec(),
                            page.getDword2vec());
                    entitySimMap.put(w2vKey, word2vecSim);
                }
                wordvecDescriptionLocalScore += word2vecSim;

                // Description autoencoder similarity, cached per id pair.
                String a2vKey = "a2v" + id + id2;
                double autoVecSim;
                if (entitySimMap.containsKey(a2vKey)) {
                    autoVecSim = entitySimMap.get(a2vKey);
                } else {
                    autoVecSim = AveragePooling.getInstance().getSimilarity(page2.getDautoencoder(),
                            page.getDautoencoder());
                    entitySimMap.put(a2vKey, autoVecSim);
                }
                hashDescriptionScore += autoVecSim;
            }
        }
        if (linkScore > maxLinkScore) {
            maxLinkScore = linkScore;
        }
        if (hashInfoboxScore > maxHashInfoboxScore) {
            maxHashInfoboxScore = hashInfoboxScore;
        }
        if (wordvecDescriptionLocalScore > maxwordvecDescriptionLocalScore) {
            maxwordvecDescriptionLocalScore = wordvecDescriptionLocalScore;
        }
        if (hashDescriptionScore > maxHashDescriptionScore) {
            maxHashDescriptionScore = hashDescriptionScore;
        }
        if (wordvecLinksScore > maxWordvecLinksScore) {
            maxWordvecLinksScore = wordvecLinksScore;
        }
        /* windowing algorithm ends */

        double domainScore = 0;
        if (domainMultiset.size() > 0 && maxDomainCount > 1 && domainMultiset.count(domain) > 1) {
            domainScore = (double) domainMultiset.count(domain) / maxDomainCount;
        }
        double typeScore = 0;
        if (typeMultiset.size() > 0 && maxTypeCount > 1 && typeMultiset.count(type) > 1) {
            typeScore = (double) typeMultiset.count(type) / maxTypeCount;
        }
        if (typeBlackList.contains(type)) {
            typeScore /= 10;
        }

        // type may be null (see the null checks elsewhere), so guard before calling length().
        double typeContentScore = 0;
        if (type != null && type.length() > 0 && StringUtils.containsIgnoreCase(wordsText, type)) {
            typeContentScore = 1;
        }

        double typeClassifierScore = TypeClassifier.getInstance().predict(page, page.getTitle(), page.getType(),
                entity.getShingle().getSentence());

        double wordvecDescriptionScore = AveragePooling.getInstance().getSimilarity(documentVector,
                page.getDword2vec());
        if (wordvecDescriptionScore > maxWordvectorAverage) {
            maxWordvectorAverage = wordvecDescriptionScore;
        }

        // Suffix score: similarity between the type and every distinct mention of this entity
        // other than its (Turkish lower-cased) title.
        double suffixScore = 0;
        if (type != null && type.length() > 0) {
            Set<String> suffixes = new HashSet<String>();
            String t = entity.getTitle().toLowerCase(new Locale("tr", "TR"));

            for (int x = 0; x < entities.size(); x++) {
                EntityMatch e2 = entities.get(x);
                if (e2.getId().equals(entity.getId())) {
                    suffixes.add(e2.getMention());
                }
            }
            suffixes.remove(t);
            suffixes.remove(entity.getTitle());
            if (suffixes.size() >= minSuffix) {
                for (String suffix : suffixes) {
                    suffixScore += gd.calculateSimilarity(suffix, type);
                }
            }
        }
        if (suffixScore > maxSuffixScore) {
            maxSuffixScore = suffixScore;
        }

        // Letter-case agreement between the mention and the page's recorded letter case.
        double letterCaseScore = 0;
        int lc = page.getLetterCase();
        if (StringUtils.isAllLowerCase(em.getMention()) && lc == 0 && id.startsWith("t")) {
            letterCaseScore = 1;
        } else if (StringUtils.isAllUpperCase(em.getMention()) && lc == 1 && wikiEntity) {
            letterCaseScore = 1;
        } else if (Character.isUpperCase(em.getMention().charAt(0)) && lc == 2 && wikiEntity) {
            letterCaseScore = 1;
        } else if (StringUtils.isAllLowerCase(em.getMention()) && id.startsWith("t")) {
            letterCaseScore = 1;
        }

        double nameScore = 1 - LevenshteinDistanceCalculator.calculateDistance(page.getTitle(),
                Zemberek.removeAfterSpostrophe(em.getMention()));

        double popularityScore = page.getRank();
        if (wikiEntity) {
            popularityScore = Math.log10(popularityScore + 1);
            if (popularityScore > maxPopularityScore) {
                maxPopularityScore = popularityScore;
            }
        }

        // Lesk overlap between the description and the input text / all candidate descriptions.
        double leskScore = 0, simpleLeskScore = 0;
        String desc = page.getDescription();
        if (desc != null) {
            List<Token> tokens = Zemberek.getInstance().disambiguateFindTokens(desc, false, true);
            for (Token token : tokens) {
                String morph = token.getMorphText();
                if (TurkishNLP.isStopWord(morph)) {
                    continue;
                }
                if (inputTokensMultiset.contains(morph)) {
                    simpleLeskScore += inputTokensMultiset.count(morph);
                }
                if (leskWords.contains(morph)) {
                    leskScore += leskWords.count(morph);
                }
            }
            // log(1) == 0: dividing for an empty token list would turn both scores into NaN
            // (0 / 0) and poison the total score, so only normalize when there are tokens.
            double lengthNorm = Math.log(tokens.size() + 1);
            if (lengthNorm > 0) {
                leskScore /= lengthNorm;
                simpleLeskScore /= lengthNorm;
            }
            if (leskScore > maxLeskScore) {
                maxLeskScore = leskScore;
            }
            if (simpleLeskScore > maxSimpleLeskScore) {
                maxSimpleLeskScore = simpleLeskScore;
            }
        }

        // Scores are registered for every candidate, with or without a description; the later
        // passes dereference the entry of every id unconditionally.
        EntityScores known = entityScoreMap.get(id);
        if (known == null) {
            EntityScores scores = new EntityScores(em, id, popularityScore, nameScore, letterCaseScore,
                    suffixScore, wordvecDescriptionScore, typeContentScore, typeScore, domainScore,
                    hashDescriptionScore, wordvecDescriptionLocalScore, hashInfoboxScore, linkScore,
                    wordvecLinksScore, leskScore, simpleLeskScore, typeClassifierScore);
            entityScoreMap.put(id, scores);
        } else {
            // Same id seen at another position: average the window-dependent scores.
            known.setHashInfoboxScore((known.getHashInfoboxScore() + hashInfoboxScore) / 2);
            // Averages with the stored hash-description score (previously the infobox score
            // was used here by mistake).
            known.setHashDescriptionScore((known.getHashDescriptionScore() + hashDescriptionScore) / 2);
            known.setLinkScore((known.getLinkScore() + linkScore) / 2);
            known.setWordvecDescriptionLocalScore(
                    (known.getWordvecDescriptionLocalScore() + wordvecDescriptionLocalScore) / 2);
            known.setWordvecLinksScore((known.getWordvecLinksScore() + wordvecLinksScore) / 2);
            known.setLeskScore((known.getLeskScore() + leskScore) / 2);
        }
    }

    /* normalization starts: each stored score is divided by its maximum, once per id */
    Set<String> set = new HashSet<String>();
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        String id = em.getId();
        EntityScores entityScores = entityScoreMap.get(id);
        if (set.contains(id)) {
            continue;
        }
        if (id.startsWith("w")) {
            if (maxLinkScore > 0 && entityScores.getLinkScore() > 0) {
                entityScores.setLinkScore(entityScores.getLinkScore() / maxLinkScore);
            }
            if (maxHashInfoboxScore > 0 && entityScores.getHashInfoboxScore() > 0) {
                entityScores.setHashInfoboxScore(entityScores.getHashInfoboxScore() / maxHashInfoboxScore);
            }
            if (maxWordvecLinksScore > 0 && entityScores.getWordvecLinksScore() > 0) {
                entityScores.setWordvecLinksScore(entityScores.getWordvecLinksScore() / maxWordvecLinksScore);
            }
            if (maxPopularityScore > 0 && entityScores.getPopularityScore() > 0) {
                entityScores.setPopularityScore(entityScores.getPopularityScore() / maxPopularityScore);
            }
        }
        if (maxwordvecDescriptionLocalScore > 0 && entityScores.getWordvecDescriptionLocalScore() > 0) {
            entityScores.setWordvecDescriptionLocalScore(
                    entityScores.getWordvecDescriptionLocalScore() / maxwordvecDescriptionLocalScore);
        }
        if (maxHashDescriptionScore > 0 && entityScores.getHashDescriptionScore() > 0) {
            entityScores
                    .setHashDescriptionScore(entityScores.getHashDescriptionScore() / maxHashDescriptionScore);
        }
        if (maxWordvectorAverage > 0 && entityScores.getWordvecDescriptionScore() > 0) {
            entityScores.setWordvecDescriptionScore(
                    entityScores.getWordvecDescriptionScore() / maxWordvectorAverage);
        }
        if (maxLeskScore > 0 && entityScores.getLeskScore() > 0) {
            entityScores.setLeskScore(entityScores.getLeskScore() / maxLeskScore);
        }
        if (maxSimpleLeskScore > 0 && entityScores.getSimpleLeskScore() > 0) {
            entityScores.setSimpleLeskScore(entityScores.getSimpleLeskScore() / maxSimpleLeskScore);
        }
        if (maxSuffixScore > 0 && entityScores.getSuffixScore() > 0) {
            entityScores.setSuffixScore(entityScores.getSuffixScore() / maxSuffixScore);
        }
        set.add(id);
    }

    /* total score calculation starts */
    LOGGER.info("\t"
            + "id\tTitle\tURL\tScore\tPopularity\tName\tLesk\tSimpeLesk\tCase\tNoun\tSuffix\tTypeContent\tType\tDomain\twordvecDescription\twordvecDescriptionLocal\thashDescription\thashInfobox\tword2vecLinks\tLink\t\ttypeClassifier\tDescription");
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        String id = em.getId();
        EntityScores e = entityScoreMap.get(id);

        // Bonus when the mention's initial case matches the kind of entry.
        double wikiScore = 0;
        if (id.startsWith("w") && Character.isUpperCase(em.getMention().charAt(0))) {
            wikiScore = wikiWeight;
        } else if (id.startsWith("t") && Character.isLowerCase(em.getMention().charAt(0))) {
            wikiScore = wikiWeight;
        }

        double totalScore = wikiScore + e.getPopularityScore() * popularityWeight
                + e.getNameScore() * nameWeight + e.getLeskScore() * leskWeight
                + e.getSimpleLeskScore() * simpleLeskWeight + e.getLetterCaseScore() * letterCaseWeight
                + e.getSuffixScore() * suffixWeight + e.getTypeContentScore() * typeContentWeight
                + e.getTypeScore() * typeWeight + e.getDomainScore() * domainWeight
                + e.getWordvecDescriptionScore() * wordvecDescriptionWeight
                + e.getWordvecDescriptionLocalScore() * wordvecDescriptionLocalWeight
                + e.getHashDescriptionScore() * hashDescriptionWeight
                + e.getHashInfoboxScore() * hashInfoboxWeight + e.getWordvecLinksScore() * word2vecLinksWeight
                + e.getLinkScore() * linkWeight + e.getTypeClassifierkScore() * typeClassifierkWeight;
        if (ranklib) {
            // The learned ranking model replaces the weighted sum.
            totalScore = RankLib.getInstance().score(e);
        }

        // Pages whose URL title carries a parenthesized qualifier are penalized.
        if (em.getEntity().getPage().getUrlTitle().contains("(")) {
            totalScore /= 2;
        }
        em.setScore(totalScore);
        e.setScore(totalScore);

        LOGGER.info("\t" + id + "\t" + em.getEntity().getPage().getTitle() + "\t"
                + em.getEntity().getPage().getUrlTitle() + "\t" + em.getScore() + "\t"
                + e.getPopularityScore() * popularityWeight + "\t" + e.getNameScore() * nameWeight + "\t"
                + e.getLeskScore() * leskWeight + "\t" + e.getSimpleLeskScore() * simpleLeskWeight + "\t"
                + e.getLetterCaseScore() * letterCaseWeight + "\t" + e.getSuffixScore() * suffixWeight + "\t"
                + e.getTypeContentScore() * typeContentWeight + "\t" + e.getTypeScore() * typeWeight + "\t"
                + e.getDomainScore() * domainWeight + "\t"
                + e.getWordvecDescriptionScore() * wordvecDescriptionWeight + "\t"
                + e.getWordvecDescriptionLocalScore() * wordvecDescriptionLocalWeight + "\t"
                + e.getHashDescriptionScore() * hashDescriptionWeight + "\t"
                + e.getHashInfoboxScore() * hashInfoboxWeight + "\t"
                + e.getWordvecLinksScore() * word2vecLinksWeight + "\t" + e.getLinkScore() * linkWeight + "\t"
                + e.getTypeClassifierkScore() * typeClassifierkWeight + "\t"
                + em.getEntity().getPage().getDescription());
    }

    // Pick the first candidate of every spot after sorting its candidate list.
    EntityMatchList eml = new EntityMatchList();
    for (SpotMatch match : sml) {
        EntityMatchList list = match.getEntities();
        if (!list.isEmpty()) {
            list.sort();
            eml.add(list.get(0));
            selectedEntities.add(list.get(0).getId());
        }
    }
    return eml;
}

From source file:org.aksw.simba.bengal.triple2nl.converter.SimpleIRIConverter.java

/**
 * Splits camel-cased words of the given text into space-separated parts.
 *
 * <p>The text is processed token by token (tokens are separated by single spaces). Each token
 * is split with {@code StringUtils.splitByCharacterTypeCamelCase}. Consecutive parts that are
 * all upper case and contain no lower-case vowel are joined back together into one part;
 * every resulting part is followed by a single space. Text without any lower-case vowel is
 * returned unchanged.
 *
 * @param s the text to split; must not be {@code null}
 * @return the text with camel-case parts separated by spaces, trimmed
 */
private String splitCamelCase(String s) {
    // we only split if it contains a vowel
    if (!s.matches(".*[aeiou].*")) {
        return s;
    }

    StringBuilder sb = new StringBuilder();
    for (String token : s.split(" ")) {
        // Collects a run of all-upper-case, vowel-free parts that belong together.
        StringBuilder noVowels = new StringBuilder();
        for (String t : StringUtils.splitByCharacterTypeCamelCase(token)) {
            if (t.matches(".*[aeiou].*") || !StringUtils.isAllUpperCase(t)) {
                if (noVowels.length() > 0) {
                    sb.append(noVowels).append(' ');
                    noVowels.setLength(0);
                }
                sb.append(t).append(' ');
            } else {
                noVowels.append(t);
            }
        }
        // Flush a run left over at the end of the token together with its separator.
        // Without the separator it was fused with the next token ("URL of" became "URLof").
        if (noVowels.length() > 0) {
            sb.append(noVowels).append(' ');
        }
    }
    return sb.toString().trim();
}