List of usage examples for java.lang.Double.compare
public static int compare(double d1, double d2)
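The method returns a negative integer, zero, or a positive integer as d1 is numerically less than, equal to, or greater than d2. Before the project examples, here is a minimal self-contained demo (not taken from any source file below) of the two edge cases where Double.compare deliberately disagrees with the primitive operators: NaN is ordered above everything, and -0.0 is ordered below 0.0.

public class DoubleCompareDemo {
    public static void main(String[] args) {
        System.out.println(Double.compare(1.0, 2.0)); // negative
        System.out.println(Double.compare(2.0, 2.0)); // 0
        System.out.println(Double.compare(3.0, 2.0)); // positive

        // NaN is ordered above every other value, including +Infinity
        System.out.println(Double.compare(Double.NaN, Double.POSITIVE_INFINITY) > 0); // true
        System.out.println(Double.NaN > Double.POSITIVE_INFINITY); // false

        // -0.0 is ordered strictly below +0.0, although -0.0 == 0.0 is true
        System.out.println(Double.compare(-0.0, 0.0)); // negative
        System.out.println(-0.0 == 0.0); // true
    }
}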
From source file:org.nd4j.linalg.factory.Nd4j.java
/**
 * Sort an ndarray along a particular dimension
 *
 * @param ndarray the ndarray to sort
 * @param dimension the dimension to sort
 * @return the sorted ndarray
 */
public static IComplexNDArray sort(IComplexNDArray ndarray, int dimension, boolean ascending) {
    for (int i = 0; i < ndarray.vectorsAlongDimension(dimension); i++) {
        IComplexNDArray vec = ndarray.vectorAlongDimension(i, dimension);
        IComplexNumber[] data = new IComplexNumber[vec.length()];
        for (int j = 0; j < vec.length(); j++) {
            data[j] = vec.getComplex(j);
        }
        if (ascending)
            Arrays.sort(data, new Comparator<IComplexNumber>() {
                @Override
                public int compare(IComplexNumber o1, IComplexNumber o2) {
                    return Double.compare(o1.asDouble().absoluteValue().doubleValue(),
                            o2.asDouble().absoluteValue().doubleValue());
                }
            });
        else
            Arrays.sort(data, new Comparator<IComplexNumber>() {
                @Override
                public int compare(IComplexNumber o1, IComplexNumber o2) {
                    return -Double.compare(o1.asDouble().absoluteValue().doubleValue(),
                            o2.asDouble().absoluteValue().doubleValue());
                }
            });
        for (int j = 0; j < vec.length(); j++)
            vec.putScalar(j, data[j]);
    }
    return ndarray;
}
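The descending branch above negates the comparator result. In practice Double.compare returns -1, 0, or 1, but its contract only promises a sign, so the safer reversal is to swap the arguments or call Comparator.reversed(). A minimal sketch of the same ascending/descending idiom (hypothetical helper, not Nd4j code):

import java.util.Arrays;
import java.util.Comparator;

class MagnitudeSort {
    // Sort boxed doubles by absolute value, ascending or descending.
    static void sortByMagnitude(Double[] values, boolean ascending) {
        Comparator<Double> byAbs = (x, y) -> Double.compare(Math.abs(x), Math.abs(y));
        Arrays.sort(values, ascending ? byAbs : byAbs.reversed());
    }
}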
From source file:structuredPredictionNLG.SFX.java
/**
 * @param classifierAttrs
 * @param classifierWords
 * @param testingData
 * @param epoch
 * @return
 */
@Override
public Double evaluateGeneration(HashMap<String, JAROW> classifierAttrs,
        HashMap<String, HashMap<String, JAROW>> classifierWords, ArrayList<DatasetInstance> testingData,
        int epoch) {
    System.out.println("Evaluate argument generation ");
    ArrayList<ScoredFeaturizedTranslation<IString, String>> generations = new ArrayList<>();
    HashMap<DatasetInstance, ArrayList<Action>> generationActions = new HashMap<>();
    ArrayList<ArrayList<Sequence<IString>>> finalReferences = new ArrayList<>();
    HashMap<DatasetInstance, ArrayList<String>> finalReferencesWordSequences = new HashMap<>();
    HashMap<DatasetInstance, String> predictedWordSequences_overAllPredicates = new HashMap<>();
    ArrayList<String> allPredictedWordSequences = new ArrayList<>();
    ArrayList<String> allPredictedMRStr = new ArrayList<>();
    ArrayList<ArrayList<String>> allPredictedReferences = new ArrayList<>();
    HashMap<String, Double> attrCoverage = new HashMap<>();
    HashMap<String, HashSet<String>> abstractMRsToMRs = new HashMap<>();

    for (DatasetInstance di : testingData) {
        String predicate = di.getMeaningRepresentation().getPredicate();
        ArrayList<Action> predictedActionList = new ArrayList<>();
        ArrayList<Action> predictedWordList = new ArrayList<>();

        // PHRASE GENERATION EVALUATION
        String predictedAttr = "";
        ArrayList<String> predictedAttrValues = new ArrayList<>();
        HashSet<String> attrValuesToBeMentioned = new HashSet<>();
        HashSet<String> attrValuesAlreadyMentioned = new HashSet<>();
        for (String attribute : di.getMeaningRepresentation().getAttributeValues().keySet()) {
            for (String value : di.getMeaningRepresentation().getAttributeValues().get(attribute)) {
                attrValuesToBeMentioned.add(attribute.toLowerCase() + "=" + value.toLowerCase());
            }
        }
        if (attrValuesToBeMentioned.isEmpty()) {
            attrValuesToBeMentioned.add("empty=empty");
        }
        while (!predictedAttr.equals(Action.TOKEN_END)
                && predictedAttrValues.size() < getMaxContentSequenceLength()) {
            if (!predictedAttr.isEmpty()) {
                attrValuesToBeMentioned.remove(predictedAttr);
            }
            if (!attrValuesToBeMentioned.isEmpty()) {
                Instance attrTrainingVector = createContentInstance(predicate, "@TOK@", predictedAttrValues,
                        attrValuesAlreadyMentioned, attrValuesToBeMentioned, di.getMeaningRepresentation(),
                        getAvailableContentActions());
                if (attrTrainingVector != null) {
                    Prediction predictAttr = classifierAttrs.get(predicate).predict(attrTrainingVector);
                    if (predictAttr.getLabel() != null) {
                        predictedAttr = predictAttr.getLabel().trim();
                        if (!classifierAttrs.get(predicate).getCurrentWeightVectors().keySet()
                                .containsAll(di.getMeaningRepresentation().getAttributeValues().keySet())) {
                            System.out.println("MR ATTR NOT IN CLASSIFIERS");
                            System.out.println(classifierAttrs.get(predicate).getCurrentWeightVectors().keySet());
                        }
                        String predictedValue = "";
                        if (!predictedAttr.equals(Action.TOKEN_END)) {
                            predictedValue = chooseNextValue(predictedAttr, attrValuesToBeMentioned);
                            HashSet<String> rejectedAttrs = new HashSet<>();
                            while (predictedValue.isEmpty() && (!predictedAttr.equals(Action.TOKEN_END)
                                    || (predictedAttrValues.isEmpty()
                                            && classifierAttrs.get(predicate).getCurrentWeightVectors().keySet()
                                                    .containsAll(di.getMeaningRepresentation()
                                                            .getAttributeValues().keySet())))) {
                                rejectedAttrs.add(predictedAttr);
                                predictedAttr = Action.TOKEN_END;
                                double maxScore = -Double.MAX_VALUE;
                                for (String attr : predictAttr.getLabel2Score().keySet()) {
                                    if (!rejectedAttrs.contains(attr)
                                            && (Double.compare(predictAttr.getLabel2Score().get(attr),
                                                    maxScore) > 0)) {
                                        maxScore = predictAttr.getLabel2Score().get(attr);
                                        predictedAttr = attr;
                                    }
                                }
                                if (!predictedAttr.equals(Action.TOKEN_END)) {
                                    predictedValue = chooseNextValue(predictedAttr, attrValuesToBeMentioned);
                                }
                            }
                        }
                        if (!predictedAttr.equals(Action.TOKEN_END)) {
                            predictedAttr += "=" + predictedValue;
                        }
                        predictedAttrValues.add(predictedAttr);
                        if (!predictedAttr.isEmpty()) {
                            attrValuesAlreadyMentioned.add(predictedAttr);
                            attrValuesToBeMentioned.remove(predictedAttr);
                        }
                    } else {
                        predictedAttr = Action.TOKEN_END;
                        predictedAttrValues.add(predictedAttr);
                    }
                } else {
                    predictedAttr = Action.TOKEN_END;
                    predictedAttrValues.add(predictedAttr);
                }
            } else {
                predictedAttr = Action.TOKEN_END;
                predictedAttrValues.add(predictedAttr);
            }
        }

        // WORD SEQUENCE EVALUATION
        predictedAttr = "";
        ArrayList<String> predictedAttributes = new ArrayList<>();
        attrValuesToBeMentioned = new HashSet<>();
        attrValuesAlreadyMentioned = new HashSet<>();
        HashMap<String, ArrayList<String>> valuesToBeMentioned = new HashMap<>();
        for (String attribute : di.getMeaningRepresentation().getAttributeValues().keySet()) {
            for (String value : di.getMeaningRepresentation().getAttributeValues().get(attribute)) {
                attrValuesToBeMentioned.add(attribute.toLowerCase() + "=" + value.toLowerCase());
            }
            valuesToBeMentioned.put(attribute,
                    new ArrayList<>(di.getMeaningRepresentation().getAttributeValues().get(attribute)));
        }
        if (attrValuesToBeMentioned.isEmpty()) {
            attrValuesToBeMentioned.add("empty=empty");
        }
        HashSet<String> attrValuesToBeMentionedCopy = new HashSet<>(attrValuesToBeMentioned);
        int a = -1;
        for (String attrValue : predictedAttrValues) {
            a++;
            if (!attrValue.equals(Action.TOKEN_END)) {
                String attribute = attrValue.split("=")[0];
                predictedAttributes.add(attrValue);
                // GENERATE PHRASES
                if (!attribute.equals(Action.TOKEN_END)) {
                    if (classifierWords.get(predicate).containsKey(attribute)) {
                        ArrayList<String> nextAttributesForInstance = new ArrayList<>(
                                predictedAttrValues.subList(a + 1, predictedAttrValues.size()));
                        String predictedWord = "";
                        boolean isValueMentioned = false;
                        String valueTBM = "";
                        if (attrValue.contains("=")) {
                            valueTBM = attrValue.substring(attrValue.indexOf('=') + 1);
                        }
                        if (valueTBM.isEmpty()) {
                            isValueMentioned = true;
                        }
                        ArrayList<String> subPhrase = new ArrayList<>();
                        while (!predictedWord.equals(Action.TOKEN_END)
                                && predictedWordList.size() < getMaxWordSequenceLength()) {
                            ArrayList<String> predictedAttributesForInstance = new ArrayList<>();
                            for (int i = 0; i < predictedAttributes.size() - 1; i++) {
                                predictedAttributesForInstance.add(predictedAttributes.get(i));
                            }
                            if (!predictedAttributes.get(predictedAttributes.size() - 1).equals(attrValue)) {
                                predictedAttributesForInstance
                                        .add(predictedAttributes.get(predictedAttributes.size() - 1));
                            }
                            Instance wordTrainingVector = createWordInstance(predicate,
                                    new Action("@TOK@", attrValue), predictedAttributesForInstance,
                                    predictedActionList, nextAttributesForInstance, attrValuesAlreadyMentioned,
                                    attrValuesToBeMentioned, isValueMentioned,
                                    getAvailableWordActions().get(predicate));
                            if (wordTrainingVector != null && classifierWords.get(predicate) != null) {
                                if (classifierWords.get(predicate).get(attribute) != null) {
                                    Prediction predictWord = classifierWords.get(predicate).get(attribute)
                                            .predict(wordTrainingVector);
                                    if (predictWord.getLabel() != null) {
                                        predictedWord = predictWord.getLabel().trim();
                                        while (predictedWord.equals(Action.TOKEN_END)
                                                && !predictedActionList.isEmpty()
                                                && predictedActionList.get(predictedActionList.size() - 1)
                                                        .getWord().equals(Action.TOKEN_END)) {
                                            double maxScore = -Double.MAX_VALUE;
                                            for (String word : predictWord.getLabel2Score().keySet()) {
                                                if (!word.equals(Action.TOKEN_END) && (Double.compare(
                                                        predictWord.getLabel2Score().get(word), maxScore) > 0)) {
                                                    maxScore = predictWord.getLabel2Score().get(word);
                                                    predictedWord = word;
                                                }
                                            }
                                        }
                                        predictedActionList.add(new Action(predictedWord, attrValue));
                                        if (!predictedWord.equals(Action.TOKEN_START)
                                                && !predictedWord.equals(Action.TOKEN_END)) {
                                            subPhrase.add(predictedWord);
                                            predictedWordList.add(new Action(predictedWord, attrValue));
                                        }
                                    } else {
                                        predictedWord = Action.TOKEN_END;
                                        predictedActionList.add(new Action(predictedWord, attrValue));
                                    }
                                } else {
                                    predictedWord = Action.TOKEN_END;
                                    predictedActionList.add(new Action(predictedWord, attrValue));
                                }
                            }
                            if (!isValueMentioned) {
                                if (!predictedWord.equals(Action.TOKEN_END)) {
                                    if (predictedWord.startsWith(Action.TOKEN_X)
                                            && (valueTBM.matches("\"[xX][0-9]+\"")
                                                    || valueTBM.matches("[xX][0-9]+")
                                                    || valueTBM.startsWith(Action.TOKEN_X))) {
                                        isValueMentioned = true;
                                    } else if (!predictedWord.startsWith(Action.TOKEN_X)
                                            && !(valueTBM.matches("\"[xX][0-9]+\"")
                                                    || valueTBM.matches("[xX][0-9]+")
                                                    || valueTBM.startsWith(Action.TOKEN_X))) {
                                        String valueToCheck = valueTBM;
                                        if (valueToCheck.equals("no") || valueToCheck.equals("yes")
                                                || valueToCheck.equals("yes or no")
                                                || valueToCheck.equals("none")
                                                //|| valueToCheck.equals("dont_care")
                                                || valueToCheck.equals("empty")) {
                                            if (attribute.contains("=")) {
                                                valueToCheck = attribute.replace("=", ":");
                                            } else {
                                                valueToCheck = attribute + ":" + valueTBM;
                                            }
                                        }
                                        if (!valueToCheck.equals("empty:empty")
                                                && getValueAlignments().containsKey(valueToCheck)) {
                                            for (ArrayList<String> alignedStr : getValueAlignments()
                                                    .get(valueToCheck).keySet()) {
                                                if (endsWith(subPhrase, alignedStr)) {
                                                    isValueMentioned = true;
                                                    break;
                                                }
                                            }
                                        }
                                    }
                                }
                                if (isValueMentioned) {
                                    attrValuesAlreadyMentioned.add(attrValue);
                                    attrValuesToBeMentioned.remove(attrValue);
                                }
                            }
                            String mentionedAttrValue = "";
                            if (!predictedWord.startsWith(Action.TOKEN_X)) {
                                for (String attrValueTBM : attrValuesToBeMentioned) {
                                    if (attrValueTBM.contains("=")) {
                                        String value = attrValueTBM.substring(attrValueTBM.indexOf('=') + 1);
                                        if (!(value.matches("\"[xX][0-9]+\"") || value.matches("[xX][0-9]+")
                                                || value.startsWith(Action.TOKEN_X))) {
                                            String valueToCheck = value;
                                            if (valueToCheck.equals("no") || valueToCheck.equals("yes")
                                                    || valueToCheck.equals("yes or no")
                                                    || valueToCheck.equals("none")
                                                    //|| valueToCheck.equals("dont_care")
                                                    || valueToCheck.equals("empty")) {
                                                valueToCheck = attrValueTBM.replace("=", ":");
                                            }
                                            if (!valueToCheck.equals("empty:empty")
                                                    && getValueAlignments().containsKey(valueToCheck)) {
                                                for (ArrayList<String> alignedStr : getValueAlignments()
                                                        .get(valueToCheck).keySet()) {
                                                    if (endsWith(subPhrase, alignedStr)) {
                                                        mentionedAttrValue = attrValueTBM;
                                                        break;
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                            if (!mentionedAttrValue.isEmpty()) {
                                attrValuesAlreadyMentioned.add(mentionedAttrValue);
                                attrValuesToBeMentioned.remove(mentionedAttrValue);
                            }
                        }
                        if (predictedWordList.size() >= getMaxWordSequenceLength() && !predictedActionList
                                .get(predictedActionList.size() - 1).getWord().equals(Action.TOKEN_END)) {
                            predictedWord = Action.TOKEN_END;
                            predictedActionList.add(new Action(predictedWord, attrValue));
                        }
                    } else {
                        String predictedWord = Action.TOKEN_END;
                        predictedActionList.add(new Action(predictedWord, attrValue));
                    }
                }
            }
        }
        ArrayList<String> predictedAttrs = new ArrayList<>();
        predictedAttrValues.forEach((attributeValuePair) -> {
            predictedAttrs.add(attributeValuePair.split("=")[0]);
        });

        String predictedWordSequence = postProcessWordSequence(di, predictedActionList);
        ArrayList<String> predictedAttrList = getPredictedAttrList(predictedActionList);
        if (attrValuesToBeMentionedCopy.size() != 0.0) {
            double missingAttrs = 0.0;
            missingAttrs = attrValuesToBeMentionedCopy.stream()
                    .filter((attr) -> (!predictedAttrList.contains(attr))).map((_item) -> 1.0)
                    .reduce(missingAttrs, (accumulator, _item) -> accumulator + _item);
            double attrSize = attrValuesToBeMentionedCopy.size();
            attrCoverage.put(predictedWordSequence, missingAttrs / attrSize);
        }
        allPredictedWordSequences.add(predictedWordSequence);
        allPredictedMRStr.add(di.getMeaningRepresentation().getMRstr());
        predictedWordSequences_overAllPredicates.put(di, predictedWordSequence);
        if (!abstractMRsToMRs.containsKey(di.getMeaningRepresentation().getAbstractMR())) {
            abstractMRsToMRs.put(di.getMeaningRepresentation().getAbstractMR(), new HashSet<String>());
        }
        abstractMRsToMRs.get(di.getMeaningRepresentation().getAbstractMR())
                .add(di.getMeaningRepresentation().getMRstr());
        Sequence<IString> translation = IStrings
                .tokenize(NISTTokenizer.tokenize(predictedWordSequence.toLowerCase()));
        ScoredFeaturizedTranslation<IString, String> tran = new ScoredFeaturizedTranslation<>(translation, null, 0);
        generations.add(tran);
        generationActions.put(di, predictedActionList);

        ArrayList<Sequence<IString>> references = new ArrayList<>();
        ArrayList<String> referencesStrings = new ArrayList<>();
        if (getPerformEvaluationOn().equals("valid") || getPerformEvaluationOn().equals("train")) {
            for (String ref : di.getEvaluationReferences()) {
                referencesStrings.add(ref);
                references.add(IStrings.tokenize(NISTTokenizer.tokenize(ref)));
            }
        } else {
            references = wenEvaluationReferenceSequences.get(di.getMeaningRepresentation().getMRstr());
            referencesStrings = wenEvaluationReferences.get(di.getMeaningRepresentation().getMRstr());
            if (references == null) {
                references = new ArrayList<>();
                referencesStrings = new ArrayList<>();
                for (String ref : di.getEvaluationReferences()) {
                    referencesStrings.add(ref);
                    references.add(IStrings.tokenize(NISTTokenizer.tokenize(ref)));
                }
            }
        }
        allPredictedReferences.add(referencesStrings);
        finalReferencesWordSequences.put(di, referencesStrings);
        finalReferences.add(references);
    }

    BLEUMetric BLEU = new BLEUMetric(finalReferences, 4, false);
    Double bleuScore = BLEU.score(generations);

    double finalCoverageError = 0.0;
    finalCoverageError = attrCoverage.values().stream().map((c) -> c).reduce(finalCoverageError,
            (accumulator, _item) -> accumulator + _item);
    finalCoverageError /= attrCoverage.size();

    for (int i = 0; i < allPredictedWordSequences.size(); i++) {
        double maxRouge = 0.0;
        String predictedWordSequence = allPredictedWordSequences.get(i).replaceAll("\\?", " \\? ")
                .replaceAll(":", " : ").replaceAll("\\.", " \\. ").replaceAll(",", " , ").replaceAll(" ", " ")
                .trim();
        for (String ref : allPredictedReferences.get(i)) {
            double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4);
            if (rouge > maxRouge) {
                maxRouge = rouge;
            }
        }
        //System.out.println(allPredictedMRStr.get(i) + "\t" + maxRouge + "\t" + allPredictedWordSequences.get(i) + "\t" + refs);
    }

    double avgRougeScore = 0.0;
    String detailedRes = "";
    avgRougeScore = testingData.stream().map((di) -> {
        double maxRouge = 0.0;
        if (!finalReferencesWordSequences.containsKey(di)) {
            System.out.println(di.getMeaningRepresentation().getAbstractMR());
        }
        String predictedWordSequence = predictedWordSequences_overAllPredicates.get(di)
                .replaceAll("\\?", " \\? ").replaceAll(":", " : ").replaceAll("\\.", " \\. ")
                .replaceAll(",", " , ").replaceAll(" ", " ").trim();
        for (String ref : finalReferencesWordSequences.get(di)) {
            double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4);
            if (rouge > maxRouge) {
                maxRouge = rouge;
            }
        }
        return maxRouge;
    }).map((maxRouge) -> maxRouge).reduce(avgRougeScore, (accumulator, _item) -> accumulator + _item);

    System.out.println("BLEU: \t" + bleuScore);
    //System.out.println("g: " + generations);
    //System.out.println("attr: " + predictedAttrLists);
    //System.out.println("BLEU smooth: \t" + bleuSmoothScore);
    System.out.println("ROUGE: \t" + (avgRougeScore / allPredictedWordSequences.size()));
    System.out.println("COVERAGE ERROR: \t" + finalCoverageError);
    System.out.println("BRC: \t" + ((avgRougeScore / allPredictedWordSequences.size()) + bleuScore
            + (1.0 - finalCoverageError)) / 3.0);

    if (isCalculateResultsPerPredicate()) {
        ////////////////////////
        //ArrayList<String> bestPredictedStrings = new ArrayList<>();
        //ArrayList<String> bestPredictedStringsMRs = new ArrayList<>();
        double uniqueMRsInTestAndNotInTrainAllPredWordBLEU = 0.0;
        double uniqueMRsInTestAndNotInTrainAllPredWordROUGE = 0.0;
        double uniqueMRsInTestAndNotInTrainAllPredWordCOVERAGEERR = 0.0;
        double uniqueMRsInTestAndNotInTrainAllPredWordBRC = 0.0;
        detailedRes = "";
        ArrayList<DatasetInstance> abstractMRList = new ArrayList<>();
        HashSet<String> reportedAbstractMRs = new HashSet<>();
        testingData.stream()
                .filter((di) -> (!reportedAbstractMRs.contains(di.getMeaningRepresentation().getAbstractMR())))
                .map((di) -> {
                    reportedAbstractMRs.add(di.getMeaningRepresentation().getAbstractMR());
                    return di;
                }).forEachOrdered((di) -> {
                    boolean isInTraining = false;
                    for (DatasetInstance di2 : getTrainingData()) {
                        if (di2.getMeaningRepresentation().getAbstractMR()
                                .equals(di.getMeaningRepresentation().getAbstractMR())) {
                            isInTraining = true;
                        }
                    }
                    if (!isInTraining) {
                        for (DatasetInstance di2 : getValidationData()) {
                            if (di2.getMeaningRepresentation().getAbstractMR()
                                    .equals(di.getMeaningRepresentation().getAbstractMR())) {
                                isInTraining = true;
                            }
                        }
                    }
                    if (!isInTraining) {
                        abstractMRList.add(di);
                    }
                });
        for (DatasetInstance di : abstractMRList) {
            Double bestROUGE = -100.0;
            Double bestBLEU = -100.0;
            Double bestCover = -100.0;
            Double bestHarmonicMean = -100.0;
            String predictedString = predictedWordSequences_overAllPredicates.get(di);
            reportedAbstractMRs.add(di.getMeaningRepresentation().getAbstractMR());
            double maxRouge = 0.0;
            String predictedWordSequence = predictedString.replaceAll("\\?", " \\? ").replaceAll(":", " : ")
                    .replaceAll("\\.", " \\. ").replaceAll(",", " , ").replaceAll(" ", " ").trim();
            for (String ref : finalReferencesWordSequences.get(di)) {
                double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4);
                if (rouge > maxRouge) {
                    maxRouge = rouge;
                }
            }
            double BLEUSmooth = BLEUMetric.computeLocalSmoothScore(predictedWordSequence,
                    finalReferencesWordSequences.get(di), 4);
            double cover = 1.0 - attrCoverage.get(predictedString);
            double harmonicMean = 3.0 / (1.0 / BLEUSmooth + 1.0 / maxRouge + 1.0 / cover);
            if (harmonicMean > bestHarmonicMean) {
                bestROUGE = maxRouge;
                bestBLEU = BLEUSmooth;
                bestCover = cover;
                bestHarmonicMean = harmonicMean;
            }
            uniqueMRsInTestAndNotInTrainAllPredWordBLEU += bestBLEU;
            uniqueMRsInTestAndNotInTrainAllPredWordROUGE += bestROUGE;
            uniqueMRsInTestAndNotInTrainAllPredWordCOVERAGEERR += bestCover;
            uniqueMRsInTestAndNotInTrainAllPredWordBRC += bestHarmonicMean;
        }
        uniqueMRsInTestAndNotInTrainAllPredWordBLEU /= abstractMRList.size();
        uniqueMRsInTestAndNotInTrainAllPredWordROUGE /= abstractMRList.size();
        uniqueMRsInTestAndNotInTrainAllPredWordCOVERAGEERR /= abstractMRList.size();
        uniqueMRsInTestAndNotInTrainAllPredWordBRC /= abstractMRList.size();
        System.out.println(
                "UNIQUE (NOT IN TRAIN) WORD ALL PRED BLEU: \t" + uniqueMRsInTestAndNotInTrainAllPredWordBLEU);
        System.out.println(
                "UNIQUE (NOT IN TRAIN) WORD ALL PRED ROUGE: \t" + uniqueMRsInTestAndNotInTrainAllPredWordROUGE);
        System.out.println("UNIQUE (NOT IN TRAIN) WORD ALL PRED COVERAGE ERROR: \t"
                + (1.0 - uniqueMRsInTestAndNotInTrainAllPredWordCOVERAGEERR));
        System.out.println(
                "UNIQUE (NOT IN TRAIN) WORD ALL PRED BRC: \t" + uniqueMRsInTestAndNotInTrainAllPredWordBRC);
        abstractMRList.forEach((di) -> {
            System.out.println(di.getMeaningRepresentation().getAbstractMR() + "\t"
                    + predictedWordSequences_overAllPredicates.get(di));
        });
        System.out.println("TOTAL SET SIZE: \t" + abstractMRList.size());
        //System.out.println(abstractMRList);
        //System.out.println(detailedRes);
    }

    ArrayList<String> bestPredictedStrings = new ArrayList<>();
    ArrayList<String> bestPredictedStringsMRs = new ArrayList<>();
    double uniqueAllPredWordBLEU = 0.0;
    double uniqueAllPredWordROUGE = 0.0;
    double uniqueAllPredWordCOVERAGEERR = 0.0;
    double uniqueAllPredWordBRC = 0.0;
    HashSet<String> reportedAbstractMRs = new HashSet<>();
    for (DatasetInstance di : testingData) {
        if (!reportedAbstractMRs.contains(di.getMeaningRepresentation().getAbstractMR())) {
            String bestPredictedString = "";
            Double bestROUGE = -100.0;
            Double bestBLEU = -100.0;
            Double bestCover = -100.0;
            Double bestHarmonicMean = -100.0;
            String predictedString = predictedWordSequences_overAllPredicates.get(di);
            reportedAbstractMRs.add(di.getMeaningRepresentation().getAbstractMR());
            double maxRouge = 0.0;
            String predictedWordSequence = predictedString.replaceAll("\\?", " \\? ").replaceAll(":", " : ")
                    .replaceAll("\\.", " \\. ").replaceAll(",", " , ").replaceAll(" ", " ").trim();
            for (String ref : finalReferencesWordSequences.get(di)) {
                double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4);
                if (rouge > maxRouge) {
                    maxRouge = rouge;
                }
            }
            double BLEUSmooth = BLEUMetric.computeLocalSmoothScore(predictedWordSequence,
                    finalReferencesWordSequences.get(di), 4);
            double cover = 1.0 - attrCoverage.get(predictedString);
            double harmonicMean = 3.0 / (1.0 / BLEUSmooth + 1.0 / maxRouge + 1.0 / cover);
            if (harmonicMean > bestHarmonicMean) {
                bestPredictedString = predictedString;
                bestROUGE = maxRouge;
                bestBLEU = BLEUSmooth;
                bestCover = cover;
                bestHarmonicMean = harmonicMean;
            }
            bestPredictedStrings.add(bestPredictedString);
            bestPredictedStringsMRs.add(di.getMeaningRepresentation().getMRstr());
            uniqueAllPredWordBLEU += bestBLEU;
            uniqueAllPredWordROUGE += bestROUGE;
            uniqueAllPredWordCOVERAGEERR += bestCover;
            uniqueAllPredWordBRC += bestHarmonicMean;
        }
        //}
    }
    if (isCalculateResultsPerPredicate()) {
        uniqueAllPredWordBLEU /= reportedAbstractMRs.size();
        uniqueAllPredWordROUGE /= reportedAbstractMRs.size();
        uniqueAllPredWordCOVERAGEERR /= reportedAbstractMRs.size();
        uniqueAllPredWordBRC /= reportedAbstractMRs.size();
        System.out.println("UNIQUE WORD ALL PRED BLEU: \t" + uniqueAllPredWordBLEU);
        System.out.println("UNIQUE WORD ALL PRED ROUGE: \t" + uniqueAllPredWordROUGE);
        System.out.println("UNIQUE WORD ALL PRED COVERAGE ERROR: \t" + (1.0 - uniqueAllPredWordCOVERAGEERR));
        System.out.println("UNIQUE WORD ALL PRED BRC: \t" + uniqueAllPredWordBRC);
        System.out.println(detailedRes);
        System.out.println("TOTAL: \t" + reportedAbstractMRs.size());

        ////////////////////////
        for (String predicate : getPredicates()) {
            detailedRes = "";
            bestPredictedStrings = new ArrayList<>();
            bestPredictedStringsMRs = new ArrayList<>();
            double uniquePredWordBLEU = 0.0;
            double uniquePredWordROUGE = 0.0;
            double uniquePredWordCOVERAGEERR = 0.0;
            double uniquePredWordBRC = 0.0;
            reportedAbstractMRs = new HashSet<>();
            for (DatasetInstance di : testingData) {
                if (di.getMeaningRepresentation().getPredicate().equals(predicate)
                        && !reportedAbstractMRs.contains(di.getMeaningRepresentation().getAbstractMR())) {
                    String bestPredictedString = "";
                    Double bestROUGE = -100.0;
                    Double bestBLEU = -100.0;
                    Double bestCover = -100.0;
                    Double bestHarmonicMean = -100.0;
                    String predictedString = predictedWordSequences_overAllPredicates.get(di);
                    reportedAbstractMRs.add(di.getMeaningRepresentation().getAbstractMR());
                    double maxRouge = 0.0;
                    String predictedWordSequence = predictedString.replaceAll("\\?", " \\? ")
                            .replaceAll(":", " : ").replaceAll("\\.", " \\. ").replaceAll(",", " , ")
                            .replaceAll(" ", " ").trim();
                    for (String ref : finalReferencesWordSequences.get(di)) {
                        double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4);
                        if (rouge > maxRouge) {
                            maxRouge = rouge;
                        }
                    }
                    double BLEUSmooth = BLEUMetric.computeLocalSmoothScore(predictedWordSequence,
                            finalReferencesWordSequences.get(di), 4);
                    double cover = 1.0 - attrCoverage.get(predictedString);
                    double harmonicMean = 3.0 / (1.0 / BLEUSmooth + 1.0 / maxRouge + 1.0 / cover);
                    if (harmonicMean > bestHarmonicMean) {
                        bestPredictedString = predictedString;
                        bestROUGE = maxRouge;
                        bestBLEU = BLEUSmooth;
                        bestCover = cover;
                        bestHarmonicMean = harmonicMean;
                    }
                    bestPredictedStrings.add(bestPredictedString);
                    bestPredictedStringsMRs.add(di.getMeaningRepresentation().getMRstr());
                    uniquePredWordBLEU += bestBLEU;
                    uniquePredWordROUGE += bestROUGE;
                    uniquePredWordCOVERAGEERR += bestCover;
                    uniquePredWordBRC += bestHarmonicMean;
                }
            }
            uniquePredWordBLEU /= reportedAbstractMRs.size();
            uniquePredWordROUGE /= reportedAbstractMRs.size();
            uniquePredWordCOVERAGEERR /= reportedAbstractMRs.size();
            uniquePredWordBRC /= reportedAbstractMRs.size();
            System.out.println("UNIQUE WORD " + predicate + " BLEU: \t" + uniquePredWordBLEU);
            System.out.println("UNIQUE WORD " + predicate + " ROUGE: \t" + uniquePredWordROUGE);
            System.out.println(
                    "UNIQUE WORD " + predicate + " COVERAGE ERROR: \t" + (1.0 - uniquePredWordCOVERAGEERR));
            System.out.println("UNIQUE WORD " + predicate + " BRC: \t" + uniquePredWordBRC);
            System.out.println(detailedRes);
            System.out.println("TOTAL " + predicate + ": \t" + reportedAbstractMRs.size());
        }
    }
    if (isCalculateResultsPerPredicate()) {
        BufferedWriter bw = null;
        File f = null;
        try {
            f = new File("results/random_SFX" + getDataset() + "TextsAfter" + (epoch) + "_"
                    + JLOLS.sentenceCorrectionFurtherSteps + "_" + JLOLS.p + "epochsTESTINGDATA.txt");
        } catch (NullPointerException e) {
        }
        try {
            bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f)));
        } catch (FileNotFoundException e) {
        }
        try {
            bw.write("BLEU:" + bleuScore);
            bw.write("\n");
        } catch (IOException e) {
        }
        for (int i = 0; i < bestPredictedStrings.size(); i++) {
            try {
                String mr = bestPredictedStringsMRs.get(i);
                bw.write("MR;" + mr.replaceAll(";", ",") + ";");
                if (getDataset().equals("hotel")) {
                    bw.write("LOLS_SFHOT;");
                } else {
                    bw.write("LOLS_SFRES;");
                }
                bw.write("\n");
            } catch (IOException e) {
            }
        }
        try {
            bw.close();
        } catch (IOException e) {
        }
    }
    return bleuScore;
}
From source file:org.hawkular.metrics.core.service.MetricsServiceITest.java
private <T extends Number> NumericBucketPoint createSingleBucket(List<? extends DataPoint<T>> combinedData,
        DateTime start, DateTime end) {
    T expectedMin = combinedData.stream()
            .min((x, y) -> Double.compare(x.getValue().doubleValue(), y.getValue().doubleValue())).get()
            .getValue();
    T expectedMax = combinedData.stream()
            .max((x, y) -> Double.compare(x.getValue().doubleValue(), y.getValue().doubleValue())).get()
            .getValue();
    PercentileWrapper expectedMedian = NumericDataPointCollector.createPercentile.apply(50.0);
    Mean expectedAverage = new Mean();
    Sum expectedSamples = new Sum();
    combinedData.stream().forEach(arg -> {
        expectedMedian.addValue(arg.getValue().doubleValue());
        expectedAverage.increment(arg.getValue().doubleValue());
        expectedSamples.increment(1);
    });
    return new NumericBucketPoint.Builder(start.getMillis(), end.getMillis()).setMin(expectedMin.doubleValue())
            .setMax(expectedMax.doubleValue()).setAvg(expectedAverage.getResult())
            .setMedian(expectedMedian.getResult())
            .setSamples(new Double(expectedSamples.getResult()).intValue()).build();
}
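On Java 8+ the two-argument lambdas above can also be written with Comparator.comparingDouble, which names the sort key once and uses Double.compare internally. A small sketch under the assumption that the key is a plain double accessor (hypothetical interface, not the Hawkular DataPoint type):

import java.util.Comparator;
import java.util.List;
import java.util.Optional;

class StreamMinMax {
    // Hypothetical stand-in for DataPoint<T>: anything exposing a double key.
    interface HasValue {
        double doubleValue();
    }

    static Optional<HasValue> min(List<HasValue> data) {
        return data.stream().min(Comparator.comparingDouble(HasValue::doubleValue));
    }

    static Optional<HasValue> max(List<HasValue> data) {
        return data.stream().max(Comparator.comparingDouble(HasValue::doubleValue));
    }
}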
From source file:org.nd4j.linalg.factory.Nd4j.java
/**
 * Sort (shuffle) the rows of a 2d array according to the value at a specified column.
 * Other than the order of the rows, each row is unmodified. Copy operation: original
 * INDArray is unmodified<br>
 * So if sorting the following on values of column 2 (ascending):<br>
 * [a b 2]<br>
 * [c d 0]<br>
 * [e f -3]<br>
 * Then output is<br>
 * [e f -3]<br>
 * [c d 0]<br>
 * [a b 2]<br>
 * @param in 2d array to sort
 * @param colIdx The column to sort on
 * @param ascending true if smallest-to-largest; false if largest-to-smallest
 * @return
 */
public static INDArray sortRows(final INDArray in, final int colIdx, final boolean ascending) {
    if (in.rank() != 2)
        throw new IllegalArgumentException("Cannot sort rows on non-2d matrix");
    if (colIdx < 0 || colIdx >= in.columns())
        throw new IllegalArgumentException("Cannot sort on values in column " + colIdx + ", nCols=" + in.columns());
    INDArray out = Nd4j.create(in.shape());
    int nRows = in.rows();
    ArrayList<Integer> list = new ArrayList<>(nRows);
    for (int i = 0; i < nRows; i++)
        list.add(i);
    Collections.sort(list, new Comparator<Integer>() {
        @Override
        public int compare(Integer o1, Integer o2) {
            if (ascending)
                return Double.compare(in.getDouble(o1, colIdx), in.getDouble(o2, colIdx));
            else
                return -Double.compare(in.getDouble(o1, colIdx), in.getDouble(o2, colIdx));
        }
    });
    for (int i = 0; i < nRows; i++) {
        out.putRow(i, in.getRow(list.get(i)));
    }
    return out;
}
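The same indirect sort (order a list of row indices, then copy rows in that order) can hoist the ascending/descending decision out of the comparator instead of re-testing the flag on every comparison. A sketch with a plain double[][] standing in for the INDArray:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

class IndirectRowSort {
    static int[] rowOrder(double[][] in, int colIdx, boolean ascending) {
        List<Integer> rows = new ArrayList<>();
        for (int i = 0; i < in.length; i++)
            rows.add(i);
        Comparator<Integer> byCol = (o1, o2) -> Double.compare(in[o1][colIdx], in[o2][colIdx]);
        rows.sort(ascending ? byCol : byCol.reversed()); // decide once, not per comparison
        return rows.stream().mapToInt(Integer::intValue).toArray();
    }
}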
From source file:gov.opm.scrd.batchprocessing.jobs.BatchProcessingJob.java
/**
 * Get log text of import status.
 *
 * @param importStatus The import status.
 * @return The log text of import status.
 */
private String logImportStatus(ImportStatus importStatus) {
    Date now = new Date();
    StringBuilder sb = new StringBuilder(CRLF);
    sb.append("Service Credit imported a data file from the bank lockbox on ");
    sb.append(DateFormat.getDateInstance(DateFormat.LONG, Locale.US).format(now));
    sb.append(" at ");
    sb.append(DateFormat.getTimeInstance(DateFormat.LONG, Locale.US).format(now)).append(".").append(CRLF);
    sb.append("Lockbox data imported into the ").append(databaseString);
    sb.append(" database on server ").append(datasourceString);
    sb.append(" from computer ").append(workstationId).append(".");
    sb.append(CRLF).append(CRLF);
    sb.append("# Lines in file = ")
            .append(importStatus.getNumberDuplicateRecords() + importStatus.getNumberDiscreteRecords())
            .append(CRLF);
    if (importStatus.getNumberDiscreteRecords() > 0) {
        sb.append("# New Lines = ").append(importStatus.getNumberDiscreteRecords()).append(CRLF);
    }
    sb.append("# Duplicate Lines = ").append(importStatus.getNumberDuplicateRecords()).append(CRLF);
    sb.append(CRLF);
    int failedInserts = importStatus.getNumberBadChangeRecords() + importStatus.getNumberNoMatchChangeRecords()
            + importStatus.getNumberBadAchPayments() + importStatus.getNumberBadCheckPayments();
    int successfulInserts = importStatus.getNumberGoodChangeRecords() + importStatus.getNumberGoodAchPayments()
            + importStatus.getNumberGoodCheckPayments();
    sb.append("# Records Processed = ")
            .append(importStatus.getNumberBadDataRecords() + importStatus.getNumberGoodSummaryRecords()
                    + importStatus.getNumberBadSummaryRecords() + importStatus.getNumberDupeAchPayments()
                    + importStatus.getNumberDupeCheckPayments() + failedInserts + successfulInserts)
            .append(CRLF);
    sb.append("# Check Payments = ").append(importStatus.getNumberGoodCheckPayments()
            + importStatus.getNumberBadCheckPayments() + importStatus.getNumberDupeCheckPayments())
            .append(CRLF);
    sb.append("# Good Check Payments = ").append(importStatus.getNumberGoodCheckPayments()).append(CRLF);
    sb.append("# Bad Check Payments = ").append(importStatus.getNumberBadCheckPayments()).append(CRLF);
    sb.append("# Dupe Check Payments = ").append(importStatus.getNumberDupeCheckPayments()).append(CRLF);
    sb.append("# ACH Payments = ").append(importStatus.getNumberGoodAchPayments()
            + importStatus.getNumberBadAchPayments() + importStatus.getNumberDupeAchPayments()).append(CRLF);
    sb.append("# Good ACH Payments = ").append(importStatus.getNumberGoodAchPayments()).append(CRLF);
    sb.append("# Bad ACH Payments = ").append(importStatus.getNumberBadAchPayments()).append(CRLF);
    sb.append("# Dupe ACH Payments = ").append(importStatus.getNumberDupeAchPayments()).append(CRLF);
    sb.append("# Bad Change Records = ").append(importStatus.getNumberBadChangeRecords()).append(CRLF);
    sb.append("# Good Change Records = ").append(importStatus.getNumberGoodChangeRecords()).append(CRLF);
    sb.append("# No Match Change Records = ").append(importStatus.getNumberNoMatchChangeRecords()).append(CRLF);
    sb.append("# Invalid Summary Records = ").append(importStatus.getNumberBadSummaryRecords()).append(CRLF);
    sb.append("# Valid Summary Records = ").append(importStatus.getNumberGoodSummaryRecords()).append(CRLF);
    sb.append("# Successful Transactions = ").append(successfulInserts).append(CRLF);
    sb.append("# Failed Transactions = ").append(failedInserts).append(CRLF);
    sb.append("# Invalid Lines = ").append(importStatus.getNumberBadDataRecords()).append(CRLF);
    sb.append(CRLF);
    sb.append("Total Check Payments = ").append(importStatus.getTotalCheckPayments()).append(CRLF);
    sb.append("Total ACH Payments = ").append(importStatus.getTotalACHPayments()).append(CRLF);
    sb.append("Total of Accepted Payments = ")
            .append(importStatus.getTotalCheckPayments().add(importStatus.getTotalACHPayments())).append(CRLF);
    if (Double.compare(importStatus.getTransactionsTotal().doubleValue(),
            importStatus.getFileSummaryTotal().doubleValue()) == 0) {
        sb.append("Total in File = ").append(importStatus.getFileSummaryTotal()).append(CRLF);
    } else {
        sb.append("Total in File Summary (checksum) = ").append(importStatus.getFileSummaryTotal())
                .append(CRLF);
        sb.append("Total of Processed Transactions = ").append(importStatus.getTransactionsTotal())
                .append(CRLF);
        sb.append(CRLF).append(CRLF);
        sb.append("ERROR: THE BANK FILE CONTAINED ").append(importStatus.getFileSummaryTotal()).append(" BUT ")
                .append(importStatus.getTransactionsTotal())
                .append(" WAS IMPORTED INTO THE SERVICE CREDIT DATABASE. ");
        if (importStatus.getNumberGoodSummaryRecords() > 0) {
            if (Double.compare(importStatus.getTransactionsTotal().doubleValue(),
                    importStatus.getFileSummaryTotal().doubleValue()) > 0) {
                sb.append("THE PROGRAM IMPORTED ")
                        .append(importStatus.getTransactionsTotal().subtract(importStatus.getFileSummaryTotal()))
                        .append(" MORE THAN THE BANK TOTAL.").append(CRLF);
            } else if (Double.compare(importStatus.getTransactionsTotal().doubleValue(),
                    importStatus.getFileSummaryTotal().doubleValue()) < 0) {
                sb.append("THE PROGRAM IMPORTED ")
                        .append(importStatus.getFileSummaryTotal().subtract(importStatus.getTransactionsTotal()))
                        .append(" LESS THAN THE BANK TOTAL.").append(CRLF);
            }
        }
    }
    sb.append(CRLF);
    int failedCount = importStatus.getNumberBadDataRecords() + importStatus.getNumberBadSummaryRecords()
            + failedInserts;
    if (failedCount > 0) {
        sb.append("ERROR: ").append(failedCount).append(" RECORDS FAILED PROCESSING.").append(CRLF);
    } else {
        sb.append("No bad records in this batch.").append(CRLF);
    }
    if (failedInserts > 0) {
        sb.append("ERROR: ").append(failedInserts).append(" TRANSACTIONS COULD NOT BE READ INTO THE DATABASE!")
                .append(CRLF);
    }
    if (importStatus.getNumberBadSummaryRecords() > 0) {
        sb.append("ERROR: ").append(importStatus.getNumberBadSummaryRecords())
                .append(" CHECKSUMS COULD NOT BE PROCESSED!").append(CRLF);
    }
    if (importStatus.getNumberDuplicateRecords() > 0) {
        sb.append(importStatus.getNumberDuplicateRecords()).append(" DUPLICATE RECORDS FROM THE BANK")
                .append(CRLF);
    }
    sb.append(successfulInserts).append(" = number of successful calls to the ProcessImportRecord function.")
            .append(CRLF);
    if (failedInserts > 0) {
        sb.append(failedInserts).append(" NUMBER OF FAILED CALLS TO THE PROCESSIMPORTRECORD FUNCTION!")
                .append(CRLF);
    }
    sb.append(CRLF);
    // Log pending payment transactions collated
    sb.append(" Accepted Payments: ")
            .append(importStatus.getNumberAcceptedCheckPayments() + importStatus.getNumberAcceptedAchPayments());
    sb.append(CRLF);
    sb.append(" Unresolved Payments: ").append(
            importStatus.getNumberUnresolvedCheckPayments() + importStatus.getNumberUnresolvedAchPayments());
    sb.append(CRLF);
    sb.append(" Suspended Payments: ").append(
            importStatus.getNumberSuspendedCheckPayments() + importStatus.getNumberSuspendedAchPayments());
    sb.append(CRLF);
    sb.append("-----------------------------------");
    sb.append(CRLF);
    sb.append("Pending payments processed: ").append(importStatus.getNumberAcceptedCheckPayments()
            + importStatus.getNumberAcceptedAchPayments() + importStatus.getNumberUnresolvedCheckPayments()
            + importStatus.getNumberUnresolvedAchPayments() + importStatus.getNumberSuspendedCheckPayments()
            + importStatus.getNumberSuspendedAchPayments());
    sb.append(CRLF);
    return sb.toString();
}
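The checksum test above uses Double.compare(...) == 0 rather than ==. For doubles the two agree almost everywhere; the differences are that compare treats NaN as equal to itself and distinguishes -0.0 from 0.0. A short self-contained demo of exactly those differences:

class ExactEquality {
    public static void main(String[] args) {
        System.out.println(Double.NaN == Double.NaN); // false: NaN is never == itself
        System.out.println(Double.compare(Double.NaN, Double.NaN)); // 0: treated as equal

        System.out.println(-0.0 == 0.0); // true
        System.out.println(Double.compare(-0.0, 0.0)); // negative: distinguished
    }
}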
From source file:ml.shifu.shifu.core.dtrain.dt.DTWorker.java
@Override
public void load(GuaguaWritableAdapter<LongWritable> currentKey, GuaguaWritableAdapter<Text> currentValue,
        WorkerContext<DTMasterParams, DTWorkerParams> context) {
    this.count += 1;
    if ((this.count) % 5000 == 0) {
        LOG.info("Read {} records.", this.count);
    }

    // hashcode for fixed input split in train and validation
    long hashcode = 0;
    short[] inputs = new short[this.inputCount];
    float ideal = 0f;
    float significance = 1f;
    // use guava Splitter to iterate only once
    // use NNConstants.NN_DEFAULT_COLUMN_SEPARATOR to replace getModelConfig().getDataSetDelimiter(), super follows
    // the function in akka mode.
    int index = 0, inputIndex = 0;
    for (String input : this.splitter.split(currentValue.getWritable().toString())) {
        if (index == this.columnConfigList.size()) {
            // do we need to check if not weighted directly set to 1f; if such logic non-weight at first, then
            // weight, how to process???
            if (StringUtils.isBlank(modelConfig.getWeightColumnName())) {
                significance = 1f;
                break;
            }
            // check here to avoid bad performance in failed NumberFormatUtils.getFloat(input, 1f)
            significance = input.length() == 0 ? 1f : NumberFormatUtils.getFloat(input, 1f);
            // if invalid weight, set it to 1f and warning in log
            if (Float.compare(significance, 0f) < 0) {
                LOG.warn("The {} record in current worker weight {} is less than 0f, it is invalid, set it to 1.",
                        count, significance);
                significance = 1f;
            }
            // the last field is significance, break here
            break;
        } else {
            ColumnConfig columnConfig = this.columnConfigList.get(index);
            if (columnConfig != null && columnConfig.isTarget()) {
                ideal = getFloatValue(input);
            } else {
                if (!isAfterVarSelect) {
                    // no variable selected, good candidate but not meta and not target chose
                    if (!columnConfig.isMeta() && !columnConfig.isTarget()
                            && CommonUtils.isGoodCandidate(columnConfig, this.hasCandidates)) {
                        if (columnConfig.isNumerical()) {
                            float floatValue = getFloatValue(input);
                            // cast is safe as we limit max bin to Short.MAX_VALUE
                            short binIndex = (short) getBinIndex(floatValue, columnConfig.getBinBoundary());
                            inputs[inputIndex] = binIndex;
                            if (!this.inputIndexMap.containsKey(columnConfig.getColumnNum())) {
                                this.inputIndexMap.put(columnConfig.getColumnNum(), inputIndex);
                            }
                        } else if (columnConfig.isCategorical()) {
                            short shortValue = (short) (columnConfig.getBinCategory().size());
                            if (input.length() == 0) {
                                // empty
                                shortValue = (short) (columnConfig.getBinCategory().size());
                            } else {
                                Integer categoricalIndex = this.columnCategoryIndexMapping
                                        .get(columnConfig.getColumnNum()).get(input);
                                if (categoricalIndex == null) {
                                    shortValue = -1; // invalid category, set to -1 for last index
                                } else {
                                    // cast is safe as we limit max bin to Short.MAX_VALUE
                                    shortValue = (short) (categoricalIndex.intValue());
                                }
                                if (shortValue == -1) {
                                    // not found
                                    shortValue = (short) (columnConfig.getBinCategory().size());
                                }
                            }
                            inputs[inputIndex] = shortValue;
                            if (!this.inputIndexMap.containsKey(columnConfig.getColumnNum())) {
                                this.inputIndexMap.put(columnConfig.getColumnNum(), inputIndex);
                            }
                        }
                        hashcode = hashcode * 31 + input.hashCode();
                        inputIndex += 1;
                    }
                } else {
                    // final select some variables but meta and target are not included
                    if (columnConfig != null && !columnConfig.isMeta() && !columnConfig.isTarget()
                            && columnConfig.isFinalSelect()) {
                        if (columnConfig.isNumerical()) {
                            float floatValue = getFloatValue(input);
                            // cast is safe as we limit max bin to Short.MAX_VALUE
                            short binIndex = (short) getBinIndex(floatValue, columnConfig.getBinBoundary());
                            inputs[inputIndex] = binIndex;
                            if (!this.inputIndexMap.containsKey(columnConfig.getColumnNum())) {
                                this.inputIndexMap.put(columnConfig.getColumnNum(), inputIndex);
                            }
                        } else if (columnConfig.isCategorical()) {
                            // cast is safe as we limit max bin to Short.MAX_VALUE
                            short shortValue = (short) (columnConfig.getBinCategory().size());
                            if (input.length() == 0) {
                                // empty
                                shortValue = (short) (columnConfig.getBinCategory().size());
                            } else {
                                Integer categoricalIndex = this.columnCategoryIndexMapping
                                        .get(columnConfig.getColumnNum()).get(input);
                                if (categoricalIndex == null) {
                                    shortValue = -1; // invalid category, set to -1 for last index
                                } else {
                                    // cast is safe as we limit max bin to Short.MAX_VALUE
                                    shortValue = (short) (categoricalIndex.intValue());
                                }
                                if (shortValue == -1) {
                                    // not found
                                    shortValue = (short) (columnConfig.getBinCategory().size());
                                }
                            }
                            inputs[inputIndex] = shortValue;
                            if (!this.inputIndexMap.containsKey(columnConfig.getColumnNum())) {
                                this.inputIndexMap.put(columnConfig.getColumnNum(), inputIndex);
                            }
                        }
                        hashcode = hashcode * 31 + input.hashCode();
                        inputIndex += 1;
                    }
                }
            }
        }
        index += 1;
    }

    // output delimiter in norm can be set by user now and if user set a special one later changed, this exception
    // is helped to quick find such issue.
    if (inputIndex != inputs.length) {
        String delimiter = context.getProps().getProperty(Constants.SHIFU_OUTPUT_DATA_DELIMITER,
                Constants.DEFAULT_DELIMITER);
        throw new RuntimeException("Input length is inconsistent with parsing size. Input original size: "
                + inputs.length + ", parsing size:" + inputIndex + ", delimiter:" + delimiter + ".");
    }

    if (this.isOneVsAll) {
        // if one vs all, update target value according to index of target
        ideal = updateOneVsAllTargetValue(ideal);
    }

    // sample negative only logic here
    if (modelConfig.getTrain().getSampleNegOnly()) {
        if (this.modelConfig.isFixInitialInput()) {
            // if fixInitialInput, sample hashcode in 1-sampleRate range out if negative records
            int startHashCode = (100 / this.modelConfig.getBaggingNum()) * this.trainerId;
            // here BaggingSampleRate means how many data will be used in training and validation, if it is 0.8, we
            // should take 1-0.8 to check endHashCode
            int endHashCode = startHashCode
                    + Double.valueOf((1d - this.modelConfig.getBaggingSampleRate()) * 100).intValue();
            if ((modelConfig.isRegression() || this.isOneVsAll) // regression or onevsall
                    && (int) (ideal + 0.01d) == 0 // negative record
                    && isInRange(hashcode, startHashCode, endHashCode)) {
                return;
            }
        } else {
            // if not fixed initial input, and for regression or onevsall multiple classification (regression also),
            // and if negative record do sampling out
            if ((modelConfig.isRegression() || this.isOneVsAll) // regression or onevsall
                    && (int) (ideal + 0.01d) == 0 // negative record
                    && Double.compare(this.sampelNegOnlyRandom.nextDouble(),
                            this.modelConfig.getBaggingSampleRate()) >= 0) {
                return;
            }
        }
    }

    float output = ideal;
    float predict = ideal;

    // up sampling logic, just add more weights while bagging sampling rate is still not changed
    if (modelConfig.isRegression() && isUpSampleEnabled() && Double.compare(ideal, 1d) == 0) {
        // Double.compare(ideal, 1d) == 0 means positive tags; sample + 1 to avoid sample count to 0
        significance = significance * (this.upSampleRng.sample() + 1);
    }

    Data data = new Data(inputs, predict, output, output, significance);

    boolean isValidation = false;
    if (context.getAttachment() != null && context.getAttachment() instanceof Boolean) {
        isValidation = (Boolean) context.getAttachment();
    }

    // split into validation and training data set according to validation rate
    boolean isInTraining = this.addDataPairToDataSet(hashcode, data, isValidation);

    // do bagging sampling only for training data
    if (isInTraining) {
        data.subsampleWeights = sampleWeights(data.label);
        // for training data, compute real selected training data according to baggingSampleRate
        // if gbdt, only the 1st sampling value is used, if rf, use the 1st to denote some information, no need all
        if (isPositive(data.label)) {
            this.positiveSelectedTrainCount += data.subsampleWeights[0] * 1L;
        } else {
            this.negativeSelectedTrainCount += data.subsampleWeights[0] * 1L;
        }
    } else {
        // for validation data, according bagging sampling logic, we may need to sampling validation data set, while
        // validation data set are only used to compute validation error, not to do real sampling is ok.
    }
}
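The worker gates records with Float.compare/Double.compare against a configured rate or a random draw rather than with raw relational operators. A minimal sketch of the Bernoulli-style drop test used in the sample-negative-only branch (hypothetical names, not Shifu code):

import java.util.Random;

class SamplingGate {
    private final Random rng = new Random();

    // Drop a record with probability 1 - sampleRate; mirrors the worker's
    // Double.compare(randomDraw, rate) >= 0 test.
    boolean drop(double sampleRate) {
        return Double.compare(rng.nextDouble(), sampleRate) >= 0;
    }
}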
From source file:org.nd4j.linalg.factory.Nd4j.java
/**
 * Sort (shuffle) the columns of a 2d array according to the value at a specified row.
 * Other than the order of the columns, each column is unmodified. Copy operation: original
 * INDArray is unmodified<br>
 * So if sorting the following on values of row 1 (ascending):<br>
 * [a b c]<br>
 * [1 -1 0]<br>
 * [d e f]<br>
 * Then output is<br>
 * [b c a]<br>
 * [-1 0 1]<br>
 * [e f d]<br>
 * @param in 2d array to sort
 * @param rowIdx The row to sort on
 * @param ascending true if smallest-to-largest; false if largest-to-smallest
 * @return
 */
public static INDArray sortColumns(final INDArray in, final int rowIdx, final boolean ascending) {
    if (in.rank() != 2)
        throw new IllegalArgumentException("Cannot sort columns on non-2d matrix");
    if (rowIdx < 0 || rowIdx >= in.rows())
        throw new IllegalArgumentException("Cannot sort on values in row " + rowIdx + ", nRows=" + in.rows());
    INDArray out = Nd4j.create(in.shape());
    int nCols = in.columns();
    ArrayList<Integer> list = new ArrayList<>(nCols);
    for (int i = 0; i < nCols; i++)
        list.add(i);
    Collections.sort(list, new Comparator<Integer>() {
        @Override
        public int compare(Integer o1, Integer o2) {
            if (ascending)
                return Double.compare(in.getDouble(rowIdx, o1), in.getDouble(rowIdx, o2));
            else
                return -Double.compare(in.getDouble(rowIdx, o1), in.getDouble(rowIdx, o2));
        }
    });
    for (int i = 0; i < nCols; i++) {
        out.putColumn(i, in.getColumn(list.get(i)));
    }
    return out;
}
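Comparators like these are sometimes written with subtraction, (int) (a - b), which truncates fractional differences to zero and misorders NaN; Double.compare avoids both problems. A quick contrast (hypothetical sketch):

import java.util.Arrays;
import java.util.Comparator;

class SubtractionBug {
    public static void main(String[] args) {
        // Broken: (int) (0.4 - 0.1) truncates to 0, so distinct values compare "equal".
        Comparator<Double> bad = (x, y) -> (int) (x - y);
        // Correct: Double.compare never truncates and gives NaN a fixed place (last).
        Comparator<Double> good = (x, y) -> Double.compare(x, y);

        Double[] xs = { 0.4, 0.1, Double.NaN, 0.3 };
        Arrays.sort(xs, good);
        System.out.println(Arrays.toString(xs)); // [0.1, 0.3, 0.4, NaN]
    }
}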
From source file:ml.shifu.shifu.core.dtrain.dt.DTWorker.java
/**
 * Add to training set or validation set according to validation rate.
 *
 * @param hashcode
 *            the hash code of the data
 * @param data
 *            data instance
 * @param isValidation
 *            if it is validation
 * @return if in training, training is true, others are false.
 */
protected boolean addDataPairToDataSet(long hashcode, Data data, boolean isValidation) {
    if (this.isKFoldCV) {
        int k = this.modelConfig.getTrain().getNumKFold();
        if (hashcode % k == this.trainerId) {
            this.validationData.append(data);
            if (isPositive(data.label)) {
                this.positiveValidationCount += 1L;
            } else {
                this.negativeValidationCount += 1L;
            }
            return false;
        } else {
            this.trainingData.append(data);
            if (isPositive(data.label)) {
                this.positiveTrainCount += 1L;
            } else {
                this.negativeTrainCount += 1L;
            }
            return true;
        }
    }

    if (this.isManualValidation) {
        if (isValidation) {
            this.validationData.append(data);
            if (isPositive(data.label)) {
                this.positiveValidationCount += 1L;
            } else {
                this.negativeValidationCount += 1L;
            }
            return false;
        } else {
            this.trainingData.append(data);
            if (isPositive(data.label)) {
                this.positiveTrainCount += 1L;
            } else {
                this.negativeTrainCount += 1L;
            }
            return true;
        }
    } else {
        if (Double.compare(this.modelConfig.getValidSetRate(), 0d) != 0) {
            int classValue = (int) (data.label + 0.01f);
            Random random = null;
            if (this.isStratifiedSampling) {
                // each class use one random instance
                random = validationRandomMap.get(classValue);
                if (random == null) {
                    random = new Random();
                    this.validationRandomMap.put(classValue, random);
                }
            } else {
                // all data use one random instance
                random = validationRandomMap.get(0);
                if (random == null) {
                    random = new Random();
                    this.validationRandomMap.put(0, random);
                }
            }
            if (this.modelConfig.isFixInitialInput()) {
                // for fix initial input, if hashcode%100 is in [start-hashcode, end-hashcode), validation,
                // otherwise training. start hashcode in different job is different to make sure bagging jobs have
                // different data. if end-hashcode is over 100, then check if hashcode is in [start-hashcode, 100]
                // or [0, end-hashcode]
                int startHashCode = (100 / this.modelConfig.getBaggingNum()) * this.trainerId;
                int endHashCode = startHashCode + Double.valueOf(this.modelConfig.getValidSetRate() * 100).intValue();
                if (isInRange(hashcode, startHashCode, endHashCode)) {
                    this.validationData.append(data);
                    if (isPositive(data.label)) {
                        this.positiveValidationCount += 1L;
                    } else {
                        this.negativeValidationCount += 1L;
                    }
                    return false;
                } else {
                    this.trainingData.append(data);
                    if (isPositive(data.label)) {
                        this.positiveTrainCount += 1L;
                    } else {
                        this.negativeTrainCount += 1L;
                    }
                    return true;
                }
            } else {
                // not fixed initial input, if random value >= validRate, training, otherwise validation.
                if (random.nextDouble() >= this.modelConfig.getValidSetRate()) {
                    this.trainingData.append(data);
                    if (isPositive(data.label)) {
                        this.positiveTrainCount += 1L;
                    } else {
                        this.negativeTrainCount += 1L;
                    }
                    return true;
                } else {
                    this.validationData.append(data);
                    if (isPositive(data.label)) {
                        this.positiveValidationCount += 1L;
                    } else {
                        this.negativeValidationCount += 1L;
                    }
                    return false;
                }
            }
        } else {
            this.trainingData.append(data);
            if (isPositive(data.label)) {
                this.positiveTrainCount += 1L;
            } else {
                this.negativeTrainCount += 1L;
            }
            return true;
        }
    }
}
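The else-branch above relies on Double.compare(rate, 0d) != 0 as an exact is-nonzero test, which is reasonable for a configuration value that was never produced by arithmetic. A one-method sketch of that guard (hypothetical helper):

class ValidationSplit {
    // Exact test: fine for a config value like 0.0 or 0.2; for a computed
    // double an epsilon test such as Math.abs(rate) < 1e-9 would be safer.
    static boolean hasValidationSet(double validSetRate) {
        return Double.compare(validSetRate, 0d) != 0;
    }
}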
From source file:org.esa.nest.gpf.ERSCalibrator.java
/**
 * Compute the UK-PAF ERS-1 antenna pattern (Appendix H) for given look angle.
 *
 * @param lookAngle The look angle (in degree)
 * @return The antenna pattern gain (in linear scale)
 */
private double ec(final double lookAngle) {
    final int numRows = appendixH.length;
    final int numCols = appendixH[0].length;
    if (numRows < 2 || numCols < 2) {
        throw new OperatorException("Not enough antenna pattern data");
    }

    final double boreSightAngle = lookAngle - relativeLookAngle;

    int row1 = 0;
    int row2 = 0;
    if (sceneCentreLatitude < appendixH[1][0]) {
        row1 = 1;
        row2 = 2;
    } else if (sceneCentreLatitude > appendixH[numRows - 1][0]) {
        row1 = numRows - 2;
        row2 = numRows - 1;
    } else {
        for (int i = 2; i < numRows; i++) {
            if (sceneCentreLatitude < appendixH[i][0]) {
                row1 = i - 1;
                row2 = i;
                break;
            }
        }
    }

    int col1 = 0;
    int col2 = 0;
    if (boreSightAngle < appendixH[0][1]) {
        col1 = 1;
        col2 = 2;
    } else if (boreSightAngle > appendixH[numCols - 1][0]) {
        col1 = numCols - 2;
        col2 = numCols - 1;
    } else {
        for (int j = 2; j < numCols; j++) {
            if (boreSightAngle < appendixH[0][j]) {
                col1 = j - 1;
                col2 = j;
                break;
            }
        }
    }

    final double lat1 = appendixH[row1][0];
    final double lat2 = appendixH[row2][0];
    final double delTheta1 = appendixH[0][col1];
    final double delTheta2 = appendixH[0][col2];
    if (Double.compare(lat1, lat2) == 0 || Double.compare(delTheta1, delTheta2) == 0) {
        throw new OperatorException("Incorrect latitude or look angle data");
    }

    final double gain11 = appendixH[row1][col1];
    final double gain12 = appendixH[row1][col2];
    final double gain21 = appendixH[row2][col1];
    final double gain22 = appendixH[row2][col2];

    final double lambda1 = (sceneCentreLatitude - lat1) / (lat2 - lat1);
    final double lambda2 = (boreSightAngle - delTheta1) / (delTheta2 - delTheta1);

    double gain = (1 - lambda2) * ((1 - lambda1) * gain11 + lambda1 * gain21)
            + lambda2 * ((1 - lambda1) * gain12 + lambda1 * gain22);

    gain = Math.pow(10, gain / 10); // dB to linear scale

    return gain;
}
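The Double.compare(...) == 0 guard above exists to reject a degenerate interpolation interval before it becomes a zero denominator. The same guard in a self-contained one-dimensional form (hypothetical sketch, not the calibrator's code):

class LinearInterp {
    // Interpolate at x between (x1, y1) and (x2, y2); the compare guard
    // rejects a degenerate interval before it becomes a zero denominator.
    static double lerp(double x, double x1, double y1, double x2, double y2) {
        if (Double.compare(x1, x2) == 0) {
            throw new IllegalArgumentException("x1 and x2 must differ");
        }
        double lambda = (x - x1) / (x2 - x1);
        return (1 - lambda) * y1 + lambda * y2;
    }
}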
From source file:gedi.util.ArrayUtils.java
/**
 * Returns the sorted permutation of a, i.e. re[0] contains the index of the smallest value in a,
 * re[1] the second smallest and so on.
 * @param a
 * @return
 */
public static int[] sortedPermutation(final double[] a) {
    Integer[] re = new Integer[a.length];
    for (int i = 0; i < re.length; i++)
        re[i] = i;
    Arrays.sort(re, new Comparator<Integer>() {
        @Override
        public int compare(Integer o1, Integer o2) {
            return Double.compare(a[o1], a[o2]);
        }
    });
    return intCollectionToPrimitiveArray(Arrays.asList(re));
}
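The same utility in stream form on Java 8+: sort the index range by the keyed value, again with Double.compare as the comparator, and collect back to a primitive array.

import java.util.stream.IntStream;

class SortedPermutation {
    // re[0] is the index of the smallest value in a, re[1] the second smallest, ...
    static int[] sortedPermutation(double[] a) {
        return IntStream.range(0, a.length).boxed()
                .sorted((i, j) -> Double.compare(a[i], a[j]))
                .mapToInt(Integer::intValue).toArray();
    }
}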