Example usage for java.util HashSet forEach

List of usage examples for java.util HashSet forEach

Introduction

On this page you can find example usages of java.util HashSet forEach.

Prototype

default void forEach(Consumer<? super T> action) 

Source Link

Document

Performs the given action for each element of the Iterable until all elements have been processed or the action throws an exception.

Usage

From source file:sh.isaac.convert.rxnorm.standard.RxNormMojo.java

/**
 * Process CUI rows./*from   w  w w  .  ja va 2s .  c om*/
 *
 * @param conceptData the concept data
 * @throws IOException Signals that an I/O exception has occurred.
 * @throws SQLException the SQL exception
 * @throws PropertyVetoException the property veto exception
 */
private void processCUIRows(ArrayList<RXNCONSO> conceptData)
        throws IOException, SQLException, PropertyVetoException {
    final String rxCui = conceptData.get(0).rxcui;
    final HashSet<String> uniqueTTYs = new HashSet<>();
    final HashSet<String> uniqueSABs = new HashSet<>();

    // ensure all the same CUI, gather the TTYs involved
    conceptData.stream().map((row) -> {
        uniqueTTYs.add(row.tty);
        return row;
    }).map((row) -> {
        uniqueSABs.add(row.sab);
        return row;
    }).filter((row) -> (!row.rxcui.equals(rxCui))).forEachOrdered((_item) -> {
        throw new RuntimeException("Oops");
    });

    ComponentReference cuiConcept;

    if ((uniqueSABs.size() == 1) && uniqueSABs.iterator().next().equals(this.sctSab)) {
        // This is a SCT only concept - we don't want to create it.  But we might need to put some relationships or associations here.
        final String sctId = conceptData.get(0).code;

        if (sctId == null) {
            throw new RuntimeException("Unexpected");
        }

        cuiConcept = ComponentReference.fromConcept(this.sctIdToUUID.get(sctId));

        // Add the RxCUI UUID
        this.importUtil.addUUID(cuiConcept.getPrimordialUuid(), createCUIConceptUUID(rxCui));

        // TODO need to look at what else I should be grabbing - the RXCUI for example should be attached.  What else?
    } else {
        // just creating the reference here, with the UUID - because we don't know if it should be active or inactive yet.
        // create the real concept later.
        cuiConcept = ComponentReference.fromConcept(createCUIConceptUUID(rxCui));

        long conceptTime = Integer.MAX_VALUE;

        // Activate the concept if any description is active
        Status conceptState = Status.INACTIVE;

        this.importUtil.addStringAnnotation(cuiConcept, rxCui,
                this.ptUMLSAttributes.getProperty("RXCUI").getUUID(), Status.ACTIVE);

        final ArrayList<ValuePropertyPairWithSAB> cuiDescriptions = new ArrayList<>();
        final HashSet<String> sabs = new HashSet<>();

        for (final RXNCONSO atom : conceptData) {
            if (atom.sab.equals(this.sctSab)) {
                continue;
            }

            // Add attributes from SAT table
            this.descSat.clearParameters();
            this.descSat.setString(1, rxCui);
            this.descSat.setString(2, atom.rxaui);

            final ArrayList<RXNSAT> satData;
            boolean disableDescription;
            Long descriptionTime;

            try (ResultSet rs = this.descSat.executeQuery()) {
                satData = new ArrayList<>();
                disableDescription = false;
                descriptionTime = null;

                while (rs.next()) {
                    final RXNSAT current = new RXNSAT(rs);

                    satData.add(current);

                    if ("RXN_OBSOLETED".equals(current.atn)) {
                        disableDescription = true;
                    }

                    if ("RXN_ACTIVATED".equals(current.atn)) {
                        try {
                            final long time = this.dateParse.parse(current.atv).getTime();

                            descriptionTime = time;

                            if (time < conceptTime) {
                                conceptTime = time;
                            }
                        } catch (final ParseException e) {
                            throw new RuntimeException("Can't parse date?");
                        }
                    }
                }
            }

            final ValuePropertyPairWithSAB desc = new ValuePropertyPairWithSAB(atom.str,
                    this.ptDescriptions.getProperty(atom.tty), atom.sab, satData);

            if (disableDescription) {
                desc.setDisabled(true);
            } else {
                // if any description is active, concept is still active
                conceptState = Status.ACTIVE;
            }

            if (descriptionTime != null) {
                desc.setTime(descriptionTime);
            }

            desc.setUUID(ConverterUUID.createNamespaceUUIDFromStrings(cuiConcept.getPrimordialUuid().toString(),
                    atom.rxaui));

            // used for sorting description to figure out what to use for FULLY_QUALIFIED_NAME
            cuiDescriptions.add(desc);
            desc.addStringAttribute(this.ptUMLSAttributes.getProperty("RXAUI").getUUID(), atom.rxaui);
            desc.addUUIDAttribute(this.ptUMLSAttributes.getProperty("SAB").getUUID(),
                    this.ptSABs.getProperty(atom.sab).getUUID());

            if (StringUtils.isNotBlank(atom.code) && !atom.code.equals("NOCODE")) {
                desc.addStringAttribute(this.ptUMLSAttributes.getProperty("CODE").getUUID(), atom.code);
            }

            if (StringUtils.isNotBlank(atom.saui)) {
                desc.addStringAttribute(this.ptUMLSAttributes.getProperty("SAUI").getUUID(), atom.saui);
            }

            if (StringUtils.isNotBlank(atom.scui)) {
                desc.addStringAttribute(this.ptUMLSAttributes.getProperty("SCUI").getUUID(), atom.scui);
            }

            if (StringUtils.isNotBlank(atom.suppress)) {
                desc.addUUIDAttribute(this.ptUMLSAttributes.getProperty("SUPPRESS").getUUID(),
                        this.suppress.get(atom.suppress));
            }

            if (StringUtils.isNotBlank(atom.cvf)) {
                if (atom.cvf.equals("4096")) {
                    desc.addRefsetMembership(this.cpcRefsetConcept.getPrimordialUuid());
                } else {
                    throw new RuntimeException("Unexpected value in RXNCONSO cvf column '" + atom.cvf + "'");
                }
            }

            if (!atom.lat.equals("ENG")) {
                ConsoleUtil.printErrorln("Non-english lang settings not handled yet!");
            }

            // TODO - at this point, sometime in the future, we make make attributes out of the relationships that occur between the AUIs
            // and store them on the descriptions, since OTF doesn't allow relationships between descriptions
            // TODO am I supposed to be using sabs?
            sabs.add(atom.sab);
        }

        // sanity check on descriptions - make sure we only have one that is of type synonym with the preferred flag
        final ArrayList<String> items = new ArrayList<>();

        cuiDescriptions.stream()
                .filter((vpp) -> ((vpp.getProperty().getPropertySubType() >= BPT_Descriptions.SYNONYM)
                        && (vpp.getProperty().getPropertySubType() <= (BPT_Descriptions.SYNONYM + 20))))
                .forEachOrdered((vpp) -> {
                    items.add(vpp.getProperty().getSourcePropertyNameFQN() + " "
                            + vpp.getProperty().getPropertySubType());
                }); // Numbers come from the rankings down below in makeDescriptionType(...)

        final HashSet<String> ranksLookedAt = new HashSet<>();

        ranksLookedAt.add("204");
        ranksLookedAt.add("206");
        ranksLookedAt.add("210");
        ranksLookedAt.add("208");
        ranksLookedAt.add("212");

        boolean oneNotInList = false;

        if (items.size() > 1) {
            for (final String s : items) {
                if (!ranksLookedAt.contains(s.substring(s.length() - 3, s.length()))) {
                    oneNotInList = true;
                    break;
                }
            }
        }

        if (oneNotInList) {
            ConsoleUtil.printErrorln(
                    "Need to rank multiple synonym types that are each marked preferred, determine if ranking is appropriate!");
            items.forEach((s) -> {
                ConsoleUtil.printErrorln(s);
            });
        }

        final List<SemanticChronology> addedDescriptions = this.importUtil.addDescriptions(cuiConcept,
                cuiDescriptions);

        if (addedDescriptions.size() != cuiDescriptions.size()) {
            throw new RuntimeException("oops");
        }

        final HashSet<String> uniqueUMLSCUI = new HashSet<>();

        for (int i = 0; i < cuiDescriptions.size(); i++) {
            final SemanticChronology desc = addedDescriptions.get(i);
            final ValuePropertyPairWithSAB descPP = cuiDescriptions.get(i);
            final BiFunction<String, String, Boolean> functions = (atn, atv) -> {
                // Pull these up to the concept.
                if ("UMLSCUI".equals(atn)) {
                    uniqueUMLSCUI.add(atv);
                    return true;
                }

                return false;
            };

            // TODO should I be passing in item code here?
            processSAT(ComponentReference.fromChronology(desc), descPP.getSatData(), null, descPP.getSab(),
                    functions);
        }

        // pulling up the UMLS CUIs.
        // uniqueUMLSCUI is populated during processSAT
        uniqueUMLSCUI.forEach((umlsCui) -> {
            final UUID itemUUID = ConverterUUID.createNamespaceUUIDFromString("UMLSCUI" + umlsCui);

            this.importUtil.addStringAnnotation(cuiConcept, itemUUID, umlsCui,
                    this.ptTermAttributes.getProperty("UMLSCUI").getUUID(), Status.ACTIVE);
        });
        ValuePropertyPairWithAttributes.processAttributes(this.importUtil, cuiDescriptions, addedDescriptions);

        // there are no attributes in rxnorm without an AUI.
        //       try
        //       {
        this.importUtil.addRefsetMembership(cuiConcept, this.allCUIRefsetConcept.getPrimordialUuid(),
                Status.ACTIVE, null);

        //       }
        //       catch (RuntimeException e)
        //       {
        //               if (e.toString().contains("duplicate UUID"))
        //               {
        //                       //ok - this can happen due to multiple merges onto an existing SCT concept
        //               }
        //               else
        //               {
        //                       throw e;
        //               }
        //       }
        // add semantic types
        this.semanticTypeStatement.clearParameters();
        this.semanticTypeStatement.setString(1, rxCui);

        final ResultSet rs = this.semanticTypeStatement.executeQuery();

        processSemanticTypes(cuiConcept, rs);

        if (conceptTime < 0) {
            throw new RuntimeException("oops");
        }

        this.importUtil.createConcept(cuiConcept.getPrimordialUuid(), conceptTime, conceptState, null);
    }

    final HashSet<UUID> parents = new HashSet<>();

    this.cuiRelStatementForward.clearParameters();
    this.cuiRelStatementForward.setString(1, rxCui);
    parents.addAll(addRelationships(cuiConcept,
            REL.read(null, this.cuiRelStatementForward.executeQuery(), true, this.allowedCUIsForSABs,
                    this.skippedRelForNotMatchingCUIFilter, true, (string -> reverseRel(string)))));
    this.cuiRelStatementBackward.clearParameters();
    this.cuiRelStatementBackward.setString(1, rxCui);
    parents.addAll(addRelationships(cuiConcept,
            REL.read(null, this.cuiRelStatementBackward.executeQuery(), false, this.allowedCUIsForSABs,
                    this.skippedRelForNotMatchingCUIFilter, true, (string -> reverseRel(string)))));

    // Have to add multiple parents at once, no place to keep all the other details.  Load those as associations for now.
    if (parents.size() > 0) {
        ComponentReference.fromChronology(this.importUtil.addParent(cuiConcept, null,
                parents.toArray(new UUID[parents.size()]), null, null));
    }
}

From source file:structuredPredictionNLG.SFX.java

/**
 * During this method, we calculate the alignments (naive or random), the language models, the available content and word actions, and finally the feature vectors.
 *
 * The language models and the feature vectors are only rebuilt when the stored caches are reset or
 * cannot be loaded; after rebuilding they are written back through {@code writeLMs()} and
 * {@code writeTrainingData(...)}.
 */
@Override
public void createTrainingData() {
    //setTrainingData(new ArrayList<>(getTrainingData().subList(0, 50)));
    //setTestingData(new ArrayList<>(getTrainingData()));

    // Calculate alignments between the word of the sentence and the attribute/values
    if (getUseAlignments().equals("naive")) {
        createNaiveAlignments(getTrainingData());
    } else {
        createRandomAlignments(getTrainingData());
    }

    // Create (or load from cache) the content and word language models per predicate
    if (isResetStoredCaches() || !loadLMs()) {
        HashMap<String, ArrayList<ArrayList<String>>> wordSequencesPerPredicate = new HashMap<>();
        HashMap<String, ArrayList<ArrayList<String>>> attrSequencesPerPredicate = new HashMap<>();
        getTrainingData().forEach((di) -> {
            String predicate = di.getMeaningRepresentation().getPredicate();
            ArrayList<ArrayList<String>> wordSequences = wordSequencesPerPredicate.computeIfAbsent(predicate,
                    (key) -> new ArrayList<>());
            ArrayList<ArrayList<String>> attrSequences = attrSequencesPerPredicate.computeIfAbsent(predicate,
                    (key) -> new ArrayList<>());

            ArrayList<Action> seq = di.getDirectReferenceSequence();
            ArrayList<String> wordSeq = new ArrayList<>();
            ArrayList<String> attrSeq = new ArrayList<>();

            // We add some empty tokens at the start of each sequence
            wordSeq.add("@@");
            wordSeq.add("@@");
            attrSeq.add("@@");
            attrSeq.add("@@");
            for (Action action : seq) {
                if (!action.getAttribute().equals(Action.TOKEN_END)
                        && !action.getWord().equals(Action.TOKEN_END)) {
                    wordSeq.add(action.getWord());
                }
                // Only record an attribute when it differs from the previous one, so consecutive
                // repetitions collapse into a single entry (attrSeq is never empty here because of
                // the padding tokens added above)
                if (!attrSeq.get(attrSeq.size() - 1).equals(action.getAttribute())) {
                    attrSeq.add(action.getAttribute());
                }
            }
            wordSeq.add(Action.TOKEN_END);
            wordSequences.add(wordSeq);
            attrSequences.add(attrSeq);
        });

        setWordLMsPerPredicate(new HashMap<>());
        setContentLMsPerPredicate(new HashMap<>());
        for (String pred : wordSequencesPerPredicate.keySet()) {
            SimpleLM simpleWordLM = new SimpleLM(3);
            simpleWordLM.trainOnStrings(wordSequencesPerPredicate.get(pred));
            getWordLMsPerPredicate().put(pred, simpleWordLM);

            SimpleLM simpleAttrLM = new SimpleLM(3);
            simpleAttrLM.trainOnStrings(attrSequencesPerPredicate.get(pred));
            getContentLMsPerPredicate().put(pred, simpleAttrLM);
        }
        writeLMs();
    }

    // Go through the sequences in the data and populate the available content and word action dictionaries
    // We populate a distinct word dictionary for each attribute, and populate it with the words of word sequences whose corresponding content sequences contain that attribute
    HashMap<String, HashSet<String>> availableContentActions = new HashMap<>();
    HashMap<String, HashMap<String, HashSet<Action>>> availableWordActions = new HashMap<>();
    getTrainingData().forEach((DI) -> {
        String predicate = DI.getMeaningRepresentation().getPredicate();
        // Every predicate's content dictionary starts out containing the end token
        HashSet<String> contentActions = availableContentActions.computeIfAbsent(predicate, (key) -> {
            HashSet<String> initial = new HashSet<>();
            initial.add(Action.TOKEN_END);
            return initial;
        });
        HashMap<String, HashSet<Action>> wordActionsPerAttr = availableWordActions.computeIfAbsent(predicate,
                (key) -> new HashMap<>());

        ArrayList<Action> realization = DI.getDirectReferenceSequence();
        for (Action a : realization) {
            if (a.getAttribute().equals(Action.TOKEN_END)) {
                continue;
            }

            // Strip the "=value" part, if any, to obtain the bare attribute name
            String attribute = a.getAttribute();
            int valueSeparator = attribute.indexOf('=');
            final String attr = (valueSeparator >= 0) ? attribute.substring(0, valueSeparator) : attribute;

            contentActions.add(attr);
            // Every attribute's word dictionary starts out containing the end-token action
            HashSet<Action> wordActions = wordActionsPerAttr.computeIfAbsent(attr, (key) -> {
                HashSet<Action> initial = new HashSet<>();
                initial.add(new Action(Action.TOKEN_END, key));
                return initial;
            });

            String word = a.getWord();
            if (!word.equals(Action.TOKEN_START) && !word.equals(Action.TOKEN_END)
                    && !word.matches("([,.?!;:'])")) {
                if (word.startsWith(Action.TOKEN_X)) {
                    // Variable tokens are only kept when the attribute name embedded in the token
                    // (characters from index 3 up to the last '_') matches the current attribute
                    if (word.substring(3, word.lastIndexOf('_')).toLowerCase().trim().equals(attr)) {
                        wordActions.add(new Action(word, attr));
                    }
                } else {
                    wordActions.add(new Action(word, attr));
                }
            }
        }
    });
    setAvailableContentActions(availableContentActions);
    setAvailableWordActions(availableWordActions);

    //When using random alignments we do not consider the value alignments either
    if (getUseAlignments().equals("random")) {
        setValueAlignments(new HashMap<>());
    }

    // Infer the feature vectors of the training data
    if (isResetStoredCaches() || !loadTrainingData(getTrainingData().size())) {
        System.out.print("Create training data...");
        Object[] results = inferFeatureAndCostVectors();
        System.out.print("almost...");

        // results[0] / results[1] are cast to the content and word training data respectively
        @SuppressWarnings("unchecked")
        ConcurrentHashMap<DatasetInstance, HashMap<String, ArrayList<Instance>>> contentDataPerInstance = (ConcurrentHashMap<DatasetInstance, HashMap<String, ArrayList<Instance>>>) results[0];
        @SuppressWarnings("unchecked")
        ConcurrentHashMap<DatasetInstance, HashMap<String, HashMap<String, ArrayList<Instance>>>> wordDataPerInstance = (ConcurrentHashMap<DatasetInstance, HashMap<String, HashMap<String, ArrayList<Instance>>>>) results[1];

        // Reorganize the feature/cost vector collections
        // Initially they are mapped according to DatasetInstance (since it helps with parallel processing) but we prefer them mapped by predicate for training
        setPredicateContentTrainingData(new HashMap<>());
        getTrainingData().forEach((di) -> {
            contentDataPerInstance.get(di).forEach((predicate, instances) -> {
                getPredicateContentTrainingData().computeIfAbsent(predicate, (key) -> new ArrayList<>())
                        .addAll(instances);
            });
        });
        setPredicateWordTrainingData(new HashMap<>());
        getTrainingData().forEach((di) -> {
            wordDataPerInstance.get(di).forEach((predicate, instancesPerAttribute) -> {
                getPredicateWordTrainingData().computeIfAbsent(predicate, (key) -> new HashMap<>());
                instancesPerAttribute.forEach((attribute, instances) -> {
                    getPredicateWordTrainingData().get(predicate)
                            .computeIfAbsent(attribute, (key) -> new ArrayList<>())
                            .addAll(instances);
                });
            });
        });
        writeTrainingData(getTrainingData().size());
    }
}

From source file:structuredPredictionNLG.SFX.java

/**
 *
 * @param predicate
 * @param currentAttrValue
 * @param costs
 * @param generatedAttributes
 * @param previousGeneratedWords
 * @param nextGeneratedAttributes
 * @param attrValuesAlreadyMentioned
 * @param attrValuesThatFollow
 * @param wasValueMentioned
 * @param availableWordActions
 * @return
 */
@Override
public Instance createWordInstanceWithCosts(String predicate, String currentAttrValue,
        TObjectDoubleHashMap<String> costs, ArrayList<String> generatedAttributes,
        ArrayList<Action> previousGeneratedWords, ArrayList<String> nextGeneratedAttributes,
        HashSet<String> attrValuesAlreadyMentioned, HashSet<String> attrValuesThatFollow,
        boolean wasValueMentioned, HashMap<String, HashSet<Action>> availableWordActions) {
    String currentAttr = currentAttrValue;
    String currentValue = "";
    if (currentAttr.contains("=")) {
        currentAttr = currentAttrValue.substring(0, currentAttrValue.indexOf('='));
        currentValue = currentAttrValue.substring(currentAttrValue.indexOf('=') + 1);
    }
    if (currentValue.contains(":")) {
        currentValue = currentAttrValue.substring(currentAttrValue.indexOf(':') + 1);
    }
    if (currentValue.isEmpty()) {
        //System.exit(0);
    }

    TObjectDoubleHashMap<String> generalFeatures = new TObjectDoubleHashMap<>();
    HashMap<String, TObjectDoubleHashMap<String>> valueSpecificFeatures = new HashMap<>();
    for (Action action : availableWordActions.get(currentAttr)) {
        valueSpecificFeatures.put(action.getAction(), new TObjectDoubleHashMap<String>());
    }

    /*if (gWords.get(wIndex).getWord().equals(Action.TOKEN_END)) {
    System.out.println("!!! "+ gWords.subList(0, wIndex + 1));
    }*/
    ArrayList<Action> generatedWords = new ArrayList<>();
    ArrayList<Action> generatedWordsInSameAttrValue = new ArrayList<>();
    ArrayList<String> generatedPhrase = new ArrayList<>();
    for (int i = 0; i < previousGeneratedWords.size(); i++) {
        Action a = previousGeneratedWords.get(i);
        if (!a.getWord().equals(Action.TOKEN_START) && !a.getWord().equals(Action.TOKEN_END)) {
            generatedWords.add(a);
            generatedPhrase.add(a.getWord());
            if (a.getAttribute().equals(currentAttrValue)) {
                generatedWordsInSameAttrValue.add(a);
            }
        }
    }

    //Previous word features
    for (int j = 1; j <= 1; j++) {
        String previousWord = "@@";
        if (generatedWords.size() - j >= 0) {
            previousWord = generatedWords.get(generatedWords.size() - j).getWord().trim();
        }
        generalFeatures.put("feature_word_" + j + "_" + previousWord.toLowerCase(), 1.0);
    }
    String prevWord = "@@";
    if (generatedWords.size() - 1 >= 0) {
        prevWord = generatedWords.get(generatedWords.size() - 1).getWord().trim();
    }
    String prev2Word = "@@";
    if (generatedWords.size() - 2 >= 0) {
        prev2Word = generatedWords.get(generatedWords.size() - 2).getWord().trim();
    }
    String prev3Word = "@@";
    if (generatedWords.size() - 3 >= 0) {
        prev3Word = generatedWords.get(generatedWords.size() - 3).getWord().trim();
    }
    String prev4Word = "@@";
    if (generatedWords.size() - 4 >= 0) {
        prev4Word = generatedWords.get(generatedWords.size() - 4).getWord().trim();
    }
    String prev5Word = "@@";
    if (generatedWords.size() - 5 >= 0) {
        prev5Word = generatedWords.get(generatedWords.size() - 5).getWord().trim();
    }

    String prevBigram = prev2Word + "|" + prevWord;
    String prevTrigram = prev3Word + "|" + prev2Word + "|" + prevWord;
    String prev4gram = prev4Word + "|" + prev3Word + "|" + prev2Word + "|" + prevWord;
    String prev5gram = prev5Word + "|" + prev4Word + "|" + prev3Word + "|" + prev2Word + "|" + prevWord;

    generalFeatures.put("feature_word_bigram_" + prevBigram.toLowerCase(), 1.0);
    generalFeatures.put("feature_word_trigram_" + prevTrigram.toLowerCase(), 1.0);
    generalFeatures.put("feature_word_4gram_" + prev4gram.toLowerCase(), 1.0);
    generalFeatures.put("feature_word_5gram_" + prev5gram.toLowerCase(), 1.0);

    /*String bigramWord54 = prev5Word + "|" + prev4Word;
    String bigramWord43 = prev4Word + "|" + prev3Word;
    String bigramWord32 = prev3Word + "|" + prev2Word;
    generalFeatures.put("feature_word_bigramWord54_" + bigramWord54, 1.0);
    generalFeatures.put("feature_word_bigramWord43_" + bigramWord43, 1.0);
    generalFeatures.put("feature_word_bigramWord32_" + bigramWord32, 1.0);
            
    String bigramWordSkip53 = prev5Word + "|" + prev3Word;
    String bigramWordSkip42 = prev4Word + "|" + prev2Word;
    String bigramWordSkip31 = prev3Word + "|" + prevWord;
    generalFeatures.put("feature_word_bigramWordSkip53_" + bigramWordSkip53, 1.0);
    generalFeatures.put("feature_word_bigramWordSkip42_" + bigramWordSkip42, 1.0);
    generalFeatures.put("feature_word_bigramWordSkip31_" + bigramWordSkip31, 1.0);
            
    String trigramWord543 = prev5Word + "|" + prev4Word + "|" + prev3Word;
    String trigramWord432 = prev4Word + "|" + prev3Word + "|" + prev2Word;
    generalFeatures.put("feature_word_trigramWord543_" + trigramWord543, 1.0);
    generalFeatures.put("feature_word_trigramWord432_" + trigramWord432, 1.0);
            
    String trigramWordSkip542 = prev5Word + "|" + prev4Word + "|" + prev2Word;
    String trigramWordSkip532 = prev5Word + "|" + prev3Word + "|" + prev2Word;
    String trigramWordSkip431 = prev4Word + "|" + prev3Word + "|" + prevWord;
    String trigramWordSkip421 = prev4Word + "|" + prev2Word + "|" + prevWord;
    generalFeatures.put("feature_word_trigramWordSkip542_" + trigramWordSkip542, 1.0);
    generalFeatures.put("feature_word_trigramWordSkip532_" + trigramWordSkip532, 1.0);
    generalFeatures.put("feature_word_trigramWordSkip431_" + trigramWordSkip431, 1.0);
    generalFeatures.put("feature_word_trigramWordSkip421_" + trigramWordSkip421, 1.0);*/
    //Previous words in same as current attrValue features
    /*if (generatedWordsInSameAttrValue.isEmpty()) {
    generalFeatures.put("feature_currentAttrValueWord_isEmpty", 1.0);
    }
            
    for (int j = 1; j <= 1; j++) {
    String previousCurrentAttrValueWord = "@@";
    if (generatedWordsInSameAttrValue.size() - j >= 0) {
        previousCurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - j).getWord().trim();
    }
    generalFeatures.put("feature_currentAttrValueWord_" + j + "_" + previousCurrentAttrValueWord.toLowerCase(), 1.0);
    }
    String prevCurrentAttrValueWord = "@@";
    if (generatedWordsInSameAttrValue.size() - 1 >= 0) {
    prevCurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 1).getWord().trim();
    }
    String prev2CurrentAttrValueWord = "@@";
    if (generatedWordsInSameAttrValue.size() - 2 >= 0) {
    prev2CurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 2).getWord().trim();
    }
    String prev3CurrentAttrValueWord = "@@";
    if (generatedWordsInSameAttrValue.size() - 3 >= 0) {
    prev3CurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 3).getWord().trim();
    }
    String prev4CurrentAttrValueWord = "@@";
    if (generatedWordsInSameAttrValue.size() - 4 >= 0) {
    prev4CurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 4).getWord().trim();
    }
    String prev5CurrentAttrValueWord = "@@";
    if (generatedWordsInSameAttrValue.size() - 5 >= 0) {
    prev5CurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 5).getWord().trim();
    }
            
    String prevCurrentAttrValueBigram = prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord;
    String prevCurrentAttrValueTrigram = prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord;
    String prevCurrentAttrValue4gram = prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord;
    String prevCurrentAttrValue5gram = prev5CurrentAttrValueWord + "|" + prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord;
            
    generalFeatures.put("feature_currentAttrValueWord_bigram_" + prevCurrentAttrValueBigram.toLowerCase(), 1.0);
    generalFeatures.put("feature_currentAttrValueWord_trigram_" + prevCurrentAttrValueTrigram.toLowerCase(), 1.0);
    generalFeatures.put("feature_currentAttrValueWord_4gram_" + prevCurrentAttrValue4gram.toLowerCase(), 1.0);
    generalFeatures.put("feature_currentAttrValueWord_5gram_" + prevCurrentAttrValue5gram.toLowerCase(), 1.0);*/

    /*String bigramCurrentAttrValueWord54 = prev5CurrentAttrValueWord + "|" + prev4CurrentAttrValueWord;
           String bigramCurrentAttrValueWord43 = prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord;
           String bigramCurrentAttrValueWord32 = prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord;
           generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWord54_" + bigramCurrentAttrValueWord54, 1.0);
           generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWord43_" + bigramCurrentAttrValueWord43, 1.0);
           generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWord32_" + bigramCurrentAttrValueWord32, 1.0);
                   
           String bigramCurrentAttrValueWordSkip53 = prev5CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord;
           String bigramCurrentAttrValueWordSkip42 = prev4CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord;
           String bigramCurrentAttrValueWordSkip31 = prev3CurrentAttrValueWord + "|" + prevCurrentAttrValueWord;
           generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWordSkip53_" + bigramCurrentAttrValueWordSkip53, 1.0);
           generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWordSkip42_" + bigramCurrentAttrValueWordSkip42, 1.0);
           generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWordSkip31_" + bigramCurrentAttrValueWordSkip31, 1.0);
                   
           String trigramCurrentAttrValueWord543 = prev5CurrentAttrValueWord + "|" + prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord;
           String trigramCurrentAttrValueWord432 = prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord;
           generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWord543_" + trigramCurrentAttrValueWord543, 1.0);
           generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWord432_" + trigramCurrentAttrValueWord432, 1.0);
                   
           String trigramCurrentAttrValueWordSkip542 = prev5CurrentAttrValueWord + "|" + prev4CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord;
           String trigramCurrentAttrValueWordSkip532 = prev5CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord;
           String trigramCurrentAttrValueWordSkip431 = prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prevCurrentAttrValueWord;
           String trigramCurrentAttrValueWordSkip421 = prev4CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord;
           generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWordSkip542_" + trigramCurrentAttrValueWordSkip542, 1.0);
           generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWordSkip532_" + trigramCurrentAttrValueWordSkip532, 1.0);
           generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWordSkip431_" + trigramCurrentAttrValueWordSkip431, 1.0);
           generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWordSkip421_" + trigramCurrentAttrValueWordSkip421, 1.0);*/
    //Previous Attr|Word features
    for (int j = 1; j <= 1; j++) {
        String previousAttrWord = "@@";
        if (generatedWords.size() - j >= 0) {
            if (generatedWords.get(generatedWords.size() - j).getAttribute().contains("=")) {
                previousAttrWord = generatedWords.get(generatedWords.size() - j).getAttribute().trim()
                        .substring(0, generatedWords.get(generatedWords.size() - j).getAttribute().indexOf('='))
                        + "|" + generatedWords.get(generatedWords.size() - j).getWord().trim();
            } else {
                previousAttrWord = generatedWords.get(generatedWords.size() - j).getAttribute().trim() + "|"
                        + generatedWords.get(generatedWords.size() - j).getWord().trim();
            }
        }
        generalFeatures.put("feature_attrWord_" + j + "_" + previousAttrWord.toLowerCase(), 1.0);
    }
    String prevAttrWord = "@@";
    if (generatedWords.size() - 1 >= 0) {
        if (generatedWords.get(generatedWords.size() - 1).getAttribute().contains("=")) {
            prevAttrWord = generatedWords.get(generatedWords.size() - 1).getAttribute().trim().substring(0,
                    generatedWords.get(generatedWords.size() - 1).getAttribute().indexOf('=')) + ":"
                    + generatedWords.get(generatedWords.size() - 1).getWord().trim();
        } else {
            prevAttrWord = generatedWords.get(generatedWords.size() - 1).getAttribute().trim() + ":"
                    + generatedWords.get(generatedWords.size() - 1).getWord().trim();

        }
    }
    String prev2AttrWord = "@@";
    if (generatedWords.size() - 2 >= 0) {
        if (generatedWords.get(generatedWords.size() - 2).getAttribute().contains("=")) {
            prev2AttrWord = generatedWords.get(generatedWords.size() - 2).getAttribute().trim().substring(0,
                    generatedWords.get(generatedWords.size() - 2).getAttribute().indexOf('=')) + ":"
                    + generatedWords.get(generatedWords.size() - 2).getWord().trim();
        } else {
            prev2AttrWord = generatedWords.get(generatedWords.size() - 2).getAttribute().trim() + ":"
                    + generatedWords.get(generatedWords.size() - 2).getWord().trim();
        }
    }
    String prev3AttrWord = "@@";
    if (generatedWords.size() - 3 >= 0) {
        if (generatedWords.get(generatedWords.size() - 3).getAttribute().contains("=")) {
            prev3AttrWord = generatedWords.get(generatedWords.size() - 3).getAttribute().trim().substring(0,
                    generatedWords.get(generatedWords.size() - 3).getAttribute().indexOf('=')) + ":"
                    + generatedWords.get(generatedWords.size() - 3).getWord().trim();
        } else {
            prev3AttrWord = generatedWords.get(generatedWords.size() - 3).getAttribute().trim() + ":"
                    + generatedWords.get(generatedWords.size() - 3).getWord().trim();
        }
    }
    String prev4AttrWord = "@@";
    if (generatedWords.size() - 4 >= 0) {
        if (generatedWords.get(generatedWords.size() - 4).getAttribute().contains("=")) {
            prev4AttrWord = generatedWords.get(generatedWords.size() - 4).getAttribute().trim().substring(0,
                    generatedWords.get(generatedWords.size() - 4).getAttribute().indexOf('=')) + ":"
                    + generatedWords.get(generatedWords.size() - 4).getWord().trim();
        } else {
            prev4AttrWord = generatedWords.get(generatedWords.size() - 4).getAttribute().trim() + ":"
                    + generatedWords.get(generatedWords.size() - 4).getWord().trim();
        }
    }
    String prev5AttrWord = "@@";
    if (generatedWords.size() - 5 >= 0) {
        if (generatedWords.get(generatedWords.size() - 5).getAttribute().contains("=")) {
            prev5AttrWord = generatedWords.get(generatedWords.size() - 5).getAttribute().trim().substring(0,
                    generatedWords.get(generatedWords.size() - 5).getAttribute().indexOf('=')) + ":"
                    + generatedWords.get(generatedWords.size() - 5).getWord().trim();
        } else {
            prev5AttrWord = generatedWords.get(generatedWords.size() - 5).getAttribute().trim() + ":"
                    + generatedWords.get(generatedWords.size() - 5).getWord().trim();
        }
    }

    String prevAttrWordBigram = prev2AttrWord + "|" + prevAttrWord;
    String prevAttrWordTrigram = prev3AttrWord + "|" + prev2AttrWord + "|" + prevAttrWord;
    String prevAttrWord4gram = prev4AttrWord + "|" + prev3AttrWord + "|" + prev2AttrWord + "|" + prevAttrWord;
    String prevAttrWord5gram = prev5AttrWord + "|" + prev4AttrWord + "|" + prev3AttrWord + "|" + prev2AttrWord
            + "|" + prevAttrWord;

    generalFeatures.put("feature_attrWord_bigram_" + prevAttrWordBigram.toLowerCase(), 1.0);
    generalFeatures.put("feature_attrWord_trigram_" + prevAttrWordTrigram.toLowerCase(), 1.0);
    generalFeatures.put("feature_attrWord_4gram_" + prevAttrWord4gram.toLowerCase(), 1.0);
    generalFeatures.put("feature_attrWord_5gram_" + prevAttrWord5gram.toLowerCase(), 1.0);

    /*String bigramAttrWord54 = prev5AttrWord + "|" + prev4AttrWord;
    String bigramAttrWord43 = prev4AttrWord + "|" + prev3AttrWord;
    String bigramAttrWord32 = prev3AttrWord + "|" + prev2AttrWord;
    generalFeatures.put("feature_attrWord_bigramAttrWord54_" + bigramAttrWord54, 1.0);
    generalFeatures.put("feature_attrWord_bigramAttrWord43_" + bigramAttrWord43, 1.0);
    generalFeatures.put("feature_attrWord_bigramAttrWord32_" + bigramAttrWord32, 1.0);
            
    String bigramAttrWordSkip53 = prev5AttrWord + "|" + prev3AttrWord;
    String bigramAttrWordSkip42 = prev4AttrWord + "|" + prev2AttrWord;
    String bigramAttrWordSkip31 = prev3AttrWord + "|" + prevAttrWord;
    generalFeatures.put("feature_attrWord_bigramAttrWordSkip53_" + bigramAttrWordSkip53, 1.0);
    generalFeatures.put("feature_attrWord_bigramAttrWordSkip42_" + bigramAttrWordSkip42, 1.0);
    generalFeatures.put("feature_attrWord_bigramAttrWordSkip31_" + bigramAttrWordSkip31, 1.0);
            
    String trigramAttrWord543 = prev5AttrWord + "|" + prev4AttrWord + "|" + prev3AttrWord;
    String trigramAttrWord432 = prev4AttrWord + "|" + prev3AttrWord + "|" + prev2AttrWord;
    generalFeatures.put("feature_attrWord_trigramAttrWord543_" + trigramAttrWord543, 1.0);
    generalFeatures.put("feature_attrWord_trigramAttrWord432_" + trigramAttrWord432, 1.0);
            
    String trigramAttrWordSkip542 = prev5AttrWord + "|" + prev4AttrWord + "|" + prev2AttrWord;
    String trigramAttrWordSkip532 = prev5AttrWord + "|" + prev3AttrWord + "|" + prev2AttrWord;
    String trigramAttrWordSkip431 = prev4AttrWord + "|" + prev3AttrWord + "|" + prevAttrWord;
    String trigramAttrWordSkip421 = prev4AttrWord + "|" + prev2AttrWord + "|" + prevAttrWord;
    generalFeatures.put("feature_attrWord_trigramAttrWordSkip542_" + trigramAttrWordSkip542, 1.0);
    generalFeatures.put("feature_attrWord_trigramAttrWordSkip532_" + trigramAttrWordSkip532, 1.0);
    generalFeatures.put("feature_attrWord_trigramAttrWordSkip431_" + trigramAttrWordSkip431, 1.0);
    generalFeatures.put("feature_attrWord_trigramAttrWordSkip421_" + trigramAttrWordSkip421, 1.0);*/
    //Previous AttrValue|Word features
    for (int j = 1; j <= 1; j++) {
        String previousAttrWord = "@@";
        if (generatedWords.size() - j >= 0) {
            previousAttrWord = generatedWords.get(generatedWords.size() - j).getAttribute().trim() + "|"
                    + generatedWords.get(generatedWords.size() - j).getWord().trim();
        }
        generalFeatures.put("feature_attrValueWord_" + j + "_" + previousAttrWord.toLowerCase(), 1.0);
    }
    String prevAttrValueWord = "@@";
    if (generatedWords.size() - 1 >= 0) {
        prevAttrValueWord = generatedWords.get(generatedWords.size() - 1).getAttribute().trim() + ":"
                + generatedWords.get(generatedWords.size() - 1).getWord().trim();
    }
    String prev2AttrValueWord = "@@";
    if (generatedWords.size() - 2 >= 0) {
        prev2AttrValueWord = generatedWords.get(generatedWords.size() - 2).getAttribute().trim() + ":"
                + generatedWords.get(generatedWords.size() - 2).getWord().trim();
    }
    String prev3AttrValueWord = "@@";
    if (generatedWords.size() - 3 >= 0) {
        prev3AttrValueWord = generatedWords.get(generatedWords.size() - 3).getAttribute().trim() + ":"
                + generatedWords.get(generatedWords.size() - 3).getWord().trim();
    }
    String prev4AttrValueWord = "@@";
    if (generatedWords.size() - 4 >= 0) {
        prev4AttrValueWord = generatedWords.get(generatedWords.size() - 4).getAttribute().trim() + ":"
                + generatedWords.get(generatedWords.size() - 4).getWord().trim();
    }
    String prev5AttrValueWord = "@@";
    if (generatedWords.size() - 5 >= 0) {
        prev5AttrValueWord = generatedWords.get(generatedWords.size() - 5).getAttribute().trim() + ":"
                + generatedWords.get(generatedWords.size() - 5).getWord().trim();
    }

    String prevAttrValueWordBigram = prev2AttrValueWord + "|" + prevAttrValueWord;
    String prevAttrValueWordTrigram = prev3AttrValueWord + "|" + prev2AttrValueWord + "|" + prevAttrValueWord;
    String prevAttrValueWord4gram = prev4AttrValueWord + "|" + prev3AttrValueWord + "|" + prev2AttrValueWord
            + "|" + prevAttrValueWord;
    String prevAttrValueWord5gram = prev5AttrValueWord + "|" + prev4AttrValueWord + "|" + prev3AttrValueWord
            + "|" + prev2AttrValueWord + "|" + prevAttrValueWord;

    generalFeatures.put("feature_attrValueWord_bigram_" + prevAttrValueWordBigram.toLowerCase(), 1.0);
    generalFeatures.put("feature_attrValueWord_trigram_" + prevAttrValueWordTrigram.toLowerCase(), 1.0);
    generalFeatures.put("feature_attrValueWord_4gram_" + prevAttrValueWord4gram.toLowerCase(), 1.0);
    generalFeatures.put("feature_attrValueWord_5gram_" + prevAttrValueWord5gram.toLowerCase(), 1.0);

    /*String bigramAttrValueWord54 = prev5AttrValueWord + "|" + prev4AttrValueWord;
    String bigramAttrValueWord43 = prev4AttrValueWord + "|" + prev3AttrValueWord;
    String bigramAttrValueWord32 = prev3AttrValueWord + "|" + prev2AttrValueWord;
    generalFeatures.put("feature_attrValueWord_bigramAttrValueWord54_" + bigramAttrValueWord54, 1.0);
    generalFeatures.put("feature_attrValueWord_bigramAttrValueWord43_" + bigramAttrValueWord43, 1.0);
    generalFeatures.put("feature_attrValueWord_bigramAttrValueWord32_" + bigramAttrValueWord32, 1.0);
            
    String bigramAttrValueWordSkip53 = prev5AttrValueWord + "|" + prev3AttrValueWord;
    String bigramAttrValueWordSkip42 = prev4AttrValueWord + "|" + prev2AttrValueWord;
    String bigramAttrValueWordSkip31 = prev3AttrValueWord + "|" + prevAttrValueWord;
    generalFeatures.put("feature_attrValueWord_bigramAttrValueWordSkip53_" + bigramAttrValueWordSkip53, 1.0);
    generalFeatures.put("feature_attrValueWord_bigramAttrValueWordSkip42_" + bigramAttrValueWordSkip42, 1.0);
    generalFeatures.put("feature_attrValueWord_bigramAttrValueWordSkip31_" + bigramAttrValueWordSkip31, 1.0);
            
    String trigramAttrValueWord543 = prev5AttrValueWord + "|" + prev4AttrValueWord + "|" + prev3AttrValueWord;
    String trigramAttrValueWord432 = prev4AttrValueWord + "|" + prev3AttrValueWord + "|" + prev2AttrValueWord;
    generalFeatures.put("feature_attrValueWord_trigramAttrValueWord543_" + trigramAttrValueWord543, 1.0);
    generalFeatures.put("feature_attrValueWord_trigramAttrValueWord432_" + trigramAttrValueWord432, 1.0);
            
    String trigramAttrValueWordSkip542 = prev5AttrValueWord + "|" + prev4AttrValueWord + "|" + prev2AttrValueWord;
    String trigramAttrValueWordSkip532 = prev5AttrValueWord + "|" + prev3AttrValueWord + "|" + prev2AttrValueWord;
    String trigramAttrValueWordSkip431 = prev4AttrValueWord + "|" + prev3AttrValueWord + "|" + prevAttrValueWord;
    String trigramAttrValueWordSkip421 = prev4AttrValueWord + "|" + prev2AttrValueWord + "|" + prevAttrValueWord;
    generalFeatures.put("feature_attrValueWord_trigramAttrValueWordSkip542_" + trigramAttrValueWordSkip542, 1.0);
    generalFeatures.put("feature_attrValueWord_trigramAttrValueWordSkip532_" + trigramAttrValueWordSkip532, 1.0);
    generalFeatures.put("feature_attrValueWord_trigramAttrValueWordSkip431_" + trigramAttrValueWordSkip431, 1.0);
    generalFeatures.put("feature_attrValueWord_trigramAttrValueWordSkip421_" + trigramAttrValueWordSkip421, 1.0);*/
    //Previous attrValue features
    int attributeSize = generatedAttributes.size();
    for (int j = 1; j <= 1; j++) {
        String previousAttrValue = "@@";
        if (attributeSize - j >= 0) {
            previousAttrValue = generatedAttributes.get(attributeSize - j).trim();
        }
        generalFeatures.put("feature_attrValue_" + j + "_" + previousAttrValue, 1.0);
    }
    String prevAttrValue = "@@";
    if (attributeSize - 1 >= 0) {
        prevAttrValue = generatedAttributes.get(attributeSize - 1).trim();
    }
    String prev2AttrValue = "@@";
    if (attributeSize - 2 >= 0) {
        prev2AttrValue = generatedAttributes.get(attributeSize - 2).trim();
    }
    String prev3AttrValue = "@@";
    if (attributeSize - 3 >= 0) {
        prev3AttrValue = generatedAttributes.get(attributeSize - 3).trim();
    }
    String prev4AttrValue = "@@";
    if (attributeSize - 4 >= 0) {
        prev4AttrValue = generatedAttributes.get(attributeSize - 4).trim();
    }
    String prev5AttrValue = "@@";
    if (attributeSize - 5 >= 0) {
        prev5AttrValue = generatedAttributes.get(attributeSize - 5).trim();
    }

    String prevAttrBigramValue = prev2AttrValue + "|" + prevAttrValue;
    String prevAttrTrigramValue = prev3AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue;
    String prevAttr4gramValue = prev4AttrValue + "|" + prev3AttrValue + "|" + prev2AttrValue + "|"
            + prevAttrValue;
    String prevAttr5gramValue = prev5AttrValue + "|" + prev4AttrValue + "|" + prev3AttrValue + "|"
            + prev2AttrValue + "|" + prevAttrValue;

    generalFeatures.put("feature_attrValue_bigram_" + prevAttrBigramValue.toLowerCase(), 1.0);
    generalFeatures.put("feature_attrValue_trigram_" + prevAttrTrigramValue.toLowerCase(), 1.0);
    generalFeatures.put("feature_attrValue_4gram_" + prevAttr4gramValue.toLowerCase(), 1.0);
    generalFeatures.put("feature_attrValue_5gram_" + prevAttr5gramValue.toLowerCase(), 1.0);

    /*String bigramAttrValue54 = prev5AttrValue + "|" + prev4AttrValue;
    String bigramAttrValue43 = prev4AttrValue + "|" + prev3AttrValue;
    String bigramAttrValue32 = prev3AttrValue + "|" + prev2AttrValue;
    generalFeatures.put("feature_attrValue_bigramAttrValue54_" + bigramAttrValue54, 1.0);
    generalFeatures.put("feature_attrValue_bigramAttrValue43_" + bigramAttrValue43, 1.0);
    generalFeatures.put("feature_attrValue_bigramAttrValue32_" + bigramAttrValue32, 1.0);
            
    String bigramAttrValueSkip53 = prev5AttrValue + "|" + prev3AttrValue;
    String bigramAttrValueSkip42 = prev4AttrValue + "|" + prev2AttrValue;
    String bigramAttrValueSkip31 = prev3AttrValue + "|" + prevAttrValue;
    generalFeatures.put("feature_attrValue_bigramAttrValueSkip53_" + bigramAttrValueSkip53, 1.0);
    generalFeatures.put("feature_attrValue_bigramAttrValueSkip42_" + bigramAttrValueSkip42, 1.0);
    generalFeatures.put("feature_attrValue_bigramAttrValueSkip31_" + bigramAttrValueSkip31, 1.0);
            
    String trigramAttrValue543 = prev5AttrValue + "|" + prev4AttrValue + "|" + prev3AttrValue;
    String trigramAttrValue432 = prev4AttrValue + "|" + prev3AttrValue + "|" + prev2AttrValue;
    generalFeatures.put("feature_attrValue_trigramAttrValue543_" + trigramAttrValue543, 1.0);
    generalFeatures.put("feature_attrValue_trigramAttrValue432_" + trigramAttrValue432, 1.0);
            
    String trigramAttrValueSkip542 = prev5AttrValue + "|" + prev4AttrValue + "|" + prev2AttrValue;
    String trigramAttrValueSkip532 = prev5AttrValue + "|" + prev3AttrValue + "|" + prev2AttrValue;
    String trigramAttrValueSkip431 = prev4AttrValue + "|" + prev3AttrValue + "|" + prevAttrValue;
    String trigramAttrValueSkip421 = prev4AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue;
    generalFeatures.put("feature_attrValue_trigramAttrValueSkip542_" + trigramAttrValueSkip542, 1.0);
    generalFeatures.put("feature_attrValue_trigramAttrValueSkip532_" + trigramAttrValueSkip532, 1.0);
    generalFeatures.put("feature_attrValue_trigramAttrValueSkip431_" + trigramAttrValueSkip431, 1.0);
    generalFeatures.put("feature_attrValue_trigramAttrValueSkip421_" + trigramAttrValueSkip421, 1.0);*/
    //Previous attr features
    for (int j = 1; j <= 1; j++) {
        String previousAttr = "@@";
        if (attributeSize - j >= 0) {
            if (generatedAttributes.get(attributeSize - j).contains("=")) {
                previousAttr = generatedAttributes.get(attributeSize - j).trim().substring(0,
                        generatedAttributes.get(attributeSize - j).indexOf('='));
            } else {
                previousAttr = generatedAttributes.get(attributeSize - j).trim();
            }
        }
        generalFeatures.put("feature_attr_" + j + "_" + previousAttr, 1.0);
    }
    String prevAttr = "@@";
    if (attributeSize - 1 >= 0) {
        if (generatedAttributes.get(attributeSize - 1).contains("=")) {
            prevAttr = generatedAttributes.get(attributeSize - 1).trim().substring(0,
                    generatedAttributes.get(attributeSize - 1).indexOf('='));
        } else {
            prevAttr = generatedAttributes.get(attributeSize - 1).trim();
        }
    }
    String prev2Attr = "@@";
    if (attributeSize - 2 >= 0) {
        if (generatedAttributes.get(attributeSize - 2).contains("=")) {
            prev2Attr = generatedAttributes.get(attributeSize - 2).trim().substring(0,
                    generatedAttributes.get(attributeSize - 2).indexOf('='));
        } else {
            prev2Attr = generatedAttributes.get(attributeSize - 2).trim();
        }
    }
    String prev3Attr = "@@";
    if (attributeSize - 3 >= 0) {
        if (generatedAttributes.get(attributeSize - 3).contains("=")) {
            prev3Attr = generatedAttributes.get(attributeSize - 3).trim().substring(0,
                    generatedAttributes.get(attributeSize - 3).indexOf('='));
        } else {
            prev3Attr = generatedAttributes.get(attributeSize - 3).trim();
        }
    }
    String prev4Attr = "@@";
    if (attributeSize - 4 >= 0) {
        if (generatedAttributes.get(attributeSize - 4).contains("=")) {
            prev4Attr = generatedAttributes.get(attributeSize - 4).trim().substring(0,
                    generatedAttributes.get(attributeSize - 4).indexOf('='));
        } else {
            prev4Attr = generatedAttributes.get(attributeSize - 4).trim();
        }
    }
    String prev5Attr = "@@";
    if (attributeSize - 5 >= 0) {
        if (generatedAttributes.get(attributeSize - 5).contains("=")) {
            prev5Attr = generatedAttributes.get(attributeSize - 5).trim().substring(0,
                    generatedAttributes.get(attributeSize - 5).indexOf('='));
        } else {
            prev5Attr = generatedAttributes.get(attributeSize - 5).trim();
        }
    }

    String prevAttrBigram = prev2Attr + "|" + prevAttr;
    String prevAttrTrigram = prev3Attr + "|" + prev2Attr + "|" + prevAttr;
    String prevAttr4gram = prev4Attr + "|" + prev3Attr + "|" + prev2Attr + "|" + prevAttr;
    String prevAttr5gram = prev5Attr + "|" + prev4Attr + "|" + prev3Attr + "|" + prev2Attr + "|" + prevAttr;

    generalFeatures.put("feature_attr_bigram_" + prevAttrBigram.toLowerCase(), 1.0);
    generalFeatures.put("feature_attr_trigram_" + prevAttrTrigram.toLowerCase(), 1.0);
    generalFeatures.put("feature_attr_4gram_" + prevAttr4gram.toLowerCase(), 1.0);
    generalFeatures.put("feature_attr_5gram_" + prevAttr5gram.toLowerCase(), 1.0);

    /*String bigramAttr54 = prev5Attr + "|" + prev4Attr;
    String bigramAttr43 = prev4Attr + "|" + prev3Attr;
    String bigramAttr32 = prev3Attr + "|" + prev2Attr;
    generalFeatures.put("feature_attr_bigramAttr54_" + bigramAttr54, 1.0);
    generalFeatures.put("feature_attr_bigramAttr43_" + bigramAttr43, 1.0);
    generalFeatures.put("feature_attr_bigramAttr32_" + bigramAttr32, 1.0);
            
    String bigramAttrSkip53 = prev5Attr + "|" + prev3Attr;
    String bigramAttrSkip42 = prev4Attr + "|" + prev2Attr;
    String bigramAttrSkip31 = prev3Attr + "|" + prevAttr;
    generalFeatures.put("feature_attr_bigramAttrSkip53_" + bigramAttrSkip53, 1.0);
    generalFeatures.put("feature_attr_bigramAttrSkip42_" + bigramAttrSkip42, 1.0);
    generalFeatures.put("feature_attr_bigramAttrSkip31_" + bigramAttrSkip31, 1.0);
            
    String trigramAttr543 = prev5Attr + "|" + prev4Attr + "|" + prev3Attr;
    String trigramAttr432 = prev4Attr + "|" + prev3Attr + "|" + prev2Attr;
    generalFeatures.put("feature_attr_trigramAttr543_" + trigramAttr543, 1.0);
    generalFeatures.put("feature_attr_trigramAttr432_" + trigramAttr432, 1.0);
            
    String trigramAttrSkip542 = prev5Attr + "|" + prev4Attr + "|" + prev2Attr;
    String trigramAttrSkip532 = prev5Attr + "|" + prev3Attr + "|" + prev2Attr;
    String trigramAttrSkip431 = prev4Attr + "|" + prev3Attr + "|" + prevAttr;
    String trigramAttrSkip421 = prev4Attr + "|" + prev2Attr + "|" + prevAttr;
    generalFeatures.put("feature_attr_trigramAttrSkip542_" + trigramAttrSkip542, 1.0);
    generalFeatures.put("feature_attr_trigramAttrSkip532_" + trigramAttrSkip532, 1.0);
    generalFeatures.put("feature_attr_trigramAttrSkip431_" + trigramAttrSkip431, 1.0);
    generalFeatures.put("feature_attr_trigramAttrSkip421_" + trigramAttrSkip421, 1.0);*/
    //Next attr features
    for (int j = 0; j < 1; j++) {
        String nextAttr = "@@";
        if (j < nextGeneratedAttributes.size()) {
            if (nextGeneratedAttributes.get(j).contains("=")) {
                nextAttr = nextGeneratedAttributes.get(j).trim().substring(0,
                        nextGeneratedAttributes.get(j).indexOf('='));
            } else {
                nextAttr = nextGeneratedAttributes.get(j).trim();
            }
        }
        generalFeatures.put("feature_nextAttr_" + j + "_" + nextAttr, 1.0);
    }
    String nextAttr = "@@";
    if (0 < nextGeneratedAttributes.size()) {
        if (nextGeneratedAttributes.get(0).contains("=")) {
            nextAttr = nextGeneratedAttributes.get(0).trim().substring(0,
                    nextGeneratedAttributes.get(0).indexOf('='));
        } else {
            nextAttr = nextGeneratedAttributes.get(0).trim();
        }
    }
    String next2Attr = "@@";
    if (1 < nextGeneratedAttributes.size()) {
        if (nextGeneratedAttributes.get(1).contains("=")) {
            next2Attr = nextGeneratedAttributes.get(1).trim().substring(0,
                    nextGeneratedAttributes.get(1).indexOf('='));
        } else {
            next2Attr = nextGeneratedAttributes.get(1).trim();
        }
    }
    String next3Attr = "@@";
    if (2 < nextGeneratedAttributes.size()) {
        if (nextGeneratedAttributes.get(2).contains("=")) {
            next3Attr = nextGeneratedAttributes.get(2).trim().substring(0,
                    nextGeneratedAttributes.get(2).indexOf('='));
        } else {
            next3Attr = nextGeneratedAttributes.get(2).trim();
        }
    }
    String next4Attr = "@@";
    if (3 < nextGeneratedAttributes.size()) {
        if (nextGeneratedAttributes.get(3).contains("=")) {
            next4Attr = nextGeneratedAttributes.get(3).trim().substring(0,
                    nextGeneratedAttributes.get(3).indexOf('='));
        } else {
            next4Attr = nextGeneratedAttributes.get(3).trim();
        }
    }
    String next5Attr = "@@";
    if (4 < nextGeneratedAttributes.size()) {
        if (nextGeneratedAttributes.get(4).contains("=")) {
            next5Attr = nextGeneratedAttributes.get(4).trim().substring(0,
                    nextGeneratedAttributes.get(4).indexOf('='));
        } else {
            next5Attr = nextGeneratedAttributes.get(4).trim();
        }
    }

    String nextAttrBigram = nextAttr + "|" + next2Attr;
    String nextAttrTrigram = nextAttr + "|" + next2Attr + "|" + next3Attr;
    String nextAttr4gram = nextAttr + "|" + next2Attr + "|" + next3Attr + "|" + next4Attr;
    String nextAttr5gram = nextAttr + "|" + next2Attr + "|" + next3Attr + "|" + next4Attr + "|" + next5Attr;

    generalFeatures.put("feature_nextAttr_bigram_" + nextAttrBigram.toLowerCase(), 1.0);
    generalFeatures.put("feature_nextAttr_trigram_" + nextAttrTrigram.toLowerCase(), 1.0);
    generalFeatures.put("feature_nextAttr_4gram_" + nextAttr4gram.toLowerCase(), 1.0);
    generalFeatures.put("feature_nextAttr_5gram_" + nextAttr5gram.toLowerCase(), 1.0);

    //Next attrValue features
    for (int j = 0; j < 1; j++) {
        String nextAttrValue = "@@";
        if (j < nextGeneratedAttributes.size()) {
            nextAttrValue = nextGeneratedAttributes.get(j).trim();
        }
        generalFeatures.put("feature_nextAttrValue_" + j + "_" + nextAttrValue, 1.0);
    }
    String nextAttrValue = "@@";
    if (0 < nextGeneratedAttributes.size()) {
        nextAttrValue = nextGeneratedAttributes.get(0).trim();
    }
    String next2AttrValue = "@@";
    if (1 < nextGeneratedAttributes.size()) {
        next2AttrValue = nextGeneratedAttributes.get(1).trim();
    }
    String next3AttrValue = "@@";
    if (2 < nextGeneratedAttributes.size()) {
        next3AttrValue = nextGeneratedAttributes.get(2).trim();
    }
    String next4AttrValue = "@@";
    if (3 < nextGeneratedAttributes.size()) {
        next4AttrValue = nextGeneratedAttributes.get(3).trim();
    }
    String next5AttrValue = "@@";
    if (4 < nextGeneratedAttributes.size()) {
        next5AttrValue = nextGeneratedAttributes.get(4).trim();
    }

    String nextAttrValueBigram = nextAttrValue + "|" + next2AttrValue;
    String nextAttrValueTrigram = nextAttrValue + "|" + next2AttrValue + "|" + next3AttrValue;
    String nextAttrValue4gram = nextAttrValue + "|" + next2AttrValue + "|" + next3AttrValue + "|"
            + next4AttrValue;
    String nextAttrValue5gram = nextAttrValue + "|" + next2AttrValue + "|" + next3AttrValue + "|"
            + next4AttrValue + "|" + next5AttrValue;

    generalFeatures.put("feature_nextAttrValue_bigram_" + nextAttrValueBigram.toLowerCase(), 1.0);
    generalFeatures.put("feature_nextAttrValue_trigram_" + nextAttrValueTrigram.toLowerCase(), 1.0);
    generalFeatures.put("feature_nextAttrValue_4gram_" + nextAttrValue4gram.toLowerCase(), 1.0);
    generalFeatures.put("feature_nextAttrValue_5gram_" + nextAttrValue5gram.toLowerCase(), 1.0);

    //If values have already been generated or not
    generalFeatures.put("feature_valueToBeMentioned_" + currentValue.toLowerCase(), 1.0);
    if (wasValueMentioned) {
        generalFeatures.put("feature_wasValueMentioned_true", 1.0);
    } else {
        //generalFeatures.put("feature_wasValueMentioned_false", 1.0);
    }
    HashSet<String> valuesThatFollow = new HashSet<>();
    attrValuesThatFollow.stream().map((attrValue) -> {
        generalFeatures.put("feature_attrValuesThatFollow_" + attrValue.toLowerCase(), 1.0);
        return attrValue;
    }).forEachOrdered((attrValue) -> {
        if (attrValue.contains("=")) {
            String v = attrValue.substring(attrValue.indexOf('=') + 1);
            if (v.matches("[xX][0-9]+")) {
                String attr = attrValue.substring(0, attrValue.indexOf('='));
                valuesThatFollow.add(Action.TOKEN_X + attr + "_" + v.substring(1));
            } else {
                valuesThatFollow.add(v);
            }
            generalFeatures.put(
                    "feature_attrsThatFollow_" + attrValue.substring(0, attrValue.indexOf('=')).toLowerCase(),
                    1.0);
        } else {
            generalFeatures.put("feature_attrsThatFollow_" + attrValue.toLowerCase(), 1.0);
        }
    });
    if (valuesThatFollow.isEmpty()) {
        generalFeatures.put("feature_noAttrsFollow", 1.0);
    } else {
        generalFeatures.put("feature_noAttrsFollow", 0.0);
    }
    HashSet<String> mentionedValues = new HashSet<>();
    attrValuesAlreadyMentioned.stream().map((attrValue) -> {
        generalFeatures.put("feature_attrValuesAlreadyMentioned_" + attrValue.toLowerCase(), 1.0);
        return attrValue;
    }).forEachOrdered((attrValue) -> {
        if (attrValue.contains("=")) {
            generalFeatures.put("feature_attrsAlreadyMentioned_"
                    + attrValue.substring(0, attrValue.indexOf('=')).toLowerCase(), 1.0);
            String v = attrValue.substring(attrValue.indexOf('=') + 1);
            if (v.matches("[xX][0-9]+")) {
                String attr = attrValue.substring(0, attrValue.indexOf('='));
                mentionedValues.add(Action.TOKEN_X + attr + "_" + v.substring(1));
            } else {
                mentionedValues.add(v);
            }
        } else {
            generalFeatures.put("feature_attrsAlreadyMentioned_" + attrValue.toLowerCase(), 1.0);
        }
    });

    /*System.out.println("currentAttrValue: " + currentAttrValue);
    System.out.println("5W: " + prev5gram);
    System.out.println("5AW: " + prevAttrWord5gram);
    System.out.println("5A: " + prevAttr5gram);
    System.out.println("VM: " + wasValueMentioned);
    System.out.println("A_TF: " + attrValuesThatFollow);
    System.out.println("==============================");*/
    if (currentValue.equals("no") || currentValue.equals("yes") || currentValue.equals("yes or no")
            || currentValue.equals("none") || currentValue.equals("empty") //|| currentValue.equals("dont_care")
    ) {
        generalFeatures.put("feature_emptyValue", 1.0);
    }

    //Word specific features (and also global features)
    for (Action action : availableWordActions.get(currentAttr)) {
        //Is word same as previous word
        if (prevWord.equals(action.getWord())) {
            //valueSpecificFeatures.get(action.getAction()).put("feature_specific_sameAsPreviousWord", 1.0);
            valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_sameAsPreviousWord",
                    1.0);
        } else {
            //valueSpecificFeatures.get(action.getAction()).put("feature_specific_notSameAsPreviousWord", 1.0);
            valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_notSameAsPreviousWord",
                    1.0);
        }
        //Has word appeared in the same attrValue before
        generatedWords.forEach((previousAction) -> {
            if (previousAction.getWord().equals(action.getWord())
                    && previousAction.getAttribute().equals(currentAttrValue)) {
                //valueSpecificFeatures.get(action.getAction()).put("feature_specific_appearedInSameAttrValue", 1.0);
                valueSpecificFeatures.get(action.getAction())
                        .put("global_feature_specific_appearedInSameAttrValue", 1.0);
            } else {
                //valueSpecificFeatures.get(action.getAction()).put("feature_specific_notAppearedInSameAttrValue", 1.0);
                //valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_notAppearedInSameAttrValue", 1.0);
            }
        });
        //Has word appeared before
        generatedWords.forEach((previousAction) -> {
            if (previousAction.getWord().equals(action.getWord())) {
                //valueSpecificFeatures.get(action.getAction()).put("feature_specific_appeared", 1.0);
                valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_appeared", 1.0);
            } else {
                //valueSpecificFeatures.get(action.getAction()).put("feature_specific_notAppeared", 1.0);
                //valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_notAppeared", 1.0);
            }
        });
        if (currentValue.equals("no") || currentValue.equals("yes") || currentValue.equals("yes or no")
                || currentValue.equals("none") || currentValue.equals("empty") //|| currentValue.equals("dont_care")
        ) {
            //valueSpecificFeatures.get(action.getAction()).put("feature_specific_emptyValue", 1.0);
            valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_emptyValue", 1.0);
        } else {
            //valueSpecificFeatures.get(action.getAction()).put("feature_specific_notEmptyValue", 1.0);
            //valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_notEmptyValue", 1.0);
        }

        HashSet<String> keys = new HashSet<>(valueSpecificFeatures.get(action.getAction()).keySet());
        keys.forEach((feature1) -> {
            keys.stream()
                    .filter((feature2) -> (valueSpecificFeatures.get(action.getAction()).get(feature1) == 1.0
                            && valueSpecificFeatures.get(action.getAction()).get(feature2) == 1.0
                            && feature1.compareTo(feature2) < 0))
                    .forEachOrdered((feature2) -> {
                        valueSpecificFeatures.get(action.getAction()).put(feature1 + "&&" + feature2, 1.0);
                    });
        });

        if (!action.getWord().startsWith(Action.TOKEN_X) && !currentValue.equals("no")
                && !currentValue.equals("yes") && !currentValue.equals("yes or no")
                && !currentValue.equals("none") && !currentValue.equals("empty") //&& !currentValue.equals("dont_care")
        ) {
            for (String value : getValueAlignments().keySet()) {
                for (ArrayList<String> alignedStr : getValueAlignments().get(value).keySet()) {
                    if (alignedStr.get(0).equals(action.getWord())) {
                        if (mentionedValues.contains(value)) {
                            //valueSpecificFeatures.get(action.getAction()).put("feature_specific_beginsValue_alreadyMentioned", 1.0);
                            valueSpecificFeatures.get(action.getAction())
                                    .put("global_feature_specific_beginsValue_alreadyMentioned", 1.0);

                        } else if (currentValue.equals(value)) {
                            //valueSpecificFeatures.get(action.getAction()).put("feature_specific_beginsValue_current", 1.0);
                            valueSpecificFeatures.get(action.getAction())
                                    .put("global_feature_specific_beginsValue_current", 1.0);

                        } else if (valuesThatFollow.contains(value)) {
                            //valueSpecificFeatures.get(action.getAction()).put("feature_specific_beginsValue_thatFollows", 1.0);
                            valueSpecificFeatures.get(action.getAction())
                                    .put("global_feature_specific_beginsValue_thatFollows", 1.0);

                        } else {
                            //valueSpecificFeatures.get(action.getAction()).put("feature_specific_beginsValue_notInMR", 1.0);
                            valueSpecificFeatures.get(action.getAction())
                                    .put("global_feature_specific_beginsValue_notInMR", 1.0);

                        }
                    } else {
                        for (int i = 1; i < alignedStr.size(); i++) {
                            if (alignedStr.get(i).equals(action.getWord())) {
                                if (endsWith(generatedPhrase,
                                        new ArrayList<String>(alignedStr.subList(0, i + 1)))) {
                                    if (mentionedValues.contains(value)) {
                                        //valueSpecificFeatures.get(action.getAction()).put("feature_specific_inValue_alreadyMentioned", 1.0);
                                        valueSpecificFeatures.get(action.getAction())
                                                .put("global_feature_specific_inValue_alreadyMentioned", 1.0);

                                    } else if (currentValue.equals(value)) {
                                        //valueSpecificFeatures.get(action.getAction()).put("feature_specific_inValue_current", 1.0);
                                        valueSpecificFeatures.get(action.getAction())
                                                .put("global_feature_specific_inValue_current", 1.0);

                                    } else if (valuesThatFollow.contains(value)) {
                                        //valueSpecificFeatures.get(action.getAction()).put("feature_specific_inValue_thatFollows", 1.0);
                                        valueSpecificFeatures.get(action.getAction())
                                                .put("global_feature_specific_inValue_thatFollows", 1.0);

                                    } else {
                                        //valueSpecificFeatures.get(action.getAction()).put("feature_specific_inValue_notInMR", 1.0);
                                        valueSpecificFeatures.get(action.getAction())
                                                .put("global_feature_specific_inValue_notInMR", 1.0);

                                    }
                                } else {
                                    /*if (mentionedValues.contains(value)) {
                                    valueSpecificFeatures.get(action.getAction()).put("feature_specific_outOfValue_alreadyMentioned", 1.0);
                                    } else if (currentValue.equals(value)) {
                                    valueSpecificFeatures.get(action.getAction()).put("feature_specific_outOfValue_current", 1.0);
                                    } else if (valuesThatFollow.contains(value)) {
                                    valueSpecificFeatures.get(action.getAction()).put("feature_specific_outOfValue_thatFollows", 1.0);
                                    } else {
                                    valueSpecificFeatures.get(action.getAction()).put("feature_specific_outOfValue_notInMR", 1.0);
                                    }*/
                                    //valueSpecificFeatures.get(action.getAction()).put("feature_specific_outOfValue", 1.0);
                                    valueSpecificFeatures.get(action.getAction())
                                            .put("global_feature_specific_outOfValue", 1.0);
                                }
                            }
                        }
                    }
                }
            }
            if (action.getWord().equals(Action.TOKEN_END)) {
                if (generatedWordsInSameAttrValue.isEmpty()) {
                    //valueSpecificFeatures.get(action.getAction()).put("feature_specific_closingEmptyAttr", 1.0);
                    valueSpecificFeatures.get(action.getAction())
                            .put("global_feature_specific_closingEmptyAttr", 1.0);
                }
                if (!wasValueMentioned) {
                    //valueSpecificFeatures.get(action.getAction()).put("feature_specific_closingAttrWithValueNotMentioned", 1.0);
                    valueSpecificFeatures.get(action.getAction())
                            .put("global_feature_specific_closingAttrWithValueNotMentioned", 1.0);
                }

                // if (!prevCurrentAttrValueWord.equals("@@")) {
                if (!prevWord.equals("@@")) {
                    boolean alignmentIsOpen = false;
                    for (String value : getValueAlignments().keySet()) {
                        for (ArrayList<String> alignedStr : getValueAlignments().get(value).keySet()) {
                            for (int i = 0; i < alignedStr.size() - 1; i++) {
                                if (alignedStr.get(i).equals(prevWord) && endsWith(generatedPhrase,
                                        new ArrayList<>(alignedStr.subList(0, i + 1)))) {
                                    alignmentIsOpen = true;
                                }
                            }
                        }
                    }
                    if (alignmentIsOpen) {
                        // valueSpecificFeatures.get(action.getAction()).put("feature_specific_closingAttrWhileValueIsNotConcluded", 1.0);
                        valueSpecificFeatures.get(action.getAction())
                                .put("global_feature_specific_closingAttrWhileValueIsNotConcluded", 1.0);
                    }
                }
            }
        } else if (currentValue.equals("no") || currentValue.equals("yes") || currentValue.equals("yes or no")
                || currentValue.equals("none") || currentValue.equals("empty") //|| currentValue.equals("dont_care")
        ) {
            valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_XValue_notInMR", 1.0);
        } else {
            String currentValueVariant = "";
            if (currentValue.matches("[xX][0-9]+")) {
                currentValueVariant = Action.TOKEN_X + currentAttr + "_" + currentValue.substring(1);
            }

            if (mentionedValues.contains(action.getWord())) {
                //valueSpecificFeatures.get(action.getAction()).put("feature_specific_XValue_alreadyMentioned", 1.0);
                valueSpecificFeatures.get(action.getAction())
                        .put("global_feature_specific_XValue_alreadyMentioned", 1.0);
            } else if (currentValueVariant.equals(action.getWord()) && !currentValueVariant.isEmpty()) {
                //valueSpecificFeatures.get(action.getAction()).put("feature_specific_XValue_current", 1.0);
                valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_XValue_current",
                        1.0);

            } else if (valuesThatFollow.contains(action.getWord())) {
                //valueSpecificFeatures.get(action.getAction()).put("feature_specific_XValue_thatFollows", 1.0);
                valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_XValue_thatFollows",
                        1.0);
            } else {
                //valueSpecificFeatures.get(action.getAction()).put("feature_specific_XValue_notInMR", 1.0);
                valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_XValue_notInMR",
                        1.0);
            }
        }
        /*for (int i : nGrams.keySet()) {
        for (String nGram : nGrams.get(i)) {
        if (i == 2) {
        if (nGram.startsWith(prevWord + "|")
        && nGram.endsWith("|" + action.getAction())) {
        valueSpecificFeatures.get(action.getAction()).put("feature_specific_valuesFollowsPreviousWord", 1.0);
        }
        } else if (i == 3) {
        if (nGram.startsWith(prevBigram + "|")
        && nGram.endsWith("|" + action.getAction())) {
        valueSpecificFeatures.get(action.getAction()).put("feature_specific_valuesFollowsPreviousBigram", 1.0);
        }
        } else if (i == 4) {
        if (nGram.startsWith(prevTrigram + "|")
        && nGram.endsWith("|" + action.getAction())) {
        valueSpecificFeatures.get(action.getAction()).put("feature_specific_valuesFollowsPreviousTrigram", 1.0);
        }
        } else if (i == 5) {
        if (nGram.startsWith(prev4gram + "|")
        && nGram.endsWith("|" + action.getAction())) {
        valueSpecificFeatures.get(action.getAction()).put("feature_specific_valuesFollowsPrevious4gram", 1.0);
        }
        } else if (i == 6) {
        if (nGram.startsWith(prev5gram + "|")
        && nGram.endsWith("|" + action.getAction())) {
        valueSpecificFeatures.get(action.getAction()).put("feature_specific_valuesFollowsPrevious5gram", 1.0);
        }
        }
        }
        }*/

        //valueSpecificFeatures.get(action.getAction()).put("global_feature_abstractMR_" + mr.getAbstractMR(), 1.0);
        valueSpecificFeatures.get(action.getAction())
                .put("global_feature_currentValue_" + currentValue.toLowerCase(), 1.0);

        ArrayList<String> fullGramLM = new ArrayList<>();
        for (int i = 0; i < generatedWords.size(); i++) {
            fullGramLM.add(generatedWords.get(i).getWord());
        }

        ArrayList<String> prev5wordGramLM = new ArrayList<>();
        int j = 0;
        for (int i = generatedWords.size() - 1; (i >= 0 && j < 5); i--) {
            prev5wordGramLM.add(0, generatedWords.get(i).getWord());
            j++;
        }
        prev5wordGramLM.add(action.getWord());
        while (prev5wordGramLM.size() < 4) {
            prev5wordGramLM.add(0, "@@");
        }

        double afterLMScorePerPred5Gram = getWordLMsPerPredicate().get(predicate)
                .getProbability(prev5wordGramLM);
        valueSpecificFeatures.get(action.getAction()).put("global_feature_LMWord_perPredicate_5gram_score",
                afterLMScorePerPred5Gram);

        double afterLMScorePerPred = getWordLMsPerPredicate().get(predicate).getProbability(fullGramLM);
        valueSpecificFeatures.get(action.getAction()).put("global_feature_LMWord_perPredicate_score",
                afterLMScorePerPred);
    }

    /*HashSet<String> keys = new HashSet<>(generalFeatures.keySet());
    for (String feature1 : keys) {
    if (generalFeatures.get(feature1) == 1.0) {
        generalFeatures.put("global_feature_attr_" + currentValue.toLowerCase() + "&&" + feature1, 1.0);
    }
    }*/
    //generalFeatures.put("feature_abstractMR_" + mr.getAbstractMR(), 1.0);

    /*HashSet<String> keys = new HashSet<>(generalFeatures.keySet());
    for (String feature1 : keys) {
    for (String feature2 : keys) {
    if (generalFeatures.get(feature1) == 1.0
    && generalFeatures.get(feature2) == 1.0
    && feature1.compareTo(feature2) < 0) {
    generalFeatures.put(feature1 + "&&" + feature2, 1.0);
    }
    }
    }*/
    return new Instance(generalFeatures, valueSpecificFeatures, costs);
}

From source file:structuredPredictionNLG.SFX.java

/**
 * Populates the predicate, attribute, attribute/value pair, and value alignment collections
 * @param dataFile The dataset file.
 */
public void createLists(File dataFile) {
    try {
        // Initialize the collections
        setPredicates(new ArrayList<>());
        setAttributes(new HashMap<>());
        setAttributeValuePairs(new HashMap<>());
        setValueAlignments(new HashMap<>());

        // Obtain the dataset portion of the file
        String dataPart = new String();
        boolean begin = false;
        try (BufferedReader br = new BufferedReader(new FileReader(dataFile))) {
            String s;
            while ((s = br.readLine()) != null) {
                if (s.startsWith("[")) {
                    begin = true;
                }
                if (begin) {
                    dataPart += s;
                }
            }
        } catch (FileNotFoundException ex) {
            Logger.getLogger(Bagel.class.getName()).log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            Logger.getLogger(Bagel.class.getName()).log(Level.SEVERE, null, ex);
        }

        // Parse the dataset with JSON
        JSONArray overArray = new JSONArray(dataPart);
        for (int o = 0; o < overArray.length(); o++) {
            // "dial" notes each seperate dialog
            JSONArray arr = overArray.getJSONObject(o).getJSONArray("dial");
            for (int i = 0; i < arr.length(); i++) {
                String MRstr;
                String ref;
                // "dact" notes every meaning representation
                MRstr = arr.getJSONObject(i).getJSONObject("S").getString("dact");
                // "ref" notes every corresponding reference
                ref = arr.getJSONObject(i).getJSONObject("S").getString("ref").replaceAll("-s", "s");

                //We split some composite words (based on Wen et al's (2016) code)
                ref = (" " + ref + " ").replaceAll(" it's ", " it is ").replaceAll(" don't ", " do not ")
                        .replaceAll(" doesn't ", " does not ").replaceAll(" didn't ", " did not ")
                        .replaceAll(" you'd ", " you would ").replaceAll(" you're ", " you are ")
                        .replaceAll(" you'll ", " you will ").replaceAll(" i'm ", " i am ")
                        .replaceAll(" they're ", " they are ").replaceAll(" that's ", " that is ")
                        .replaceAll(" what's ", " what is ").replaceAll(" couldn't ", " could not ")
                        .replaceAll(" i've ", " i have ").replaceAll(" we've ", " we have ")
                        .replaceAll(" can't ", " cannot ").replaceAll(" i'd ", " i would ")
                        .replaceAll(" i'd ", " i would ").replaceAll(" aren't ", " are not ")
                        .replaceAll(" isn't ", " is not ").replaceAll(" wasn't ", " was not ")
                        .replaceAll(" weren't ", " were not ").replaceAll(" won't ", " will not ")
                        .replaceAll(" there's ", " there is ").replaceAll(" there're ", " there are ")
                        .replaceAll(" \\. \\. ", " \\. ").replaceAll(" restaurants ", " restaurant -s ")
                        .replaceAll(" hotels ", " hotel -s ").replaceAll(" laptops ", " laptop -s ")
                        .replaceAll(" cheaper ", " cheap -er ").replaceAll(" dinners ", " dinner -s ")
                        .replaceAll(" lunches ", " lunch -s ").replaceAll(" breakfasts ", " breakfast -s ")
                        .replaceAll(" expensively ", " expensive -ly ")
                        .replaceAll(" moderately ", " moderate -ly ").replaceAll(" cheaply ", " cheap -ly ")
                        .replaceAll(" prices ", " price -s ").replaceAll(" places ", " place -s ")
                        .replaceAll(" venues ", " venue -s ").replaceAll(" ranges ", " range -s ")
                        .replaceAll(" meals ", " meal -s ").replaceAll(" locations ", " location -s ")
                        .replaceAll(" areas ", " area -s ").replaceAll(" policies ", " policy -s ")
                        .replaceAll(" children ", " child -s ").replaceAll(" kids ", " kid -s ")
                        .replaceAll(" kidfriendly ", " kid friendly ").replaceAll(" cards ", " card -s ")
                        .replaceAll(" st ", " street ").replaceAll(" ave ", " avenue ")
                        .replaceAll(" upmarket ", " expensive ").replaceAll(" inpricey ", " cheap ")
                        .replaceAll(" inches ", " inch -s ").replaceAll(" uses ", " use -s ")
                        .replaceAll(" dimensions ", " dimension -s ")
                        .replaceAll(" driverange ", " drive range ").replaceAll(" includes ", " include -s ")
                        .replaceAll(" computers ", " computer -s ").replaceAll(" machines ", " machine -s ")
                        .replaceAll(" ecorating ", " eco rating ").replaceAll(" families ", " family -s ")
                        .replaceAll(" ratings ", " rating -s ").replaceAll(" constraints ", " constraint -s ")
                        .replaceAll(" pricerange ", " price range ")
                        .replaceAll(" batteryrating ", " battery rating ")
                        .replaceAll(" requirements ", " requirement -s ").replaceAll(" drives ", " drive -s ")
                        .replaceAll(" specifications ", " specification -s ")
                        .replaceAll(" weightrange ", " weight range ").replaceAll(" harddrive ", " hard drive ")
                        .replaceAll(" batterylife ", " battery life ")
                        .replaceAll(" businesses ", " business -s ").replaceAll(" hours ", " hour -s ")
                        .replaceAll(" accessories ", " accessory -s ").replaceAll(" ports ", " port -s ")
                        .replaceAll(" televisions ", " television -s ")
                        .replaceAll(" restrictions ", " restriction -s ")
                        .replaceAll(" extremely ", " extreme -ly ").replaceAll(" actually ", " actual -ly ")
                        .replaceAll(" typically ", " typical -ly ").replaceAll(" drivers ", " driver -s ")
                        .replaceAll(" teh ", " the ").replaceAll(" definitely ", " definite -ly ")
                        .replaceAll(" factors ", " factor -s ").replaceAll(" truly ", " true -ly ")
                        .replaceAll(" mostly ", " most -ly ").replaceAll(" nicely ", " nice -ly ")
                        .replaceAll(" surely ", " sure -ly ").replaceAll(" certainly ", " certain -ly ")
                        .replaceAll(" totally ", " total -ly ").replaceAll(" \\# ", " number ")
                        .replaceAll(" \\& ", " and ").replaceAll(" avenue ", " ave ").replaceAll(" -s ", " s ")
                        .trim();

                // If the MR concerns one of the following predicates, and a ref is available
                if ((MRstr.startsWith("inform(") || MRstr.startsWith("inform_only")
                        || MRstr.startsWith("inform_no_match(") || MRstr.startsWith("?confirm(")
                        || MRstr.startsWith("?select(") || MRstr.startsWith("?request(")
                        || MRstr.startsWith("?reqmore(") || MRstr.startsWith("goodbye(")) && !ref.isEmpty()) {
                    // Obtain the predicate
                    String predicate = MRstr.substring(0, MRstr.indexOf('('));
                    if (!getPredicates().contains(predicate) && predicate != null) {
                        getPredicates().add(predicate);

                        if (!getAttributes().containsKey(predicate)) {
                            getAttributes().put(predicate, new HashSet<String>());
                        }
                        if (!getDatasetInstances().containsKey(predicate)) {
                            getDatasetInstances().put(predicate, new ArrayList<DatasetInstance>());
                        }
                    }

                    // Obtain the attributes
                    String attributesStr = MRstr.substring(MRstr.indexOf('(') + 1, MRstr.length() - 1);
                    HashMap<String, HashSet<String>> attributeValues = new HashMap<>();
                    // Track the indexes used for variables identifiers (seperately for each attribute)
                    HashMap<String, Integer> attrXIndeces = new HashMap<>();
                    if (!attributesStr.isEmpty()) {
                        // Parse the attributes and their values
                        String[] args = attributesStr.split(";");
                        for (String arg : args) {
                            String attr;
                            String value = "";
                            // If the attribute has corresponding values
                            if (arg.contains("=")) {
                                String[] subAttr = arg.split("=");
                                value = subAttr[1].toLowerCase();
                                attr = subAttr[0].toLowerCase().replaceAll("_", "");

                                if (value.startsWith("\'")) {
                                    value = value.substring(1, value.length() - 1);
                                }
                                // Normalize some closed set values
                                if (value.equals("true")) {
                                    value = "yes";
                                }
                                if (value.equals("false")) {
                                    value = "no";
                                }
                                if (value.equals("dontcare")) {
                                    value = "dont_care";
                                }
                                if ((" " + value + " ").contains(" avenue ")) {
                                    value = (" " + value + " ").replace(" avenue ", " ave ").trim();
                                }
                                // Treat these values as seperate attributes since they are expressed quite differently
                                if (value.equals("no") || value.equals("yes") || value.equals("yes or no")
                                        || value.equals("none") || value.equals("empty")) {
                                    attr += "_" + value.replaceAll(" ", "_");
                                    value = attr;
                                }
                                // Treat "dont_care" instances, as if "dont_care" is the attribute, and the original attribute is the value
                                // We do this because the phrasing is very similar between different "dont_care" realizations
                                if (value.equals("dont_care")) {
                                    String v = value;
                                    value = attr;
                                    attr = v;
                                }
                            } else {
                                attr = arg.replaceAll("_", "");
                            }
                            if (!getAttributes().get(predicate).contains(attr)) {
                                getAttributes().get(predicate).add(attr);
                            }
                            if (!attributeValues.containsKey(attr)) {
                                attributeValues.put(attr, new HashSet<String>());
                            }
                            // If the attribute has no corresponding value, we encode it by using the attibute identifier as the value
                            if (value.isEmpty()) {
                                value = attr;
                            }

                            // If the value is a variable, we name it as {@X@ + attribute identifier + variable index (for this attribute)}
                            // This occurs when values are already set as variables in the MR, before any delexicalization happens
                            if (value.toLowerCase().startsWith("x")) {
                                int index = 0;
                                if (!attrXIndeces.containsKey(attr)) {
                                    attrXIndeces.put(attr, 1);
                                } else {
                                    index = attrXIndeces.get(attr);
                                    attrXIndeces.put(attr, index + 1);
                                }
                                value = "x" + index;
                            }
                            attributeValues.get(attr).add(value.trim().toLowerCase());
                        }
                    }

                    // Delexicalizing the attribute/value pairs
                    HashMap<String, HashSet<String>> delexicalizedAttributeValues = new HashMap<>();
                    HashMap<String, HashMap<String, Integer>> attrValuePriorities = new HashMap<>();
                    int maximumPriority = 0;
                    /* Delexicalization of values needs to happen incrementally, with priority given to the values of greater length, to avoid overlap of values in the reference
                     * e.g. for the MR: inform{name="inn on castro", near="castro"}, with the reference "inn on castro is a nice restaurant",
                     *      we need to first align and delexicalize the "inn on castro" value, before the "castro" value
                     *      (in this case because "castro" doesn't appear in the reference, but even if it appeared later the priorities would help align it with the correct one)
                    */
                    // We begin by determining which values may require delexicalization, and which not
                    for (String attr : attributeValues.keySet()) {
                        if (!attr.isEmpty()) {
                            delexicalizedAttributeValues.put(attr, new HashSet<String>());
                            attrValuePriorities.put(attr, new HashMap<String, Integer>());
                            for (String value : attributeValues.get(attr)) {
                                if (!value.equals("none") && !value.equals("empty") && !value.equals("yes")
                                        && !value.equals("yes or no") && !value.equals("no")
                                        && !value.equals(attr)) {
                                    // Initially priorities are given according to value order
                                    attrValuePriorities.get(attr).put(value, maximumPriority);
                                    maximumPriority++;
                                } else {
                                    // No delexicalization is needed here
                                    delexicalizedAttributeValues.get(attr).add(value);
                                }
                            }
                        }
                    }
                    // We shift the priorities of different values, according to their respective lengths (i.e. longer values have higher priority)
                    boolean change = true;
                    while (change) {
                        change = false;
                        for (String attr1 : attrValuePriorities.keySet()) {
                            for (String value1 : attrValuePriorities.get(attr1).keySet()) {
                                for (String attr2 : attrValuePriorities.keySet()) {
                                    for (String value2 : attrValuePriorities.get(attr2).keySet()) {
                                        if (!value1.equals(value2) && value1.contains(value2)
                                                && attrValuePriorities.get(attr1).get(
                                                        value1) > attrValuePriorities.get(attr2).get(value2)) {
                                            int prio1 = attrValuePriorities.get(attr1).get(value1);
                                            int prio2 = attrValuePriorities.get(attr2).get(value2);
                                            attrValuePriorities.get(attr1).put(value1, prio2);
                                            attrValuePriorities.get(attr2).put(value2, prio1);
                                            change = true;
                                        }
                                    }
                                }
                            }
                        }
                    }
                    // Map between variables and their lexicalized values, required for relexicalization during postprocessing after the sentence generation
                    HashMap<String, String> delexicalizationMap = new HashMap<>();
                    ref = " " + ref + " ";
                    // Delexicalization occurs, in order of priority
                    for (int priority = 0; priority < maximumPriority; priority++) {
                        for (String attr : attrValuePriorities.keySet()) {
                            if (!attrXIndeces.containsKey(attr)) {
                                attrXIndeces.put(attr, 0);
                            }
                            for (String value : attrValuePriorities.get(attr).keySet()) {
                                if (attrValuePriorities.get(attr).get(value) == priority) {
                                    // If the value doesn't appear verbatim in the reference, and the value is not composed of multiple subvalues (i.e. doesn't contain connectives)
                                    if (!ref.contains(" " + value + " ") && !value.contains(" and ")
                                            && !value.contains(" or ")) {
                                        if (value.equals("restaurant") && ref.contains(" place ")) {
                                            ref = ref.replace(" place ", " " + Action.TOKEN_X + attr + "_"
                                                    + attrXIndeces.get(attr) + " ");
                                            ref = ref.replaceAll("  ", " ");
                                            delexicalizedAttributeValues.get(attr)
                                                    .add(Action.TOKEN_X + attr + "_" + attrXIndeces.get(attr));
                                            delexicalizationMap.put(
                                                    Action.TOKEN_X + attr + "_" + attrXIndeces.get(attr),
                                                    "place");
                                            attrXIndeces.put(attr, attrXIndeces.get(attr) + 1);
                                        } else {
                                            delexicalizedAttributeValues.get(attr).add(value);
                                        }
                                        // If the value doesn't appear verbatim in the reference, but the value is composed of multiple sub-values
                                    } else if (!ref.contains(" " + value + " ")
                                            && (value.contains(" and ") || value.contains(" or "))) {
                                        // We first check if the value appears verbatim when we switch "and" with "or" and vice versa
                                        // We do this due to some inconsistencies in the dataset on how conjuctions are treated
                                        String tempValue = value;
                                        if (value.contains(" and ")) {
                                            tempValue = value.replace(" and ", " or ");
                                        } else if (value.contains(" or ")) {
                                            tempValue = value.replace(" or ", " and ");
                                        }

                                        if (ref.contains(" " + tempValue + " ")) {
                                            ref = ref.replace(" " + tempValue + " ", " " + Action.TOKEN_X + attr
                                                    + "_" + attrXIndeces.get(attr) + " ");
                                            ref = ref.replaceAll("  ", " ");
                                            delexicalizedAttributeValues.get(attr)
                                                    .add(Action.TOKEN_X + attr + "_" + attrXIndeces.get(attr));
                                            delexicalizationMap.put(
                                                    Action.TOKEN_X + attr + "_" + attrXIndeces.get(attr),
                                                    value);
                                            attrXIndeces.put(attr, attrXIndeces.get(attr) + 1);
                                        } else {
                                            // We split the conjunction into the seperate values; so far the code supports only 2 sub-values
                                            String[] values = new String[2];
                                            if (value.contains(" and ")) {
                                                values = value.split(" and ");
                                            } else if (value.contains(" or ")) {
                                                values = value.split(" or ");
                                            }
                                            // And check if the conjunction appears verbatim when we switch the position of the sub-values
                                            String newValue1 = values[1] + " and " + values[0];
                                            String newValue2 = values[1] + " or " + values[0];
                                            if (ref.contains(" " + newValue1 + " ")) {
                                                ref = ref.replace(" " + newValue1 + " ", " " + Action.TOKEN_X
                                                        + attr + "_" + attrXIndeces.get(attr) + " ");
                                                ref = ref.replaceAll("  ", " ");
                                                delexicalizedAttributeValues.get(attr).add(
                                                        Action.TOKEN_X + attr + "_" + attrXIndeces.get(attr));
                                                delexicalizationMap.put(
                                                        Action.TOKEN_X + attr + "_" + attrXIndeces.get(attr),
                                                        value);
                                                attrXIndeces.put(attr, attrXIndeces.get(attr) + 1);
                                            } else if (ref.contains(" " + newValue2 + " ")) {
                                                ref = ref.replace(" " + newValue2 + " ", " " + Action.TOKEN_X
                                                        + attr + "_" + attrXIndeces.get(attr) + " ");
                                                ref = ref.replaceAll("  ", " ");
                                                delexicalizedAttributeValues.get(attr).add(
                                                        Action.TOKEN_X + attr + "_" + attrXIndeces.get(attr));
                                                delexicalizationMap.put(
                                                        Action.TOKEN_X + attr + "_" + attrXIndeces.get(attr),
                                                        value);
                                                attrXIndeces.put(attr, attrXIndeces.get(attr) + 1);
                                            }
                                        }
                                        // If the value appears verbatim in the reference, delexicalize it
                                    } else {
                                        ref = ref.replace(" " + value + " ", " " + Action.TOKEN_X + attr + "_"
                                                + attrXIndeces.get(attr) + " ");
                                        ref = ref.replaceAll("  ", " ");
                                        delexicalizedAttributeValues.get(attr)
                                                .add(Action.TOKEN_X + attr + "_" + attrXIndeces.get(attr));
                                        delexicalizationMap.put(
                                                Action.TOKEN_X + attr + "_" + attrXIndeces.get(attr), value);
                                        attrXIndeces.put(attr, attrXIndeces.get(attr) + 1);
                                    }
                                }
                            }
                        }
                    }
                    ref = ref.trim();

                    // We construct the MeaningRepresentation
                    MeaningRepresentation MR = new MeaningRepresentation(predicate,
                            delexicalizedAttributeValues, MRstr, delexicalizationMap);

                    // Sequences of attribute/values pairs and words in the order we observe this in the reference
                    ArrayList<String> observedAttrValueSequence = new ArrayList<>();
                    ArrayList<String> observedWordSequence = new ArrayList<>();

                    // The observed word sequence does not include punctuation
                    String[] words = ref.replaceAll("([,.?!;:'])", " $1").split(" ");

                    // We construct the observed word sequence (and fix some orthographical errors along the way)
                    for (int w = 0; w < words.length; w++) {
                        if (!words[w].trim().isEmpty()) {
                            if (!words[w].trim().isEmpty()
                                    && (observedWordSequence.isEmpty() || !words[w].trim().equals(
                                            observedWordSequence.get(observedWordSequence.size() - 1)))) {
                                if (words[w].trim().equals("s") && (observedWordSequence
                                        .get(observedWordSequence.size() - 1).equals("child"))) {
                                    observedWordSequence.set(observedWordSequence.size() - 1, "children");
                                } else if (words[w].trim().equals("addres")
                                        || words[w].trim().equals("adress")) {
                                    observedWordSequence.add("address");
                                } else if (words[w].trim().equals("mathch")) {
                                    observedWordSequence.add("match");
                                } else if (words[w].trim().equals("prefered")) {
                                    observedWordSequence.add("preferred");
                                } else if (words[w].trim().equals("relevent")) {
                                    observedWordSequence.add("relevant");
                                } else if (words[w].trim().equals("alloed")) {
                                    observedWordSequence.add("allowed");
                                } else if (words[w].trim().equals("avalible")
                                        || words[w].trim().equals("avalable")) {
                                    observedWordSequence.add("available");
                                } else if (words[w].trim().equals("tha") || words[w].trim().equals("te")) {
                                    observedWordSequence.add("the");
                                } else if (words[w].trim().equals("internect")) {
                                    observedWordSequence.add("internet");
                                } else if (words[w].trim().equals("wether")) {
                                    observedWordSequence.add("whether");
                                } else if (words[w].trim().equals("aplogize")) {
                                    observedWordSequence.add("apologize");
                                } else if (words[w].trim().equals("accomodations")) {
                                    observedWordSequence.add("accommodations");
                                } else if (words[w].trim().equals("whould")) {
                                    observedWordSequence.add("would");
                                } else if (words[w].trim().equals("aceepted")) {
                                    observedWordSequence.add("accepted");
                                } else if (words[w].trim().equals("postode")) {
                                    observedWordSequence.add("postcode");
                                } else if (words[w].trim().equals("ive")) {
                                    observedWordSequence.add("i");
                                    observedWordSequence.add("have");
                                } else if (words[w].trim().equals("waht")) {
                                    observedWordSequence.add("what");
                                } else if (words[w].trim().equals("neighborhood")) {
                                    observedWordSequence.add("neighbourhood");
                                } else if (words[w].trim().equals("prefernce")) {
                                    observedWordSequence.add("preference");
                                } else if (words[w].trim().equals("dont")) {
                                    observedWordSequence.add("don't");
                                } else if (words[w].trim().equals("isnt")) {
                                    observedWordSequence.add("isn't");
                                } else if (words[w].trim().equals("intenet")
                                        || words[w].trim().equals("internetn")) {
                                    observedWordSequence.add("internet");
                                } else if (words[w].trim().equals("cannote")) {
                                    observedWordSequence.add("cannot");
                                } else if (words[w].trim().equals("notels")) {
                                    observedWordSequence.add("hotels");
                                } else if (words[w].trim().equals("phne")) {
                                    observedWordSequence.add("phone");
                                } else if (words[w].trim().equals("taht")) {
                                    observedWordSequence.add("that");
                                } else if (words[w].trim().equals("postdocde")) {
                                    observedWordSequence.add("postcode");
                                } else if (words[w].trim().equals("accpects")) {
                                    observedWordSequence.add("accepts");
                                } else if (words[w].trim().equals("doesn") || words[w].trim().equals("doesnt")
                                        || words[w].trim().equals("doesn")) {
                                    observedWordSequence.add("doesn't");
                                } else if (words[w].trim().equals("restaurnats")) {
                                    observedWordSequence.add("restarnauts");
                                } else if (words[w].trim().equals("ther") || words[w].trim().equals("thers")) {
                                    observedWordSequence.add("there");

                                    // The dataset treats the suffixes "s" and "-ly" as separate words
                                    // We combine these suffixes with their preceding words but keep a cache with these changes to revert them before evaluation (we have to do this so that the token-based evaluation metrics are calculated in a consistent manner with Wen et al.'s)
                                } else if (words[w].trim().equals("s")) {
                                    if (observedWordSequence.isEmpty()) {
                                        observedWordSequence.add(words[w].trim().toLowerCase());
                                    } else if (observedWordSequence.get(observedWordSequence.size() - 1)
                                            .startsWith(Action.TOKEN_X)) {
                                        observedWordSequence.add(words[w].trim().toLowerCase());
                                    } else {
                                        getCompositeSuffixesInData().put(
                                                observedWordSequence.get(observedWordSequence.size() - 1) + "s",
                                                observedWordSequence.get(observedWordSequence.size() - 1)
                                                        + " s");
                                        observedWordSequence.set(observedWordSequence.size() - 1,
                                                observedWordSequence.get(observedWordSequence.size() - 1)
                                                        + "s");
                                    }
                                } else if (words[w].trim().equals("-ly")) {
                                    if (observedWordSequence.isEmpty()) {
                                        observedWordSequence.add(words[w].trim().toLowerCase());
                                    } else if (observedWordSequence.get(observedWordSequence.size() - 1)
                                            .startsWith(Action.TOKEN_X)) {
                                        observedWordSequence.add(words[w].trim().toLowerCase());
                                    } else {
                                        getCompositeSuffixesInData().put(
                                                observedWordSequence.get(observedWordSequence.size() - 1)
                                                        + "ly",
                                                observedWordSequence.get(observedWordSequence.size() - 1)
                                                        + " -ly");
                                        observedWordSequence.set(observedWordSequence.size() - 1,
                                                observedWordSequence.get(observedWordSequence.size() - 1)
                                                        + "ly");
                                    }
                                } else {
                                    observedWordSequence.add(words[w].trim().toLowerCase());
                                }
                            }
                        }
                    }

                    //Probably deprecated, need to do some more tests
                    MR.getAttributeValues().keySet().forEach((attr) -> {
                        MR.getAttributeValues().get(attr).stream()
                                .filter((value) -> (attr.equals("name") && value.equals("none")))
                                .forEachOrdered((value) -> {
                                    observedAttrValueSequence.add(0,
                                            attr.toLowerCase() + "=" + value.toLowerCase());
                                });
                    });
                    observedAttrValueSequence.add(Action.TOKEN_END);

                    // We store the maximum observed word sequence length, to use as a limit during generation
                    if (observedWordSequence.size() > getMaxWordSequenceLength()) {
                        setMaxWordSequenceLength(observedWordSequence.size());
                    }

                    // We initialize the alignments between words and attribute/value pairs
                    ArrayList<String> wordToAttrValueAlignment = new ArrayList<>();
                    // And populate them with "unaligned" tokens (i.e. "[]") and punctuation alignments; we do the latter so we know to filter out punctuation when estimating the alignments in later stages
                    observedWordSequence.forEach((word) -> {
                        if (word.trim().matches("[,.?!;:']")) {
                            wordToAttrValueAlignment.add(Action.TOKEN_PUNCT);
                        } else {
                            wordToAttrValueAlignment.add("[]");
                        }
                    });
                    // And using both word sequence and initial alignments, we construct a draft sequence of word actions corresponding to the reference
                    ArrayList<Action> directReferenceSequence = new ArrayList<>();
                    for (int r = 0; r < observedWordSequence.size(); r++) {
                        directReferenceSequence
                                .add(new Action(observedWordSequence.get(r), wordToAttrValueAlignment.get(r)));
                    }
                    // Finally, we construct the DatasetInstance
                    DatasetInstance DI = new DatasetInstance(MR, directReferenceSequence,
                            postProcessRef(MR, directReferenceSequence));
                    // We add the evaluation references of all previously constructed DatasetInstances (that are identical to this one) as available evaluation references 
                    getDatasetInstances().get(predicate).stream()
                            .filter((existingDI) -> (existingDI.getMeaningRepresentation().getAbstractMR()
                                    .equals(DI.getMeaningRepresentation().getAbstractMR())))
                            .map((existingDI) -> {
                                existingDI.getEvaluationReferences().addAll(DI.getEvaluationReferences());
                                return existingDI;
                            }).forEachOrdered((existingDI) -> {
                                // We add the direct reference of this DatasetInstance as an available evaluation reference to all previously constructed DatasetInstance that are identical to this one
                                DI.getEvaluationReferences().addAll(existingDI.getEvaluationReferences());
                            });
                    getDatasetInstances().get(predicate).add(DI);

                    // Calculate the possible alignments between (non-delexicalized) attribute values and reference subphrases
                    // We do this by comparing the values with n-gram subphrases of the reference, using character-level Levenshtein distance
                    // These are used during the estimation of naive alignments, but also for tracking which values have possibly been expressed during generation
                    HashMap<String, HashMap<String, Double>> observedValueAlignments = new HashMap<>();
                    MR.getAttributeValues().keySet().forEach((attr) -> {
                        MR.getAttributeValues().get(attr).stream()
                                .filter((value) -> (!value.equals("name=none")
                                        && !value.startsWith(Action.TOKEN_X)
                                        && !(value.matches("\"[xX][0-9]+\"") || value.matches("[xX][0-9]+"))))
                                .forEachOrdered((value) -> {
                                    String valueToCompare = value;
                                    if (value.equals("no") || value.equals("yes") || value.equals("yes or no")
                                            || value.equals("none") || value.equals("empty")) {
                                        // If the value is boolean or non-existant, we also compare using the attribute name
                                        valueToCompare = attr;
                                        observedValueAlignments.put(valueToCompare + ":" + value,
                                                new HashMap<String, Double>());
                                    } else {
                                        observedValueAlignments.put(valueToCompare,
                                                new HashMap<String, Double>());
                                    }
                                    //For all n-grams in the referenec
                                    for (int n = 1; n < observedWordSequence.size(); n++) {
                                        //Calculate the similaritie between them and valueToCompare
                                        for (int r = 0; r <= observedWordSequence.size() - n; r++) {
                                            boolean compareAgainstNGram = true;
                                            for (int j = 0; j < n; j++) {
                                                if (observedWordSequence.get(r + j).startsWith(Action.TOKEN_X)
                                                        || wordToAttrValueAlignment.get(r + j)
                                                                .equals(Action.TOKEN_PUNCT)
                                                        || StringNLPUtilities
                                                                .isArticle(observedWordSequence.get(r + j))
                                                        || observedWordSequence.get(r + j)
                                                                .equalsIgnoreCase("and")
                                                        || observedWordSequence.get(r + j)
                                                                .equalsIgnoreCase("or")) {
                                                    // We ignore n-grams that contain variables, punctuation, articles, or conjuctions
                                                    // In other words, we do not allow values to align with such n-grams
                                                    compareAgainstNGram = false;
                                                }
                                            }
                                            if (compareAgainstNGram) {
                                                String align = "";
                                                String compare = "";
                                                String backwardCompare = "";
                                                for (int j = 0; j < n; j++) {
                                                    // The coordinates of the alignment
                                                    align += (r + j) + " ";
                                                    compare += observedWordSequence.get(r + j);
                                                    backwardCompare = observedWordSequence.get(r + j)
                                                            + backwardCompare;
                                                }
                                                align = align.trim();

                                                // Calculate the character-level distance between the value and the nGram (in its original and reversed order)
                                                Double distance = Levenshtein.getSimilarity(
                                                        valueToCompare.toLowerCase(), compare.toLowerCase(),
                                                        true);
                                                Double backwardDistance = Levenshtein.getSimilarity(
                                                        valueToCompare.toLowerCase(),
                                                        backwardCompare.toLowerCase(), true);

                                                // We keep the best distance score; note that the Levenshtein distance is normalized so that greater is better 
                                                if (backwardDistance > distance) {
                                                    distance = backwardDistance;
                                                }
                                                // We ignore all nGrams that are less similar than a threshold
                                                if (distance > 0.3) {
                                                    if (value.equals("no") || value.equals("yes")
                                                            || value.equals("yes or no") || value.equals("none")
                                                            || value.equals("empty")) {
                                                        observedValueAlignments
                                                                .get(valueToCompare + ":" + value)
                                                                .put(align, distance);
                                                    } else {
                                                        observedValueAlignments.get(valueToCompare).put(align,
                                                                distance);
                                                    }
                                                }
                                            }
                                        }
                                    }
                                });
                    });

                    // We filter out any values that haven't been aligned
                    HashSet<String> toRemove = new HashSet<>();
                    for (String value : observedValueAlignments.keySet()) {
                        if (observedValueAlignments.get(value).isEmpty()) {
                            toRemove.add(value);
                        }
                    }
                    for (String value : toRemove) {
                        observedValueAlignments.remove(value);
                    }

                    // We keep the best aligned nGrams; since we do not want the aligned nGrams to be overlapping, we remove any overlapping alignments after we pick each one
                    while (!observedValueAlignments.keySet().isEmpty()) {
                        // Find the best aligned nGram
                        Double max = Double.NEGATIVE_INFINITY;
                        String[] bestAlignment = new String[2];
                        for (String value : observedValueAlignments.keySet()) {
                            for (String alignment : observedValueAlignments.get(value).keySet()) {
                                if (observedValueAlignments.get(value).get(alignment) > max) {
                                    max = observedValueAlignments.get(value).get(alignment);
                                    bestAlignment[0] = value;
                                    bestAlignment[1] = alignment;
                                }
                            }
                        }

                        // Find the subphrase that corresponds to the best aligned nGram, according to the coordinates
                        ArrayList<String> alignedStr = new ArrayList<>();
                        String[] coords = bestAlignment[1].split(" ");
                        if (coords.length == 1) {
                            alignedStr.add(observedWordSequence.get(Integer.parseInt(coords[0].trim())));
                        } else {
                            for (int a = Integer.parseInt(coords[0].trim()); a <= Integer
                                    .parseInt(coords[coords.length - 1].trim()); a++) {
                                alignedStr.add(observedWordSequence.get(a));
                            }
                        }

                        // Store the best aligned nGram
                        if (!getValueAlignments().containsKey(bestAlignment[0])) {
                            getValueAlignments().put(bestAlignment[0],
                                    new HashMap<ArrayList<String>, Double>());
                        }
                        getValueAlignments().get(bestAlignment[0]).put(alignedStr, max);

                        // And remove it from the observed ones for this instance
                        observedValueAlignments.remove(bestAlignment[0]);
                        // And also remove any other aligned nGrams that are overlapping with the best aligned nGram
                        observedValueAlignments.keySet().forEach((value) -> {
                            HashSet<String> alignmentsToBeRemoved = new HashSet<>();
                            observedValueAlignments.get(value).keySet().forEach((alignment) -> {
                                String[] othCoords = alignment.split(" ");
                                if (Integer.parseInt(coords[0].trim()) <= Integer.parseInt(othCoords[0].trim())
                                        && (Integer.parseInt(coords[coords.length - 1].trim()) >= Integer
                                                .parseInt(othCoords[0].trim()))
                                        || (Integer.parseInt(othCoords[0].trim()) <= Integer
                                                .parseInt(coords[0].trim())
                                                && Integer.parseInt(
                                                        othCoords[othCoords.length - 1].trim()) >= Integer
                                                                .parseInt(coords[0].trim()))) {
                                    alignmentsToBeRemoved.add(alignment);
                                }
                            });
                            alignmentsToBeRemoved.forEach((alignment) -> {
                                observedValueAlignments.get(value).remove(alignment);
                            });
                        });
                        // We filter out any values that are no logner aligned (due to overlapping conflicts)
                        toRemove = new HashSet<>();
                        for (String value : observedValueAlignments.keySet()) {
                            if (observedValueAlignments.get(value).isEmpty()) {
                                toRemove.add(value);
                            }
                        }
                        for (String value : toRemove) {
                            observedValueAlignments.remove(value);
                        }
                    }
                    getObservedAttrValueSequences().add(observedAttrValueSequence);
                }
            }
        }
    } catch (JSONException ex) {
    }
}

From source file:structuredPredictionNLG.SFX.java

/**
 *
 * @param trainingData the training dataset instances
 */
public void createRandomAlignments(ArrayList<DatasetInstance> trainingData) {
    HashMap<String, HashMap<ArrayList<Action>, HashMap<Action, Integer>>> punctPatterns = new HashMap<>();
    getPredicates().forEach((predicate) -> {
        punctPatterns.put(predicate, new HashMap<ArrayList<Action>, HashMap<Action, Integer>>());
    });
    HashMap<DatasetInstance, ArrayList<Action>> punctRealizations = new HashMap<DatasetInstance, ArrayList<Action>>();

    HashMap<ArrayList<Action>, ArrayList<Action>> calculatedRealizationsCache = new HashMap<>();
    trainingData.stream().map((di) -> {
        HashSet<ArrayList<Action>> initRealizations = new HashSet<>();
        if (!calculatedRealizationsCache.containsKey(di.getDirectReferenceSequence())) {
            initRealizations.add(di.getDirectReferenceSequence());
        }
        initRealizations.stream().map((realization) -> {
            HashMap<String, HashSet<String>> values = new HashMap<>();
            di.getMeaningRepresentation().getAttributeValues().keySet().forEach((attr) -> {
                values.put(attr, new HashSet<>(di.getMeaningRepresentation().getAttributeValues().get(attr)));
            });
            ArrayList<Action> randomRealization = new ArrayList<Action>();
            realization.forEach((a) -> {
                if (a.getAttribute().equals(Action.TOKEN_PUNCT)) {
                    randomRealization.add(new Action(a.getWord(), a.getAttribute()));
                } else {
                    randomRealization.add(new Action(a.getWord(), ""));
                }
            });
            HashSet<String> unalignedAttrs = new HashSet<>();
            if (values.keySet().isEmpty()) {
                for (int i = 0; i < randomRealization.size(); i++) {
                    if (randomRealization.get(i).getAttribute().isEmpty()
                            || randomRealization.get(i).getAttribute().equals("[]")) {
                        if (!getAttributes().get(di.getMeaningRepresentation().getPredicate())
                                .contains("empty")) {
                            getAttributes().get(di.getMeaningRepresentation().getPredicate()).add("empty");
                        }
                        randomRealization.get(i).setAttribute("empty=empty");
                    }
                }
            } else {
                values.keySet().forEach((attr) -> {
                    values.get(attr).forEach((value) -> {
                        if ((!(value.matches("\"[xX][0-9]+\"") || value.matches("[xX][0-9]+")
                                || value.startsWith(Action.TOKEN_X))) && !value.isEmpty()) {
                            String valueToCheck = value;
                            if (valueToCheck.equals("no") || valueToCheck.equals("yes")
                                    || valueToCheck.equals("yes or no") || valueToCheck.equals("none")
                            //|| valueToCheck.equals("dont_care")
                                    || valueToCheck.equals("empty")) {
                                valueToCheck = attr + ":" + value;
                                unalignedAttrs.add(attr + "=" + value);
                            }
                            if (valueToCheck.equals(attr)) {
                                unalignedAttrs.add(attr + "=" + value);
                            }
                            if (!valueToCheck.equals("empty:empty")
                                    && getValueAlignments().containsKey(valueToCheck)) {
                                unalignedAttrs.add(attr + "=" + valueToCheck);
                            }
                        } else {
                            unalignedAttrs.add(attr + "=" + value);
                        }
                    });
                });
                unalignedAttrs.forEach((attrValue) -> {
                    int index = getRandomGen().nextInt(randomRealization.size());
                    boolean change = false;
                    while (!change) {
                        if (!randomRealization.get(index).getAttribute().equals(Action.TOKEN_PUNCT)) {
                            randomRealization.get(index).setAttribute(attrValue.toLowerCase().trim());
                            change = true;
                        } else {
                            index = getRandomGen().nextInt(randomRealization.size());
                        }
                    }
                });
                String previousAttr = "";
                for (int i = 0; i < randomRealization.size(); i++) {
                    if (randomRealization.get(i).getAttribute().isEmpty()
                            || randomRealization.get(i).getAttribute().equals("[]")) {
                        if (!previousAttr.isEmpty()) {
                            randomRealization.get(i).setAttribute(previousAttr);
                        }
                    } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) {
                        previousAttr = randomRealization.get(i).getAttribute();
                    } else {
                        previousAttr = "";
                    }
                }
                //System.out.println("1: " + randomRealization);
                previousAttr = "";
                for (int i = randomRealization.size() - 1; i >= 0; i--) {
                    if (randomRealization.get(i).getAttribute().isEmpty()
                            || randomRealization.get(i).getAttribute().equals("[]")) {
                        if (!previousAttr.isEmpty()) {
                            randomRealization.get(i).setAttribute(previousAttr);
                        }
                    } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) {
                        previousAttr = randomRealization.get(i).getAttribute();
                    } else {
                        previousAttr = "";
                    }
                }
                //System.out.println("2: " + randomRealization);
                previousAttr = "";
                for (int i = 0; i < randomRealization.size(); i++) {
                    if (randomRealization.get(i).getAttribute().isEmpty()
                            || randomRealization.get(i).getAttribute().equals("[]")) {
                        if (!previousAttr.isEmpty()) {
                            randomRealization.get(i).setAttribute(previousAttr);
                        }
                    } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) {
                        previousAttr = randomRealization.get(i).getAttribute();
                    }
                }
                //System.out.println("3: " + randomRealization);
                previousAttr = "";
                for (int i = randomRealization.size() - 1; i >= 0; i--) {
                    if (randomRealization.get(i).getAttribute().isEmpty()
                            || randomRealization.get(i).getAttribute().equals("[]")) {
                        if (!previousAttr.isEmpty()) {
                            randomRealization.get(i).setAttribute(previousAttr);
                        }
                    } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) {
                        previousAttr = randomRealization.get(i).getAttribute();
                    }
                }
                //System.out.println("4: " + randomRealization);
            }
            //FIX WRONG @PUNCT@
            String previousAttr = "";
            for (int i = randomRealization.size() - 1; i >= 0; i--) {
                if (randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)
                        && !randomRealization.get(i).getWord().matches("[,.?!;:']")) {
                    if (!previousAttr.isEmpty()) {
                        randomRealization.get(i).setAttribute(previousAttr);
                    }
                } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) {
                    previousAttr = randomRealization.get(i).getAttribute();
                }
            }
            ArrayList<Action> cleanRandomRealization = new ArrayList<>();
            randomRealization.stream().filter((a) -> (!a.getAttribute().equals(Action.TOKEN_PUNCT)))
                    .forEachOrdered((a) -> {
                        cleanRandomRealization.add(a);
                    });
            //ADD END TOKENS
            ArrayList<Action> endRandomRealization = new ArrayList<>();
            previousAttr = "";
            for (int i = 0; i < cleanRandomRealization.size(); i++) {
                Action a = cleanRandomRealization.get(i);
                if (!previousAttr.isEmpty() && !a.getAttribute().equals(previousAttr)) {
                    endRandomRealization.add(new Action(Action.TOKEN_END, previousAttr));
                }
                endRandomRealization.add(a);
                previousAttr = a.getAttribute();
            }
            endRandomRealization.add(new Action(Action.TOKEN_END, previousAttr));
            endRandomRealization.add(new Action(Action.TOKEN_END, Action.TOKEN_END));
            calculatedRealizationsCache.put(realization, endRandomRealization);
            //System.out.println(di.getMeaningRepresentation().getPredicate() + ": " + endRandomRealization);
            ArrayList<String> attrValues = new ArrayList<String>();
            endRandomRealization.forEach((a) -> {
                if (attrValues.isEmpty()) {
                    attrValues.add(a.getAttribute());
                } else if (!attrValues.get(attrValues.size() - 1).equals(a.getAttribute())) {
                    attrValues.add(a.getAttribute());
                }
            });
            if (attrValues.size() > getMaxContentSequenceLength()) {
                setMaxContentSequenceLength(attrValues.size());
            }
            ArrayList<Action> punctRealization = new ArrayList<>();
            punctRealization.addAll(randomRealization);
            previousAttr = "";
            for (int i = 0; i < punctRealization.size(); i++) {
                if (!punctRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) {
                    if (!punctRealization.get(i).getAttribute().equals(previousAttr)
                            && !previousAttr.isEmpty()) {
                        punctRealization.add(i, new Action(Action.TOKEN_END, previousAttr));
                        i++;
                    }
                    previousAttr = punctRealization.get(i).getAttribute();
                }
            }
            if (!punctRealization.get(punctRealization.size() - 1).getWord().equals(Action.TOKEN_END)) {
                punctRealization.add(new Action(Action.TOKEN_END, previousAttr));
            }
            return punctRealization;
        }).map((punctRealization) -> {
            punctRealizations.put(di, punctRealization);
            return punctRealization;
        }).forEachOrdered((punctRealization) -> {
            for (int i = 0; i < punctRealization.size(); i++) {
                Action a = punctRealization.get(i);
                if (a.getAttribute().equals(Action.TOKEN_PUNCT)) {
                    boolean legal = true;
                    ArrayList<Action> surroundingActions = new ArrayList<>();
                    /*if (i - 3 >= 0) {
                    surroundingActions.add(punctRealization.get(i - 3));
                    } else {
                    surroundingActions.add(null);
                    }*/
                    if (i - 2 >= 0) {
                        surroundingActions.add(punctRealization.get(i - 2));
                    } else {
                        surroundingActions.add(null);
                    }
                    if (i - 1 >= 0) {
                        surroundingActions.add(punctRealization.get(i - 1));
                    } else {
                        legal = false;
                    }
                    boolean oneMore = false;
                    if (i + 1 < punctRealization.size()) {
                        surroundingActions.add(punctRealization.get(i + 1));
                        if (!punctRealization.get(i + 1).getAttribute().equals(Action.TOKEN_END)) {
                            oneMore = true;
                        }
                    } else {
                        legal = false;
                    }
                    if (oneMore && i + 2 < punctRealization.size()) {
                        surroundingActions.add(punctRealization.get(i + 2));
                    } else {
                        surroundingActions.add(null);
                    }
                    if (legal) {
                        if (!punctPatterns.get(di.getMeaningRepresentation().getPredicate())
                                .containsKey(surroundingActions)) {
                            punctPatterns.get(di.getMeaningRepresentation().getPredicate())
                                    .put(surroundingActions, new HashMap<Action, Integer>());
                        }
                        if (!punctPatterns.get(di.getMeaningRepresentation().getPredicate())
                                .get(surroundingActions).containsKey(a)) {
                            punctPatterns.get(di.getMeaningRepresentation().getPredicate())
                                    .get(surroundingActions).put(a, 1);
                        } else {
                            punctPatterns.get(di.getMeaningRepresentation().getPredicate())
                                    .get(surroundingActions)
                                    .put(a, punctPatterns.get(di.getMeaningRepresentation().getPredicate())
                                            .get(surroundingActions).get(a) + 1);
                        }
                    }
                }
            }
        });
        return di;
    }).map((di) -> {
        di.setDirectReferenceSequence(calculatedRealizationsCache.get(di.getDirectReferenceSequence()));
        return di;
    });
    punctRealizations.keySet().forEach((di) -> {
        ArrayList<Action> punctRealization = punctRealizations.get(di);
        punctPatterns.get(di.getMeaningRepresentation().getPredicate()).keySet().forEach((surrounds) -> {
            int beforeNulls = 0;
            if (surrounds.get(0) == null) {
                beforeNulls++;
            }
            if (surrounds.get(1) == null) {
                beforeNulls++;
            }
            for (int i = 0 - beforeNulls; i < punctRealization.size(); i++) {
                boolean matches = true;
                int m = 0;
                for (int s = 0; s < surrounds.size(); s++) {
                    if (surrounds.get(s) != null) {
                        if (i + s < punctRealization.size()) {
                            if (!punctRealization.get(i + s).getWord().equals(surrounds.get(s)
                                    .getWord()) /*|| !cleanActionList.get(i).getAttribute().equals(surrounds.get(s).getAttribute())*/) {
                                matches = false;
                                s = surrounds.size();
                            } else {
                                m++;
                            }
                        } else {
                            matches = false;
                            s = surrounds.size();
                        }
                    } else if (s < 2 && i + s >= 0) {
                        matches = false;
                        s = surrounds.size();
                    } else if (s >= 2 && i + s < punctRealization.size()) {
                        matches = false;
                        s = surrounds.size();
                    }
                }
                if (matches && m > 0) {
                    Action a = new Action("", "");
                    if (!punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds)
                            .containsKey(a)) {
                        punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds).put(a,
                                1);
                    } else {
                        punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds).put(a,
                                punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds)
                                        .get(a) + 1);
                    }
                }
            }
        });
    });
    punctPatterns.keySet().forEach((predicate) -> {
        punctPatterns.get(predicate).keySet().forEach((punct) -> {
            Action bestAction = null;
            int bestCount = 0;
            for (Action a : punctPatterns.get(predicate).get(punct).keySet()) {
                if (punctPatterns.get(predicate).get(punct).get(a) > bestCount) {
                    bestAction = a;
                    bestCount = punctPatterns.get(predicate).get(punct).get(a);
                } else if (punctPatterns.get(predicate).get(punct).get(a) == bestCount
                        && bestAction.getWord().isEmpty()) {
                    bestAction = a;
                }
            }
            if (!getPunctuationPatterns().containsKey(predicate)) {
                getPunctuationPatterns().put(predicate, new HashMap<ArrayList<Action>, Action>());
            }
            if (!bestAction.getWord().isEmpty()) {
                getPunctuationPatterns().get(predicate).put(punct, bestAction);
            }
        });
    });
}

From source file:structuredPredictionNLG.SFX.java

/**
 * Builds the feature instance used when choosing the next content (attribute) action.
 * <p>
 * General features describe the history of generated attribute/value pairs and attributes (the last
 * item and 2- to 5-grams), what has been mentioned, what is still to be mentioned, and which of the
 * available attributes occur in the meaning representation. In addition, every action available
 * for the predicate gets its own features, pairwise conjunctions of those features, and scores
 * from {@code getContentLMsPerPredicate()} for the predicate.
 * <p>
 * Attribute/value strings are expected in {@code attribute=value} form; a string without
 * {@code '='} is treated as a bare attribute name.
 *
 * @param predicate the predicate whose available actions are considered
 * @param costs the costs, passed unchanged to the returned instance
 * @param previousGeneratedAttrs the attribute/value actions generated so far; start and end tokens are ignored
 * @param attrValuesAlreadyMentioned the attribute/value pairs already mentioned
 * @param attrValuesToBeMentioned the attribute/value pairs still to be mentioned
 * @param availableAttributeActions the available attribute actions per predicate
 * @param MR the meaning representation being realized
 * @return an instance holding the general features, the per-action features and the given costs
 */
@Override
public Instance createContentInstanceWithCosts(String predicate, TObjectDoubleHashMap<String> costs,
        ArrayList<String> previousGeneratedAttrs, HashSet<String> attrValuesAlreadyMentioned,
        HashSet<String> attrValuesToBeMentioned, HashMap<String, HashSet<String>> availableAttributeActions,
        MeaningRepresentation MR) {
    TObjectDoubleHashMap<String> generalFeatures = new TObjectDoubleHashMap<>();
    HashMap<String, TObjectDoubleHashMap<String>> valueSpecificFeatures = new HashMap<>();
    if (availableAttributeActions.containsKey(predicate)) {
        availableAttributeActions.get(predicate).forEach((action) -> {
            valueSpecificFeatures.put(action, new TObjectDoubleHashMap<String>());
        });
    }

    ArrayList<String> mentionedAttrValues = new ArrayList<>();
    for (String attrValue : previousGeneratedAttrs) {
        if (!attrValue.equals(Action.TOKEN_START) && !attrValue.equals(Action.TOKEN_END)) {
            mentionedAttrValues.add(attrValue);
        }
    }

    // Attribute/value history: last item and n-grams, padded with "@@" where the history is too short
    String prevAttrValue = itemFromEnd(mentionedAttrValues, 1);
    String prev2AttrValue = itemFromEnd(mentionedAttrValues, 2);
    String prev3AttrValue = itemFromEnd(mentionedAttrValues, 3);
    String prev4AttrValue = itemFromEnd(mentionedAttrValues, 4);
    String prev5AttrValue = itemFromEnd(mentionedAttrValues, 5);
    generalFeatures.put("feature_attrValue_1_" + prevAttrValue, 1.0);

    String prevBigramAttrValue = prev2AttrValue + "|" + prevAttrValue;
    String prevTrigramAttrValue = prev3AttrValue + "|" + prevBigramAttrValue;
    String prev4gramAttrValue = prev4AttrValue + "|" + prevTrigramAttrValue;
    String prev5gramAttrValue = prev5AttrValue + "|" + prev4gramAttrValue;
    generalFeatures.put("feature_attrValue_bigram_" + prevBigramAttrValue, 1.0);
    generalFeatures.put("feature_attrValue_trigram_" + prevTrigramAttrValue, 1.0);
    generalFeatures.put("feature_attrValue_4gram_" + prev4gramAttrValue, 1.0);
    generalFeatures.put("feature_attrValue_5gram_" + prev5gramAttrValue, 1.0);

    //If arguments have been generated or not
    // (the misspelt "allreadyMentioned" is kept as-is so that feature names do not change)
    for (String attrValue : mentionedAttrValues) {
        generalFeatures.put("feature_attrValue_allreadyMentioned_" + attrValue, 1.0);
    }
    //If arguments should still be generated or not
    for (String attrValue : attrValuesToBeMentioned) {
        generalFeatures.put("feature_attrValue_toBeMentioned_" + attrValue, 1.0);
    }

    //Which attrs are in the MR and which are not
    if (availableAttributeActions.containsKey(predicate)) {
        availableAttributeActions.get(predicate).forEach((attribute) -> {
            if (MR.getAttributeValues().keySet().contains(attribute)) {
                generalFeatures.put("feature_attr_inMR_" + attribute, 1.0);
            } else {
                generalFeatures.put("feature_attr_notInMR_" + attribute, 1.0);
            }
        });
    }

    // The same history and agenda, reduced to attribute names
    ArrayList<String> mentionedAttrs = new ArrayList<>();
    for (String attrValue : mentionedAttrValues) {
        mentionedAttrs.add(attributeNameOf(attrValue));
    }
    HashSet<String> attrsToBeMentioned = new HashSet<>();
    for (String attrValue : attrValuesToBeMentioned) {
        attrsToBeMentioned.add(attributeNameOf(attrValue));
    }

    String prevAttr = itemFromEnd(mentionedAttrs, 1);
    String prev2Attr = itemFromEnd(mentionedAttrs, 2);
    String prev3Attr = itemFromEnd(mentionedAttrs, 3);
    String prev4Attr = itemFromEnd(mentionedAttrs, 4);
    String prev5Attr = itemFromEnd(mentionedAttrs, 5);
    // an attribute that is blank after trimming is reported as "@@", like a missing one
    generalFeatures.put("feature_attr_1_" + (prevAttr.isEmpty() ? "@@" : prevAttr), 1.0);

    String prevBigramAttr = prev2Attr + "|" + prevAttr;
    String prevTrigramAttr = prev3Attr + "|" + prevBigramAttr;
    String prev4gramAttr = prev4Attr + "|" + prevTrigramAttr;
    String prev5gramAttr = prev5Attr + "|" + prev4gramAttr;
    generalFeatures.put("feature_attr_bigram_" + prevBigramAttr, 1.0);
    generalFeatures.put("feature_attr_trigram_" + prevTrigramAttr, 1.0);
    generalFeatures.put("feature_attr_4gram_" + prev4gramAttr, 1.0);
    generalFeatures.put("feature_attr_5gram_" + prev5gramAttr, 1.0);

    //If arguments have been generated or not
    for (String attr : attrValuesAlreadyMentioned) {
        generalFeatures.put("feature_attr_alreadyMentioned_" + attr, 1.0);
    }
    //If arguments should still be generated or not
    for (String attr : attrsToBeMentioned) {
        generalFeatures.put("feature_attr_toBeMentioned_" + attr, 1.0);
    }

    //Attr specific features (and global features)
    if (availableAttributeActions.containsKey(predicate)) {
        for (String action : availableAttributeActions.get(predicate)) {
            TObjectDoubleHashMap<String> features = valueSpecificFeatures.get(action);
            if (action.equals(Action.TOKEN_END)) {
                if (attrsToBeMentioned.isEmpty()) {
                    features.put("global_feature_specific_allAttrValuesMentioned", 1.0);
                } else {
                    features.put("global_feature_specific_allAttrValuesNotMentioned", 1.0);
                }
            } else {
                //Is attr in MR?
                if (MR.getAttributeValues().get(action) != null) {
                    features.put("global_feature_specific_isInMR", 1.0);
                } else {
                    features.put("global_feature_specific_isNotInMR", 1.0);
                }
                //Is attr already mentioned right before
                if (prevAttr.equals(action)) {
                    features.put("global_feature_specific_attrFollowingSameAttr", 1.0);
                } else {
                    features.put("global_feature_specific_attrNotFollowingSameAttr", 1.0);
                }
                //Is attr already mentioned
                // attributeNameOf tolerates values without '=', which previously made substring throw
                for (String attrValue : attrValuesAlreadyMentioned) {
                    if (attributeNameOf(attrValue).equals(action)) {
                        features.put("global_feature_specific_attrAlreadyMentioned", 1.0);
                    }
                }
                //Is attr to be mentioned (has value to express)
                boolean toBeMentioned = false;
                for (String attrValue : attrValuesToBeMentioned) {
                    if (attributeNameOf(attrValue).equals(action)) {
                        toBeMentioned = true;
                        features.put("global_feature_specific_attrToBeMentioned", 1.0);
                    }
                }
                if (!toBeMentioned) {
                    features.put("global_feature_specific_attrNotToBeMentioned", 1.0);
                }
            }

            // Pairwise conjunctions of the features set so far; iterating over a snapshot of the
            // keys because the conjunctions are added to the same map
            HashSet<String> keys = new HashSet<>(features.keySet());
            for (String feature1 : keys) {
                for (String feature2 : keys) {
                    if (features.get(feature1) == 1.0 && features.get(feature2) == 1.0
                            && feature1.compareTo(feature2) < 0) {
                        features.put(feature1 + "&&" + feature2, 1.0);
                    }
                }
            }

            String nextValue = chooseNextValue(action, attrValuesToBeMentioned);
            if (nextValue.isEmpty() && !action.equals(Action.TOKEN_END)) {
                features.put("global_feature_LMAttr_score", 0.0);
            } else {
                ArrayList<String> fullGramLM = new ArrayList<>(mentionedAttrValues);

                // Up to five most recent attribute/value pairs, followed by the candidate action
                int historySize = mentionedAttrValues.size();
                ArrayList<String> prev5attrValueGramLM = new ArrayList<>(
                        mentionedAttrValues.subList(Math.max(0, historySize - 5), historySize));
                if (!action.equals(Action.TOKEN_END)) {
                    prev5attrValueGramLM.add(action + "=" + chooseNextValue(action, attrValuesToBeMentioned));
                } else {
                    prev5attrValueGramLM.add(action);
                }
                while (prev5attrValueGramLM.size() < 4) {
                    prev5attrValueGramLM.add(0, "@@");
                }

                double afterLMScore = getContentLMsPerPredicate().get(predicate)
                        .getProbability(prev5attrValueGramLM);
                features.put("global_feature_LMAttr_score", afterLMScore);

                afterLMScore = getContentLMsPerPredicate().get(predicate).getProbability(fullGramLM);
                features.put("global_feature_LMAttrFull_score", afterLMScore);
            }
        }
    }
    return new Instance(generalFeatures, valueSpecificFeatures, costs);
}

/**
 * Returns the trimmed item {@code offset} positions from the end of the history
 * (1 = last item), or "@@" when the history is shorter than that.
 */
private String itemFromEnd(ArrayList<String> history, int offset) {
    int index = history.size() - offset;
    return index >= 0 ? history.get(index).trim() : "@@";
}

/**
 * Returns the part of an {@code attribute=value} string before the first '=',
 * or the whole string when it contains no '='.
 */
private String attributeNameOf(String attrValue) {
    int separator = attrValue.indexOf('=');
    return separator == -1 ? attrValue : attrValue.substring(0, separator);
}

From source file:structuredPredictionNLG.SFX.java

/**
 *
 * @param trainingData the training dataset instances
 */
@Override
public void createNaiveAlignments(ArrayList<DatasetInstance> trainingData) {
    HashMap<String, HashMap<ArrayList<Action>, HashMap<Action, Integer>>> punctPatterns = new HashMap<>();
    getPredicates().forEach((predicate) -> {
        punctPatterns.put(predicate, new HashMap<ArrayList<Action>, HashMap<Action, Integer>>());
    });
    HashMap<DatasetInstance, ArrayList<Action>> punctRealizations = new HashMap<DatasetInstance, ArrayList<Action>>();

    trainingData.stream().map((di) -> {
        HashMap<ArrayList<Action>, ArrayList<Action>> calculatedRealizationsCache = new HashMap<>();
        HashSet<ArrayList<Action>> initRealizations = new HashSet<>();
        if (!calculatedRealizationsCache.containsKey(di.getDirectReferenceSequence())) {
            initRealizations.add(di.getDirectReferenceSequence());
        }
        initRealizations.stream().map((realization) -> {
            HashMap<String, HashSet<String>> values = new HashMap<>();
            di.getMeaningRepresentation().getAttributeValues().keySet().forEach((attr) -> {
                values.put(attr, new HashSet<>(di.getMeaningRepresentation().getAttributeValues().get(attr)));
            });
            ArrayList<Action> randomRealization = new ArrayList<>();
            for (int i = 0; i < realization.size(); i++) {
                Action a = realization.get(i);
                if (a.getAttribute().equals(Action.TOKEN_PUNCT)) {
                    randomRealization.add(new Action(a.getWord(), a.getAttribute()));
                } else {
                    randomRealization.add(new Action(a.getWord(), ""));
                }
            }
            if (values.keySet().isEmpty()) {
                for (int i = 0; i < randomRealization.size(); i++) {
                    if (randomRealization.get(i).getAttribute().isEmpty()
                            || randomRealization.get(i).getAttribute().equals("[]")) {
                        if (!getAttributes().get(di.getMeaningRepresentation().getPredicate())
                                .contains("empty")) {
                            getAttributes().get(di.getMeaningRepresentation().getPredicate()).add("empty");
                        }
                        randomRealization.get(i).setAttribute("empty=empty");
                    }
                }
            } else {
                HashMap<Double, HashMap<String, ArrayList<Integer>>> indexAlignments = new HashMap<>();
                HashSet<String> noValueAttrs = new HashSet<String>();
                values.keySet().forEach((attr) -> {
                    values.get(attr).stream().filter(
                            (value) -> ((!(value.matches("\"[xX][0-9]+\"") || value.matches("[xX][0-9]+")
                                    || value.startsWith(Action.TOKEN_X))) && !value.isEmpty()))
                            .map((value) -> {
                                String valueToCheck = value;
                                if (valueToCheck.equals("no") || valueToCheck.equals("yes")
                                        || valueToCheck.equals("yes or no") || valueToCheck.equals("none")
                                //|| attr.equals("dont_care")
                                        || valueToCheck.equals("empty")) {
                                    valueToCheck = attr + ":" + value;
                                    noValueAttrs.add(attr + "=" + value);
                                }
                                if (valueToCheck.equals(attr)) {
                                    noValueAttrs.add(attr + "=" + value);
                                }
                                return valueToCheck;
                            })
                            .filter((valueToCheck) -> (!valueToCheck.equals("empty:empty")
                                    && getValueAlignments().containsKey(valueToCheck)))
                            .forEachOrdered((valueToCheck) -> {
                                for (ArrayList<String> align : getValueAlignments().get(valueToCheck)
                                        .keySet()) {
                                    int n = align.size();
                                    for (int i = 0; i <= randomRealization.size() - n; i++) {
                                        ArrayList<String> compare = new ArrayList<String>();
                                        ArrayList<Integer> indexAlignment = new ArrayList<Integer>();
                                        for (int j = 0; j < n; j++) {
                                            compare.add(randomRealization.get(i + j).getWord());
                                            indexAlignment.add(i + j);
                                        }
                                        if (compare.equals(align)) {
                                            if (!indexAlignments.containsKey(
                                                    getValueAlignments().get(valueToCheck).get(align))) {
                                                indexAlignments.put(
                                                        getValueAlignments().get(valueToCheck).get(align),
                                                        new HashMap());
                                            }
                                            indexAlignments
                                                    .get(getValueAlignments().get(valueToCheck).get(align))
                                                    .put(attr + "=" + valueToCheck, indexAlignment);
                                        }
                                    }
                                }
                            });
                });
                ArrayList<Double> similarities = new ArrayList<>(indexAlignments.keySet());
                Collections.sort(similarities);
                HashSet<String> assignedAttrValues = new HashSet<String>();
                HashSet<Integer> assignedIntegers = new HashSet<Integer>();
                for (int i = similarities.size() - 1; i >= 0; i--) {
                    for (String attrValue : indexAlignments.get(similarities.get(i)).keySet()) {
                        if (!assignedAttrValues.contains(attrValue)) {
                            boolean isUnassigned = true;
                            for (Integer index : indexAlignments.get(similarities.get(i)).get(attrValue)) {
                                if (assignedIntegers.contains(index)) {
                                    isUnassigned = false;
                                }
                            }
                            if (isUnassigned) {
                                assignedAttrValues.add(attrValue);
                                for (Integer index : indexAlignments.get(similarities.get(i)).get(attrValue)) {
                                    assignedIntegers.add(index);
                                    randomRealization.get(index).setAttribute(attrValue.toLowerCase().trim());
                                }
                            }
                        }
                    }
                }
                //System.out.println("-1: " + randomRealization);
                randomRealization.stream().filter((a) -> (a.getWord().startsWith(Action.TOKEN_X)))
                        .forEachOrdered((a) -> {
                            String attr = a.getWord().substring(3, a.getWord().lastIndexOf('_')).toLowerCase()
                                    .trim();
                            a.setAttribute(attr + "=" + a.getWord());
                        });
                HashSet<String> unalignedNoValueAttrs = new HashSet<>();
                noValueAttrs.forEach((noValueAttr) -> {
                    boolean assigned = false;
                    for (Action a : randomRealization) {
                        if (a.getAttribute().equals(noValueAttr)) {
                            assigned = true;
                        }
                    }
                    if (!assigned) {
                        unalignedNoValueAttrs.add(noValueAttr);
                    }
                });
                boolean isAllEmpty = true;
                boolean hasSpace = false;
                for (int i = 0; i < randomRealization.size(); i++) {
                    if (!randomRealization.get(i).getAttribute().isEmpty()
                            && !randomRealization.get(i).getAttribute().equals("[]")
                            && !randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) {
                        isAllEmpty = false;
                    }
                    if (randomRealization.get(i).getAttribute().isEmpty()
                            || randomRealization.get(i).getAttribute().equals("[]")) {
                        hasSpace = true;
                    }
                }
                if (isAllEmpty && hasSpace && !unalignedNoValueAttrs.isEmpty()) {
                    unalignedNoValueAttrs.forEach((attrValue) -> {
                        int index = getRandomGen().nextInt(randomRealization.size());
                        boolean change = false;
                        while (!change) {
                            if (!randomRealization.get(index).getAttribute().equals(Action.TOKEN_PUNCT)) {
                                randomRealization.get(index).setAttribute(attrValue.toLowerCase().trim());
                                change = true;
                            } else {
                                index = getRandomGen().nextInt(randomRealization.size());
                            }
                        }
                    });
                }
                //System.out.println(isAllEmpty + " " + hasSpace + " " + unalignedNoValueAttrs);
                //System.out.println(">> " + noValueAttrs);
                //System.out.println(">> " + values);
                //System.out.println("0: " + randomRealization);
                String previousAttr = "";
                int start = -1;
                for (int i = 0; i < randomRealization.size(); i++) {
                    if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)
                            && !randomRealization.get(i).getAttribute().isEmpty()
                            && !randomRealization.get(i).getAttribute().equals("[]")) {
                        if (start != -1) {
                            int middle = (start + i - 1) / 2 + 1;
                            for (int j = start; j < middle; j++) {
                                if (randomRealization.get(j).getAttribute().isEmpty()
                                        || randomRealization.get(j).getAttribute().equals("[]")) {
                                    randomRealization.get(j).setAttribute(previousAttr);
                                }
                            }
                            for (int j = middle; j < i; j++) {
                                if (randomRealization.get(j).getAttribute().isEmpty()
                                        || randomRealization.get(j).getAttribute().equals("[]")) {
                                    randomRealization.get(j)
                                            .setAttribute(randomRealization.get(i).getAttribute());
                                }
                            }
                        }
                        start = i;
                        previousAttr = randomRealization.get(i).getAttribute();
                    } else {
                        previousAttr = "";
                    }
                }
                //System.out.println("1: " + randomRealization);
                previousAttr = "";
                for (int i = randomRealization.size() - 1; i >= 0; i--) {
                    if (randomRealization.get(i).getAttribute().isEmpty()
                            || randomRealization.get(i).getAttribute().equals("[]")) {
                        if (!previousAttr.isEmpty()) {
                            randomRealization.get(i).setAttribute(previousAttr);
                        }
                    } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) {
                        previousAttr = randomRealization.get(i).getAttribute();
                    } else {
                        previousAttr = "";
                    }
                }
                //System.out.println("2: " + randomRealization);
                previousAttr = "";
                for (int i = 0; i < randomRealization.size(); i++) {
                    if (randomRealization.get(i).getAttribute().isEmpty()
                            || randomRealization.get(i).getAttribute().equals("[]")) {
                        if (!previousAttr.isEmpty()) {
                            randomRealization.get(i).setAttribute(previousAttr);
                        }
                    } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) {
                        previousAttr = randomRealization.get(i).getAttribute();
                    }
                }
                //System.out.println("3: " + randomRealization);
                previousAttr = "";
                for (int i = randomRealization.size() - 1; i >= 0; i--) {
                    if (randomRealization.get(i).getAttribute().isEmpty()
                            || randomRealization.get(i).getAttribute().equals("[]")) {
                        if (!previousAttr.isEmpty()) {
                            randomRealization.get(i).setAttribute(previousAttr);
                        }
                    } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) {
                        previousAttr = randomRealization.get(i).getAttribute();
                    }
                }
                //System.out.println("4: " + randomRealization);
            }
            //FIX WRONG @PUNCT@
            String previousAttr = "";
            for (int i = randomRealization.size() - 1; i >= 0; i--) {
                if (randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)
                        && !randomRealization.get(i).getWord().matches("[,.?!;:']")) {
                    if (!previousAttr.isEmpty()) {
                        randomRealization.get(i).setAttribute(previousAttr);
                    }
                } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) {
                    previousAttr = randomRealization.get(i).getAttribute();
                }
            }
            ArrayList<Action> cleanRandomRealization = new ArrayList<>();
            randomRealization.stream().filter((a) -> (!a.getAttribute().equals(Action.TOKEN_PUNCT)))
                    .forEachOrdered((a) -> {
                        cleanRandomRealization.add(a);
                    });
            //ADD END TOKENS
            ArrayList<Action> endRandomRealization = new ArrayList<>();
            previousAttr = "";
            for (int i = 0; i < cleanRandomRealization.size(); i++) {
                Action a = cleanRandomRealization.get(i);
                if (!previousAttr.isEmpty() && !a.getAttribute().equals(previousAttr)) {
                    endRandomRealization.add(new Action(Action.TOKEN_END, previousAttr));
                }
                endRandomRealization.add(a);
                previousAttr = a.getAttribute();
            }
            endRandomRealization.add(new Action(Action.TOKEN_END, previousAttr));
            endRandomRealization.add(new Action(Action.TOKEN_END, Action.TOKEN_END));
            calculatedRealizationsCache.put(realization, endRandomRealization);
            //System.out.println(di.getMeaningRepresentation().getPredicate() + ": " + endRandomRealization);
            ArrayList<String> attrValues = new ArrayList<String>();
            endRandomRealization.forEach((a) -> {
                if (attrValues.isEmpty()) {
                    attrValues.add(a.getAttribute());
                } else if (!attrValues.get(attrValues.size() - 1).equals(a.getAttribute())) {
                    attrValues.add(a.getAttribute());
                }
            });
            if (attrValues.size() > getMaxContentSequenceLength()) {
                setMaxContentSequenceLength(attrValues.size());
            }
            ArrayList<Action> punctRealization = new ArrayList<>();
            punctRealization.addAll(randomRealization);
            previousAttr = "";
            for (int i = 0; i < punctRealization.size(); i++) {
                if (!punctRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) {
                    if (!punctRealization.get(i).getAttribute().equals(previousAttr)
                            && !previousAttr.isEmpty()) {
                        punctRealization.add(i, new Action(Action.TOKEN_END, previousAttr));
                        i++;
                    }
                    previousAttr = punctRealization.get(i).getAttribute();
                }
            }
            if (!punctRealization.get(punctRealization.size() - 1).getWord().equals(Action.TOKEN_END)) {
                punctRealization.add(new Action(Action.TOKEN_END, previousAttr));
            }
            return punctRealization;
        }).map((punctRealization) -> {
            punctRealizations.put(di, punctRealization);
            return punctRealization;
        }).forEachOrdered((punctRealization) -> {
            for (int i = 0; i < punctRealization.size(); i++) {
                Action a = punctRealization.get(i);
                if (a.getAttribute().equals(Action.TOKEN_PUNCT)) {
                    boolean legal = true;
                    ArrayList<Action> surroundingActions = new ArrayList<>();
                    if (i - 2 >= 0) {
                        surroundingActions.add(punctRealization.get(i - 2));
                    } else {
                        surroundingActions.add(null);
                    }
                    if (i - 1 >= 0) {
                        surroundingActions.add(punctRealization.get(i - 1));
                    } else {
                        legal = false;
                    }
                    boolean oneMore = false;
                    if (i + 1 < punctRealization.size()) {
                        surroundingActions.add(punctRealization.get(i + 1));
                        if (!punctRealization.get(i + 1).getAttribute().equals(Action.TOKEN_END)) {
                            oneMore = true;
                        }
                    } else {
                        legal = false;
                    }
                    if (oneMore && i + 2 < punctRealization.size()) {
                        surroundingActions.add(punctRealization.get(i + 2));
                    } else {
                        surroundingActions.add(null);
                    }
                    if (legal) {
                        if (!punctPatterns.get(di.getMeaningRepresentation().getPredicate())
                                .containsKey(surroundingActions)) {
                            punctPatterns.get(di.getMeaningRepresentation().getPredicate())
                                    .put(surroundingActions, new HashMap<Action, Integer>());
                        }
                        if (!punctPatterns.get(di.getMeaningRepresentation().getPredicate())
                                .get(surroundingActions).containsKey(a)) {
                            punctPatterns.get(di.getMeaningRepresentation().getPredicate())
                                    .get(surroundingActions).put(a, 1);
                        } else {
                            punctPatterns.get(di.getMeaningRepresentation().getPredicate())
                                    .get(surroundingActions)
                                    .put(a, punctPatterns.get(di.getMeaningRepresentation().getPredicate())
                                            .get(surroundingActions).get(a) + 1);
                        }
                    }
                }
            }
        });
        di.setDirectReferenceSequence(calculatedRealizationsCache.get(di.getDirectReferenceSequence()));
        return di;
    }).forEachOrdered((di) -> {
        HashSet<String> attrValuesToBeMentioned = new HashSet<>();
        di.getMeaningRepresentation().getAttributeValues().keySet().forEach((attribute) -> {
            int a = 0;
            for (String value : di.getMeaningRepresentation().getAttributeValues().get(attribute)) {
                if (value.startsWith("\"x")) {
                    value = "x" + a;
                    a++;
                } else if (value.startsWith("\"")) {
                    value = value.substring(1, value.length() - 1).replaceAll(" ", "_");
                }
                attrValuesToBeMentioned.add(attribute + "=" + value);
            }
        });
        di.getDirectReferenceSequence().stream().map((key) -> {
            attrValuesToBeMentioned.remove(key.getAttribute());
            return key;
        });
    });
    punctRealizations.keySet().forEach((di) -> {
        ArrayList<Action> punctRealization = punctRealizations.get(di);
        punctPatterns.get(di.getMeaningRepresentation().getPredicate()).keySet().forEach((surrounds) -> {
            int beforeNulls = 0;
            if (surrounds.get(0) == null) {
                beforeNulls++;
            }
            if (surrounds.get(1) == null) {
                beforeNulls++;
            }
            for (int i = 0 - beforeNulls; i < punctRealization.size(); i++) {
                boolean matches = true;
                int m = 0;
                for (int s = 0; s < surrounds.size(); s++) {
                    if (surrounds.get(s) != null) {
                        if (i + s < punctRealization.size()) {
                            if (!punctRealization.get(i + s).getWord().equals(surrounds.get(s)
                                    .getWord()) /*|| !cleanActionList.get(i).getAttribute().equals(surrounds.get(s).getAttribute())*/) {
                                matches = false;
                                s = surrounds.size();
                            } else {
                                m++;
                            }
                        } else {
                            matches = false;
                            s = surrounds.size();
                        }
                    } else if (s < 2 && i + s >= 0) {
                        matches = false;
                        s = surrounds.size();
                    } else if (s >= 2 && i + s < punctRealization.size()) {
                        matches = false;
                        s = surrounds.size();
                    }
                }
                if (matches && m > 0) {
                    Action a = new Action("", "");
                    if (!punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds)
                            .containsKey(a)) {
                        punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds).put(a,
                                1);
                    } else {
                        punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds).put(a,
                                punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds)
                                        .get(a) + 1);
                    }
                }
            }
        });
    });
    punctPatterns.keySet().forEach((predicate) -> {
        punctPatterns.get(predicate).keySet().forEach((punct) -> {
            Action bestAction = null;
            int bestCount = 0;
            for (Action a : punctPatterns.get(predicate).get(punct).keySet()) {
                if (punctPatterns.get(predicate).get(punct).get(a) > bestCount) {
                    bestAction = a;
                    bestCount = punctPatterns.get(predicate).get(punct).get(a);
                } else if (punctPatterns.get(predicate).get(punct).get(a) == bestCount
                        && bestAction.getWord().isEmpty()) {
                    bestAction = a;
                }
            }
            if (!getPunctuationPatterns().containsKey(predicate)) {
                getPunctuationPatterns().put(predicate, new HashMap<ArrayList<Action>, Action>());
            }
            if (!bestAction.getWord().isEmpty()) {
                getPunctuationPatterns().get(predicate).put(punct, bestAction);
            }
        });
    });
}