List of usage examples for java.util.HashSet.stream()
default Stream<E> stream()
From source file:structuredPredictionNLG.SFX.java
/** * * @param trainingData/*w ww .j a v a2 s.c o m*/ */ @Override public void createNaiveAlignments(ArrayList<DatasetInstance> trainingData) { HashMap<String, HashMap<ArrayList<Action>, HashMap<Action, Integer>>> punctPatterns = new HashMap<>(); getPredicates().forEach((predicate) -> { punctPatterns.put(predicate, new HashMap<ArrayList<Action>, HashMap<Action, Integer>>()); }); HashMap<DatasetInstance, ArrayList<Action>> punctRealizations = new HashMap<DatasetInstance, ArrayList<Action>>(); trainingData.stream().map((di) -> { HashMap<ArrayList<Action>, ArrayList<Action>> calculatedRealizationsCache = new HashMap<>(); HashSet<ArrayList<Action>> initRealizations = new HashSet<>(); if (!calculatedRealizationsCache.containsKey(di.getDirectReferenceSequence())) { initRealizations.add(di.getDirectReferenceSequence()); } initRealizations.stream().map((realization) -> { HashMap<String, HashSet<String>> values = new HashMap<>(); di.getMeaningRepresentation().getAttributeValues().keySet().forEach((attr) -> { values.put(attr, new HashSet<>(di.getMeaningRepresentation().getAttributeValues().get(attr))); }); ArrayList<Action> randomRealization = new ArrayList<>(); for (int i = 0; i < realization.size(); i++) { Action a = realization.get(i); if (a.getAttribute().equals(Action.TOKEN_PUNCT)) { randomRealization.add(new Action(a.getWord(), a.getAttribute())); } else { randomRealization.add(new Action(a.getWord(), "")); } } if (values.keySet().isEmpty()) { for (int i = 0; i < randomRealization.size(); i++) { if (randomRealization.get(i).getAttribute().isEmpty() || randomRealization.get(i).getAttribute().equals("[]")) { if (!getAttributes().get(di.getMeaningRepresentation().getPredicate()) .contains("empty")) { getAttributes().get(di.getMeaningRepresentation().getPredicate()).add("empty"); } randomRealization.get(i).setAttribute("empty=empty"); } } } else { HashMap<Double, HashMap<String, ArrayList<Integer>>> indexAlignments = new HashMap<>(); HashSet<String> noValueAttrs = new 
HashSet<String>(); values.keySet().forEach((attr) -> { values.get(attr).stream().filter( (value) -> ((!(value.matches("\"[xX][0-9]+\"") || value.matches("[xX][0-9]+") || value.startsWith(Action.TOKEN_X))) && !value.isEmpty())) .map((value) -> { String valueToCheck = value; if (valueToCheck.equals("no") || valueToCheck.equals("yes") || valueToCheck.equals("yes or no") || valueToCheck.equals("none") //|| attr.equals("dont_care") || valueToCheck.equals("empty")) { valueToCheck = attr + ":" + value; noValueAttrs.add(attr + "=" + value); } if (valueToCheck.equals(attr)) { noValueAttrs.add(attr + "=" + value); } return valueToCheck; }) .filter((valueToCheck) -> (!valueToCheck.equals("empty:empty") && getValueAlignments().containsKey(valueToCheck))) .forEachOrdered((valueToCheck) -> { for (ArrayList<String> align : getValueAlignments().get(valueToCheck) .keySet()) { int n = align.size(); for (int i = 0; i <= randomRealization.size() - n; i++) { ArrayList<String> compare = new ArrayList<String>(); ArrayList<Integer> indexAlignment = new ArrayList<Integer>(); for (int j = 0; j < n; j++) { compare.add(randomRealization.get(i + j).getWord()); indexAlignment.add(i + j); } if (compare.equals(align)) { if (!indexAlignments.containsKey( getValueAlignments().get(valueToCheck).get(align))) { indexAlignments.put( getValueAlignments().get(valueToCheck).get(align), new HashMap()); } indexAlignments .get(getValueAlignments().get(valueToCheck).get(align)) .put(attr + "=" + valueToCheck, indexAlignment); } } } }); }); ArrayList<Double> similarities = new ArrayList<>(indexAlignments.keySet()); Collections.sort(similarities); HashSet<String> assignedAttrValues = new HashSet<String>(); HashSet<Integer> assignedIntegers = new HashSet<Integer>(); for (int i = similarities.size() - 1; i >= 0; i--) { for (String attrValue : indexAlignments.get(similarities.get(i)).keySet()) { if (!assignedAttrValues.contains(attrValue)) { boolean isUnassigned = true; for (Integer index : 
indexAlignments.get(similarities.get(i)).get(attrValue)) { if (assignedIntegers.contains(index)) { isUnassigned = false; } } if (isUnassigned) { assignedAttrValues.add(attrValue); for (Integer index : indexAlignments.get(similarities.get(i)).get(attrValue)) { assignedIntegers.add(index); randomRealization.get(index).setAttribute(attrValue.toLowerCase().trim()); } } } } } //System.out.println("-1: " + randomRealization); randomRealization.stream().filter((a) -> (a.getWord().startsWith(Action.TOKEN_X))) .forEachOrdered((a) -> { String attr = a.getWord().substring(3, a.getWord().lastIndexOf('_')).toLowerCase() .trim(); a.setAttribute(attr + "=" + a.getWord()); }); HashSet<String> unalignedNoValueAttrs = new HashSet<>(); noValueAttrs.forEach((noValueAttr) -> { boolean assigned = false; for (Action a : randomRealization) { if (a.getAttribute().equals(noValueAttr)) { assigned = true; } } if (!assigned) { unalignedNoValueAttrs.add(noValueAttr); } }); boolean isAllEmpty = true; boolean hasSpace = false; for (int i = 0; i < randomRealization.size(); i++) { if (!randomRealization.get(i).getAttribute().isEmpty() && !randomRealization.get(i).getAttribute().equals("[]") && !randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { isAllEmpty = false; } if (randomRealization.get(i).getAttribute().isEmpty() || randomRealization.get(i).getAttribute().equals("[]")) { hasSpace = true; } } if (isAllEmpty && hasSpace && !unalignedNoValueAttrs.isEmpty()) { unalignedNoValueAttrs.forEach((attrValue) -> { int index = getRandomGen().nextInt(randomRealization.size()); boolean change = false; while (!change) { if (!randomRealization.get(index).getAttribute().equals(Action.TOKEN_PUNCT)) { randomRealization.get(index).setAttribute(attrValue.toLowerCase().trim()); change = true; } else { index = getRandomGen().nextInt(randomRealization.size()); } } }); } //System.out.println(isAllEmpty + " " + hasSpace + " " + unalignedNoValueAttrs); //System.out.println(">> " + noValueAttrs); 
//System.out.println(">> " + values); //System.out.println("0: " + randomRealization); String previousAttr = ""; int start = -1; for (int i = 0; i < randomRealization.size(); i++) { if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT) && !randomRealization.get(i).getAttribute().isEmpty() && !randomRealization.get(i).getAttribute().equals("[]")) { if (start != -1) { int middle = (start + i - 1) / 2 + 1; for (int j = start; j < middle; j++) { if (randomRealization.get(j).getAttribute().isEmpty() || randomRealization.get(j).getAttribute().equals("[]")) { randomRealization.get(j).setAttribute(previousAttr); } } for (int j = middle; j < i; j++) { if (randomRealization.get(j).getAttribute().isEmpty() || randomRealization.get(j).getAttribute().equals("[]")) { randomRealization.get(j) .setAttribute(randomRealization.get(i).getAttribute()); } } } start = i; previousAttr = randomRealization.get(i).getAttribute(); } else { previousAttr = ""; } } //System.out.println("1: " + randomRealization); previousAttr = ""; for (int i = randomRealization.size() - 1; i >= 0; i--) { if (randomRealization.get(i).getAttribute().isEmpty() || randomRealization.get(i).getAttribute().equals("[]")) { if (!previousAttr.isEmpty()) { randomRealization.get(i).setAttribute(previousAttr); } } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { previousAttr = randomRealization.get(i).getAttribute(); } else { previousAttr = ""; } } //System.out.println("2: " + randomRealization); previousAttr = ""; for (int i = 0; i < randomRealization.size(); i++) { if (randomRealization.get(i).getAttribute().isEmpty() || randomRealization.get(i).getAttribute().equals("[]")) { if (!previousAttr.isEmpty()) { randomRealization.get(i).setAttribute(previousAttr); } } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { previousAttr = randomRealization.get(i).getAttribute(); } } //System.out.println("3: " + randomRealization); previousAttr = ""; for 
(int i = randomRealization.size() - 1; i >= 0; i--) { if (randomRealization.get(i).getAttribute().isEmpty() || randomRealization.get(i).getAttribute().equals("[]")) { if (!previousAttr.isEmpty()) { randomRealization.get(i).setAttribute(previousAttr); } } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { previousAttr = randomRealization.get(i).getAttribute(); } } //System.out.println("4: " + randomRealization); } //FIX WRONG @PUNCT@ String previousAttr = ""; for (int i = randomRealization.size() - 1; i >= 0; i--) { if (randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT) && !randomRealization.get(i).getWord().matches("[,.?!;:']")) { if (!previousAttr.isEmpty()) { randomRealization.get(i).setAttribute(previousAttr); } } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { previousAttr = randomRealization.get(i).getAttribute(); } } ArrayList<Action> cleanRandomRealization = new ArrayList<>(); randomRealization.stream().filter((a) -> (!a.getAttribute().equals(Action.TOKEN_PUNCT))) .forEachOrdered((a) -> { cleanRandomRealization.add(a); }); //ADD END TOKENS ArrayList<Action> endRandomRealization = new ArrayList<>(); previousAttr = ""; for (int i = 0; i < cleanRandomRealization.size(); i++) { Action a = cleanRandomRealization.get(i); if (!previousAttr.isEmpty() && !a.getAttribute().equals(previousAttr)) { endRandomRealization.add(new Action(Action.TOKEN_END, previousAttr)); } endRandomRealization.add(a); previousAttr = a.getAttribute(); } endRandomRealization.add(new Action(Action.TOKEN_END, previousAttr)); endRandomRealization.add(new Action(Action.TOKEN_END, Action.TOKEN_END)); calculatedRealizationsCache.put(realization, endRandomRealization); //System.out.println(di.getMeaningRepresentation().getPredicate() + ": " + endRandomRealization); ArrayList<String> attrValues = new ArrayList<String>(); endRandomRealization.forEach((a) -> { if (attrValues.isEmpty()) { attrValues.add(a.getAttribute()); } 
else if (!attrValues.get(attrValues.size() - 1).equals(a.getAttribute())) { attrValues.add(a.getAttribute()); } }); if (attrValues.size() > getMaxContentSequenceLength()) { setMaxContentSequenceLength(attrValues.size()); } ArrayList<Action> punctRealization = new ArrayList<>(); punctRealization.addAll(randomRealization); previousAttr = ""; for (int i = 0; i < punctRealization.size(); i++) { if (!punctRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { if (!punctRealization.get(i).getAttribute().equals(previousAttr) && !previousAttr.isEmpty()) { punctRealization.add(i, new Action(Action.TOKEN_END, previousAttr)); i++; } previousAttr = punctRealization.get(i).getAttribute(); } } if (!punctRealization.get(punctRealization.size() - 1).getWord().equals(Action.TOKEN_END)) { punctRealization.add(new Action(Action.TOKEN_END, previousAttr)); } return punctRealization; }).map((punctRealization) -> { punctRealizations.put(di, punctRealization); return punctRealization; }).forEachOrdered((punctRealization) -> { for (int i = 0; i < punctRealization.size(); i++) { Action a = punctRealization.get(i); if (a.getAttribute().equals(Action.TOKEN_PUNCT)) { boolean legal = true; ArrayList<Action> surroundingActions = new ArrayList<>(); if (i - 2 >= 0) { surroundingActions.add(punctRealization.get(i - 2)); } else { surroundingActions.add(null); } if (i - 1 >= 0) { surroundingActions.add(punctRealization.get(i - 1)); } else { legal = false; } boolean oneMore = false; if (i + 1 < punctRealization.size()) { surroundingActions.add(punctRealization.get(i + 1)); if (!punctRealization.get(i + 1).getAttribute().equals(Action.TOKEN_END)) { oneMore = true; } } else { legal = false; } if (oneMore && i + 2 < punctRealization.size()) { surroundingActions.add(punctRealization.get(i + 2)); } else { surroundingActions.add(null); } if (legal) { if (!punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .containsKey(surroundingActions)) { 
punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .put(surroundingActions, new HashMap<Action, Integer>()); } if (!punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .get(surroundingActions).containsKey(a)) { punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .get(surroundingActions).put(a, 1); } else { punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .get(surroundingActions) .put(a, punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .get(surroundingActions).get(a) + 1); } } } } }); di.setDirectReferenceSequence(calculatedRealizationsCache.get(di.getDirectReferenceSequence())); return di; }).forEachOrdered((di) -> { HashSet<String> attrValuesToBeMentioned = new HashSet<>(); di.getMeaningRepresentation().getAttributeValues().keySet().forEach((attribute) -> { int a = 0; for (String value : di.getMeaningRepresentation().getAttributeValues().get(attribute)) { if (value.startsWith("\"x")) { value = "x" + a; a++; } else if (value.startsWith("\"")) { value = value.substring(1, value.length() - 1).replaceAll(" ", "_"); } attrValuesToBeMentioned.add(attribute + "=" + value); } }); di.getDirectReferenceSequence().stream().map((key) -> { attrValuesToBeMentioned.remove(key.getAttribute()); return key; }); }); punctRealizations.keySet().forEach((di) -> { ArrayList<Action> punctRealization = punctRealizations.get(di); punctPatterns.get(di.getMeaningRepresentation().getPredicate()).keySet().forEach((surrounds) -> { int beforeNulls = 0; if (surrounds.get(0) == null) { beforeNulls++; } if (surrounds.get(1) == null) { beforeNulls++; } for (int i = 0 - beforeNulls; i < punctRealization.size(); i++) { boolean matches = true; int m = 0; for (int s = 0; s < surrounds.size(); s++) { if (surrounds.get(s) != null) { if (i + s < punctRealization.size()) { if (!punctRealization.get(i + s).getWord().equals(surrounds.get(s) .getWord()) /*|| !cleanActionList.get(i).getAttribute().equals(surrounds.get(s).getAttribute())*/) { 
matches = false; s = surrounds.size(); } else { m++; } } else { matches = false; s = surrounds.size(); } } else if (s < 2 && i + s >= 0) { matches = false; s = surrounds.size(); } else if (s >= 2 && i + s < punctRealization.size()) { matches = false; s = surrounds.size(); } } if (matches && m > 0) { Action a = new Action("", ""); if (!punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds) .containsKey(a)) { punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds).put(a, 1); } else { punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds).put(a, punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds) .get(a) + 1); } } } }); }); punctPatterns.keySet().forEach((predicate) -> { punctPatterns.get(predicate).keySet().forEach((punct) -> { Action bestAction = null; int bestCount = 0; for (Action a : punctPatterns.get(predicate).get(punct).keySet()) { if (punctPatterns.get(predicate).get(punct).get(a) > bestCount) { bestAction = a; bestCount = punctPatterns.get(predicate).get(punct).get(a); } else if (punctPatterns.get(predicate).get(punct).get(a) == bestCount && bestAction.getWord().isEmpty()) { bestAction = a; } } if (!getPunctuationPatterns().containsKey(predicate)) { getPunctuationPatterns().put(predicate, new HashMap<ArrayList<Action>, Action>()); } if (!bestAction.getWord().isEmpty()) { getPunctuationPatterns().get(predicate).put(punct, bestAction); } }); }); }
From source file:structuredPredictionNLG.SFX.java
/** * * @param predicate// w w w. j a va2 s. c om * @param costs * @param previousGeneratedAttrs * @param attrValuesAlreadyMentioned * @param attrValuesToBeMentioned * @param availableAttributeActions * @param MR * @return */ @Override public Instance createContentInstanceWithCosts(String predicate, TObjectDoubleHashMap<String> costs, ArrayList<String> previousGeneratedAttrs, HashSet<String> attrValuesAlreadyMentioned, HashSet<String> attrValuesToBeMentioned, HashMap<String, HashSet<String>> availableAttributeActions, MeaningRepresentation MR) { TObjectDoubleHashMap<String> generalFeatures = new TObjectDoubleHashMap<>(); HashMap<String, TObjectDoubleHashMap<String>> valueSpecificFeatures = new HashMap<>(); if (availableAttributeActions.containsKey(predicate)) { availableAttributeActions.get(predicate).forEach((action) -> { valueSpecificFeatures.put(action, new TObjectDoubleHashMap<String>()); }); } ArrayList<String> mentionedAttrValues = new ArrayList<>(); previousGeneratedAttrs.stream().filter( (attrValue) -> (!attrValue.equals(Action.TOKEN_START) && !attrValue.equals(Action.TOKEN_END))) .forEachOrdered((attrValue) -> { mentionedAttrValues.add(attrValue); }); for (int j = 1; j <= 1; j++) { String previousAttrValue = "@@"; if (mentionedAttrValues.size() - j >= 0) { previousAttrValue = mentionedAttrValues.get(mentionedAttrValues.size() - j).trim(); } generalFeatures.put("feature_attrValue_" + j + "_" + previousAttrValue, 1.0); } //Word N-Grams String prevAttrValue = "@@"; if (mentionedAttrValues.size() - 1 >= 0) { prevAttrValue = mentionedAttrValues.get(mentionedAttrValues.size() - 1).trim(); } String prev2AttrValue = "@@"; if (mentionedAttrValues.size() - 2 >= 0) { prev2AttrValue = mentionedAttrValues.get(mentionedAttrValues.size() - 2).trim(); } String prev3AttrValue = "@@"; if (mentionedAttrValues.size() - 3 >= 0) { prev3AttrValue = mentionedAttrValues.get(mentionedAttrValues.size() - 3).trim(); } String prev4AttrValue = "@@"; if (mentionedAttrValues.size() - 4 
>= 0) { prev4AttrValue = mentionedAttrValues.get(mentionedAttrValues.size() - 4).trim(); } String prev5AttrValue = "@@"; if (mentionedAttrValues.size() - 5 >= 0) { prev5AttrValue = mentionedAttrValues.get(mentionedAttrValues.size() - 5).trim(); } String prevBigramAttrValue = prev2AttrValue + "|" + prevAttrValue; String prevTrigramAttrValue = prev3AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; String prev4gramAttrValue = prev4AttrValue + "|" + prev3AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; String prev5gramAttrValue = prev5AttrValue + "|" + prev4AttrValue + "|" + prev3AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; generalFeatures.put("feature_attrValue_bigram_" + prevBigramAttrValue, 1.0); generalFeatures.put("feature_attrValue_trigram_" + prevTrigramAttrValue, 1.0); generalFeatures.put("feature_attrValue_4gram_" + prev4gramAttrValue, 1.0); generalFeatures.put("feature_attrValue_5gram_" + prev5gramAttrValue, 1.0); //If arguments have been generated or not for (int i = 0; i < mentionedAttrValues.size(); i++) { generalFeatures.put("feature_attrValue_allreadyMentioned_" + mentionedAttrValues.get(i), 1.0); } //If arguments should still be generated or not attrValuesToBeMentioned.forEach((attrValue) -> { generalFeatures.put("feature_attrValue_toBeMentioned_" + attrValue, 1.0); }); //Which attrs are in the MR and which are not if (availableAttributeActions.containsKey(predicate)) { availableAttributeActions.get(predicate).forEach((attribute) -> { if (MR.getAttributeValues().keySet().contains(attribute)) { generalFeatures.put("feature_attr_inMR_" + attribute, 1.0); } else { generalFeatures.put("feature_attr_notInMR_" + attribute, 1.0); } }); } ArrayList<String> mentionedAttrs = new ArrayList<>(); for (int i = 0; i < mentionedAttrValues.size(); i++) { String attr = mentionedAttrValues.get(i); if (attr.contains("=")) { attr = mentionedAttrValues.get(i).substring(0, mentionedAttrValues.get(i).indexOf('=')); } mentionedAttrs.add(attr); } 
HashSet<String> attrsToBeMentioned = new HashSet<>(); attrValuesToBeMentioned.stream().map((attrValue) -> { String attr = attrValue; if (attr.contains("=")) { attr = attrValue.substring(0, attrValue.indexOf('=')); } return attr; }).forEachOrdered((attr) -> { attrsToBeMentioned.add(attr); }); for (int j = 1; j <= 1; j++) { String previousAttr = ""; if (mentionedAttrs.size() - j >= 0) { previousAttr = mentionedAttrs.get(mentionedAttrs.size() - j).trim(); } if (!previousAttr.isEmpty()) { generalFeatures.put("feature_attr_" + j + "_" + previousAttr, 1.0); } else { generalFeatures.put("feature_attr_" + j + "_@@", 1.0); } } //Word N-Grams String prevAttr = "@@"; if (mentionedAttrs.size() - 1 >= 0) { prevAttr = mentionedAttrs.get(mentionedAttrs.size() - 1).trim(); } String prev2Attr = "@@"; if (mentionedAttrs.size() - 2 >= 0) { prev2Attr = mentionedAttrs.get(mentionedAttrs.size() - 2).trim(); } String prev3Attr = "@@"; if (mentionedAttrs.size() - 3 >= 0) { prev3Attr = mentionedAttrs.get(mentionedAttrs.size() - 3).trim(); } String prev4Attr = "@@"; if (mentionedAttrs.size() - 4 >= 0) { prev4Attr = mentionedAttrs.get(mentionedAttrs.size() - 4).trim(); } String prev5Attr = "@@"; if (mentionedAttrs.size() - 5 >= 0) { prev5Attr = mentionedAttrs.get(mentionedAttrs.size() - 5).trim(); } String prevBigramAttr = prev2Attr + "|" + prevAttr; String prevTrigramAttr = prev3Attr + "|" + prev2Attr + "|" + prevAttr; String prev4gramAttr = prev4Attr + "|" + prev3Attr + "|" + prev2Attr + "|" + prevAttr; String prev5gramAttr = prev5Attr + "|" + prev4Attr + "|" + prev3Attr + "|" + prev2Attr + "|" + prevAttr; generalFeatures.put("feature_attr_bigram_" + prevBigramAttr, 1.0); generalFeatures.put("feature_attr_trigram_" + prevTrigramAttr, 1.0); generalFeatures.put("feature_attr_4gram_" + prev4gramAttr, 1.0); generalFeatures.put("feature_attr_5gram_" + prev5gramAttr, 1.0); //If arguments have been generated or not attrValuesAlreadyMentioned.forEach((attr) -> { 
generalFeatures.put("feature_attr_alreadyMentioned_" + attr, 1.0); }); //If arguments should still be generated or not attrsToBeMentioned.forEach((attr) -> { generalFeatures.put("feature_attr_toBeMentioned_" + attr, 1.0); }); //Attr specific features (and global features) if (availableAttributeActions.containsKey(predicate)) { for (String action : availableAttributeActions.get(predicate)) { if (action.equals(Action.TOKEN_END)) { if (attrsToBeMentioned.isEmpty()) { valueSpecificFeatures.get(action).put("global_feature_specific_allAttrValuesMentioned", 1.0); } else { valueSpecificFeatures.get(action).put("global_feature_specific_allAttrValuesNotMentioned", 1.0); } } else { //Is attr in MR? if (MR.getAttributeValues().get(action) != null) { valueSpecificFeatures.get(action).put("global_feature_specific_isInMR", 1.0); } else { valueSpecificFeatures.get(action).put("global_feature_specific_isNotInMR", 1.0); } //Is attr already mentioned right before if (prevAttr.equals(action)) { valueSpecificFeatures.get(action).put("global_feature_specific_attrFollowingSameAttr", 1.0); } else { valueSpecificFeatures.get(action).put("global_feature_specific_attrNotFollowingSameAttr", 1.0); } //Is attr already mentioned attrValuesAlreadyMentioned.stream().map((attrValue) -> { if (attrValue.indexOf('=') == -1) { } return attrValue; }).filter((attrValue) -> (attrValue.substring(0, attrValue.indexOf('=')).equals(action))) .forEachOrdered((_item) -> { valueSpecificFeatures.get(action) .put("global_feature_specific_attrAlreadyMentioned", 1.0); }); //Is attr to be mentioned (has value to express) boolean toBeMentioned = false; for (String attrValue : attrValuesToBeMentioned) { if (attrValue.substring(0, attrValue.indexOf('=')).equals(action)) { toBeMentioned = true; valueSpecificFeatures.get(action).put("global_feature_specific_attrToBeMentioned", 1.0); } } if (!toBeMentioned) { valueSpecificFeatures.get(action).put("global_feature_specific_attrNotToBeMentioned", 1.0); } } HashSet<String> 
keys = new HashSet<>(valueSpecificFeatures.get(action).keySet()); keys.forEach((feature1) -> { keys.stream() .filter((feature2) -> (valueSpecificFeatures.get(action).get(feature1) == 1.0 && valueSpecificFeatures.get(action).get(feature2) == 1.0 && feature1.compareTo(feature2) < 0)) .forEachOrdered((feature2) -> { valueSpecificFeatures.get(action).put(feature1 + "&&" + feature2, 1.0); }); }); String nextValue = chooseNextValue(action, attrValuesToBeMentioned); if (nextValue.isEmpty() && !action.equals(Action.TOKEN_END)) { valueSpecificFeatures.get(action).put("global_feature_LMAttr_score", 0.0); } else { ArrayList<String> fullGramLM = new ArrayList<>(); for (int i = 0; i < mentionedAttrValues.size(); i++) { fullGramLM.add(mentionedAttrValues.get(i)); } ArrayList<String> prev5attrValueGramLM = new ArrayList<>(); int j = 0; for (int i = mentionedAttrValues.size() - 1; (i >= 0 && j < 5); i--) { prev5attrValueGramLM.add(0, mentionedAttrValues.get(i)); j++; } if (!action.equals(Action.TOKEN_END)) { prev5attrValueGramLM.add(action + "=" + chooseNextValue(action, attrValuesToBeMentioned)); } else { prev5attrValueGramLM.add(action); } while (prev5attrValueGramLM.size() < 4) { prev5attrValueGramLM.add(0, "@@"); } double afterLMScore = getContentLMsPerPredicate().get(predicate) .getProbability(prev5attrValueGramLM); valueSpecificFeatures.get(action).put("global_feature_LMAttr_score", afterLMScore); afterLMScore = getContentLMsPerPredicate().get(predicate).getProbability(fullGramLM); valueSpecificFeatures.get(action).put("global_feature_LMAttrFull_score", afterLMScore); } } } return new Instance(generalFeatures, valueSpecificFeatures, costs); }
From source file:com.sonicle.webtop.calendar.CalendarManager.java
public void syncRemoteCalendar(int calendarId, boolean full) throws WTException { final UserProfile.Data udata = WT.getUserData(getTargetProfileId()); final ICalendarInput icalInput = new ICalendarInput(udata.getTimeZone()); final String PENDING_KEY = String.valueOf(calendarId); CalendarDAO calDao = CalendarDAO.getInstance(); Connection con = null;// w ww.j a va2 s. c om if (pendingRemoteCalendarSyncs.putIfAbsent(PENDING_KEY, RunContext.getRunProfileId()) != null) { throw new ConcurrentSyncException("Sync activity is already running [{}, {}]", calendarId, RunContext.getRunProfileId()); } try { //checkRightsOnCalendarFolder(calendarId, "READ"); con = WT.getConnection(SERVICE_ID, false); Calendar cal = ManagerUtils.createCalendar(calDao.selectById(con, calendarId)); if (cal == null) throw new WTException("Calendar not found [{0}]", calendarId); if (!Calendar.Provider.WEBCAL.equals(cal.getProvider()) && !Calendar.Provider.CALDAV.equals(cal.getProvider())) { throw new WTException("Specified calendar is not remote (webcal or CalDAV) [{0}]", calendarId); } // Force a full update if last-sync date is null if (cal.getRemoteSyncTimestamp() == null) full = true; CalendarRemoteParameters params = LangUtils.deserialize(cal.getParameters(), CalendarRemoteParameters.class); if (params == null) throw new WTException("Unable to deserialize remote parameters"); if (params.url == null) throw new WTException("Remote URL is undefined"); if (Calendar.Provider.WEBCAL.equals(cal.getProvider())) { final String PREFIX = "webcal-"; File tempFile = null; URIBuilder builder = new URIBuilder(params.url); if (StringUtils.equalsIgnoreCase(builder.getScheme(), "webcal")) { builder.setScheme("http"); // Force http scheme } if (!StringUtils.isBlank(params.username) && !StringUtils.isBlank(params.username)) { builder.setUserInfo(params.username, params.password); } URI newUrl = URIUtils.buildQuietly(builder); try { final DateTime newLastSync = DateTimeUtils.now(); tempFile = 
WT.createTempFile(PREFIX, null); // Retrieve webcal content (iCalendar) from the specified URL // and save it locally logger.debug("Downloading iCalendar file from URL [{}]", newUrl); HttpClient httpCli = null; FileOutputStream os = null; try { httpCli = HttpClientUtils.createBasicHttpClient(HttpClientUtils.configureSSLAcceptAll(), newUrl); os = new FileOutputStream(tempFile); HttpClientUtils.writeContent(httpCli, newUrl, os); } catch (IOException ex) { throw new WTException(ex, "Unable to retrieve webcal [{0}]", newUrl); } finally { IOUtils.closeQuietly(os); HttpClientUtils.closeQuietly(httpCli); } logger.debug("Saved to temp file [{}]", tempFile.getName()); // Parse downloaded iCalendar logger.debug("Parsing downloaded iCalendar file"); net.fortuna.ical4j.model.Calendar ical = null; FileInputStream is = null; try { is = new FileInputStream(tempFile); ICalendarUtils.relaxParsingAndCompatibility(); ical = ICalendarUtils.parse(is); //TODO: add support to FILENAME property (Google https://github.com/ical4j/ical4j/issues/69) } catch (IOException | ParserException ex) { throw new WTException(ex, "Unable to read webcal"); } finally { IOUtils.closeQuietly(os); } icalInput.withIncludeVEventSourceInOutput(true); ArrayList<EventInput> input = icalInput.fromICalendarFile(ical, null); logger.debug("Found {} events", input.size()); Map<String, VEventHrefSync> syncByHref = null; if (full) { logger.debug("Cleaning up calendar [{}]", calendarId); doEventsDeleteByCalendar(con, calendarId, false); } else { EventDAO evtDao = EventDAO.getInstance(); syncByHref = evtDao.viewHrefSyncDataByCalendar(con, calendarId); } // Inserts/Updates data... 
logger.debug("Inserting/Updating events..."); try { String autoUidPrefix = DigestUtils.md5Hex(newUrl.toString()); // auto-gen base prefix in case of missing UID HashSet<String> hrefs = new HashSet<>(); HashMap<String, OEvent> cache = new HashMap<>(); int i = 0; for (EventInput ei : input) { if (StringUtils.isBlank(ei.event.getPublicUid())) { String autoUid = autoUidPrefix + "-" + i; ei.event.setPublicUid(autoUid); logger.trace("Missing UID: using auto-gen value. [{}]", autoUid); } String href = ManagerUtils.buildHref(ei.event.getPublicUid()); //if (logger.isTraceEnabled()) logger.trace("{}", ICalendarUtils.print(ICalendarUtils.getVEvent(devt.getCalendar()))); if (hrefs.contains(href)) { logger.trace("Event duplicated. Skipped! [{}]", href); continue; } boolean skip = false; Integer matchingEventId = null; String eiHash = DigestUtils.md5Hex(ei.sourceEvent.toString()); if (syncByHref != null) { // Only if... (!full) see above! VEventHrefSync hrefSync = syncByHref.remove(href); if (hrefSync != null) { // Href found -> maybe updated item if (!StringUtils.equals(hrefSync.getEtag(), eiHash)) { matchingEventId = hrefSync.getEventId(); logger.trace("Event updated [{}, {}]", href, eiHash); } else { skip = true; logger.trace("Event not modified [{}, {}]", href, eiHash); } } else { // Href not found -> added item logger.trace("Event newly added [{}, {}]", href, eiHash); } } if (!skip) { ei.event.setCalendarId(calendarId); ei.event.setHref(href); ei.event.setEtag(eiHash); if (matchingEventId != null) { ei.event.setEventId(matchingEventId); boolean updated = doEventInputUpdate(con, cache, ei); if (!updated) throw new WTException("Event not found [{}]", ei.event.getEventId()); } else { doEventInputInsert(con, cache, ei); } } hrefs.add(href); // Marks as processed! } if (syncByHref != null) { // Only if... (!full) see above! 
// Remaining hrefs -> deleted items for (VEventHrefSync hrefSync : syncByHref.values()) { logger.trace("Event deleted [{}]", hrefSync.getHref()); doEventDelete(con, hrefSync.getEventId(), false); } } cache.clear(); calDao.updateRemoteSyncById(con, calendarId, newLastSync, null); DbUtils.commitQuietly(con); } catch (Exception ex) { DbUtils.rollbackQuietly(con); throw new WTException(ex, "Error importing iCalendar"); } } finally { if (tempFile != null) { logger.debug("Removing temp file [{}]", tempFile.getName()); WT.deleteTempFile(tempFile); } } } else if (Calendar.Provider.CALDAV.equals(cal.getProvider())) { CalDav dav = getCalDav(params.username, params.password); try { DavCalendar dcal = dav.getCalendarSyncToken(params.url.toString()); if (dcal == null) throw new WTException("DAV calendar not found"); final boolean syncIsSupported = !StringUtils.isBlank(dcal.getSyncToken()); final DateTime newLastSync = DateTimeUtils.now(); if (!full && (syncIsSupported && !StringUtils.isBlank(cal.getRemoteSyncTag()))) { // Partial update using SYNC mode String newSyncToken = dcal.getSyncToken(); logger.debug("Querying CalDAV endpoint for changes [{}, {}]", params.url.toString(), cal.getRemoteSyncTag()); List<DavSyncStatus> changes = dav.getCalendarChanges(params.url.toString(), cal.getRemoteSyncTag()); logger.debug("Returned {} items", changes.size()); try { if (!changes.isEmpty()) { EventDAO evtDao = EventDAO.getInstance(); Map<String, List<Integer>> eventIdsByHref = evtDao.selectHrefsByByCalendar(con, calendarId); // Process changes... logger.debug("Processing changes..."); HashSet<String> hrefs = new HashSet<>(); for (DavSyncStatus change : changes) { String href = FilenameUtils.getName(change.getPath()); //String href = change.getPath(); if (DavUtil.HTTP_SC_TEXT_OK.equals(change.getResponseStatus())) { hrefs.add(href); } else { // Event deleted List<Integer> eventIds = eventIdsByHref.get(href); Integer eventId = (eventIds != null) ? 
eventIds.get(eventIds.size() - 1) : null; if (eventId == null) { logger.warn("Deletion not possible. Event path not found [{}]", PathUtils.concatPaths(dcal.getPath(), FilenameUtils.getName(href))); continue; } doEventDelete(con, eventId, false); } } // Retrieves events list from DAV endpoint (using multiget) logger.debug("Retrieving inserted/updated events [{}]", hrefs.size()); Collection<String> paths = hrefs.stream().map( href -> PathUtils.concatPaths(dcal.getPath(), FilenameUtils.getName(href))) .collect(Collectors.toList()); List<DavCalendarEvent> devts = dav.listCalendarEvents(params.url.toString(), paths); //List<DavCalendarEvent> devts = dav.listCalendarEvents(params.url.toString(), hrefs); // Inserts/Updates data... logger.debug("Inserting/Updating events..."); HashMap<String, OEvent> cache = new HashMap<>(); for (DavCalendarEvent devt : devts) { String href = FilenameUtils.getName(devt.getPath()); //String href = devt.getPath(); if (logger.isTraceEnabled()) logger.trace("{}", ICalendarUtils.print(ICalendarUtils.getVEvent(devt.getCalendar()))); List<Integer> eventIds = eventIdsByHref.get(href); Integer eventId = (eventIds != null) ? 
eventIds.get(eventIds.size() - 1) : null; final ArrayList<EventInput> input = icalInput .fromICalendarFile(devt.getCalendar(), null); if (input.size() != 1) throw new WTException("iCal must contain one event"); final EventInput ei = input.get(0); if (eventId != null) { doEventDelete(con, eventId, false); } ei.event.setCalendarId(calendarId); ei.event.setHref(href); ei.event.setEtag(devt.geteTag()); doEventInputInsert(con, cache, ei); } } calDao.updateRemoteSyncById(con, calendarId, newLastSync, newSyncToken); DbUtils.commitQuietly(con); } catch (Exception ex) { DbUtils.rollbackQuietly(con); throw new WTException(ex, "Error importing iCalendar"); } } else { // Full update or partial computing hashes String newSyncToken = null; if (syncIsSupported) { // If supported, saves last sync-token issued by the server newSyncToken = dcal.getSyncToken(); } // Retrieves cards from DAV endpoint logger.debug("Querying CalDAV endpoint [{}]", params.url.toString()); List<DavCalendarEvent> devts = dav.listCalendarEvents(params.url.toString()); logger.debug("Returned {} items", devts.size()); // Handles data... try { Map<String, VEventHrefSync> syncByHref = null; if (full) { logger.debug("Cleaning up calendar [{}]", calendarId); doEventsDeleteByCalendar(con, calendarId, false); } else if (!full && !syncIsSupported) { // This hash-map is only needed when syncing using hashes EventDAO evtDao = EventDAO.getInstance(); syncByHref = evtDao.viewHrefSyncDataByCalendar(con, calendarId); } logger.debug("Processing results..."); // Define a simple map in order to check duplicates. // eg. 
SOGo passes same card twice :( HashSet<String> hrefs = new HashSet<>(); HashMap<String, OEvent> cache = new HashMap<>(); for (DavCalendarEvent devt : devts) { String href = PathUtils.getFileName(devt.getPath()); //String href = devt.getPath(); String etag = devt.geteTag(); if (logger.isTraceEnabled()) logger.trace("{}", ICalendarUtils.print(ICalendarUtils.getVEvent(devt.getCalendar()))); if (hrefs.contains(href)) { logger.trace("Card duplicated. Skipped! [{}]", href); continue; } boolean skip = false; Integer matchingEventId = null; if (syncByHref != null) { // Only if... (!full && !syncIsSupported) see above! //String prodId = ICalendarUtils.buildProdId(ManagerUtils.getProductName()); //String hash = DigestUtils.md5Hex(new ICalendarOutput(prodId, true).write(devt.getCalendar())); String hash = DigestUtils .md5Hex(ICalendarUtils.getVEvent(devt.getCalendar()).toString()); VEventHrefSync hrefSync = syncByHref.remove(href); if (hrefSync != null) { // Href found -> maybe updated item if (!StringUtils.equals(hrefSync.getEtag(), hash)) { matchingEventId = hrefSync.getEventId(); etag = hash; logger.trace("Event updated [{}, {}]", href, hash); } else { skip = true; logger.trace("Event not modified [{}, {}]", href, hash); } } else { // Href not found -> added item logger.trace("Event newly added [{}]", href); etag = hash; } } if (!skip) { final ArrayList<EventInput> input = icalInput .fromICalendarFile(devt.getCalendar(), null); if (input.size() != 1) throw new WTException("iCal must contain one event"); final EventInput ei = input.get(0); ei.event.setCalendarId(calendarId); ei.event.setHref(href); ei.event.setEtag(etag); if (matchingEventId == null) { doEventInputInsert(con, cache, ei); } else { ei.event.setEventId(matchingEventId); boolean updated = doEventInputUpdate(con, cache, ei); if (!updated) throw new WTException("Event not found [{}]", ei.event.getEventId()); } } hrefs.add(href); // Marks as processed! } if (syncByHref != null) { // Only if... 
(!full && !syncIsSupported) see above! // Remaining hrefs -> deleted items for (VEventHrefSync hrefSync : syncByHref.values()) { logger.trace("Event deleted [{}]", hrefSync.getHref()); doEventDelete(con, hrefSync.getEventId(), false); } } calDao.updateRemoteSyncById(con, calendarId, newLastSync, newSyncToken); DbUtils.commitQuietly(con); } catch (Exception ex) { DbUtils.rollbackQuietly(con); throw new WTException(ex, "Error importing iCalendar"); } } } catch (DavException ex) { throw new WTException(ex, "CalDAV error"); } } } catch (SQLException | DAOException ex) { throw wrapException(ex); } finally { DbUtils.closeQuietly(con); pendingRemoteCalendarSyncs.remove(PENDING_KEY); } }
From source file:structuredPredictionNLG.SFX.java
/** * * @param classifierAttrs//from w w w . j a va2 s . c om * @param classifierWords * @param testingData * @param epoch * @return */ @Override public Double evaluateGeneration(HashMap<String, JAROW> classifierAttrs, HashMap<String, HashMap<String, JAROW>> classifierWords, ArrayList<DatasetInstance> testingData, int epoch) { System.out.println("Evaluate argument generation "); ArrayList<ScoredFeaturizedTranslation<IString, String>> generations = new ArrayList<>(); HashMap<DatasetInstance, ArrayList<Action>> generationActions = new HashMap<>(); ArrayList<ArrayList<Sequence<IString>>> finalReferences = new ArrayList<>(); HashMap<DatasetInstance, ArrayList<String>> finalReferencesWordSequences = new HashMap<>(); HashMap<DatasetInstance, String> predictedWordSequences_overAllPredicates = new HashMap<>(); ArrayList<String> allPredictedWordSequences = new ArrayList<>(); ArrayList<String> allPredictedMRStr = new ArrayList<>(); ArrayList<ArrayList<String>> allPredictedReferences = new ArrayList<>(); HashMap<String, Double> attrCoverage = new HashMap<>(); HashMap<String, HashSet<String>> abstractMRsToMRs = new HashMap<>(); for (DatasetInstance di : testingData) { String predicate = di.getMeaningRepresentation().getPredicate(); ArrayList<Action> predictedActionList = new ArrayList<>(); ArrayList<Action> predictedWordList = new ArrayList<>(); //PHRASE GENERATION EVALUATION String predictedAttr = ""; ArrayList<String> predictedAttrValues = new ArrayList<>(); HashSet<String> attrValuesToBeMentioned = new HashSet<>(); HashSet<String> attrValuesAlreadyMentioned = new HashSet<>(); for (String attribute : di.getMeaningRepresentation().getAttributeValues().keySet()) { for (String value : di.getMeaningRepresentation().getAttributeValues().get(attribute)) { attrValuesToBeMentioned.add(attribute.toLowerCase() + "=" + value.toLowerCase()); } } if (attrValuesToBeMentioned.isEmpty()) { attrValuesToBeMentioned.add("empty=empty"); } while (!predictedAttr.equals(Action.TOKEN_END) && 
predictedAttrValues.size() < getMaxContentSequenceLength()) { if (!predictedAttr.isEmpty()) { attrValuesToBeMentioned.remove(predictedAttr); } if (!attrValuesToBeMentioned.isEmpty()) { Instance attrTrainingVector = createContentInstance(predicate, "@TOK@", predictedAttrValues, attrValuesAlreadyMentioned, attrValuesToBeMentioned, di.getMeaningRepresentation(), getAvailableContentActions()); if (attrTrainingVector != null) { Prediction predictAttr = classifierAttrs.get(predicate).predict(attrTrainingVector); if (predictAttr.getLabel() != null) { predictedAttr = predictAttr.getLabel().trim(); if (!classifierAttrs.get(predicate).getCurrentWeightVectors().keySet() .containsAll(di.getMeaningRepresentation().getAttributeValues().keySet())) { System.out.println("MR ATTR NOT IN CLASSIFIERS"); System.out .println(classifierAttrs.get(predicate).getCurrentWeightVectors().keySet()); } String predictedValue = ""; if (!predictedAttr.equals(Action.TOKEN_END)) { predictedValue = chooseNextValue(predictedAttr, attrValuesToBeMentioned); HashSet<String> rejectedAttrs = new HashSet<>(); while (predictedValue.isEmpty() && (!predictedAttr.equals(Action.TOKEN_END) || (predictedAttrValues.isEmpty() && classifierAttrs.get(predicate).getCurrentWeightVectors().keySet() .containsAll(di.getMeaningRepresentation() .getAttributeValues().keySet())))) { rejectedAttrs.add(predictedAttr); predictedAttr = Action.TOKEN_END; double maxScore = -Double.MAX_VALUE; for (String attr : predictAttr.getLabel2Score().keySet()) { if (!rejectedAttrs.contains(attr) && (Double .compare(predictAttr.getLabel2Score().get(attr), maxScore) > 0)) { maxScore = predictAttr.getLabel2Score().get(attr); predictedAttr = attr; } } if (!predictedAttr.equals(Action.TOKEN_END)) { predictedValue = chooseNextValue(predictedAttr, attrValuesToBeMentioned); } } } if (!predictedAttr.equals(Action.TOKEN_END)) { predictedAttr += "=" + predictedValue; } predictedAttrValues.add(predictedAttr); if (!predictedAttr.isEmpty()) { 
attrValuesAlreadyMentioned.add(predictedAttr); attrValuesToBeMentioned.remove(predictedAttr); } } else { predictedAttr = Action.TOKEN_END; predictedAttrValues.add(predictedAttr); } } else { predictedAttr = Action.TOKEN_END; predictedAttrValues.add(predictedAttr); } } else { predictedAttr = Action.TOKEN_END; predictedAttrValues.add(predictedAttr); } } //WORD SEQUENCE EVALUATION predictedAttr = ""; ArrayList<String> predictedAttributes = new ArrayList<>(); attrValuesToBeMentioned = new HashSet<>(); attrValuesAlreadyMentioned = new HashSet<>(); HashMap<String, ArrayList<String>> valuesToBeMentioned = new HashMap<>(); for (String attribute : di.getMeaningRepresentation().getAttributeValues().keySet()) { for (String value : di.getMeaningRepresentation().getAttributeValues().get(attribute)) { attrValuesToBeMentioned.add(attribute.toLowerCase() + "=" + value.toLowerCase()); } valuesToBeMentioned.put(attribute, new ArrayList<>(di.getMeaningRepresentation().getAttributeValues().get(attribute))); } if (attrValuesToBeMentioned.isEmpty()) { attrValuesToBeMentioned.add("empty=empty"); } HashSet<String> attrValuesToBeMentionedCopy = new HashSet<>(attrValuesToBeMentioned); int a = -1; for (String attrValue : predictedAttrValues) { a++; if (!attrValue.equals(Action.TOKEN_END)) { String attribute = attrValue.split("=")[0]; predictedAttributes.add(attrValue); //GENERATE PHRASES if (!attribute.equals(Action.TOKEN_END)) { if (classifierWords.get(predicate).containsKey(attribute)) { ArrayList<String> nextAttributesForInstance = new ArrayList<>( predictedAttrValues.subList(a + 1, predictedAttrValues.size())); String predictedWord = ""; boolean isValueMentioned = false; String valueTBM = ""; if (attrValue.contains("=")) { valueTBM = attrValue.substring(attrValue.indexOf('=') + 1); } if (valueTBM.isEmpty()) { isValueMentioned = true; } ArrayList<String> subPhrase = new ArrayList<>(); while (!predictedWord.equals(Action.TOKEN_END) && predictedWordList.size() < getMaxWordSequenceLength()) { 
ArrayList<String> predictedAttributesForInstance = new ArrayList<>(); for (int i = 0; i < predictedAttributes.size() - 1; i++) { predictedAttributesForInstance.add(predictedAttributes.get(i)); } if (!predictedAttributes.get(predictedAttributes.size() - 1).equals(attrValue)) { predictedAttributesForInstance .add(predictedAttributes.get(predictedAttributes.size() - 1)); } Instance wordTrainingVector = createWordInstance(predicate, new Action("@TOK@", attrValue), predictedAttributesForInstance, predictedActionList, nextAttributesForInstance, attrValuesAlreadyMentioned, attrValuesToBeMentioned, isValueMentioned, getAvailableWordActions().get(predicate)); if (wordTrainingVector != null && classifierWords.get(predicate) != null) { if (classifierWords.get(predicate).get(attribute) != null) { Prediction predictWord = classifierWords.get(predicate).get(attribute) .predict(wordTrainingVector); if (predictWord.getLabel() != null) { predictedWord = predictWord.getLabel().trim(); while (predictedWord.equals(Action.TOKEN_END) && !predictedActionList.isEmpty() && predictedActionList.get(predictedActionList.size() - 1) .getWord().equals(Action.TOKEN_END)) { double maxScore = -Double.MAX_VALUE; for (String word : predictWord.getLabel2Score().keySet()) { if (!word.equals(Action.TOKEN_END) && (Double.compare( predictWord.getLabel2Score().get(word), maxScore) > 0)) { maxScore = predictWord.getLabel2Score().get(word); predictedWord = word; } } } predictedActionList.add(new Action(predictedWord, attrValue)); if (!predictedWord.equals(Action.TOKEN_START) && !predictedWord.equals(Action.TOKEN_END)) { subPhrase.add(predictedWord); predictedWordList.add(new Action(predictedWord, attrValue)); } } else { predictedWord = Action.TOKEN_END; predictedActionList.add(new Action(predictedWord, attrValue)); } } else { predictedWord = Action.TOKEN_END; predictedActionList.add(new Action(predictedWord, attrValue)); } } if (!isValueMentioned) { if (!predictedWord.equals(Action.TOKEN_END)) { if 
(predictedWord.startsWith(Action.TOKEN_X) && (valueTBM.matches("\"[xX][0-9]+\"") || valueTBM.matches("[xX][0-9]+") || valueTBM.startsWith(Action.TOKEN_X))) { isValueMentioned = true; } else if (!predictedWord.startsWith(Action.TOKEN_X) && !(valueTBM.matches("\"[xX][0-9]+\"") || valueTBM.matches("[xX][0-9]+") || valueTBM.startsWith(Action.TOKEN_X))) { String valueToCheck = valueTBM; if (valueToCheck.equals("no") || valueToCheck.equals("yes") || valueToCheck.equals("yes or no") || valueToCheck.equals("none") //|| valueToCheck.equals("dont_care") || valueToCheck.equals("empty")) { if (attribute.contains("=")) { valueToCheck = attribute.replace("=", ":"); } else { valueToCheck = attribute + ":" + valueTBM; } } if (!valueToCheck.equals("empty:empty") && getValueAlignments().containsKey(valueToCheck)) { for (ArrayList<String> alignedStr : getValueAlignments() .get(valueToCheck).keySet()) { if (endsWith(subPhrase, alignedStr)) { isValueMentioned = true; break; } } } } } if (isValueMentioned) { attrValuesAlreadyMentioned.add(attrValue); attrValuesToBeMentioned.remove(attrValue); } } String mentionedAttrValue = ""; if (!predictedWord.startsWith(Action.TOKEN_X)) { for (String attrValueTBM : attrValuesToBeMentioned) { if (attrValueTBM.contains("=")) { String value = attrValueTBM.substring(attrValueTBM.indexOf('=') + 1); if (!(value.matches("\"[xX][0-9]+\"") || value.matches("[xX][0-9]+") || value.startsWith(Action.TOKEN_X))) { String valueToCheck = value; if (valueToCheck.equals("no") || valueToCheck.equals("yes") || valueToCheck.equals("yes or no") || valueToCheck.equals("none") //|| valueToCheck.equals("dont_care") || valueToCheck.equals("empty")) { valueToCheck = attrValueTBM.replace("=", ":"); } if (!valueToCheck.equals("empty:empty") && getValueAlignments().containsKey(valueToCheck)) { for (ArrayList<String> alignedStr : getValueAlignments() .get(valueToCheck).keySet()) { if (endsWith(subPhrase, alignedStr)) { mentionedAttrValue = attrValueTBM; break; } } } } } } } if 
(!mentionedAttrValue.isEmpty()) { attrValuesAlreadyMentioned.add(mentionedAttrValue); attrValuesToBeMentioned.remove(mentionedAttrValue); } } if (predictedWordList.size() >= getMaxWordSequenceLength() && !predictedActionList .get(predictedActionList.size() - 1).getWord().equals(Action.TOKEN_END)) { predictedWord = Action.TOKEN_END; predictedActionList.add(new Action(predictedWord, attrValue)); } } else { String predictedWord = Action.TOKEN_END; predictedActionList.add(new Action(predictedWord, attrValue)); } } } } ArrayList<String> predictedAttrs = new ArrayList<>(); predictedAttrValues.forEach((attributeValuePair) -> { predictedAttrs.add(attributeValuePair.split("=")[0]); }); String predictedWordSequence = postProcessWordSequence(di, predictedActionList); ArrayList<String> predictedAttrList = getPredictedAttrList(predictedActionList); if (attrValuesToBeMentionedCopy.size() != 0.0) { double missingAttrs = 0.0; missingAttrs = attrValuesToBeMentionedCopy.stream() .filter((attr) -> (!predictedAttrList.contains(attr))).map((_item) -> 1.0) .reduce(missingAttrs, (accumulator, _item) -> accumulator + _item); double attrSize = attrValuesToBeMentionedCopy.size(); attrCoverage.put(predictedWordSequence, missingAttrs / attrSize); } allPredictedWordSequences.add(predictedWordSequence); allPredictedMRStr.add(di.getMeaningRepresentation().getMRstr()); predictedWordSequences_overAllPredicates.put(di, predictedWordSequence); if (!abstractMRsToMRs.containsKey(di.getMeaningRepresentation().getAbstractMR())) { abstractMRsToMRs.put(di.getMeaningRepresentation().getAbstractMR(), new HashSet<String>()); } abstractMRsToMRs.get(di.getMeaningRepresentation().getAbstractMR()) .add(di.getMeaningRepresentation().getMRstr()); Sequence<IString> translation = IStrings .tokenize(NISTTokenizer.tokenize(predictedWordSequence.toLowerCase())); ScoredFeaturizedTranslation<IString, String> tran = new ScoredFeaturizedTranslation<>(translation, null, 0); generations.add(tran); generationActions.put(di, 
predictedActionList); ArrayList<Sequence<IString>> references = new ArrayList<>(); ArrayList<String> referencesStrings = new ArrayList<>(); if (getPerformEvaluationOn().equals("valid") || getPerformEvaluationOn().equals("train")) { for (String ref : di.getEvaluationReferences()) { referencesStrings.add(ref); references.add(IStrings.tokenize(NISTTokenizer.tokenize(ref))); } } else { references = wenEvaluationReferenceSequences.get(di.getMeaningRepresentation().getMRstr()); referencesStrings = wenEvaluationReferences.get(di.getMeaningRepresentation().getMRstr()); if (references == null) { references = new ArrayList<>(); referencesStrings = new ArrayList<>(); for (String ref : di.getEvaluationReferences()) { referencesStrings.add(ref); references.add(IStrings.tokenize(NISTTokenizer.tokenize(ref))); } } } allPredictedReferences.add(referencesStrings); finalReferencesWordSequences.put(di, referencesStrings); finalReferences.add(references); } BLEUMetric BLEU = new BLEUMetric(finalReferences, 4, false); Double bleuScore = BLEU.score(generations); double finalCoverageError = 0.0; finalCoverageError = attrCoverage.values().stream().map((c) -> c).reduce(finalCoverageError, (accumulator, _item) -> accumulator + _item); finalCoverageError /= attrCoverage.size(); for (int i = 0; i < allPredictedWordSequences.size(); i++) { double maxRouge = 0.0; String predictedWordSequence = allPredictedWordSequences.get(i).replaceAll("\\?", " \\? ") .replaceAll(":", " : ").replaceAll("\\.", " \\. 
").replaceAll(",", " , ").replaceAll(" ", " ") .trim(); for (String ref : allPredictedReferences.get(i)) { double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4); if (rouge > maxRouge) { maxRouge = rouge; } } //System.out.println(allPredictedMRStr.get(i) + "\t" + maxRouge + "\t" + allPredictedWordSequences.get(i) + "\t" + refs); } double avgRougeScore = 0.0; String detailedRes = ""; avgRougeScore = testingData.stream().map((di) -> { double maxRouge = 0.0; if (!finalReferencesWordSequences.containsKey(di)) { System.out.println(di.getMeaningRepresentation().getAbstractMR()); } String predictedWordSequence = predictedWordSequences_overAllPredicates.get(di) .replaceAll("\\?", " \\? ").replaceAll(":", " : ").replaceAll("\\.", " \\. ") .replaceAll(",", " , ").replaceAll(" ", " ").trim(); for (String ref : finalReferencesWordSequences.get(di)) { double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4); if (rouge > maxRouge) { maxRouge = rouge; } } return maxRouge; }).map((maxRouge) -> maxRouge).reduce(avgRougeScore, (accumulator, _item) -> accumulator + _item); System.out.println("BLEU: \t" + bleuScore); //System.out.println("g: " + generations); //System.out.println("attr: " + predictedAttrLists); //System.out.println("BLEU smooth: \t" + bleuSmoothScore); //System.out.println("g: " + generations); //System.out.println("attr: " + predictedAttrLists); //System.out.println("BLEU smooth: \t" + bleuSmoothScore); System.out.println("ROUGE: \t" + (avgRougeScore / allPredictedWordSequences.size())); System.out.println("COVERAGE ERROR: \t" + finalCoverageError); System.out.println("BRC: \t" + ((avgRougeScore / allPredictedWordSequences.size()) + bleuScore + (1.0 - finalCoverageError)) / 3.0); if (isCalculateResultsPerPredicate()) { //////////////////////// //ArrayList<String> bestPredictedStrings = new ArrayList<>(); //ArrayList<String> bestPredictedStringsMRs = new ArrayList<>(); double uniqueMRsInTestAndNotInTrainAllPredWordBLEU = 0.0; double 
uniqueMRsInTestAndNotInTrainAllPredWordROUGE = 0.0; double uniqueMRsInTestAndNotInTrainAllPredWordCOVERAGEERR = 0.0; double uniqueMRsInTestAndNotInTrainAllPredWordBRC = 0.0; detailedRes = ""; ArrayList<DatasetInstance> abstractMRList = new ArrayList<>(); HashSet<String> reportedAbstractMRs = new HashSet<>(); testingData.stream() .filter((di) -> (!reportedAbstractMRs.contains(di.getMeaningRepresentation().getAbstractMR()))) .map((di) -> { reportedAbstractMRs.add(di.getMeaningRepresentation().getAbstractMR()); return di; }).forEachOrdered((di) -> { boolean isInTraining = false; for (DatasetInstance di2 : getTrainingData()) { if (di2.getMeaningRepresentation().getAbstractMR() .equals(di.getMeaningRepresentation().getAbstractMR())) { isInTraining = true; } } if (!isInTraining) { for (DatasetInstance di2 : getValidationData()) { if (di2.getMeaningRepresentation().getAbstractMR() .equals(di.getMeaningRepresentation().getAbstractMR())) { isInTraining = true; } } } if (!isInTraining) { abstractMRList.add(di); } }); for (DatasetInstance di : abstractMRList) { Double bestROUGE = -100.0; Double bestBLEU = -100.0; Double bestCover = -100.0; Double bestHarmonicMean = -100.0; String predictedString = predictedWordSequences_overAllPredicates.get(di); reportedAbstractMRs.add(di.getMeaningRepresentation().getAbstractMR()); double maxRouge = 0.0; String predictedWordSequence = predictedString.replaceAll("\\?", " \\? ").replaceAll(":", " : ") .replaceAll("\\.", " \\. 
").replaceAll(",", " , ").replaceAll(" ", " ").trim(); for (String ref : finalReferencesWordSequences.get(di)) { double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4); if (rouge > maxRouge) { maxRouge = rouge; } } double BLEUSmooth = BLEUMetric.computeLocalSmoothScore(predictedWordSequence, finalReferencesWordSequences.get(di), 4); double cover = 1.0 - attrCoverage.get(predictedString); double harmonicMean = 3.0 / (1.0 / BLEUSmooth + 1.0 / maxRouge + 1.0 / cover); if (harmonicMean > bestHarmonicMean) { bestROUGE = maxRouge; bestBLEU = BLEUSmooth; bestCover = cover; bestHarmonicMean = harmonicMean; } uniqueMRsInTestAndNotInTrainAllPredWordBLEU += bestBLEU; uniqueMRsInTestAndNotInTrainAllPredWordROUGE += bestROUGE; uniqueMRsInTestAndNotInTrainAllPredWordCOVERAGEERR += bestCover; uniqueMRsInTestAndNotInTrainAllPredWordBRC += bestHarmonicMean; } uniqueMRsInTestAndNotInTrainAllPredWordBLEU /= abstractMRList.size(); uniqueMRsInTestAndNotInTrainAllPredWordROUGE /= abstractMRList.size(); uniqueMRsInTestAndNotInTrainAllPredWordCOVERAGEERR /= abstractMRList.size(); uniqueMRsInTestAndNotInTrainAllPredWordBRC /= abstractMRList.size(); System.out.println( "UNIQUE (NOT IN TRAIN) WORD ALL PRED BLEU: \t" + uniqueMRsInTestAndNotInTrainAllPredWordBLEU); System.out.println( "UNIQUE (NOT IN TRAIN) WORD ALL PRED ROUGE: \t" + uniqueMRsInTestAndNotInTrainAllPredWordROUGE); System.out.println("UNIQUE (NOT IN TRAIN) WORD ALL PRED COVERAGE ERROR: \t" + (1.0 - uniqueMRsInTestAndNotInTrainAllPredWordCOVERAGEERR)); System.out.println( "UNIQUE (NOT IN TRAIN) WORD ALL PRED BRC: \t" + uniqueMRsInTestAndNotInTrainAllPredWordBRC); abstractMRList.forEach((di) -> { System.out.println(di.getMeaningRepresentation().getAbstractMR() + "\t" + predictedWordSequences_overAllPredicates.get(di)); }); System.out.println("TOTAL SET SIZE: \t" + abstractMRList.size()); //System.out.println(abstractMRList); //System.out.println(detailedRes); } ArrayList<String> bestPredictedStrings = new ArrayList<>(); 
ArrayList<String> bestPredictedStringsMRs = new ArrayList<>(); double uniqueAllPredWordBLEU = 0.0; double uniqueAllPredWordROUGE = 0.0; double uniqueAllPredWordCOVERAGEERR = 0.0; double uniqueAllPredWordBRC = 0.0; HashSet<String> reportedAbstractMRs = new HashSet<>(); for (DatasetInstance di : testingData) { if (!reportedAbstractMRs.contains(di.getMeaningRepresentation().getAbstractMR())) { String bestPredictedString = ""; Double bestROUGE = -100.0; Double bestBLEU = -100.0; Double bestCover = -100.0; Double bestHarmonicMean = -100.0; String predictedString = predictedWordSequences_overAllPredicates.get(di); reportedAbstractMRs.add(di.getMeaningRepresentation().getAbstractMR()); double maxRouge = 0.0; String predictedWordSequence = predictedString.replaceAll("\\?", " \\? ").replaceAll(":", " : ") .replaceAll("\\.", " \\. ").replaceAll(",", " , ").replaceAll(" ", " ").trim(); for (String ref : finalReferencesWordSequences.get(di)) { double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4); if (rouge > maxRouge) { maxRouge = rouge; } } double BLEUSmooth = BLEUMetric.computeLocalSmoothScore(predictedWordSequence, finalReferencesWordSequences.get(di), 4); double cover = 1.0 - attrCoverage.get(predictedString); double harmonicMean = 3.0 / (1.0 / BLEUSmooth + 1.0 / maxRouge + 1.0 / cover); if (harmonicMean > bestHarmonicMean) { bestPredictedString = predictedString; bestROUGE = maxRouge; bestBLEU = BLEUSmooth; bestCover = cover; bestHarmonicMean = harmonicMean; } bestPredictedStrings.add(bestPredictedString); bestPredictedStringsMRs.add(di.getMeaningRepresentation().getMRstr()); uniqueAllPredWordBLEU += bestBLEU; uniqueAllPredWordROUGE += bestROUGE; uniqueAllPredWordCOVERAGEERR += bestCover; uniqueAllPredWordBRC += bestHarmonicMean; } //} } if (isCalculateResultsPerPredicate()) { uniqueAllPredWordBLEU /= reportedAbstractMRs.size(); uniqueAllPredWordROUGE /= reportedAbstractMRs.size(); uniqueAllPredWordCOVERAGEERR /= reportedAbstractMRs.size(); uniqueAllPredWordBRC /= 
reportedAbstractMRs.size(); System.out.println("UNIQUE WORD ALL PRED BLEU: \t" + uniqueAllPredWordBLEU); System.out.println("UNIQUE WORD ALL PRED ROUGE: \t" + uniqueAllPredWordROUGE); System.out.println("UNIQUE WORD ALL PRED COVERAGE ERROR: \t" + (1.0 - uniqueAllPredWordCOVERAGEERR)); System.out.println("UNIQUE WORD ALL PRED BRC: \t" + uniqueAllPredWordBRC); System.out.println(detailedRes); System.out.println("TOTAL: \t" + reportedAbstractMRs.size()); //////////////////////// for (String predicate : getPredicates()) { detailedRes = ""; bestPredictedStrings = new ArrayList<>(); bestPredictedStringsMRs = new ArrayList<>(); double uniquePredWordBLEU = 0.0; double uniquePredWordROUGE = 0.0; double uniquePredWordCOVERAGEERR = 0.0; double uniquePredWordBRC = 0.0; reportedAbstractMRs = new HashSet<>(); for (DatasetInstance di : testingData) { if (di.getMeaningRepresentation().getPredicate().equals(predicate) && !reportedAbstractMRs.contains(di.getMeaningRepresentation().getAbstractMR())) { String bestPredictedString = ""; Double bestROUGE = -100.0; Double bestBLEU = -100.0; Double bestCover = -100.0; Double bestHarmonicMean = -100.0; String predictedString = predictedWordSequences_overAllPredicates.get(di); reportedAbstractMRs.add(di.getMeaningRepresentation().getAbstractMR()); double maxRouge = 0.0; String predictedWordSequence = predictedString.replaceAll("\\?", " \\? ") .replaceAll(":", " : ").replaceAll("\\.", " \\. 
").replaceAll(",", " , ") .replaceAll(" ", " ").trim(); for (String ref : finalReferencesWordSequences.get(di)) { double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4); if (rouge > maxRouge) { maxRouge = rouge; } } double BLEUSmooth = BLEUMetric.computeLocalSmoothScore(predictedWordSequence, finalReferencesWordSequences.get(di), 4); double cover = 1.0 - attrCoverage.get(predictedString); double harmonicMean = 3.0 / (1.0 / BLEUSmooth + 1.0 / maxRouge + 1.0 / cover); if (harmonicMean > bestHarmonicMean) { bestPredictedString = predictedString; bestROUGE = maxRouge; bestBLEU = BLEUSmooth; bestCover = cover; bestHarmonicMean = harmonicMean; } bestPredictedStrings.add(bestPredictedString); bestPredictedStringsMRs.add(di.getMeaningRepresentation().getMRstr()); uniquePredWordBLEU += bestBLEU; uniquePredWordROUGE += bestROUGE; uniquePredWordCOVERAGEERR += bestCover; uniquePredWordBRC += bestHarmonicMean; } } uniquePredWordBLEU /= reportedAbstractMRs.size(); uniquePredWordROUGE /= reportedAbstractMRs.size(); uniquePredWordCOVERAGEERR /= reportedAbstractMRs.size(); uniquePredWordBRC /= reportedAbstractMRs.size(); System.out.println("UNIQUE WORD " + predicate + " BLEU: \t" + uniquePredWordBLEU); System.out.println("UNIQUE WORD " + predicate + " ROUGE: \t" + uniquePredWordROUGE); System.out.println( "UNIQUE WORD " + predicate + " COVERAGE ERROR: \t" + (1.0 - uniquePredWordCOVERAGEERR)); System.out.println("UNIQUE WORD " + predicate + " BRC: \t" + uniquePredWordBRC); System.out.println(detailedRes); System.out.println("TOTAL " + predicate + ": \t" + reportedAbstractMRs.size()); } } if (isCalculateResultsPerPredicate()) { BufferedWriter bw = null; File f = null; try { f = new File("results/random_SFX" + getDataset() + "TextsAfter" + (epoch) + "_" + JLOLS.sentenceCorrectionFurtherSteps + "_" + JLOLS.p + "epochsTESTINGDATA.txt"); } catch (NullPointerException e) { } try { bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f))); } catch (FileNotFoundException 
e) { } try { bw.write("BLEU:" + bleuScore); bw.write("\n"); } catch (IOException e) { } for (int i = 0; i < bestPredictedStrings.size(); i++) { try { String mr = bestPredictedStringsMRs.get(i); bw.write("MR;" + mr.replaceAll(";", ",") + ";"); if (getDataset().equals("hotel")) { bw.write("LOLS_SFHOT;"); } else { bw.write("LOLS_SFRES;"); } bw.write("\n"); } catch (IOException e) { } } try { bw.close(); } catch (IOException e) { } } return bleuScore; }
From source file:structuredPredictionNLG.SFX.java
/** * * @param predicate//from w ww. j a v a 2 s . c o m * @param currentAttrValue * @param costs * @param generatedAttributes * @param previousGeneratedWords * @param nextGeneratedAttributes * @param attrValuesAlreadyMentioned * @param attrValuesThatFollow * @param wasValueMentioned * @param availableWordActions * @return */ @Override public Instance createWordInstanceWithCosts(String predicate, String currentAttrValue, TObjectDoubleHashMap<String> costs, ArrayList<String> generatedAttributes, ArrayList<Action> previousGeneratedWords, ArrayList<String> nextGeneratedAttributes, HashSet<String> attrValuesAlreadyMentioned, HashSet<String> attrValuesThatFollow, boolean wasValueMentioned, HashMap<String, HashSet<Action>> availableWordActions) { String currentAttr = currentAttrValue; String currentValue = ""; if (currentAttr.contains("=")) { currentAttr = currentAttrValue.substring(0, currentAttrValue.indexOf('=')); currentValue = currentAttrValue.substring(currentAttrValue.indexOf('=') + 1); } if (currentValue.contains(":")) { currentValue = currentAttrValue.substring(currentAttrValue.indexOf(':') + 1); } if (currentValue.isEmpty()) { //System.exit(0); } TObjectDoubleHashMap<String> generalFeatures = new TObjectDoubleHashMap<>(); HashMap<String, TObjectDoubleHashMap<String>> valueSpecificFeatures = new HashMap<>(); for (Action action : availableWordActions.get(currentAttr)) { valueSpecificFeatures.put(action.getAction(), new TObjectDoubleHashMap<String>()); } /*if (gWords.get(wIndex).getWord().equals(Action.TOKEN_END)) { System.out.println("!!! 
"+ gWords.subList(0, wIndex + 1)); }*/ ArrayList<Action> generatedWords = new ArrayList<>(); ArrayList<Action> generatedWordsInSameAttrValue = new ArrayList<>(); ArrayList<String> generatedPhrase = new ArrayList<>(); for (int i = 0; i < previousGeneratedWords.size(); i++) { Action a = previousGeneratedWords.get(i); if (!a.getWord().equals(Action.TOKEN_START) && !a.getWord().equals(Action.TOKEN_END)) { generatedWords.add(a); generatedPhrase.add(a.getWord()); if (a.getAttribute().equals(currentAttrValue)) { generatedWordsInSameAttrValue.add(a); } } } //Previous word features for (int j = 1; j <= 1; j++) { String previousWord = "@@"; if (generatedWords.size() - j >= 0) { previousWord = generatedWords.get(generatedWords.size() - j).getWord().trim(); } generalFeatures.put("feature_word_" + j + "_" + previousWord.toLowerCase(), 1.0); } String prevWord = "@@"; if (generatedWords.size() - 1 >= 0) { prevWord = generatedWords.get(generatedWords.size() - 1).getWord().trim(); } String prev2Word = "@@"; if (generatedWords.size() - 2 >= 0) { prev2Word = generatedWords.get(generatedWords.size() - 2).getWord().trim(); } String prev3Word = "@@"; if (generatedWords.size() - 3 >= 0) { prev3Word = generatedWords.get(generatedWords.size() - 3).getWord().trim(); } String prev4Word = "@@"; if (generatedWords.size() - 4 >= 0) { prev4Word = generatedWords.get(generatedWords.size() - 4).getWord().trim(); } String prev5Word = "@@"; if (generatedWords.size() - 5 >= 0) { prev5Word = generatedWords.get(generatedWords.size() - 5).getWord().trim(); } String prevBigram = prev2Word + "|" + prevWord; String prevTrigram = prev3Word + "|" + prev2Word + "|" + prevWord; String prev4gram = prev4Word + "|" + prev3Word + "|" + prev2Word + "|" + prevWord; String prev5gram = prev5Word + "|" + prev4Word + "|" + prev3Word + "|" + prev2Word + "|" + prevWord; generalFeatures.put("feature_word_bigram_" + prevBigram.toLowerCase(), 1.0); generalFeatures.put("feature_word_trigram_" + prevTrigram.toLowerCase(), 1.0); 
generalFeatures.put("feature_word_4gram_" + prev4gram.toLowerCase(), 1.0); generalFeatures.put("feature_word_5gram_" + prev5gram.toLowerCase(), 1.0); /*String bigramWord54 = prev5Word + "|" + prev4Word; String bigramWord43 = prev4Word + "|" + prev3Word; String bigramWord32 = prev3Word + "|" + prev2Word; generalFeatures.put("feature_word_bigramWord54_" + bigramWord54, 1.0); generalFeatures.put("feature_word_bigramWord43_" + bigramWord43, 1.0); generalFeatures.put("feature_word_bigramWord32_" + bigramWord32, 1.0); String bigramWordSkip53 = prev5Word + "|" + prev3Word; String bigramWordSkip42 = prev4Word + "|" + prev2Word; String bigramWordSkip31 = prev3Word + "|" + prevWord; generalFeatures.put("feature_word_bigramWordSkip53_" + bigramWordSkip53, 1.0); generalFeatures.put("feature_word_bigramWordSkip42_" + bigramWordSkip42, 1.0); generalFeatures.put("feature_word_bigramWordSkip31_" + bigramWordSkip31, 1.0); String trigramWord543 = prev5Word + "|" + prev4Word + "|" + prev3Word; String trigramWord432 = prev4Word + "|" + prev3Word + "|" + prev2Word; generalFeatures.put("feature_word_trigramWord543_" + trigramWord543, 1.0); generalFeatures.put("feature_word_trigramWord432_" + trigramWord432, 1.0); String trigramWordSkip542 = prev5Word + "|" + prev4Word + "|" + prev2Word; String trigramWordSkip532 = prev5Word + "|" + prev3Word + "|" + prev2Word; String trigramWordSkip431 = prev4Word + "|" + prev3Word + "|" + prevWord; String trigramWordSkip421 = prev4Word + "|" + prev2Word + "|" + prevWord; generalFeatures.put("feature_word_trigramWordSkip542_" + trigramWordSkip542, 1.0); generalFeatures.put("feature_word_trigramWordSkip532_" + trigramWordSkip532, 1.0); generalFeatures.put("feature_word_trigramWordSkip431_" + trigramWordSkip431, 1.0); generalFeatures.put("feature_word_trigramWordSkip421_" + trigramWordSkip421, 1.0);*/ //Previous words in same as current attrValue features /*if (generatedWordsInSameAttrValue.isEmpty()) { 
generalFeatures.put("feature_currentAttrValueWord_isEmpty", 1.0); } for (int j = 1; j <= 1; j++) { String previousCurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - j >= 0) { previousCurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - j).getWord().trim(); } generalFeatures.put("feature_currentAttrValueWord_" + j + "_" + previousCurrentAttrValueWord.toLowerCase(), 1.0); } String prevCurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - 1 >= 0) { prevCurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 1).getWord().trim(); } String prev2CurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - 2 >= 0) { prev2CurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 2).getWord().trim(); } String prev3CurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - 3 >= 0) { prev3CurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 3).getWord().trim(); } String prev4CurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - 4 >= 0) { prev4CurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 4).getWord().trim(); } String prev5CurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - 5 >= 0) { prev5CurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 5).getWord().trim(); } String prevCurrentAttrValueBigram = prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; String prevCurrentAttrValueTrigram = prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; String prevCurrentAttrValue4gram = prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; String prevCurrentAttrValue5gram = prev5CurrentAttrValueWord + "|" + 
prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; generalFeatures.put("feature_currentAttrValueWord_bigram_" + prevCurrentAttrValueBigram.toLowerCase(), 1.0); generalFeatures.put("feature_currentAttrValueWord_trigram_" + prevCurrentAttrValueTrigram.toLowerCase(), 1.0); generalFeatures.put("feature_currentAttrValueWord_4gram_" + prevCurrentAttrValue4gram.toLowerCase(), 1.0); generalFeatures.put("feature_currentAttrValueWord_5gram_" + prevCurrentAttrValue5gram.toLowerCase(), 1.0);*/ /*String bigramCurrentAttrValueWord54 = prev5CurrentAttrValueWord + "|" + prev4CurrentAttrValueWord; String bigramCurrentAttrValueWord43 = prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord; String bigramCurrentAttrValueWord32 = prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord; generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWord54_" + bigramCurrentAttrValueWord54, 1.0); generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWord43_" + bigramCurrentAttrValueWord43, 1.0); generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWord32_" + bigramCurrentAttrValueWord32, 1.0); String bigramCurrentAttrValueWordSkip53 = prev5CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord; String bigramCurrentAttrValueWordSkip42 = prev4CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord; String bigramCurrentAttrValueWordSkip31 = prev3CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWordSkip53_" + bigramCurrentAttrValueWordSkip53, 1.0); generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWordSkip42_" + bigramCurrentAttrValueWordSkip42, 1.0); generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWordSkip31_" + bigramCurrentAttrValueWordSkip31, 1.0); String trigramCurrentAttrValueWord543 = prev5CurrentAttrValueWord + "|" + 
prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord; String trigramCurrentAttrValueWord432 = prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord; generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWord543_" + trigramCurrentAttrValueWord543, 1.0); generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWord432_" + trigramCurrentAttrValueWord432, 1.0); String trigramCurrentAttrValueWordSkip542 = prev5CurrentAttrValueWord + "|" + prev4CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord; String trigramCurrentAttrValueWordSkip532 = prev5CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord; String trigramCurrentAttrValueWordSkip431 = prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; String trigramCurrentAttrValueWordSkip421 = prev4CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWordSkip542_" + trigramCurrentAttrValueWordSkip542, 1.0); generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWordSkip532_" + trigramCurrentAttrValueWordSkip532, 1.0); generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWordSkip431_" + trigramCurrentAttrValueWordSkip431, 1.0); generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWordSkip421_" + trigramCurrentAttrValueWordSkip421, 1.0);*/ //Previous Attr|Word features for (int j = 1; j <= 1; j++) { String previousAttrWord = "@@"; if (generatedWords.size() - j >= 0) { if (generatedWords.get(generatedWords.size() - j).getAttribute().contains("=")) { previousAttrWord = generatedWords.get(generatedWords.size() - j).getAttribute().trim() .substring(0, generatedWords.get(generatedWords.size() - j).getAttribute().indexOf('=')) + "|" + generatedWords.get(generatedWords.size() - j).getWord().trim(); } else { 
previousAttrWord = generatedWords.get(generatedWords.size() - j).getAttribute().trim() + "|" + generatedWords.get(generatedWords.size() - j).getWord().trim(); } } generalFeatures.put("feature_attrWord_" + j + "_" + previousAttrWord.toLowerCase(), 1.0); } String prevAttrWord = "@@"; if (generatedWords.size() - 1 >= 0) { if (generatedWords.get(generatedWords.size() - 1).getAttribute().contains("=")) { prevAttrWord = generatedWords.get(generatedWords.size() - 1).getAttribute().trim().substring(0, generatedWords.get(generatedWords.size() - 1).getAttribute().indexOf('=')) + ":" + generatedWords.get(generatedWords.size() - 1).getWord().trim(); } else { prevAttrWord = generatedWords.get(generatedWords.size() - 1).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 1).getWord().trim(); } } String prev2AttrWord = "@@"; if (generatedWords.size() - 2 >= 0) { if (generatedWords.get(generatedWords.size() - 2).getAttribute().contains("=")) { prev2AttrWord = generatedWords.get(generatedWords.size() - 2).getAttribute().trim().substring(0, generatedWords.get(generatedWords.size() - 2).getAttribute().indexOf('=')) + ":" + generatedWords.get(generatedWords.size() - 2).getWord().trim(); } else { prev2AttrWord = generatedWords.get(generatedWords.size() - 2).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 2).getWord().trim(); } } String prev3AttrWord = "@@"; if (generatedWords.size() - 3 >= 0) { if (generatedWords.get(generatedWords.size() - 3).getAttribute().contains("=")) { prev3AttrWord = generatedWords.get(generatedWords.size() - 3).getAttribute().trim().substring(0, generatedWords.get(generatedWords.size() - 3).getAttribute().indexOf('=')) + ":" + generatedWords.get(generatedWords.size() - 3).getWord().trim(); } else { prev3AttrWord = generatedWords.get(generatedWords.size() - 3).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 3).getWord().trim(); } } String prev4AttrWord = "@@"; if (generatedWords.size() - 4 >= 
0) { if (generatedWords.get(generatedWords.size() - 4).getAttribute().contains("=")) { prev4AttrWord = generatedWords.get(generatedWords.size() - 4).getAttribute().trim().substring(0, generatedWords.get(generatedWords.size() - 4).getAttribute().indexOf('=')) + ":" + generatedWords.get(generatedWords.size() - 4).getWord().trim(); } else { prev4AttrWord = generatedWords.get(generatedWords.size() - 4).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 4).getWord().trim(); } } String prev5AttrWord = "@@"; if (generatedWords.size() - 5 >= 0) { if (generatedWords.get(generatedWords.size() - 5).getAttribute().contains("=")) { prev5AttrWord = generatedWords.get(generatedWords.size() - 5).getAttribute().trim().substring(0, generatedWords.get(generatedWords.size() - 5).getAttribute().indexOf('=')) + ":" + generatedWords.get(generatedWords.size() - 5).getWord().trim(); } else { prev5AttrWord = generatedWords.get(generatedWords.size() - 5).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 5).getWord().trim(); } } String prevAttrWordBigram = prev2AttrWord + "|" + prevAttrWord; String prevAttrWordTrigram = prev3AttrWord + "|" + prev2AttrWord + "|" + prevAttrWord; String prevAttrWord4gram = prev4AttrWord + "|" + prev3AttrWord + "|" + prev2AttrWord + "|" + prevAttrWord; String prevAttrWord5gram = prev5AttrWord + "|" + prev4AttrWord + "|" + prev3AttrWord + "|" + prev2AttrWord + "|" + prevAttrWord; generalFeatures.put("feature_attrWord_bigram_" + prevAttrWordBigram.toLowerCase(), 1.0); generalFeatures.put("feature_attrWord_trigram_" + prevAttrWordTrigram.toLowerCase(), 1.0); generalFeatures.put("feature_attrWord_4gram_" + prevAttrWord4gram.toLowerCase(), 1.0); generalFeatures.put("feature_attrWord_5gram_" + prevAttrWord5gram.toLowerCase(), 1.0); /*String bigramAttrWord54 = prev5AttrWord + "|" + prev4AttrWord; String bigramAttrWord43 = prev4AttrWord + "|" + prev3AttrWord; String bigramAttrWord32 = prev3AttrWord + "|" + prev2AttrWord; 
generalFeatures.put("feature_attrWord_bigramAttrWord54_" + bigramAttrWord54, 1.0); generalFeatures.put("feature_attrWord_bigramAttrWord43_" + bigramAttrWord43, 1.0); generalFeatures.put("feature_attrWord_bigramAttrWord32_" + bigramAttrWord32, 1.0); String bigramAttrWordSkip53 = prev5AttrWord + "|" + prev3AttrWord; String bigramAttrWordSkip42 = prev4AttrWord + "|" + prev2AttrWord; String bigramAttrWordSkip31 = prev3AttrWord + "|" + prevAttrWord; generalFeatures.put("feature_attrWord_bigramAttrWordSkip53_" + bigramAttrWordSkip53, 1.0); generalFeatures.put("feature_attrWord_bigramAttrWordSkip42_" + bigramAttrWordSkip42, 1.0); generalFeatures.put("feature_attrWord_bigramAttrWordSkip31_" + bigramAttrWordSkip31, 1.0); String trigramAttrWord543 = prev5AttrWord + "|" + prev4AttrWord + "|" + prev3AttrWord; String trigramAttrWord432 = prev4AttrWord + "|" + prev3AttrWord + "|" + prev2AttrWord; generalFeatures.put("feature_attrWord_trigramAttrWord543_" + trigramAttrWord543, 1.0); generalFeatures.put("feature_attrWord_trigramAttrWord432_" + trigramAttrWord432, 1.0); String trigramAttrWordSkip542 = prev5AttrWord + "|" + prev4AttrWord + "|" + prev2AttrWord; String trigramAttrWordSkip532 = prev5AttrWord + "|" + prev3AttrWord + "|" + prev2AttrWord; String trigramAttrWordSkip431 = prev4AttrWord + "|" + prev3AttrWord + "|" + prevAttrWord; String trigramAttrWordSkip421 = prev4AttrWord + "|" + prev2AttrWord + "|" + prevAttrWord; generalFeatures.put("feature_attrWord_trigramAttrWordSkip542_" + trigramAttrWordSkip542, 1.0); generalFeatures.put("feature_attrWord_trigramAttrWordSkip532_" + trigramAttrWordSkip532, 1.0); generalFeatures.put("feature_attrWord_trigramAttrWordSkip431_" + trigramAttrWordSkip431, 1.0); generalFeatures.put("feature_attrWord_trigramAttrWordSkip421_" + trigramAttrWordSkip421, 1.0);*/ //Previous AttrValue|Word features for (int j = 1; j <= 1; j++) { String previousAttrWord = "@@"; if (generatedWords.size() - j >= 0) { previousAttrWord = 
generatedWords.get(generatedWords.size() - j).getAttribute().trim() + "|" + generatedWords.get(generatedWords.size() - j).getWord().trim(); } generalFeatures.put("feature_attrValueWord_" + j + "_" + previousAttrWord.toLowerCase(), 1.0); } String prevAttrValueWord = "@@"; if (generatedWords.size() - 1 >= 0) { prevAttrValueWord = generatedWords.get(generatedWords.size() - 1).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 1).getWord().trim(); } String prev2AttrValueWord = "@@"; if (generatedWords.size() - 2 >= 0) { prev2AttrValueWord = generatedWords.get(generatedWords.size() - 2).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 2).getWord().trim(); } String prev3AttrValueWord = "@@"; if (generatedWords.size() - 3 >= 0) { prev3AttrValueWord = generatedWords.get(generatedWords.size() - 3).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 3).getWord().trim(); } String prev4AttrValueWord = "@@"; if (generatedWords.size() - 4 >= 0) { prev4AttrValueWord = generatedWords.get(generatedWords.size() - 4).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 4).getWord().trim(); } String prev5AttrValueWord = "@@"; if (generatedWords.size() - 5 >= 0) { prev5AttrValueWord = generatedWords.get(generatedWords.size() - 5).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 5).getWord().trim(); } String prevAttrValueWordBigram = prev2AttrValueWord + "|" + prevAttrValueWord; String prevAttrValueWordTrigram = prev3AttrValueWord + "|" + prev2AttrValueWord + "|" + prevAttrValueWord; String prevAttrValueWord4gram = prev4AttrValueWord + "|" + prev3AttrValueWord + "|" + prev2AttrValueWord + "|" + prevAttrValueWord; String prevAttrValueWord5gram = prev5AttrValueWord + "|" + prev4AttrValueWord + "|" + prev3AttrValueWord + "|" + prev2AttrValueWord + "|" + prevAttrValueWord; generalFeatures.put("feature_attrValueWord_bigram_" + prevAttrValueWordBigram.toLowerCase(), 1.0); 
generalFeatures.put("feature_attrValueWord_trigram_" + prevAttrValueWordTrigram.toLowerCase(), 1.0); generalFeatures.put("feature_attrValueWord_4gram_" + prevAttrValueWord4gram.toLowerCase(), 1.0); generalFeatures.put("feature_attrValueWord_5gram_" + prevAttrValueWord5gram.toLowerCase(), 1.0); /*String bigramAttrValueWord54 = prev5AttrValueWord + "|" + prev4AttrValueWord; String bigramAttrValueWord43 = prev4AttrValueWord + "|" + prev3AttrValueWord; String bigramAttrValueWord32 = prev3AttrValueWord + "|" + prev2AttrValueWord; generalFeatures.put("feature_attrValueWord_bigramAttrValueWord54_" + bigramAttrValueWord54, 1.0); generalFeatures.put("feature_attrValueWord_bigramAttrValueWord43_" + bigramAttrValueWord43, 1.0); generalFeatures.put("feature_attrValueWord_bigramAttrValueWord32_" + bigramAttrValueWord32, 1.0); String bigramAttrValueWordSkip53 = prev5AttrValueWord + "|" + prev3AttrValueWord; String bigramAttrValueWordSkip42 = prev4AttrValueWord + "|" + prev2AttrValueWord; String bigramAttrValueWordSkip31 = prev3AttrValueWord + "|" + prevAttrValueWord; generalFeatures.put("feature_attrValueWord_bigramAttrValueWordSkip53_" + bigramAttrValueWordSkip53, 1.0); generalFeatures.put("feature_attrValueWord_bigramAttrValueWordSkip42_" + bigramAttrValueWordSkip42, 1.0); generalFeatures.put("feature_attrValueWord_bigramAttrValueWordSkip31_" + bigramAttrValueWordSkip31, 1.0); String trigramAttrValueWord543 = prev5AttrValueWord + "|" + prev4AttrValueWord + "|" + prev3AttrValueWord; String trigramAttrValueWord432 = prev4AttrValueWord + "|" + prev3AttrValueWord + "|" + prev2AttrValueWord; generalFeatures.put("feature_attrValueWord_trigramAttrValueWord543_" + trigramAttrValueWord543, 1.0); generalFeatures.put("feature_attrValueWord_trigramAttrValueWord432_" + trigramAttrValueWord432, 1.0); String trigramAttrValueWordSkip542 = prev5AttrValueWord + "|" + prev4AttrValueWord + "|" + prev2AttrValueWord; String trigramAttrValueWordSkip532 = prev5AttrValueWord + "|" + prev3AttrValueWord 
+ "|" + prev2AttrValueWord; String trigramAttrValueWordSkip431 = prev4AttrValueWord + "|" + prev3AttrValueWord + "|" + prevAttrValueWord; String trigramAttrValueWordSkip421 = prev4AttrValueWord + "|" + prev2AttrValueWord + "|" + prevAttrValueWord; generalFeatures.put("feature_attrValueWord_trigramAttrValueWordSkip542_" + trigramAttrValueWordSkip542, 1.0); generalFeatures.put("feature_attrValueWord_trigramAttrValueWordSkip532_" + trigramAttrValueWordSkip532, 1.0); generalFeatures.put("feature_attrValueWord_trigramAttrValueWordSkip431_" + trigramAttrValueWordSkip431, 1.0); generalFeatures.put("feature_attrValueWord_trigramAttrValueWordSkip421_" + trigramAttrValueWordSkip421, 1.0);*/ //Previous attrValue features int attributeSize = generatedAttributes.size(); for (int j = 1; j <= 1; j++) { String previousAttrValue = "@@"; if (attributeSize - j >= 0) { previousAttrValue = generatedAttributes.get(attributeSize - j).trim(); } generalFeatures.put("feature_attrValue_" + j + "_" + previousAttrValue, 1.0); } String prevAttrValue = "@@"; if (attributeSize - 1 >= 0) { prevAttrValue = generatedAttributes.get(attributeSize - 1).trim(); } String prev2AttrValue = "@@"; if (attributeSize - 2 >= 0) { prev2AttrValue = generatedAttributes.get(attributeSize - 2).trim(); } String prev3AttrValue = "@@"; if (attributeSize - 3 >= 0) { prev3AttrValue = generatedAttributes.get(attributeSize - 3).trim(); } String prev4AttrValue = "@@"; if (attributeSize - 4 >= 0) { prev4AttrValue = generatedAttributes.get(attributeSize - 4).trim(); } String prev5AttrValue = "@@"; if (attributeSize - 5 >= 0) { prev5AttrValue = generatedAttributes.get(attributeSize - 5).trim(); } String prevAttrBigramValue = prev2AttrValue + "|" + prevAttrValue; String prevAttrTrigramValue = prev3AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; String prevAttr4gramValue = prev4AttrValue + "|" + prev3AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; String prevAttr5gramValue = prev5AttrValue + "|" + prev4AttrValue 
+ "|" + prev3AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; generalFeatures.put("feature_attrValue_bigram_" + prevAttrBigramValue.toLowerCase(), 1.0); generalFeatures.put("feature_attrValue_trigram_" + prevAttrTrigramValue.toLowerCase(), 1.0); generalFeatures.put("feature_attrValue_4gram_" + prevAttr4gramValue.toLowerCase(), 1.0); generalFeatures.put("feature_attrValue_5gram_" + prevAttr5gramValue.toLowerCase(), 1.0); /*String bigramAttrValue54 = prev5AttrValue + "|" + prev4AttrValue; String bigramAttrValue43 = prev4AttrValue + "|" + prev3AttrValue; String bigramAttrValue32 = prev3AttrValue + "|" + prev2AttrValue; generalFeatures.put("feature_attrValue_bigramAttrValue54_" + bigramAttrValue54, 1.0); generalFeatures.put("feature_attrValue_bigramAttrValue43_" + bigramAttrValue43, 1.0); generalFeatures.put("feature_attrValue_bigramAttrValue32_" + bigramAttrValue32, 1.0); String bigramAttrValueSkip53 = prev5AttrValue + "|" + prev3AttrValue; String bigramAttrValueSkip42 = prev4AttrValue + "|" + prev2AttrValue; String bigramAttrValueSkip31 = prev3AttrValue + "|" + prevAttrValue; generalFeatures.put("feature_attrValue_bigramAttrValueSkip53_" + bigramAttrValueSkip53, 1.0); generalFeatures.put("feature_attrValue_bigramAttrValueSkip42_" + bigramAttrValueSkip42, 1.0); generalFeatures.put("feature_attrValue_bigramAttrValueSkip31_" + bigramAttrValueSkip31, 1.0); String trigramAttrValue543 = prev5AttrValue + "|" + prev4AttrValue + "|" + prev3AttrValue; String trigramAttrValue432 = prev4AttrValue + "|" + prev3AttrValue + "|" + prev2AttrValue; generalFeatures.put("feature_attrValue_trigramAttrValue543_" + trigramAttrValue543, 1.0); generalFeatures.put("feature_attrValue_trigramAttrValue432_" + trigramAttrValue432, 1.0); String trigramAttrValueSkip542 = prev5AttrValue + "|" + prev4AttrValue + "|" + prev2AttrValue; String trigramAttrValueSkip532 = prev5AttrValue + "|" + prev3AttrValue + "|" + prev2AttrValue; String trigramAttrValueSkip431 = prev4AttrValue + "|" + 
prev3AttrValue + "|" + prevAttrValue; String trigramAttrValueSkip421 = prev4AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; generalFeatures.put("feature_attrValue_trigramAttrValueSkip542_" + trigramAttrValueSkip542, 1.0); generalFeatures.put("feature_attrValue_trigramAttrValueSkip532_" + trigramAttrValueSkip532, 1.0); generalFeatures.put("feature_attrValue_trigramAttrValueSkip431_" + trigramAttrValueSkip431, 1.0); generalFeatures.put("feature_attrValue_trigramAttrValueSkip421_" + trigramAttrValueSkip421, 1.0);*/ //Previous attr features for (int j = 1; j <= 1; j++) { String previousAttr = "@@"; if (attributeSize - j >= 0) { if (generatedAttributes.get(attributeSize - j).contains("=")) { previousAttr = generatedAttributes.get(attributeSize - j).trim().substring(0, generatedAttributes.get(attributeSize - j).indexOf('=')); } else { previousAttr = generatedAttributes.get(attributeSize - j).trim(); } } generalFeatures.put("feature_attr_" + j + "_" + previousAttr, 1.0); } String prevAttr = "@@"; if (attributeSize - 1 >= 0) { if (generatedAttributes.get(attributeSize - 1).contains("=")) { prevAttr = generatedAttributes.get(attributeSize - 1).trim().substring(0, generatedAttributes.get(attributeSize - 1).indexOf('=')); } else { prevAttr = generatedAttributes.get(attributeSize - 1).trim(); } } String prev2Attr = "@@"; if (attributeSize - 2 >= 0) { if (generatedAttributes.get(attributeSize - 2).contains("=")) { prev2Attr = generatedAttributes.get(attributeSize - 2).trim().substring(0, generatedAttributes.get(attributeSize - 2).indexOf('=')); } else { prev2Attr = generatedAttributes.get(attributeSize - 2).trim(); } } String prev3Attr = "@@"; if (attributeSize - 3 >= 0) { if (generatedAttributes.get(attributeSize - 3).contains("=")) { prev3Attr = generatedAttributes.get(attributeSize - 3).trim().substring(0, generatedAttributes.get(attributeSize - 3).indexOf('=')); } else { prev3Attr = generatedAttributes.get(attributeSize - 3).trim(); } } String prev4Attr = "@@"; if 
(attributeSize - 4 >= 0) { if (generatedAttributes.get(attributeSize - 4).contains("=")) { prev4Attr = generatedAttributes.get(attributeSize - 4).trim().substring(0, generatedAttributes.get(attributeSize - 4).indexOf('=')); } else { prev4Attr = generatedAttributes.get(attributeSize - 4).trim(); } } String prev5Attr = "@@"; if (attributeSize - 5 >= 0) { if (generatedAttributes.get(attributeSize - 5).contains("=")) { prev5Attr = generatedAttributes.get(attributeSize - 5).trim().substring(0, generatedAttributes.get(attributeSize - 5).indexOf('=')); } else { prev5Attr = generatedAttributes.get(attributeSize - 5).trim(); } } String prevAttrBigram = prev2Attr + "|" + prevAttr; String prevAttrTrigram = prev3Attr + "|" + prev2Attr + "|" + prevAttr; String prevAttr4gram = prev4Attr + "|" + prev3Attr + "|" + prev2Attr + "|" + prevAttr; String prevAttr5gram = prev5Attr + "|" + prev4Attr + "|" + prev3Attr + "|" + prev2Attr + "|" + prevAttr; generalFeatures.put("feature_attr_bigram_" + prevAttrBigram.toLowerCase(), 1.0); generalFeatures.put("feature_attr_trigram_" + prevAttrTrigram.toLowerCase(), 1.0); generalFeatures.put("feature_attr_4gram_" + prevAttr4gram.toLowerCase(), 1.0); generalFeatures.put("feature_attr_5gram_" + prevAttr5gram.toLowerCase(), 1.0); /*String bigramAttr54 = prev5Attr + "|" + prev4Attr; String bigramAttr43 = prev4Attr + "|" + prev3Attr; String bigramAttr32 = prev3Attr + "|" + prev2Attr; generalFeatures.put("feature_attr_bigramAttr54_" + bigramAttr54, 1.0); generalFeatures.put("feature_attr_bigramAttr43_" + bigramAttr43, 1.0); generalFeatures.put("feature_attr_bigramAttr32_" + bigramAttr32, 1.0); String bigramAttrSkip53 = prev5Attr + "|" + prev3Attr; String bigramAttrSkip42 = prev4Attr + "|" + prev2Attr; String bigramAttrSkip31 = prev3Attr + "|" + prevAttr; generalFeatures.put("feature_attr_bigramAttrSkip53_" + bigramAttrSkip53, 1.0); generalFeatures.put("feature_attr_bigramAttrSkip42_" + bigramAttrSkip42, 1.0); 
generalFeatures.put("feature_attr_bigramAttrSkip31_" + bigramAttrSkip31, 1.0); String trigramAttr543 = prev5Attr + "|" + prev4Attr + "|" + prev3Attr; String trigramAttr432 = prev4Attr + "|" + prev3Attr + "|" + prev2Attr; generalFeatures.put("feature_attr_trigramAttr543_" + trigramAttr543, 1.0); generalFeatures.put("feature_attr_trigramAttr432_" + trigramAttr432, 1.0); String trigramAttrSkip542 = prev5Attr + "|" + prev4Attr + "|" + prev2Attr; String trigramAttrSkip532 = prev5Attr + "|" + prev3Attr + "|" + prev2Attr; String trigramAttrSkip431 = prev4Attr + "|" + prev3Attr + "|" + prevAttr; String trigramAttrSkip421 = prev4Attr + "|" + prev2Attr + "|" + prevAttr; generalFeatures.put("feature_attr_trigramAttrSkip542_" + trigramAttrSkip542, 1.0); generalFeatures.put("feature_attr_trigramAttrSkip532_" + trigramAttrSkip532, 1.0); generalFeatures.put("feature_attr_trigramAttrSkip431_" + trigramAttrSkip431, 1.0); generalFeatures.put("feature_attr_trigramAttrSkip421_" + trigramAttrSkip421, 1.0);*/ //Next attr features for (int j = 0; j < 1; j++) { String nextAttr = "@@"; if (j < nextGeneratedAttributes.size()) { if (nextGeneratedAttributes.get(j).contains("=")) { nextAttr = nextGeneratedAttributes.get(j).trim().substring(0, nextGeneratedAttributes.get(j).indexOf('=')); } else { nextAttr = nextGeneratedAttributes.get(j).trim(); } } generalFeatures.put("feature_nextAttr_" + j + "_" + nextAttr, 1.0); } String nextAttr = "@@"; if (0 < nextGeneratedAttributes.size()) { if (nextGeneratedAttributes.get(0).contains("=")) { nextAttr = nextGeneratedAttributes.get(0).trim().substring(0, nextGeneratedAttributes.get(0).indexOf('=')); } else { nextAttr = nextGeneratedAttributes.get(0).trim(); } } String next2Attr = "@@"; if (1 < nextGeneratedAttributes.size()) { if (nextGeneratedAttributes.get(1).contains("=")) { next2Attr = nextGeneratedAttributes.get(1).trim().substring(0, nextGeneratedAttributes.get(1).indexOf('=')); } else { next2Attr = nextGeneratedAttributes.get(1).trim(); } } 
String next3Attr = "@@"; if (2 < nextGeneratedAttributes.size()) { if (nextGeneratedAttributes.get(2).contains("=")) { next3Attr = nextGeneratedAttributes.get(2).trim().substring(0, nextGeneratedAttributes.get(2).indexOf('=')); } else { next3Attr = nextGeneratedAttributes.get(2).trim(); } } String next4Attr = "@@"; if (3 < nextGeneratedAttributes.size()) { if (nextGeneratedAttributes.get(3).contains("=")) { next4Attr = nextGeneratedAttributes.get(3).trim().substring(0, nextGeneratedAttributes.get(3).indexOf('=')); } else { next4Attr = nextGeneratedAttributes.get(3).trim(); } } String next5Attr = "@@"; if (4 < nextGeneratedAttributes.size()) { if (nextGeneratedAttributes.get(4).contains("=")) { next5Attr = nextGeneratedAttributes.get(4).trim().substring(0, nextGeneratedAttributes.get(4).indexOf('=')); } else { next5Attr = nextGeneratedAttributes.get(4).trim(); } } String nextAttrBigram = nextAttr + "|" + next2Attr; String nextAttrTrigram = nextAttr + "|" + next2Attr + "|" + next3Attr; String nextAttr4gram = nextAttr + "|" + next2Attr + "|" + next3Attr + "|" + next4Attr; String nextAttr5gram = nextAttr + "|" + next2Attr + "|" + next3Attr + "|" + next4Attr + "|" + next5Attr; generalFeatures.put("feature_nextAttr_bigram_" + nextAttrBigram.toLowerCase(), 1.0); generalFeatures.put("feature_nextAttr_trigram_" + nextAttrTrigram.toLowerCase(), 1.0); generalFeatures.put("feature_nextAttr_4gram_" + nextAttr4gram.toLowerCase(), 1.0); generalFeatures.put("feature_nextAttr_5gram_" + nextAttr5gram.toLowerCase(), 1.0); //Next attrValue features for (int j = 0; j < 1; j++) { String nextAttrValue = "@@"; if (j < nextGeneratedAttributes.size()) { nextAttrValue = nextGeneratedAttributes.get(j).trim(); } generalFeatures.put("feature_nextAttrValue_" + j + "_" + nextAttrValue, 1.0); } String nextAttrValue = "@@"; if (0 < nextGeneratedAttributes.size()) { nextAttrValue = nextGeneratedAttributes.get(0).trim(); } String next2AttrValue = "@@"; if (1 < nextGeneratedAttributes.size()) { 
next2AttrValue = nextGeneratedAttributes.get(1).trim(); } String next3AttrValue = "@@"; if (2 < nextGeneratedAttributes.size()) { next3AttrValue = nextGeneratedAttributes.get(2).trim(); } String next4AttrValue = "@@"; if (3 < nextGeneratedAttributes.size()) { next4AttrValue = nextGeneratedAttributes.get(3).trim(); } String next5AttrValue = "@@"; if (4 < nextGeneratedAttributes.size()) { next5AttrValue = nextGeneratedAttributes.get(4).trim(); } String nextAttrValueBigram = nextAttrValue + "|" + next2AttrValue; String nextAttrValueTrigram = nextAttrValue + "|" + next2AttrValue + "|" + next3AttrValue; String nextAttrValue4gram = nextAttrValue + "|" + next2AttrValue + "|" + next3AttrValue + "|" + next4AttrValue; String nextAttrValue5gram = nextAttrValue + "|" + next2AttrValue + "|" + next3AttrValue + "|" + next4AttrValue + "|" + next5AttrValue; generalFeatures.put("feature_nextAttrValue_bigram_" + nextAttrValueBigram.toLowerCase(), 1.0); generalFeatures.put("feature_nextAttrValue_trigram_" + nextAttrValueTrigram.toLowerCase(), 1.0); generalFeatures.put("feature_nextAttrValue_4gram_" + nextAttrValue4gram.toLowerCase(), 1.0); generalFeatures.put("feature_nextAttrValue_5gram_" + nextAttrValue5gram.toLowerCase(), 1.0); //If values have already been generated or not generalFeatures.put("feature_valueToBeMentioned_" + currentValue.toLowerCase(), 1.0); if (wasValueMentioned) { generalFeatures.put("feature_wasValueMentioned_true", 1.0); } else { //generalFeatures.put("feature_wasValueMentioned_false", 1.0); } HashSet<String> valuesThatFollow = new HashSet<>(); attrValuesThatFollow.stream().map((attrValue) -> { generalFeatures.put("feature_attrValuesThatFollow_" + attrValue.toLowerCase(), 1.0); return attrValue; }).forEachOrdered((attrValue) -> { if (attrValue.contains("=")) { String v = attrValue.substring(attrValue.indexOf('=') + 1); if (v.matches("[xX][0-9]+")) { String attr = attrValue.substring(0, attrValue.indexOf('=')); valuesThatFollow.add(Action.TOKEN_X + attr + "_" + 
v.substring(1)); } else { valuesThatFollow.add(v); } generalFeatures.put( "feature_attrsThatFollow_" + attrValue.substring(0, attrValue.indexOf('=')).toLowerCase(), 1.0); } else { generalFeatures.put("feature_attrsThatFollow_" + attrValue.toLowerCase(), 1.0); } }); if (valuesThatFollow.isEmpty()) { generalFeatures.put("feature_noAttrsFollow", 1.0); } else { generalFeatures.put("feature_noAttrsFollow", 0.0); } HashSet<String> mentionedValues = new HashSet<>(); attrValuesAlreadyMentioned.stream().map((attrValue) -> { generalFeatures.put("feature_attrValuesAlreadyMentioned_" + attrValue.toLowerCase(), 1.0); return attrValue; }).forEachOrdered((attrValue) -> { if (attrValue.contains("=")) { generalFeatures.put("feature_attrsAlreadyMentioned_" + attrValue.substring(0, attrValue.indexOf('=')).toLowerCase(), 1.0); String v = attrValue.substring(attrValue.indexOf('=') + 1); if (v.matches("[xX][0-9]+")) { String attr = attrValue.substring(0, attrValue.indexOf('=')); mentionedValues.add(Action.TOKEN_X + attr + "_" + v.substring(1)); } else { mentionedValues.add(v); } } else { generalFeatures.put("feature_attrsAlreadyMentioned_" + attrValue.toLowerCase(), 1.0); } }); /*System.out.println("currentAttrValue: " + currentAttrValue); System.out.println("5W: " + prev5gram); System.out.println("5AW: " + prevAttrWord5gram); System.out.println("5A: " + prevAttr5gram); System.out.println("VM: " + wasValueMentioned); System.out.println("A_TF: " + attrValuesThatFollow); System.out.println("==============================");*/ if (currentValue.equals("no") || currentValue.equals("yes") || currentValue.equals("yes or no") || currentValue.equals("none") || currentValue.equals("empty") //|| currentValue.equals("dont_care") ) { generalFeatures.put("feature_emptyValue", 1.0); } //Word specific features (and also global features) for (Action action : availableWordActions.get(currentAttr)) { //Is word same as previous word if (prevWord.equals(action.getWord())) { 
//valueSpecificFeatures.get(action.getAction()).put("feature_specific_sameAsPreviousWord", 1.0); valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_sameAsPreviousWord", 1.0); } else { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_notSameAsPreviousWord", 1.0); valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_notSameAsPreviousWord", 1.0); } //Has word appeared in the same attrValue before generatedWords.forEach((previousAction) -> { if (previousAction.getWord().equals(action.getWord()) && previousAction.getAttribute().equals(currentAttrValue)) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_appearedInSameAttrValue", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_appearedInSameAttrValue", 1.0); } else { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_notAppearedInSameAttrValue", 1.0); //valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_notAppearedInSameAttrValue", 1.0); } }); //Has word appeared before generatedWords.forEach((previousAction) -> { if (previousAction.getWord().equals(action.getWord())) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_appeared", 1.0); valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_appeared", 1.0); } else { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_notAppeared", 1.0); //valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_notAppeared", 1.0); } }); if (currentValue.equals("no") || currentValue.equals("yes") || currentValue.equals("yes or no") || currentValue.equals("none") || currentValue.equals("empty") //|| currentValue.equals("dont_care") ) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_emptyValue", 1.0); valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_emptyValue", 1.0); } else { 
//valueSpecificFeatures.get(action.getAction()).put("feature_specific_notEmptyValue", 1.0); //valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_notEmptyValue", 1.0); } HashSet<String> keys = new HashSet<>(valueSpecificFeatures.get(action.getAction()).keySet()); keys.forEach((feature1) -> { keys.stream() .filter((feature2) -> (valueSpecificFeatures.get(action.getAction()).get(feature1) == 1.0 && valueSpecificFeatures.get(action.getAction()).get(feature2) == 1.0 && feature1.compareTo(feature2) < 0)) .forEachOrdered((feature2) -> { valueSpecificFeatures.get(action.getAction()).put(feature1 + "&&" + feature2, 1.0); }); }); if (!action.getWord().startsWith(Action.TOKEN_X) && !currentValue.equals("no") && !currentValue.equals("yes") && !currentValue.equals("yes or no") && !currentValue.equals("none") && !currentValue.equals("empty") //&& !currentValue.equals("dont_care") ) { for (String value : getValueAlignments().keySet()) { for (ArrayList<String> alignedStr : getValueAlignments().get(value).keySet()) { if (alignedStr.get(0).equals(action.getWord())) { if (mentionedValues.contains(value)) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_beginsValue_alreadyMentioned", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_beginsValue_alreadyMentioned", 1.0); } else if (currentValue.equals(value)) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_beginsValue_current", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_beginsValue_current", 1.0); } else if (valuesThatFollow.contains(value)) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_beginsValue_thatFollows", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_beginsValue_thatFollows", 1.0); } else { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_beginsValue_notInMR", 1.0); 
valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_beginsValue_notInMR", 1.0); } } else { for (int i = 1; i < alignedStr.size(); i++) { if (alignedStr.get(i).equals(action.getWord())) { if (endsWith(generatedPhrase, new ArrayList<String>(alignedStr.subList(0, i + 1)))) { if (mentionedValues.contains(value)) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_inValue_alreadyMentioned", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_inValue_alreadyMentioned", 1.0); } else if (currentValue.equals(value)) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_inValue_current", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_inValue_current", 1.0); } else if (valuesThatFollow.contains(value)) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_inValue_thatFollows", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_inValue_thatFollows", 1.0); } else { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_inValue_notInMR", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_inValue_notInMR", 1.0); } } else { /*if (mentionedValues.contains(value)) { valueSpecificFeatures.get(action.getAction()).put("feature_specific_outOfValue_alreadyMentioned", 1.0); } else if (currentValue.equals(value)) { valueSpecificFeatures.get(action.getAction()).put("feature_specific_outOfValue_current", 1.0); } else if (valuesThatFollow.contains(value)) { valueSpecificFeatures.get(action.getAction()).put("feature_specific_outOfValue_thatFollows", 1.0); } else { valueSpecificFeatures.get(action.getAction()).put("feature_specific_outOfValue_notInMR", 1.0); }*/ //valueSpecificFeatures.get(action.getAction()).put("feature_specific_outOfValue", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_outOfValue", 1.0); } } } } } } if 
(action.getWord().equals(Action.TOKEN_END)) { if (generatedWordsInSameAttrValue.isEmpty()) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_closingEmptyAttr", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_closingEmptyAttr", 1.0); } if (!wasValueMentioned) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_closingAttrWithValueNotMentioned", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_closingAttrWithValueNotMentioned", 1.0); } // if (!prevCurrentAttrValueWord.equals("@@")) { if (!prevWord.equals("@@")) { boolean alignmentIsOpen = false; for (String value : getValueAlignments().keySet()) { for (ArrayList<String> alignedStr : getValueAlignments().get(value).keySet()) { for (int i = 0; i < alignedStr.size() - 1; i++) { if (alignedStr.get(i).equals(prevWord) && endsWith(generatedPhrase, new ArrayList<>(alignedStr.subList(0, i + 1)))) { alignmentIsOpen = true; } } } } if (alignmentIsOpen) { // valueSpecificFeatures.get(action.getAction()).put("feature_specific_closingAttrWhileValueIsNotConcluded", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_closingAttrWhileValueIsNotConcluded", 1.0); } } } } else if (currentValue.equals("no") || currentValue.equals("yes") || currentValue.equals("yes or no") || currentValue.equals("none") || currentValue.equals("empty") //|| currentValue.equals("dont_care") ) { valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_XValue_notInMR", 1.0); } else { String currentValueVariant = ""; if (currentValue.matches("[xX][0-9]+")) { currentValueVariant = Action.TOKEN_X + currentAttr + "_" + currentValue.substring(1); } if (mentionedValues.contains(action.getWord())) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_XValue_alreadyMentioned", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_XValue_alreadyMentioned", 1.0); } 
else if (currentValueVariant.equals(action.getWord()) && !currentValueVariant.isEmpty()) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_XValue_current", 1.0); valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_XValue_current", 1.0); } else if (valuesThatFollow.contains(action.getWord())) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_XValue_thatFollows", 1.0); valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_XValue_thatFollows", 1.0); } else { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_XValue_notInMR", 1.0); valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_XValue_notInMR", 1.0); } } /*for (int i : nGrams.keySet()) { for (String nGram : nGrams.get(i)) { if (i == 2) { if (nGram.startsWith(prevWord + "|") && nGram.endsWith("|" + action.getAction())) { valueSpecificFeatures.get(action.getAction()).put("feature_specific_valuesFollowsPreviousWord", 1.0); } } else if (i == 3) { if (nGram.startsWith(prevBigram + "|") && nGram.endsWith("|" + action.getAction())) { valueSpecificFeatures.get(action.getAction()).put("feature_specific_valuesFollowsPreviousBigram", 1.0); } } else if (i == 4) { if (nGram.startsWith(prevTrigram + "|") && nGram.endsWith("|" + action.getAction())) { valueSpecificFeatures.get(action.getAction()).put("feature_specific_valuesFollowsPreviousTrigram", 1.0); } } else if (i == 5) { if (nGram.startsWith(prev4gram + "|") && nGram.endsWith("|" + action.getAction())) { valueSpecificFeatures.get(action.getAction()).put("feature_specific_valuesFollowsPrevious4gram", 1.0); } } else if (i == 6) { if (nGram.startsWith(prev5gram + "|") && nGram.endsWith("|" + action.getAction())) { valueSpecificFeatures.get(action.getAction()).put("feature_specific_valuesFollowsPrevious5gram", 1.0); } } } }*/ //valueSpecificFeatures.get(action.getAction()).put("global_feature_abstractMR_" + mr.getAbstractMR(), 1.0); 
valueSpecificFeatures.get(action.getAction()) .put("global_feature_currentValue_" + currentValue.toLowerCase(), 1.0); ArrayList<String> fullGramLM = new ArrayList<>(); for (int i = 0; i < generatedWords.size(); i++) { fullGramLM.add(generatedWords.get(i).getWord()); } ArrayList<String> prev5wordGramLM = new ArrayList<>(); int j = 0; for (int i = generatedWords.size() - 1; (i >= 0 && j < 5); i--) { prev5wordGramLM.add(0, generatedWords.get(i).getWord()); j++; } prev5wordGramLM.add(action.getWord()); while (prev5wordGramLM.size() < 4) { prev5wordGramLM.add(0, "@@"); } double afterLMScorePerPred5Gram = getWordLMsPerPredicate().get(predicate) .getProbability(prev5wordGramLM); valueSpecificFeatures.get(action.getAction()).put("global_feature_LMWord_perPredicate_5gram_score", afterLMScorePerPred5Gram); double afterLMScorePerPred = getWordLMsPerPredicate().get(predicate).getProbability(fullGramLM); valueSpecificFeatures.get(action.getAction()).put("global_feature_LMWord_perPredicate_score", afterLMScorePerPred); } /*HashSet<String> keys = new HashSet<>(generalFeatures.keySet()); for (String feature1 : keys) { if (generalFeatures.get(feature1) == 1.0) { generalFeatures.put("global_feature_attr_" + currentValue.toLowerCase() + "&&" + feature1, 1.0); } }*/ //generalFeatures.put("feature_abstractMR_" + mr.getAbstractMR(), 1.0); /*HashSet<String> keys = new HashSet<>(generalFeatures.keySet()); for (String feature1 : keys) { for (String feature2 : keys) { if (generalFeatures.get(feature1) == 1.0 && generalFeatures.get(feature2) == 1.0 && feature1.compareTo(feature2) < 0) { generalFeatures.put(feature1 + "&&" + feature2, 1.0); } } }*/ return new Instance(generalFeatures, valueSpecificFeatures, costs); }