List of usage examples for java.util.ArrayList.subList
public List<E> subList(int fromIndex, int toIndex)
From source file: imitationNLG.SFX.java
public Instance createWordInstance(String predicate, String currentAttrValue, ArrayList<String> generatedAttributes, ArrayList<Action> previousGeneratedWords, TObjectDoubleHashMap<String> costs, boolean wasValueMentioned, HashSet<String> attrValuesAlreadyMentioned, HashSet<String> attrValuesThatFollow, MeaningRepresentation MR, HashMap<String, HashSet<Action>> availableWordActions, HashMap<Integer, HashSet<String>> nGrams) { String currentAttr = currentAttrValue; String currentValue = ""; if (currentAttr.contains("=")) { currentAttr = currentAttrValue.substring(0, currentAttrValue.indexOf('=')); currentValue = currentAttrValue.substring(currentAttrValue.indexOf('=') + 1); }/*from w w w . j ava 2 s . c o m*/ if (currentValue.contains(":")) { currentValue = currentAttrValue.substring(currentAttrValue.indexOf(':') + 1); } if (currentValue.isEmpty()) { //System.exit(0); } TObjectDoubleHashMap<String> generalFeatures = new TObjectDoubleHashMap<>(); HashMap<String, TObjectDoubleHashMap<String>> valueSpecificFeatures = new HashMap<>(); for (Action action : availableWordActions.get(currentAttr)) { valueSpecificFeatures.put(action.getWord(), new TObjectDoubleHashMap<String>()); } /*if (gWords.get(wIndex).getWord().equals(SFX.TOKEN_END)) { System.out.println("!!! 
"+ gWords.subList(0, wIndex + 1)); }*/ ArrayList<Action> generatedWords = new ArrayList<>(); ArrayList<Action> generatedWordsInSameAttrValue = new ArrayList<>(); ArrayList<String> generatedPhrase = new ArrayList<>(); for (int i = 0; i < previousGeneratedWords.size(); i++) { Action a = previousGeneratedWords.get(i); if (!a.getWord().equals(SFX.TOKEN_START) && !a.getWord().equals(SFX.TOKEN_END)) { generatedWords.add(a); generatedPhrase.add(a.getWord()); if (a.getAttribute().equals(currentAttrValue)) { generatedWordsInSameAttrValue.add(a); } } } //Previous word features for (int j = 1; j <= 1; j++) { String previousWord = "@@"; if (generatedWords.size() - j >= 0) { previousWord = generatedWords.get(generatedWords.size() - j).getWord().trim(); } generalFeatures.put("feature_word_" + j + "_" + previousWord.toLowerCase(), 1.0); } String prevWord = "@@"; if (generatedWords.size() - 1 >= 0) { prevWord = generatedWords.get(generatedWords.size() - 1).getWord().trim(); } String prev2Word = "@@"; if (generatedWords.size() - 2 >= 0) { prev2Word = generatedWords.get(generatedWords.size() - 2).getWord().trim(); } String prev3Word = "@@"; if (generatedWords.size() - 3 >= 0) { prev3Word = generatedWords.get(generatedWords.size() - 3).getWord().trim(); } String prev4Word = "@@"; if (generatedWords.size() - 4 >= 0) { prev4Word = generatedWords.get(generatedWords.size() - 4).getWord().trim(); } String prev5Word = "@@"; if (generatedWords.size() - 5 >= 0) { prev5Word = generatedWords.get(generatedWords.size() - 5).getWord().trim(); } String prevBigram = prev2Word + "|" + prevWord; String prevTrigram = prev3Word + "|" + prev2Word + "|" + prevWord; String prev4gram = prev4Word + "|" + prev3Word + "|" + prev2Word + "|" + prevWord; String prev5gram = prev5Word + "|" + prev4Word + "|" + prev3Word + "|" + prev2Word + "|" + prevWord; generalFeatures.put("feature_word_bigram_" + prevBigram.toLowerCase(), 1.0); generalFeatures.put("feature_word_trigram_" + prevTrigram.toLowerCase(), 1.0); 
generalFeatures.put("feature_word_4gram_" + prev4gram.toLowerCase(), 1.0); generalFeatures.put("feature_word_5gram_" + prev5gram.toLowerCase(), 1.0); /*String bigramWord54 = prev5Word + "|" + prev4Word; String bigramWord43 = prev4Word + "|" + prev3Word; String bigramWord32 = prev3Word + "|" + prev2Word; generalFeatures.put("feature_word_bigramWord54_" + bigramWord54, 1.0); generalFeatures.put("feature_word_bigramWord43_" + bigramWord43, 1.0); generalFeatures.put("feature_word_bigramWord32_" + bigramWord32, 1.0); String bigramWordSkip53 = prev5Word + "|" + prev3Word; String bigramWordSkip42 = prev4Word + "|" + prev2Word; String bigramWordSkip31 = prev3Word + "|" + prevWord; generalFeatures.put("feature_word_bigramWordSkip53_" + bigramWordSkip53, 1.0); generalFeatures.put("feature_word_bigramWordSkip42_" + bigramWordSkip42, 1.0); generalFeatures.put("feature_word_bigramWordSkip31_" + bigramWordSkip31, 1.0); String trigramWord543 = prev5Word + "|" + prev4Word + "|" + prev3Word; String trigramWord432 = prev4Word + "|" + prev3Word + "|" + prev2Word; generalFeatures.put("feature_word_trigramWord543_" + trigramWord543, 1.0); generalFeatures.put("feature_word_trigramWord432_" + trigramWord432, 1.0); String trigramWordSkip542 = prev5Word + "|" + prev4Word + "|" + prev2Word; String trigramWordSkip532 = prev5Word + "|" + prev3Word + "|" + prev2Word; String trigramWordSkip431 = prev4Word + "|" + prev3Word + "|" + prevWord; String trigramWordSkip421 = prev4Word + "|" + prev2Word + "|" + prevWord; generalFeatures.put("feature_word_trigramWordSkip542_" + trigramWordSkip542, 1.0); generalFeatures.put("feature_word_trigramWordSkip532_" + trigramWordSkip532, 1.0); generalFeatures.put("feature_word_trigramWordSkip431_" + trigramWordSkip431, 1.0); generalFeatures.put("feature_word_trigramWordSkip421_" + trigramWordSkip421, 1.0);*/ //Previous words in same as current attrValue features if (generatedWordsInSameAttrValue.isEmpty()) { 
generalFeatures.put("feature_currentAttrValueWord_isEmpty", 1.0); } for (int j = 1; j <= 1; j++) { String previousCurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - j >= 0) { previousCurrentAttrValueWord = generatedWordsInSameAttrValue .get(generatedWordsInSameAttrValue.size() - j).getWord().trim(); } generalFeatures.put( "feature_currentAttrValueWord_" + j + "_" + previousCurrentAttrValueWord.toLowerCase(), 1.0); } String prevCurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - 1 >= 0) { prevCurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 1) .getWord().trim(); } String prev2CurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - 2 >= 0) { prev2CurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 2) .getWord().trim(); } String prev3CurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - 3 >= 0) { prev3CurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 3) .getWord().trim(); } String prev4CurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - 4 >= 0) { prev4CurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 4) .getWord().trim(); } String prev5CurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - 5 >= 0) { prev5CurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 5) .getWord().trim(); } String prevCurrentAttrValueBigram = prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; String prevCurrentAttrValueTrigram = prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; String prevCurrentAttrValue4gram = prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; String prevCurrentAttrValue5gram = prev5CurrentAttrValueWord + "|" + 
prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; generalFeatures.put("feature_currentAttrValueWord_bigram_" + prevCurrentAttrValueBigram.toLowerCase(), 1.0); generalFeatures.put("feature_currentAttrValueWord_trigram_" + prevCurrentAttrValueTrigram.toLowerCase(), 1.0); generalFeatures.put("feature_currentAttrValueWord_4gram_" + prevCurrentAttrValue4gram.toLowerCase(), 1.0); generalFeatures.put("feature_currentAttrValueWord_5gram_" + prevCurrentAttrValue5gram.toLowerCase(), 1.0); /*String bigramCurrentAttrValueWord54 = prev5CurrentAttrValueWord + "|" + prev4CurrentAttrValueWord; String bigramCurrentAttrValueWord43 = prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord; String bigramCurrentAttrValueWord32 = prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord; generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWord54_" + bigramCurrentAttrValueWord54, 1.0); generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWord43_" + bigramCurrentAttrValueWord43, 1.0); generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWord32_" + bigramCurrentAttrValueWord32, 1.0); String bigramCurrentAttrValueWordSkip53 = prev5CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord; String bigramCurrentAttrValueWordSkip42 = prev4CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord; String bigramCurrentAttrValueWordSkip31 = prev3CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWordSkip53_" + bigramCurrentAttrValueWordSkip53, 1.0); generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWordSkip42_" + bigramCurrentAttrValueWordSkip42, 1.0); generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWordSkip31_" + bigramCurrentAttrValueWordSkip31, 1.0); String trigramCurrentAttrValueWord543 = prev5CurrentAttrValueWord + "|" + 
prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord; String trigramCurrentAttrValueWord432 = prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord; generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWord543_" + trigramCurrentAttrValueWord543, 1.0); generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWord432_" + trigramCurrentAttrValueWord432, 1.0); String trigramCurrentAttrValueWordSkip542 = prev5CurrentAttrValueWord + "|" + prev4CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord; String trigramCurrentAttrValueWordSkip532 = prev5CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord; String trigramCurrentAttrValueWordSkip431 = prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; String trigramCurrentAttrValueWordSkip421 = prev4CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWordSkip542_" + trigramCurrentAttrValueWordSkip542, 1.0); generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWordSkip532_" + trigramCurrentAttrValueWordSkip532, 1.0); generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWordSkip431_" + trigramCurrentAttrValueWordSkip431, 1.0); generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWordSkip421_" + trigramCurrentAttrValueWordSkip421, 1.0);*/ //Previous Attr|Word features for (int j = 1; j <= 1; j++) { String previousAttrWord = "@@"; if (generatedWords.size() - j >= 0) { if (generatedWords.get(generatedWords.size() - j).getAttribute().contains("=")) { previousAttrWord = generatedWords.get(generatedWords.size() - j).getAttribute().trim() .substring(0, generatedWords.get(generatedWords.size() - j).getAttribute().indexOf('=')) + "|" + generatedWords.get(generatedWords.size() - j).getWord().trim(); } else { 
previousAttrWord = generatedWords.get(generatedWords.size() - j).getAttribute().trim() + "|" + generatedWords.get(generatedWords.size() - j).getWord().trim(); } } generalFeatures.put("feature_attrWord_" + j + "_" + previousAttrWord.toLowerCase(), 1.0); } String prevAttrWord = "@@"; if (generatedWords.size() - 1 >= 0) { if (generatedWords.get(generatedWords.size() - 1).getAttribute().contains("=")) { prevAttrWord = generatedWords.get(generatedWords.size() - 1).getAttribute().trim().substring(0, generatedWords.get(generatedWords.size() - 1).getAttribute().indexOf('=')) + ":" + generatedWords.get(generatedWords.size() - 1).getWord().trim(); } else { prevAttrWord = generatedWords.get(generatedWords.size() - 1).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 1).getWord().trim(); } } String prev2AttrWord = "@@"; if (generatedWords.size() - 2 >= 0) { if (generatedWords.get(generatedWords.size() - 2).getAttribute().contains("=")) { prev2AttrWord = generatedWords.get(generatedWords.size() - 2).getAttribute().trim().substring(0, generatedWords.get(generatedWords.size() - 2).getAttribute().indexOf('=')) + ":" + generatedWords.get(generatedWords.size() - 2).getWord().trim(); } else { prev2AttrWord = generatedWords.get(generatedWords.size() - 2).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 2).getWord().trim(); } } String prev3AttrWord = "@@"; if (generatedWords.size() - 3 >= 0) { if (generatedWords.get(generatedWords.size() - 3).getAttribute().contains("=")) { prev3AttrWord = generatedWords.get(generatedWords.size() - 3).getAttribute().trim().substring(0, generatedWords.get(generatedWords.size() - 3).getAttribute().indexOf('=')) + ":" + generatedWords.get(generatedWords.size() - 3).getWord().trim(); } else { prev3AttrWord = generatedWords.get(generatedWords.size() - 3).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 3).getWord().trim(); } } String prev4AttrWord = "@@"; if (generatedWords.size() - 4 >= 
0) { if (generatedWords.get(generatedWords.size() - 4).getAttribute().contains("=")) { prev4AttrWord = generatedWords.get(generatedWords.size() - 4).getAttribute().trim().substring(0, generatedWords.get(generatedWords.size() - 4).getAttribute().indexOf('=')) + ":" + generatedWords.get(generatedWords.size() - 4).getWord().trim(); } else { prev4AttrWord = generatedWords.get(generatedWords.size() - 4).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 4).getWord().trim(); } } String prev5AttrWord = "@@"; if (generatedWords.size() - 5 >= 0) { if (generatedWords.get(generatedWords.size() - 5).getAttribute().contains("=")) { prev5AttrWord = generatedWords.get(generatedWords.size() - 5).getAttribute().trim().substring(0, generatedWords.get(generatedWords.size() - 5).getAttribute().indexOf('=')) + ":" + generatedWords.get(generatedWords.size() - 5).getWord().trim(); } else { prev5AttrWord = generatedWords.get(generatedWords.size() - 5).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 5).getWord().trim(); } } String prevAttrWordBigram = prev2AttrWord + "|" + prevAttrWord; String prevAttrWordTrigram = prev3AttrWord + "|" + prev2AttrWord + "|" + prevAttrWord; String prevAttrWord4gram = prev4AttrWord + "|" + prev3AttrWord + "|" + prev2AttrWord + "|" + prevAttrWord; String prevAttrWord5gram = prev5AttrWord + "|" + prev4AttrWord + "|" + prev3AttrWord + "|" + prev2AttrWord + "|" + prevAttrWord; generalFeatures.put("feature_attrWord_bigram_" + prevAttrWordBigram.toLowerCase(), 1.0); generalFeatures.put("feature_attrWord_trigram_" + prevAttrWordTrigram.toLowerCase(), 1.0); generalFeatures.put("feature_attrWord_4gram_" + prevAttrWord4gram.toLowerCase(), 1.0); generalFeatures.put("feature_attrWord_5gram_" + prevAttrWord5gram.toLowerCase(), 1.0); /*String bigramAttrWord54 = prev5AttrWord + "|" + prev4AttrWord; String bigramAttrWord43 = prev4AttrWord + "|" + prev3AttrWord; String bigramAttrWord32 = prev3AttrWord + "|" + prev2AttrWord; 
generalFeatures.put("feature_attrWord_bigramAttrWord54_" + bigramAttrWord54, 1.0); generalFeatures.put("feature_attrWord_bigramAttrWord43_" + bigramAttrWord43, 1.0); generalFeatures.put("feature_attrWord_bigramAttrWord32_" + bigramAttrWord32, 1.0); String bigramAttrWordSkip53 = prev5AttrWord + "|" + prev3AttrWord; String bigramAttrWordSkip42 = prev4AttrWord + "|" + prev2AttrWord; String bigramAttrWordSkip31 = prev3AttrWord + "|" + prevAttrWord; generalFeatures.put("feature_attrWord_bigramAttrWordSkip53_" + bigramAttrWordSkip53, 1.0); generalFeatures.put("feature_attrWord_bigramAttrWordSkip42_" + bigramAttrWordSkip42, 1.0); generalFeatures.put("feature_attrWord_bigramAttrWordSkip31_" + bigramAttrWordSkip31, 1.0); String trigramAttrWord543 = prev5AttrWord + "|" + prev4AttrWord + "|" + prev3AttrWord; String trigramAttrWord432 = prev4AttrWord + "|" + prev3AttrWord + "|" + prev2AttrWord; generalFeatures.put("feature_attrWord_trigramAttrWord543_" + trigramAttrWord543, 1.0); generalFeatures.put("feature_attrWord_trigramAttrWord432_" + trigramAttrWord432, 1.0); String trigramAttrWordSkip542 = prev5AttrWord + "|" + prev4AttrWord + "|" + prev2AttrWord; String trigramAttrWordSkip532 = prev5AttrWord + "|" + prev3AttrWord + "|" + prev2AttrWord; String trigramAttrWordSkip431 = prev4AttrWord + "|" + prev3AttrWord + "|" + prevAttrWord; String trigramAttrWordSkip421 = prev4AttrWord + "|" + prev2AttrWord + "|" + prevAttrWord; generalFeatures.put("feature_attrWord_trigramAttrWordSkip542_" + trigramAttrWordSkip542, 1.0); generalFeatures.put("feature_attrWord_trigramAttrWordSkip532_" + trigramAttrWordSkip532, 1.0); generalFeatures.put("feature_attrWord_trigramAttrWordSkip431_" + trigramAttrWordSkip431, 1.0); generalFeatures.put("feature_attrWord_trigramAttrWordSkip421_" + trigramAttrWordSkip421, 1.0);*/ //Previous AttrValue|Word features for (int j = 1; j <= 1; j++) { String previousAttrWord = "@@"; if (generatedWords.size() - j >= 0) { previousAttrWord = 
generatedWords.get(generatedWords.size() - j).getAttribute().trim() + "|" + generatedWords.get(generatedWords.size() - j).getWord().trim(); } generalFeatures.put("feature_attrValueWord_" + j + "_" + previousAttrWord.toLowerCase(), 1.0); } String prevAttrValueWord = "@@"; if (generatedWords.size() - 1 >= 0) { prevAttrValueWord = generatedWords.get(generatedWords.size() - 1).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 1).getWord().trim(); } String prev2AttrValueWord = "@@"; if (generatedWords.size() - 2 >= 0) { prev2AttrValueWord = generatedWords.get(generatedWords.size() - 2).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 2).getWord().trim(); } String prev3AttrValueWord = "@@"; if (generatedWords.size() - 3 >= 0) { prev3AttrValueWord = generatedWords.get(generatedWords.size() - 3).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 3).getWord().trim(); } String prev4AttrValueWord = "@@"; if (generatedWords.size() - 4 >= 0) { prev4AttrValueWord = generatedWords.get(generatedWords.size() - 4).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 4).getWord().trim(); } String prev5AttrValueWord = "@@"; if (generatedWords.size() - 5 >= 0) { prev5AttrValueWord = generatedWords.get(generatedWords.size() - 5).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 5).getWord().trim(); } String prevAttrValueWordBigram = prev2AttrValueWord + "|" + prevAttrValueWord; String prevAttrValueWordTrigram = prev3AttrValueWord + "|" + prev2AttrValueWord + "|" + prevAttrValueWord; String prevAttrValueWord4gram = prev4AttrValueWord + "|" + prev3AttrValueWord + "|" + prev2AttrValueWord + "|" + prevAttrValueWord; String prevAttrValueWord5gram = prev5AttrValueWord + "|" + prev4AttrValueWord + "|" + prev3AttrValueWord + "|" + prev2AttrValueWord + "|" + prevAttrValueWord; generalFeatures.put("feature_attrValueWord_bigram_" + prevAttrValueWordBigram.toLowerCase(), 1.0); 
generalFeatures.put("feature_attrValueWord_trigram_" + prevAttrValueWordTrigram.toLowerCase(), 1.0); generalFeatures.put("feature_attrValueWord_4gram_" + prevAttrValueWord4gram.toLowerCase(), 1.0); generalFeatures.put("feature_attrValueWord_5gram_" + prevAttrValueWord5gram.toLowerCase(), 1.0); /*String bigramAttrValueWord54 = prev5AttrValueWord + "|" + prev4AttrValueWord; String bigramAttrValueWord43 = prev4AttrValueWord + "|" + prev3AttrValueWord; String bigramAttrValueWord32 = prev3AttrValueWord + "|" + prev2AttrValueWord; generalFeatures.put("feature_attrValueWord_bigramAttrValueWord54_" + bigramAttrValueWord54, 1.0); generalFeatures.put("feature_attrValueWord_bigramAttrValueWord43_" + bigramAttrValueWord43, 1.0); generalFeatures.put("feature_attrValueWord_bigramAttrValueWord32_" + bigramAttrValueWord32, 1.0); String bigramAttrValueWordSkip53 = prev5AttrValueWord + "|" + prev3AttrValueWord; String bigramAttrValueWordSkip42 = prev4AttrValueWord + "|" + prev2AttrValueWord; String bigramAttrValueWordSkip31 = prev3AttrValueWord + "|" + prevAttrValueWord; generalFeatures.put("feature_attrValueWord_bigramAttrValueWordSkip53_" + bigramAttrValueWordSkip53, 1.0); generalFeatures.put("feature_attrValueWord_bigramAttrValueWordSkip42_" + bigramAttrValueWordSkip42, 1.0); generalFeatures.put("feature_attrValueWord_bigramAttrValueWordSkip31_" + bigramAttrValueWordSkip31, 1.0); String trigramAttrValueWord543 = prev5AttrValueWord + "|" + prev4AttrValueWord + "|" + prev3AttrValueWord; String trigramAttrValueWord432 = prev4AttrValueWord + "|" + prev3AttrValueWord + "|" + prev2AttrValueWord; generalFeatures.put("feature_attrValueWord_trigramAttrValueWord543_" + trigramAttrValueWord543, 1.0); generalFeatures.put("feature_attrValueWord_trigramAttrValueWord432_" + trigramAttrValueWord432, 1.0); String trigramAttrValueWordSkip542 = prev5AttrValueWord + "|" + prev4AttrValueWord + "|" + prev2AttrValueWord; String trigramAttrValueWordSkip532 = prev5AttrValueWord + "|" + prev3AttrValueWord 
+ "|" + prev2AttrValueWord; String trigramAttrValueWordSkip431 = prev4AttrValueWord + "|" + prev3AttrValueWord + "|" + prevAttrValueWord; String trigramAttrValueWordSkip421 = prev4AttrValueWord + "|" + prev2AttrValueWord + "|" + prevAttrValueWord; generalFeatures.put("feature_attrValueWord_trigramAttrValueWordSkip542_" + trigramAttrValueWordSkip542, 1.0); generalFeatures.put("feature_attrValueWord_trigramAttrValueWordSkip532_" + trigramAttrValueWordSkip532, 1.0); generalFeatures.put("feature_attrValueWord_trigramAttrValueWordSkip431_" + trigramAttrValueWordSkip431, 1.0); generalFeatures.put("feature_attrValueWord_trigramAttrValueWordSkip421_" + trigramAttrValueWordSkip421, 1.0);*/ //Previous attrValue features int attributeSize = generatedAttributes.size(); for (int j = 1; j <= 1; j++) { String previousAttrValue = "@@"; if (attributeSize - j >= 0) { previousAttrValue = generatedAttributes.get(attributeSize - j).trim(); } generalFeatures.put("feature_attrValue_" + j + "_" + previousAttrValue, 1.0); } String prevAttrValue = "@@"; if (attributeSize - 1 >= 0) { prevAttrValue = generatedAttributes.get(attributeSize - 1).trim(); } String prev2AttrValue = "@@"; if (attributeSize - 2 >= 0) { prev2AttrValue = generatedAttributes.get(attributeSize - 2).trim(); } String prev3AttrValue = "@@"; if (attributeSize - 3 >= 0) { prev3AttrValue = generatedAttributes.get(attributeSize - 3).trim(); } String prev4AttrValue = "@@"; if (attributeSize - 4 >= 0) { prev4AttrValue = generatedAttributes.get(attributeSize - 4).trim(); } String prev5AttrValue = "@@"; if (attributeSize - 5 >= 0) { prev5AttrValue = generatedAttributes.get(attributeSize - 5).trim(); } String prevAttrBigramValue = prev2AttrValue + "|" + prevAttrValue; String prevAttrTrigramValue = prev3AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; String prevAttr4gramValue = prev4AttrValue + "|" + prev3AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; String prevAttr5gramValue = prev5AttrValue + "|" + prev4AttrValue 
+ "|" + prev3AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; generalFeatures.put("feature_attrValue_bigram_" + prevAttrBigramValue.toLowerCase(), 1.0); generalFeatures.put("feature_attrValue_trigram_" + prevAttrTrigramValue.toLowerCase(), 1.0); generalFeatures.put("feature_attrValue_4gram_" + prevAttr4gramValue.toLowerCase(), 1.0); generalFeatures.put("feature_attrValue_5gram_" + prevAttr5gramValue.toLowerCase(), 1.0); /*String bigramAttrValue54 = prev5AttrValue + "|" + prev4AttrValue; String bigramAttrValue43 = prev4AttrValue + "|" + prev3AttrValue; String bigramAttrValue32 = prev3AttrValue + "|" + prev2AttrValue; generalFeatures.put("feature_attrValue_bigramAttrValue54_" + bigramAttrValue54, 1.0); generalFeatures.put("feature_attrValue_bigramAttrValue43_" + bigramAttrValue43, 1.0); generalFeatures.put("feature_attrValue_bigramAttrValue32_" + bigramAttrValue32, 1.0); String bigramAttrValueSkip53 = prev5AttrValue + "|" + prev3AttrValue; String bigramAttrValueSkip42 = prev4AttrValue + "|" + prev2AttrValue; String bigramAttrValueSkip31 = prev3AttrValue + "|" + prevAttrValue; generalFeatures.put("feature_attrValue_bigramAttrValueSkip53_" + bigramAttrValueSkip53, 1.0); generalFeatures.put("feature_attrValue_bigramAttrValueSkip42_" + bigramAttrValueSkip42, 1.0); generalFeatures.put("feature_attrValue_bigramAttrValueSkip31_" + bigramAttrValueSkip31, 1.0); String trigramAttrValue543 = prev5AttrValue + "|" + prev4AttrValue + "|" + prev3AttrValue; String trigramAttrValue432 = prev4AttrValue + "|" + prev3AttrValue + "|" + prev2AttrValue; generalFeatures.put("feature_attrValue_trigramAttrValue543_" + trigramAttrValue543, 1.0); generalFeatures.put("feature_attrValue_trigramAttrValue432_" + trigramAttrValue432, 1.0); String trigramAttrValueSkip542 = prev5AttrValue + "|" + prev4AttrValue + "|" + prev2AttrValue; String trigramAttrValueSkip532 = prev5AttrValue + "|" + prev3AttrValue + "|" + prev2AttrValue; String trigramAttrValueSkip431 = prev4AttrValue + "|" + 
prev3AttrValue + "|" + prevAttrValue; String trigramAttrValueSkip421 = prev4AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; generalFeatures.put("feature_attrValue_trigramAttrValueSkip542_" + trigramAttrValueSkip542, 1.0); generalFeatures.put("feature_attrValue_trigramAttrValueSkip532_" + trigramAttrValueSkip532, 1.0); generalFeatures.put("feature_attrValue_trigramAttrValueSkip431_" + trigramAttrValueSkip431, 1.0); generalFeatures.put("feature_attrValue_trigramAttrValueSkip421_" + trigramAttrValueSkip421, 1.0);*/ //Previous attr features for (int j = 1; j <= 1; j++) { String previousAttr = "@@"; if (attributeSize - j >= 0) { if (generatedAttributes.get(attributeSize - j).contains("=")) { previousAttr = generatedAttributes.get(attributeSize - j).trim().substring(0, generatedAttributes.get(attributeSize - j).indexOf('=')); } else { previousAttr = generatedAttributes.get(attributeSize - j).trim(); } } generalFeatures.put("feature_attr_" + j + "_" + previousAttr, 1.0); } String prevAttr = "@@"; if (attributeSize - 1 >= 0) { if (generatedAttributes.get(attributeSize - 1).contains("=")) { prevAttr = generatedAttributes.get(attributeSize - 1).trim().substring(0, generatedAttributes.get(attributeSize - 1).indexOf('=')); } else { prevAttr = generatedAttributes.get(attributeSize - 1).trim(); } } String prev2Attr = "@@"; if (attributeSize - 2 >= 0) { if (generatedAttributes.get(attributeSize - 2).contains("=")) { prev2Attr = generatedAttributes.get(attributeSize - 2).trim().substring(0, generatedAttributes.get(attributeSize - 2).indexOf('=')); } else { prev2Attr = generatedAttributes.get(attributeSize - 2).trim(); } } String prev3Attr = "@@"; if (attributeSize - 3 >= 0) { if (generatedAttributes.get(attributeSize - 3).contains("=")) { prev3Attr = generatedAttributes.get(attributeSize - 3).trim().substring(0, generatedAttributes.get(attributeSize - 3).indexOf('=')); } else { prev3Attr = generatedAttributes.get(attributeSize - 3).trim(); } } String prev4Attr = "@@"; if 
(attributeSize - 4 >= 0) { if (generatedAttributes.get(attributeSize - 4).contains("=")) { prev4Attr = generatedAttributes.get(attributeSize - 4).trim().substring(0, generatedAttributes.get(attributeSize - 4).indexOf('=')); } else { prev4Attr = generatedAttributes.get(attributeSize - 4).trim(); } } String prev5Attr = "@@"; if (attributeSize - 5 >= 0) { if (generatedAttributes.get(attributeSize - 5).contains("=")) { prev5Attr = generatedAttributes.get(attributeSize - 5).trim().substring(0, generatedAttributes.get(attributeSize - 5).indexOf('=')); } else { prev5Attr = generatedAttributes.get(attributeSize - 5).trim(); } } String prevAttrBigram = prev2Attr + "|" + prevAttr; String prevAttrTrigram = prev3Attr + "|" + prev2Attr + "|" + prevAttr; String prevAttr4gram = prev4Attr + "|" + prev3Attr + "|" + prev2Attr + "|" + prevAttr; String prevAttr5gram = prev5Attr + "|" + prev4Attr + "|" + prev3Attr + "|" + prev2Attr + "|" + prevAttr; generalFeatures.put("feature_attr_bigram_" + prevAttrBigram.toLowerCase(), 1.0); generalFeatures.put("feature_attr_trigram_" + prevAttrTrigram.toLowerCase(), 1.0); generalFeatures.put("feature_attr_4gram_" + prevAttr4gram.toLowerCase(), 1.0); generalFeatures.put("feature_attr_5gram_" + prevAttr5gram.toLowerCase(), 1.0); /*String bigramAttr54 = prev5Attr + "|" + prev4Attr; String bigramAttr43 = prev4Attr + "|" + prev3Attr; String bigramAttr32 = prev3Attr + "|" + prev2Attr; generalFeatures.put("feature_attr_bigramAttr54_" + bigramAttr54, 1.0); generalFeatures.put("feature_attr_bigramAttr43_" + bigramAttr43, 1.0); generalFeatures.put("feature_attr_bigramAttr32_" + bigramAttr32, 1.0); String bigramAttrSkip53 = prev5Attr + "|" + prev3Attr; String bigramAttrSkip42 = prev4Attr + "|" + prev2Attr; String bigramAttrSkip31 = prev3Attr + "|" + prevAttr; generalFeatures.put("feature_attr_bigramAttrSkip53_" + bigramAttrSkip53, 1.0); generalFeatures.put("feature_attr_bigramAttrSkip42_" + bigramAttrSkip42, 1.0); 
generalFeatures.put("feature_attr_bigramAttrSkip31_" + bigramAttrSkip31, 1.0); String trigramAttr543 = prev5Attr + "|" + prev4Attr + "|" + prev3Attr; String trigramAttr432 = prev4Attr + "|" + prev3Attr + "|" + prev2Attr; generalFeatures.put("feature_attr_trigramAttr543_" + trigramAttr543, 1.0); generalFeatures.put("feature_attr_trigramAttr432_" + trigramAttr432, 1.0); String trigramAttrSkip542 = prev5Attr + "|" + prev4Attr + "|" + prev2Attr; String trigramAttrSkip532 = prev5Attr + "|" + prev3Attr + "|" + prev2Attr; String trigramAttrSkip431 = prev4Attr + "|" + prev3Attr + "|" + prevAttr; String trigramAttrSkip421 = prev4Attr + "|" + prev2Attr + "|" + prevAttr; generalFeatures.put("feature_attr_trigramAttrSkip542_" + trigramAttrSkip542, 1.0); generalFeatures.put("feature_attr_trigramAttrSkip532_" + trigramAttrSkip532, 1.0); generalFeatures.put("feature_attr_trigramAttrSkip431_" + trigramAttrSkip431, 1.0); generalFeatures.put("feature_attr_trigramAttrSkip421_" + trigramAttrSkip421, 1.0);*/ //If values have already been generated or not generalFeatures.put("feature_valueToBeMentioned_" + currentValue.toLowerCase(), 1.0); if (wasValueMentioned) { generalFeatures.put("feature_wasValueMentioned_true", 1.0); } else { //generalFeatures.put("feature_wasValueMentioned_false", 1.0); } HashSet<String> valuesThatFollow = new HashSet<>(); for (String attrValue : attrValuesThatFollow) { generalFeatures.put("feature_attrValuesThatFollow_" + attrValue.toLowerCase(), 1.0); if (attrValue.contains("=")) { String v = attrValue.substring(attrValue.indexOf('=') + 1); if (v.matches("[xX][0-9]+")) { String attr = attrValue.substring(0, attrValue.indexOf('=')); valuesThatFollow.add(SFX.TOKEN_X + attr + "_" + v.substring(1)); } else { valuesThatFollow.add(v); } generalFeatures.put( "feature_attrsThatFollow_" + attrValue.substring(0, attrValue.indexOf('=')).toLowerCase(), 1.0); } else { generalFeatures.put("feature_attrsThatFollow_" + attrValue.toLowerCase(), 1.0); } } HashSet<String> 
mentionedValues = new HashSet<>(); for (String attrValue : attrValuesAlreadyMentioned) { generalFeatures.put("feature_attrValuesAlreadyMentioned_" + attrValue.toLowerCase(), 1.0); if (attrValue.contains("=")) { generalFeatures.put("feature_attrsAlreadyMentioned_" + attrValue.substring(0, attrValue.indexOf('=')).toLowerCase(), 1.0); String v = attrValue.substring(attrValue.indexOf('=') + 1); if (v.matches("[xX][0-9]+")) { String attr = attrValue.substring(0, attrValue.indexOf('=')); mentionedValues.add(SFX.TOKEN_X + attr + "_" + v.substring(1)); } else { mentionedValues.add(v); } } else { generalFeatures.put("feature_attrsAlreadyMentioned_" + attrValue.toLowerCase(), 1.0); } } /*System.out.println("currentAttrValue: " + currentAttrValue); System.out.println("5W: " + prev5gram); System.out.println("5AW: " + prevAttrWord5gram); System.out.println("5A: " + prevAttr5gram); System.out.println("VM: " + wasValueMentioned); System.out.println("A_TF: " + attrValuesThatFollow); System.out.println("==============================");*/ if (currentValue.equals("no") || currentValue.equals("yes") || currentValue.equals("yes or no") || currentValue.equals("none") || currentValue.equals("empty") || currentValue.equals("dont_care")) { generalFeatures.put("feature_emptyValue", 1.0); } //Word specific features (and also global features) for (Action action : availableWordActions.get(currentAttr)) { //Is word same as previous word if (prevWord.equals(action.getWord())) { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_sameAsPreviousWord", 1.0); valueSpecificFeatures.get(action.getWord()).put("global_feature_specific_sameAsPreviousWord", 1.0); } else { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_notSameAsPreviousWord", 1.0); valueSpecificFeatures.get(action.getWord()).put("global_feature_specific_notSameAsPreviousWord", 1.0); } //Has word appeared in the same attrValue before for (Action previousAction : generatedWords) { if 
(previousAction.getWord().equals(action.getWord()) && previousAction.getAttribute().equals(currentAttrValue)) { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_appearedInSameAttrValue", 1.0); valueSpecificFeatures.get(action.getWord()) .put("global_feature_specific_appearedInSameAttrValue", 1.0); } else { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_notAppearedInSameAttrValue", 1.0); //valueSpecificFeatures.get(action.getWord()).put("global_feature_specific_notAppearedInSameAttrValue", 1.0); } } //Has word appeared before for (Action previousAction : generatedWords) { if (previousAction.getWord().equals(action.getWord())) { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_appeared", 1.0); valueSpecificFeatures.get(action.getWord()).put("global_feature_specific_appeared", 1.0); } else { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_notAppeared", 1.0); //valueSpecificFeatures.get(action.getWord()).put("global_feature_specific_notAppeared", 1.0); } } if (currentValue.equals("no") || currentValue.equals("yes") || currentValue.equals("yes or no") || currentValue.equals("none") || currentValue.equals("empty") || currentValue.equals("dont_care")) { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_emptyValue", 1.0); valueSpecificFeatures.get(action.getWord()).put("global_feature_specific_emptyValue", 1.0); } else { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_notEmptyValue", 1.0); //valueSpecificFeatures.get(action.getWord()).put("global_feature_specific_notEmptyValue", 1.0); } if (!action.getWord().startsWith(SFX.TOKEN_X)) { for (String value : valueAlignments.keySet()) { for (ArrayList<String> alignedStr : valueAlignments.get(value).keySet()) { if (alignedStr.get(0).equals(action.getWord())) { if (mentionedValues.contains(value)) { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_beginsValue_alreadyMentioned", 1.0); 
valueSpecificFeatures.get(action.getWord()) .put("global_feature_specific_beginsValue_alreadyMentioned", 1.0); } else if (currentValue.equals(value)) { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_beginsValue_current", 1.0); valueSpecificFeatures.get(action.getWord()) .put("global_feature_specific_beginsValue_current", 1.0); } else if (valuesThatFollow.contains(value)) { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_beginsValue_thatFollows", 1.0); valueSpecificFeatures.get(action.getWord()) .put("global_feature_specific_beginsValue_thatFollows", 1.0); } else { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_beginsValue_notInMR", 1.0); valueSpecificFeatures.get(action.getWord()) .put("global_feature_specific_beginsValue_notInMR", 1.0); } } else { for (int i = 1; i < alignedStr.size(); i++) { if (alignedStr.get(i).equals(action.getWord())) { if (endsWith(generatedPhrase, new ArrayList<String>(alignedStr.subList(0, i + 1)))) { if (mentionedValues.contains(value)) { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_inValue_alreadyMentioned", 1.0); valueSpecificFeatures.get(action.getWord()) .put("global_feature_specific_inValue_alreadyMentioned", 1.0); } else if (currentValue.equals(value)) { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_inValue_current", 1.0); valueSpecificFeatures.get(action.getWord()) .put("global_feature_specific_inValue_current", 1.0); } else if (valuesThatFollow.contains(value)) { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_inValue_thatFollows", 1.0); valueSpecificFeatures.get(action.getWord()) .put("global_feature_specific_inValue_thatFollows", 1.0); } else { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_inValue_notInMR", 1.0); valueSpecificFeatures.get(action.getWord()) .put("global_feature_specific_inValue_notInMR", 1.0); } } else { /*if (mentionedValues.contains(value)) { 
valueSpecificFeatures.get(action.getWord()).put("feature_specific_outOfValue_alreadyMentioned", 1.0); } else if (currentValue.equals(value)) { valueSpecificFeatures.get(action.getWord()).put("feature_specific_outOfValue_current", 1.0); } else if (valuesThatFollow.contains(value)) { valueSpecificFeatures.get(action.getWord()).put("feature_specific_outOfValue_thatFollows", 1.0); } else { valueSpecificFeatures.get(action.getWord()).put("feature_specific_outOfValue_notInMR", 1.0); }*/ //valueSpecificFeatures.get(action.getWord()).put("feature_specific_outOfValue", 1.0); valueSpecificFeatures.get(action.getWord()) .put("global_feature_specific_outOfValue", 1.0); } } } } } } if (action.getWord().equals(SFX.TOKEN_END)) { if (generatedWordsInSameAttrValue.isEmpty()) { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_closingEmptyAttr", 1.0); valueSpecificFeatures.get(action.getWord()).put("global_feature_specific_closingEmptyAttr", 1.0); } if (!wasValueMentioned) { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_closingAttrWithValueNotMentioned", 1.0); valueSpecificFeatures.get(action.getWord()) .put("global_feature_specific_closingAttrWithValueNotMentioned", 1.0); } if (!prevCurrentAttrValueWord.equals("@@")) { boolean alignmentIsOpen = false; for (String value : valueAlignments.keySet()) { for (ArrayList<String> alignedStr : valueAlignments.get(value).keySet()) { for (int i = 0; i < alignedStr.size() - 1; i++) { if (alignedStr.get(i).equals(prevCurrentAttrValueWord)) { if (endsWith(generatedPhrase, new ArrayList<String>(alignedStr.subList(0, i + 1)))) { alignmentIsOpen = true; } } } } } if (alignmentIsOpen) { // valueSpecificFeatures.get(action.getWord()).put("feature_specific_closingAttrWhileValueIsNotConcluded", 1.0); valueSpecificFeatures.get(action.getWord()) .put("global_feature_specific_closingAttrWhileValueIsNotConcluded", 1.0); } } } } else { if (currentValue.equals("no") || currentValue.equals("yes") || 
currentValue.equals("yes or no") || currentValue.equals("none") || currentValue.equals("empty") || currentValue.equals("dont_care")) { valueSpecificFeatures.get(action.getWord()).put("global_feature_specific_XValue_notInMR", 1.0); } else { String currentValueVariant = ""; if (currentValue.matches("[xX][0-9]+")) { currentValueVariant = SFX.TOKEN_X + currentAttr + "_" + currentValue.substring(1); } if (mentionedValues.contains(action.getWord())) { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_XValue_alreadyMentioned", 1.0); valueSpecificFeatures.get(action.getWord()) .put("global_feature_specific_XValue_alreadyMentioned", 1.0); } else if (currentValueVariant.equals(action.getWord()) && !currentValueVariant.isEmpty()) { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_XValue_current", 1.0); valueSpecificFeatures.get(action.getWord()).put("global_feature_specific_XValue_current", 1.0); } else if (valuesThatFollow.contains(action.getWord())) { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_XValue_thatFollows", 1.0); valueSpecificFeatures.get(action.getWord()) .put("global_feature_specific_XValue_thatFollows", 1.0); } else { //valueSpecificFeatures.get(action.getWord()).put("feature_specific_XValue_notInMR", 1.0); valueSpecificFeatures.get(action.getWord()).put("global_feature_specific_XValue_notInMR", 1.0); } } } /*for (int i : nGrams.keySet()) { for (String nGram : nGrams.get(i)) { if (i == 2) { if (nGram.startsWith(prevWord + "|") && nGram.endsWith("|" + action.getWord())) { valueSpecificFeatures.get(action.getWord()).put("feature_specific_valuesFollowsPreviousWord", 1.0); } } else if (i == 3) { if (nGram.startsWith(prevBigram + "|") && nGram.endsWith("|" + action.getWord())) { valueSpecificFeatures.get(action.getWord()).put("feature_specific_valuesFollowsPreviousBigram", 1.0); } } else if (i == 4) { if (nGram.startsWith(prevTrigram + "|") && nGram.endsWith("|" + action.getWord())) { 
valueSpecificFeatures.get(action.getWord()).put("feature_specific_valuesFollowsPreviousTrigram", 1.0); } } else if (i == 5) { if (nGram.startsWith(prev4gram + "|") && nGram.endsWith("|" + action.getWord())) { valueSpecificFeatures.get(action.getWord()).put("feature_specific_valuesFollowsPrevious4gram", 1.0); } } else if (i == 6) { if (nGram.startsWith(prev5gram + "|") && nGram.endsWith("|" + action.getWord())) { valueSpecificFeatures.get(action.getWord()).put("feature_specific_valuesFollowsPrevious5gram", 1.0); } } } }*/ HashSet<String> keys = new HashSet<>(valueSpecificFeatures.get(action.getWord()).keySet()); for (String feature1 : keys) { for (String feature2 : keys) { if (valueSpecificFeatures.get(action.getWord()).get(feature1) == 1.0 && valueSpecificFeatures.get(action.getWord()).get(feature2) == 1.0 && feature1.compareTo(feature2) < 0) { valueSpecificFeatures.get(action.getWord()).put(feature1 + "&&" + feature2, 1.0); } } } } /*HashSet<String> keys = new HashSet<>(generalFeatures.keySet()); for (String feature1 : keys) { for (String feature2 : keys) { if (generalFeatures.get(feature1) == 1.0 && generalFeatures.get(feature2) == 1.0 && feature1.compareTo(feature2) < 0) { generalFeatures.put(feature1 + "&&" + feature2, 1.0); } } }*/ return new Instance(generalFeatures, valueSpecificFeatures, costs); }
From source file:com.google.bitcoin.core.Wallet.java
/** * Returns an list of N transactions, ordered by increasing age. Transactions on side chains are not included. * Dead transactions (overridden by double spends) are optionally included. <p> * <p/>//from ww w. j ava 2s .com * Note: the current implementation is O(num transactions in wallet). Regardless of how many transactions are * requested, the cost is always the same. In future, requesting smaller numbers of transactions may be faster * depending on how the wallet is implemented (eg if backed by a database). */ public List<Transaction> getRecentTransactions(int numTransactions, boolean includeDead) { lock.lock(); try { checkArgument(numTransactions >= 0); // Firstly, put all transactions into an array. int size = getPoolSize(Pool.UNSPENT) + getPoolSize(Pool.SPENT) + getPoolSize(Pool.PENDING); if (numTransactions > size || numTransactions == 0) { numTransactions = size; } ArrayList<Transaction> all = new ArrayList<Transaction>(getTransactions(includeDead)); // Order by date. Collections.sort(all, Collections.reverseOrder(new Comparator<Transaction>() { public int compare(Transaction t1, Transaction t2) { return t1.getUpdateTime().compareTo(t2.getUpdateTime()); } })); if (numTransactions == all.size()) { return all; } else { all.subList(numTransactions, all.size()).clear(); return all; } } finally { lock.unlock(); } }
From source file:com.krawler.spring.iphone.iphoneController.java
private String getModuleReports(HttpServletRequest request) throws ServiceException { String jdata = ""; try {/*from w w w . j a v a 2 s. c o m*/ ArrayList li = new ArrayList(); getLeadsReportsLink(request, li); getAccountReportsLink(request, li); getContactReportsLink(request, li); getOpportunityReportsLink(request, li); getActivityReportsLink(request, li); getCaseReportsLink(request, li); getProductReportsLink(request, li); getOpportunityProductReportsLink(request, li); getSalesReportsLink(request, li); getCampaignReportsLink(request, li); getTargetReportsLink(request, li); int start = 0; //Integer.parseInt(request.getParameter("start")); int limit = 45;//Integer.parseInt(request.getParameter("limit")); String limitReport = request.getParameter("limitReport"); if (!StringUtil.isNullOrEmpty(limitReport)) { limit = Integer.parseInt(limitReport); } limit = (start + limit) > li.size() ? li.size() : (start + limit); List currli = (List) li.subList(start, limit); Iterator it = currli.iterator(); ArrayList newArr = new ArrayList(); while (it.hasNext()) { newArr.add(it.next()); } JSONObject jobj = new JSONObject("{\"count\":" + li.size() + ",\"data\":" + newArr.toString() + "}"); jdata = jobj.toString(); } catch (JSONException ex) { logger.warn(ex.getMessage(), ex); String esc = ex.toString(); jdata = "{'success':'false','data':[]}"; } catch (ServiceException ex) { logger.warn(ex.getMessage(), ex); jdata = "{'success':'false','data':[]}"; } return jdata; }
From source file:structuredPredictionNLG.SFX.java
/** * * @param predicate//from w w w. j a va2s . c om * @param currentAttrValue * @param costs * @param generatedAttributes * @param previousGeneratedWords * @param nextGeneratedAttributes * @param attrValuesAlreadyMentioned * @param attrValuesThatFollow * @param wasValueMentioned * @param availableWordActions * @return */ @Override public Instance createWordInstanceWithCosts(String predicate, String currentAttrValue, TObjectDoubleHashMap<String> costs, ArrayList<String> generatedAttributes, ArrayList<Action> previousGeneratedWords, ArrayList<String> nextGeneratedAttributes, HashSet<String> attrValuesAlreadyMentioned, HashSet<String> attrValuesThatFollow, boolean wasValueMentioned, HashMap<String, HashSet<Action>> availableWordActions) { String currentAttr = currentAttrValue; String currentValue = ""; if (currentAttr.contains("=")) { currentAttr = currentAttrValue.substring(0, currentAttrValue.indexOf('=')); currentValue = currentAttrValue.substring(currentAttrValue.indexOf('=') + 1); } if (currentValue.contains(":")) { currentValue = currentAttrValue.substring(currentAttrValue.indexOf(':') + 1); } if (currentValue.isEmpty()) { //System.exit(0); } TObjectDoubleHashMap<String> generalFeatures = new TObjectDoubleHashMap<>(); HashMap<String, TObjectDoubleHashMap<String>> valueSpecificFeatures = new HashMap<>(); for (Action action : availableWordActions.get(currentAttr)) { valueSpecificFeatures.put(action.getAction(), new TObjectDoubleHashMap<String>()); } /*if (gWords.get(wIndex).getWord().equals(Action.TOKEN_END)) { System.out.println("!!! 
"+ gWords.subList(0, wIndex + 1)); }*/ ArrayList<Action> generatedWords = new ArrayList<>(); ArrayList<Action> generatedWordsInSameAttrValue = new ArrayList<>(); ArrayList<String> generatedPhrase = new ArrayList<>(); for (int i = 0; i < previousGeneratedWords.size(); i++) { Action a = previousGeneratedWords.get(i); if (!a.getWord().equals(Action.TOKEN_START) && !a.getWord().equals(Action.TOKEN_END)) { generatedWords.add(a); generatedPhrase.add(a.getWord()); if (a.getAttribute().equals(currentAttrValue)) { generatedWordsInSameAttrValue.add(a); } } } //Previous word features for (int j = 1; j <= 1; j++) { String previousWord = "@@"; if (generatedWords.size() - j >= 0) { previousWord = generatedWords.get(generatedWords.size() - j).getWord().trim(); } generalFeatures.put("feature_word_" + j + "_" + previousWord.toLowerCase(), 1.0); } String prevWord = "@@"; if (generatedWords.size() - 1 >= 0) { prevWord = generatedWords.get(generatedWords.size() - 1).getWord().trim(); } String prev2Word = "@@"; if (generatedWords.size() - 2 >= 0) { prev2Word = generatedWords.get(generatedWords.size() - 2).getWord().trim(); } String prev3Word = "@@"; if (generatedWords.size() - 3 >= 0) { prev3Word = generatedWords.get(generatedWords.size() - 3).getWord().trim(); } String prev4Word = "@@"; if (generatedWords.size() - 4 >= 0) { prev4Word = generatedWords.get(generatedWords.size() - 4).getWord().trim(); } String prev5Word = "@@"; if (generatedWords.size() - 5 >= 0) { prev5Word = generatedWords.get(generatedWords.size() - 5).getWord().trim(); } String prevBigram = prev2Word + "|" + prevWord; String prevTrigram = prev3Word + "|" + prev2Word + "|" + prevWord; String prev4gram = prev4Word + "|" + prev3Word + "|" + prev2Word + "|" + prevWord; String prev5gram = prev5Word + "|" + prev4Word + "|" + prev3Word + "|" + prev2Word + "|" + prevWord; generalFeatures.put("feature_word_bigram_" + prevBigram.toLowerCase(), 1.0); generalFeatures.put("feature_word_trigram_" + prevTrigram.toLowerCase(), 1.0); 
generalFeatures.put("feature_word_4gram_" + prev4gram.toLowerCase(), 1.0); generalFeatures.put("feature_word_5gram_" + prev5gram.toLowerCase(), 1.0); /*String bigramWord54 = prev5Word + "|" + prev4Word; String bigramWord43 = prev4Word + "|" + prev3Word; String bigramWord32 = prev3Word + "|" + prev2Word; generalFeatures.put("feature_word_bigramWord54_" + bigramWord54, 1.0); generalFeatures.put("feature_word_bigramWord43_" + bigramWord43, 1.0); generalFeatures.put("feature_word_bigramWord32_" + bigramWord32, 1.0); String bigramWordSkip53 = prev5Word + "|" + prev3Word; String bigramWordSkip42 = prev4Word + "|" + prev2Word; String bigramWordSkip31 = prev3Word + "|" + prevWord; generalFeatures.put("feature_word_bigramWordSkip53_" + bigramWordSkip53, 1.0); generalFeatures.put("feature_word_bigramWordSkip42_" + bigramWordSkip42, 1.0); generalFeatures.put("feature_word_bigramWordSkip31_" + bigramWordSkip31, 1.0); String trigramWord543 = prev5Word + "|" + prev4Word + "|" + prev3Word; String trigramWord432 = prev4Word + "|" + prev3Word + "|" + prev2Word; generalFeatures.put("feature_word_trigramWord543_" + trigramWord543, 1.0); generalFeatures.put("feature_word_trigramWord432_" + trigramWord432, 1.0); String trigramWordSkip542 = prev5Word + "|" + prev4Word + "|" + prev2Word; String trigramWordSkip532 = prev5Word + "|" + prev3Word + "|" + prev2Word; String trigramWordSkip431 = prev4Word + "|" + prev3Word + "|" + prevWord; String trigramWordSkip421 = prev4Word + "|" + prev2Word + "|" + prevWord; generalFeatures.put("feature_word_trigramWordSkip542_" + trigramWordSkip542, 1.0); generalFeatures.put("feature_word_trigramWordSkip532_" + trigramWordSkip532, 1.0); generalFeatures.put("feature_word_trigramWordSkip431_" + trigramWordSkip431, 1.0); generalFeatures.put("feature_word_trigramWordSkip421_" + trigramWordSkip421, 1.0);*/ //Previous words in same as current attrValue features /*if (generatedWordsInSameAttrValue.isEmpty()) { 
generalFeatures.put("feature_currentAttrValueWord_isEmpty", 1.0); } for (int j = 1; j <= 1; j++) { String previousCurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - j >= 0) { previousCurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - j).getWord().trim(); } generalFeatures.put("feature_currentAttrValueWord_" + j + "_" + previousCurrentAttrValueWord.toLowerCase(), 1.0); } String prevCurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - 1 >= 0) { prevCurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 1).getWord().trim(); } String prev2CurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - 2 >= 0) { prev2CurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 2).getWord().trim(); } String prev3CurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - 3 >= 0) { prev3CurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 3).getWord().trim(); } String prev4CurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - 4 >= 0) { prev4CurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 4).getWord().trim(); } String prev5CurrentAttrValueWord = "@@"; if (generatedWordsInSameAttrValue.size() - 5 >= 0) { prev5CurrentAttrValueWord = generatedWordsInSameAttrValue.get(generatedWordsInSameAttrValue.size() - 5).getWord().trim(); } String prevCurrentAttrValueBigram = prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; String prevCurrentAttrValueTrigram = prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; String prevCurrentAttrValue4gram = prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; String prevCurrentAttrValue5gram = prev5CurrentAttrValueWord + "|" + 
prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; generalFeatures.put("feature_currentAttrValueWord_bigram_" + prevCurrentAttrValueBigram.toLowerCase(), 1.0); generalFeatures.put("feature_currentAttrValueWord_trigram_" + prevCurrentAttrValueTrigram.toLowerCase(), 1.0); generalFeatures.put("feature_currentAttrValueWord_4gram_" + prevCurrentAttrValue4gram.toLowerCase(), 1.0); generalFeatures.put("feature_currentAttrValueWord_5gram_" + prevCurrentAttrValue5gram.toLowerCase(), 1.0);*/ /*String bigramCurrentAttrValueWord54 = prev5CurrentAttrValueWord + "|" + prev4CurrentAttrValueWord; String bigramCurrentAttrValueWord43 = prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord; String bigramCurrentAttrValueWord32 = prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord; generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWord54_" + bigramCurrentAttrValueWord54, 1.0); generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWord43_" + bigramCurrentAttrValueWord43, 1.0); generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWord32_" + bigramCurrentAttrValueWord32, 1.0); String bigramCurrentAttrValueWordSkip53 = prev5CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord; String bigramCurrentAttrValueWordSkip42 = prev4CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord; String bigramCurrentAttrValueWordSkip31 = prev3CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWordSkip53_" + bigramCurrentAttrValueWordSkip53, 1.0); generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWordSkip42_" + bigramCurrentAttrValueWordSkip42, 1.0); generalFeatures.put("feature_currentAttrValueWord_bigramCurrentAttrValueWordSkip31_" + bigramCurrentAttrValueWordSkip31, 1.0); String trigramCurrentAttrValueWord543 = prev5CurrentAttrValueWord + "|" + 
prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord; String trigramCurrentAttrValueWord432 = prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord; generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWord543_" + trigramCurrentAttrValueWord543, 1.0); generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWord432_" + trigramCurrentAttrValueWord432, 1.0); String trigramCurrentAttrValueWordSkip542 = prev5CurrentAttrValueWord + "|" + prev4CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord; String trigramCurrentAttrValueWordSkip532 = prev5CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord; String trigramCurrentAttrValueWordSkip431 = prev4CurrentAttrValueWord + "|" + prev3CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; String trigramCurrentAttrValueWordSkip421 = prev4CurrentAttrValueWord + "|" + prev2CurrentAttrValueWord + "|" + prevCurrentAttrValueWord; generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWordSkip542_" + trigramCurrentAttrValueWordSkip542, 1.0); generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWordSkip532_" + trigramCurrentAttrValueWordSkip532, 1.0); generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWordSkip431_" + trigramCurrentAttrValueWordSkip431, 1.0); generalFeatures.put("feature_currentAttrValueWord_trigramCurrentAttrValueWordSkip421_" + trigramCurrentAttrValueWordSkip421, 1.0);*/ //Previous Attr|Word features for (int j = 1; j <= 1; j++) { String previousAttrWord = "@@"; if (generatedWords.size() - j >= 0) { if (generatedWords.get(generatedWords.size() - j).getAttribute().contains("=")) { previousAttrWord = generatedWords.get(generatedWords.size() - j).getAttribute().trim() .substring(0, generatedWords.get(generatedWords.size() - j).getAttribute().indexOf('=')) + "|" + generatedWords.get(generatedWords.size() - j).getWord().trim(); } else { 
previousAttrWord = generatedWords.get(generatedWords.size() - j).getAttribute().trim() + "|" + generatedWords.get(generatedWords.size() - j).getWord().trim(); } } generalFeatures.put("feature_attrWord_" + j + "_" + previousAttrWord.toLowerCase(), 1.0); } String prevAttrWord = "@@"; if (generatedWords.size() - 1 >= 0) { if (generatedWords.get(generatedWords.size() - 1).getAttribute().contains("=")) { prevAttrWord = generatedWords.get(generatedWords.size() - 1).getAttribute().trim().substring(0, generatedWords.get(generatedWords.size() - 1).getAttribute().indexOf('=')) + ":" + generatedWords.get(generatedWords.size() - 1).getWord().trim(); } else { prevAttrWord = generatedWords.get(generatedWords.size() - 1).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 1).getWord().trim(); } } String prev2AttrWord = "@@"; if (generatedWords.size() - 2 >= 0) { if (generatedWords.get(generatedWords.size() - 2).getAttribute().contains("=")) { prev2AttrWord = generatedWords.get(generatedWords.size() - 2).getAttribute().trim().substring(0, generatedWords.get(generatedWords.size() - 2).getAttribute().indexOf('=')) + ":" + generatedWords.get(generatedWords.size() - 2).getWord().trim(); } else { prev2AttrWord = generatedWords.get(generatedWords.size() - 2).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 2).getWord().trim(); } } String prev3AttrWord = "@@"; if (generatedWords.size() - 3 >= 0) { if (generatedWords.get(generatedWords.size() - 3).getAttribute().contains("=")) { prev3AttrWord = generatedWords.get(generatedWords.size() - 3).getAttribute().trim().substring(0, generatedWords.get(generatedWords.size() - 3).getAttribute().indexOf('=')) + ":" + generatedWords.get(generatedWords.size() - 3).getWord().trim(); } else { prev3AttrWord = generatedWords.get(generatedWords.size() - 3).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 3).getWord().trim(); } } String prev4AttrWord = "@@"; if (generatedWords.size() - 4 >= 
0) { if (generatedWords.get(generatedWords.size() - 4).getAttribute().contains("=")) { prev4AttrWord = generatedWords.get(generatedWords.size() - 4).getAttribute().trim().substring(0, generatedWords.get(generatedWords.size() - 4).getAttribute().indexOf('=')) + ":" + generatedWords.get(generatedWords.size() - 4).getWord().trim(); } else { prev4AttrWord = generatedWords.get(generatedWords.size() - 4).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 4).getWord().trim(); } } String prev5AttrWord = "@@"; if (generatedWords.size() - 5 >= 0) { if (generatedWords.get(generatedWords.size() - 5).getAttribute().contains("=")) { prev5AttrWord = generatedWords.get(generatedWords.size() - 5).getAttribute().trim().substring(0, generatedWords.get(generatedWords.size() - 5).getAttribute().indexOf('=')) + ":" + generatedWords.get(generatedWords.size() - 5).getWord().trim(); } else { prev5AttrWord = generatedWords.get(generatedWords.size() - 5).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 5).getWord().trim(); } } String prevAttrWordBigram = prev2AttrWord + "|" + prevAttrWord; String prevAttrWordTrigram = prev3AttrWord + "|" + prev2AttrWord + "|" + prevAttrWord; String prevAttrWord4gram = prev4AttrWord + "|" + prev3AttrWord + "|" + prev2AttrWord + "|" + prevAttrWord; String prevAttrWord5gram = prev5AttrWord + "|" + prev4AttrWord + "|" + prev3AttrWord + "|" + prev2AttrWord + "|" + prevAttrWord; generalFeatures.put("feature_attrWord_bigram_" + prevAttrWordBigram.toLowerCase(), 1.0); generalFeatures.put("feature_attrWord_trigram_" + prevAttrWordTrigram.toLowerCase(), 1.0); generalFeatures.put("feature_attrWord_4gram_" + prevAttrWord4gram.toLowerCase(), 1.0); generalFeatures.put("feature_attrWord_5gram_" + prevAttrWord5gram.toLowerCase(), 1.0); /*String bigramAttrWord54 = prev5AttrWord + "|" + prev4AttrWord; String bigramAttrWord43 = prev4AttrWord + "|" + prev3AttrWord; String bigramAttrWord32 = prev3AttrWord + "|" + prev2AttrWord; 
generalFeatures.put("feature_attrWord_bigramAttrWord54_" + bigramAttrWord54, 1.0); generalFeatures.put("feature_attrWord_bigramAttrWord43_" + bigramAttrWord43, 1.0); generalFeatures.put("feature_attrWord_bigramAttrWord32_" + bigramAttrWord32, 1.0); String bigramAttrWordSkip53 = prev5AttrWord + "|" + prev3AttrWord; String bigramAttrWordSkip42 = prev4AttrWord + "|" + prev2AttrWord; String bigramAttrWordSkip31 = prev3AttrWord + "|" + prevAttrWord; generalFeatures.put("feature_attrWord_bigramAttrWordSkip53_" + bigramAttrWordSkip53, 1.0); generalFeatures.put("feature_attrWord_bigramAttrWordSkip42_" + bigramAttrWordSkip42, 1.0); generalFeatures.put("feature_attrWord_bigramAttrWordSkip31_" + bigramAttrWordSkip31, 1.0); String trigramAttrWord543 = prev5AttrWord + "|" + prev4AttrWord + "|" + prev3AttrWord; String trigramAttrWord432 = prev4AttrWord + "|" + prev3AttrWord + "|" + prev2AttrWord; generalFeatures.put("feature_attrWord_trigramAttrWord543_" + trigramAttrWord543, 1.0); generalFeatures.put("feature_attrWord_trigramAttrWord432_" + trigramAttrWord432, 1.0); String trigramAttrWordSkip542 = prev5AttrWord + "|" + prev4AttrWord + "|" + prev2AttrWord; String trigramAttrWordSkip532 = prev5AttrWord + "|" + prev3AttrWord + "|" + prev2AttrWord; String trigramAttrWordSkip431 = prev4AttrWord + "|" + prev3AttrWord + "|" + prevAttrWord; String trigramAttrWordSkip421 = prev4AttrWord + "|" + prev2AttrWord + "|" + prevAttrWord; generalFeatures.put("feature_attrWord_trigramAttrWordSkip542_" + trigramAttrWordSkip542, 1.0); generalFeatures.put("feature_attrWord_trigramAttrWordSkip532_" + trigramAttrWordSkip532, 1.0); generalFeatures.put("feature_attrWord_trigramAttrWordSkip431_" + trigramAttrWordSkip431, 1.0); generalFeatures.put("feature_attrWord_trigramAttrWordSkip421_" + trigramAttrWordSkip421, 1.0);*/ //Previous AttrValue|Word features for (int j = 1; j <= 1; j++) { String previousAttrWord = "@@"; if (generatedWords.size() - j >= 0) { previousAttrWord = 
generatedWords.get(generatedWords.size() - j).getAttribute().trim() + "|" + generatedWords.get(generatedWords.size() - j).getWord().trim(); } generalFeatures.put("feature_attrValueWord_" + j + "_" + previousAttrWord.toLowerCase(), 1.0); } String prevAttrValueWord = "@@"; if (generatedWords.size() - 1 >= 0) { prevAttrValueWord = generatedWords.get(generatedWords.size() - 1).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 1).getWord().trim(); } String prev2AttrValueWord = "@@"; if (generatedWords.size() - 2 >= 0) { prev2AttrValueWord = generatedWords.get(generatedWords.size() - 2).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 2).getWord().trim(); } String prev3AttrValueWord = "@@"; if (generatedWords.size() - 3 >= 0) { prev3AttrValueWord = generatedWords.get(generatedWords.size() - 3).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 3).getWord().trim(); } String prev4AttrValueWord = "@@"; if (generatedWords.size() - 4 >= 0) { prev4AttrValueWord = generatedWords.get(generatedWords.size() - 4).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 4).getWord().trim(); } String prev5AttrValueWord = "@@"; if (generatedWords.size() - 5 >= 0) { prev5AttrValueWord = generatedWords.get(generatedWords.size() - 5).getAttribute().trim() + ":" + generatedWords.get(generatedWords.size() - 5).getWord().trim(); } String prevAttrValueWordBigram = prev2AttrValueWord + "|" + prevAttrValueWord; String prevAttrValueWordTrigram = prev3AttrValueWord + "|" + prev2AttrValueWord + "|" + prevAttrValueWord; String prevAttrValueWord4gram = prev4AttrValueWord + "|" + prev3AttrValueWord + "|" + prev2AttrValueWord + "|" + prevAttrValueWord; String prevAttrValueWord5gram = prev5AttrValueWord + "|" + prev4AttrValueWord + "|" + prev3AttrValueWord + "|" + prev2AttrValueWord + "|" + prevAttrValueWord; generalFeatures.put("feature_attrValueWord_bigram_" + prevAttrValueWordBigram.toLowerCase(), 1.0); 
generalFeatures.put("feature_attrValueWord_trigram_" + prevAttrValueWordTrigram.toLowerCase(), 1.0); generalFeatures.put("feature_attrValueWord_4gram_" + prevAttrValueWord4gram.toLowerCase(), 1.0); generalFeatures.put("feature_attrValueWord_5gram_" + prevAttrValueWord5gram.toLowerCase(), 1.0); /*String bigramAttrValueWord54 = prev5AttrValueWord + "|" + prev4AttrValueWord; String bigramAttrValueWord43 = prev4AttrValueWord + "|" + prev3AttrValueWord; String bigramAttrValueWord32 = prev3AttrValueWord + "|" + prev2AttrValueWord; generalFeatures.put("feature_attrValueWord_bigramAttrValueWord54_" + bigramAttrValueWord54, 1.0); generalFeatures.put("feature_attrValueWord_bigramAttrValueWord43_" + bigramAttrValueWord43, 1.0); generalFeatures.put("feature_attrValueWord_bigramAttrValueWord32_" + bigramAttrValueWord32, 1.0); String bigramAttrValueWordSkip53 = prev5AttrValueWord + "|" + prev3AttrValueWord; String bigramAttrValueWordSkip42 = prev4AttrValueWord + "|" + prev2AttrValueWord; String bigramAttrValueWordSkip31 = prev3AttrValueWord + "|" + prevAttrValueWord; generalFeatures.put("feature_attrValueWord_bigramAttrValueWordSkip53_" + bigramAttrValueWordSkip53, 1.0); generalFeatures.put("feature_attrValueWord_bigramAttrValueWordSkip42_" + bigramAttrValueWordSkip42, 1.0); generalFeatures.put("feature_attrValueWord_bigramAttrValueWordSkip31_" + bigramAttrValueWordSkip31, 1.0); String trigramAttrValueWord543 = prev5AttrValueWord + "|" + prev4AttrValueWord + "|" + prev3AttrValueWord; String trigramAttrValueWord432 = prev4AttrValueWord + "|" + prev3AttrValueWord + "|" + prev2AttrValueWord; generalFeatures.put("feature_attrValueWord_trigramAttrValueWord543_" + trigramAttrValueWord543, 1.0); generalFeatures.put("feature_attrValueWord_trigramAttrValueWord432_" + trigramAttrValueWord432, 1.0); String trigramAttrValueWordSkip542 = prev5AttrValueWord + "|" + prev4AttrValueWord + "|" + prev2AttrValueWord; String trigramAttrValueWordSkip532 = prev5AttrValueWord + "|" + prev3AttrValueWord 
+ "|" + prev2AttrValueWord; String trigramAttrValueWordSkip431 = prev4AttrValueWord + "|" + prev3AttrValueWord + "|" + prevAttrValueWord; String trigramAttrValueWordSkip421 = prev4AttrValueWord + "|" + prev2AttrValueWord + "|" + prevAttrValueWord; generalFeatures.put("feature_attrValueWord_trigramAttrValueWordSkip542_" + trigramAttrValueWordSkip542, 1.0); generalFeatures.put("feature_attrValueWord_trigramAttrValueWordSkip532_" + trigramAttrValueWordSkip532, 1.0); generalFeatures.put("feature_attrValueWord_trigramAttrValueWordSkip431_" + trigramAttrValueWordSkip431, 1.0); generalFeatures.put("feature_attrValueWord_trigramAttrValueWordSkip421_" + trigramAttrValueWordSkip421, 1.0);*/ //Previous attrValue features int attributeSize = generatedAttributes.size(); for (int j = 1; j <= 1; j++) { String previousAttrValue = "@@"; if (attributeSize - j >= 0) { previousAttrValue = generatedAttributes.get(attributeSize - j).trim(); } generalFeatures.put("feature_attrValue_" + j + "_" + previousAttrValue, 1.0); } String prevAttrValue = "@@"; if (attributeSize - 1 >= 0) { prevAttrValue = generatedAttributes.get(attributeSize - 1).trim(); } String prev2AttrValue = "@@"; if (attributeSize - 2 >= 0) { prev2AttrValue = generatedAttributes.get(attributeSize - 2).trim(); } String prev3AttrValue = "@@"; if (attributeSize - 3 >= 0) { prev3AttrValue = generatedAttributes.get(attributeSize - 3).trim(); } String prev4AttrValue = "@@"; if (attributeSize - 4 >= 0) { prev4AttrValue = generatedAttributes.get(attributeSize - 4).trim(); } String prev5AttrValue = "@@"; if (attributeSize - 5 >= 0) { prev5AttrValue = generatedAttributes.get(attributeSize - 5).trim(); } String prevAttrBigramValue = prev2AttrValue + "|" + prevAttrValue; String prevAttrTrigramValue = prev3AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; String prevAttr4gramValue = prev4AttrValue + "|" + prev3AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; String prevAttr5gramValue = prev5AttrValue + "|" + prev4AttrValue 
+ "|" + prev3AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; generalFeatures.put("feature_attrValue_bigram_" + prevAttrBigramValue.toLowerCase(), 1.0); generalFeatures.put("feature_attrValue_trigram_" + prevAttrTrigramValue.toLowerCase(), 1.0); generalFeatures.put("feature_attrValue_4gram_" + prevAttr4gramValue.toLowerCase(), 1.0); generalFeatures.put("feature_attrValue_5gram_" + prevAttr5gramValue.toLowerCase(), 1.0); /*String bigramAttrValue54 = prev5AttrValue + "|" + prev4AttrValue; String bigramAttrValue43 = prev4AttrValue + "|" + prev3AttrValue; String bigramAttrValue32 = prev3AttrValue + "|" + prev2AttrValue; generalFeatures.put("feature_attrValue_bigramAttrValue54_" + bigramAttrValue54, 1.0); generalFeatures.put("feature_attrValue_bigramAttrValue43_" + bigramAttrValue43, 1.0); generalFeatures.put("feature_attrValue_bigramAttrValue32_" + bigramAttrValue32, 1.0); String bigramAttrValueSkip53 = prev5AttrValue + "|" + prev3AttrValue; String bigramAttrValueSkip42 = prev4AttrValue + "|" + prev2AttrValue; String bigramAttrValueSkip31 = prev3AttrValue + "|" + prevAttrValue; generalFeatures.put("feature_attrValue_bigramAttrValueSkip53_" + bigramAttrValueSkip53, 1.0); generalFeatures.put("feature_attrValue_bigramAttrValueSkip42_" + bigramAttrValueSkip42, 1.0); generalFeatures.put("feature_attrValue_bigramAttrValueSkip31_" + bigramAttrValueSkip31, 1.0); String trigramAttrValue543 = prev5AttrValue + "|" + prev4AttrValue + "|" + prev3AttrValue; String trigramAttrValue432 = prev4AttrValue + "|" + prev3AttrValue + "|" + prev2AttrValue; generalFeatures.put("feature_attrValue_trigramAttrValue543_" + trigramAttrValue543, 1.0); generalFeatures.put("feature_attrValue_trigramAttrValue432_" + trigramAttrValue432, 1.0); String trigramAttrValueSkip542 = prev5AttrValue + "|" + prev4AttrValue + "|" + prev2AttrValue; String trigramAttrValueSkip532 = prev5AttrValue + "|" + prev3AttrValue + "|" + prev2AttrValue; String trigramAttrValueSkip431 = prev4AttrValue + "|" + 
prev3AttrValue + "|" + prevAttrValue; String trigramAttrValueSkip421 = prev4AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; generalFeatures.put("feature_attrValue_trigramAttrValueSkip542_" + trigramAttrValueSkip542, 1.0); generalFeatures.put("feature_attrValue_trigramAttrValueSkip532_" + trigramAttrValueSkip532, 1.0); generalFeatures.put("feature_attrValue_trigramAttrValueSkip431_" + trigramAttrValueSkip431, 1.0); generalFeatures.put("feature_attrValue_trigramAttrValueSkip421_" + trigramAttrValueSkip421, 1.0);*/ //Previous attr features for (int j = 1; j <= 1; j++) { String previousAttr = "@@"; if (attributeSize - j >= 0) { if (generatedAttributes.get(attributeSize - j).contains("=")) { previousAttr = generatedAttributes.get(attributeSize - j).trim().substring(0, generatedAttributes.get(attributeSize - j).indexOf('=')); } else { previousAttr = generatedAttributes.get(attributeSize - j).trim(); } } generalFeatures.put("feature_attr_" + j + "_" + previousAttr, 1.0); } String prevAttr = "@@"; if (attributeSize - 1 >= 0) { if (generatedAttributes.get(attributeSize - 1).contains("=")) { prevAttr = generatedAttributes.get(attributeSize - 1).trim().substring(0, generatedAttributes.get(attributeSize - 1).indexOf('=')); } else { prevAttr = generatedAttributes.get(attributeSize - 1).trim(); } } String prev2Attr = "@@"; if (attributeSize - 2 >= 0) { if (generatedAttributes.get(attributeSize - 2).contains("=")) { prev2Attr = generatedAttributes.get(attributeSize - 2).trim().substring(0, generatedAttributes.get(attributeSize - 2).indexOf('=')); } else { prev2Attr = generatedAttributes.get(attributeSize - 2).trim(); } } String prev3Attr = "@@"; if (attributeSize - 3 >= 0) { if (generatedAttributes.get(attributeSize - 3).contains("=")) { prev3Attr = generatedAttributes.get(attributeSize - 3).trim().substring(0, generatedAttributes.get(attributeSize - 3).indexOf('=')); } else { prev3Attr = generatedAttributes.get(attributeSize - 3).trim(); } } String prev4Attr = "@@"; if 
(attributeSize - 4 >= 0) { if (generatedAttributes.get(attributeSize - 4).contains("=")) { prev4Attr = generatedAttributes.get(attributeSize - 4).trim().substring(0, generatedAttributes.get(attributeSize - 4).indexOf('=')); } else { prev4Attr = generatedAttributes.get(attributeSize - 4).trim(); } } String prev5Attr = "@@"; if (attributeSize - 5 >= 0) { if (generatedAttributes.get(attributeSize - 5).contains("=")) { prev5Attr = generatedAttributes.get(attributeSize - 5).trim().substring(0, generatedAttributes.get(attributeSize - 5).indexOf('=')); } else { prev5Attr = generatedAttributes.get(attributeSize - 5).trim(); } } String prevAttrBigram = prev2Attr + "|" + prevAttr; String prevAttrTrigram = prev3Attr + "|" + prev2Attr + "|" + prevAttr; String prevAttr4gram = prev4Attr + "|" + prev3Attr + "|" + prev2Attr + "|" + prevAttr; String prevAttr5gram = prev5Attr + "|" + prev4Attr + "|" + prev3Attr + "|" + prev2Attr + "|" + prevAttr; generalFeatures.put("feature_attr_bigram_" + prevAttrBigram.toLowerCase(), 1.0); generalFeatures.put("feature_attr_trigram_" + prevAttrTrigram.toLowerCase(), 1.0); generalFeatures.put("feature_attr_4gram_" + prevAttr4gram.toLowerCase(), 1.0); generalFeatures.put("feature_attr_5gram_" + prevAttr5gram.toLowerCase(), 1.0); /*String bigramAttr54 = prev5Attr + "|" + prev4Attr; String bigramAttr43 = prev4Attr + "|" + prev3Attr; String bigramAttr32 = prev3Attr + "|" + prev2Attr; generalFeatures.put("feature_attr_bigramAttr54_" + bigramAttr54, 1.0); generalFeatures.put("feature_attr_bigramAttr43_" + bigramAttr43, 1.0); generalFeatures.put("feature_attr_bigramAttr32_" + bigramAttr32, 1.0); String bigramAttrSkip53 = prev5Attr + "|" + prev3Attr; String bigramAttrSkip42 = prev4Attr + "|" + prev2Attr; String bigramAttrSkip31 = prev3Attr + "|" + prevAttr; generalFeatures.put("feature_attr_bigramAttrSkip53_" + bigramAttrSkip53, 1.0); generalFeatures.put("feature_attr_bigramAttrSkip42_" + bigramAttrSkip42, 1.0); 
generalFeatures.put("feature_attr_bigramAttrSkip31_" + bigramAttrSkip31, 1.0); String trigramAttr543 = prev5Attr + "|" + prev4Attr + "|" + prev3Attr; String trigramAttr432 = prev4Attr + "|" + prev3Attr + "|" + prev2Attr; generalFeatures.put("feature_attr_trigramAttr543_" + trigramAttr543, 1.0); generalFeatures.put("feature_attr_trigramAttr432_" + trigramAttr432, 1.0); String trigramAttrSkip542 = prev5Attr + "|" + prev4Attr + "|" + prev2Attr; String trigramAttrSkip532 = prev5Attr + "|" + prev3Attr + "|" + prev2Attr; String trigramAttrSkip431 = prev4Attr + "|" + prev3Attr + "|" + prevAttr; String trigramAttrSkip421 = prev4Attr + "|" + prev2Attr + "|" + prevAttr; generalFeatures.put("feature_attr_trigramAttrSkip542_" + trigramAttrSkip542, 1.0); generalFeatures.put("feature_attr_trigramAttrSkip532_" + trigramAttrSkip532, 1.0); generalFeatures.put("feature_attr_trigramAttrSkip431_" + trigramAttrSkip431, 1.0); generalFeatures.put("feature_attr_trigramAttrSkip421_" + trigramAttrSkip421, 1.0);*/ //Next attr features for (int j = 0; j < 1; j++) { String nextAttr = "@@"; if (j < nextGeneratedAttributes.size()) { if (nextGeneratedAttributes.get(j).contains("=")) { nextAttr = nextGeneratedAttributes.get(j).trim().substring(0, nextGeneratedAttributes.get(j).indexOf('=')); } else { nextAttr = nextGeneratedAttributes.get(j).trim(); } } generalFeatures.put("feature_nextAttr_" + j + "_" + nextAttr, 1.0); } String nextAttr = "@@"; if (0 < nextGeneratedAttributes.size()) { if (nextGeneratedAttributes.get(0).contains("=")) { nextAttr = nextGeneratedAttributes.get(0).trim().substring(0, nextGeneratedAttributes.get(0).indexOf('=')); } else { nextAttr = nextGeneratedAttributes.get(0).trim(); } } String next2Attr = "@@"; if (1 < nextGeneratedAttributes.size()) { if (nextGeneratedAttributes.get(1).contains("=")) { next2Attr = nextGeneratedAttributes.get(1).trim().substring(0, nextGeneratedAttributes.get(1).indexOf('=')); } else { next2Attr = nextGeneratedAttributes.get(1).trim(); } } 
String next3Attr = "@@"; if (2 < nextGeneratedAttributes.size()) { if (nextGeneratedAttributes.get(2).contains("=")) { next3Attr = nextGeneratedAttributes.get(2).trim().substring(0, nextGeneratedAttributes.get(2).indexOf('=')); } else { next3Attr = nextGeneratedAttributes.get(2).trim(); } } String next4Attr = "@@"; if (3 < nextGeneratedAttributes.size()) { if (nextGeneratedAttributes.get(3).contains("=")) { next4Attr = nextGeneratedAttributes.get(3).trim().substring(0, nextGeneratedAttributes.get(3).indexOf('=')); } else { next4Attr = nextGeneratedAttributes.get(3).trim(); } } String next5Attr = "@@"; if (4 < nextGeneratedAttributes.size()) { if (nextGeneratedAttributes.get(4).contains("=")) { next5Attr = nextGeneratedAttributes.get(4).trim().substring(0, nextGeneratedAttributes.get(4).indexOf('=')); } else { next5Attr = nextGeneratedAttributes.get(4).trim(); } } String nextAttrBigram = nextAttr + "|" + next2Attr; String nextAttrTrigram = nextAttr + "|" + next2Attr + "|" + next3Attr; String nextAttr4gram = nextAttr + "|" + next2Attr + "|" + next3Attr + "|" + next4Attr; String nextAttr5gram = nextAttr + "|" + next2Attr + "|" + next3Attr + "|" + next4Attr + "|" + next5Attr; generalFeatures.put("feature_nextAttr_bigram_" + nextAttrBigram.toLowerCase(), 1.0); generalFeatures.put("feature_nextAttr_trigram_" + nextAttrTrigram.toLowerCase(), 1.0); generalFeatures.put("feature_nextAttr_4gram_" + nextAttr4gram.toLowerCase(), 1.0); generalFeatures.put("feature_nextAttr_5gram_" + nextAttr5gram.toLowerCase(), 1.0); //Next attrValue features for (int j = 0; j < 1; j++) { String nextAttrValue = "@@"; if (j < nextGeneratedAttributes.size()) { nextAttrValue = nextGeneratedAttributes.get(j).trim(); } generalFeatures.put("feature_nextAttrValue_" + j + "_" + nextAttrValue, 1.0); } String nextAttrValue = "@@"; if (0 < nextGeneratedAttributes.size()) { nextAttrValue = nextGeneratedAttributes.get(0).trim(); } String next2AttrValue = "@@"; if (1 < nextGeneratedAttributes.size()) { 
next2AttrValue = nextGeneratedAttributes.get(1).trim(); } String next3AttrValue = "@@"; if (2 < nextGeneratedAttributes.size()) { next3AttrValue = nextGeneratedAttributes.get(2).trim(); } String next4AttrValue = "@@"; if (3 < nextGeneratedAttributes.size()) { next4AttrValue = nextGeneratedAttributes.get(3).trim(); } String next5AttrValue = "@@"; if (4 < nextGeneratedAttributes.size()) { next5AttrValue = nextGeneratedAttributes.get(4).trim(); } String nextAttrValueBigram = nextAttrValue + "|" + next2AttrValue; String nextAttrValueTrigram = nextAttrValue + "|" + next2AttrValue + "|" + next3AttrValue; String nextAttrValue4gram = nextAttrValue + "|" + next2AttrValue + "|" + next3AttrValue + "|" + next4AttrValue; String nextAttrValue5gram = nextAttrValue + "|" + next2AttrValue + "|" + next3AttrValue + "|" + next4AttrValue + "|" + next5AttrValue; generalFeatures.put("feature_nextAttrValue_bigram_" + nextAttrValueBigram.toLowerCase(), 1.0); generalFeatures.put("feature_nextAttrValue_trigram_" + nextAttrValueTrigram.toLowerCase(), 1.0); generalFeatures.put("feature_nextAttrValue_4gram_" + nextAttrValue4gram.toLowerCase(), 1.0); generalFeatures.put("feature_nextAttrValue_5gram_" + nextAttrValue5gram.toLowerCase(), 1.0); //If values have already been generated or not generalFeatures.put("feature_valueToBeMentioned_" + currentValue.toLowerCase(), 1.0); if (wasValueMentioned) { generalFeatures.put("feature_wasValueMentioned_true", 1.0); } else { //generalFeatures.put("feature_wasValueMentioned_false", 1.0); } HashSet<String> valuesThatFollow = new HashSet<>(); attrValuesThatFollow.stream().map((attrValue) -> { generalFeatures.put("feature_attrValuesThatFollow_" + attrValue.toLowerCase(), 1.0); return attrValue; }).forEachOrdered((attrValue) -> { if (attrValue.contains("=")) { String v = attrValue.substring(attrValue.indexOf('=') + 1); if (v.matches("[xX][0-9]+")) { String attr = attrValue.substring(0, attrValue.indexOf('=')); valuesThatFollow.add(Action.TOKEN_X + attr + "_" + 
v.substring(1)); } else { valuesThatFollow.add(v); } generalFeatures.put( "feature_attrsThatFollow_" + attrValue.substring(0, attrValue.indexOf('=')).toLowerCase(), 1.0); } else { generalFeatures.put("feature_attrsThatFollow_" + attrValue.toLowerCase(), 1.0); } }); if (valuesThatFollow.isEmpty()) { generalFeatures.put("feature_noAttrsFollow", 1.0); } else { generalFeatures.put("feature_noAttrsFollow", 0.0); } HashSet<String> mentionedValues = new HashSet<>(); attrValuesAlreadyMentioned.stream().map((attrValue) -> { generalFeatures.put("feature_attrValuesAlreadyMentioned_" + attrValue.toLowerCase(), 1.0); return attrValue; }).forEachOrdered((attrValue) -> { if (attrValue.contains("=")) { generalFeatures.put("feature_attrsAlreadyMentioned_" + attrValue.substring(0, attrValue.indexOf('=')).toLowerCase(), 1.0); String v = attrValue.substring(attrValue.indexOf('=') + 1); if (v.matches("[xX][0-9]+")) { String attr = attrValue.substring(0, attrValue.indexOf('=')); mentionedValues.add(Action.TOKEN_X + attr + "_" + v.substring(1)); } else { mentionedValues.add(v); } } else { generalFeatures.put("feature_attrsAlreadyMentioned_" + attrValue.toLowerCase(), 1.0); } }); /*System.out.println("currentAttrValue: " + currentAttrValue); System.out.println("5W: " + prev5gram); System.out.println("5AW: " + prevAttrWord5gram); System.out.println("5A: " + prevAttr5gram); System.out.println("VM: " + wasValueMentioned); System.out.println("A_TF: " + attrValuesThatFollow); System.out.println("==============================");*/ if (currentValue.equals("no") || currentValue.equals("yes") || currentValue.equals("yes or no") || currentValue.equals("none") || currentValue.equals("empty") //|| currentValue.equals("dont_care") ) { generalFeatures.put("feature_emptyValue", 1.0); } //Word specific features (and also global features) for (Action action : availableWordActions.get(currentAttr)) { //Is word same as previous word if (prevWord.equals(action.getWord())) { 
//valueSpecificFeatures.get(action.getAction()).put("feature_specific_sameAsPreviousWord", 1.0); valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_sameAsPreviousWord", 1.0); } else { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_notSameAsPreviousWord", 1.0); valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_notSameAsPreviousWord", 1.0); } //Has word appeared in the same attrValue before generatedWords.forEach((previousAction) -> { if (previousAction.getWord().equals(action.getWord()) && previousAction.getAttribute().equals(currentAttrValue)) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_appearedInSameAttrValue", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_appearedInSameAttrValue", 1.0); } else { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_notAppearedInSameAttrValue", 1.0); //valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_notAppearedInSameAttrValue", 1.0); } }); //Has word appeared before generatedWords.forEach((previousAction) -> { if (previousAction.getWord().equals(action.getWord())) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_appeared", 1.0); valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_appeared", 1.0); } else { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_notAppeared", 1.0); //valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_notAppeared", 1.0); } }); if (currentValue.equals("no") || currentValue.equals("yes") || currentValue.equals("yes or no") || currentValue.equals("none") || currentValue.equals("empty") //|| currentValue.equals("dont_care") ) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_emptyValue", 1.0); valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_emptyValue", 1.0); } else { 
//valueSpecificFeatures.get(action.getAction()).put("feature_specific_notEmptyValue", 1.0); //valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_notEmptyValue", 1.0); } HashSet<String> keys = new HashSet<>(valueSpecificFeatures.get(action.getAction()).keySet()); keys.forEach((feature1) -> { keys.stream() .filter((feature2) -> (valueSpecificFeatures.get(action.getAction()).get(feature1) == 1.0 && valueSpecificFeatures.get(action.getAction()).get(feature2) == 1.0 && feature1.compareTo(feature2) < 0)) .forEachOrdered((feature2) -> { valueSpecificFeatures.get(action.getAction()).put(feature1 + "&&" + feature2, 1.0); }); }); if (!action.getWord().startsWith(Action.TOKEN_X) && !currentValue.equals("no") && !currentValue.equals("yes") && !currentValue.equals("yes or no") && !currentValue.equals("none") && !currentValue.equals("empty") //&& !currentValue.equals("dont_care") ) { for (String value : getValueAlignments().keySet()) { for (ArrayList<String> alignedStr : getValueAlignments().get(value).keySet()) { if (alignedStr.get(0).equals(action.getWord())) { if (mentionedValues.contains(value)) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_beginsValue_alreadyMentioned", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_beginsValue_alreadyMentioned", 1.0); } else if (currentValue.equals(value)) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_beginsValue_current", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_beginsValue_current", 1.0); } else if (valuesThatFollow.contains(value)) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_beginsValue_thatFollows", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_beginsValue_thatFollows", 1.0); } else { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_beginsValue_notInMR", 1.0); 
valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_beginsValue_notInMR", 1.0); } } else { for (int i = 1; i < alignedStr.size(); i++) { if (alignedStr.get(i).equals(action.getWord())) { if (endsWith(generatedPhrase, new ArrayList<String>(alignedStr.subList(0, i + 1)))) { if (mentionedValues.contains(value)) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_inValue_alreadyMentioned", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_inValue_alreadyMentioned", 1.0); } else if (currentValue.equals(value)) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_inValue_current", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_inValue_current", 1.0); } else if (valuesThatFollow.contains(value)) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_inValue_thatFollows", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_inValue_thatFollows", 1.0); } else { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_inValue_notInMR", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_inValue_notInMR", 1.0); } } else { /*if (mentionedValues.contains(value)) { valueSpecificFeatures.get(action.getAction()).put("feature_specific_outOfValue_alreadyMentioned", 1.0); } else if (currentValue.equals(value)) { valueSpecificFeatures.get(action.getAction()).put("feature_specific_outOfValue_current", 1.0); } else if (valuesThatFollow.contains(value)) { valueSpecificFeatures.get(action.getAction()).put("feature_specific_outOfValue_thatFollows", 1.0); } else { valueSpecificFeatures.get(action.getAction()).put("feature_specific_outOfValue_notInMR", 1.0); }*/ //valueSpecificFeatures.get(action.getAction()).put("feature_specific_outOfValue", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_outOfValue", 1.0); } } } } } } if 
(action.getWord().equals(Action.TOKEN_END)) { if (generatedWordsInSameAttrValue.isEmpty()) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_closingEmptyAttr", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_closingEmptyAttr", 1.0); } if (!wasValueMentioned) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_closingAttrWithValueNotMentioned", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_closingAttrWithValueNotMentioned", 1.0); } // if (!prevCurrentAttrValueWord.equals("@@")) { if (!prevWord.equals("@@")) { boolean alignmentIsOpen = false; for (String value : getValueAlignments().keySet()) { for (ArrayList<String> alignedStr : getValueAlignments().get(value).keySet()) { for (int i = 0; i < alignedStr.size() - 1; i++) { if (alignedStr.get(i).equals(prevWord) && endsWith(generatedPhrase, new ArrayList<>(alignedStr.subList(0, i + 1)))) { alignmentIsOpen = true; } } } } if (alignmentIsOpen) { // valueSpecificFeatures.get(action.getAction()).put("feature_specific_closingAttrWhileValueIsNotConcluded", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_closingAttrWhileValueIsNotConcluded", 1.0); } } } } else if (currentValue.equals("no") || currentValue.equals("yes") || currentValue.equals("yes or no") || currentValue.equals("none") || currentValue.equals("empty") //|| currentValue.equals("dont_care") ) { valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_XValue_notInMR", 1.0); } else { String currentValueVariant = ""; if (currentValue.matches("[xX][0-9]+")) { currentValueVariant = Action.TOKEN_X + currentAttr + "_" + currentValue.substring(1); } if (mentionedValues.contains(action.getWord())) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_XValue_alreadyMentioned", 1.0); valueSpecificFeatures.get(action.getAction()) .put("global_feature_specific_XValue_alreadyMentioned", 1.0); } 
else if (currentValueVariant.equals(action.getWord()) && !currentValueVariant.isEmpty()) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_XValue_current", 1.0); valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_XValue_current", 1.0); } else if (valuesThatFollow.contains(action.getWord())) { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_XValue_thatFollows", 1.0); valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_XValue_thatFollows", 1.0); } else { //valueSpecificFeatures.get(action.getAction()).put("feature_specific_XValue_notInMR", 1.0); valueSpecificFeatures.get(action.getAction()).put("global_feature_specific_XValue_notInMR", 1.0); } } /*for (int i : nGrams.keySet()) { for (String nGram : nGrams.get(i)) { if (i == 2) { if (nGram.startsWith(prevWord + "|") && nGram.endsWith("|" + action.getAction())) { valueSpecificFeatures.get(action.getAction()).put("feature_specific_valuesFollowsPreviousWord", 1.0); } } else if (i == 3) { if (nGram.startsWith(prevBigram + "|") && nGram.endsWith("|" + action.getAction())) { valueSpecificFeatures.get(action.getAction()).put("feature_specific_valuesFollowsPreviousBigram", 1.0); } } else if (i == 4) { if (nGram.startsWith(prevTrigram + "|") && nGram.endsWith("|" + action.getAction())) { valueSpecificFeatures.get(action.getAction()).put("feature_specific_valuesFollowsPreviousTrigram", 1.0); } } else if (i == 5) { if (nGram.startsWith(prev4gram + "|") && nGram.endsWith("|" + action.getAction())) { valueSpecificFeatures.get(action.getAction()).put("feature_specific_valuesFollowsPrevious4gram", 1.0); } } else if (i == 6) { if (nGram.startsWith(prev5gram + "|") && nGram.endsWith("|" + action.getAction())) { valueSpecificFeatures.get(action.getAction()).put("feature_specific_valuesFollowsPrevious5gram", 1.0); } } } }*/ //valueSpecificFeatures.get(action.getAction()).put("global_feature_abstractMR_" + mr.getAbstractMR(), 1.0); 
valueSpecificFeatures.get(action.getAction()) .put("global_feature_currentValue_" + currentValue.toLowerCase(), 1.0); ArrayList<String> fullGramLM = new ArrayList<>(); for (int i = 0; i < generatedWords.size(); i++) { fullGramLM.add(generatedWords.get(i).getWord()); } ArrayList<String> prev5wordGramLM = new ArrayList<>(); int j = 0; for (int i = generatedWords.size() - 1; (i >= 0 && j < 5); i--) { prev5wordGramLM.add(0, generatedWords.get(i).getWord()); j++; } prev5wordGramLM.add(action.getWord()); while (prev5wordGramLM.size() < 4) { prev5wordGramLM.add(0, "@@"); } double afterLMScorePerPred5Gram = getWordLMsPerPredicate().get(predicate) .getProbability(prev5wordGramLM); valueSpecificFeatures.get(action.getAction()).put("global_feature_LMWord_perPredicate_5gram_score", afterLMScorePerPred5Gram); double afterLMScorePerPred = getWordLMsPerPredicate().get(predicate).getProbability(fullGramLM); valueSpecificFeatures.get(action.getAction()).put("global_feature_LMWord_perPredicate_score", afterLMScorePerPred); } /*HashSet<String> keys = new HashSet<>(generalFeatures.keySet()); for (String feature1 : keys) { if (generalFeatures.get(feature1) == 1.0) { generalFeatures.put("global_feature_attr_" + currentValue.toLowerCase() + "&&" + feature1, 1.0); } }*/ //generalFeatures.put("feature_abstractMR_" + mr.getAbstractMR(), 1.0); /*HashSet<String> keys = new HashSet<>(generalFeatures.keySet()); for (String feature1 : keys) { for (String feature2 : keys) { if (generalFeatures.get(feature1) == 1.0 && generalFeatures.get(feature2) == 1.0 && feature1.compareTo(feature2) < 0) { generalFeatures.put(feature1 + "&&" + feature2, 1.0); } } }*/ return new Instance(generalFeatures, valueSpecificFeatures, costs); }
From source file:com.hygenics.parser.ParseJSoup.java
/** * Runs the Program//from www . j av a 2 s .com */ public void run() { int its = 0; this.select = Properties.getProperty(this.select); this.extracondition = Properties.getProperty(this.extracondition); this.column = Properties.getProperty(this.column); createTables(); log.info("Starting Parse via JSoup @ " + Calendar.getInstance().getTime().toString()); ForkJoinPool fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors() * procs); Set<Callable<ArrayList<String>>> collection; List<Future<ArrayList<String>>> futures; ArrayList<String> data = new ArrayList<String>((commitsize + 10)); ArrayList<String> outdata = new ArrayList<String>(((commitsize + 10) * 3)); int offenderhash = offset; boolean run = true; int iteration = 0; int currpos = 0; do { collection = new HashSet<Callable<ArrayList<String>>>(qnums); log.info("Getting Data"); // get data currpos = iteration * commitsize + offset; iteration += 1; String query = select; if (extracondition != null) { query += " " + extracondition; } if (extracondition != null) { query += " WHERE " + extracondition + " AND "; } else { query += " WHERE "; } for (int i = 0; i < qnums; i++) { if (currpos + (Math.round(commitsize / qnums * (i + 1))) < currpos + commitsize) { collection.add(new SplitQuery((query + pullid + " >= " + Integer.toString(currpos + (Math.round(commitsize / qnums * (i)))) + " AND " + pullid + " < " + Integer.toString(currpos + (Math.round(commitsize / qnums * (i + 1))))))); } else { collection.add(new SplitQuery((query + pullid + " >= " + Integer.toString(currpos + (Math.round(commitsize / qnums * (i)))) + " AND " + pullid + " < " + Integer.toString(currpos + commitsize)))); } } if (collection.size() > 0) { futures = fjp.invokeAll(collection); int w = 0; while (fjp.isQuiescent() == false && fjp.getActiveThreadCount() > 0) { w++; } for (Future<ArrayList<String>> f : futures) { try { // TODO Get Pages to Parse data.addAll(f.get()); } catch (NullPointerException e) { log.info("Some Data Returned Null"); 
} catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ExecutionException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } collection = new HashSet<Callable<ArrayList<String>>>(data.size()); // checkstring if (data.size() == 0 && checkstring != null && its <= maxchecks) { its++; collection.add(new SplitQuery(checkstring)); futures = fjp.invokeAll(collection); int w = 0; while (fjp.isQuiescent() == false && fjp.getActiveThreadCount() > 0) { w++; } for (Future<ArrayList<String>> f : futures) { try { // TODO Get Pages to Parse data.addAll(f.get()); } catch (NullPointerException e) { log.info("Some Data Returned Null"); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ExecutionException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } if (data.size() == 0) { // set to stop if size is0 log.info("No Pages to Parse. Will Terminate"); run = false; } else { // parse log.info("Starting JSoup Parse @ " + Calendar.getInstance().getTime().toString()); for (String json : data) { // faster json reader is minimal json but faster parser is // Simple Json Map<String, Json> jMap = Json.read(json).asJsonMap(); if (jMap.containsKey("offenderhash")) { // string to int in case it is a string and has some // extra space offenderhash = Integer.parseInt(jMap.get("offenderhash").asString().trim()); } boolean allow = true; if (mustcontain != null) { if (jMap.get(column).asString().contains(mustcontain) == false) { allow = false; } } if (cannotcontain != null) { if (jMap.get(column).asString().contains(cannotcontain)) { allow = false; } } // this is the fastest way. I was learning before and will // rewrite when time permits. 
if (allow == true) { if (jMap.containsKey("offenderhash")) { if (this.singlepaths != null) { collection.add(new ParseSingle(Integer.toString(offenderhash), header, footer, pagenarrow, singlepaths, StringEscapeUtils.unescapeXml(jMap.get(column).asString()), replace, replaceSequence)); } if (this.multipaths != null) { collection.add(new ParseRows(Integer.toString(offenderhash), header, footer, pagenarrow, multipaths, StringEscapeUtils.unescapeXml(jMap.get(column).asString()), replace, replaceSequence)); } if (this.recordpaths != null) { collection.add(new ParseLoop(Integer.toString(offenderhash), header, footer, pagenarrow, recordpaths, StringEscapeUtils.unescapeXml(jMap.get(column).asString()), replace, replaceSequence)); } } } offenderhash += 1; } // complete parse log.info("Waiting for Parsing to Complete."); if (collection.size() > 0) { futures = fjp.invokeAll(collection); int w = 0; while (fjp.isQuiescent() && fjp.getActiveThreadCount() > 0) { w++; } log.info("Waited for " + Integer.toString(w) + " Cycles!"); for (Future<ArrayList<String>> f : futures) { try { outdata.addAll(f.get()); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ExecutionException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } log.info("Finished Parsing @ " + Calendar.getInstance().getTime().toString()); int cp = 0; // post data log.info("Posting Data @ " + Calendar.getInstance().getTime().toString()); if (outdata.size() > 0) { for (int i = 0; i < qnums; i++) { ArrayList<String> od = new ArrayList<String>( ((cp + (Math.round(outdata.size() / qnums) - cp)))); if (cp + (Math.round(outdata.size() / qnums)) < outdata.size()) { od.addAll(outdata.subList(cp, (cp + (Math.round(outdata.size() / qnums))))); } else { od.addAll(outdata.subList(cp, (outdata.size() - 1))); } fjp.execute(new SplitPost(template, od)); cp += Math.round(outdata.size() / qnums); } int w = 0; while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == 
false) { w++; } log.info("Waited for " + Integer.toString(w) + " cycles!"); } log.info("Finished Posting to DB @ " + Calendar.getInstance().getTime().toString()); // size should remain same with 10 slot buffer room data.clear(); outdata.clear(); } // my favorite really desperate attempt to actually invoke garbage // collection because of MASSIVE STRINGS System.gc(); Runtime.getRuntime().gc(); } while (run); log.info("Shutting Down FJP"); // shutdown fjp if (fjp.isShutdown() == false) { fjp.shutdownNow(); } log.info("Finished Parsing @ " + Calendar.getInstance().getTime().toString()); }
From source file:structuredPredictionNLG.SFX.java
/** * This method goes through the ActionSequence one time-step at the time, and creates a feature and cost vector for each one. * Meanwhile it tracks the context information that the feature vector requires. *///from w w w.j ava2 s . c o m @Override public void run() { String predicate = di.getMeaningRepresentation().getPredicate(); ArrayList<Action> refSequence = di.getDirectReferenceSequence(); //Collections to track which attribute/value pairs have already be mentioned in the sequence and which are yet to be mentioned HashSet<String> attrValuesAlreadyMentioned = new HashSet<>(); HashSet<String> attrValuesToBeMentioned = new HashSet<>(); for (String attribute : di.getMeaningRepresentation().getAttributeValues().keySet()) { for (String value : di.getMeaningRepresentation().getAttributeValues().get(attribute)) { attrValuesToBeMentioned.add(attribute.toLowerCase() + "=" + value.toLowerCase()); } } if (attrValuesToBeMentioned.isEmpty()) { attrValuesToBeMentioned.add("empty=empty"); } // First we create the feature and cost vectors for the content actions ArrayList<String> attributeSequence = new ArrayList<>(); String attrValue = ""; // For every step of the sequence for (int w = 0; w < refSequence.size(); w++) { if (!refSequence.get(w).getAttribute().equals(Action.TOKEN_PUNCT) && !refSequence.get(w).getAttribute().equals(attrValue)) { if (!attrValue.isEmpty()) { attrValuesToBeMentioned.remove(attrValue); } // Create the feature and cost vector Instance contentTrainingVector = SFX.createContentInstance(predicate, refSequence.get(w).getAttribute(), attributeSequence, attrValuesAlreadyMentioned, attrValuesToBeMentioned, di.getMeaningRepresentation(), SFX.getAvailableContentActions()); if (contentTrainingVector != null) { predicateContentTrainingData.get(di).get(predicate).add(contentTrainingVector); } attributeSequence.add(refSequence.get(w).getAttribute()); attrValue = refSequence.get(w).getAttribute(); if (!attrValue.isEmpty()) { 
attrValuesAlreadyMentioned.add(attrValue); attrValuesToBeMentioned.remove(attrValue); } } } // Reset the tracking collections attrValuesAlreadyMentioned = new HashSet<>(); attrValuesToBeMentioned = new HashSet<>(); for (String attribute : di.getMeaningRepresentation().getAttributeValues().keySet()) { for (String value : di.getMeaningRepresentation().getAttributeValues().get(attribute)) { attrValuesToBeMentioned.add(attribute.toLowerCase() + "=" + value.toLowerCase()); } } if (attrValuesToBeMentioned.isEmpty()) { attrValuesToBeMentioned.add("empty=empty"); } // Then we create the feature and cost vectors for the word actions // Each word action corresponds to a content action, so we need to keep track of which content action we are "generating" from at each timestep ArrayList<String> attrs = new ArrayList<>(); boolean isValueMentioned = false; // The value that we currently need to mention String valueTBM = ""; // These track the content (attribute/value pairs) attrValue = ""; // Time-step counter int a = -1; // This tracks the subphrase consisting of the words generated for the current content action ArrayList<String> subPhrase = new ArrayList<>(); // For every step of the sequence for (int w = 0; w < refSequence.size(); w++) { if (!refSequence.get(w).getAttribute().equals(Action.TOKEN_PUNCT)) { // If this action does not belong to the current content, we need to update the trackers and switch to the new content action if (!refSequence.get(w).getAttribute().equals(attrValue)) { a++; if (!attrValue.isEmpty()) { attrValuesToBeMentioned.remove(attrValue); } attrs.add(refSequence.get(w).getAttribute()); attrValue = refSequence.get(w).getAttribute(); subPhrase = new ArrayList<>(); isValueMentioned = false; valueTBM = ""; if (attrValue.contains("=")) { valueTBM = attrValue.substring(attrValue.indexOf('=') + 1); } if (valueTBM.isEmpty()) { isValueMentioned = true; } } // If it's not the end of the ActionSequence if (!attrValue.equals(Action.TOKEN_END)) { // The 
subsequence of content actions we have generated for so far ArrayList<String> predictedAttributesForInstance = new ArrayList<>(); for (int i = 0; i < attrs.size() - 1; i++) { predictedAttributesForInstance.add(attrs.get(i)); } // ...exclusive of the current content action if (!attrs.get(attrs.size() - 1).equals(attrValue)) { predictedAttributesForInstance.add(attrs.get(attrs.size() - 1)); } // The subsequence of content actions we will generated for after the current content action ArrayList<String> nextAttributesForInstance = new ArrayList<>( attributeSequence.subList(a + 1, attributeSequence.size())); // Create the feature and cost vector Instance wordTrainingVector = SFX.createWordInstance(predicate, refSequence.get(w), predictedAttributesForInstance, new ArrayList<>(refSequence.subList(0, w)), nextAttributesForInstance, attrValuesAlreadyMentioned, attrValuesToBeMentioned, isValueMentioned, SFX.getAvailableWordActions().get(predicate)); if (wordTrainingVector != null) { String attribute = attrValue; if (attribute.contains("=")) { attribute = attrValue.substring(0, attrValue.indexOf('=')); } if (!predicateWordTrainingData.get(di).containsKey(predicate)) { predicateWordTrainingData.get(di).put(predicate, new HashMap<String, ArrayList<Instance>>()); } if (!predicateWordTrainingData.get(di).get(predicate).containsKey(attribute)) { predicateWordTrainingData.get(di).get(predicate).put(attribute, new ArrayList<Instance>()); } predicateWordTrainingData.get(di).get(predicate).get(attribute).add(wordTrainingVector); if (!refSequence.get(w).getWord().equals(Action.TOKEN_START) && !refSequence.get(w).getWord().equals(Action.TOKEN_END)) { subPhrase.add(refSequence.get(w).getWord()); } } // Check if we have mentioned the value of the current content action if (!isValueMentioned) { // If the value is a variable, we just check if the word action we just generated is that variable if (refSequence.get(w).getWord().startsWith(Action.TOKEN_X) && (valueTBM.matches("[xX][0-9]+") || 
valueTBM.matches("\"[xX][0-9]+\"") || valueTBM.startsWith(Action.TOKEN_X))) { isValueMentioned = true; // Otherwise } else if (!refSequence.get(w).getWord().startsWith(Action.TOKEN_X) && !(valueTBM.matches("[xX][0-9]+") || valueTBM.matches("\"[xX][0-9]+\"") || valueTBM.startsWith(Action.TOKEN_X))) { // We form the key for the value, as it appears in the valueAlignments collection String valueToCheck = valueTBM; if (valueToCheck.equals("no") || valueToCheck.equals("yes") || valueToCheck.equals("yes or no") || valueToCheck.equals("none") || valueToCheck.equals("empty")) { String attribute = attrValue; if (attribute.contains("=")) { attribute = attrValue.substring(0, attrValue.indexOf('=')); } valueToCheck = attribute + ":" + valueTBM; } // We look up the value in all the value alignments we have made during the parsing of the data, and see if it is mentioned in the subphrase // Note that the value may be formed by multiple word actions if (!valueToCheck.equals("empty:empty") && SFX.getValueAlignments().containsKey(valueToCheck)) { for (ArrayList<String> alignedStr : SFX.getValueAlignments().get(valueToCheck) .keySet()) { if (SFX.endsWith(subPhrase, alignedStr)) { isValueMentioned = true; break; } } } } if (isValueMentioned) { attrValuesAlreadyMentioned.add(attrValue); attrValuesToBeMentioned.remove(attrValue); } } // We also check if we have inadvertedly mentioned some other pending value (not the current one) String mentionedAttrValue = ""; if (!refSequence.get(w).getWord().startsWith(Action.TOKEN_X)) { for (String attrValueTBM : attrValuesToBeMentioned) { if (attrValueTBM.contains("=")) { String value = attrValueTBM.substring(attrValueTBM.indexOf('=') + 1); if (!(value.matches("\"[xX][0-9]+\"") || value.matches("[xX][0-9]+") || value.startsWith(Action.TOKEN_X))) { String valueToCheck = value; if (valueToCheck.equals("no") || valueToCheck.equals("yes") || valueToCheck.equals("yes or no") || valueToCheck.equals("none") || valueToCheck.equals("empty")) { valueToCheck 
= attrValueTBM.replace("=", ":"); } if (!valueToCheck.equals("empty:empty") && SFX.getValueAlignments().containsKey(valueToCheck)) { for (ArrayList<String> alignedStr : SFX.getValueAlignments() .get(valueToCheck).keySet()) { if (SFX.endsWith(subPhrase, alignedStr)) { mentionedAttrValue = attrValueTBM; break; } } } } } } } if (!mentionedAttrValue.isEmpty()) { attrValuesAlreadyMentioned.add(mentionedAttrValue); attrValuesToBeMentioned.remove(mentionedAttrValue); } } } } }
From source file:MSUmpire.PeptidePeakClusterDetection.PDHandlerBase.java
protected void FindAllMzTracePeakCurves(ScanCollection scanCollection) throws IOException { // final HashSet<String> IncludedHashMap = new HashSet<>(); // Logger.getRootLogger().info("Processing all scans to detect possible m/z peak curves...."); Logger.getRootLogger().info("Processing all scans to detect possible m/z peak curves and"); Logger.getRootLogger().info("Smoothing detected signals......"); float preRT = 0f; //Loop for each scan in the ScanCollection final ArrayList<ForkJoinTask<ArrayList<PeakCurve>>> ftemp = new ArrayList<>(); final ForkJoinPool fjp = new ForkJoinPool(NoCPUs); final int idx_end = scanCollection.GetScanNoArray(MSlevel).size(); final int[] ia = new int[idx_end + 1]; ia[0] = 0;// w w w. j a v a 2 s .co m for (int idx = 0; idx < idx_end; idx++) { final int scanNO = scanCollection.GetScanNoArray(MSlevel).get(idx); final ScanData sd = scanCollection.GetScan(scanNO); ia[idx + 1] = sd.Data.size() + ia[idx]; } final boolean[] included = new boolean[ia[ia.length - 1]]; if (step == -1) step = fjp.getParallelism() * 32; long peakCurvesCount = 0; for (int idx = 0; idx < idx_end; idx++) { int scanNO = scanCollection.GetScanNoArray(MSlevel).get(idx); ScanData scanData = scanCollection.GetScan(scanNO); //If we are doing targeted peak detection and the RT of current scan is not in the range of targeted list, jump to the next scan if (TargetedOnly && !FoundInInclusionRTList(scanData.RetentionTime)) { continue; } if (idx == 0) { preRT = scanData.RetentionTime - 0.01f; } for (int i = 0; i < scanData.PointCount(); i++) { XYData peak = scanData.Data.get(i); //If we are doing targeted peak detection and the RT and m/z of current peak is not in the range of targeted list, jump to the next peak if (TargetedOnly && !FoundInInclusionMZList(scanData.RetentionTime, peak.getX())) { continue; } if (peak.getX() < parameter.MinMZ) { continue; } //Check if the current peak has been included in previously developed peak curves // if (!IncludedHashMap.contains(scanNO + "_" 
+ peak.getX())) {//The peak hasn't been included final int id_scanNO_peak = int_id(ia, idx, i); if (!included[id_scanNO_peak]) {//The peak hasn't been included //The current peak will be the starting peak of a new peak curve //Add it to the hash table // IncludedHashMap.add(scanNO + "_" + peak.getX()); included[id_scanNO_peak] = true; float startmz = peak.getX(); float startint = peak.getY(); //Find the maximum peak within PPM window as the starting peak for (int j = i + 1; j < scanData.PointCount(); j++) { XYData currentpeak = scanData.Data.get(j); final int id_scanNO_currentpeak = int_id(ia, idx, j); if (!included[id_scanNO_currentpeak]) { // if (!IncludedHashMap.contains(scanNO + "_" + currentpeak.getX())) { if (InstrumentParameter.CalcPPM(currentpeak.getX(), startmz) <= PPM) { included[id_scanNO_currentpeak] = true; // IncludedHashMap.add(scanNO + "_" + currentpeak.getX()); if (currentpeak.getY() >= startint) { startmz = currentpeak.getX(); startint = currentpeak.getY(); } } else { break; } } } //Initialize a new peak curve PeakCurve Peakcurve = new PeakCurve(parameter); //Add a background peak Peakcurve.AddPeak(preRT, startmz, scanData.background); //Add the starting peak Peakcurve.AddPeak(scanData.RetentionTime, startmz, startint); Peakcurve.StartScan = scanNO; int missedScan = 0; float endrt = scanData.RetentionTime; int endScan = scanData.ScanNum; float bk = 0f; //Starting from the next scan, find the following peaks given the starting peak for (int idx2 = idx + 1; idx2 < scanCollection.GetScanNoArray(MSlevel).size() && (missedScan < parameter.NoMissedScan /*|| (TargetedOnly && Peakcurve.RTWidth()<parameter.MaxCurveRTRange)*/); idx2++) { int scanNO2 = scanCollection.GetScanNoArray(MSlevel).get(idx2); ScanData scanData2 = scanCollection.GetScan(scanNO2); endrt = scanData2.RetentionTime; endScan = scanData2.ScanNum; bk = scanData2.background; float currentmz = 0f; float currentint = 0f; //If the scan is empty if (scanData2.PointCount() == 0) { if 
(parameter.FillGapByBK) { Peakcurve.AddPeak(scanData2.RetentionTime, Peakcurve.TargetMz, scanData2.background); } missedScan++; continue; } //Find the m/z index int mzidx = scanData2.GetLowerIndexOfX(Peakcurve.TargetMz); for (int pkidx = mzidx; pkidx < scanData2.Data.size(); pkidx++) { XYData currentpeak = scanData2.Data.get(pkidx); if (currentpeak.getX() < parameter.MinMZ) { continue; } //Check if the peak has been included or not final int int_id_scanNO2_currentpeak = int_id(ia, idx2, pkidx); // if (!included.get(int_id_scanNO2_currentpeak)) { if (!included[int_id_scanNO2_currentpeak]) { if (InstrumentParameter.CalcPPM(currentpeak.getX(), Peakcurve.TargetMz) > PPM) { if (currentpeak.getX() > Peakcurve.TargetMz) { break; } } else { //////////The peak is in the ppm window, select the highest peak included[int_id_scanNO2_currentpeak] = true; // IncludedHashMap.add(scanNO2 + "_" + currentpeak.getX()); if (currentint < currentpeak.getY()) { currentmz = currentpeak.getX(); currentint = currentpeak.getY(); } } } } //No peak in the PPM window has been found if (currentmz == 0f) { if (parameter.FillGapByBK) { Peakcurve.AddPeak(scanData2.RetentionTime, Peakcurve.TargetMz, scanData2.background); } missedScan++; } else { missedScan = 0; Peakcurve.AddPeak(scanData2.RetentionTime, currentmz, currentint); } } Peakcurve.AddPeak(endrt, Peakcurve.TargetMz, bk); Peakcurve.EndScan = endScan; //First check if the peak curve is in targeted list if (FoundInInclusionList(Peakcurve.TargetMz, Peakcurve.StartRT(), Peakcurve.EndRT())) { // LCMSPeakBase.UnSortedPeakCurves.add(Peakcurve); ++peakCurvesCount; ftemp.add(fjp.submit(new PeakCurveSmoothingUnit(Peakcurve, parameter))); //Then check if the peak curve passes the criteria } else if (Peakcurve.GetRawSNR() > LCMSPeakBase.SNR && Peakcurve.GetPeakList().size() >= parameter.MinPeakPerPeakCurve + 2) { // LCMSPeakBase.UnSortedPeakCurves.add(Peakcurve); ++peakCurvesCount; ftemp.add(fjp.submit(new PeakCurveSmoothingUnit(Peakcurve, parameter))); 
} else { Peakcurve = null; } } } preRT = scanData.RetentionTime; if (ReleaseScans) { scanData.dispose(); } /** the if statement below does PeakCurveSmoothing() and ClearRawPeaks() */ final boolean last_iter = idx + 1 == idx_end; if (ftemp.size() == step || last_iter) { final List<ForkJoinTask<ArrayList<PeakCurve>>> ftemp_sublist_view = last_iter ? ftemp : ftemp.subList(0, step / 2); for (final Future<ArrayList<PeakCurve>> f : ftemp_sublist_view) { try { LCMSPeakBase.UnSortedPeakCurves.addAll(f.get()); } catch (InterruptedException | ExecutionException e) { throw new RuntimeException(e); } } ftemp_sublist_view.clear(); if (!last_iter && fjp.getActiveThreadCount() < fjp.getParallelism()) { // System.out.println("PeakCurveSmoothingUnit: fjp.getActiveThreadCount()\t"+fjp.getActiveThreadCount()+"\t"+step); step *= 2; } } } assert ftemp.isEmpty(); //System.out.print("PSM removed (PeakCurve generation):" + PSMRemoved ); int i = 1; //Assign peak curve index for (PeakCurve peakCurve : LCMSPeakBase.UnSortedPeakCurves) { peakCurve.Index = i++; } System.gc(); // Logger.getRootLogger().info(LCMSPeakBase.UnSortedPeakCurves.size() + " Peak curves found (Memory usage:" + Math.round((Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1048576) + "MB)"); Logger.getRootLogger() .info(peakCurvesCount + " Peak curves found (Memory usage:" + Math.round( (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1048576) + "MB)"); }
From source file:com.virtusa.isq.vtaf.runtime.SeleniumTestBase.java
/** * Validate table cell value function./*w w w . j av a 2 s . co m*/ * * @param element * the element * @param objectName * the object name * @param expectedvalue * the expectedvalue * @param fail * the fail * @throws Exception * the exception */ private void validateCellValue(final WebElement element, final String objectName, final String expectedvalue, final boolean fail, final Object[] customError) throws Exception { ArrayList<String> inputStringArray; boolean failedOnce = false; int row = -1; int col = -1; String cellText = ""; String result = ""; ArrayList<String> htmlTable = new ArrayList<String>(); final int inputStringItems = 3; inputStringArray = new ArrayList<String>( Arrays.asList(expectedvalue.split("(?<!\\\\),", Integer.MAX_VALUE))); ArrayList<String> tempInputTable = new ArrayList<String>(); for (String inputVal : inputStringArray) { String formattedValue = inputVal.replaceAll("\\\\,", ","); tempInputTable.add(formattedValue); } inputStringArray = tempInputTable; if (inputStringArray.size() < inputStringItems) { failedOnce = true; if (customError != null && !(customError[0].equals("null") || customError[0].equals(""))) { result = " verification data not provided correctly"; reportresult(true, "CHECK TABLE :TABLE CELL", "FAILED", " Custom Error :" + generateCustomError(customError) + " System generated Error : " + objectName + "'s CELL validation " + " is not as expected " + result); checkTrue(false, fail, objectName + " Custom Error :" + generateCustomError(customError) + " System generated Error : " + objectName + "'s CELL validation " + " is not as expected " + result); return; } else { result = " verification data not provided correctly"; reportresult(true, "CHECK TABLE :TABLE CELL", "FAILED", objectName + "'s CELL validation " + " is not as expected " + result); checkTrue(false, fail, objectName + "'s CELL validation " + " is not as expected " + result); return; } } row = Integer.parseInt(inputStringArray.get(0)); col = 
Integer.parseInt(inputStringArray.get(1)); cellText = StringUtils.join(inputStringArray.subList(2, inputStringArray.size()).toArray(), ","); try { htmlTable = getAppTableRow(element, row); } catch (Exception ex) { failedOnce = true; result = result + "|Expected Row : " + row + " cannot be found in the actual table \n"; } int verifyIndex = col; // get the sequential index of the value to be // verified String verifyValue = ""; try { verifyValue = htmlTable.get(verifyIndex).trim(); if (!cellText.equals(verifyValue)) { failedOnce = true; result = result + "|Expected : " + cellText + " Actual :" + htmlTable.get(verifyIndex) + "\n"; } } catch (IndexOutOfBoundsException ex) { failedOnce = true; result = result + "|Expected Column : " + verifyIndex + " cannot be found in the actual table \n"; } if (failedOnce) { reportresult(true, "CHECK TABLE :TABLE CELL", "FAILED", objectName + "'s TABLECELL validation " + " is not as expected " + result); checkTrue(false, fail, objectName + "'s TABLECELL validation " + " is not as expected " + result); } else { reportresult(true, "CHECK TABLE :TABLE CELL", "PASSED", objectName + " . Input Value = " + expectedvalue); } }
From source file:com.jhh.hdb.sqlparser.MySemanticAnalyzer.java
/** * Generate the conversion SelectOperator that converts the columns into the * types that are expected by the table_desc. *//* w ww . j av a2 s.c o m*/ Operator genConversionSelectOperator(String dest, QB qb, Operator input, TableDesc table_desc, DynamicPartitionCtx dpCtx) throws SemanticException { StructObjectInspector oi = null; try { Deserializer deserializer = table_desc.getDeserializerClass().newInstance(); SerDeUtils.initializeSerDe(deserializer, conf, table_desc.getProperties(), null); oi = (StructObjectInspector) deserializer.getObjectInspector(); } catch (Exception e) { throw new SemanticException(e); } // Check column number List<? extends StructField> tableFields = oi.getAllStructFieldRefs(); boolean dynPart = HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING); ArrayList<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver().getColumnInfos(); int inColumnCnt = rowFields.size(); int outColumnCnt = tableFields.size(); if (dynPart && dpCtx != null) { outColumnCnt += dpCtx.getNumDPCols(); } if (deleting()) { // Figure out if we have partition columns in the list or not. If so, // add them into the mapping. Partition columns will be located after the row id. if (rowFields.size() > 1) { // This means we have partition columns to deal with, so set up the mapping from the // input to the partition columns. dpCtx.mapInputToDP(rowFields.subList(1, rowFields.size())); } } else if (updating()) { // In this case we expect the number of in fields to exceed the number of out fields by one // (for the ROW__ID virtual column). 
If there are more columns than this, // then the extras are for dynamic partitioning if (dynPart && dpCtx != null) { dpCtx.mapInputToDP(rowFields.subList(tableFields.size() + 1, rowFields.size())); } } else { if (inColumnCnt != outColumnCnt) { String reason = "Table " + dest + " has " + outColumnCnt + " columns, but query has " + inColumnCnt + " columns."; throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH .getMsg(qb.getParseInfo().getDestForClause(dest), reason)); } else if (dynPart && dpCtx != null) { // create the mapping from input ExprNode to dest table DP column dpCtx.mapInputToDP(rowFields.subList(tableFields.size(), rowFields.size())); } } // Check column types boolean converted = false; int columnNumber = tableFields.size(); ArrayList<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(columnNumber); // MetadataTypedColumnsetSerDe does not need type conversions because it // does the conversion to String by itself. boolean isMetaDataSerDe = table_desc.getDeserializerClass().equals(MetadataTypedColumnsetSerDe.class); boolean isLazySimpleSerDe = table_desc.getDeserializerClass().equals(LazySimpleSerDe.class); if (!isMetaDataSerDe && !deleting()) { // If we're updating, add the ROW__ID expression, then make the following column accesses // offset by 1 so that we don't try to convert the ROW__ID if (updating()) { expressions.add(new ExprNodeColumnDesc(rowFields.get(0).getType(), rowFields.get(0).getInternalName(), "", true)); } // here only deals with non-partition columns. We deal with partition columns next for (int i = 0; i < columnNumber; i++) { int rowFieldsOffset = updating() ? 
i + 1 : i; ObjectInspector tableFieldOI = tableFields.get(i).getFieldObjectInspector(); TypeInfo tableFieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(tableFieldOI); TypeInfo rowFieldTypeInfo = rowFields.get(rowFieldsOffset).getType(); ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(rowFieldsOffset).getInternalName(), "", false, rowFields.get(rowFieldsOffset).isSkewedCol()); // LazySimpleSerDe can convert any types to String type using // JSON-format. if (!tableFieldTypeInfo.equals(rowFieldTypeInfo) && !(isLazySimpleSerDe && tableFieldTypeInfo.getCategory().equals(Category.PRIMITIVE) && tableFieldTypeInfo.equals(TypeInfoFactory.stringTypeInfo))) { // need to do some conversions here converted = true; if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) { // cannot convert to complex types column = null; } else { column = ParseUtils.createConversionCast(column, (PrimitiveTypeInfo) tableFieldTypeInfo); } if (column == null) { String reason = "Cannot convert column " + i + " from " + rowFieldTypeInfo + " to " + tableFieldTypeInfo + "."; throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH .getMsg(qb.getParseInfo().getDestForClause(dest), reason)); } } expressions.add(column); } } // deal with dynamic partition columns: convert ExprNodeDesc type to String?? if (dynPart && dpCtx != null && dpCtx.getNumDPCols() > 0) { // DP columns starts with tableFields.size() for (int i = tableFields.size() + (updating() ? 1 : 0); i < rowFields.size(); ++i) { TypeInfo rowFieldTypeInfo = rowFields.get(i).getType(); ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(i).getInternalName(), "", false); expressions.add(column); } // converted = true; // [TODO]: should we check & convert type to String and set it to true? 
} if (converted) { // add the select operator RowResolver rowResolver = new RowResolver(); ArrayList<String> colNames = new ArrayList<String>(); Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); for (int i = 0; i < expressions.size(); i++) { String name = getColumnInternalName(i); rowResolver.put("", name, new ColumnInfo(name, expressions.get(i).getTypeInfo(), "", false)); colNames.add(name); colExprMap.put(name, expressions.get(i)); } Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(expressions, colNames), new RowSchema(rowResolver.getColumnInfos()), input), rowResolver); output.setColumnExprMap(colExprMap); return output; } else { // not converted return input; } }
From source file:structuredPredictionNLG.SFX.java
/** * * @param classifierAttrs//from w ww . ja va 2 s .com * @param classifierWords * @param testingData * @param epoch * @return */ @Override public Double evaluateGeneration(HashMap<String, JAROW> classifierAttrs, HashMap<String, HashMap<String, JAROW>> classifierWords, ArrayList<DatasetInstance> testingData, int epoch) { System.out.println("Evaluate argument generation "); ArrayList<ScoredFeaturizedTranslation<IString, String>> generations = new ArrayList<>(); HashMap<DatasetInstance, ArrayList<Action>> generationActions = new HashMap<>(); ArrayList<ArrayList<Sequence<IString>>> finalReferences = new ArrayList<>(); HashMap<DatasetInstance, ArrayList<String>> finalReferencesWordSequences = new HashMap<>(); HashMap<DatasetInstance, String> predictedWordSequences_overAllPredicates = new HashMap<>(); ArrayList<String> allPredictedWordSequences = new ArrayList<>(); ArrayList<String> allPredictedMRStr = new ArrayList<>(); ArrayList<ArrayList<String>> allPredictedReferences = new ArrayList<>(); HashMap<String, Double> attrCoverage = new HashMap<>(); HashMap<String, HashSet<String>> abstractMRsToMRs = new HashMap<>(); for (DatasetInstance di : testingData) { String predicate = di.getMeaningRepresentation().getPredicate(); ArrayList<Action> predictedActionList = new ArrayList<>(); ArrayList<Action> predictedWordList = new ArrayList<>(); //PHRASE GENERATION EVALUATION String predictedAttr = ""; ArrayList<String> predictedAttrValues = new ArrayList<>(); HashSet<String> attrValuesToBeMentioned = new HashSet<>(); HashSet<String> attrValuesAlreadyMentioned = new HashSet<>(); for (String attribute : di.getMeaningRepresentation().getAttributeValues().keySet()) { for (String value : di.getMeaningRepresentation().getAttributeValues().get(attribute)) { attrValuesToBeMentioned.add(attribute.toLowerCase() + "=" + value.toLowerCase()); } } if (attrValuesToBeMentioned.isEmpty()) { attrValuesToBeMentioned.add("empty=empty"); } while (!predictedAttr.equals(Action.TOKEN_END) && 
predictedAttrValues.size() < getMaxContentSequenceLength()) { if (!predictedAttr.isEmpty()) { attrValuesToBeMentioned.remove(predictedAttr); } if (!attrValuesToBeMentioned.isEmpty()) { Instance attrTrainingVector = createContentInstance(predicate, "@TOK@", predictedAttrValues, attrValuesAlreadyMentioned, attrValuesToBeMentioned, di.getMeaningRepresentation(), getAvailableContentActions()); if (attrTrainingVector != null) { Prediction predictAttr = classifierAttrs.get(predicate).predict(attrTrainingVector); if (predictAttr.getLabel() != null) { predictedAttr = predictAttr.getLabel().trim(); if (!classifierAttrs.get(predicate).getCurrentWeightVectors().keySet() .containsAll(di.getMeaningRepresentation().getAttributeValues().keySet())) { System.out.println("MR ATTR NOT IN CLASSIFIERS"); System.out .println(classifierAttrs.get(predicate).getCurrentWeightVectors().keySet()); } String predictedValue = ""; if (!predictedAttr.equals(Action.TOKEN_END)) { predictedValue = chooseNextValue(predictedAttr, attrValuesToBeMentioned); HashSet<String> rejectedAttrs = new HashSet<>(); while (predictedValue.isEmpty() && (!predictedAttr.equals(Action.TOKEN_END) || (predictedAttrValues.isEmpty() && classifierAttrs.get(predicate).getCurrentWeightVectors().keySet() .containsAll(di.getMeaningRepresentation() .getAttributeValues().keySet())))) { rejectedAttrs.add(predictedAttr); predictedAttr = Action.TOKEN_END; double maxScore = -Double.MAX_VALUE; for (String attr : predictAttr.getLabel2Score().keySet()) { if (!rejectedAttrs.contains(attr) && (Double .compare(predictAttr.getLabel2Score().get(attr), maxScore) > 0)) { maxScore = predictAttr.getLabel2Score().get(attr); predictedAttr = attr; } } if (!predictedAttr.equals(Action.TOKEN_END)) { predictedValue = chooseNextValue(predictedAttr, attrValuesToBeMentioned); } } } if (!predictedAttr.equals(Action.TOKEN_END)) { predictedAttr += "=" + predictedValue; } predictedAttrValues.add(predictedAttr); if (!predictedAttr.isEmpty()) { 
attrValuesAlreadyMentioned.add(predictedAttr); attrValuesToBeMentioned.remove(predictedAttr); } } else { predictedAttr = Action.TOKEN_END; predictedAttrValues.add(predictedAttr); } } else { predictedAttr = Action.TOKEN_END; predictedAttrValues.add(predictedAttr); } } else { predictedAttr = Action.TOKEN_END; predictedAttrValues.add(predictedAttr); } } //WORD SEQUENCE EVALUATION predictedAttr = ""; ArrayList<String> predictedAttributes = new ArrayList<>(); attrValuesToBeMentioned = new HashSet<>(); attrValuesAlreadyMentioned = new HashSet<>(); HashMap<String, ArrayList<String>> valuesToBeMentioned = new HashMap<>(); for (String attribute : di.getMeaningRepresentation().getAttributeValues().keySet()) { for (String value : di.getMeaningRepresentation().getAttributeValues().get(attribute)) { attrValuesToBeMentioned.add(attribute.toLowerCase() + "=" + value.toLowerCase()); } valuesToBeMentioned.put(attribute, new ArrayList<>(di.getMeaningRepresentation().getAttributeValues().get(attribute))); } if (attrValuesToBeMentioned.isEmpty()) { attrValuesToBeMentioned.add("empty=empty"); } HashSet<String> attrValuesToBeMentionedCopy = new HashSet<>(attrValuesToBeMentioned); int a = -1; for (String attrValue : predictedAttrValues) { a++; if (!attrValue.equals(Action.TOKEN_END)) { String attribute = attrValue.split("=")[0]; predictedAttributes.add(attrValue); //GENERATE PHRASES if (!attribute.equals(Action.TOKEN_END)) { if (classifierWords.get(predicate).containsKey(attribute)) { ArrayList<String> nextAttributesForInstance = new ArrayList<>( predictedAttrValues.subList(a + 1, predictedAttrValues.size())); String predictedWord = ""; boolean isValueMentioned = false; String valueTBM = ""; if (attrValue.contains("=")) { valueTBM = attrValue.substring(attrValue.indexOf('=') + 1); } if (valueTBM.isEmpty()) { isValueMentioned = true; } ArrayList<String> subPhrase = new ArrayList<>(); while (!predictedWord.equals(Action.TOKEN_END) && predictedWordList.size() < getMaxWordSequenceLength()) { 
ArrayList<String> predictedAttributesForInstance = new ArrayList<>(); for (int i = 0; i < predictedAttributes.size() - 1; i++) { predictedAttributesForInstance.add(predictedAttributes.get(i)); } if (!predictedAttributes.get(predictedAttributes.size() - 1).equals(attrValue)) { predictedAttributesForInstance .add(predictedAttributes.get(predictedAttributes.size() - 1)); } Instance wordTrainingVector = createWordInstance(predicate, new Action("@TOK@", attrValue), predictedAttributesForInstance, predictedActionList, nextAttributesForInstance, attrValuesAlreadyMentioned, attrValuesToBeMentioned, isValueMentioned, getAvailableWordActions().get(predicate)); if (wordTrainingVector != null && classifierWords.get(predicate) != null) { if (classifierWords.get(predicate).get(attribute) != null) { Prediction predictWord = classifierWords.get(predicate).get(attribute) .predict(wordTrainingVector); if (predictWord.getLabel() != null) { predictedWord = predictWord.getLabel().trim(); while (predictedWord.equals(Action.TOKEN_END) && !predictedActionList.isEmpty() && predictedActionList.get(predictedActionList.size() - 1) .getWord().equals(Action.TOKEN_END)) { double maxScore = -Double.MAX_VALUE; for (String word : predictWord.getLabel2Score().keySet()) { if (!word.equals(Action.TOKEN_END) && (Double.compare( predictWord.getLabel2Score().get(word), maxScore) > 0)) { maxScore = predictWord.getLabel2Score().get(word); predictedWord = word; } } } predictedActionList.add(new Action(predictedWord, attrValue)); if (!predictedWord.equals(Action.TOKEN_START) && !predictedWord.equals(Action.TOKEN_END)) { subPhrase.add(predictedWord); predictedWordList.add(new Action(predictedWord, attrValue)); } } else { predictedWord = Action.TOKEN_END; predictedActionList.add(new Action(predictedWord, attrValue)); } } else { predictedWord = Action.TOKEN_END; predictedActionList.add(new Action(predictedWord, attrValue)); } } if (!isValueMentioned) { if (!predictedWord.equals(Action.TOKEN_END)) { if 
(predictedWord.startsWith(Action.TOKEN_X) && (valueTBM.matches("\"[xX][0-9]+\"") || valueTBM.matches("[xX][0-9]+") || valueTBM.startsWith(Action.TOKEN_X))) { isValueMentioned = true; } else if (!predictedWord.startsWith(Action.TOKEN_X) && !(valueTBM.matches("\"[xX][0-9]+\"") || valueTBM.matches("[xX][0-9]+") || valueTBM.startsWith(Action.TOKEN_X))) { String valueToCheck = valueTBM; if (valueToCheck.equals("no") || valueToCheck.equals("yes") || valueToCheck.equals("yes or no") || valueToCheck.equals("none") //|| valueToCheck.equals("dont_care") || valueToCheck.equals("empty")) { if (attribute.contains("=")) { valueToCheck = attribute.replace("=", ":"); } else { valueToCheck = attribute + ":" + valueTBM; } } if (!valueToCheck.equals("empty:empty") && getValueAlignments().containsKey(valueToCheck)) { for (ArrayList<String> alignedStr : getValueAlignments() .get(valueToCheck).keySet()) { if (endsWith(subPhrase, alignedStr)) { isValueMentioned = true; break; } } } } } if (isValueMentioned) { attrValuesAlreadyMentioned.add(attrValue); attrValuesToBeMentioned.remove(attrValue); } } String mentionedAttrValue = ""; if (!predictedWord.startsWith(Action.TOKEN_X)) { for (String attrValueTBM : attrValuesToBeMentioned) { if (attrValueTBM.contains("=")) { String value = attrValueTBM.substring(attrValueTBM.indexOf('=') + 1); if (!(value.matches("\"[xX][0-9]+\"") || value.matches("[xX][0-9]+") || value.startsWith(Action.TOKEN_X))) { String valueToCheck = value; if (valueToCheck.equals("no") || valueToCheck.equals("yes") || valueToCheck.equals("yes or no") || valueToCheck.equals("none") //|| valueToCheck.equals("dont_care") || valueToCheck.equals("empty")) { valueToCheck = attrValueTBM.replace("=", ":"); } if (!valueToCheck.equals("empty:empty") && getValueAlignments().containsKey(valueToCheck)) { for (ArrayList<String> alignedStr : getValueAlignments() .get(valueToCheck).keySet()) { if (endsWith(subPhrase, alignedStr)) { mentionedAttrValue = attrValueTBM; break; } } } } } } } if 
(!mentionedAttrValue.isEmpty()) { attrValuesAlreadyMentioned.add(mentionedAttrValue); attrValuesToBeMentioned.remove(mentionedAttrValue); } } if (predictedWordList.size() >= getMaxWordSequenceLength() && !predictedActionList .get(predictedActionList.size() - 1).getWord().equals(Action.TOKEN_END)) { predictedWord = Action.TOKEN_END; predictedActionList.add(new Action(predictedWord, attrValue)); } } else { String predictedWord = Action.TOKEN_END; predictedActionList.add(new Action(predictedWord, attrValue)); } } } } ArrayList<String> predictedAttrs = new ArrayList<>(); predictedAttrValues.forEach((attributeValuePair) -> { predictedAttrs.add(attributeValuePair.split("=")[0]); }); String predictedWordSequence = postProcessWordSequence(di, predictedActionList); ArrayList<String> predictedAttrList = getPredictedAttrList(predictedActionList); if (attrValuesToBeMentionedCopy.size() != 0.0) { double missingAttrs = 0.0; missingAttrs = attrValuesToBeMentionedCopy.stream() .filter((attr) -> (!predictedAttrList.contains(attr))).map((_item) -> 1.0) .reduce(missingAttrs, (accumulator, _item) -> accumulator + _item); double attrSize = attrValuesToBeMentionedCopy.size(); attrCoverage.put(predictedWordSequence, missingAttrs / attrSize); } allPredictedWordSequences.add(predictedWordSequence); allPredictedMRStr.add(di.getMeaningRepresentation().getMRstr()); predictedWordSequences_overAllPredicates.put(di, predictedWordSequence); if (!abstractMRsToMRs.containsKey(di.getMeaningRepresentation().getAbstractMR())) { abstractMRsToMRs.put(di.getMeaningRepresentation().getAbstractMR(), new HashSet<String>()); } abstractMRsToMRs.get(di.getMeaningRepresentation().getAbstractMR()) .add(di.getMeaningRepresentation().getMRstr()); Sequence<IString> translation = IStrings .tokenize(NISTTokenizer.tokenize(predictedWordSequence.toLowerCase())); ScoredFeaturizedTranslation<IString, String> tran = new ScoredFeaturizedTranslation<>(translation, null, 0); generations.add(tran); generationActions.put(di, 
predictedActionList); ArrayList<Sequence<IString>> references = new ArrayList<>(); ArrayList<String> referencesStrings = new ArrayList<>(); if (getPerformEvaluationOn().equals("valid") || getPerformEvaluationOn().equals("train")) { for (String ref : di.getEvaluationReferences()) { referencesStrings.add(ref); references.add(IStrings.tokenize(NISTTokenizer.tokenize(ref))); } } else { references = wenEvaluationReferenceSequences.get(di.getMeaningRepresentation().getMRstr()); referencesStrings = wenEvaluationReferences.get(di.getMeaningRepresentation().getMRstr()); if (references == null) { references = new ArrayList<>(); referencesStrings = new ArrayList<>(); for (String ref : di.getEvaluationReferences()) { referencesStrings.add(ref); references.add(IStrings.tokenize(NISTTokenizer.tokenize(ref))); } } } allPredictedReferences.add(referencesStrings); finalReferencesWordSequences.put(di, referencesStrings); finalReferences.add(references); } BLEUMetric BLEU = new BLEUMetric(finalReferences, 4, false); Double bleuScore = BLEU.score(generations); double finalCoverageError = 0.0; finalCoverageError = attrCoverage.values().stream().map((c) -> c).reduce(finalCoverageError, (accumulator, _item) -> accumulator + _item); finalCoverageError /= attrCoverage.size(); for (int i = 0; i < allPredictedWordSequences.size(); i++) { double maxRouge = 0.0; String predictedWordSequence = allPredictedWordSequences.get(i).replaceAll("\\?", " \\? ") .replaceAll(":", " : ").replaceAll("\\.", " \\. 
").replaceAll(",", " , ").replaceAll(" ", " ") .trim(); for (String ref : allPredictedReferences.get(i)) { double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4); if (rouge > maxRouge) { maxRouge = rouge; } } //System.out.println(allPredictedMRStr.get(i) + "\t" + maxRouge + "\t" + allPredictedWordSequences.get(i) + "\t" + refs); } double avgRougeScore = 0.0; String detailedRes = ""; avgRougeScore = testingData.stream().map((di) -> { double maxRouge = 0.0; if (!finalReferencesWordSequences.containsKey(di)) { System.out.println(di.getMeaningRepresentation().getAbstractMR()); } String predictedWordSequence = predictedWordSequences_overAllPredicates.get(di) .replaceAll("\\?", " \\? ").replaceAll(":", " : ").replaceAll("\\.", " \\. ") .replaceAll(",", " , ").replaceAll(" ", " ").trim(); for (String ref : finalReferencesWordSequences.get(di)) { double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4); if (rouge > maxRouge) { maxRouge = rouge; } } return maxRouge; }).map((maxRouge) -> maxRouge).reduce(avgRougeScore, (accumulator, _item) -> accumulator + _item); System.out.println("BLEU: \t" + bleuScore); //System.out.println("g: " + generations); //System.out.println("attr: " + predictedAttrLists); //System.out.println("BLEU smooth: \t" + bleuSmoothScore); //System.out.println("g: " + generations); //System.out.println("attr: " + predictedAttrLists); //System.out.println("BLEU smooth: \t" + bleuSmoothScore); System.out.println("ROUGE: \t" + (avgRougeScore / allPredictedWordSequences.size())); System.out.println("COVERAGE ERROR: \t" + finalCoverageError); System.out.println("BRC: \t" + ((avgRougeScore / allPredictedWordSequences.size()) + bleuScore + (1.0 - finalCoverageError)) / 3.0); if (isCalculateResultsPerPredicate()) { //////////////////////// //ArrayList<String> bestPredictedStrings = new ArrayList<>(); //ArrayList<String> bestPredictedStringsMRs = new ArrayList<>(); double uniqueMRsInTestAndNotInTrainAllPredWordBLEU = 0.0; double 
uniqueMRsInTestAndNotInTrainAllPredWordROUGE = 0.0; double uniqueMRsInTestAndNotInTrainAllPredWordCOVERAGEERR = 0.0; double uniqueMRsInTestAndNotInTrainAllPredWordBRC = 0.0; detailedRes = ""; ArrayList<DatasetInstance> abstractMRList = new ArrayList<>(); HashSet<String> reportedAbstractMRs = new HashSet<>(); testingData.stream() .filter((di) -> (!reportedAbstractMRs.contains(di.getMeaningRepresentation().getAbstractMR()))) .map((di) -> { reportedAbstractMRs.add(di.getMeaningRepresentation().getAbstractMR()); return di; }).forEachOrdered((di) -> { boolean isInTraining = false; for (DatasetInstance di2 : getTrainingData()) { if (di2.getMeaningRepresentation().getAbstractMR() .equals(di.getMeaningRepresentation().getAbstractMR())) { isInTraining = true; } } if (!isInTraining) { for (DatasetInstance di2 : getValidationData()) { if (di2.getMeaningRepresentation().getAbstractMR() .equals(di.getMeaningRepresentation().getAbstractMR())) { isInTraining = true; } } } if (!isInTraining) { abstractMRList.add(di); } }); for (DatasetInstance di : abstractMRList) { Double bestROUGE = -100.0; Double bestBLEU = -100.0; Double bestCover = -100.0; Double bestHarmonicMean = -100.0; String predictedString = predictedWordSequences_overAllPredicates.get(di); reportedAbstractMRs.add(di.getMeaningRepresentation().getAbstractMR()); double maxRouge = 0.0; String predictedWordSequence = predictedString.replaceAll("\\?", " \\? ").replaceAll(":", " : ") .replaceAll("\\.", " \\. 
").replaceAll(",", " , ").replaceAll(" ", " ").trim(); for (String ref : finalReferencesWordSequences.get(di)) { double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4); if (rouge > maxRouge) { maxRouge = rouge; } } double BLEUSmooth = BLEUMetric.computeLocalSmoothScore(predictedWordSequence, finalReferencesWordSequences.get(di), 4); double cover = 1.0 - attrCoverage.get(predictedString); double harmonicMean = 3.0 / (1.0 / BLEUSmooth + 1.0 / maxRouge + 1.0 / cover); if (harmonicMean > bestHarmonicMean) { bestROUGE = maxRouge; bestBLEU = BLEUSmooth; bestCover = cover; bestHarmonicMean = harmonicMean; } uniqueMRsInTestAndNotInTrainAllPredWordBLEU += bestBLEU; uniqueMRsInTestAndNotInTrainAllPredWordROUGE += bestROUGE; uniqueMRsInTestAndNotInTrainAllPredWordCOVERAGEERR += bestCover; uniqueMRsInTestAndNotInTrainAllPredWordBRC += bestHarmonicMean; } uniqueMRsInTestAndNotInTrainAllPredWordBLEU /= abstractMRList.size(); uniqueMRsInTestAndNotInTrainAllPredWordROUGE /= abstractMRList.size(); uniqueMRsInTestAndNotInTrainAllPredWordCOVERAGEERR /= abstractMRList.size(); uniqueMRsInTestAndNotInTrainAllPredWordBRC /= abstractMRList.size(); System.out.println( "UNIQUE (NOT IN TRAIN) WORD ALL PRED BLEU: \t" + uniqueMRsInTestAndNotInTrainAllPredWordBLEU); System.out.println( "UNIQUE (NOT IN TRAIN) WORD ALL PRED ROUGE: \t" + uniqueMRsInTestAndNotInTrainAllPredWordROUGE); System.out.println("UNIQUE (NOT IN TRAIN) WORD ALL PRED COVERAGE ERROR: \t" + (1.0 - uniqueMRsInTestAndNotInTrainAllPredWordCOVERAGEERR)); System.out.println( "UNIQUE (NOT IN TRAIN) WORD ALL PRED BRC: \t" + uniqueMRsInTestAndNotInTrainAllPredWordBRC); abstractMRList.forEach((di) -> { System.out.println(di.getMeaningRepresentation().getAbstractMR() + "\t" + predictedWordSequences_overAllPredicates.get(di)); }); System.out.println("TOTAL SET SIZE: \t" + abstractMRList.size()); //System.out.println(abstractMRList); //System.out.println(detailedRes); } ArrayList<String> bestPredictedStrings = new ArrayList<>(); 
ArrayList<String> bestPredictedStringsMRs = new ArrayList<>(); double uniqueAllPredWordBLEU = 0.0; double uniqueAllPredWordROUGE = 0.0; double uniqueAllPredWordCOVERAGEERR = 0.0; double uniqueAllPredWordBRC = 0.0; HashSet<String> reportedAbstractMRs = new HashSet<>(); for (DatasetInstance di : testingData) { if (!reportedAbstractMRs.contains(di.getMeaningRepresentation().getAbstractMR())) { String bestPredictedString = ""; Double bestROUGE = -100.0; Double bestBLEU = -100.0; Double bestCover = -100.0; Double bestHarmonicMean = -100.0; String predictedString = predictedWordSequences_overAllPredicates.get(di); reportedAbstractMRs.add(di.getMeaningRepresentation().getAbstractMR()); double maxRouge = 0.0; String predictedWordSequence = predictedString.replaceAll("\\?", " \\? ").replaceAll(":", " : ") .replaceAll("\\.", " \\. ").replaceAll(",", " , ").replaceAll(" ", " ").trim(); for (String ref : finalReferencesWordSequences.get(di)) { double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4); if (rouge > maxRouge) { maxRouge = rouge; } } double BLEUSmooth = BLEUMetric.computeLocalSmoothScore(predictedWordSequence, finalReferencesWordSequences.get(di), 4); double cover = 1.0 - attrCoverage.get(predictedString); double harmonicMean = 3.0 / (1.0 / BLEUSmooth + 1.0 / maxRouge + 1.0 / cover); if (harmonicMean > bestHarmonicMean) { bestPredictedString = predictedString; bestROUGE = maxRouge; bestBLEU = BLEUSmooth; bestCover = cover; bestHarmonicMean = harmonicMean; } bestPredictedStrings.add(bestPredictedString); bestPredictedStringsMRs.add(di.getMeaningRepresentation().getMRstr()); uniqueAllPredWordBLEU += bestBLEU; uniqueAllPredWordROUGE += bestROUGE; uniqueAllPredWordCOVERAGEERR += bestCover; uniqueAllPredWordBRC += bestHarmonicMean; } //} } if (isCalculateResultsPerPredicate()) { uniqueAllPredWordBLEU /= reportedAbstractMRs.size(); uniqueAllPredWordROUGE /= reportedAbstractMRs.size(); uniqueAllPredWordCOVERAGEERR /= reportedAbstractMRs.size(); uniqueAllPredWordBRC /= 
reportedAbstractMRs.size(); System.out.println("UNIQUE WORD ALL PRED BLEU: \t" + uniqueAllPredWordBLEU); System.out.println("UNIQUE WORD ALL PRED ROUGE: \t" + uniqueAllPredWordROUGE); System.out.println("UNIQUE WORD ALL PRED COVERAGE ERROR: \t" + (1.0 - uniqueAllPredWordCOVERAGEERR)); System.out.println("UNIQUE WORD ALL PRED BRC: \t" + uniqueAllPredWordBRC); System.out.println(detailedRes); System.out.println("TOTAL: \t" + reportedAbstractMRs.size()); //////////////////////// for (String predicate : getPredicates()) { detailedRes = ""; bestPredictedStrings = new ArrayList<>(); bestPredictedStringsMRs = new ArrayList<>(); double uniquePredWordBLEU = 0.0; double uniquePredWordROUGE = 0.0; double uniquePredWordCOVERAGEERR = 0.0; double uniquePredWordBRC = 0.0; reportedAbstractMRs = new HashSet<>(); for (DatasetInstance di : testingData) { if (di.getMeaningRepresentation().getPredicate().equals(predicate) && !reportedAbstractMRs.contains(di.getMeaningRepresentation().getAbstractMR())) { String bestPredictedString = ""; Double bestROUGE = -100.0; Double bestBLEU = -100.0; Double bestCover = -100.0; Double bestHarmonicMean = -100.0; String predictedString = predictedWordSequences_overAllPredicates.get(di); reportedAbstractMRs.add(di.getMeaningRepresentation().getAbstractMR()); double maxRouge = 0.0; String predictedWordSequence = predictedString.replaceAll("\\?", " \\? ") .replaceAll(":", " : ").replaceAll("\\.", " \\. 
").replaceAll(",", " , ") .replaceAll(" ", " ").trim(); for (String ref : finalReferencesWordSequences.get(di)) { double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4); if (rouge > maxRouge) { maxRouge = rouge; } } double BLEUSmooth = BLEUMetric.computeLocalSmoothScore(predictedWordSequence, finalReferencesWordSequences.get(di), 4); double cover = 1.0 - attrCoverage.get(predictedString); double harmonicMean = 3.0 / (1.0 / BLEUSmooth + 1.0 / maxRouge + 1.0 / cover); if (harmonicMean > bestHarmonicMean) { bestPredictedString = predictedString; bestROUGE = maxRouge; bestBLEU = BLEUSmooth; bestCover = cover; bestHarmonicMean = harmonicMean; } bestPredictedStrings.add(bestPredictedString); bestPredictedStringsMRs.add(di.getMeaningRepresentation().getMRstr()); uniquePredWordBLEU += bestBLEU; uniquePredWordROUGE += bestROUGE; uniquePredWordCOVERAGEERR += bestCover; uniquePredWordBRC += bestHarmonicMean; } } uniquePredWordBLEU /= reportedAbstractMRs.size(); uniquePredWordROUGE /= reportedAbstractMRs.size(); uniquePredWordCOVERAGEERR /= reportedAbstractMRs.size(); uniquePredWordBRC /= reportedAbstractMRs.size(); System.out.println("UNIQUE WORD " + predicate + " BLEU: \t" + uniquePredWordBLEU); System.out.println("UNIQUE WORD " + predicate + " ROUGE: \t" + uniquePredWordROUGE); System.out.println( "UNIQUE WORD " + predicate + " COVERAGE ERROR: \t" + (1.0 - uniquePredWordCOVERAGEERR)); System.out.println("UNIQUE WORD " + predicate + " BRC: \t" + uniquePredWordBRC); System.out.println(detailedRes); System.out.println("TOTAL " + predicate + ": \t" + reportedAbstractMRs.size()); } } if (isCalculateResultsPerPredicate()) { BufferedWriter bw = null; File f = null; try { f = new File("results/random_SFX" + getDataset() + "TextsAfter" + (epoch) + "_" + JLOLS.sentenceCorrectionFurtherSteps + "_" + JLOLS.p + "epochsTESTINGDATA.txt"); } catch (NullPointerException e) { } try { bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f))); } catch (FileNotFoundException 
e) { } try { bw.write("BLEU:" + bleuScore); bw.write("\n"); } catch (IOException e) { } for (int i = 0; i < bestPredictedStrings.size(); i++) { try { String mr = bestPredictedStringsMRs.get(i); bw.write("MR;" + mr.replaceAll(";", ",") + ";"); if (getDataset().equals("hotel")) { bw.write("LOLS_SFHOT;"); } else { bw.write("LOLS_SFRES;"); } bw.write("\n"); } catch (IOException e) { } } try { bw.close(); } catch (IOException e) { } } return bleuScore; }