List of usage examples for java.util ArrayList stream
default Stream<E> stream()
From source file:cn.edu.zjnu.acm.judge.core.Judger.java
private boolean runProcess(RunRecord runRecord) throws IOException { Path dataPath = runRecord.getDataPath(); Objects.requireNonNull(dataPath, "dataPath"); Path specialFile = dataPath.resolve(JudgeConfiguration.VALIDATE_FILE_NAME); boolean isspecial = Files.exists(specialFile); if (!Files.isDirectory(dataPath)) { log.error("{} not exists", runRecord.getDataPath()); return false; }// ww w .j a v a 2 s . c om List<Path[]> files = new ArrayList<>(20); try (DirectoryStream<Path> listFiles = Files.newDirectoryStream(dataPath)) { log.debug("dataPath = {}", dataPath); for (Path inFile : listFiles) { String inFileName = inFile.getFileName().toString(); if (!inFileName.toLowerCase().endsWith(".in")) { continue; } Path outFile = dataPath.resolve(inFileName.substring(0, inFileName.length() - 3) + ".out"); if (!Files.exists(outFile)) { continue; } files.add(new Path[] { inFile, outFile });//, } } int casenum = files.size(); log.debug("casenum = {}", casenum); if (casenum == 0) { return false; } int accept = 0; //? ArrayList<String> details = new ArrayList<>(casenum << 2); long time = 0; // long memory = 0; // String command = runRecord.getLanguage().getExecuteCommand(); Path work = judgeConfiguration.getWorkDirectory(runRecord.getSubmissionId()); // command = !StringUtils.isEmptyOrWhitespace(command) ? command : work.resolve("Main." 
+ runRecord.getLanguage().getExecutableExtension()).toString(); long extTime = runRecord.getLanguage().getExtTime(); long castTimeLimit = runRecord.getTimeLimit() * runRecord.getLanguage().getTimeFactor() + extTime; long extraMemory = runRecord.getLanguage().getExtMemory(); // long caseMemoryLimit = (runRecord.getMemoryLimit() + extraMemory) * 1024; Options[] optionses = new Options[casenum]; for (int cas = 0; cas < casenum; cas++) { Path[] entry = files.get(cas); Path in = entry[0]; Path standard = entry[1]; Path progOutput = work.resolve(standard.getFileName()); optionses[cas] = Options.builder().timeLimit(castTimeLimit) // time limit .memoryLimit(caseMemoryLimit) // memory in bytes .outputLimit(16 * 1024 * 1024) // 16M .command(command).workDirectory(work).inputFile(in).outputFile(progOutput) .standardOutput(standard).errFile(getNull(work)).build(); } String detailMessageStr = null; String scorePerCase = new DecimalFormat("0.#").format(100.0 / casenum); final Validator validator = isspecial ? new SpecialValidator(specialFile.toString(), work) : new SimpleValidator(); try { ExecuteResult[] ers = JudgeBridge.INSTANCE.judge(optionses, false, validator); for (ExecuteResult er : ers) { long tim1 = er.getTime() - extTime; tim1 = Math.max(0, tim1); long mem1 = er.getMemory() / 1024 - extraMemory; mem1 = Math.max(0, mem1); String message = er.getMessage(); int caseResult = getResultFromExecuteResult(er); time = Math.max(time, tim1); memory = Math.max(memory, mem1); log.debug("message = {}, time = {}, memory = {}", message, time, memory); details.add(String.valueOf(caseResult)); if (caseResult == 0) { details.add(scorePerCase); } else { details.add("0"); } details.add(String.valueOf(tim1)); details.add(String.valueOf(mem1)); if (caseResult == 0) { ++accept; } } } catch (JudgeException | RuntimeException | Error ex) { log.error("", ex); accept = ResultType.SYSTEM_ERROR; detailMessageStr = ex.getMessage(); } log.debug("{}", details); int score = accept >= 0 ? 
(int) Math.round(accept * 100.0 / casenum) : accept; if (score == 0 && accept != 0) { ++score; } else if (score == 100 && accept != casenum) { --score; } submissionMapper.updateResult(runRecord.getSubmissionId(), score, time, memory); submissionMapper.saveDetail(runRecord.getSubmissionId(), detailMessageStr != null ? detailMessageStr : details.stream().map(String::valueOf).collect(Collectors.joining(","))); updateSubmissionStatus(runRecord); return score == 100; }
From source file:it.polimi.diceH2020.launcher.controller.LaunchAnalysis.java
@RequestMapping(value = "/simulationSetup", method = RequestMethod.GET) public String showSimulationsManagerForm(SessionStatus sessionStatus, Model model, @ModelAttribute("instanceDataMultiProvider") String instanceDataMultiProviderPath, @ModelAttribute("pathList") ArrayList<String> pathList, @ModelAttribute("scenario") String scenarioString, RedirectAttributes redirectAttrs) { Scenarios scenario = Scenarios.valueOf(scenarioString); model.addAttribute("scenario", scenario); redirectAttrs.addAttribute("scenario", scenario); if (pathList.size() == 0) { deleteUploadedFiles(pathList);/*from w ww .j a v a 2 s. c om*/ redirectAttrs.addAttribute("message", "You haven't submitted any file!"); return "redirect:/launchRetry"; } if (instanceDataMultiProviderPath == null) { deleteUploadedFiles(pathList); redirectAttrs.addAttribute("message", "Select a Json file!"); return "redirect:/launchRetry"; } Optional<InstanceDataMultiProvider> idmp = validator .readInstanceDataMultiProvider(Paths.get(instanceDataMultiProviderPath)); if (idmp.isPresent()) { if (!idmp.get().validate()) { deleteUploadedFiles(pathList); model.addAttribute("message", idmp.get().getValidationError()); return "redirect:/launchRetry"; } } else { model.addAttribute("message", "Error with InstanceDataMultiProvider"); deleteUploadedFiles(pathList); return "redirect:/launchRetry"; } InstanceDataMultiProvider instanceDataMultiProvider = idmp.get(); String check = scenarioValidation(instanceDataMultiProvider, scenario); if (!check.equals("ok")) { deleteUploadedFiles(pathList); redirectAttrs.addAttribute("message", check); return "redirect:/launchRetry"; } List<InstanceDataMultiProvider> inputList = JsonSplitter .splitInstanceDataMultiProvider(instanceDataMultiProvider, scenario); if (inputList.size() > 1) { List<String> providersList = inputList.stream().map(InstanceDataMultiProvider::getProvider) .collect(Collectors.toList()); if (!minNumTxt(providersList, pathList)) { deleteUploadedFiles(pathList); 
model.addAttribute("message", "Not enough TXT files selected.\nFor each provider in your JSON there must be 2 TXT files containing in their name the provider name."); return "redirect:/launchRetry"; } } List<SimulationsManager> simManagerList = initializeSimManagers(inputList); List<String> txtFoldersList = new ArrayList<>(); for (SimulationsManager sm : simManagerList) { sm.setInputFileName(Paths.get(instanceDataMultiProviderPath).getFileName().toString()); InstanceDataMultiProvider input = sm.getInputData(); String txtFolder = new String(); try { txtFolder = fileUtility.createInputSubFolder(); txtFoldersList.add(txtFolder); } catch (Exception e) { deleteUploadedFiles(pathList); deleteUploadedFiles(txtFoldersList); redirectAttrs.addAttribute("message", "Too many folders for TXTs with the same name have been created!"); return "redirect:/launchRetry"; } for (Entry<String, Map<String, Map<String, JobProfile>>> jobIDs : input.getMapJobProfiles() .getMapJobProfile().entrySet()) { for (Entry<String, Map<String, JobProfile>> provider : jobIDs.getValue().entrySet()) { for (Entry<String, JobProfile> typeVMs : provider.getValue().entrySet()) { String secondPartOfTXTName = getSecondPartOfReplayersName(jobIDs.getKey(), provider.getKey(), typeVMs.getKey()); List<String> txtToBeSaved = pathList.stream().filter(s -> s.contains(secondPartOfTXTName)) .filter(s -> s.contains(input.getId())).collect(Collectors.toList()); if (txtToBeSaved.isEmpty()) { deleteUploadedFiles(pathList); deleteUploadedFiles(txtFoldersList); model.addAttribute("message", "Missing TXT file for Instance:" + input.getId() + ", Job: " + jobIDs.getKey() + ", Provider:" + provider.getKey() + ", TypeVM:" + typeVMs.getKey()); return "redirect:/launchRetry"; } for (String srcPath : txtToBeSaved) { File src = new File(srcPath); String fileContent = new String(); try { fileContent = new String(Files.readAllBytes(Paths.get(srcPath))); FileOutputStream fooStream = new FileOutputStream(src, false); // true to append // 
false to overwrite. byte[] myBytes = Compressor.compress(fileContent).getBytes(); fooStream.write(myBytes); fooStream.close(); fileUtility.copyFile(srcPath, txtFolder + src.getName()); } catch (IOException e) { deleteUploadedFiles(pathList); deleteUploadedFiles(txtFoldersList); model.addAttribute("message", "Problem with TXT paths. [TXT file for Instance:" + input.getId() + ", Job: " + jobIDs.getKey() + ", Provider:" + provider.getKey() + ", TypeVM:" + typeVMs.getKey() + "]"); return "redirect:/launchRetry"; } if (fileContent.length() == 0) { deleteUploadedFiles(pathList); deleteUploadedFiles(txtFoldersList); model.addAttribute("message", "Missing TXT file for Instance:" + input.getId() + ", Job: " + jobIDs.getKey() + ", Provider:" + provider.getKey() + ", TypeVM:" + typeVMs.getKey()); return "redirect:/launchRetry"; } sm.addInputFolder(txtFolder); sm.setNumCompletedSimulations(0); sm.buildExperiments(); } } } } } deleteUploadedFiles(pathList); for (SimulationsManager sm : simManagerList) { ds.simulation(sm); } model.addAttribute("simManagersList", simManagerList); return "redirect:/"; }
From source file:structuredPredictionNLG.SFX.java
/**
 * Extracts the attribute sequence realized by a word-action sequence.
 *
 * <p>Start/end sentinel actions are skipped, and consecutive duplicate attributes are
 * collapsed, so the result is the ordered list of distinct attribute "spans" in the
 * realization.
 *
 * @param wordSequence the word-level actions of a realization
 * @return the de-duplicated, in-order list of attributes covered by {@code wordSequence}
 */
public ArrayList<String> getPredictedAttrList(ArrayList<Action> wordSequence) {
    ArrayList<String> predictedAttrList = new ArrayList<>();
    for (Action action : wordSequence) {
        // Sentinel tokens carry no content attribute — skip them.
        if (action.getWord().equals(Action.TOKEN_START) || action.getWord().equals(Action.TOKEN_END)) {
            continue;
        }
        String attribute = action.getAttribute();
        // Record the attribute only when it differs from the previous one (run-length collapse).
        if (predictedAttrList.isEmpty()
                || !predictedAttrList.get(predictedAttrList.size() - 1).equals(attribute)) {
            predictedAttrList.add(attribute);
        }
    }
    return predictedAttrList;
}
From source file:structuredPredictionNLG.SFX.java
/**
 * During this method, we calculate the alignments (naive or random), the language models,
 * the available content and word actions, and finally the feature vectors.
 */
@Override
public void createTrainingData() {
    // Debug toggles for training on a data subset — intentionally left commented out.
    //setTrainingData(new ArrayList<>(getTrainingData().subList(0, 50)));
    //setTestingData(new ArrayList<>(getTrainingData()));
    // Calculate alignments between the word of the sentence and the atribute/values
    if (getUseAlignments().equals("naive")) {
        createNaiveAlignments(getTrainingData());
    } else {
        createRandomAlignments(getTrainingData());
    }
    // Create (or load from cache) the content and word language models per predicate
    if (isResetStoredCaches() || !loadLMs()) {
        // Per-predicate LM training corpora: word token sequences and attribute sequences.
        HashMap<String, ArrayList<ArrayList<String>>> LMWordTrainingPerPred = new HashMap<>();
        HashMap<String, ArrayList<ArrayList<String>>> LMAttrTrainingPerPred = new HashMap<>();
        // NOTE: the map() stage is used for its side effect (lazily initializing the per-predicate
        // entries) before forEachOrdered() consumes the stream — statement order matters here.
        getTrainingData().stream().map((di) -> {
            if (!LMWordTrainingPerPred.containsKey(di.getMeaningRepresentation().getPredicate())) {
                LMWordTrainingPerPred.put(di.getMeaningRepresentation().getPredicate(),
                        new ArrayList<ArrayList<String>>());
                LMAttrTrainingPerPred.put(di.getMeaningRepresentation().getPredicate(),
                        new ArrayList<ArrayList<String>>());
            }
            return di;
        }).forEachOrdered((di) -> {
            HashSet<ArrayList<Action>> seqs = new HashSet<>();
            seqs.add(di.getDirectReferenceSequence());
            seqs.forEach((seq) -> {
                ArrayList<String> wordSeq = new ArrayList<>();
                ArrayList<String> attrSeq = new ArrayList<>();
                // We add some empty tokens at the start of each sequence
                // ("@@" padding for the trigram LM context window).
                wordSeq.add("@@");
                wordSeq.add("@@");
                attrSeq.add("@@");
                attrSeq.add("@@");
                for (int i = 0; i < seq.size(); i++) {
                    // Words: keep everything except the end sentinel.
                    if (!seq.get(i).getAttribute().equals(Action.TOKEN_END)
                            && !seq.get(i).getWord().equals(Action.TOKEN_END)) {
                        wordSeq.add(seq.get(i).getWord());
                    }
                    // Attributes: run-length collapse consecutive duplicates.
                    if (attrSeq.isEmpty()) {
                        attrSeq.add(seq.get(i).getAttribute());
                    } else if (!attrSeq.get(attrSeq.size() - 1).equals(seq.get(i).getAttribute())) {
                        attrSeq.add(seq.get(i).getAttribute());
                    }
                }
                wordSeq.add(Action.TOKEN_END);
                LMWordTrainingPerPred.get(di.getMeaningRepresentation().getPredicate()).add(wordSeq);
                LMAttrTrainingPerPred.get(di.getMeaningRepresentation().getPredicate()).add(attrSeq);
            });
        });
        setWordLMsPerPredicate(new HashMap<>());
        setContentLMsPerPredicate(new HashMap<>());
        // Train one trigram word LM and one trigram content (attribute) LM per predicate.
        LMWordTrainingPerPred.keySet().stream().map((pred) -> {
            SimpleLM simpleWordLM = new SimpleLM(3);
            simpleWordLM.trainOnStrings(LMWordTrainingPerPred.get(pred));
            getWordLMsPerPredicate().put(pred, simpleWordLM);
            return pred;
        }).forEachOrdered((pred) -> {
            SimpleLM simpleAttrLM = new SimpleLM(3);
            simpleAttrLM.trainOnStrings(LMAttrTrainingPerPred.get(pred));
            getContentLMsPerPredicate().put(pred, simpleAttrLM);
        });
        writeLMs();
    }
    // Go through the sequences in the data and populate the available content and word action dictionaries
    // We populate a distinct word dictionary for each attribute, and populate it with the words of word
    // sequences whose corresponding content sequences contain that attribute
    HashMap<String, HashSet<String>> availableContentActions = new HashMap<>();
    HashMap<String, HashMap<String, HashSet<Action>>> availableWordActions = new HashMap<>();
    getTrainingData().forEach((DI) -> {
        String predicate = DI.getMeaningRepresentation().getPredicate();
        if (!availableContentActions.containsKey(predicate)) {
            availableContentActions.put(predicate, new HashSet<String>());
            availableContentActions.get(predicate).add(Action.TOKEN_END);
        }
        if (!availableWordActions.containsKey(predicate)) {
            availableWordActions.put(predicate, new HashMap<String, HashSet<Action>>());
        }
        ArrayList<Action> realization = DI.getDirectReferenceSequence();
        realization.stream().filter((a) -> (!a.getAttribute().equals(Action.TOKEN_END)))
                .forEachOrdered((Action a) -> {
                    // Strip the "=value" part: dictionaries are keyed on the bare attribute name.
                    String attr;
                    if (a.getAttribute().contains("=")) {
                        attr = a.getAttribute().substring(0, a.getAttribute().indexOf('='));
                    } else {
                        attr = a.getAttribute();
                    }
                    availableContentActions.get(predicate).add(attr);
                    if (!availableWordActions.get(predicate).containsKey(attr)) {
                        availableWordActions.get(predicate).put(attr, new HashSet<Action>());
                        // Every attribute's word dictionary always contains the end action.
                        availableWordActions.get(predicate).get(attr).add(new Action(Action.TOKEN_END, attr));
                    }
                    // Skip sentinels and bare punctuation; they are not candidate word actions.
                    if (!a.getWord().equals(Action.TOKEN_START) && !a.getWord().equals(Action.TOKEN_END)
                            && !a.getWord().matches("([,.?!;:'])")) {
                        if (a.getWord().startsWith(Action.TOKEN_X)) {
                            // Delexicalized placeholder: keep it only under the attribute it encodes
                            // (the attribute name is embedded between the prefix and the last '_').
                            if (a.getWord().substring(3, a.getWord().lastIndexOf('_')).toLowerCase().trim()
                                    .equals(attr)) {
                                availableWordActions.get(predicate).get(attr)
                                        .add(new Action(a.getWord(), attr));
                            }
                        } else {
                            availableWordActions.get(predicate).get(attr).add(new Action(a.getWord(), attr));
                        }
                    }
                });
    });
    setAvailableContentActions(availableContentActions);
    setAvailableWordActions(availableWordActions);
    //When using random alignments we do not consider the value alignments either
    if (getUseAlignments().equals("random")) {
        setValueAlignments(new HashMap<>());
    }
    // Infer the feature vectors of the training data
    if (isResetStoredCaches() || !loadTrainingData(getTrainingData().size())) {
        System.out.print("Create training data...");
        Object[] results = inferFeatureAndCostVectors();
        System.out.print("almost...");
        // results[0]/results[1] come back untyped from the parallel inference step;
        // the casts restore the known concrete map types.
        @SuppressWarnings("unchecked")
        ConcurrentHashMap<DatasetInstance, HashMap<String, ArrayList<Instance>>> getPredicateContentTrainingDataBefore = (ConcurrentHashMap<DatasetInstance, HashMap<String, ArrayList<Instance>>>) results[0];
        @SuppressWarnings("unchecked")
        ConcurrentHashMap<DatasetInstance, HashMap<String, HashMap<String, ArrayList<Instance>>>> getPredicateWordTrainingDataBefore = (ConcurrentHashMap<DatasetInstance, HashMap<String, HashMap<String, ArrayList<Instance>>>>) results[1];
        // Reorganize the feature/cost vector collections
        // Initially they are mapped according to DatasetInstance (since it helps with parallel processing)
        // but we prefer them mapped by predicate for training
        setPredicateContentTrainingData(new HashMap<>());
        getTrainingData().forEach((di) -> {
            getPredicateContentTrainingDataBefore.get(di).keySet().stream().map((predicate) -> {
                if (!getPredicateContentTrainingData().containsKey(predicate)) {
                    getPredicateContentTrainingData().put(predicate, new ArrayList<Instance>());
                }
                return predicate;
            }).forEachOrdered((predicate) -> {
                getPredicateContentTrainingData().get(predicate)
                        .addAll(getPredicateContentTrainingDataBefore.get(di).get(predicate));
            });
        });
        setPredicateWordTrainingData(new HashMap<>());
        getTrainingData().forEach((di) -> {
            getPredicateWordTrainingDataBefore.get(di).keySet().stream().map((predicate) -> {
                if (!getPredicateWordTrainingData().containsKey(predicate)) {
                    getPredicateWordTrainingData().put(predicate, new HashMap<String, ArrayList<Instance>>());
                }
                return predicate;
            }).forEachOrdered((predicate) -> {
                getPredicateWordTrainingDataBefore.get(di).get(predicate).keySet().stream().map((attribute) -> {
                    if (!getPredicateWordTrainingData().get(predicate).containsKey(attribute)) {
                        getPredicateWordTrainingData().get(predicate).put(attribute, new ArrayList<Instance>());
                    }
                    return attribute;
                }).forEachOrdered((attribute) -> {
                    getPredicateWordTrainingData().get(predicate).get(attribute)
                            .addAll(getPredicateWordTrainingDataBefore.get(di).get(predicate).get(attribute));
                });
            });
        });
        writeTrainingData(getTrainingData().size());
    }
}
From source file:structuredPredictionNLG.SFX.java
/** * * @param trainingData//from ww w . jav a2 s .co m */ public void createRandomAlignments(ArrayList<DatasetInstance> trainingData) { HashMap<String, HashMap<ArrayList<Action>, HashMap<Action, Integer>>> punctPatterns = new HashMap<>(); getPredicates().forEach((predicate) -> { punctPatterns.put(predicate, new HashMap<ArrayList<Action>, HashMap<Action, Integer>>()); }); HashMap<DatasetInstance, ArrayList<Action>> punctRealizations = new HashMap<DatasetInstance, ArrayList<Action>>(); HashMap<ArrayList<Action>, ArrayList<Action>> calculatedRealizationsCache = new HashMap<>(); trainingData.stream().map((di) -> { HashSet<ArrayList<Action>> initRealizations = new HashSet<>(); if (!calculatedRealizationsCache.containsKey(di.getDirectReferenceSequence())) { initRealizations.add(di.getDirectReferenceSequence()); } initRealizations.stream().map((realization) -> { HashMap<String, HashSet<String>> values = new HashMap<>(); di.getMeaningRepresentation().getAttributeValues().keySet().forEach((attr) -> { values.put(attr, new HashSet<>(di.getMeaningRepresentation().getAttributeValues().get(attr))); }); ArrayList<Action> randomRealization = new ArrayList<Action>(); realization.forEach((a) -> { if (a.getAttribute().equals(Action.TOKEN_PUNCT)) { randomRealization.add(new Action(a.getWord(), a.getAttribute())); } else { randomRealization.add(new Action(a.getWord(), "")); } }); HashSet<String> unalignedAttrs = new HashSet<>(); if (values.keySet().isEmpty()) { for (int i = 0; i < randomRealization.size(); i++) { if (randomRealization.get(i).getAttribute().isEmpty() || randomRealization.get(i).getAttribute().equals("[]")) { if (!getAttributes().get(di.getMeaningRepresentation().getPredicate()) .contains("empty")) { getAttributes().get(di.getMeaningRepresentation().getPredicate()).add("empty"); } randomRealization.get(i).setAttribute("empty=empty"); } } } else { values.keySet().forEach((attr) -> { values.get(attr).forEach((value) -> { if ((!(value.matches("\"[xX][0-9]+\"") || 
value.matches("[xX][0-9]+") || value.startsWith(Action.TOKEN_X))) && !value.isEmpty()) { String valueToCheck = value; if (valueToCheck.equals("no") || valueToCheck.equals("yes") || valueToCheck.equals("yes or no") || valueToCheck.equals("none") //|| valueToCheck.equals("dont_care") || valueToCheck.equals("empty")) { valueToCheck = attr + ":" + value; unalignedAttrs.add(attr + "=" + value); } if (valueToCheck.equals(attr)) { unalignedAttrs.add(attr + "=" + value); } if (!valueToCheck.equals("empty:empty") && getValueAlignments().containsKey(valueToCheck)) { unalignedAttrs.add(attr + "=" + valueToCheck); } } else { unalignedAttrs.add(attr + "=" + value); } }); }); unalignedAttrs.forEach((attrValue) -> { int index = getRandomGen().nextInt(randomRealization.size()); boolean change = false; while (!change) { if (!randomRealization.get(index).getAttribute().equals(Action.TOKEN_PUNCT)) { randomRealization.get(index).setAttribute(attrValue.toLowerCase().trim()); change = true; } else { index = getRandomGen().nextInt(randomRealization.size()); } } }); String previousAttr = ""; for (int i = 0; i < randomRealization.size(); i++) { if (randomRealization.get(i).getAttribute().isEmpty() || randomRealization.get(i).getAttribute().equals("[]")) { if (!previousAttr.isEmpty()) { randomRealization.get(i).setAttribute(previousAttr); } } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { previousAttr = randomRealization.get(i).getAttribute(); } else { previousAttr = ""; } } //System.out.println("1: " + randomRealization); previousAttr = ""; for (int i = randomRealization.size() - 1; i >= 0; i--) { if (randomRealization.get(i).getAttribute().isEmpty() || randomRealization.get(i).getAttribute().equals("[]")) { if (!previousAttr.isEmpty()) { randomRealization.get(i).setAttribute(previousAttr); } } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { previousAttr = randomRealization.get(i).getAttribute(); } else { previousAttr = ""; } 
} //System.out.println("2: " + randomRealization); previousAttr = ""; for (int i = 0; i < randomRealization.size(); i++) { if (randomRealization.get(i).getAttribute().isEmpty() || randomRealization.get(i).getAttribute().equals("[]")) { if (!previousAttr.isEmpty()) { randomRealization.get(i).setAttribute(previousAttr); } } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { previousAttr = randomRealization.get(i).getAttribute(); } } //System.out.println("3: " + randomRealization); previousAttr = ""; for (int i = randomRealization.size() - 1; i >= 0; i--) { if (randomRealization.get(i).getAttribute().isEmpty() || randomRealization.get(i).getAttribute().equals("[]")) { if (!previousAttr.isEmpty()) { randomRealization.get(i).setAttribute(previousAttr); } } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { previousAttr = randomRealization.get(i).getAttribute(); } } //System.out.println("4: " + randomRealization); } //FIX WRONG @PUNCT@ String previousAttr = ""; for (int i = randomRealization.size() - 1; i >= 0; i--) { if (randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT) && !randomRealization.get(i).getWord().matches("[,.?!;:']")) { if (!previousAttr.isEmpty()) { randomRealization.get(i).setAttribute(previousAttr); } } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { previousAttr = randomRealization.get(i).getAttribute(); } } ArrayList<Action> cleanRandomRealization = new ArrayList<>(); randomRealization.stream().filter((a) -> (!a.getAttribute().equals(Action.TOKEN_PUNCT))) .forEachOrdered((a) -> { cleanRandomRealization.add(a); }); //ADD END TOKENS ArrayList<Action> endRandomRealization = new ArrayList<>(); previousAttr = ""; for (int i = 0; i < cleanRandomRealization.size(); i++) { Action a = cleanRandomRealization.get(i); if (!previousAttr.isEmpty() && !a.getAttribute().equals(previousAttr)) { endRandomRealization.add(new Action(Action.TOKEN_END, previousAttr)); } 
endRandomRealization.add(a); previousAttr = a.getAttribute(); } endRandomRealization.add(new Action(Action.TOKEN_END, previousAttr)); endRandomRealization.add(new Action(Action.TOKEN_END, Action.TOKEN_END)); calculatedRealizationsCache.put(realization, endRandomRealization); //System.out.println(di.getMeaningRepresentation().getPredicate() + ": " + endRandomRealization); ArrayList<String> attrValues = new ArrayList<String>(); endRandomRealization.forEach((a) -> { if (attrValues.isEmpty()) { attrValues.add(a.getAttribute()); } else if (!attrValues.get(attrValues.size() - 1).equals(a.getAttribute())) { attrValues.add(a.getAttribute()); } }); if (attrValues.size() > getMaxContentSequenceLength()) { setMaxContentSequenceLength(attrValues.size()); } ArrayList<Action> punctRealization = new ArrayList<>(); punctRealization.addAll(randomRealization); previousAttr = ""; for (int i = 0; i < punctRealization.size(); i++) { if (!punctRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { if (!punctRealization.get(i).getAttribute().equals(previousAttr) && !previousAttr.isEmpty()) { punctRealization.add(i, new Action(Action.TOKEN_END, previousAttr)); i++; } previousAttr = punctRealization.get(i).getAttribute(); } } if (!punctRealization.get(punctRealization.size() - 1).getWord().equals(Action.TOKEN_END)) { punctRealization.add(new Action(Action.TOKEN_END, previousAttr)); } return punctRealization; }).map((punctRealization) -> { punctRealizations.put(di, punctRealization); return punctRealization; }).forEachOrdered((punctRealization) -> { for (int i = 0; i < punctRealization.size(); i++) { Action a = punctRealization.get(i); if (a.getAttribute().equals(Action.TOKEN_PUNCT)) { boolean legal = true; ArrayList<Action> surroundingActions = new ArrayList<>(); /*if (i - 3 >= 0) { surroundingActions.add(punctRealization.get(i - 3)); } else { surroundingActions.add(null); }*/ if (i - 2 >= 0) { surroundingActions.add(punctRealization.get(i - 2)); } else { 
surroundingActions.add(null); } if (i - 1 >= 0) { surroundingActions.add(punctRealization.get(i - 1)); } else { legal = false; } boolean oneMore = false; if (i + 1 < punctRealization.size()) { surroundingActions.add(punctRealization.get(i + 1)); if (!punctRealization.get(i + 1).getAttribute().equals(Action.TOKEN_END)) { oneMore = true; } } else { legal = false; } if (oneMore && i + 2 < punctRealization.size()) { surroundingActions.add(punctRealization.get(i + 2)); } else { surroundingActions.add(null); } if (legal) { if (!punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .containsKey(surroundingActions)) { punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .put(surroundingActions, new HashMap<Action, Integer>()); } if (!punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .get(surroundingActions).containsKey(a)) { punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .get(surroundingActions).put(a, 1); } else { punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .get(surroundingActions) .put(a, punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .get(surroundingActions).get(a) + 1); } } } } }); return di; }).map((di) -> { di.setDirectReferenceSequence(calculatedRealizationsCache.get(di.getDirectReferenceSequence())); return di; }); punctRealizations.keySet().forEach((di) -> { ArrayList<Action> punctRealization = punctRealizations.get(di); punctPatterns.get(di.getMeaningRepresentation().getPredicate()).keySet().forEach((surrounds) -> { int beforeNulls = 0; if (surrounds.get(0) == null) { beforeNulls++; } if (surrounds.get(1) == null) { beforeNulls++; } for (int i = 0 - beforeNulls; i < punctRealization.size(); i++) { boolean matches = true; int m = 0; for (int s = 0; s < surrounds.size(); s++) { if (surrounds.get(s) != null) { if (i + s < punctRealization.size()) { if (!punctRealization.get(i + s).getWord().equals(surrounds.get(s) .getWord()) /*|| 
!cleanActionList.get(i).getAttribute().equals(surrounds.get(s).getAttribute())*/) { matches = false; s = surrounds.size(); } else { m++; } } else { matches = false; s = surrounds.size(); } } else if (s < 2 && i + s >= 0) { matches = false; s = surrounds.size(); } else if (s >= 2 && i + s < punctRealization.size()) { matches = false; s = surrounds.size(); } } if (matches && m > 0) { Action a = new Action("", ""); if (!punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds) .containsKey(a)) { punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds).put(a, 1); } else { punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds).put(a, punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds) .get(a) + 1); } } } }); }); punctPatterns.keySet().forEach((predicate) -> { punctPatterns.get(predicate).keySet().forEach((punct) -> { Action bestAction = null; int bestCount = 0; for (Action a : punctPatterns.get(predicate).get(punct).keySet()) { if (punctPatterns.get(predicate).get(punct).get(a) > bestCount) { bestAction = a; bestCount = punctPatterns.get(predicate).get(punct).get(a); } else if (punctPatterns.get(predicate).get(punct).get(a) == bestCount && bestAction.getWord().isEmpty()) { bestAction = a; } } if (!getPunctuationPatterns().containsKey(predicate)) { getPunctuationPatterns().put(predicate, new HashMap<ArrayList<Action>, Action>()); } if (!bestAction.getWord().isEmpty()) { getPunctuationPatterns().get(predicate).put(punct, bestAction); } }); }); }
From source file:structuredPredictionNLG.SFX.java
/** * * @param trainingData//from w w w. j av a 2 s . c o m */ @Override public void createNaiveAlignments(ArrayList<DatasetInstance> trainingData) { HashMap<String, HashMap<ArrayList<Action>, HashMap<Action, Integer>>> punctPatterns = new HashMap<>(); getPredicates().forEach((predicate) -> { punctPatterns.put(predicate, new HashMap<ArrayList<Action>, HashMap<Action, Integer>>()); }); HashMap<DatasetInstance, ArrayList<Action>> punctRealizations = new HashMap<DatasetInstance, ArrayList<Action>>(); trainingData.stream().map((di) -> { HashMap<ArrayList<Action>, ArrayList<Action>> calculatedRealizationsCache = new HashMap<>(); HashSet<ArrayList<Action>> initRealizations = new HashSet<>(); if (!calculatedRealizationsCache.containsKey(di.getDirectReferenceSequence())) { initRealizations.add(di.getDirectReferenceSequence()); } initRealizations.stream().map((realization) -> { HashMap<String, HashSet<String>> values = new HashMap<>(); di.getMeaningRepresentation().getAttributeValues().keySet().forEach((attr) -> { values.put(attr, new HashSet<>(di.getMeaningRepresentation().getAttributeValues().get(attr))); }); ArrayList<Action> randomRealization = new ArrayList<>(); for (int i = 0; i < realization.size(); i++) { Action a = realization.get(i); if (a.getAttribute().equals(Action.TOKEN_PUNCT)) { randomRealization.add(new Action(a.getWord(), a.getAttribute())); } else { randomRealization.add(new Action(a.getWord(), "")); } } if (values.keySet().isEmpty()) { for (int i = 0; i < randomRealization.size(); i++) { if (randomRealization.get(i).getAttribute().isEmpty() || randomRealization.get(i).getAttribute().equals("[]")) { if (!getAttributes().get(di.getMeaningRepresentation().getPredicate()) .contains("empty")) { getAttributes().get(di.getMeaningRepresentation().getPredicate()).add("empty"); } randomRealization.get(i).setAttribute("empty=empty"); } } } else { HashMap<Double, HashMap<String, ArrayList<Integer>>> indexAlignments = new HashMap<>(); HashSet<String> noValueAttrs = 
new HashSet<String>(); values.keySet().forEach((attr) -> { values.get(attr).stream().filter( (value) -> ((!(value.matches("\"[xX][0-9]+\"") || value.matches("[xX][0-9]+") || value.startsWith(Action.TOKEN_X))) && !value.isEmpty())) .map((value) -> { String valueToCheck = value; if (valueToCheck.equals("no") || valueToCheck.equals("yes") || valueToCheck.equals("yes or no") || valueToCheck.equals("none") //|| attr.equals("dont_care") || valueToCheck.equals("empty")) { valueToCheck = attr + ":" + value; noValueAttrs.add(attr + "=" + value); } if (valueToCheck.equals(attr)) { noValueAttrs.add(attr + "=" + value); } return valueToCheck; }) .filter((valueToCheck) -> (!valueToCheck.equals("empty:empty") && getValueAlignments().containsKey(valueToCheck))) .forEachOrdered((valueToCheck) -> { for (ArrayList<String> align : getValueAlignments().get(valueToCheck) .keySet()) { int n = align.size(); for (int i = 0; i <= randomRealization.size() - n; i++) { ArrayList<String> compare = new ArrayList<String>(); ArrayList<Integer> indexAlignment = new ArrayList<Integer>(); for (int j = 0; j < n; j++) { compare.add(randomRealization.get(i + j).getWord()); indexAlignment.add(i + j); } if (compare.equals(align)) { if (!indexAlignments.containsKey( getValueAlignments().get(valueToCheck).get(align))) { indexAlignments.put( getValueAlignments().get(valueToCheck).get(align), new HashMap()); } indexAlignments .get(getValueAlignments().get(valueToCheck).get(align)) .put(attr + "=" + valueToCheck, indexAlignment); } } } }); }); ArrayList<Double> similarities = new ArrayList<>(indexAlignments.keySet()); Collections.sort(similarities); HashSet<String> assignedAttrValues = new HashSet<String>(); HashSet<Integer> assignedIntegers = new HashSet<Integer>(); for (int i = similarities.size() - 1; i >= 0; i--) { for (String attrValue : indexAlignments.get(similarities.get(i)).keySet()) { if (!assignedAttrValues.contains(attrValue)) { boolean isUnassigned = true; for (Integer index : 
indexAlignments.get(similarities.get(i)).get(attrValue)) { if (assignedIntegers.contains(index)) { isUnassigned = false; } } if (isUnassigned) { assignedAttrValues.add(attrValue); for (Integer index : indexAlignments.get(similarities.get(i)).get(attrValue)) { assignedIntegers.add(index); randomRealization.get(index).setAttribute(attrValue.toLowerCase().trim()); } } } } } //System.out.println("-1: " + randomRealization); randomRealization.stream().filter((a) -> (a.getWord().startsWith(Action.TOKEN_X))) .forEachOrdered((a) -> { String attr = a.getWord().substring(3, a.getWord().lastIndexOf('_')).toLowerCase() .trim(); a.setAttribute(attr + "=" + a.getWord()); }); HashSet<String> unalignedNoValueAttrs = new HashSet<>(); noValueAttrs.forEach((noValueAttr) -> { boolean assigned = false; for (Action a : randomRealization) { if (a.getAttribute().equals(noValueAttr)) { assigned = true; } } if (!assigned) { unalignedNoValueAttrs.add(noValueAttr); } }); boolean isAllEmpty = true; boolean hasSpace = false; for (int i = 0; i < randomRealization.size(); i++) { if (!randomRealization.get(i).getAttribute().isEmpty() && !randomRealization.get(i).getAttribute().equals("[]") && !randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { isAllEmpty = false; } if (randomRealization.get(i).getAttribute().isEmpty() || randomRealization.get(i).getAttribute().equals("[]")) { hasSpace = true; } } if (isAllEmpty && hasSpace && !unalignedNoValueAttrs.isEmpty()) { unalignedNoValueAttrs.forEach((attrValue) -> { int index = getRandomGen().nextInt(randomRealization.size()); boolean change = false; while (!change) { if (!randomRealization.get(index).getAttribute().equals(Action.TOKEN_PUNCT)) { randomRealization.get(index).setAttribute(attrValue.toLowerCase().trim()); change = true; } else { index = getRandomGen().nextInt(randomRealization.size()); } } }); } //System.out.println(isAllEmpty + " " + hasSpace + " " + unalignedNoValueAttrs); //System.out.println(">> " + noValueAttrs); 
//System.out.println(">> " + values); //System.out.println("0: " + randomRealization); String previousAttr = ""; int start = -1; for (int i = 0; i < randomRealization.size(); i++) { if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT) && !randomRealization.get(i).getAttribute().isEmpty() && !randomRealization.get(i).getAttribute().equals("[]")) { if (start != -1) { int middle = (start + i - 1) / 2 + 1; for (int j = start; j < middle; j++) { if (randomRealization.get(j).getAttribute().isEmpty() || randomRealization.get(j).getAttribute().equals("[]")) { randomRealization.get(j).setAttribute(previousAttr); } } for (int j = middle; j < i; j++) { if (randomRealization.get(j).getAttribute().isEmpty() || randomRealization.get(j).getAttribute().equals("[]")) { randomRealization.get(j) .setAttribute(randomRealization.get(i).getAttribute()); } } } start = i; previousAttr = randomRealization.get(i).getAttribute(); } else { previousAttr = ""; } } //System.out.println("1: " + randomRealization); previousAttr = ""; for (int i = randomRealization.size() - 1; i >= 0; i--) { if (randomRealization.get(i).getAttribute().isEmpty() || randomRealization.get(i).getAttribute().equals("[]")) { if (!previousAttr.isEmpty()) { randomRealization.get(i).setAttribute(previousAttr); } } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { previousAttr = randomRealization.get(i).getAttribute(); } else { previousAttr = ""; } } //System.out.println("2: " + randomRealization); previousAttr = ""; for (int i = 0; i < randomRealization.size(); i++) { if (randomRealization.get(i).getAttribute().isEmpty() || randomRealization.get(i).getAttribute().equals("[]")) { if (!previousAttr.isEmpty()) { randomRealization.get(i).setAttribute(previousAttr); } } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { previousAttr = randomRealization.get(i).getAttribute(); } } //System.out.println("3: " + randomRealization); previousAttr = ""; for 
(int i = randomRealization.size() - 1; i >= 0; i--) { if (randomRealization.get(i).getAttribute().isEmpty() || randomRealization.get(i).getAttribute().equals("[]")) { if (!previousAttr.isEmpty()) { randomRealization.get(i).setAttribute(previousAttr); } } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { previousAttr = randomRealization.get(i).getAttribute(); } } //System.out.println("4: " + randomRealization); } //FIX WRONG @PUNCT@ String previousAttr = ""; for (int i = randomRealization.size() - 1; i >= 0; i--) { if (randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT) && !randomRealization.get(i).getWord().matches("[,.?!;:']")) { if (!previousAttr.isEmpty()) { randomRealization.get(i).setAttribute(previousAttr); } } else if (!randomRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { previousAttr = randomRealization.get(i).getAttribute(); } } ArrayList<Action> cleanRandomRealization = new ArrayList<>(); randomRealization.stream().filter((a) -> (!a.getAttribute().equals(Action.TOKEN_PUNCT))) .forEachOrdered((a) -> { cleanRandomRealization.add(a); }); //ADD END TOKENS ArrayList<Action> endRandomRealization = new ArrayList<>(); previousAttr = ""; for (int i = 0; i < cleanRandomRealization.size(); i++) { Action a = cleanRandomRealization.get(i); if (!previousAttr.isEmpty() && !a.getAttribute().equals(previousAttr)) { endRandomRealization.add(new Action(Action.TOKEN_END, previousAttr)); } endRandomRealization.add(a); previousAttr = a.getAttribute(); } endRandomRealization.add(new Action(Action.TOKEN_END, previousAttr)); endRandomRealization.add(new Action(Action.TOKEN_END, Action.TOKEN_END)); calculatedRealizationsCache.put(realization, endRandomRealization); //System.out.println(di.getMeaningRepresentation().getPredicate() + ": " + endRandomRealization); ArrayList<String> attrValues = new ArrayList<String>(); endRandomRealization.forEach((a) -> { if (attrValues.isEmpty()) { attrValues.add(a.getAttribute()); } 
else if (!attrValues.get(attrValues.size() - 1).equals(a.getAttribute())) { attrValues.add(a.getAttribute()); } }); if (attrValues.size() > getMaxContentSequenceLength()) { setMaxContentSequenceLength(attrValues.size()); } ArrayList<Action> punctRealization = new ArrayList<>(); punctRealization.addAll(randomRealization); previousAttr = ""; for (int i = 0; i < punctRealization.size(); i++) { if (!punctRealization.get(i).getAttribute().equals(Action.TOKEN_PUNCT)) { if (!punctRealization.get(i).getAttribute().equals(previousAttr) && !previousAttr.isEmpty()) { punctRealization.add(i, new Action(Action.TOKEN_END, previousAttr)); i++; } previousAttr = punctRealization.get(i).getAttribute(); } } if (!punctRealization.get(punctRealization.size() - 1).getWord().equals(Action.TOKEN_END)) { punctRealization.add(new Action(Action.TOKEN_END, previousAttr)); } return punctRealization; }).map((punctRealization) -> { punctRealizations.put(di, punctRealization); return punctRealization; }).forEachOrdered((punctRealization) -> { for (int i = 0; i < punctRealization.size(); i++) { Action a = punctRealization.get(i); if (a.getAttribute().equals(Action.TOKEN_PUNCT)) { boolean legal = true; ArrayList<Action> surroundingActions = new ArrayList<>(); if (i - 2 >= 0) { surroundingActions.add(punctRealization.get(i - 2)); } else { surroundingActions.add(null); } if (i - 1 >= 0) { surroundingActions.add(punctRealization.get(i - 1)); } else { legal = false; } boolean oneMore = false; if (i + 1 < punctRealization.size()) { surroundingActions.add(punctRealization.get(i + 1)); if (!punctRealization.get(i + 1).getAttribute().equals(Action.TOKEN_END)) { oneMore = true; } } else { legal = false; } if (oneMore && i + 2 < punctRealization.size()) { surroundingActions.add(punctRealization.get(i + 2)); } else { surroundingActions.add(null); } if (legal) { if (!punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .containsKey(surroundingActions)) { 
punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .put(surroundingActions, new HashMap<Action, Integer>()); } if (!punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .get(surroundingActions).containsKey(a)) { punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .get(surroundingActions).put(a, 1); } else { punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .get(surroundingActions) .put(a, punctPatterns.get(di.getMeaningRepresentation().getPredicate()) .get(surroundingActions).get(a) + 1); } } } } }); di.setDirectReferenceSequence(calculatedRealizationsCache.get(di.getDirectReferenceSequence())); return di; }).forEachOrdered((di) -> { HashSet<String> attrValuesToBeMentioned = new HashSet<>(); di.getMeaningRepresentation().getAttributeValues().keySet().forEach((attribute) -> { int a = 0; for (String value : di.getMeaningRepresentation().getAttributeValues().get(attribute)) { if (value.startsWith("\"x")) { value = "x" + a; a++; } else if (value.startsWith("\"")) { value = value.substring(1, value.length() - 1).replaceAll(" ", "_"); } attrValuesToBeMentioned.add(attribute + "=" + value); } }); di.getDirectReferenceSequence().stream().map((key) -> { attrValuesToBeMentioned.remove(key.getAttribute()); return key; }); }); punctRealizations.keySet().forEach((di) -> { ArrayList<Action> punctRealization = punctRealizations.get(di); punctPatterns.get(di.getMeaningRepresentation().getPredicate()).keySet().forEach((surrounds) -> { int beforeNulls = 0; if (surrounds.get(0) == null) { beforeNulls++; } if (surrounds.get(1) == null) { beforeNulls++; } for (int i = 0 - beforeNulls; i < punctRealization.size(); i++) { boolean matches = true; int m = 0; for (int s = 0; s < surrounds.size(); s++) { if (surrounds.get(s) != null) { if (i + s < punctRealization.size()) { if (!punctRealization.get(i + s).getWord().equals(surrounds.get(s) .getWord()) /*|| !cleanActionList.get(i).getAttribute().equals(surrounds.get(s).getAttribute())*/) { 
matches = false; s = surrounds.size(); } else { m++; } } else { matches = false; s = surrounds.size(); } } else if (s < 2 && i + s >= 0) { matches = false; s = surrounds.size(); } else if (s >= 2 && i + s < punctRealization.size()) { matches = false; s = surrounds.size(); } } if (matches && m > 0) { Action a = new Action("", ""); if (!punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds) .containsKey(a)) { punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds).put(a, 1); } else { punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds).put(a, punctPatterns.get(di.getMeaningRepresentation().getPredicate()).get(surrounds) .get(a) + 1); } } } }); }); punctPatterns.keySet().forEach((predicate) -> { punctPatterns.get(predicate).keySet().forEach((punct) -> { Action bestAction = null; int bestCount = 0; for (Action a : punctPatterns.get(predicate).get(punct).keySet()) { if (punctPatterns.get(predicate).get(punct).get(a) > bestCount) { bestAction = a; bestCount = punctPatterns.get(predicate).get(punct).get(a); } else if (punctPatterns.get(predicate).get(punct).get(a) == bestCount && bestAction.getWord().isEmpty()) { bestAction = a; } } if (!getPunctuationPatterns().containsKey(predicate)) { getPunctuationPatterns().put(predicate, new HashMap<ArrayList<Action>, Action>()); } if (!bestAction.getWord().isEmpty()) { getPunctuationPatterns().get(predicate).put(punct, bestAction); } }); }); }
From source file:structuredPredictionNLG.SFX.java
/** * * @param predicate/*from w ww .j a va 2 s . c o m*/ * @param costs * @param previousGeneratedAttrs * @param attrValuesAlreadyMentioned * @param attrValuesToBeMentioned * @param availableAttributeActions * @param MR * @return */ @Override public Instance createContentInstanceWithCosts(String predicate, TObjectDoubleHashMap<String> costs, ArrayList<String> previousGeneratedAttrs, HashSet<String> attrValuesAlreadyMentioned, HashSet<String> attrValuesToBeMentioned, HashMap<String, HashSet<String>> availableAttributeActions, MeaningRepresentation MR) { TObjectDoubleHashMap<String> generalFeatures = new TObjectDoubleHashMap<>(); HashMap<String, TObjectDoubleHashMap<String>> valueSpecificFeatures = new HashMap<>(); if (availableAttributeActions.containsKey(predicate)) { availableAttributeActions.get(predicate).forEach((action) -> { valueSpecificFeatures.put(action, new TObjectDoubleHashMap<String>()); }); } ArrayList<String> mentionedAttrValues = new ArrayList<>(); previousGeneratedAttrs.stream().filter( (attrValue) -> (!attrValue.equals(Action.TOKEN_START) && !attrValue.equals(Action.TOKEN_END))) .forEachOrdered((attrValue) -> { mentionedAttrValues.add(attrValue); }); for (int j = 1; j <= 1; j++) { String previousAttrValue = "@@"; if (mentionedAttrValues.size() - j >= 0) { previousAttrValue = mentionedAttrValues.get(mentionedAttrValues.size() - j).trim(); } generalFeatures.put("feature_attrValue_" + j + "_" + previousAttrValue, 1.0); } //Word N-Grams String prevAttrValue = "@@"; if (mentionedAttrValues.size() - 1 >= 0) { prevAttrValue = mentionedAttrValues.get(mentionedAttrValues.size() - 1).trim(); } String prev2AttrValue = "@@"; if (mentionedAttrValues.size() - 2 >= 0) { prev2AttrValue = mentionedAttrValues.get(mentionedAttrValues.size() - 2).trim(); } String prev3AttrValue = "@@"; if (mentionedAttrValues.size() - 3 >= 0) { prev3AttrValue = mentionedAttrValues.get(mentionedAttrValues.size() - 3).trim(); } String prev4AttrValue = "@@"; if 
(mentionedAttrValues.size() - 4 >= 0) { prev4AttrValue = mentionedAttrValues.get(mentionedAttrValues.size() - 4).trim(); } String prev5AttrValue = "@@"; if (mentionedAttrValues.size() - 5 >= 0) { prev5AttrValue = mentionedAttrValues.get(mentionedAttrValues.size() - 5).trim(); } String prevBigramAttrValue = prev2AttrValue + "|" + prevAttrValue; String prevTrigramAttrValue = prev3AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; String prev4gramAttrValue = prev4AttrValue + "|" + prev3AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; String prev5gramAttrValue = prev5AttrValue + "|" + prev4AttrValue + "|" + prev3AttrValue + "|" + prev2AttrValue + "|" + prevAttrValue; generalFeatures.put("feature_attrValue_bigram_" + prevBigramAttrValue, 1.0); generalFeatures.put("feature_attrValue_trigram_" + prevTrigramAttrValue, 1.0); generalFeatures.put("feature_attrValue_4gram_" + prev4gramAttrValue, 1.0); generalFeatures.put("feature_attrValue_5gram_" + prev5gramAttrValue, 1.0); //If arguments have been generated or not for (int i = 0; i < mentionedAttrValues.size(); i++) { generalFeatures.put("feature_attrValue_allreadyMentioned_" + mentionedAttrValues.get(i), 1.0); } //If arguments should still be generated or not attrValuesToBeMentioned.forEach((attrValue) -> { generalFeatures.put("feature_attrValue_toBeMentioned_" + attrValue, 1.0); }); //Which attrs are in the MR and which are not if (availableAttributeActions.containsKey(predicate)) { availableAttributeActions.get(predicate).forEach((attribute) -> { if (MR.getAttributeValues().keySet().contains(attribute)) { generalFeatures.put("feature_attr_inMR_" + attribute, 1.0); } else { generalFeatures.put("feature_attr_notInMR_" + attribute, 1.0); } }); } ArrayList<String> mentionedAttrs = new ArrayList<>(); for (int i = 0; i < mentionedAttrValues.size(); i++) { String attr = mentionedAttrValues.get(i); if (attr.contains("=")) { attr = mentionedAttrValues.get(i).substring(0, mentionedAttrValues.get(i).indexOf('=')); } 
mentionedAttrs.add(attr); } HashSet<String> attrsToBeMentioned = new HashSet<>(); attrValuesToBeMentioned.stream().map((attrValue) -> { String attr = attrValue; if (attr.contains("=")) { attr = attrValue.substring(0, attrValue.indexOf('=')); } return attr; }).forEachOrdered((attr) -> { attrsToBeMentioned.add(attr); }); for (int j = 1; j <= 1; j++) { String previousAttr = ""; if (mentionedAttrs.size() - j >= 0) { previousAttr = mentionedAttrs.get(mentionedAttrs.size() - j).trim(); } if (!previousAttr.isEmpty()) { generalFeatures.put("feature_attr_" + j + "_" + previousAttr, 1.0); } else { generalFeatures.put("feature_attr_" + j + "_@@", 1.0); } } //Word N-Grams String prevAttr = "@@"; if (mentionedAttrs.size() - 1 >= 0) { prevAttr = mentionedAttrs.get(mentionedAttrs.size() - 1).trim(); } String prev2Attr = "@@"; if (mentionedAttrs.size() - 2 >= 0) { prev2Attr = mentionedAttrs.get(mentionedAttrs.size() - 2).trim(); } String prev3Attr = "@@"; if (mentionedAttrs.size() - 3 >= 0) { prev3Attr = mentionedAttrs.get(mentionedAttrs.size() - 3).trim(); } String prev4Attr = "@@"; if (mentionedAttrs.size() - 4 >= 0) { prev4Attr = mentionedAttrs.get(mentionedAttrs.size() - 4).trim(); } String prev5Attr = "@@"; if (mentionedAttrs.size() - 5 >= 0) { prev5Attr = mentionedAttrs.get(mentionedAttrs.size() - 5).trim(); } String prevBigramAttr = prev2Attr + "|" + prevAttr; String prevTrigramAttr = prev3Attr + "|" + prev2Attr + "|" + prevAttr; String prev4gramAttr = prev4Attr + "|" + prev3Attr + "|" + prev2Attr + "|" + prevAttr; String prev5gramAttr = prev5Attr + "|" + prev4Attr + "|" + prev3Attr + "|" + prev2Attr + "|" + prevAttr; generalFeatures.put("feature_attr_bigram_" + prevBigramAttr, 1.0); generalFeatures.put("feature_attr_trigram_" + prevTrigramAttr, 1.0); generalFeatures.put("feature_attr_4gram_" + prev4gramAttr, 1.0); generalFeatures.put("feature_attr_5gram_" + prev5gramAttr, 1.0); //If arguments have been generated or not attrValuesAlreadyMentioned.forEach((attr) -> { 
generalFeatures.put("feature_attr_alreadyMentioned_" + attr, 1.0); }); //If arguments should still be generated or not attrsToBeMentioned.forEach((attr) -> { generalFeatures.put("feature_attr_toBeMentioned_" + attr, 1.0); }); //Attr specific features (and global features) if (availableAttributeActions.containsKey(predicate)) { for (String action : availableAttributeActions.get(predicate)) { if (action.equals(Action.TOKEN_END)) { if (attrsToBeMentioned.isEmpty()) { valueSpecificFeatures.get(action).put("global_feature_specific_allAttrValuesMentioned", 1.0); } else { valueSpecificFeatures.get(action).put("global_feature_specific_allAttrValuesNotMentioned", 1.0); } } else { //Is attr in MR? if (MR.getAttributeValues().get(action) != null) { valueSpecificFeatures.get(action).put("global_feature_specific_isInMR", 1.0); } else { valueSpecificFeatures.get(action).put("global_feature_specific_isNotInMR", 1.0); } //Is attr already mentioned right before if (prevAttr.equals(action)) { valueSpecificFeatures.get(action).put("global_feature_specific_attrFollowingSameAttr", 1.0); } else { valueSpecificFeatures.get(action).put("global_feature_specific_attrNotFollowingSameAttr", 1.0); } //Is attr already mentioned attrValuesAlreadyMentioned.stream().map((attrValue) -> { if (attrValue.indexOf('=') == -1) { } return attrValue; }).filter((attrValue) -> (attrValue.substring(0, attrValue.indexOf('=')).equals(action))) .forEachOrdered((_item) -> { valueSpecificFeatures.get(action) .put("global_feature_specific_attrAlreadyMentioned", 1.0); }); //Is attr to be mentioned (has value to express) boolean toBeMentioned = false; for (String attrValue : attrValuesToBeMentioned) { if (attrValue.substring(0, attrValue.indexOf('=')).equals(action)) { toBeMentioned = true; valueSpecificFeatures.get(action).put("global_feature_specific_attrToBeMentioned", 1.0); } } if (!toBeMentioned) { valueSpecificFeatures.get(action).put("global_feature_specific_attrNotToBeMentioned", 1.0); } } HashSet<String> 
keys = new HashSet<>(valueSpecificFeatures.get(action).keySet()); keys.forEach((feature1) -> { keys.stream() .filter((feature2) -> (valueSpecificFeatures.get(action).get(feature1) == 1.0 && valueSpecificFeatures.get(action).get(feature2) == 1.0 && feature1.compareTo(feature2) < 0)) .forEachOrdered((feature2) -> { valueSpecificFeatures.get(action).put(feature1 + "&&" + feature2, 1.0); }); }); String nextValue = chooseNextValue(action, attrValuesToBeMentioned); if (nextValue.isEmpty() && !action.equals(Action.TOKEN_END)) { valueSpecificFeatures.get(action).put("global_feature_LMAttr_score", 0.0); } else { ArrayList<String> fullGramLM = new ArrayList<>(); for (int i = 0; i < mentionedAttrValues.size(); i++) { fullGramLM.add(mentionedAttrValues.get(i)); } ArrayList<String> prev5attrValueGramLM = new ArrayList<>(); int j = 0; for (int i = mentionedAttrValues.size() - 1; (i >= 0 && j < 5); i--) { prev5attrValueGramLM.add(0, mentionedAttrValues.get(i)); j++; } if (!action.equals(Action.TOKEN_END)) { prev5attrValueGramLM.add(action + "=" + chooseNextValue(action, attrValuesToBeMentioned)); } else { prev5attrValueGramLM.add(action); } while (prev5attrValueGramLM.size() < 4) { prev5attrValueGramLM.add(0, "@@"); } double afterLMScore = getContentLMsPerPredicate().get(predicate) .getProbability(prev5attrValueGramLM); valueSpecificFeatures.get(action).put("global_feature_LMAttr_score", afterLMScore); afterLMScore = getContentLMsPerPredicate().get(predicate).getProbability(fullGramLM); valueSpecificFeatures.get(action).put("global_feature_LMAttrFull_score", afterLMScore); } } } return new Instance(generalFeatures, valueSpecificFeatures, costs); }
From source file:structuredPredictionNLG.SFX.java
/**
 * Post-processes a generated word-action sequence into the final surface string:
 * inserts learned punctuation, normalizes "and"/"," in enumerations, re-lexicalizes
 * {@code TOKEN_X} placeholders via the instance's delexicalization map, and cleans up
 * sentence-final and spurious punctuation.
 *
 * @param di           dataset instance supplying the predicate, punctuation patterns
 *                     and delexicalization map
 * @param wordSequence the generated sequence of word actions (not modified; a copy is processed)
 * @return the cleaned-up surface realization
 */
@Override
public String postProcessWordSequence(DatasetInstance di, ArrayList<Action> wordSequence) {
    HashSet<ArrayList<Action>> matched = new HashSet<>();
    // Work on a deep copy so the caller's sequence is left untouched.
    ArrayList<Action> processedWordSequence = new ArrayList<>();
    wordSequence.forEach((act) -> {
        processedWordSequence.add(new Action(act));
    });
    // Drop a trailing end-of-sequence marker (word AND attribute both TOKEN_END).
    if (!processedWordSequence.isEmpty()
            && processedWordSequence.get(processedWordSequence.size() - 1).getWord().equals(Action.TOKEN_END)
            && processedWordSequence.get(processedWordSequence.size() - 1).getAttribute()
                    .equals(Action.TOKEN_END)) {
        processedWordSequence.remove(processedWordSequence.size() - 1);
    }
    // Insert learned punctuation: for each 4-slot surrounding pattern of this predicate, scan the
    // sequence for a word-level match and insert the pattern's punctuation action between the
    // second and third slot (index i + 2). Leading nulls in the pattern shift the scan start left.
    if (getPunctuationPatterns().containsKey(di.getMeaningRepresentation().getPredicate())) {
        getPunctuationPatterns().get(di.getMeaningRepresentation().getPredicate()).keySet()
                .forEach((surrounds) -> {
                    int beforeNulls = 0;
                    if (surrounds.get(0) == null) {
                        beforeNulls++;
                    }
                    if (surrounds.get(1) == null) {
                        beforeNulls++;
                    }
                    for (int i = 0 - beforeNulls; i < processedWordSequence.size(); i++) {
                        boolean matches = true;
                        int m = 0; // number of non-null slots that actually matched
                        for (int s = 0; s < surrounds.size(); s++) {
                            if (surrounds.get(s) != null) {
                                if (i + s < processedWordSequence.size()) {
                                    if (!processedWordSequence.get(i + s).getWord().equals(surrounds.get(s)
                                            .getWord()) /*|| !cleanActionList.get(i).getAttribute().equals(surrounds.get(s).getAttribute())*/) {
                                        matches = false;
                                        s = surrounds.size(); // break out of the slot loop
                                    } else {
                                        m++;
                                    }
                                } else {
                                    matches = false;
                                    s = surrounds.size();
                                }
                            } else if (s < 2 && i + s >= 0) {
                                // null "before" slot must fall off the left edge to match
                                matches = false;
                                s = surrounds.size();
                            } else if (s >= 2 && i + s < processedWordSequence.size()) {
                                // null "after" slot must fall off the right edge to match
                                matches = false;
                                s = surrounds.size();
                            }
                        }
                        if (matches && m > 0) {
                            matched.add(surrounds);
                            processedWordSequence.add(i + 2, getPunctuationPatterns()
                                    .get(di.getMeaningRepresentation().getPredicate()).get(surrounds));
                        }
                    }
                });
    }
    // Enumeration style: walking right-to-left, the last separator becomes "and" and any earlier
    // "and" becomes ",".
    boolean isLastPunct = true;
    if (processedWordSequence.contains(new Action("and", ""))) {
        for (int i = processedWordSequence.size() - 1; i > 0; i--) {
            if (processedWordSequence.get(i).getWord().equals(",") && isLastPunct) {
                isLastPunct = false;
                processedWordSequence.get(i).setWord("and");
            } else if (processedWordSequence.get(i).getWord().equals("and") && isLastPunct) {
                isLastPunct = false;
            } else if (processedWordSequence.get(i).getWord().equals("and") && !isLastPunct) {
                processedWordSequence.get(i).setWord(",");
            }
        }
    }
    // Strip remaining start/end delimiter actions.
    ArrayList<Action> cleanActionList = new ArrayList<>();
    processedWordSequence.stream().filter((action) -> (!action.getWord().equals(Action.TOKEN_START)
            && !action.getWord().equals(Action.TOKEN_END))).forEachOrdered((action) -> {
                cleanActionList.add(action);
            });
    // Re-lexicalize: TOKEN_X placeholders are replaced from the delexicalization map; "-ly" and
    // "s" directly after a placeholder are attached as suffixes without a space.
    String predictedWordSequence = " ";
    boolean previousIsTokenX = false;
    for (Action action : cleanActionList) {
        if (action.getWord().startsWith(Action.TOKEN_X)) {
            predictedWordSequence += " "
                    + di.getMeaningRepresentation().getDelexicalizationMap().get(action.getWord());
            previousIsTokenX = true;
        } else {
            if (action.getWord().equals("-ly") && previousIsTokenX) {
                predictedWordSequence += "ly";
            } else if (action.getWord().equals("s") && previousIsTokenX) {
                predictedWordSequence += action.getWord();
            } else {
                predictedWordSequence += " " + action.getWord();
            }
            previousIsTokenX = false;
        }
    }
    predictedWordSequence = predictedWordSequence.trim();
    // Question predicates (name starts with '?') must end in '?', everything else in '.'.
    if (di.getMeaningRepresentation().getPredicate().startsWith("?")
            && !predictedWordSequence.endsWith("?")) {
        if (predictedWordSequence.endsWith(".")) {
            predictedWordSequence = predictedWordSequence.substring(0, predictedWordSequence.length() - 1);
        }
        predictedWordSequence = predictedWordSequence.trim() + "?";
    } else if (!predictedWordSequence.endsWith(".") && !predictedWordSequence.endsWith("?")) {
        /*if (predictedWordSequence.endsWith("?")) {
        predictedWordSequence = predictedWordSequence.substring(0, predictedWordSequence.length() - 1);
        }*/
        predictedWordSequence = predictedWordSequence.trim() + ".";
    }
    // Tokenize punctuation with surrounding spaces.
    // NOTE(review): the final .replaceAll(" ", " ") is a no-op as written; it was presumably meant
    // to collapse double spaces ("  " -> " ") and may have been mangled in transit — confirm
    // against the original source before changing.
    predictedWordSequence = predictedWordSequence.replaceAll(" the the ", " the ").replaceAll("\\?", " \\? ")
            .replaceAll(":", " : ").replaceAll("\\.", " \\. ").replaceAll(",", " , ").replaceAll(" ", " ")
            .trim();
    // Drop separators that directly precede sentence-final punctuation (", ." / "and ?", etc.).
    predictedWordSequence = predictedWordSequence.replaceAll(" , \\. ", " \\. ")
            .replaceAll(" and \\. ", " \\. ").replaceAll(" , \\? ", " \\? ").replaceAll(" and \\? ", " \\? ")
            .replaceAll(" ,\\. ", " \\. ").replaceAll(" and\\. ", " \\. ").replaceAll(" ,\\? ", " \\? ")
            .replaceAll(" and\\? ", " \\? ").trim();
    /*for (String comp : sillyCompositeWordsInData.keySet()) {
    predictedWordSequence = predictedWordSequence.replaceAll(comp, sillyCompositeWordsInData.get(comp));
    }*/
    // Strip a single leading punctuation mark, if any.
    if (predictedWordSequence.startsWith(",") || predictedWordSequence.startsWith(".")
            || predictedWordSequence.startsWith("?")) {
        predictedWordSequence = predictedWordSequence.substring(1).trim();
    }
    // Debug output: a second leading comma indicates a punctuation-insertion anomaly.
    if (predictedWordSequence.startsWith(",")) {
        System.out.println(wordSequence);
        System.out.println(matched);
    }
    return predictedWordSequence;
}
From source file:structuredPredictionNLG.SFX.java
/** * * @param classifierAttrs/*from ww w .ja va2 s . c o m*/ * @param classifierWords * @param testingData * @param epoch * @return */ @Override public Double evaluateGeneration(HashMap<String, JAROW> classifierAttrs, HashMap<String, HashMap<String, JAROW>> classifierWords, ArrayList<DatasetInstance> testingData, int epoch) { System.out.println("Evaluate argument generation "); ArrayList<ScoredFeaturizedTranslation<IString, String>> generations = new ArrayList<>(); HashMap<DatasetInstance, ArrayList<Action>> generationActions = new HashMap<>(); ArrayList<ArrayList<Sequence<IString>>> finalReferences = new ArrayList<>(); HashMap<DatasetInstance, ArrayList<String>> finalReferencesWordSequences = new HashMap<>(); HashMap<DatasetInstance, String> predictedWordSequences_overAllPredicates = new HashMap<>(); ArrayList<String> allPredictedWordSequences = new ArrayList<>(); ArrayList<String> allPredictedMRStr = new ArrayList<>(); ArrayList<ArrayList<String>> allPredictedReferences = new ArrayList<>(); HashMap<String, Double> attrCoverage = new HashMap<>(); HashMap<String, HashSet<String>> abstractMRsToMRs = new HashMap<>(); for (DatasetInstance di : testingData) { String predicate = di.getMeaningRepresentation().getPredicate(); ArrayList<Action> predictedActionList = new ArrayList<>(); ArrayList<Action> predictedWordList = new ArrayList<>(); //PHRASE GENERATION EVALUATION String predictedAttr = ""; ArrayList<String> predictedAttrValues = new ArrayList<>(); HashSet<String> attrValuesToBeMentioned = new HashSet<>(); HashSet<String> attrValuesAlreadyMentioned = new HashSet<>(); for (String attribute : di.getMeaningRepresentation().getAttributeValues().keySet()) { for (String value : di.getMeaningRepresentation().getAttributeValues().get(attribute)) { attrValuesToBeMentioned.add(attribute.toLowerCase() + "=" + value.toLowerCase()); } } if (attrValuesToBeMentioned.isEmpty()) { attrValuesToBeMentioned.add("empty=empty"); } while (!predictedAttr.equals(Action.TOKEN_END) && 
predictedAttrValues.size() < getMaxContentSequenceLength()) { if (!predictedAttr.isEmpty()) { attrValuesToBeMentioned.remove(predictedAttr); } if (!attrValuesToBeMentioned.isEmpty()) { Instance attrTrainingVector = createContentInstance(predicate, "@TOK@", predictedAttrValues, attrValuesAlreadyMentioned, attrValuesToBeMentioned, di.getMeaningRepresentation(), getAvailableContentActions()); if (attrTrainingVector != null) { Prediction predictAttr = classifierAttrs.get(predicate).predict(attrTrainingVector); if (predictAttr.getLabel() != null) { predictedAttr = predictAttr.getLabel().trim(); if (!classifierAttrs.get(predicate).getCurrentWeightVectors().keySet() .containsAll(di.getMeaningRepresentation().getAttributeValues().keySet())) { System.out.println("MR ATTR NOT IN CLASSIFIERS"); System.out .println(classifierAttrs.get(predicate).getCurrentWeightVectors().keySet()); } String predictedValue = ""; if (!predictedAttr.equals(Action.TOKEN_END)) { predictedValue = chooseNextValue(predictedAttr, attrValuesToBeMentioned); HashSet<String> rejectedAttrs = new HashSet<>(); while (predictedValue.isEmpty() && (!predictedAttr.equals(Action.TOKEN_END) || (predictedAttrValues.isEmpty() && classifierAttrs.get(predicate).getCurrentWeightVectors().keySet() .containsAll(di.getMeaningRepresentation() .getAttributeValues().keySet())))) { rejectedAttrs.add(predictedAttr); predictedAttr = Action.TOKEN_END; double maxScore = -Double.MAX_VALUE; for (String attr : predictAttr.getLabel2Score().keySet()) { if (!rejectedAttrs.contains(attr) && (Double .compare(predictAttr.getLabel2Score().get(attr), maxScore) > 0)) { maxScore = predictAttr.getLabel2Score().get(attr); predictedAttr = attr; } } if (!predictedAttr.equals(Action.TOKEN_END)) { predictedValue = chooseNextValue(predictedAttr, attrValuesToBeMentioned); } } } if (!predictedAttr.equals(Action.TOKEN_END)) { predictedAttr += "=" + predictedValue; } predictedAttrValues.add(predictedAttr); if (!predictedAttr.isEmpty()) { 
attrValuesAlreadyMentioned.add(predictedAttr); attrValuesToBeMentioned.remove(predictedAttr); } } else { predictedAttr = Action.TOKEN_END; predictedAttrValues.add(predictedAttr); } } else { predictedAttr = Action.TOKEN_END; predictedAttrValues.add(predictedAttr); } } else { predictedAttr = Action.TOKEN_END; predictedAttrValues.add(predictedAttr); } } //WORD SEQUENCE EVALUATION predictedAttr = ""; ArrayList<String> predictedAttributes = new ArrayList<>(); attrValuesToBeMentioned = new HashSet<>(); attrValuesAlreadyMentioned = new HashSet<>(); HashMap<String, ArrayList<String>> valuesToBeMentioned = new HashMap<>(); for (String attribute : di.getMeaningRepresentation().getAttributeValues().keySet()) { for (String value : di.getMeaningRepresentation().getAttributeValues().get(attribute)) { attrValuesToBeMentioned.add(attribute.toLowerCase() + "=" + value.toLowerCase()); } valuesToBeMentioned.put(attribute, new ArrayList<>(di.getMeaningRepresentation().getAttributeValues().get(attribute))); } if (attrValuesToBeMentioned.isEmpty()) { attrValuesToBeMentioned.add("empty=empty"); } HashSet<String> attrValuesToBeMentionedCopy = new HashSet<>(attrValuesToBeMentioned); int a = -1; for (String attrValue : predictedAttrValues) { a++; if (!attrValue.equals(Action.TOKEN_END)) { String attribute = attrValue.split("=")[0]; predictedAttributes.add(attrValue); //GENERATE PHRASES if (!attribute.equals(Action.TOKEN_END)) { if (classifierWords.get(predicate).containsKey(attribute)) { ArrayList<String> nextAttributesForInstance = new ArrayList<>( predictedAttrValues.subList(a + 1, predictedAttrValues.size())); String predictedWord = ""; boolean isValueMentioned = false; String valueTBM = ""; if (attrValue.contains("=")) { valueTBM = attrValue.substring(attrValue.indexOf('=') + 1); } if (valueTBM.isEmpty()) { isValueMentioned = true; } ArrayList<String> subPhrase = new ArrayList<>(); while (!predictedWord.equals(Action.TOKEN_END) && predictedWordList.size() < getMaxWordSequenceLength()) { 
ArrayList<String> predictedAttributesForInstance = new ArrayList<>(); for (int i = 0; i < predictedAttributes.size() - 1; i++) { predictedAttributesForInstance.add(predictedAttributes.get(i)); } if (!predictedAttributes.get(predictedAttributes.size() - 1).equals(attrValue)) { predictedAttributesForInstance .add(predictedAttributes.get(predictedAttributes.size() - 1)); } Instance wordTrainingVector = createWordInstance(predicate, new Action("@TOK@", attrValue), predictedAttributesForInstance, predictedActionList, nextAttributesForInstance, attrValuesAlreadyMentioned, attrValuesToBeMentioned, isValueMentioned, getAvailableWordActions().get(predicate)); if (wordTrainingVector != null && classifierWords.get(predicate) != null) { if (classifierWords.get(predicate).get(attribute) != null) { Prediction predictWord = classifierWords.get(predicate).get(attribute) .predict(wordTrainingVector); if (predictWord.getLabel() != null) { predictedWord = predictWord.getLabel().trim(); while (predictedWord.equals(Action.TOKEN_END) && !predictedActionList.isEmpty() && predictedActionList.get(predictedActionList.size() - 1) .getWord().equals(Action.TOKEN_END)) { double maxScore = -Double.MAX_VALUE; for (String word : predictWord.getLabel2Score().keySet()) { if (!word.equals(Action.TOKEN_END) && (Double.compare( predictWord.getLabel2Score().get(word), maxScore) > 0)) { maxScore = predictWord.getLabel2Score().get(word); predictedWord = word; } } } predictedActionList.add(new Action(predictedWord, attrValue)); if (!predictedWord.equals(Action.TOKEN_START) && !predictedWord.equals(Action.TOKEN_END)) { subPhrase.add(predictedWord); predictedWordList.add(new Action(predictedWord, attrValue)); } } else { predictedWord = Action.TOKEN_END; predictedActionList.add(new Action(predictedWord, attrValue)); } } else { predictedWord = Action.TOKEN_END; predictedActionList.add(new Action(predictedWord, attrValue)); } } if (!isValueMentioned) { if (!predictedWord.equals(Action.TOKEN_END)) { if 
(predictedWord.startsWith(Action.TOKEN_X) && (valueTBM.matches("\"[xX][0-9]+\"") || valueTBM.matches("[xX][0-9]+") || valueTBM.startsWith(Action.TOKEN_X))) { isValueMentioned = true; } else if (!predictedWord.startsWith(Action.TOKEN_X) && !(valueTBM.matches("\"[xX][0-9]+\"") || valueTBM.matches("[xX][0-9]+") || valueTBM.startsWith(Action.TOKEN_X))) { String valueToCheck = valueTBM; if (valueToCheck.equals("no") || valueToCheck.equals("yes") || valueToCheck.equals("yes or no") || valueToCheck.equals("none") //|| valueToCheck.equals("dont_care") || valueToCheck.equals("empty")) { if (attribute.contains("=")) { valueToCheck = attribute.replace("=", ":"); } else { valueToCheck = attribute + ":" + valueTBM; } } if (!valueToCheck.equals("empty:empty") && getValueAlignments().containsKey(valueToCheck)) { for (ArrayList<String> alignedStr : getValueAlignments() .get(valueToCheck).keySet()) { if (endsWith(subPhrase, alignedStr)) { isValueMentioned = true; break; } } } } } if (isValueMentioned) { attrValuesAlreadyMentioned.add(attrValue); attrValuesToBeMentioned.remove(attrValue); } } String mentionedAttrValue = ""; if (!predictedWord.startsWith(Action.TOKEN_X)) { for (String attrValueTBM : attrValuesToBeMentioned) { if (attrValueTBM.contains("=")) { String value = attrValueTBM.substring(attrValueTBM.indexOf('=') + 1); if (!(value.matches("\"[xX][0-9]+\"") || value.matches("[xX][0-9]+") || value.startsWith(Action.TOKEN_X))) { String valueToCheck = value; if (valueToCheck.equals("no") || valueToCheck.equals("yes") || valueToCheck.equals("yes or no") || valueToCheck.equals("none") //|| valueToCheck.equals("dont_care") || valueToCheck.equals("empty")) { valueToCheck = attrValueTBM.replace("=", ":"); } if (!valueToCheck.equals("empty:empty") && getValueAlignments().containsKey(valueToCheck)) { for (ArrayList<String> alignedStr : getValueAlignments() .get(valueToCheck).keySet()) { if (endsWith(subPhrase, alignedStr)) { mentionedAttrValue = attrValueTBM; break; } } } } } } } if 
(!mentionedAttrValue.isEmpty()) { attrValuesAlreadyMentioned.add(mentionedAttrValue); attrValuesToBeMentioned.remove(mentionedAttrValue); } } if (predictedWordList.size() >= getMaxWordSequenceLength() && !predictedActionList .get(predictedActionList.size() - 1).getWord().equals(Action.TOKEN_END)) { predictedWord = Action.TOKEN_END; predictedActionList.add(new Action(predictedWord, attrValue)); } } else { String predictedWord = Action.TOKEN_END; predictedActionList.add(new Action(predictedWord, attrValue)); } } } } ArrayList<String> predictedAttrs = new ArrayList<>(); predictedAttrValues.forEach((attributeValuePair) -> { predictedAttrs.add(attributeValuePair.split("=")[0]); }); String predictedWordSequence = postProcessWordSequence(di, predictedActionList); ArrayList<String> predictedAttrList = getPredictedAttrList(predictedActionList); if (attrValuesToBeMentionedCopy.size() != 0.0) { double missingAttrs = 0.0; missingAttrs = attrValuesToBeMentionedCopy.stream() .filter((attr) -> (!predictedAttrList.contains(attr))).map((_item) -> 1.0) .reduce(missingAttrs, (accumulator, _item) -> accumulator + _item); double attrSize = attrValuesToBeMentionedCopy.size(); attrCoverage.put(predictedWordSequence, missingAttrs / attrSize); } allPredictedWordSequences.add(predictedWordSequence); allPredictedMRStr.add(di.getMeaningRepresentation().getMRstr()); predictedWordSequences_overAllPredicates.put(di, predictedWordSequence); if (!abstractMRsToMRs.containsKey(di.getMeaningRepresentation().getAbstractMR())) { abstractMRsToMRs.put(di.getMeaningRepresentation().getAbstractMR(), new HashSet<String>()); } abstractMRsToMRs.get(di.getMeaningRepresentation().getAbstractMR()) .add(di.getMeaningRepresentation().getMRstr()); Sequence<IString> translation = IStrings .tokenize(NISTTokenizer.tokenize(predictedWordSequence.toLowerCase())); ScoredFeaturizedTranslation<IString, String> tran = new ScoredFeaturizedTranslation<>(translation, null, 0); generations.add(tran); generationActions.put(di, 
predictedActionList); ArrayList<Sequence<IString>> references = new ArrayList<>(); ArrayList<String> referencesStrings = new ArrayList<>(); if (getPerformEvaluationOn().equals("valid") || getPerformEvaluationOn().equals("train")) { for (String ref : di.getEvaluationReferences()) { referencesStrings.add(ref); references.add(IStrings.tokenize(NISTTokenizer.tokenize(ref))); } } else { references = wenEvaluationReferenceSequences.get(di.getMeaningRepresentation().getMRstr()); referencesStrings = wenEvaluationReferences.get(di.getMeaningRepresentation().getMRstr()); if (references == null) { references = new ArrayList<>(); referencesStrings = new ArrayList<>(); for (String ref : di.getEvaluationReferences()) { referencesStrings.add(ref); references.add(IStrings.tokenize(NISTTokenizer.tokenize(ref))); } } } allPredictedReferences.add(referencesStrings); finalReferencesWordSequences.put(di, referencesStrings); finalReferences.add(references); } BLEUMetric BLEU = new BLEUMetric(finalReferences, 4, false); Double bleuScore = BLEU.score(generations); double finalCoverageError = 0.0; finalCoverageError = attrCoverage.values().stream().map((c) -> c).reduce(finalCoverageError, (accumulator, _item) -> accumulator + _item); finalCoverageError /= attrCoverage.size(); for (int i = 0; i < allPredictedWordSequences.size(); i++) { double maxRouge = 0.0; String predictedWordSequence = allPredictedWordSequences.get(i).replaceAll("\\?", " \\? ") .replaceAll(":", " : ").replaceAll("\\.", " \\. 
").replaceAll(",", " , ").replaceAll(" ", " ") .trim(); for (String ref : allPredictedReferences.get(i)) { double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4); if (rouge > maxRouge) { maxRouge = rouge; } } //System.out.println(allPredictedMRStr.get(i) + "\t" + maxRouge + "\t" + allPredictedWordSequences.get(i) + "\t" + refs); } double avgRougeScore = 0.0; String detailedRes = ""; avgRougeScore = testingData.stream().map((di) -> { double maxRouge = 0.0; if (!finalReferencesWordSequences.containsKey(di)) { System.out.println(di.getMeaningRepresentation().getAbstractMR()); } String predictedWordSequence = predictedWordSequences_overAllPredicates.get(di) .replaceAll("\\?", " \\? ").replaceAll(":", " : ").replaceAll("\\.", " \\. ") .replaceAll(",", " , ").replaceAll(" ", " ").trim(); for (String ref : finalReferencesWordSequences.get(di)) { double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4); if (rouge > maxRouge) { maxRouge = rouge; } } return maxRouge; }).map((maxRouge) -> maxRouge).reduce(avgRougeScore, (accumulator, _item) -> accumulator + _item); System.out.println("BLEU: \t" + bleuScore); //System.out.println("g: " + generations); //System.out.println("attr: " + predictedAttrLists); //System.out.println("BLEU smooth: \t" + bleuSmoothScore); //System.out.println("g: " + generations); //System.out.println("attr: " + predictedAttrLists); //System.out.println("BLEU smooth: \t" + bleuSmoothScore); System.out.println("ROUGE: \t" + (avgRougeScore / allPredictedWordSequences.size())); System.out.println("COVERAGE ERROR: \t" + finalCoverageError); System.out.println("BRC: \t" + ((avgRougeScore / allPredictedWordSequences.size()) + bleuScore + (1.0 - finalCoverageError)) / 3.0); if (isCalculateResultsPerPredicate()) { //////////////////////// //ArrayList<String> bestPredictedStrings = new ArrayList<>(); //ArrayList<String> bestPredictedStringsMRs = new ArrayList<>(); double uniqueMRsInTestAndNotInTrainAllPredWordBLEU = 0.0; double 
uniqueMRsInTestAndNotInTrainAllPredWordROUGE = 0.0; double uniqueMRsInTestAndNotInTrainAllPredWordCOVERAGEERR = 0.0; double uniqueMRsInTestAndNotInTrainAllPredWordBRC = 0.0; detailedRes = ""; ArrayList<DatasetInstance> abstractMRList = new ArrayList<>(); HashSet<String> reportedAbstractMRs = new HashSet<>(); testingData.stream() .filter((di) -> (!reportedAbstractMRs.contains(di.getMeaningRepresentation().getAbstractMR()))) .map((di) -> { reportedAbstractMRs.add(di.getMeaningRepresentation().getAbstractMR()); return di; }).forEachOrdered((di) -> { boolean isInTraining = false; for (DatasetInstance di2 : getTrainingData()) { if (di2.getMeaningRepresentation().getAbstractMR() .equals(di.getMeaningRepresentation().getAbstractMR())) { isInTraining = true; } } if (!isInTraining) { for (DatasetInstance di2 : getValidationData()) { if (di2.getMeaningRepresentation().getAbstractMR() .equals(di.getMeaningRepresentation().getAbstractMR())) { isInTraining = true; } } } if (!isInTraining) { abstractMRList.add(di); } }); for (DatasetInstance di : abstractMRList) { Double bestROUGE = -100.0; Double bestBLEU = -100.0; Double bestCover = -100.0; Double bestHarmonicMean = -100.0; String predictedString = predictedWordSequences_overAllPredicates.get(di); reportedAbstractMRs.add(di.getMeaningRepresentation().getAbstractMR()); double maxRouge = 0.0; String predictedWordSequence = predictedString.replaceAll("\\?", " \\? ").replaceAll(":", " : ") .replaceAll("\\.", " \\. 
").replaceAll(",", " , ").replaceAll(" ", " ").trim(); for (String ref : finalReferencesWordSequences.get(di)) { double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4); if (rouge > maxRouge) { maxRouge = rouge; } } double BLEUSmooth = BLEUMetric.computeLocalSmoothScore(predictedWordSequence, finalReferencesWordSequences.get(di), 4); double cover = 1.0 - attrCoverage.get(predictedString); double harmonicMean = 3.0 / (1.0 / BLEUSmooth + 1.0 / maxRouge + 1.0 / cover); if (harmonicMean > bestHarmonicMean) { bestROUGE = maxRouge; bestBLEU = BLEUSmooth; bestCover = cover; bestHarmonicMean = harmonicMean; } uniqueMRsInTestAndNotInTrainAllPredWordBLEU += bestBLEU; uniqueMRsInTestAndNotInTrainAllPredWordROUGE += bestROUGE; uniqueMRsInTestAndNotInTrainAllPredWordCOVERAGEERR += bestCover; uniqueMRsInTestAndNotInTrainAllPredWordBRC += bestHarmonicMean; } uniqueMRsInTestAndNotInTrainAllPredWordBLEU /= abstractMRList.size(); uniqueMRsInTestAndNotInTrainAllPredWordROUGE /= abstractMRList.size(); uniqueMRsInTestAndNotInTrainAllPredWordCOVERAGEERR /= abstractMRList.size(); uniqueMRsInTestAndNotInTrainAllPredWordBRC /= abstractMRList.size(); System.out.println( "UNIQUE (NOT IN TRAIN) WORD ALL PRED BLEU: \t" + uniqueMRsInTestAndNotInTrainAllPredWordBLEU); System.out.println( "UNIQUE (NOT IN TRAIN) WORD ALL PRED ROUGE: \t" + uniqueMRsInTestAndNotInTrainAllPredWordROUGE); System.out.println("UNIQUE (NOT IN TRAIN) WORD ALL PRED COVERAGE ERROR: \t" + (1.0 - uniqueMRsInTestAndNotInTrainAllPredWordCOVERAGEERR)); System.out.println( "UNIQUE (NOT IN TRAIN) WORD ALL PRED BRC: \t" + uniqueMRsInTestAndNotInTrainAllPredWordBRC); abstractMRList.forEach((di) -> { System.out.println(di.getMeaningRepresentation().getAbstractMR() + "\t" + predictedWordSequences_overAllPredicates.get(di)); }); System.out.println("TOTAL SET SIZE: \t" + abstractMRList.size()); //System.out.println(abstractMRList); //System.out.println(detailedRes); } ArrayList<String> bestPredictedStrings = new ArrayList<>(); 
ArrayList<String> bestPredictedStringsMRs = new ArrayList<>(); double uniqueAllPredWordBLEU = 0.0; double uniqueAllPredWordROUGE = 0.0; double uniqueAllPredWordCOVERAGEERR = 0.0; double uniqueAllPredWordBRC = 0.0; HashSet<String> reportedAbstractMRs = new HashSet<>(); for (DatasetInstance di : testingData) { if (!reportedAbstractMRs.contains(di.getMeaningRepresentation().getAbstractMR())) { String bestPredictedString = ""; Double bestROUGE = -100.0; Double bestBLEU = -100.0; Double bestCover = -100.0; Double bestHarmonicMean = -100.0; String predictedString = predictedWordSequences_overAllPredicates.get(di); reportedAbstractMRs.add(di.getMeaningRepresentation().getAbstractMR()); double maxRouge = 0.0; String predictedWordSequence = predictedString.replaceAll("\\?", " \\? ").replaceAll(":", " : ") .replaceAll("\\.", " \\. ").replaceAll(",", " , ").replaceAll(" ", " ").trim(); for (String ref : finalReferencesWordSequences.get(di)) { double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4); if (rouge > maxRouge) { maxRouge = rouge; } } double BLEUSmooth = BLEUMetric.computeLocalSmoothScore(predictedWordSequence, finalReferencesWordSequences.get(di), 4); double cover = 1.0 - attrCoverage.get(predictedString); double harmonicMean = 3.0 / (1.0 / BLEUSmooth + 1.0 / maxRouge + 1.0 / cover); if (harmonicMean > bestHarmonicMean) { bestPredictedString = predictedString; bestROUGE = maxRouge; bestBLEU = BLEUSmooth; bestCover = cover; bestHarmonicMean = harmonicMean; } bestPredictedStrings.add(bestPredictedString); bestPredictedStringsMRs.add(di.getMeaningRepresentation().getMRstr()); uniqueAllPredWordBLEU += bestBLEU; uniqueAllPredWordROUGE += bestROUGE; uniqueAllPredWordCOVERAGEERR += bestCover; uniqueAllPredWordBRC += bestHarmonicMean; } //} } if (isCalculateResultsPerPredicate()) { uniqueAllPredWordBLEU /= reportedAbstractMRs.size(); uniqueAllPredWordROUGE /= reportedAbstractMRs.size(); uniqueAllPredWordCOVERAGEERR /= reportedAbstractMRs.size(); uniqueAllPredWordBRC /= 
reportedAbstractMRs.size(); System.out.println("UNIQUE WORD ALL PRED BLEU: \t" + uniqueAllPredWordBLEU); System.out.println("UNIQUE WORD ALL PRED ROUGE: \t" + uniqueAllPredWordROUGE); System.out.println("UNIQUE WORD ALL PRED COVERAGE ERROR: \t" + (1.0 - uniqueAllPredWordCOVERAGEERR)); System.out.println("UNIQUE WORD ALL PRED BRC: \t" + uniqueAllPredWordBRC); System.out.println(detailedRes); System.out.println("TOTAL: \t" + reportedAbstractMRs.size()); //////////////////////// for (String predicate : getPredicates()) { detailedRes = ""; bestPredictedStrings = new ArrayList<>(); bestPredictedStringsMRs = new ArrayList<>(); double uniquePredWordBLEU = 0.0; double uniquePredWordROUGE = 0.0; double uniquePredWordCOVERAGEERR = 0.0; double uniquePredWordBRC = 0.0; reportedAbstractMRs = new HashSet<>(); for (DatasetInstance di : testingData) { if (di.getMeaningRepresentation().getPredicate().equals(predicate) && !reportedAbstractMRs.contains(di.getMeaningRepresentation().getAbstractMR())) { String bestPredictedString = ""; Double bestROUGE = -100.0; Double bestBLEU = -100.0; Double bestCover = -100.0; Double bestHarmonicMean = -100.0; String predictedString = predictedWordSequences_overAllPredicates.get(di); reportedAbstractMRs.add(di.getMeaningRepresentation().getAbstractMR()); double maxRouge = 0.0; String predictedWordSequence = predictedString.replaceAll("\\?", " \\? ") .replaceAll(":", " : ").replaceAll("\\.", " \\. 
").replaceAll(",", " , ") .replaceAll(" ", " ").trim(); for (String ref : finalReferencesWordSequences.get(di)) { double rouge = Rouge.ROUGE_N(predictedWordSequence, ref, 4); if (rouge > maxRouge) { maxRouge = rouge; } } double BLEUSmooth = BLEUMetric.computeLocalSmoothScore(predictedWordSequence, finalReferencesWordSequences.get(di), 4); double cover = 1.0 - attrCoverage.get(predictedString); double harmonicMean = 3.0 / (1.0 / BLEUSmooth + 1.0 / maxRouge + 1.0 / cover); if (harmonicMean > bestHarmonicMean) { bestPredictedString = predictedString; bestROUGE = maxRouge; bestBLEU = BLEUSmooth; bestCover = cover; bestHarmonicMean = harmonicMean; } bestPredictedStrings.add(bestPredictedString); bestPredictedStringsMRs.add(di.getMeaningRepresentation().getMRstr()); uniquePredWordBLEU += bestBLEU; uniquePredWordROUGE += bestROUGE; uniquePredWordCOVERAGEERR += bestCover; uniquePredWordBRC += bestHarmonicMean; } } uniquePredWordBLEU /= reportedAbstractMRs.size(); uniquePredWordROUGE /= reportedAbstractMRs.size(); uniquePredWordCOVERAGEERR /= reportedAbstractMRs.size(); uniquePredWordBRC /= reportedAbstractMRs.size(); System.out.println("UNIQUE WORD " + predicate + " BLEU: \t" + uniquePredWordBLEU); System.out.println("UNIQUE WORD " + predicate + " ROUGE: \t" + uniquePredWordROUGE); System.out.println( "UNIQUE WORD " + predicate + " COVERAGE ERROR: \t" + (1.0 - uniquePredWordCOVERAGEERR)); System.out.println("UNIQUE WORD " + predicate + " BRC: \t" + uniquePredWordBRC); System.out.println(detailedRes); System.out.println("TOTAL " + predicate + ": \t" + reportedAbstractMRs.size()); } } if (isCalculateResultsPerPredicate()) { BufferedWriter bw = null; File f = null; try { f = new File("results/random_SFX" + getDataset() + "TextsAfter" + (epoch) + "_" + JLOLS.sentenceCorrectionFurtherSteps + "_" + JLOLS.p + "epochsTESTINGDATA.txt"); } catch (NullPointerException e) { } try { bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f))); } catch (FileNotFoundException 
e) { } try { bw.write("BLEU:" + bleuScore); bw.write("\n"); } catch (IOException e) { } for (int i = 0; i < bestPredictedStrings.size(); i++) { try { String mr = bestPredictedStringsMRs.get(i); bw.write("MR;" + mr.replaceAll(";", ",") + ";"); if (getDataset().equals("hotel")) { bw.write("LOLS_SFHOT;"); } else { bw.write("LOLS_SFRES;"); } bw.write("\n"); } catch (IOException e) { } } try { bw.close(); } catch (IOException e) { } } return bleuScore; }