Java tutorial
/* * MIT License * * Copyright (c) 2017 Magdalena Kaiser * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package de.dfki.mmf.attributeselection; import com.google.gson.JsonArray; import com.google.gson.JsonElement; import com.google.gson.JsonObject; import java.text.DecimalFormat; import java.util.*; /** * Created by Magdalena Kaiser on 17.07.2016. */ /** * Algorithm to retrieve all the available attributes which identify certain world object * and to select the most suitable ones with respect to the used AttributeSelectorType */ public class AttributeSelectionAlgorithm implements Runnable { private JsonObject queriedObject; private List<JsonObject> databaseObjects; protected JsonObject saliencyAnnotations; private Set<JsonObject> identifierSet; private double saliencyThreshold = 0.5; private boolean partialIdentifier = false; public AttributeSelectionAlgorithm(List<JsonObject> databaseObjects, JsonObject queriedObject) { this.databaseObjects = databaseObjects; this.queriedObject = queriedObject; } public AttributeSelectionAlgorithm(List<JsonObject> databaseObjects, JsonObject queriedObject, JsonObject saliencyAnnotations) { this.databaseObjects = databaseObjects; this.queriedObject = queriedObject; this.saliencyAnnotations = saliencyAnnotations; } public synchronized Set<JsonObject> getResult() throws InterruptedException { while (identifierSet == null) { wait(); } return identifierSet; } private void setResult(Set<JsonObject> identifier) { this.identifierSet = identifier; } public void setSaliencyThreshold(double threshold) { this.saliencyThreshold = threshold; } public double getSaliencyThreshold() { return this.saliencyThreshold; } public boolean isPartialIdentifiable() { return partialIdentifier; } public void setPartialIdentifier(boolean partialIdentifier) { this.partialIdentifier = partialIdentifier; } /** * * @param identifierSet * @return shortest attribute set where all used attributes have a saliency value higher than default threshold of 0.8 */ public JsonObject getShortestMostSalientAboveThresholdIdentifier(Set<JsonObject> identifierSet) { //use default threshold of 0.8 return getShortestMostSalientAboveThresholdIdentifier(identifierSet, 0.8); } /** * * @param identifierSet * @param saliencyEndResultThreshold * @return shortest attribute set where all used attributes have a saliency value higher than certain threshold */ public JsonObject getShortestMostSalientAboveThresholdIdentifier(Set<JsonObject> identifierSet, double saliencyEndResultThreshold) { if (identifierSet == null || identifierSet.isEmpty()) { return null; } ArrayList<JsonObject> resultList = new ArrayList<>(identifierSet); Collections.sort(resultList, new CompareSize()); CompareSaliency cmpSaliency = new CompareSaliency(); JsonObject resultMap = new JsonObject(); double oldSaliencyValue = 0.0; int oldSize = resultList.get(0).size(); //check if list element has a saliency value above the threshold boolean foundVisible = false; //use same sigmoid function to normalize threshold which is used to normalize saliency value saliencyEndResultThreshold = saliencyEndResultThreshold / (Math.sqrt(1 + Math.pow(saliencyEndResultThreshold, 2.0))); for (int i = 0; i < resultList.size(); i++) { double saliencyValue = cmpSaliency.computeSaliencyValue(resultList.get(i)); int size = resultList.get(i).size(); //accept only greater size if no visible one has been found yet if (size > oldSize && foundVisible) { break; } //get the most visible one for the smallest possible size if (saliencyValue > saliencyEndResultThreshold && saliencyValue > oldSaliencyValue) { foundVisible = true; resultMap = new JsonObject(); Set<Map.Entry<String, JsonElement>> entrySet = resultList.get(i).entrySet(); for (Map.Entry<String, JsonElement> currentEntry : entrySet) { resultMap.add(currentEntry.getKey(), currentEntry.getValue()); } oldSaliencyValue = saliencyValue; oldSize = size; } } //no JsonObject has saliency value above required threshold -> take last element in list, will have highest value if (resultMap.size() == 0) { Set<Map.Entry<String, JsonElement>> entrySet = resultList.get(resultList.size() - 1).entrySet(); for (Map.Entry<String, JsonElement> entry : entrySet) { resultMap.add(entry.getKey(), entry.getValue()); } } return resultMap; } /** * * @param identifierSet * @return shortest attribute set which identifies queried object */ public JsonObject getShortestIdentifier(Set<JsonObject> identifierSet) { if (identifierSet == null || identifierSet.isEmpty()) { return null; } ArrayList<JsonObject> resultList = new ArrayList<>(identifierSet); Collections.sort(resultList, new CompareSize()); return resultList.get(0); } /** * * @param identifierSet * @return attribute set which maximizes the saliency value */ public JsonObject getMostSalientIdentifier(Set<JsonObject> identifierSet) { if (identifierSet == null || identifierSet.isEmpty()) { return null; } ArrayList<JsonObject> resultList = new ArrayList<>(identifierSet); Collections.sort(resultList, new CompareSaliency()); return resultList.get(0); } /** * * @param identifierSet * @return attribute set which maximize the saliency AND the shortness value */ public JsonObject getMaxSalientMaxShortestIdentifier(Set<JsonObject> identifierSet) { if (identifierSet == null || identifierSet.isEmpty()) { return null; } ArrayList<JsonObject> saliencyResultList = new ArrayList<>(identifierSet); //sort identifiers according to their saliency values Collections.sort(saliencyResultList, new CompareSaliency()); ArrayList<JsonObject> sizeResultList = new ArrayList<>(identifierSet); //sort identifiers according to their length Collections.sort(sizeResultList, new CompareSize()); //determine rank in sizeResultList, if there are several with the same length -> receive same rank HashMap<JsonObject, Integer> sizeMap = new HashMap<>(); int sizeCounter = 1; sizeMap.put(sizeResultList.get(0), sizeCounter); for (int i = 1; i < sizeResultList.size(); i++) { if (sizeResultList.get(i).size() == sizeResultList.get(i - 1).size()) { sizeMap.put(sizeResultList.get(i), sizeCounter); } else { sizeCounter++; sizeMap.put(sizeResultList.get(i), sizeCounter); } } //determine rank in saliencyResultList, if there are several with the same saliency value -> receive same rank HashMap<JsonObject, Integer> saliencyMap = new HashMap<>(); int saliencyCounter = 1; saliencyMap.put(saliencyResultList.get(0), saliencyCounter); CompareSaliency compareSaliency; //use only two decimal places DecimalFormat f = new DecimalFormat("##.00"); for (int i = 1; i < saliencyResultList.size(); i++) { compareSaliency = new CompareSaliency(); if (f.format(compareSaliency.computeSaliencyValue(saliencyResultList.get(i))) .equals(f.format(compareSaliency.computeSaliencyValue(saliencyResultList.get(i - 1))))) { saliencyMap.put(saliencyResultList.get(i), saliencyCounter); } else { saliencyCounter++; saliencyMap.put(saliencyResultList.get(i), saliencyCounter); } } //receive as results those which does have a good rank for both categories //add rank of the two maps and add the difference of their rank as well HashMap<JsonObject, Double> resultMap = new HashMap<>(); for (JsonObject object : sizeMap.keySet()) { if (saliencyMap.containsKey(object)) { //use relative ranks (rank divided by the number of ranks) and then calculate sum and difference double add = ((double) (sizeMap.get(object))) / sizeCounter + ((double) (saliencyMap.get(object))) / saliencyCounter; double diff = ((double) (sizeMap.get(object))) / sizeCounter - ((double) (saliencyMap.get(object))) / saliencyCounter; resultMap.put(object, add + Math.abs(diff)); } } //take the minimal rank value of the resultMap -> best for both categories, if there are several, take the first one Map.Entry<JsonObject, Double> min = Collections.min(resultMap.entrySet(), new Comparator<Map.Entry<JsonObject, Double>>() { public int compare(Map.Entry<JsonObject, Double> entry1, Map.Entry<JsonObject, Double> entry2) { return entry1.getValue().compareTo(entry2.getValue()); } }); return min.getKey(); } @Override public synchronized void run() { //comparison between queriedObject and each database object to find properties which differentiate them List<JsonObject> identifiers = computeSingleDiscriminatingIdentifiers(); if (identifiers == null) { setResult(null); notifyAll(); return; } //create a Jsonobject containing all used discriminating attributes contained in the list of identifiers JsonObject allIdentifiers = new JsonObject(); for (JsonObject object : identifiers) { for (Map.Entry<String, JsonElement> entry : object.entrySet()) { //only add entry if it does not exist yet if (!allIdentifiers.has(entry.getKey())) { allIdentifiers.add(entry.getKey(), entry.getValue()); } } } Set<JsonObject> resultSet; //combine identifiers from single comparisons to receive list with all possible discriminators for the queried object resultSet = combineIdentifiers(identifiers, allIdentifiers); //if resultset is empty -> no identifier found, use partial identifier instead if (resultSet.isEmpty()) { for (JsonObject object : identifiers) { if (object.entrySet().size() != 0) { resultSet.add(object); } } if (!resultSet.isEmpty()) { setPartialIdentifier(true); } } setResult(resultSet); notifyAll(); } /** * @return list with JsonObjects each containing those attributes of the queried object which discriminates it from the compared database object */ public List<JsonObject> computeSingleDiscriminatingIdentifiers() { //data structure to save discriminating identifiers resulting from the comparisons between queried object and each database object ArrayList<JsonObject> identifiers = new ArrayList<>(); for (JsonObject databaseObject : databaseObjects) { //create list for each comparison of queried object properties and the properties of a database object // to save property values which uniquely identify queried object JsonObject singleDiscriminatingIdentifiers = new JsonObject(); //go over each property of the queried object Set<Map.Entry<String, JsonElement>> entrySet = queriedObject.entrySet(); for (Map.Entry<String, JsonElement> entry : entrySet) { String queryKey = entry.getKey(); //check saliencyAnnotation is exists if (saliencyAnnotations != null) { //look up the saliency annotation of the object if (saliencyAnnotations.has(queryKey)) { double saliencyNbr = saliencyAnnotations.get(queryKey).getAsDouble(); //if saliency is lower than threshold -> property will be pruned, continue with next property if (saliencyNbr < saliencyThreshold) { continue; } } else { continue; } } //the values the queried object has for certain property JsonElement queriedPropertyValues = queriedObject.get(queryKey); //check if database object has this property if (databaseObject.has(queryKey)) { //get the values the database object has for this property JsonElement dataPropertyValues = databaseObject.get(queryKey); //check if objects have the same values for this property -> if not add values of queried object to list if ((queriedPropertyValues.isJsonArray() && !dataPropertyValues.isJsonArray()) || (!queriedPropertyValues.isJsonArray() && dataPropertyValues.isJsonArray())) { singleDiscriminatingIdentifiers.add(queryKey, queriedPropertyValues); } else if (!queriedPropertyValues.isJsonArray() && !dataPropertyValues.isJsonArray()) { if (!Objects.equals(queriedPropertyValues.toString(), dataPropertyValues.toString())) { singleDiscriminatingIdentifiers.add(queryKey, queriedPropertyValues); } } else if (queriedPropertyValues.isJsonArray() && dataPropertyValues.isJsonArray()) { JsonArray queryArray = queriedPropertyValues.getAsJsonArray(); JsonArray dataArray = dataPropertyValues.getAsJsonArray(); boolean added = false; for (JsonElement queryElement : queryArray) { if (!dataArray.contains(queryElement)) { singleDiscriminatingIdentifiers.add(queryKey, queriedPropertyValues); added = true; break; } } if (!added) { for (JsonElement dataElement : dataArray) { if (!queryArray.contains(dataElement)) { singleDiscriminatingIdentifiers.add(queryKey, queriedPropertyValues); break; } } } } //compared database object does not have particular property -> can be seen as differentiating } else { singleDiscriminatingIdentifiers.add(queryKey, queriedPropertyValues); } } //add single discriminating identifiers to list with all found identifiers identifiers.add(singleDiscriminatingIdentifiers); } return identifiers; } /** * Combine the identifiers to retrieve a set containing all discriminators for the queried object * @param identifiers individual identifiers * @param allIdentifiers all individual identifiers in one Jsonobject * @return set with all valid identifiers */ private Set<JsonObject> combineIdentifiers(List<JsonObject> identifiers, JsonObject allIdentifiers) { //create the power set for the identifiers Set<JsonObject> powerSet = powerSet(allIdentifiers); ArrayList<JsonObject> powerList = new ArrayList<>(powerSet); boolean found = false; ArrayList<Integer> deleteIndices = new ArrayList<>(); //powerset contains also some non valid identifiers //check if identifiers are valid -> only if the element of the powerset contains an element which is represented in each individual identifier of the list for (int i = 0; i < powerList.size(); i++) { for (int j = 0; j < identifiers.size(); j++) { for (Map.Entry<String, JsonElement> identifierEntry : identifiers.get(j).entrySet()) { for (Map.Entry<String, JsonElement> powerListEntry : powerList.get(i).entrySet()) { //check if at least one element of the current powerset entry is contained in each identifier if (powerListEntry.getKey().equals(identifierEntry.getKey())) { found = true; break; } } } //one entry is not represented by all identifiers -> save index to delete it later if (!found) { deleteIndices.add(i); break; } else { found = false; } } } ArrayList<JsonObject> resultList = new ArrayList<>(); //delete non valid identifiers from the powerset for (int i = 0; i < powerList.size(); i++) { if (!deleteIndices.contains(new Integer(i))) { resultList.add(powerList.get(i)); } } //list with all valid identifiers return new HashSet<>(resultList); } /** * * @param originalObject * @return powerset of original set */ private Set<JsonObject> powerSet(JsonObject originalObject) { HashSet<JsonObject> sets = new HashSet<>(); //check if termination condition is reached if (originalObject.size() == 0) { sets.add(new JsonObject()); return sets; } JsonObject head = new JsonObject(); JsonObject rest = new JsonObject(); //retrieve first object from current set and save it in "head", the remaining objects are saved in "rest" for (Map.Entry<String, JsonElement> entry : originalObject.entrySet()) { if (head.size() < 1) { head.add(entry.getKey(), entry.getValue()); } else { rest.add(entry.getKey(), entry.getValue()); } } //recursive call for combining all possibilities for (JsonObject set : powerSet(rest)) { JsonObject newSet = new JsonObject(); newSet.add(head.entrySet().iterator().next().getKey(), head.entrySet().iterator().next().getValue()); for (Map.Entry<String, JsonElement> entry : set.entrySet()) { newSet.add(entry.getKey(), entry.getValue()); } sets.add(newSet); sets.add(set); } return sets; } /** * Class to compare the size of two different sets (each given as a Jsonobject) */ class CompareSize implements Comparator<JsonObject> { @Override public int compare(JsonObject map1, JsonObject map2) { if (map1.size() > map2.size()) { return 1; } else if (map1.size() < map2.size()) { return -1; } else { return 0; } } } /** * Class to compare saliency of two different sets (each given as a Jsonobject) */ class CompareSaliency implements Comparator<JsonObject> { @Override public int compare(JsonObject map1, JsonObject map2) { double saliencyCounter1 = computeSaliencyValue(map1); double saliencyCounter2 = computeSaliencyValue(map2); if (saliencyCounter1 < saliencyCounter2) { return 1; } else if (saliencyCounter1 > saliencyCounter2) { return -1; } else { return 0; } } /** * Calculates the saliency value of the attributes stored in map * @param map * @return summed saliency value of all properties in map normalized between 0.0 and 1.0 */ protected double computeSaliencyValue(JsonObject map) { double saliencyCounter = 0.0; Set<Map.Entry<String, JsonElement>> entrySet = map.entrySet(); for (Map.Entry<String, JsonElement> entry : entrySet) { if (saliencyAnnotations.has(entry.getKey())) { saliencyCounter += saliencyAnnotations.get(entry.getKey()).getAsDouble(); } } //sigmoid function x/sqrt(1+x^2) used to have saliency counter between 0.0 and 1.0 return saliencyCounter / (Math.sqrt(1 + Math.pow(saliencyCounter, 2.0))); } } }