Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.rya.indexing.smarturi.duplication; import static java.util.Objects.requireNonNull; import java.math.BigDecimal; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.TreeSet; import org.apache.commons.configuration.ConfigurationException; import org.apache.commons.lang.StringUtils; import org.apache.rya.api.domain.RyaType; import org.apache.rya.api.domain.RyaURI; import org.apache.rya.api.resolver.impl.DateTimeRyaTypeResolver; import org.apache.rya.indexing.entity.model.Entity; import org.apache.rya.indexing.entity.model.Property; import org.apache.rya.indexing.smarturi.SmartUriAdapter; import org.apache.rya.indexing.smarturi.SmartUriException; import org.apache.rya.indexing.smarturi.duplication.conf.DuplicateDataConfig; import org.calrissian.mango.types.exception.TypeEncodingException; import org.joda.time.DateTime; import org.openrdf.model.URI; import org.openrdf.model.impl.URIImpl; import org.openrdf.model.vocabulary.XMLSchema; import com.google.common.collect.ImmutableMap; /** * Detects if two entities contain data that's nearly identical based on a set * tolerance for each field's type. Two entities are considered nearly * identical if all their properties are equal and/or within the specified * tolerance for the property's object type. Setting all object type tolerances * to 0 means that the objects need to be exactly equal to each other to be * considered duplicates. Duplicate data detection can be enabled/disabled * through configuration and each object type can have a tolerance based on * either the difference or the percentage difference between the objects being * compared. */ public class DuplicateDataDetector { private final Map<URI, ApproxEqualsDetector<?>> uriMap = new HashMap<>(); private final Map<Class<?>, ApproxEqualsDetector<?>> classMap = new HashMap<>(); private boolean isDetectionEnabled; /** * Creates a new instance of {@link DuplicateDataDetector} with the * values provided by the configuration file. * @param duplicateDataConfig the {@link DuplicateDataConfig} */ public DuplicateDataDetector(final DuplicateDataConfig duplicateDataConfig) { this(duplicateDataConfig.getBooleanTolerance(), duplicateDataConfig.getByteTolerance(), duplicateDataConfig.getDateTolerance(), duplicateDataConfig.getDoubleTolerance(), duplicateDataConfig.getFloatTolerance(), duplicateDataConfig.getIntegerTolerance(), duplicateDataConfig.getLongTolerance(), duplicateDataConfig.getShortTolerance(), duplicateDataConfig.getStringTolerance(), duplicateDataConfig.getUriTolerance(), duplicateDataConfig.getEquivalentTermsMap(), duplicateDataConfig.isDetectionEnabled()); } /** * Creates a new instance of {@link DuplicateDataDetector} with the values * from the config. * @throws ConfigurationException */ public DuplicateDataDetector() throws ConfigurationException { this(new DuplicateDataConfig()); } /** * Creates a new instance of {@link DuplicateDataDetector}. * @param tolerance the tolerance to assign to all types. */ public DuplicateDataDetector(final double tolerance) { this(new Tolerance(tolerance, ToleranceType.DIFFERENCE), new LinkedHashMap<>()); } /** * Creates a new instance of {@link DuplicateDataDetector}. * @param tolerance the tolerance to assign to all types. * @param equivalentTermsMap the {@link Map} of terms that are considered * equivalent to each other. (not {@code null}) */ public DuplicateDataDetector(final Tolerance tolerance, final Map<String, List<String>> equivalentTermsMap) { this(tolerance, tolerance, tolerance, tolerance, tolerance, tolerance, tolerance, tolerance, tolerance, tolerance, equivalentTermsMap, true); } /** * Creates a new instance of {@link DuplicateDataDetector}. * @param booleanTolerance the {@link Boolean} tolerance value or * {@code null} if not specified. * @param byteTolerance the {@link Byte} tolerance value or {@code null} if * not specified. * @param dateTolerance the {@link Date} tolerance value or {@code null} if * not specified. * @param doubleTolerance the {@link Double} tolerance value or {@code null} * if not specified. * @param floatTolerance the {@link Float} tolerance value or {@code null} * if not specified. * @param integerTolerance the {@link Integer} tolerance value or * {@code null} if not specified. * @param longTolerance the {@link Long} tolerance value or {@code null} if * not specified. * @param shortTolerance the {@link Short} tolerance value or {@code null} * if not specified. * @param stringTolerance the {@link String} tolerance value or {@code null} * if not specified. * @param uriTolerance the {@link URI} tolerance value or {@code null} if * not specified. * @param equivalentTermsMap the {@link Map} of terms that are considered * equivalent to each other. (not {@code null}) * @param isDetectionEnabled {@code true} to enable detection. {@code false} * to disable detection. */ public DuplicateDataDetector(final Tolerance booleanTolerance, final Tolerance byteTolerance, final Tolerance dateTolerance, final Tolerance doubleTolerance, final Tolerance floatTolerance, final Tolerance integerTolerance, final Tolerance longTolerance, final Tolerance shortTolerance, final Tolerance stringTolerance, final Tolerance uriTolerance, final Map<String, List<String>> equivalentTermsMap, final boolean isDetectionEnabled) { init(booleanTolerance, byteTolerance, dateTolerance, doubleTolerance, floatTolerance, integerTolerance, longTolerance, shortTolerance, stringTolerance, uriTolerance, equivalentTermsMap, isDetectionEnabled); } private void init(final Tolerance booleanTolerance, final Tolerance byteTolerance, final Tolerance dateTolerance, final Tolerance doubleTolerance, final Tolerance floatTolerance, final Tolerance integerTolerance, final Tolerance longTolerance, final Tolerance shortTolerance, final Tolerance stringTolerance, final Tolerance uriTolerance, final Map<String, List<String>> equivalentTermsMap, final boolean isDetectionEnabled) { final List<ApproxEqualsDetector<?>> detectors = new ArrayList<>(); detectors.add(new BooleanApproxEqualsDetector(booleanTolerance)); detectors.add(new ByteApproxEqualsDetector(byteTolerance)); detectors.add(new DateApproxEqualsDetector(dateTolerance)); detectors.add(new DateTimeApproxEqualsDetector(dateTolerance)); detectors.add(new DoubleApproxEqualsDetector(doubleTolerance)); detectors.add(new FloatApproxEqualsDetector(floatTolerance)); detectors.add(new IntegerApproxEqualsDetector(integerTolerance)); detectors.add(new LongApproxEqualsDetector(longTolerance)); detectors.add(new ShortApproxEqualsDetector(shortTolerance)); detectors.add(new StringApproxEqualsDetector(stringTolerance, equivalentTermsMap)); detectors.add(new UriApproxEqualsDetector(uriTolerance)); for (final ApproxEqualsDetector<?> approxEqualsDetector : detectors) { uriMap.put(approxEqualsDetector.getXmlSchemaUri(), approxEqualsDetector); classMap.put(approxEqualsDetector.getTypeClass(), approxEqualsDetector); } this.isDetectionEnabled = isDetectionEnabled; } /** * @return {@code true} to enable detection. {@code false} to disable * detection. */ public boolean isDetectionEnabled() { return isDetectionEnabled; } /** * Removes any duplicate (nearly identical) entities from the collection * of entities. * @param entities the {@link List} of {@link Entity}s. (not {@code null}) * @throws SmartUriException */ public void removeDuplicatesFromCollection(final List<Entity> entities) throws SmartUriException { requireNonNull(entities); // Use a Sorted Set in reverse order to hold the indices final Set<Integer> indicesToRemove = new TreeSet<>((a, b) -> Integer.compare(b, a)); if (entities != null && entities.size() > 1) { // Compare all entities to each other while avoiding making the // same comparisons again and not comparing an entity to itself. for (int i = 0; i < entities.size() - 1; i++) { final Entity entity1 = entities.get(i); for (int j = entities.size() - 1; j > i; j--) { final Entity entity2 = entities.get(j); final boolean areDuplicates = compareEntities(entity1, entity2); if (areDuplicates) { indicesToRemove.add(j); } } } } if (!indicesToRemove.isEmpty()) { // Remove indices in reverse order (already sorted in descending // order so just loop through them) for (final int index : indicesToRemove) { entities.remove(index); } } } /** * Compares two Smart URI's to determine if they have nearly identical data. * @param uri1 the first Smart {@link URI}. (not {@code null}) * @param uri2 the second Smart {@link URI}. (not {@code null}) * @return {@code true} if the two Smart URI's have nearly identical data. * {@code false} otherwise. * @throws SmartUriException */ public boolean compareSmartUris(final URI uri1, final URI uri2) throws SmartUriException { requireNonNull(uri1); requireNonNull(uri2); final Entity entity1 = SmartUriAdapter.deserializeUriEntity(uri1); final Entity entity2 = SmartUriAdapter.deserializeUriEntity(uri2); return compareEntities(entity1, entity2); } /** * Compares two entities to determine if they have nearly identical data. * @param entity1 the first {@link Entity}. (not {@code null}) * @param entity2 the second {@link Entity}. (not {@code null}) * @return {@code true} if the two entities have nearly identical data. * {@code false} otherwise. * @throws SmartUriException */ public boolean compareEntities(final Entity entity1, final Entity entity2) throws SmartUriException { requireNonNull(entity1); requireNonNull(entity2); boolean allValuesNearlyEqual = true; final List<RyaURI> types1 = entity1.getExplicitTypeIds(); final List<RyaURI> types2 = entity2.getExplicitTypeIds(); final boolean doBothHaveSameTypes = types1.containsAll(types2); if (!doBothHaveSameTypes) { return false; } for (final Entry<RyaURI, ImmutableMap<RyaURI, Property>> entry : entity1.getProperties().entrySet()) { final RyaURI typeIdUri = entry.getKey(); for (final Entry<RyaURI, Property> typeProperty : entry.getValue().entrySet()) { final RyaURI propertyNameUri = typeProperty.getKey(); final Property property1 = typeProperty.getValue(); final Optional<Property> p2 = entity2.lookupTypeProperty(typeIdUri, propertyNameUri); if (p2.isPresent()) { final Property property2 = p2.get(); final RyaType value1 = property1.getValue(); final RyaType value2 = property2.getValue(); final String data1 = value1.getData(); final String data2 = value2.getData(); final URI xmlSchemaUri1 = value1.getDataType(); final ApproxEqualsDetector<?> approxEqualsDetector = uriMap.get(xmlSchemaUri1); if (approxEqualsDetector == null) { throw new SmartUriException("No appropriate detector found for the type: " + xmlSchemaUri1); } final boolean approxEquals = approxEqualsDetector.areApproxEquals(data1, data2); if (!approxEquals) { allValuesNearlyEqual = false; break; } } else { allValuesNearlyEqual = false; break; } } if (!allValuesNearlyEqual) { break; } } return allValuesNearlyEqual; } /** * Gets the appropriate {@link ApproxEqualsDetector} for the specified * class. * @param clazz the {@link Class} to find an {@link ApproxEqualsDetector} * for. * @return the {@link ApproxEqualsDetector} for the class or {@code null} if * none could be found. */ public ApproxEqualsDetector<?> getDetectorForType(final Class<?> clazz) { return classMap.get(clazz); } private static boolean isOnlyOneNull(final Object lhs, final Object rhs) { return (lhs == null && rhs != null) || (lhs != null && rhs == null); } /** * Class to detect if two booleans are considered approximately equal to * each other. */ public static class BooleanApproxEqualsDetector implements ApproxEqualsDetector<Boolean> { private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(0.0, ToleranceType.DIFFERENCE); private final Tolerance tolerance; /** * Creates a new instance of {@link BooleanApproxEqualsDetector}. * @param tolerance the {@link Tolerance}. */ public BooleanApproxEqualsDetector(final Tolerance tolerance) { this.tolerance = tolerance != null ? tolerance : getDefaultTolerance(); } @Override public boolean areObjectsApproxEquals(final Boolean lhs, final Boolean rhs) { // Should never be almost equals when tolerance is 0, only exactly equals // Otherwise if there's any tolerance specified everything is equal return tolerance.getValue() == 0 ? Objects.equals(lhs, rhs) : true; } @Override public Tolerance getDefaultTolerance() { return DEFAULT_TOLERANCE; } @Override public Boolean convertStringToObject(final String string) throws SmartUriException { return Boolean.valueOf(string); } @Override public Class<?> getTypeClass() { return Boolean.class; } @Override public URI getXmlSchemaUri() { return XMLSchema.BOOLEAN; } } /** * Class to detect if two bytes are considered approximately equal to each * other. */ public static class ByteApproxEqualsDetector implements ApproxEqualsDetector<Byte> { private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(0.0, ToleranceType.DIFFERENCE); private final Tolerance tolerance; /** * Creates a new instance of {@link ByteApproxEqualsDetector}. * @param tolerance the {@link Tolerance}. */ public ByteApproxEqualsDetector(final Tolerance tolerance) { this.tolerance = tolerance != null ? tolerance : getDefaultTolerance(); } @Override public boolean areObjectsApproxEquals(final Byte lhs, final Byte rhs) { if (isOnlyOneNull(lhs, rhs)) { return false; } if (Objects.equals(lhs, rhs)) { // They're exactly equals so get out return true; } else if (tolerance.getValue() == 0) { // If they're not exactly equals with zero tolerance then get out return false; } // Check based on tolerance switch (tolerance.getToleranceType()) { case PERCENTAGE: if (lhs == 0) { return lhs == rhs; } if (tolerance.getValue() >= 1) { return true; } return ((double) Math.abs(lhs - rhs) / lhs) <= tolerance.getValue(); case DIFFERENCE: default: return Math.abs(lhs - rhs) <= tolerance.getValue(); } } @Override public Tolerance getDefaultTolerance() { return DEFAULT_TOLERANCE; } @Override public Byte convertStringToObject(final String string) throws SmartUriException { return Byte.valueOf(string); } @Override public Class<?> getTypeClass() { return Byte.class; } @Override public URI getXmlSchemaUri() { return XMLSchema.BYTE; } } /** * Class to detect if two dates are considered approximately equal to each * other. */ public static class DateApproxEqualsDetector implements ApproxEqualsDetector<Date> { private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(500.0, ToleranceType.DIFFERENCE); // milliseconds private final Tolerance tolerance; /** * Creates a new instance of {@link DateApproxEqualsDetector}. * @param tolerance the {@link Tolerance}. */ public DateApproxEqualsDetector(final Tolerance tolerance) { this.tolerance = tolerance != null ? tolerance : getDefaultTolerance(); } @Override public boolean areObjectsApproxEquals(final Date lhs, final Date rhs) { if (isOnlyOneNull(lhs, rhs)) { return false; } if (Objects.equals(lhs, rhs)) { // They're exactly equals so get out return true; } else if (tolerance.getValue() == 0) { // If they're not exactly equals with zero tolerance then get out return false; } // Check based on tolerance final long lhsTime = lhs.getTime(); final long rhsTime = rhs.getTime(); switch (tolerance.getToleranceType()) { case PERCENTAGE: if (lhsTime == 0) { return lhsTime == rhsTime; } if (tolerance.getValue() >= 1) { return true; } return ((double) Math.abs(lhsTime - rhsTime) / lhsTime) <= tolerance.getValue(); case DIFFERENCE: default: return Math.abs(lhsTime - rhsTime) <= tolerance.getValue(); } } @Override public Tolerance getDefaultTolerance() { return DEFAULT_TOLERANCE; } @Override public Date convertStringToObject(final String string) throws SmartUriException { DateTime dateTime = null; try { dateTime = DateTime.parse(string, DateTimeRyaTypeResolver.XMLDATETIME_PARSER); } catch (final TypeEncodingException e) { throw new SmartUriException("Exception occurred serializing data[" + string + "]", e); } final Date date = dateTime.toDate(); return date; } @Override public Class<?> getTypeClass() { return Date.class; } @Override public URI getXmlSchemaUri() { return XMLSchema.DATE; } } /** * Class to detect if two datetimes are considered approximately equal to * each other. */ public static class DateTimeApproxEqualsDetector implements ApproxEqualsDetector<DateTime> { private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(500.0, ToleranceType.DIFFERENCE); // milliseconds private final Tolerance tolerance; /** * Creates a new instance of {@link DateTimeApproxEqualsDetector}. * @param tolerance the {@link Tolerance}. */ public DateTimeApproxEqualsDetector(final Tolerance tolerance) { this.tolerance = tolerance != null ? tolerance : getDefaultTolerance(); } @Override public boolean areObjectsApproxEquals(final DateTime lhs, final DateTime rhs) { if (isOnlyOneNull(lhs, rhs)) { return false; } if (Objects.equals(lhs, rhs)) { // They're exactly equals so get out return true; } else if (tolerance.getValue() == 0) { // If they're not exactly equals with zero tolerance then get out return false; } // Check based on tolerance final long lhsTime = lhs.getMillis(); final long rhsTime = rhs.getMillis(); switch (tolerance.getToleranceType()) { case PERCENTAGE: if (lhsTime == 0) { return lhsTime == rhsTime; } if (tolerance.getValue() >= 1) { return true; } return ((double) Math.abs(lhsTime - rhsTime) / lhsTime) <= tolerance.getValue(); case DIFFERENCE: default: return Math.abs(lhsTime - rhsTime) <= tolerance.getValue(); } } @Override public Tolerance getDefaultTolerance() { return DEFAULT_TOLERANCE; } @Override public DateTime convertStringToObject(final String string) throws SmartUriException { DateTime dateTime = null; try { dateTime = DateTime.parse(string, DateTimeRyaTypeResolver.XMLDATETIME_PARSER); } catch (final TypeEncodingException e) { throw new SmartUriException("Exception occurred serializing data[" + string + "]", e); } return dateTime; } @Override public Class<?> getTypeClass() { return DateTime.class; } @Override public URI getXmlSchemaUri() { return XMLSchema.DATETIME; } } /** * Class to detect if two doubles are considered approximately equal to each * other. */ public static class DoubleApproxEqualsDetector implements ApproxEqualsDetector<Double> { private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(0.0001, ToleranceType.PERCENTAGE); private final Tolerance tolerance; /** * Creates a new instance of {@link DoubleApproxEqualsDetector}. * @param tolerance the {@link Tolerance}. */ public DoubleApproxEqualsDetector(final Tolerance tolerance) { this.tolerance = tolerance != null ? tolerance : getDefaultTolerance(); } @Override public boolean areObjectsApproxEquals(final Double lhs, final Double rhs) { if (isOnlyOneNull(lhs, rhs)) { return false; } if (Objects.equals(lhs, rhs)) { // They're exactly equals so get out return true; } else if (tolerance.getValue() == 0) { // If they're not exactly equals with zero tolerance then get out return false; } // Doubles can be unpredictable with how they store a value // like 0.1. So use BigDecimal with its String constructor // to make things more predictable. final BigDecimal lhsBd = new BigDecimal(String.valueOf(lhs)); final BigDecimal rhsBd = new BigDecimal(String.valueOf(rhs)); switch (tolerance.getToleranceType()) { case PERCENTAGE: if (lhs == 0) { return lhs == rhs; } if (tolerance.getValue() >= 1) { return true; } final BigDecimal absDiff = lhsBd.subtract(rhsBd).abs(); try { final BigDecimal percent = absDiff.divide(lhsBd); return percent.doubleValue() <= tolerance.getValue(); } catch (final ArithmeticException e) { // BigDecimal quotient did not have a terminating // decimal expansion. So, try without BigDecimal. return (Math.abs(lhs - rhs) / lhs) <= tolerance.getValue(); } case DIFFERENCE: default: final BigDecimal absDiff1 = lhsBd.subtract(rhsBd).abs(); return absDiff1.doubleValue() <= tolerance.getValue(); //return Math.abs(lhs - rhs) <= tolerance.getValue(); } } @Override public Tolerance getDefaultTolerance() { return DEFAULT_TOLERANCE; } @Override public Double convertStringToObject(final String string) throws SmartUriException { return Double.valueOf(string); } @Override public Class<?> getTypeClass() { return Double.class; } @Override public URI getXmlSchemaUri() { return XMLSchema.DOUBLE; } } /** * Class to detect if two floats are considered approximately equal to each * other. */ public static class FloatApproxEqualsDetector implements ApproxEqualsDetector<Float> { private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(0.0001, ToleranceType.PERCENTAGE); private final Tolerance tolerance; /** * Creates a new instance of {@link FloatApproxEqualsDetector}. * @param tolerance the {@link Tolerance}. */ public FloatApproxEqualsDetector(final Tolerance tolerance) { this.tolerance = tolerance != null ? tolerance : getDefaultTolerance(); } @Override public boolean areObjectsApproxEquals(final Float lhs, final Float rhs) { if (isOnlyOneNull(lhs, rhs)) { return false; } if (Objects.equals(lhs, rhs)) { // They're exactly equals so get out return true; } else if (tolerance.getValue() == 0) { // If they're not exactly equals with zero tolerance then get out return false; } // Check based on tolerance // Floats can be unpredictable with how they store a value // like 0.1. So use BigDecimal with its String constructor // to make things more predictable. final BigDecimal lhsBd = new BigDecimal(String.valueOf(lhs)); final BigDecimal rhsBd = new BigDecimal(String.valueOf(rhs)); switch (tolerance.getToleranceType()) { case PERCENTAGE: if (lhs == 0) { return lhs == rhs; } if (tolerance.getValue() >= 1) { return true; } final BigDecimal absDiff = lhsBd.subtract(rhsBd).abs(); try { final BigDecimal percent = absDiff.divide(lhsBd); return percent.floatValue() <= tolerance.getValue(); } catch (final ArithmeticException e) { // BigDecimal quotient did not have a terminating // decimal expansion. So, try without BigDecimal. return ((double) Math.abs(lhs - rhs) / lhs) <= tolerance.getValue(); } case DIFFERENCE: default: final BigDecimal absDiff1 = lhsBd.subtract(rhsBd).abs(); return absDiff1.floatValue() <= tolerance.getValue(); //return Math.abs(lhs - rhs) <= tolerance.getValue(); } } @Override public Tolerance getDefaultTolerance() { return DEFAULT_TOLERANCE; } @Override public Float convertStringToObject(final String string) throws SmartUriException { return Float.valueOf(string); } @Override public Class<?> getTypeClass() { return Float.class; } @Override public URI getXmlSchemaUri() { return XMLSchema.FLOAT; } } /** * Class to detect if two integers are considered approximately equal to * each other. */ public static class IntegerApproxEqualsDetector implements ApproxEqualsDetector<Integer> { private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(1.0, ToleranceType.DIFFERENCE); private final Tolerance tolerance; /** * Creates a new instance of {@link IntegerApproxEqualsDetector}. * @param tolerance the {@link Tolerance}. */ public IntegerApproxEqualsDetector(final Tolerance tolerance) { this.tolerance = tolerance != null ? tolerance : getDefaultTolerance(); } @Override public boolean areObjectsApproxEquals(final Integer lhs, final Integer rhs) { if (isOnlyOneNull(lhs, rhs)) { return false; } if (Objects.equals(lhs, rhs)) { // They're exactly equals so get out return true; } else if (tolerance.getValue() == 0) { // If they're not exactly equals with zero tolerance then get out return false; } // Check based on tolerance switch (tolerance.getToleranceType()) { case PERCENTAGE: if (lhs == 0) { return lhs == rhs; } if (tolerance.getValue() >= 1) { return true; } return ((double) Math.abs(lhs - rhs) / lhs) <= tolerance.getValue(); case DIFFERENCE: default: return Math.abs(lhs - rhs) <= tolerance.getValue(); } } @Override public Tolerance getDefaultTolerance() { return DEFAULT_TOLERANCE; } @Override public Integer convertStringToObject(final String string) throws SmartUriException { return Integer.valueOf(string); } @Override public Class<?> getTypeClass() { return Integer.class; } @Override public URI getXmlSchemaUri() { return XMLSchema.INTEGER; } } /** * Class to detect if two longs are considered approximately equal to * each other. */ public static class LongApproxEqualsDetector implements ApproxEqualsDetector<Long> { private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(1.0, ToleranceType.DIFFERENCE); private final Tolerance tolerance; /** * Creates a new instance of {@link LongApproxEqualsDetector}. * @param tolerance the {@link Tolerance}. */ public LongApproxEqualsDetector(final Tolerance tolerance) { this.tolerance = tolerance != null ? tolerance : getDefaultTolerance(); } @Override public boolean areObjectsApproxEquals(final Long lhs, final Long rhs) { if (isOnlyOneNull(lhs, rhs)) { return false; } if (Objects.equals(lhs, rhs)) { // They're exactly equals so get out return true; } else if (tolerance.getValue() == 0) { // If they're not exactly equals with zero tolerance then get out return false; } // Check based on tolerance switch (tolerance.getToleranceType()) { case PERCENTAGE: if (lhs == 0) { return lhs == rhs; } if (tolerance.getValue() >= 1) { return true; } return ((double) Math.abs(lhs - rhs) / lhs) <= tolerance.getValue(); case DIFFERENCE: default: return Math.abs(lhs - rhs) <= tolerance.getValue(); } } @Override public Tolerance getDefaultTolerance() { return DEFAULT_TOLERANCE; } @Override public Long convertStringToObject(final String string) throws SmartUriException { return Long.valueOf(string); } @Override public Class<?> getTypeClass() { return Long.class; } @Override public URI getXmlSchemaUri() { return XMLSchema.LONG; } } /** * Class to detect if two shorts are considered approximately equal to each * other. */ public static class ShortApproxEqualsDetector implements ApproxEqualsDetector<Short> { private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(1.0, ToleranceType.DIFFERENCE); private final Tolerance tolerance; /** * Creates a new instance of {@link ShortApproxEqualsDetector}. * @param tolerance the {@link Tolerance}. */ public ShortApproxEqualsDetector(final Tolerance tolerance) { this.tolerance = tolerance != null ? tolerance : getDefaultTolerance(); } @Override public boolean areObjectsApproxEquals(final Short lhs, final Short rhs) { if (isOnlyOneNull(lhs, rhs)) { return false; } if (Objects.equals(lhs, rhs)) { // They're exactly equals so get out return true; } else if (tolerance.getValue() == 0) { // If they're not exactly equals with zero tolerance then get out return false; } // Check based on tolerance switch (tolerance.getToleranceType()) { case PERCENTAGE: if (lhs == 0) { return lhs == rhs; } if (tolerance.getValue() >= 1) { return true; } return ((double) Math.abs(lhs - rhs) / lhs) <= tolerance.getValue(); case DIFFERENCE: default: return Math.abs(lhs - rhs) <= tolerance.getValue(); } } @Override public Tolerance getDefaultTolerance() { return DEFAULT_TOLERANCE; } @Override public Short convertStringToObject(final String string) throws SmartUriException { return Short.valueOf(string); } @Override public Class<?> getTypeClass() { return Short.class; } @Override public URI getXmlSchemaUri() { return XMLSchema.SHORT; } } /** * Class to detect if two string are considered approximately equal to each * other. */ public static class StringApproxEqualsDetector implements ApproxEqualsDetector<String> { private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(0.05, ToleranceType.PERCENTAGE); private final Tolerance tolerance; private final Map<String, List<String>> equivalentTermsMap; /** * Creates a new instance of {@link StringApproxEqualsDetector}. * @param tolerance the {@link Tolerance}. */ public StringApproxEqualsDetector(final Tolerance tolerance, final Map<String, List<String>> equivalentTermsMap) { this.tolerance = tolerance != null ? tolerance : getDefaultTolerance(); this.equivalentTermsMap = equivalentTermsMap; } @Override public boolean areObjectsApproxEquals(final String lhs, final String rhs) { if (isOnlyOneNull(lhs, rhs)) { return false; } if (StringUtils.equalsIgnoreCase(lhs, rhs)) { // They're exactly equals so get out return true; } else if (tolerance.getValue() == 0) { // If they're not exactly equals with zero tolerance then get out return false; } // Only check one-way. Terms are not bi-directionally equivalent // unless specified. final List<String> lhsTermEquivalents = equivalentTermsMap.get(lhs); if (lhsTermEquivalents != null && lhsTermEquivalents.contains(rhs)) { return true; } final int distance = StringUtils.getLevenshteinDistance(lhs, rhs); // Check based on tolerance switch (tolerance.getToleranceType()) { case PERCENTAGE: if (lhs.length() == 0) { return lhs.length() == rhs.length(); } if (tolerance.getValue() >= 1) { return true; } return ((double) distance / lhs.length()) <= tolerance.getValue(); case DIFFERENCE: default: return distance <= tolerance.getValue(); } } @Override public Tolerance getDefaultTolerance() { return DEFAULT_TOLERANCE; } @Override public String convertStringToObject(final String string) throws SmartUriException { return string; } @Override public Class<?> getTypeClass() { return String.class; } @Override public URI getXmlSchemaUri() { return XMLSchema.STRING; } } /** * Class to detect if two URIs are considered approximately equal to each * other. */ public static class UriApproxEqualsDetector implements ApproxEqualsDetector<URI> { private static final Tolerance DEFAULT_TOLERANCE = new Tolerance(1.0, ToleranceType.DIFFERENCE); private final Tolerance tolerance; /** * Creates a new instance of {@link UriApproxEqualsDetector}. * @param tolerance the {@link Tolerance}. */ public UriApproxEqualsDetector(final Tolerance tolerance) { this.tolerance = tolerance != null ? tolerance : getDefaultTolerance(); } @Override public boolean areObjectsApproxEquals(final URI lhs, final URI rhs) { if (isOnlyOneNull(lhs, rhs)) { return false; } if (Objects.equals(lhs, rhs)) { return true; } final String uriString1 = lhs.stringValue(); final String uriString2 = rhs.stringValue(); if (StringUtils.equalsIgnoreCase(uriString1, uriString2)) { // They're exactly equals so get out return true; } else if (tolerance.getValue() == 0) { // If they're not exactly equals with zero tolerance then get out return false; } final int distance = StringUtils.getLevenshteinDistance(uriString1, uriString2); // Check based on tolerance switch (tolerance.getToleranceType()) { case PERCENTAGE: if (uriString1.length() == 0) { return uriString1.length() == uriString2.length(); } if (tolerance.getValue() >= 1) { return true; } return ((double) distance / uriString1.length()) <= tolerance.getValue(); case DIFFERENCE: default: return distance <= tolerance.getValue(); } } @Override public Tolerance getDefaultTolerance() { return DEFAULT_TOLERANCE; } @Override public URI convertStringToObject(final String string) throws SmartUriException { return new URIImpl(string); } @Override public Class<?> getTypeClass() { return URI.class; } @Override public URI getXmlSchemaUri() { return XMLSchema.ANYURI; } } }