eu.delving.metadata.RecordValidator.java Source code

Java tutorial

Introduction

Here is the source code for eu.delving.metadata.RecordValidator.java

Source

/*
 * Copyright 2010 DELVING BV
 *
 *  Licensed under the EUPL, Version 1.0 or - as soon they
 *  will be approved by the European Commission - subsequent
 *  versions of the EUPL (the "Licence");
 *  you may not use this work except in compliance with the
 *  Licence.
 *  You may obtain a copy of the Licence at:
 *
 *  http://ec.europa.eu/idabc/eupl
 *
 *  Unless required by applicable law or agreed to in
 *  writing, software distributed under the Licence is
 *  distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 *  express or implied.
 *  See the Licence for the specific language governing
 *  permissions and limitations under the Licence.
 */

package eu.delving.metadata;

import org.apache.log4j.Logger;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
import org.xml.sax.InputSource;

import java.io.StringReader;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

/**
 * Parse, filter, validate a record.
 * <p>
 * A record arrives as an XML fragment. It is wrapped in a <code>validate</code> root element that
 * declares every namespace of the {@link RecordDefinition}, parsed, walked leaf by leaf, and written
 * back out pretty-printed with the wrapper stripped off again. While walking, leaf elements are
 * removed when they have no field definition, have empty (trimmed) text, or repeat a field/value
 * pair already seen in the same record; the remaining leaves are checked against the validation
 * rules of their field definition. Every finding is appended to the caller-supplied problem list.
 * <p>
 * An instance accumulates parse/validate/write timings across calls (see {@link #report()}) and
 * reuses a single {@link SAXReader}.
 *
 * @author Gerald de Jong <geralddejong@gmail.com>
 */

public class RecordValidator {
    private final Logger log = Logger.getLogger(getClass());
    private final RecordDefinition recordDefinition;
    private final List<FieldDefinition> validatableFields = new ArrayList<FieldDefinition>();
    private Uniqueness idUniqueness;
    // Literal text placed before and after the record fragment to make it a parseable document.
    private final String contextPrefix, contextSuffix;
    // Number of characters stripped from the front / back of the written document.
    private final int contextBegin, contextEnd;
    private long totalParseTime, totalValidateTime, totalWriteTime;
    private final SAXReader reader = new SAXReader();

    /**
     * Prepares the XML wrapper and collects the fields that carry validation rules.
     *
     * @param recordDefinition supplies the namespaces declared on the wrapper element, the mappable
     *                         fields, and the path-to-field lookup used during validation
     */
    public RecordValidator(RecordDefinition recordDefinition) {
        this.recordDefinition = recordDefinition;
        StringBuilder prefix = new StringBuilder("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<validate\n");
        for (NamespaceDefinition namespaceDefinition : recordDefinition.namespaces) {
            prefix.append("xmlns:").append(namespaceDefinition.prefix)
                    .append("=\"").append(namespaceDefinition.uri).append("\"\n");
        }
        prefix.append(">\n");
        // The wrapper is kept as plain prefix/suffix text rather than as a String.format pattern,
        // so a '%' inside a namespace URI can never be mistaken for a format specifier.
        this.contextPrefix = prefix.toString();
        this.contextSuffix = "</validate>\n";
        this.contextBegin = this.contextPrefix.length();
        this.contextEnd = this.contextSuffix.length();
        for (FieldDefinition fieldDefinition : recordDefinition.getMappableFields()) {
            if (fieldDefinition.validation != null) {
                validatableFields.add(fieldDefinition);
            }
        }
    }

    /**
     * Installs the checker consulted for fields whose validation is flagged as an identifier.
     * Without it (the default) identifier fields are not checked for repetition.
     *
     * @param uniqueness the repetition checker to use from now on
     */
    public void guardUniqueness(Uniqueness uniqueness) {
        this.idUniqueness = uniqueness;
    }

    /**
     * Validates one record and returns its filtered, pretty-printed form.
     *
     * @param recordString the record as an XML fragment; returned untouched when it contains no
     *                     <code>&lt;</code> character at all
     * @param problems     receives one message per problem found
     * @return the rewritten record, or the literal string <code>"Invalid"</code> when parsing,
     *         validating or writing threw an exception (the exception is reported in
     *         <code>problems</code>)
     */
    public String validateRecord(String recordString, List<String> problems) {
        if (!recordString.contains("<")) {
            return recordString;
        }
        String contextualizedRecord = contextPrefix + recordString + contextSuffix;
        StringWriter out = new StringWriter();
        try {
            long before = System.currentTimeMillis();
            InputSource source = new InputSource(new StringReader(contextualizedRecord));
            source.setEncoding("UTF-8");
            Document document = reader.read(source);
            totalParseTime += System.currentTimeMillis() - before;

            Map<Path, Counter> counters = new TreeMap<Path, Counter>();
            before = System.currentTimeMillis();
            validateDocument(document, problems, new TreeSet<String>(), counters);
            validateCardinalities(counters, problems);
            totalValidateTime += System.currentTimeMillis() - before;

            before = System.currentTimeMillis();
            XMLWriter writer = new XMLWriter(out, OutputFormat.createPrettyPrint());
            writer.write(document);
            totalWriteTime += System.currentTimeMillis() - before;
        } catch (Exception e) {
            problems.add("Problem parsing: " + e.toString());
            return "Invalid";
        }
        // Strip the wrapper again: contextBegin characters from the front, contextEnd from the back.
        // NOTE(review): this assumes the pretty-printed wrapper occupies exactly as many characters
        // as the wrapper that was parsed - confirm against the XMLWriter/OutputFormat output.
        StringBuffer buffer = out.getBuffer();
        buffer.delete(0, contextBegin);
        buffer.delete(buffer.length() - contextEnd, buffer.length());
        return out.toString();
    }

    /**
     * Logs the accumulated parse, validate and write times (milliseconds) at info level.
     */
    public void report() {
        log.info(String.format("Parse %d", totalParseTime));
        log.info(String.format("Validate %d", totalValidateTime));
        log.info(String.format("Write %d", totalWriteTime));
    }

    /**
     * Checks value counts per field and the presence of required groups.
     *
     * @param counters number of accepted values per field path in the current record
     * @param problems receives a message for every single-valued field that occurs more than once
     *                 and for every required group that has no value at all
     */
    private void validateCardinalities(Map<Path, Counter> counters, List<String> problems) {
        // Every required group starts out unsatisfied ...
        Map<String, Boolean> requiredGroupMap = new TreeMap<String, Boolean>();
        for (FieldDefinition fieldDefinition : validatableFields) {
            if (fieldDefinition.validation.requiredGroup != null) {
                requiredGroupMap.put(fieldDefinition.validation.requiredGroup, false);
            }
            Counter counter = counters.get(fieldDefinition.path);
            if (!fieldDefinition.validation.multivalued && counter != null && counter.count > 1) {
                problems.add(
                        String.format("Single-valued field [%s] has more than one value", fieldDefinition.path));
            }
        }
        // ... and is satisfied as soon as any counted field belongs to it.
        for (Map.Entry<Path, Counter> entry : counters.entrySet()) {
            FieldDefinition fieldDefinition = recordDefinition.getFieldDefinition(entry.getKey());
            if (fieldDefinition.validation != null && fieldDefinition.validation.requiredGroup != null) {
                requiredGroupMap.put(fieldDefinition.validation.requiredGroup, true);
            }
        }
        for (Map.Entry<String, Boolean> entry : requiredGroupMap.entrySet()) {
            if (!entry.getValue()) {
                problems.add(String.format("Required field violation for [%s]", entry.getKey()));
            }
        }
    }

    /**
     * Locates the <code>record</code> element inside the wrapper and validates it recursively.
     *
     * @throws RuntimeException when the root element is not <code>validate</code> or when it has no
     *                          <code>record</code> child
     */
    private void validateDocument(Document document, List<String> problems, Set<String> entries,
            Map<Path, Counter> counters) {
        Element rootElement = document.getRootElement();
        if (!rootElement.getQName().getName().equals("validate")) {
            throw new RuntimeException("Root element should be 'validate'");
        }
        Element recordElement = rootElement.element("record");
        if (recordElement == null) {
            // Fail with a readable message instead of a NullPointerException further down.
            throw new RuntimeException("Element 'record' not found inside 'validate'");
        }
        validateElement(recordElement, new Path(), problems, entries, counters);
    }

    /**
     * Validates an element and, recursively, its child elements; children that report themselves
     * as removable are detached from the document.
     *
     * @param path the path of the parent; the element's own tag is pushed for the duration of the
     *             call and popped again before returning
     * @return <code>true</code> when the element is a leaf that should be removed by the caller;
     *         always <code>false</code> for an element that had child elements
     */
    private boolean validateElement(Element element, Path path, List<String> problems, Set<String> entries,
            Map<Path, Counter> counters) {
        path.push(Tag.create(element.getNamespacePrefix(), element.getName()));
        boolean hasElements = false;
        Iterator<?> walk = element.elementIterator();
        while (walk.hasNext()) {
            Element subelement = (Element) walk.next();
            if (validateElement(subelement, path, problems, entries, counters)) {
                walk.remove();
            }
            hasElements = true;
        }
        // Only leaves carry a value to validate.
        boolean remove = !hasElements
                && validatePath(element.getTextTrim(), path, problems, entries, counters);
        path.pop();
        return remove;
    }

    /**
     * Validates the value of one leaf element.
     *
     * @param text    the trimmed text of the leaf
     * @param path    the path of the leaf, used to look up its field definition
     * @param entries field/value pairs already accepted for this record; used to drop duplicates
     * @return <code>true</code> when the leaf should be removed: no field definition exists for the
     *         path (also reported as a problem), the text is empty, or the same field/value pair
     *         was seen before
     */
    private boolean validatePath(String text, Path path, List<String> problems, Set<String> entries,
            Map<Path, Counter> counters) {
        FieldDefinition field = recordDefinition.getFieldDefinition(path);
        if (field == null) {
            problems.add(String.format("No field definition found for path [%s]", path));
            return true;
        }
        if (text.isEmpty()) {
            return true;
        }
        // Set.add reports whether the pair was new, so one call both tests for and records it.
        if (!entries.add(field + "=" + text)) {
            return true;
        }
        Counter counter = counters.get(field.path);
        if (counter == null) {
            counter = new Counter();
            counters.put(field.path, counter);
        }
        counter.count++;
        validateField(text, field, problems);
        return false;
    }

    /**
     * Applies the field's validation rules (allowed options, URL syntax, identifier uniqueness) to
     * a value and reports each violation. Fields without validation rules are accepted as they are.
     */
    private void validateField(String text, FieldDefinition fieldDefinition, List<String> problems) {
        FieldDefinition.Validation validation = fieldDefinition.validation;
        if (validation == null) {
            return;
        }
        if (validation.hasOptions() && !validation.allowOption(text)) {
            problems.add(String.format("Value for [%s] was [%s] which does not belong to [%s]",
                    fieldDefinition.path, text, validation.getOptionsString()));
        }
        if (validation.url) {
            try {
                // Constructed only for its syntax check; the URL itself is never used.
                new URL(text);
            } catch (MalformedURLException e) {
                problems.add(String.format("URL value for [%s] was [%s] which is malformed",
                        fieldDefinition.path, text));
            }
        }
        if (validation.id && idUniqueness != null && idUniqueness.isRepeated(text)) {
            problems.add(String.format(
                    "Identifier [%s] must be unique but the value [%s] appears more than once",
                    fieldDefinition.path, text));
        }
    }

    /** Mutable occurrence count for one field path within a record. */
    private static class Counter {
        int count;
    }

}