eu.europena.research.EdmMetadataConverter.java Source code

Java tutorial

Introduction

Here is the source code for eu.europena.research.EdmMetadataConverter.java

Source

package eu.europena.research;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.GregorianCalendar;
import java.util.HashSet;

import javax.xml.xpath.XPathExpressionException;

import org.apache.commons.lang3.StringUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

import eu.europeana.language.nal.EuropeanLanguagesNal;
import eu.europeana.language.nal.NalLanguage;
import eudat.b2sharev2.model.AlternateIdentifier;
import eudat.b2sharev2.model.Contributor;
import eudat.b2sharev2.model.Creator;
import eudat.b2sharev2.model.Description;
import eudat.b2sharev2.model.License;
import eudat.b2sharev2.model.Description.DescriptionType;
import eudat.b2sharev2.model.Record.PublicationState;
import eudat.b2sharev2.model.Record;
import eudat.b2sharev2.model.ResourceType;
import eudat.b2sharev2.model.ResourceType.ResourceTypeGeneral;
import eudat.b2sharev2.model.Title;
import eudat.b2sharev2.model.linguistics.LingResourceType;
import eudat.b2sharev2.model.linguistics.LinguisticsRecord;

/**
 * Converts an EDM metadata document (the {@code edm:ProvidedCHO} element and the contextual
 * entities it references) into a B2SHARE {@link LinguisticsRecord}.
 *
 * <p>All element values are read as trimmed text. Values of {@code dc:creator},
 * {@code dc:contributor}, {@code dc:publisher} and {@code dc:subject} are additionally treated
 * as potential references: when an entity with a matching {@code rdf:about} exists in the
 * document, its first {@code skos:prefLabel} is used instead of the raw value.
 */
public class EdmMetadataConverter {

    /** Language authority list used to look up {@code dc:language} values by ISO code. */
    public static EuropeanLanguagesNal languagesNal = new EuropeanLanguagesNal();

    /**
     * Builds a B2SHARE record from the given EDM DOM.
     *
     * <p>The record is always marked as open access and its project name is always set to
     * {@code "Europeana Newspapers"}.
     *
     * @param edmDom the parsed EDM document
     * @return the populated record (a {@link LinguisticsRecord})
     * @throws RuntimeException wrapping the {@link XPathExpressionException} if an XPath query
     *         fails
     */
    public static Record toB2shareMetadata(Document edmDom) {

        try {
            LinguisticsRecord record = new LinguisticsRecord();
            record.setOpenAccess(true);

            mapTitle(record, edmDom);
            mapCreators(record, edmDom);
            mapContributors(record, edmDom);
            mapPublisher(record, edmDom);
            addDescriptions(record, edmDom, "//edm:ProvidedCHO/dc:description", DescriptionType.OTHER);
            mapSubjects(record, edmDom);
            addDescriptions(record, edmDom, "//edm:ProvidedCHO/dcterms:tableOfContents",
                    DescriptionType.TABLE_OF_CONTENTS);
            addDescriptions(record, edmDom, "//edm:ProvidedCHO/dcterms:abstract", DescriptionType.ABSTRACT);
            mapIdentifiers(record, edmDom);
            mapPublicationDate(record, edmDom);
            mapLicense(record, edmDom);
            mapLanguage(record, edmDom);
            mapResourceTypes(record, edmDom);
            record.getCommunityFields().setProjectName("Europeana Newspapers");
            mapQuality(record, edmDom);

            return record;
        } catch (XPathExpressionException e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /** Uses the first non-empty {@code dc:title} / {@code dcterms:alternative} as the single title. */
    private static void mapTitle(LinguisticsRecord record, Document edmDom) throws XPathExpressionException {
        NodeList titles = query(edmDom,
                "(//edm:ProvidedCHO/dc:title | //edm:ProvidedCHO/dcterms:alternative)");
        for (int i = 0; i < titles.getLength(); i++) {
            String val = textOf((Element) titles.item(i));
            // Skip empty elements so that a blank title does not hide a usable one.
            if (!StringUtils.isEmpty(val)) {
                Title title = new Title();
                title.setTitle(val);
                record.getTitles().add(title);
                break;
            }
        }
    }

    /** Adds one creator per non-empty {@code dc:creator}, resolving agent references to labels. */
    private static void mapCreators(LinguisticsRecord record, Document edmDom) throws XPathExpressionException {
        NodeList creators = query(edmDom, "//edm:ProvidedCHO/dc:creator");
        for (int i = 0; i < creators.getLength(); i++) {
            String name = resolveLabel(edmDom, (Element) creators.item(i), "edm:Agent");
            if (!StringUtils.isEmpty(name)) {
                Creator creator = new Creator();
                creator.setCreatorName(name);
                record.getCreators().add(creator);
            }
        }
    }

    /** Adds one contributor per non-empty {@code dc:contributor}, resolving agent references to labels. */
    private static void mapContributors(LinguisticsRecord record, Document edmDom)
            throws XPathExpressionException {
        NodeList contributors = query(edmDom, "//edm:ProvidedCHO/dc:contributor");
        for (int i = 0; i < contributors.getLength(); i++) {
            String name = resolveLabel(edmDom, (Element) contributors.item(i), "edm:Agent");
            if (!StringUtils.isEmpty(name)) {
                Contributor contrib = new Contributor();
                contrib.setContributorName(name);
                record.getContributors().add(contrib);
            }
        }
    }

    /** Sets the publisher from {@code dc:publisher}; when several are present the last non-empty one wins. */
    private static void mapPublisher(LinguisticsRecord record, Document edmDom) throws XPathExpressionException {
        NodeList publishers = query(edmDom, "//edm:ProvidedCHO/dc:publisher");
        for (int i = 0; i < publishers.getLength(); i++) {
            String name = resolveLabel(edmDom, (Element) publishers.item(i), "edm:Agent");
            if (!StringUtils.isEmpty(name)) {
                record.setPublisher(name);
            }
        }
    }

    /** Adds one keyword per non-empty {@code dc:subject}, resolving agent/concept/place references. */
    private static void mapSubjects(LinguisticsRecord record, Document edmDom) throws XPathExpressionException {
        NodeList subjects = query(edmDom, "//edm:ProvidedCHO/dc:subject");
        for (int i = 0; i < subjects.getLength(); i++) {
            // "edm:Concept" is kept for backward compatibility; EDM itself models concepts as
            // skos:Concept, so that class is looked up as well.
            String keyword = resolveLabel(edmDom, (Element) subjects.item(i), "edm:Agent", "edm:Concept",
                    "skos:Concept", "edm:Place");
            if (!StringUtils.isEmpty(keyword)) {
                record.getKeywords().add(keyword);
            }
        }
    }

    /** Adds a description of the given type for every non-empty element selected by {@code xpath}. */
    private static void addDescriptions(LinguisticsRecord record, Document edmDom, String xpath,
            DescriptionType type) throws XPathExpressionException {
        NodeList descriptions = query(edmDom, xpath);
        for (int i = 0; i < descriptions.getLength(); i++) {
            String desc = textOf((Element) descriptions.item(i));
            if (!StringUtils.isEmpty(desc)) {
                Description description = new Description();
                description.setDescription(desc);
                description.setDescriptionType(type);
                record.getDescriptions().add(description);
            }
        }
    }

    /** Adds every non-empty {@code dc:identifier} as an alternate identifier with an empty type. */
    private static void mapIdentifiers(LinguisticsRecord record, Document edmDom)
            throws XPathExpressionException {
        NodeList identifiers = query(edmDom, "//edm:ProvidedCHO/dc:identifier");
        for (int i = 0; i < identifiers.getLength(); i++) {
            String id = textOf((Element) identifiers.item(i));
            if (!StringUtils.isEmpty(id)) {
                AlternateIdentifier altId = new AlternateIdentifier();
                altId.setAlternateIdentifier(id);
                altId.setAlternateIdentifierType("");
                record.getAlternateIdentifiers().add(altId);
            }
        }
    }

    /** Sets the publication date from {@code dcterms:issued}; the last non-empty value wins. */
    private static void mapPublicationDate(LinguisticsRecord record, Document edmDom)
            throws XPathExpressionException {
        NodeList issued = query(edmDom, "//edm:ProvidedCHO/dcterms:issued");
        for (int i = 0; i < issued.getLength(); i++) {
            String date = textOf((Element) issued.item(i));
            if (!StringUtils.isEmpty(date)) {
                record.setPublicationDate(date);
            }
        }
    }

    /**
     * Maps the license. The first {@code edm:rights} value that parses as a URI is used; if none
     * does (and the record has no license), {@code dc:rights} is used as free text, with the URI
     * set additionally when the text happens to be a valid URI.
     */
    private static void mapLicense(LinguisticsRecord record, Document edmDom) throws XPathExpressionException {
        NodeList edmRights = query(edmDom, "//edm:ProvidedCHO/edm:rights");
        for (int i = 0; i < edmRights.getLength(); i++) {
            String val = textOf((Element) edmRights.item(i));
            if (StringUtils.isEmpty(val)) {
                continue;
            }
            try {
                License license = new License();
                license.setLicenseUri(new URI(val));
                // Only attach the license once the URI is known to be valid, so that an invalid
                // edm:rights never leaves an empty License that would block the dc:rights fallback.
                record.setLicense(license);
                break;
            } catch (URISyntaxException ignored) {
                // invalid URI, do not map; try the next edm:rights element
            }
        }

        if (record.getLicense() != null) {
            return;
        }

        NodeList dcRights = query(edmDom, "//edm:ProvidedCHO/dc:rights");
        for (int i = 0; i < dcRights.getLength(); i++) {
            String val = textOf((Element) dcRights.item(i));
            if (StringUtils.isEmpty(val)) {
                continue;
            }
            License license = new License();
            license.setLicense(val);
            record.setLicense(license);
            try {
                license.setLicenseUri(new URI(val));
            } catch (URISyntaxException ignored) {
                // free-text rights statement, keep the text without a URI
            }
        }
    }

    /** Sets the language code as soon as one {@code dc:language} value is known to the NAL. */
    private static void mapLanguage(LinguisticsRecord record, Document edmDom) throws XPathExpressionException {
        NodeList dcLangs = query(edmDom, "//edm:ProvidedCHO/dc:language");
        for (int i = 0; i < dcLangs.getLength(); i++) {
            String lVal = textOf((Element) dcLangs.item(i));
            NalLanguage nalLang = languagesNal.lookupIsoCode(lVal);
            if (nalLang != null) {
                // NOTE(review): the code is hard-coded to "eng" regardless of the language that
                // was matched; this looks unintended and should probably be derived from nalLang
                // -- confirm against the NalLanguage API before changing.
                record.getCommunityFields().setLanguageCode("eng");
                break;
            }
        }
    }

    /** Adds a linguistic resource type and a general resource type for every recognised {@code edm:type}. */
    private static void mapResourceTypes(LinguisticsRecord record, Document edmDom)
            throws XPathExpressionException {
        NodeList edmTypes = query(edmDom, "//edm:ProvidedCHO/edm:type");
        for (int i = 0; i < edmTypes.getLength(); i++) {
            String val = textOf((Element) edmTypes.item(i));
            LingResourceType lingType = convertEdmType(val);
            if (lingType != null) {
                record.getCommunityFields().getLingResourceType().add(lingType);
                ResourceType resourceType = new ResourceType();
                resourceType.setResourceTypeGeneral(convertEdmTypeToResourceTypeGeneral(val));
                record.getResourceTypes().add(resourceType);
            }
        }
    }

    /** Sets the quality field from the first {@code dc:format} starting with {@code "[OCR confidence]"}. */
    private static void mapQuality(LinguisticsRecord record, Document edmDom) throws XPathExpressionException {
        NodeList dcFormats = query(edmDom, "//edm:ProvidedCHO/dc:format");
        for (int i = 0; i < dcFormats.getLength(); i++) {
            String val = textOf((Element) dcFormats.item(i));
            if (val.startsWith("[OCR confidence]")) {
                record.getCommunityFields().setQuality(val);
                break;
            }
        }
    }

    /**
     * Returns the display value for a property element: the first {@code skos:prefLabel} of the
     * first entity type (in the given order) that has an entity whose {@code rdf:about} equals
     * the element text, or the element text itself when nothing matches or the label is empty.
     *
     * @param edmDom the EDM document to search
     * @param reference the property element whose text may be an entity identifier
     * @param entityTypes qualified element names of the entity classes to search, in priority order
     * @return the resolved label or the raw text; empty (never null) when the element has no text
     */
    private static String resolveLabel(Document edmDom, Element reference, String... entityTypes)
            throws XPathExpressionException {
        String raw = textOf(reference);
        if (StringUtils.isEmpty(raw)) {
            // Nothing to look up; an empty rdf:about is not treated as a match.
            return raw;
        }
        String aboutLiteral = xpathLiteral(raw);
        for (String entityType : entityTypes) {
            NodeList labels = query(edmDom,
                    "//" + entityType + "[@rdf:about=" + aboutLiteral + "]/skos:prefLabel");
            if (labels.getLength() > 0) {
                String label = textOf((Element) labels.item(0));
                return StringUtils.isEmpty(label) ? raw : label;
            }
        }
        return raw;
    }

    /** Runs an XPath query against the document using the EDM namespace prefix map. */
    private static NodeList query(Document edmDom, String xpath) throws XPathExpressionException {
        return XPathUtil.queryDom(XmlNsUtil.xpathEdmPrefixMap, xpath, edmDom);
    }

    /** Returns the trimmed text of the element, or an empty string when the text is null. */
    private static String textOf(Element element) {
        String text = XmlUtil.getElementText(element);
        return text == null ? "" : text.trim();
    }

    /**
     * Renders a value as an XPath 1.0 string literal. XPath 1.0 has no escape sequence inside
     * literals, so the quote style is chosen to avoid the quotes contained in the value, falling
     * back to {@code concat(...)} when the value contains both kinds of quotes.
     */
    private static String xpathLiteral(String value) {
        if (value.indexOf('\'') < 0) {
            return "'" + value + "'";
        }
        if (value.indexOf('"') < 0) {
            return "\"" + value + "\"";
        }
        // The value contains at least one apostrophe here, so split yields two or more parts
        // and concat() always receives at least three arguments.
        StringBuilder literal = new StringBuilder("concat(");
        String[] parts = value.split("'", -1);
        for (int i = 0; i < parts.length; i++) {
            if (i > 0) {
                literal.append(", \"'\", ");
            }
            literal.append('\'').append(parts[i]).append('\'');
        }
        return literal.append(')').toString();
    }

    /**
     * Maps an {@code edm:type} value to the linguistic resource type.
     *
     * @param edmVal the EDM type value; may be null
     * @return the matching type, or null when the value is not recognised
     */
    private static LingResourceType convertEdmType(String edmVal) {
        if ("TEXT".equals(edmVal)) {
            return LingResourceType.TEXT;
        }
        if ("IMAGE".equals(edmVal)) {
            return LingResourceType.IMAGE;
        }
        if ("SOUND".equals(edmVal)) {
            return LingResourceType.AUDIO;
        }
        if ("3D".equals(edmVal)) {
            return LingResourceType.OTHER;
        }
        if ("VIDEO".equals(edmVal)) {
            return LingResourceType.VIDEO;
        }
        return null;
    }

    /**
     * Maps an {@code edm:type} value to the general resource type.
     *
     * @param edmVal the EDM type value; may be null
     * @return the matching type, or null when the value is not recognised
     */
    private static ResourceType.ResourceTypeGeneral convertEdmTypeToResourceTypeGeneral(String edmVal) {
        if ("TEXT".equals(edmVal)) {
            return ResourceType.ResourceTypeGeneral.TEXT;
        }
        if ("IMAGE".equals(edmVal)) {
            return ResourceType.ResourceTypeGeneral.IMAGE;
        }
        if ("SOUND".equals(edmVal)) {
            return ResourceType.ResourceTypeGeneral.SOUND;
        }
        if ("3D".equals(edmVal)) {
            return ResourceType.ResourceTypeGeneral.OTHER;
        }
        if ("VIDEO".equals(edmVal)) {
            return ResourceType.ResourceTypeGeneral.AUDIOVISUAL;
        }
        return null;
    }

}