eu.apenet.dpt.utils.util.Ead2EdmInformation.java Source code

Java tutorial

Introduction

Here is the source code for eu.apenet.dpt.utils.util.Ead2EdmInformation.java

Source

package eu.apenet.dpt.utils.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.TreeSet;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.commons.io.input.BOMInputStream;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
 * User: Yoann Moranville Date: 15/04/2013
 *
 * @author Yoann Moranville
 */
public class Ead2EdmInformation {

    private static final Logger LOG = Logger.getLogger(Ead2EdmInformation.class);
    private String languageCode;
    private String repository;
    private String roleType;
    private StringBuilder archdescValue = new StringBuilder();
    private String archdescRepository;
    private TreeSet<String> languagesCodes; // List of languages exist inside "<langusage>" element.
    private TreeSet<String> alternativeLanguages; // List of languages exist inside "<archdesc><langmaterial>" element.
    private boolean languagesOnParent; // Check the existence of languages in "<archdesc>" element.
    private boolean languagesOnAllCLevels; // Check the existence of languages in all "<c>" elements.
    private String userestrictDaoLicenceType;
    private String userestrictDaoLicenceLink;
    private TreeSet<String> userestrictDaoLicenceText;
    private boolean licensesOnParent; // Check the existence of licenses in "<archdesc>" element.
    private boolean licensesOnAllCLevels; // Check the existence of licenses in all "<c>" elements.
    private String archdescLicenceLink;
    private String archdescLicenceType;
    private TreeSet<String> archdescLicenceText;
    private String archdescUnittitle;
    private String titlestmtTitleproper;

    public String getLanguageCode() {
        return this.languageCode;
    }

    public String getRepository() {
        return this.repository;
    }

    public String getRoleType() {
        return this.roleType;
    }

    public String getArchdescRepository() {
        return this.archdescRepository;
    }

    public TreeSet<String> getLanguagesCodes() {
        return this.languagesCodes;
    }

    public TreeSet<String> getAlternativeLanguages() {
        return this.alternativeLanguages;
    }

    public boolean isLanguagesOnParent() {
        return this.languagesOnParent;
    }

    public boolean isLanguagesOnAllCLevels() {
        return this.languagesOnAllCLevels;
    }

    public String getUserestrictDaoLicenceType() {
        return userestrictDaoLicenceType;
    }

    public String getUserestrictDaoLicenceLink() {
        return userestrictDaoLicenceLink;
    }

    public TreeSet<String> getUserestrictDaoLicenceText() {
        return this.userestrictDaoLicenceText;
    }

    public boolean isLicensesOnParent() {
        return licensesOnParent;
    }

    public boolean isLicensesOnAllCLevels() {
        return licensesOnAllCLevels;
    }

    public String getArchdescLicenceLink() {
        return archdescLicenceLink;
    }

    public String getArchdescLicenceType() {
        return archdescLicenceType;
    }

    public TreeSet<String> getArchdescLicenceText() {
        return archdescLicenceText;
    }

    public String getArchdescUnittitle() {
        return archdescUnittitle;
    }

    public String getTitlestmtTitleproper() {
        return titlestmtTitleproper;
    }

    public Ead2EdmInformation(File fileToRead, String databaseRoleType, String archdescRepository)
            throws IOException, SAXException, ParserConfigurationException {
        this();
        this.archdescRepository = archdescRepository;
        this.languagesCodes = new TreeSet<String>();
        this.alternativeLanguages = new TreeSet<String>();
        this.languagesOnParent = false;
        this.languagesOnAllCLevels = true;
        this.languageCode = "";
        this.repository = "";
        this.roleType = databaseRoleType;
        this.userestrictDaoLicenceType = "";
        this.userestrictDaoLicenceLink = "";
        this.userestrictDaoLicenceText = new TreeSet<String>();
        this.licensesOnParent = false;
        this.licensesOnAllCLevels = true;
        this.archdescLicenceLink = "";
        this.archdescLicenceType = "";
        this.archdescLicenceText = new TreeSet<String>();
        this.archdescUnittitle = "";
        this.titlestmtTitleproper = "";
        this.determineDaoInformation(fileToRead);
    }

    public Ead2EdmInformation() {
        this.languageCode = "";
        this.repository = "";
        this.roleType = "";
        this.archdescRepository = "";
        this.languagesCodes = new TreeSet<String>();
        this.alternativeLanguages = new TreeSet<String>();
        this.languagesOnParent = false;
        this.languagesOnAllCLevels = true;
        this.userestrictDaoLicenceType = "";
        this.userestrictDaoLicenceLink = "";
        this.userestrictDaoLicenceText = new TreeSet<String>();
        this.licensesOnParent = false;
        this.licensesOnAllCLevels = true;
        this.archdescLicenceLink = "";
        this.archdescLicenceType = "";
        this.archdescLicenceText = new TreeSet<String>();
        this.archdescUnittitle = "";
        this.titlestmtTitleproper = "";
    }

    private void determineDaoInformation(File fileToRead)
            throws IOException, SAXException, ParserConfigurationException {
        SAXParserFactory spf = SAXParserFactory.newInstance();
        SAXParser sp = spf.newSAXParser();
        XMLReader xr = sp.getXMLReader();
        EadContentHandler myContentHandler = new EadContentHandler();
        xr.setContentHandler(myContentHandler);
        xr.parse(new InputSource(new InputStreamReader(new BOMInputStream(new FileInputStream(fileToRead)))));

        if (this.roleType == null) {
            this.roleType = "UNSPECIFIED";
        }
    }

    private class EadContentHandler extends DefaultHandler {

        private static final String CREATIVECOMMONS_CPDM = "cpdm";
        private static final String CREATIVECOMMONS_CC0 = "cc0";
        private static final String CREATIVECOMMONS = "creativecommons";
        private static final String EUROPEANA = "europeana";
        private static final String OUT_OF_COPYRIGHT = "outofcopyright";
        private static final String CREATIVECOMMONS_CPDM_LINK = "http://creativecommons.org/publicdomain/mark/";
        private static final String CREATIVECOMMONS_CC0_LINK = "http://creativecommons.org/publicdomain/zero/";
        private static final String CREATIVECOMMONS_LINK = "http://creativecommons.org/licenses/";
        private static final String EUROPEANA_LINK = "http://www.europeana.eu/rights/";
        private static final String OUT_OF_COPYRIGHT_LINK = "http://www.europeana.eu/portal/rights/out-of-copyright";

        private boolean inLangusage = false;
        private boolean inArchdesc = false;
        private boolean inC = false;
        private boolean inDid = false;
        private boolean inRepository = false;
        private boolean inUserestrictDao = false;
        private boolean inP = false;
        private boolean inExtref = false;
        private boolean hasDao = false;
        private boolean inUnittitle = false;
        private boolean inFiledesc = false;
        private boolean inTitlestmt = false;
        private boolean inTitleproper = false;

        //flags for first occuring type and DAO in XML
        private boolean typeRetrieved = false;
        private boolean daoRetrieved = false;
        private boolean metsLicenceRetrieved;
        private boolean languageOnCLevel;

        private boolean hasDataFromRepository = false;
        private boolean stillInFirstDataRepository = true;

        @Override
        public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
                throws SAXException {

            if (qName.equals("langusage")) {
                this.inLangusage = true;
            } else if (qName.equals("archdesc")) {
                this.inArchdesc = true;
            } else if (qName.equals("c")) {
                this.inC = true;
                this.metsLicenceRetrieved = false;
            } else if (qName.equals("did")) {
                this.inDid = true;
                if (this.inC) {
                    this.languageOnCLevel = false;
                }
            } else if (qName.equals("dao")) {
                this.hasDao = true;
                if (this.typeRetrieved == false && atts.getValue("xlink:role") != null) {
                    roleType = atts.getValue("xlink:role");
                    this.typeRetrieved = true;
                }
            } else if (qName.equals("repository")) {
                this.inRepository = true;
            } else if (qName.equals("language")) {
                // Check if the language is in element "<langusage>".
                if (this.inLangusage && atts.getValue("langcode") != null) {
                    languagesCodes.add(atts.getValue("langcode"));
                } else if (this.inArchdesc && !this.inC && this.inDid && !this.hasDao
                        && atts.getValue("langcode") != null) {
                    // Check if the language is in element "<archdesc><langmaterial>".
                    languagesOnParent = true;
                    alternativeLanguages.add(atts.getValue("langcode"));
                } else if (this.inC && this.inDid && atts.getValue("langcode") != null) {
                    // Check if the language is in element "<c>".
                    if ("".equals(languageCode)) {
                        languageCode = atts.getValue("langcode");
                    }
                    this.languageOnCLevel = true;
                }
            } else if (qName.equals("userestrict")) {
                if (atts.getValue("type") != null) {
                    if ("dao".equals(atts.getValue("type"))) {
                        this.inUserestrictDao = true;
                    }
                }
            } else if (qName.equals("p")) {
                this.inP = true;
            } else if (qName.equals("extref")) {
                this.inExtref = true;
                if (this.metsLicenceRetrieved == false && this.inUserestrictDao) {
                    userestrictDaoLicenceLink = atts.getValue("xlink:href");
                    if (userestrictDaoLicenceLink.startsWith(CREATIVECOMMONS_LINK)) {
                        userestrictDaoLicenceType = CREATIVECOMMONS;
                    } else if (userestrictDaoLicenceLink.startsWith(CREATIVECOMMONS_CC0_LINK)) {
                        userestrictDaoLicenceType = CREATIVECOMMONS_CC0;
                    } else if (userestrictDaoLicenceLink.startsWith(CREATIVECOMMONS_CPDM_LINK)) {
                        userestrictDaoLicenceType = CREATIVECOMMONS_CPDM;
                    } else if (userestrictDaoLicenceLink.startsWith(EUROPEANA_LINK)) {
                        userestrictDaoLicenceType = EUROPEANA;
                    } else if (userestrictDaoLicenceLink.startsWith(OUT_OF_COPYRIGHT_LINK)) {
                        userestrictDaoLicenceType = OUT_OF_COPYRIGHT;
                    } else {
                        userestrictDaoLicenceType = "";
                    }
                    if (this.inArchdesc && !this.inC && userestrictDaoLicenceType != "") {
                        archdescLicenceLink = userestrictDaoLicenceLink;
                        archdescLicenceType = userestrictDaoLicenceType;
                        licensesOnParent = true;
                    }
                }
            } else if (qName.equals("unittitle")) {
                this.inUnittitle = true;
            } else if (qName.equals("filedesc")) {
                this.inFiledesc = true;
            } else if (qName.equals("titlestmt")) {
                this.inTitlestmt = true;
            } else if (qName.equals("titleproper")) {
                this.inTitleproper = true;
            }
        }

        @Override
        public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
            if (qName.equals("langusage")) {
                this.inLangusage = false;
            } else if (qName.equals("archdesc")) {
                this.inArchdesc = false;
            } else if (qName.equals("c")) {
                this.inC = false;
                if (!this.metsLicenceRetrieved) {
                    licensesOnAllCLevels = false;
                    LOG.debug("No license present in c level.");
                }
            } else if (qName.equals("did")) {
                this.inDid = false;

                // Set the value of no language present in all C Levels if necessary.
                if (this.inC) {
                    if (!this.languageOnCLevel) {
                        languagesOnAllCLevels = false;
                        LOG.debug("No language present in c level.");
                    }
                }
            } else if (qName.equals("dao")) {
                // Nothing to do here
            } else if (qName.equals("repository")) {
                this.inRepository = false;
                if (hasDataFromRepository) {
                    stillInFirstDataRepository = false;
                }
            } else if (qName.equals("language")) {
                // Nothing to do here
            } else if (qName.equals("userestrict")) {
                if (this.inUserestrictDao) {
                    this.metsLicenceRetrieved = true;
                }
                this.inUserestrictDao = false;
            } else if (qName.equals("p")) {
                this.inP = false;
            } else if (qName.equals("extref")) {
                this.inExtref = false;
            } else if (qName.equals("unittitle")) {
                this.inUnittitle = false;
            } else if (qName.equals("filedesc")) {
                this.inFiledesc = false;
            } else if (qName.equals("titlestmt")) {
                this.inTitlestmt = false;
            } else if (qName.equals("titleproper")) {
                this.inTitleproper = false;
            }
        }

        @Override
        public void characters(char ch[], int start, int length) {
            if (this.inRepository && (!hasDataFromRepository || stillInFirstDataRepository)) {
                int index = 0;
                String textBetween = new String(ch, start, length);
                while (index < textBetween.length()) {
                    if (textBetween.charAt(index) >= ' ') {
                        index++;
                    } else {
                        break;
                    }
                }
                if (this.inArchdesc && !this.inC) {
                    if (archdescRepository != null && !archdescRepository.endsWith(" ")) {
                        archdescValue.append(" ");
                    }
                    archdescValue.append(textBetween.substring(0, index));
                    archdescRepository = archdescValue.toString();
                    if (StringUtils.isNotBlank(archdescRepository)) {
                        hasDataFromRepository = true;
                    }
                }
            }
            if (this.inP) {
                if (this.metsLicenceRetrieved == false && this.inUserestrictDao && this.inExtref == false) {
                    String textBetween = new String(ch, start, length);
                    if (StringUtils.isNotEmpty(textBetween.trim())) {
                        userestrictDaoLicenceText.add(textBetween);
                        if (this.inArchdesc && !this.inC) {
                            archdescLicenceText.add(textBetween);
                        }
                    }
                }
            }
            if (this.inArchdesc && !this.inC && this.inDid && this.inUnittitle) {
                archdescUnittitle = new String(ch, start, length);
            }
            if (this.inTitlestmt && this.inTitleproper) {
                titlestmtTitleproper = new String(ch, start, length);
            }
        }
    }
}