org.emonocot.job.dwc.write.ArchiveMetadataWriter.java Source code

Java tutorial

Introduction

Here is the source code for org.emonocot.job.dwc.write.ArchiveMetadataWriter.java

Source

/*
 * This is eMonocot, a global online biodiversity information resource.
 *
 * Copyright (c) 2011-2015 The Board of Trustees of the Royal Botanic Gardens, Kew and The University of Oxford
 *
 * eMonocot is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * eMonocot is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * The complete text of the GNU Affero General Public License is in the source repository as the file
 * COPYING.  It is also available from <http://www.gnu.org/licenses/>.
 */
package org.emonocot.job.dwc.write;

import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.emonocot.api.job.TermFactory;
import org.gbif.dwc.terms.Term;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.dwc.terms.GbifTerm;

import org.gbif.dwc.text.Archive;
import org.gbif.dwc.text.ArchiveField;
import org.gbif.dwc.text.ArchiveFile;
import org.gbif.dwc.text.MetaDescriptorWriter;
import org.gbif.metadata.eml.Agent;
import org.gbif.metadata.eml.Eml;
import org.gbif.metadata.eml.EmlWriter;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.springframework.batch.core.StepContribution;
import org.springframework.batch.core.scope.context.ChunkContext;
import org.springframework.batch.core.step.tasklet.Tasklet;
import org.springframework.batch.repeat.RepeatStatus;
import org.springframework.core.io.FileSystemResource;

import freemarker.template.TemplateException;

public class ArchiveMetadataWriter implements Tasklet {

    private DateTimeFormatter dateTimeFormatter = DateTimeFormat.forPattern("dd/MM/YYYY");

    private TermFactory termFactory = new TermFactory();

    private Pattern defaultValuesPattern = Pattern.compile("((?:[^\\\\,]|\\\\.)*)(?:,|$)");

    private String archiveFile;

    private String[] taxonFields;

    private Map<String, String> taxonDefaultValues = new HashMap<String, String>();

    private String[] descriptionFields;

    private Map<String, String> descriptionDefaultValues = new HashMap<String, String>();

    private String[] distributionFields;

    private Map<String, String> distributionDefaultValues = new HashMap<String, String>();

    private String[] referenceFields;

    private Map<String, String> referenceDefaultValues = new HashMap<String, String>();

    private String[] imageFields;

    private Map<String, String> imageDefaultValues = new HashMap<String, String>();

    private String[] typeAndSpecimenFields;

    private Map<String, String> typeAndSpecimenDefaultValues = new HashMap<String, String>();

    private String[] measurementOrFactFields;

    private Map<String, String> measurementOrFactDefaultValues = new HashMap<String, String>();

    private String[] vernacularNameFields;

    private Map<String, String> vernacularNameDefaultValues = new HashMap<String, String>();

    private String[] identifierFields;

    private Map<String, String> identifierDefaultValues = new HashMap<String, String>();

    private Character quoteCharacter;

    private String delimiter;

    private FileSystemResource outputDirectory;

    private String citationString;

    private String creatorEmail;

    private String creatorName;

    private String description;

    private String homepageUrl;

    private String identifier;

    private String logoUrl;

    private String publisherEmail;

    private String publisherName;

    private String rights;

    private String subject;

    private String title;

    private int ignoreHeaderLines = 0;

    public void setIgnoreHeaderLines(int ignoreHeaderLines) {
        this.ignoreHeaderLines = ignoreHeaderLines;
    }

    public void setCitationString(String citationString) {
        this.citationString = citationString;
    }

    public void setCreatorEmail(String creatorEmail) {
        this.creatorEmail = creatorEmail;
    }

    public void setCreatorName(String creatorName) {
        this.creatorName = creatorName;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    public void setHomepageUrl(String homepageUrl) {
        this.homepageUrl = homepageUrl;
    }

    public void setIdentifier(String identifier) {
        this.identifier = identifier;
    }

    public void setLogoUrl(String logoUrl) {
        this.logoUrl = logoUrl;
    }

    public void setPublisherEmail(String publisherEmail) {
        this.publisherEmail = publisherEmail;
    }

    public void setPublisherName(String publisherName) {
        this.publisherName = publisherName;
    }

    public void setRights(String rights) {
        this.rights = rights;
    }

    public void setSubject(String subject) {
        this.subject = subject;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public void setQuoteCharacter(Character quoteCharacter) {
        this.quoteCharacter = quoteCharacter;
    }

    public void setDelimiter(String delimiter) {
        this.delimiter = delimiter;
    }

    public void setArchiveFile(String archiveFile) {
        this.archiveFile = archiveFile;
    }

    public void setTaxonDefaultValues(String taxonDefaultValues) {
        this.taxonDefaultValues = toDefaultValues(taxonDefaultValues);
    }

    public void setDescriptionDefaultValues(String descriptionDefaultValues) {
        this.descriptionDefaultValues = toDefaultValues(descriptionDefaultValues);
    }

    public void setDistributionDefaultValues(String distributionDefaultValues) {
        this.distributionDefaultValues = toDefaultValues(distributionDefaultValues);
    }

    public void setReferenceDefaultValuesFields(String referenceDefaultValues) {
        this.referenceDefaultValues = toDefaultValues(referenceDefaultValues);
    }

    public void setImageDefaultValues(String imageDefaultValues) {
        this.imageDefaultValues = toDefaultValues(imageDefaultValues);
    }

    public void setTypeAndSpecimenDefaultValues(String typeAndSpecimenDefaultValues) {
        this.typeAndSpecimenDefaultValues = toDefaultValues(typeAndSpecimenDefaultValues);
    }

    public void setMeasurementOrFactDefaultValues(String measurementOrFactDefaultValues) {
        this.measurementOrFactDefaultValues = toDefaultValues(measurementOrFactDefaultValues);
    }

    public void setVernacularNameDefaultValues(String vernacularNameDefaultValues) {
        this.vernacularNameDefaultValues = toDefaultValues(vernacularNameDefaultValues);
    }

    public void setIdentifierDefaultValues(String identifierDefaultValues) {
        this.identifierDefaultValues = toDefaultValues(identifierDefaultValues);
    }

    public void setReferenceDefaultValues(String referenceDefaultValues) {
        this.referenceDefaultValues = toDefaultValues(referenceDefaultValues);
    }

    public void setTaxonFields(String[] taxonFields) {
        this.taxonFields = taxonFields;
    }

    public void setDescriptionFields(String[] descriptionFields) {
        this.descriptionFields = descriptionFields;
    }

    public void setDistributionFields(String[] distributionFields) {
        this.distributionFields = distributionFields;
    }

    public void setReferenceFields(String[] referenceFields) {
        this.referenceFields = referenceFields;
    }

    public void setImageFields(String[] imageFields) {
        this.imageFields = imageFields;
    }

    public void setTypeAndSpecimenFields(String[] typeAndSpecimenFields) {
        this.typeAndSpecimenFields = typeAndSpecimenFields;
    }

    public void setMeasurementOrFactFields(String[] measurementOrFactFields) {
        this.measurementOrFactFields = measurementOrFactFields;
    }

    public void setVernacularNameFields(String[] vernacularNameFields) {
        this.vernacularNameFields = vernacularNameFields;
    }

    public void setIdentifierFields(String[] identifierFields) {
        this.identifierFields = identifierFields;
    }

    public void setOutputDirectory(FileSystemResource outputDirectory) {
        this.outputDirectory = outputDirectory;
    }

    Map<String, String> toDefaultValues(String defaultValueList) {

        Map<String, String> defaultValues = new HashMap<String, String>();
        if (defaultValueList != null && !defaultValueList.isEmpty()) {
            Matcher matcher = defaultValuesPattern.matcher(defaultValueList);
            while (matcher.find()) {
                String defaultValue = matcher.group(1);
                if (defaultValue.indexOf("=") != -1) {
                    int i = defaultValue.indexOf("=");
                    String key = defaultValue.substring(0, i);
                    String value = defaultValue.substring(i + 1, defaultValue.length());
                    value = value.replace("\\", "");
                    defaultValues.put(key, value);
                }
            }
        }
        return defaultValues;
    }

    public RepeatStatus execute(StepContribution stepContribution, final ChunkContext chunkContext)
            throws Exception {
        Archive archive = new Archive();

        archive.setCore(buildArchiveFile(taxonFields, taxonDefaultValues, DwcTerm.Taxon, DwcTerm.taxonID,
                "taxon.txt", ignoreHeaderLines, "UTF-8", quoteCharacter, delimiter));

        if (descriptionFields != null) {
            archive.addExtension(buildArchiveFile(descriptionFields, descriptionDefaultValues, GbifTerm.Description,
                    DwcTerm.taxonID, "description.txt", ignoreHeaderLines, "UTF-8", quoteCharacter, delimiter));
        }
        if (distributionFields != null) {
            archive.addExtension(buildArchiveFile(distributionFields, distributionDefaultValues,
                    GbifTerm.Distribution, DwcTerm.taxonID, "distribution.txt", ignoreHeaderLines, "UTF-8",
                    quoteCharacter, delimiter));
        }
        if (referenceFields != null) {
            archive.addExtension(buildArchiveFile(referenceFields, referenceDefaultValues, GbifTerm.Reference,
                    DwcTerm.taxonID, "reference.txt", ignoreHeaderLines, "UTF-8", quoteCharacter, delimiter));
        }
        if (imageFields != null) {
            archive.addExtension(buildArchiveFile(imageFields, imageDefaultValues, GbifTerm.Image, DwcTerm.taxonID,
                    "image.txt", ignoreHeaderLines, "UTF-8", quoteCharacter, delimiter));
        }
        if (typeAndSpecimenFields != null) {
            archive.addExtension(buildArchiveFile(typeAndSpecimenFields, typeAndSpecimenDefaultValues,
                    GbifTerm.TypesAndSpecimen, DwcTerm.taxonID, "typeAndSpecimen.txt", ignoreHeaderLines, "UTF-8",
                    quoteCharacter, delimiter));
        }
        if (measurementOrFactFields != null) {
            archive.addExtension(buildArchiveFile(measurementOrFactFields, measurementOrFactDefaultValues,
                    DwcTerm.MeasurementOrFact, DwcTerm.taxonID, "measurementOrFact.txt", ignoreHeaderLines, "UTF-8",
                    quoteCharacter, delimiter));
        }
        if (vernacularNameFields != null) {
            archive.addExtension(buildArchiveFile(vernacularNameFields, vernacularNameDefaultValues,
                    GbifTerm.VernacularName, DwcTerm.taxonID, "vernacularName.txt", ignoreHeaderLines, "UTF-8",
                    quoteCharacter, delimiter));
        }
        if (identifierFields != null) {
            archive.addExtension(buildArchiveFile(identifierFields, identifierDefaultValues, GbifTerm.Identifier,
                    DwcTerm.taxonID, "identifier.txt", ignoreHeaderLines, "UTF-8", quoteCharacter, delimiter));
        }

        archive.setMetadataLocation("eml.xml");
        File workDirectory = new File(outputDirectory.getFile(), archiveFile);
        if (!workDirectory.exists()) {
            workDirectory.mkdir();
        }
        File metaFile = new File(workDirectory, "meta.xml");
        try {
            MetaDescriptorWriter.writeMetaFile(metaFile, archive);
        } catch (TemplateException te) {
            throw new IOException("Exception writing meta.xml", te);
        }

        File emlFile = new File(workDirectory, "eml.xml");
        Eml eml = getEml();
        try {
            EmlWriter.writeEmlFile(emlFile, eml);
        } catch (TemplateException te) {
            throw new IOException("Exception writing eml.xml", te);
        }

        return RepeatStatus.FINISHED;
    }

    private Eml getEml() {
        Eml eml = new Eml();
        if (citationString != null) {
            DateTime now = new DateTime();
            Integer year = new Integer(now.getYear());
            citationString = citationString.replace("{0}", year.toString()).replace("{1}",
                    dateTimeFormatter.print(now));
        }
        eml.setCitation(citationString, identifier);
        Agent resourceCreator = new Agent();
        resourceCreator.setEmail(creatorEmail);
        resourceCreator.setFirstName(creatorName);
        eml.setResourceCreator(resourceCreator);
        eml.setDescription(description);
        eml.setHomepageUrl(homepageUrl);
        eml.setLogoUrl(logoUrl);
        eml.setTitle(title);
        eml.setSubject(subject);
        eml.setPublished(new Date());
        Agent metadataProvider = new Agent();
        metadataProvider.setEmail(publisherEmail);
        metadataProvider.setFirstName(publisherName);
        eml.setMetadataProvider(metadataProvider);
        eml.setIntellectualRights(rights);
        return eml;
    }

    private ArchiveFile buildArchiveFile(String[] fieldNames, Map<String, String> defaultValues, Term rowType,
            Term idTerm, String location, Integer ignoreHeaderLines, String encoding, Character fieldsEnclosedBy,
            String fieldsTerminatedBy) {
        ArchiveFile archiveFile = new ArchiveFile();
        ArchiveField idField = new ArchiveField();
        idField.setIndex(0);
        idField.setTerm(idTerm);
        archiveFile.setId(idField);

        for (int i = 1; i < fieldNames.length; i++) {
            Term term = termFactory.findTerm(fieldNames[i]);
            ArchiveField archiveField = new ArchiveField();
            archiveField.setTerm(term);
            archiveField.setIndex(i);
            if (defaultValues.containsKey(fieldNames[i])) {
                archiveField.setDefaultValue(defaultValues.get(fieldNames[i]));
                defaultValues.remove(fieldNames[i]);
            }
            archiveFile.addField(archiveField);
        }
        for (String fieldName : defaultValues.keySet()) {
            Term term = termFactory.findTerm(fieldName);
            ArchiveField archiveField = new ArchiveField();
            archiveField.setTerm(term);
            archiveField.setDefaultValue(defaultValues.get(fieldName));
            archiveFile.addField(archiveField);
        }
        archiveFile.setRowType(rowType.qualifiedName());
        archiveFile.setIgnoreHeaderLines(ignoreHeaderLines);
        archiveFile.setEncoding(encoding);
        archiveFile.setFieldsEnclosedBy(fieldsEnclosedBy);
        archiveFile.setFieldsTerminatedBy(fieldsTerminatedBy);
        archiveFile.addLocation(location);

        return archiveFile;
    }
}