com.socrata.datasync.job.MetadataJob.java Source code

Java tutorial

Introduction

Here is the source code for com.socrata.datasync.job.MetadataJob.java

Source

package com.socrata.datasync.job;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

import com.google.common.collect.ImmutableMap;
import com.socrata.api.Soda2Producer;
import com.socrata.api.SodaDdl;
import com.socrata.api.SodaWorkflow;
import com.socrata.datasync.*;
import com.socrata.datasync.config.userpreferences.UserPreferences;
import com.socrata.datasync.config.userpreferences.UserPreferencesFile;
import com.socrata.datasync.config.userpreferences.UserPreferencesJava;
import com.socrata.exceptions.SodaError;
import com.socrata.model.importer.DatasetInfo;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.annotate.JsonIgnoreProperties;
import org.codehaus.jackson.annotate.JsonProperty;
import org.codehaus.jackson.map.annotate.JsonSerialize;

@JsonIgnoreProperties(ignoreUnknown = true)
@JsonSerialize(include = JsonSerialize.Inclusion.NON_NULL)
public class MetadataJob extends Job {
    /**
     * @author Brian Williamson
     *
     * Stores a single metadata job that can be opened/run in the GUI
     * or in command-line mode.
     */
    static AtomicInteger jobCounter = new AtomicInteger(0);
    int jobNum = jobCounter.getAndIncrement();
    private String defaultJobName = "Unsaved Metadata Job" + " (" + jobNum + ")";
    private UserPreferences userPrefs;

    private static final int DATASET_ID_LENGTH = 9;

    // Anytime a @JsonProperty is added/removed/updated in this class add 1 to this value
    private static final long fileVersionUID = 1L;

    private String datasetID = "";
    private String title = "";
    private String description = "";
    private String category = "";
    private List<String> keywords = new ArrayList<String>();
    private LicenseType licenseType = LicenseType.no_license;
    private String dataProvidedBy = "";
    private String sourceLink = "";
    private String contactInfo = "";

    private static final String DEFAULT_JOB_NAME = "Untitled Metadata Job";
    public static final List<String> allowedFileToPublishExtensions = Arrays.asList("csv", "tsv");

    public MetadataJob() {
        userPrefs = new UserPreferencesJava();
    }

    /*
     * This is a method that enables DataSync preferences to be loaded from
     * a .json file instead of Java Preferences class
     */
    public MetadataJob(File preferencesConfigFile) throws IOException {
        ObjectMapper mapper = new ObjectMapper();
        try {
            userPrefs = mapper.readValue(preferencesConfigFile, UserPreferencesFile.class);
        } catch (IOException e) {
            // TODO add log entry???
            throw new IOException(e.toString());
        }
    }

    public MetadataJob(UserPreferences userPreferences) {
        userPrefs = userPreferences;
    }

    /**
     * Loads metadata job data from a file and
     * uses the saved data to populate the fields
     * of this object
     */
    public MetadataJob(String pathToFile) throws IOException {
        userPrefs = new UserPreferencesJava();

        ObjectMapper mapper = new ObjectMapper();
        try {
            MetadataJob loadedJob = mapper.readValue(new File(pathToFile), MetadataJob.class);
            setDatasetID(loadedJob.getDatasetID());
            setTitle(loadedJob.getTitle());
            setDescription(loadedJob.getDescription());
            setCategory(loadedJob.getCategory());
            setKeywords(loadedJob.getKeywords());
            setLicenseType(LicenseType.getLicenseTypeForValue(loadedJob.getLicenseTypeId()));
            setDataProvidedBy(loadedJob.getDataProvidedBy());
            setSourceLink(loadedJob.getSourceLink());
            setContactInfo(loadedJob.getContactInfo());
            setPathToSavedFile(pathToFile);
        } catch (Exception e) {
            throw new IOException(e.toString());
        }
    }

    public String getDefaultJobName() {
        return defaultJobName;
    }

    public void configure(CommandLine cmd) {
        //TODO when a metadata job can work from the cmd line, not just a job file
    }

    public boolean validateArgs(CommandLine cmd) {
        //TODO when a metadata job can work from the cmd line, not just a job file
        return true;
    }

    /**
     *
     * @return an error JobStatus if any input is invalid, otherwise JobStatus.VALID
     */
    //TODO: Need to get info on max sizes on fields and validate here.
    public JobStatus validate(SocrataConnectionInfo connectionInfo) {

        if (!(connectionInfo.getUrl().startsWith("http://") || connectionInfo.getUrl().startsWith("https://"))) {
            return JobStatus.INVALID_DOMAIN;
        }
        if (datasetID.length() != DATASET_ID_LENGTH) {
            return JobStatus.INVALID_DATASET_ID;
        }
        if (StringUtils.isBlank(title)) {
            return JobStatus.MISSING_METADATA_TITLE;
        }

        return JobStatus.VALID;
    }

    public JobStatus run() {
        SocrataConnectionInfo connectionInfo = userPrefs.getConnectionInfo();
        JobStatus runStatus = JobStatus.SUCCESS;
        String runErrorMessage = null;
        JobStatus validationStatus = validate(connectionInfo);
        //boolean workingCopyCreated = false;
        String workingCopyDatasetId = null;

        if (validationStatus.isError()) {
            runStatus = validationStatus;
        } else {

            final SodaDdl updater = SodaDdl.newDdl(connectionInfo.getUrl(), connectionInfo.getUser(),
                    connectionInfo.getPassword(), connectionInfo.getToken());
            final SodaWorkflow workflower = SodaWorkflow.newWorkflow(connectionInfo.getUrl(),
                    connectionInfo.getUser(), connectionInfo.getPassword(), connectionInfo.getToken());

            boolean noExceptions = false;
            try {
                DatasetInfo datasetInfo = updater.loadDatasetInfo(datasetID);

                if (datasetInfo == null) {
                    runErrorMessage = "Dataset with that ID does not exist or you do not have permission to publish to it";
                    runStatus = JobStatus.PUBLISH_ERROR;
                } else {
                    if (datasetInfo.PUBLISHED.equals(datasetInfo.getPublicationStage())) {
                        DatasetInfo workingCopyDatasetInfo = workflower.createWorkingCopy(datasetInfo.getId());
                        datasetInfo = updater.loadDatasetInfo(workingCopyDatasetInfo.getId());
                        workingCopyDatasetId = datasetInfo.getId();
                    }

                    datasetInfo.setName(title);
                    datasetInfo.setDescription(description);
                    datasetInfo.setCategory(category);
                    if (keywords != null && !keywords.isEmpty()) {
                        datasetInfo.setTags(keywords);
                    } else {
                        datasetInfo.setTags(new ArrayList<String>());
                    }
                    if (licenseType != null) {
                        //TODO: Once issue with setting no license via api is resolved, update below to handle
                        if (licenseType == LicenseType.no_license) {
                            datasetInfo.setLicenseId(""); //null, "", "''", "\"\"", Tried all of these, no luck, validation errors on all, so
                        } else {
                            datasetInfo.setLicenseId(licenseType.getValue());
                        }
                    }
                    datasetInfo.setAttribution(dataProvidedBy);
                    datasetInfo.setAttributionLink(sourceLink);
                    Map<String, Object> privateMetadata = datasetInfo.getPrivateMetadata();
                    if (privateMetadata == null) {
                        privateMetadata = new HashMap<String, Object>();
                    }
                    privateMetadata.put("contactEmail", contactInfo);

                    updater.updateDatasetInfo(datasetInfo);

                    if (!StringUtils.isBlank(workingCopyDatasetId)) {
                        workflower.publish(datasetInfo.getId());
                        workingCopyDatasetId = null;
                    }
                    noExceptions = true;
                }
            } catch (SodaError sodaError) {
                runErrorMessage = sodaError.getMessage();
                runStatus = JobStatus.PUBLISH_ERROR;
            } catch (InterruptedException intrruptException) {
                runErrorMessage = intrruptException.getMessage();
                runStatus = JobStatus.PUBLISH_ERROR;
            } catch (Exception other) {
                runErrorMessage = other.toString() + ": " + other.getMessage() + " \r\n "
                        + ExceptionUtils.getStackTrace(other);
                runStatus = JobStatus.PUBLISH_ERROR;
            } finally {
                try {
                    if (!StringUtils.isBlank(workingCopyDatasetId)) {
                        workflower.publish(workingCopyDatasetId);
                    }
                } catch (Exception e) {
                    runErrorMessage += " | Unable to publish dataset after updates";
                    runStatus = JobStatus.PUBLISH_ERROR;
                }
            }
        }

        String adminEmail = userPrefs.getAdminEmail();
        String logDatasetID = userPrefs.getLogDatasetID();
        JobStatus logStatus = JobStatus.SUCCESS;
        if (!logDatasetID.equals("")) {
            if (runErrorMessage != null)
                runStatus.setMessage(runErrorMessage);
            logStatus = addLogEntry(logDatasetID, connectionInfo, this, runStatus);
        }
        //Send email if there was an error updating log or target dataset
        if (userPrefs.emailUponError() && !adminEmail.equals("")) {
            String errorEmailMessage = "";
            String urlToLogDataset = connectionInfo.getUrl() + "/d/" + logDatasetID;
            if (runStatus.isError()) {
                errorEmailMessage += "There was an error updating dataset metadata.\n" + "\nDataset: "
                        + connectionInfo.getUrl() + "/d/" + getDatasetID() + "\nJob File: " + pathToSavedJobFile
                        + "\nError message: " + runErrorMessage + "\nLog dataset: " + urlToLogDataset + "\n\n";
            }
            if (logStatus.isError()) {
                errorEmailMessage += "There was an error updating the log dataset: " + urlToLogDataset + "\n"
                        + "Error message: " + logStatus.getMessage() + "\n\n";
            }
            if (runStatus.isError() || logStatus.isError()) {
                try {
                    SMTPMailer.send(adminEmail, "Socrata DataSync Error", errorEmailMessage);
                } catch (Exception e) {
                    System.out.println("Error sending email to: " + adminEmail + "\n" + e.getMessage());
                }
            }
        }

        // IMPORTANT because setMessage from Logging dataset interferes with enum
        if (runErrorMessage != null)
            runStatus.setMessage(runErrorMessage);

        return runStatus;
    }

    public String loadCurrentMetadata() {
        try {
            SocrataConnectionInfo connectionInfo = userPrefs.getConnectionInfo();
            final SodaDdl sodaDdl = SodaDdl.newDdl(connectionInfo.getUrl(), connectionInfo.getUser(),
                    connectionInfo.getPassword(), connectionInfo.getToken());
            readDatasetInfo(sodaDdl.loadDatasetInfo(datasetID));
            return "";
        } catch (Exception e) {
            return e.getMessage();
        }
    }

    //Probably makes sense to make one generic addLogEntry() for all job types
    public static JobStatus addLogEntry(String logDatasetID, SocrataConnectionInfo connectionInfo, MetadataJob job,
            JobStatus status) {
        final Soda2Producer producer = Soda2Producer.newProducer(connectionInfo.getUrl(), connectionInfo.getUser(),
                connectionInfo.getPassword(), connectionInfo.getToken());

        List<Map<String, Object>> upsertObjects = new ArrayList<Map<String, Object>>();
        Map<String, Object> newCols = new HashMap<String, Object>();

        // add standard log data
        Date currentDateTime = new Date();
        newCols.put("Date", (Object) currentDateTime);
        newCols.put("DatasetID", (Object) job.getDatasetID());
        newCols.put("JobFile", (Object) job.getPathToSavedFile());
        if (status.isError()) {
            newCols.put("Errors", (Object) status.getMessage());
        } else {
            newCols.put("Success", (Object) true);
        }
        upsertObjects.add(ImmutableMap.copyOf(newCols));

        JobStatus logStatus = JobStatus.SUCCESS;
        String errorMessage = "";
        boolean noPublishExceptions = false;
        try {
            producer.upsert(logDatasetID, upsertObjects);
            noPublishExceptions = true;
        } catch (SodaError sodaError) {
            errorMessage = sodaError.getMessage();
        } catch (InterruptedException intrruptException) {
            errorMessage = intrruptException.getMessage();
        } catch (Exception other) {
            errorMessage = other.toString() + ": " + other.getMessage();
        } finally {
            if (!noPublishExceptions) {
                logStatus = JobStatus.PUBLISH_ERROR;
                logStatus.setMessage(errorMessage);
            }
        }
        return logStatus;
    }

    @JsonProperty("fileVersionUID")
    public long getFileVersionUID() {
        return fileVersionUID;
    }

    @JsonProperty("datasetID")
    public void setDatasetID(String newDatasetID) {
        datasetID = newDatasetID;
    }

    @JsonProperty("datasetID")
    public String getDatasetID() {
        return datasetID;
    }

    @JsonProperty("title")
    public String getTitle() {
        return title;
    }

    @JsonProperty("title")
    public void setTitle(String title) {
        this.title = title;
    }

    @JsonProperty("description")
    public String getDescription() {
        return description;
    }

    @JsonProperty("description")
    public void setDescription(String description) {
        this.description = description;
    }

    @JsonProperty("category")
    public String getCategory() {
        return category;
    }

    @JsonProperty("category")
    public void setCategory(String category) {
        this.category = category;
    }

    @JsonProperty("keywords")
    public List<String> getKeywords() {
        return keywords;
    }

    @JsonProperty("keywords")
    public void setKeywords(List<String> keywords) {
        this.keywords = keywords;
    }

    @JsonProperty("license_type_id")
    public String getLicenseTypeId() {
        if (licenseType != null) {
            return licenseType.getValue();
        }
        return licenseType.no_license.getValue();
    }

    @JsonProperty("license_type_id")
    public void setLicenseTypeId(String licenseTypeId) {
        this.licenseType = LicenseType.getLicenseTypeForValue(licenseTypeId);
    }

    public LicenseType getLicenseType() {
        return this.licenseType;
    }

    public void setLicenseType(LicenseType licenseType) {
        this.licenseType = licenseType;
    }

    @JsonProperty("data_provided_by")
    public String getDataProvidedBy() {
        return dataProvidedBy;
    }

    @JsonProperty("data_provided_by")
    public void setDataProvidedBy(String dataProvidedBy) {
        this.dataProvidedBy = dataProvidedBy;
    }

    @JsonProperty("source_link")
    public String getSourceLink() {
        return sourceLink;
    }

    @JsonProperty("source_link")
    public void setSourceLink(String sourceLink) {
        this.sourceLink = sourceLink;
    }

    @JsonProperty("contact_info")
    public String getContactInfo() {
        return contactInfo;
    }

    @JsonProperty("contact_info")
    public void setContactInfo(String contactInfo) {
        this.contactInfo = contactInfo;
    }

    private void readDatasetInfo(DatasetInfo datasetInfo) {
        if (datasetInfo == null) {
            return;
        }
        this.datasetID = datasetInfo.getId();
        this.title = datasetInfo.getName();
        this.description = datasetInfo.getDescription();
        this.category = datasetInfo.getCategory();
        this.keywords = datasetInfo.getTags();
        this.licenseType = LicenseType.getLicenseTypeForValue(datasetInfo.getLicenseId());
        this.dataProvidedBy = datasetInfo.getAttribution();
        this.sourceLink = datasetInfo.getAttributionLink();
        Map<String, Object> privateMetadata = datasetInfo.getPrivateMetadata();
        if (privateMetadata != null) {
            Object contactInfoObj = privateMetadata.get("contactEmail");
            if (contactInfoObj instanceof String) {
                this.contactInfo = (String) contactInfoObj;
            }
        }
    }
}