cz.opendata.linked.lodcloud.loader.Loader.java Source code

Java tutorial

Introduction

Here is the source code for cz.opendata.linked.lodcloud.loader.Loader.java

Source

package cz.opendata.linked.lodcloud.loader;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.http.ParseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.client.LaxRedirectStrategy;
import org.apache.http.message.BasicHeader;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.openrdf.model.Value;
import org.openrdf.model.vocabulary.DCTERMS;
import org.openrdf.repository.RepositoryConnection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import cz.opendata.linked.lodcloud.loader.LoaderConfig.LinkCount;
import cz.opendata.linked.lodcloud.loader.LoaderConfig.MappingFile;
import eu.unifiedviews.dataunit.DataUnit;
import eu.unifiedviews.dataunit.rdf.RDFDataUnit;
import eu.unifiedviews.dpu.DPU;
import eu.unifiedviews.dpu.DPUException;
import eu.unifiedviews.helpers.dataunit.DataUnitUtils;
import eu.unifiedviews.helpers.dpu.config.ConfigHistory;
import eu.unifiedviews.helpers.dpu.context.ContextUtils;
import eu.unifiedviews.helpers.dpu.exec.AbstractDpu;
import eu.unifiedviews.helpers.dpu.extension.ExtensionInitializer;
import eu.unifiedviews.helpers.dpu.extension.faulttolerance.FaultTolerance;
import eu.unifiedviews.helpers.dpu.rdf.sparql.SparqlUtils;

/**
 * UnifiedViews loader DPU that publishes the description of a dataset to a CKAN catalogue.
 * <p>
 * The DPU reads a DCAT/VoID description from the {@code metadata} input, asks CKAN whether the
 * configured dataset already exists, creates it when it does not, and finally posts the complete
 * dataset record (tags, resources and extras) built from the metadata and the DPU configuration.
 */
@DPU.AsLoader
public class Loader extends AbstractDpu<LoaderConfig> {

    private static final Logger logger = LoggerFactory.getLogger(Loader.class);

    /** Charset used for request bodies and as the fallback when a response declares none. */
    private static final Charset UTF_8 = Charset.forName("utf-8");

    @DataUnit.AsInput(name = "metadata")
    public RDFDataUnit metadata;

    @ExtensionInitializer.Init
    public FaultTolerance faultTolerance;

    public Loader() {
        super(LoaderDialog.class, ConfigHistory.noHistory(LoaderConfig.class));
    }

    /**
     * Reads the dataset description from the input, then creates and/or updates the dataset in CKAN.
     *
     * @throws DPUException when the metadata does not contain exactly one distribution with a SPARQL
     *                      endpoint, or when one of the single-value queries returns more than one row
     */
    @Override
    protected void innerExecute() throws DPUException {
        logger.debug("Querying metadata");

        String datasetUrl = executeSimpleSelectQuery(
                "SELECT ?d WHERE {?d a <" + LoaderVocabulary.DCAT_DATASET_CLASS + ">}", "d");

        List<Map<String, Value>> distributions = executeSelectQuery(
                "SELECT ?distribution WHERE {<" + datasetUrl + "> <" + LoaderVocabulary.DCAT_DISTRIBUTION
                        + "> ?distribution . ?distribution <" + LoaderVocabulary.VOID_SPARQLENDPOINT + "> [] .  }");

        if (distributions.size() != 1) {
            throw new DPUException("Expected 1 distribution with SPARQL endpoint. Found: " + distributions.size());
        }

        String distribution = distributions.get(0).get("distribution").stringValue();
        String title = executeSimpleSelectQuery("SELECT ?title WHERE {<" + distribution + "> <" + DCTERMS.TITLE
                + "> ?title FILTER(LANGMATCHES(LANG(?title), \"en\"))}", "title");
        String description = executeSimpleSelectQuery("SELECT ?description WHERE {<" + distribution + "> <"
                + DCTERMS.DESCRIPTION + "> ?description FILTER(LANGMATCHES(LANG(?description), \"en\"))}",
                "description");
        String sparqlEndpointVoid = executeSimpleSelectQuery("SELECT ?sparqlEndpoint WHERE {<" + distribution
                + "> <" + LoaderVocabulary.VOID_SPARQLENDPOINT + "> ?sparqlEndpoint }", "sparqlEndpoint");
        String datadump = executeSimpleSelectQuery(
                "SELECT ?dwnld WHERE {<" + distribution + "> <" + LoaderVocabulary.VOID_DATADUMP + "> ?dwnld }",
                "dwnld");
        String triplecount = executeSimpleSelectQuery("SELECT ?triplecount WHERE {<" + distribution + "> <"
                + LoaderVocabulary.VOID_TRIPLES + "> ?triplecount }", "triplecount");
        String dformat = executeSimpleSelectQuery(
                "SELECT ?format WHERE {<" + distribution + "> <" + DCTERMS.FORMAT + "> ?format }", "format");
        String dlicense = executeSimpleSelectQuery(
                "SELECT ?license WHERE {<" + distribution + "> <" + DCTERMS.LICENSE + "> ?license }", "license");
        String dschema = executeSimpleSelectQuery("SELECT ?schema WHERE {<" + distribution + "> <"
                + LoaderVocabulary.WDRS_DESCRIBEDBY + "> ?schema }", "schema");

        List<String> examples = new LinkedList<String>();
        for (Map<String, Value> map : executeSelectQuery("SELECT ?exampleResource WHERE {<" + distribution + "> <"
                + LoaderVocabulary.VOID_EXAMPLERESOURCE + "> ?exampleResource }")) {
            examples.add(map.get("exampleResource").stringValue());
        }

        logger.debug("Querying for the dataset in CKAN");
        Map<String, String> resUrlIdMap = new HashMap<String, String>();
        Map<String, String> resFormatIdMap = new HashMap<String, String>();
        boolean exists = fetchExistingResources(resUrlIdMap, resFormatIdMap);

        logger.debug("Creating JSON");
        try {
            JSONObject root = buildRoot(datasetUrl, title, description);
            root.put("tags", buildTags());
            root.put("resources", buildResources(distribution, sparqlEndpointVoid, datadump, dformat, dschema,
                    examples, resUrlIdMap, resFormatIdMap));
            root.put("extras", buildExtras(triplecount, dlicense, datasetUrl));

            if (!exists) {
                createDataset(title);
            }

            if (!ctx.canceled()) {
                updateDataset(root);
            }
        } catch (JSONException e) {
            logger.error(e.getLocalizedMessage(), e);
        } catch (URISyntaxException e) {
            logger.error(e.getLocalizedMessage(), e);
        } catch (UnsupportedEncodingException e) {
            logger.error(e.getLocalizedMessage(), e);
        }

    }

    /**
     * Asks CKAN for the configured dataset and records the ids of the resources it already has.
     * <p>
     * Failures are logged and not propagated. Any status other than 200 is treated as
     * "dataset does not exist".
     *
     * @param resUrlIdMap    filled with resource URL to resource id
     * @param resFormatIdMap filled with resource format to resource id (only for resources having a format)
     * @return {@code true} when CKAN answered with status 200
     */
    private boolean fetchExistingResources(Map<String, String> resUrlIdMap, Map<String, String> resFormatIdMap) {
        boolean exists = false;
        HttpGet httpGet = new HttpGet(config.getApiUri() + "/" + config.getDatasetID());
        CloseableHttpClient queryClient = HttpClientBuilder.create().setRedirectStrategy(new LaxRedirectStrategy())
                .build();
        try {
            CloseableHttpResponse queryResponse = queryClient.execute(httpGet);
            try {
                if (queryResponse.getStatusLine().getStatusCode() == 200) {
                    logger.info("Dataset found");
                    // Set before parsing: a dataset that exists but has an unreadable body must not be re-created.
                    exists = true;

                    JSONObject response = new JSONObject(readBody(queryResponse));
                    JSONArray resourcesArray = response.getJSONArray("resources");
                    for (int i = 0; i < resourcesArray.length(); i++) {
                        try {
                            JSONObject resource = resourcesArray.getJSONObject(i);
                            String id = resource.getString("id");
                            resUrlIdMap.put(resource.getString("url"), id);

                            if (resource.has("format")) {
                                resFormatIdMap.put(resource.getString("format"), id);
                            }
                        } catch (JSONException e) {
                            // One malformed resource must not prevent the others from being recorded.
                            logger.error(e.getLocalizedMessage(), e);
                        }
                    }
                } else {
                    logger.info("Dataset not found");
                    // The body is read so the connection is fully consumed; it is only useful for diagnostics.
                    logger.debug("Response to dataset query: {} {}", queryResponse.getStatusLine(),
                            readBody(queryResponse));
                }
            } finally {
                queryResponse.close();
            }
        } catch (IOException e) {
            logger.error(e.getLocalizedMessage(), e);
        } catch (ParseException e) {
            logger.error(e.getLocalizedMessage(), e);
        } catch (JSONException e) {
            logger.error(e.getLocalizedMessage(), e);
        } finally {
            closeClient(queryClient, null);
        }
        return exists;
    }

    /**
     * Builds the tag list from the configuration.
     *
     * @return array of tag names
     * @throws JSONException if the JSON library rejects a value
     */
    private JSONArray buildTags() throws JSONException {
        JSONArray tags = new JSONArray();
        tags.put("lod");
        tags.put(config.getVocabTag().toString());
        tags.put(config.getVocabMappingTag().toString());
        tags.put(config.getPublishedTag().toString());
        tags.put(config.getProvenanceMetadataTag().toString());
        tags.put(config.getLicenseMetadataTag().toString());
        if (config.isLimitedSparql()) {
            tags.put("limited-sparql-endpoint");
        }
        if (config.isLodcloudNolinks()) {
            tags.put("lodcloud.nolinks");
        }
        if (config.isLodcloudUnconnected()) {
            tags.put("lodcloud.unconnected");
        }
        if (config.isLodcloudNeedsInfo()) {
            tags.put("lodcloud.needsinfo");
        }
        if (config.isLodcloudNeedsFixing()) {
            tags.put("lodcloud.needsfixing");
        }
        for (String prefix : config.getVocabularies()) {
            tags.put("format-" + prefix);
        }
        tags.put(config.getTopic());
        for (String s : config.getAdditionalTags()) {
            tags.put(s);
        }
        return tags;
    }

    /**
     * Builds the resource list: SPARQL endpoint, VoID description, optional schema, dump, two resources per
     * example (Turtle and HTML) and one resource per configured mapping file.
     * <p>
     * Resources already present in CKAN (matched by format or by URL) carry their existing id.
     *
     * @throws JSONException                if the JSON library rejects a value
     * @throws UnsupportedEncodingException if UTF-8 is not available for URL encoding
     */
    private JSONArray buildResources(String distribution, String sparqlEndpointVoid, String datadump,
            String dformat, String dschema, List<String> examples, Map<String, String> resUrlIdMap,
            Map<String, String> resFormatIdMap) throws JSONException, UnsupportedEncodingException {
        JSONArray resources = new JSONArray();

        // SPARQL endpoint resource
        JSONObject sparqlEndpoint = newResource("api/sparql", "api", resFormatIdMap.get("api/sparql"));
        sparqlEndpoint.put("description", config.getSparqlEndpointDescription());
        sparqlEndpoint.put("name", config.getSparqlEndpointName());
        sparqlEndpoint.put("url", sparqlEndpointVoid);
        resources.put(sparqlEndpoint);

        // VoID resource
        JSONObject voidJson = newResource("meta/void", "file", resFormatIdMap.get("meta/void"));
        voidJson.put("description", "VoID description generated live");
        voidJson.put("name", "VoID");
        voidJson.put("url", describeUrl(sparqlEndpointVoid, distribution));
        resources.put(voidJson);

        // RDFS/OWL schema resource
        if (config.getVocabTag() != LoaderConfig.VocabTags.NoProprietaryVocab && !dschema.isEmpty()) {
            JSONObject schemaResource = newResource("meta/rdf-schema", "file",
                    resFormatIdMap.get("meta/rdf-schema"));
            schemaResource.put("description", "RDFS/OWL Schema with proprietary vocabulary");
            schemaResource.put("name", "RDFS/OWL schema");
            schemaResource.put("url", dschema);
            resources.put(schemaResource);
        }

        // Dump resource
        JSONObject dump = newResource(dformat, "file", resUrlIdMap.get(datadump));
        dump.put("name", "Dump");
        dump.put("url", datadump);
        resources.put(dump);

        for (String example : examples) {
            // Example resource as text/turtle
            String exTurtleUrl = describeUrl(sparqlEndpointVoid, example);
            JSONObject exTurtle = newResource("example/turtle", "file", resUrlIdMap.get(exTurtleUrl));
            exTurtle.put("name", "Example resource in Turtle");
            exTurtle.put("url", exTurtleUrl);
            resources.put(exTurtle);

            // Example resource as HTML
            JSONObject exHTML = newResource("HTML", "file", resUrlIdMap.get(example));
            exHTML.put("description", "Generated by Virtuoso FCT");
            exHTML.put("name", "Example resource in Virtuoso FCT");
            exHTML.put("url", example);
            resources.put(exHTML);
        }

        // Mapping file resources. They do not depend on the examples, so they are emitted exactly once each
        // (previously they were nested in the example loop and were duplicated per example / dropped when
        // there were no examples).
        for (MappingFile mapping : config.getMappingFiles()) {
            String mappingMime = "mapping/" + mapping.getMappingFormat();
            JSONObject mappingResource = newResource(mappingMime, "file", resFormatIdMap.get(mappingMime));
            mappingResource.put("description",
                    "Schema mapping file in " + mapping.getMappingFormat() + " format.");
            mappingResource.put("name", "Mapping " + mapping.getMappingFormat());
            mappingResource.put("url", mapping.getMappingFile());
            resources.put(mappingResource);
        }

        return resources;
    }

    /**
     * Creates a resource object with the fields every resource shares.
     *
     * @param format       value of the {@code format} field
     * @param resourceType value of the {@code resource_type} field
     * @param existingId   id of the matching resource already in CKAN, or {@code null} when there is none
     * @throws JSONException if the JSON library rejects a value
     */
    private static JSONObject newResource(String format, String resourceType, String existingId)
            throws JSONException {
        JSONObject resource = new JSONObject();
        resource.put("format", format);
        resource.put("resource_type", resourceType);
        if (existingId != null) {
            resource.put("id", existingId);
        }
        return resource;
    }

    /**
     * Builds the URL that asks the SPARQL endpoint for a Turtle {@code DESCRIBE} of the given resource.
     *
     * @throws UnsupportedEncodingException if UTF-8 is not available for URL encoding
     */
    private static String describeUrl(String endpoint, String resource) throws UnsupportedEncodingException {
        return endpoint + "?query=" + URLEncoder.encode("DESCRIBE <" + resource + ">", "UTF-8") + "&output="
                + URLEncoder.encode("text/turtle", "UTF-8");
    }

    /**
     * Builds the {@code extras} object: triple count, optional shortname/namespace/license link, graph name
     * and one {@code links:<target>} entry per configured link count.
     *
     * @throws JSONException if the JSON library rejects a value
     */
    private JSONObject buildExtras(String triplecount, String dlicense, String datasetUrl) throws JSONException {
        JSONObject extras = new JSONObject();
        extras.put("triples", triplecount);
        if (!config.getShortname().isEmpty()) {
            extras.put("shortname", config.getShortname());
        }
        if (!config.getNamespace().isEmpty()) {
            extras.put("namespace", config.getNamespace());
        }
        if (!dlicense.isEmpty()) {
            extras.put("license_link", dlicense);
        }
        extras.put("sparql_graph_name", datasetUrl);
        for (LinkCount link : config.getLinks()) {
            extras.put("links:" + link.getTargetDataset(), link.getLinkCount());
        }
        return extras;
    }

    /**
     * Builds the top-level dataset object without its {@code tags}, {@code resources} and {@code extras}.
     * Optional configuration values are only written when they are not empty.
     *
     * @throws JSONException if the JSON library rejects a value
     */
    private JSONObject buildRoot(String datasetUrl, String title, String description) throws JSONException {
        JSONObject root = new JSONObject();
        if (!config.getDatasetID().isEmpty()) {
            root.put("name", config.getDatasetID());
        }
        root.put("url", datasetUrl);
        root.put("title", title);
        if (!config.getMaintainerName().isEmpty()) {
            root.put("maintainer", config.getMaintainerName());
        }
        if (!config.getMaintainerEmail().isEmpty()) {
            root.put("maintainer_email", config.getMaintainerEmail());
        }
        root.put("license_id", config.getLicense_id());
        root.put("notes", description);
        if (!config.getAuthorName().isEmpty()) {
            root.put("author", config.getAuthorName());
        }
        if (!config.getAuthorEmail().isEmpty()) {
            root.put("author_email", config.getAuthorEmail());
        }

        if (config.isVersionGenerated()) {
            // Generated version is today's date.
            DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
            root.put("version", dateFormat.format(new Date()));
        } else if (!config.getVersion().isEmpty()) {
            root.put("version", config.getVersion());
        }
        return root;
    }

    /**
     * Creates a minimal dataset (name, title, owning organisation) in CKAN.
     * <p>
     * Any outcome other than status 201, including an I/O failure, is reported through
     * {@link ContextUtils#sendError}.
     *
     * @param title dataset title
     * @throws JSONException if the JSON library rejects a value
     */
    private void createDataset(String title) throws JSONException {
        JSONObject createRoot = new JSONObject();
        createRoot.put("name", config.getDatasetID());
        createRoot.put("title", title);
        createRoot.put("owner_org", config.getOrgID());

        logger.debug("Creating dataset in CKAN");
        HttpPost httpPost = new HttpPost(config.getApiUri());
        httpPost.addHeader(new BasicHeader("Authorization", config.getApiKey()));

        String json = createRoot.toString();
        logger.debug("Creating dataset with: {}", json);
        httpPost.setEntity(new StringEntity(json, UTF_8));

        CloseableHttpClient client = HttpClientBuilder.create().setRedirectStrategy(new LaxRedirectStrategy())
                .build();
        try {
            CloseableHttpResponse response = client.execute(httpPost);
            try {
                int status = response.getStatusLine().getStatusCode();
                if (status == 201) {
                    logger.info("Dataset created OK: {}", response.getStatusLine());
                } else if (status == 409) {
                    logger.error("Dataset already exists: {}", response.getStatusLine());
                    ContextUtils.sendError(ctx, "Dataset already exists", "Dataset already exists: {0}",
                            response.getStatusLine());
                } else {
                    ContextUtils.sendError(ctx, "Error creating dataset",
                            "Response while creating dataset: {0}", response.getStatusLine());
                }
            } finally {
                response.close();
            }
        } catch (IOException e) {
            logger.error(e.getLocalizedMessage(), e);
            // The message is passed as an argument, not as the format, so braces in it cannot break formatting.
            ContextUtils.sendError(ctx, "Error creating dataset", "{0}", e.getLocalizedMessage());
        } finally {
            closeClient(client, "Error creating dataset");
        }
    }

    /**
     * Posts the complete dataset record to CKAN.
     * <p>
     * Any outcome other than status 200, including an I/O failure, is reported through
     * {@link ContextUtils#sendError}.
     *
     * @param root complete dataset object
     * @throws URISyntaxException if the configured API URI and dataset id do not form a valid URI
     */
    private void updateDataset(JSONObject root) throws URISyntaxException {
        logger.debug("Posting to CKAN");
        URIBuilder uriBuilder = new URIBuilder(config.getApiUri() + "/" + config.getDatasetID());
        HttpPost httpPost = new HttpPost(uriBuilder.build().normalize());
        httpPost.addHeader(new BasicHeader("Authorization", config.getApiKey()));
        httpPost.setEntity(new StringEntity(root.toString(), UTF_8));

        CloseableHttpClient client = HttpClients.createDefault();
        try {
            CloseableHttpResponse response = client.execute(httpPost);
            try {
                if (response.getStatusLine().getStatusCode() == 200) {
                    // Log the body itself; the entity object's toString() carries no useful information.
                    logger.info("Response: {}", readBody(response));
                } else {
                    ContextUtils.sendError(ctx, "Error updating dataset",
                            "Response while updating dataset: {0}", response.getStatusLine());
                }
            } finally {
                response.close();
            }
        } catch (IOException e) {
            logger.error(e.getLocalizedMessage(), e);
            ContextUtils.sendError(ctx, "Error updating dataset", "{0}", e.getLocalizedMessage());
        } finally {
            closeClient(client, "Error updating dataset");
        }
    }

    /**
     * Reads the response body as text, falling back to UTF-8 when the response declares no charset.
     *
     * @return the body, or an empty string when the response has no entity
     * @throws IOException if the body cannot be read
     */
    private static String readBody(CloseableHttpResponse response) throws IOException {
        if (response.getEntity() == null) {
            return "";
        }
        return EntityUtils.toString(response.getEntity(), UTF_8);
    }

    /**
     * Closes the client, logging a failure instead of propagating it.
     *
     * @param client       client to close
     * @param errorCaption caption of the error sent to the execution context when closing fails, or
     *                     {@code null} to only log
     */
    private void closeClient(CloseableHttpClient client, String errorCaption) {
        try {
            client.close();
        } catch (IOException e) {
            logger.error(e.getLocalizedMessage(), e);
            if (errorCaption != null) {
                ContextUtils.sendError(ctx, errorCaption, "{0}", e.getLocalizedMessage());
            }
        }
    }

    /**
     * Runs a SPARQL SELECT that is expected to yield at most one row and returns one binding of it.
     *
     * @param queryAsString SPARQL SELECT query
     * @param bindingName   name of the variable to read from the single result row
     * @return the string value of the binding, or an empty string when the query has no result
     * @throws DPUException when the query yields more than one row, when the row does not bind
     *                      {@code bindingName}, or when query execution fails
     */
    private String executeSimpleSelectQuery(final String queryAsString, String bindingName) throws DPUException {
        List<Map<String, Value>> rows = executeSelectQuery(queryAsString);
        if (rows.isEmpty()) {
            return "";
        }
        if (rows.size() != 1) {
            throw new DPUException("Unexpected number of results: " + rows.size());
        }
        Value value = rows.get(0).get(bindingName);
        if (value == null) {
            throw new DPUException("Variable '" + bindingName + "' is not bound in the result of: " + queryAsString);
        }
        return value.stringValue();
    }

    /**
     * Runs a SPARQL SELECT over all entries of the {@code metadata} input.
     *
     * @param queryAsString SPARQL SELECT query
     * @return one map of variable name to value per result row
     * @throws DPUException when query preparation or execution fails
     */
    private List<Map<String, Value>> executeSelectQuery(final String queryAsString) throws DPUException {
        // Prepare the SPARQL select query.
        final SparqlUtils.SparqlSelectObject query = faultTolerance
                .execute(new FaultTolerance.ActionReturn<SparqlUtils.SparqlSelectObject>() {

                    @Override
                    public SparqlUtils.SparqlSelectObject action() throws Exception {
                        return SparqlUtils.createSelect(queryAsString,
                                DataUnitUtils.getEntries(metadata, RDFDataUnit.Entry.class));
                    }
                });
        final SparqlUtils.QueryResultCollector result = new SparqlUtils.QueryResultCollector();
        faultTolerance.execute(metadata, new FaultTolerance.ConnectionAction() {

            @Override
            public void action(RepositoryConnection connection) throws Exception {
                result.prepare();
                SparqlUtils.execute(connection, ctx, query, result);
            }
        });

        return result.getResults();
    }

}