eumetsat.pn.solr.SolrFeeder.java Source code

Java tutorial

Introduction

Here is the source code for eumetsat.pn.solr.SolrFeeder.java

Source

package eumetsat.pn.solr;

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
import com.github.autermann.yaml.YamlNode;
import eumetsat.pn.common.ISO2JSON;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.util.Collection;
import org.apache.commons.io.FileUtils;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.apache.logging.log4j.core.config.Configurator;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer.RemoteSolrException;
import org.apache.solr.client.solrj.response.SolrPingResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrInputDocument;
import org.json.simple.JSONArray;

/**
 *
 * @author danu
 */
public class SolrFeeder extends ISO2JSON {

    private SolrServer server;

    public SolrFeeder() {
        super();
    }

    public SolrFeeder(Path configFile) {
        super(configFile);
    }

    public SolrFeeder(SolrServer server) {
        super();
        this.server = server;
    }

    public static void main(String[] args) throws IOException {
        try {
            URI configuration = ISO2JSON.class.getResource("/feeder-log4j2.xml").toURI();
            Configurator.initialize("eumetsat.pn", null, configuration);
        } catch (URISyntaxException e) {
            log.error("Could not configure logging: {}", e.getMessage());
        }

        ISO2JSON transformer = new SolrFeeder();
        transformer.transformAndIndex();

        log.info("Finished.");
    }

    @Override
    protected void indexDirContent(Path aSrcDir) {
        log.info("Indexing dir content {}", aSrcDir);

        JSONParser parser = new JSONParser();

        YamlNode endpointConfig = this.config.get("endpoint");
        String collection = endpointConfig.get("collection").asTextValue();

        SolrServer solr;
        if (this.server != null) {
            solr = server;
            log.info("Using embedded SolrServer: {}", solr);
        } else {
            log.info("Endpoint configuration: {}", endpointConfig);

            String solrEndpoint = endpointConfig.get("url").asTextValue();
            solr = new ConcurrentUpdateSolrServer(solrEndpoint + "/" + collection, 10, 1);
            //        CloudSolrServer solr = new CloudSolrServer(solrEndpoint);
            //        solr.setDefaultCollection(collection);
            log.info("Using HTTP SolrServer: {}", solr);
        }

        SolrPingResponse ping;
        try {
            ping = solr.ping();
            log.debug("Pinged Solr server: {}", ping);
        } catch (SolrServerException | IOException | RemoteSolrException e) {
            log.error("Could not ping Solr server", e);
        }

        //        Settings settings = ImmutableSettings.settingsBuilder().put("cluster.name", endpointConfig.get("cluster.name").asTextValue()).build();
        int cpt = 0;
        Collection<File> inputFiles = FileUtils.listFiles(aSrcDir.toFile(), new String[] { "json" }, false);
        log.info("Indexing {} files...", inputFiles.size());

        for (File file : inputFiles) {
            try {
                String jsonStr = FileUtils.readFileToString(file);
                JSONObject jsObj = (JSONObject) parser.parse(jsonStr);

                try {
                    SolrInputDocument input = createInputDoc(jsObj);

                    log.debug("Adding {} to collection {}", file.getName(), collection);
                    log.trace("Full json of {}: {}", file.getName(), input);
                    solr.add(input);

                    cpt++;
                } catch (RuntimeException e) {
                    log.error("Error processing input document {}: {}, {}", file, e, e.getMessage());
                }

                if (cpt % 42 == 0) { // periodically flush
                    log.info("Commiting to server, document count is {}", cpt);
                    UpdateResponse response = solr.commit();
                    log.info("Response status: {} (time: {}): {}", response.getStatus(), response.getElapsedTime(),
                            response.toString());
                }

            } catch (IOException | ParseException | SolrServerException e) {
                log.error("Error comitting document based on file {}", file, e);
            }
        }

        try {
            solr.commit();
        } catch (IOException | SolrServerException e) {
            log.error("Error comitting document", e);
        }

        solr.shutdown();

        log.info("Indexed {} of {} files.", cpt, inputFiles.size());
    }

    private SolrInputDocument createInputDoc(JSONObject jsObj) {
        SolrInputDocument input = new SolrInputDocument();
        // field values should match schema.xml

        // solr can add new fields on the fly: http://heliosearch.org/solr/getting-started/
        String id = (String) jsObj.get(FILE_IDENTIFIER_PROPERTY);
        input.addField("id", id);

        JSONObject info = (JSONObject) jsObj.get("identificationInfo");
        input.addField("title", info.get("title"));
        input.addField("description", info.get("abstract"));
        if (!info.get("thumbnail").toString().isEmpty()) {
            input.addField("thumbnail_s", info.get("thumbnail"));
        }

        JSONArray keywords = (JSONArray) info.get("keywords");
        if (!keywords.isEmpty()) {
            input.addField("keywords", keywords.toArray());
        }

        if (!info.get("status").toString().isEmpty()) {
            input.addField("status_s", info.get("status"));
        }

        JSONObject hierarchy = (JSONObject) jsObj.get("hierarchyNames");

        //public static final ImmutableMap<String, String> FACETS2HIERACHYNAMES = ImmutableMap.of("satellites", "hierarchyNames.satellite",
        //"instruments", "hierarchyNames.instrument",
        //"categories", "hierarchyNames.category",
        //"societalBenefitArea", "hierarchyNames.societalBenefitArea",
        //"distribution", "hierarchyNames.distribution");
        if (hierarchy.get("satellite") != null && !hierarchy.get("satellite").toString().isEmpty()) {
            input.addField("satellite_s", hierarchy.get("satellite"));
        }

        if (hierarchy.get("instrument") != null && !hierarchy.get("instrument").toString().isEmpty()) {
            input.addField("instrument_s", hierarchy.get("instrument"));
        }

        JSONArray categories = (JSONArray) hierarchy.get("category");
        if (categories != null && !categories.isEmpty()) {
            input.addField("category", categories);
        }

        JSONArray sbas = (JSONArray) hierarchy.get("societalBenefitArea");
        if (sbas != null && !sbas.isEmpty()) {
            input.addField("societalBenefitArea_ss", sbas);
        }

        Collection<String> distrs = (Collection<String>) hierarchy.get("distribution");
        if (distrs != null && !distrs.isEmpty()) {
            input.addField("distribution_ss", distrs);
        }

        // https://cwiki.apache.org/confluence/display/solr/Spatial+Search
        JSONObject location = (JSONObject) jsObj.get("location");
        String type = (String) location.get("type");
        if ("envelope".equals(type)) {
            StringBuilder envelope = new StringBuilder();
            // ISO2JSON: envelope.add(leftTopPt); envelope.add(rightDownPt);
            JSONArray coords = (JSONArray) location.get("coordinates");
            JSONArray leftTopPoint = (JSONArray) coords.get(0);
            JSONArray rightDownPoint = (JSONArray) coords.get(1);

            // Spatial search envelope: minX, maxX, maxY, minY
            envelope.append("ENVELOPE(").append(leftTopPoint.get(0)).append(", ");
            envelope.append(rightDownPoint.get(0)).append(", ");
            envelope.append(leftTopPoint.get(1)).append(", ");
            envelope.append(rightDownPoint.get(1)).append(")");
            input.addField("boundingbox", envelope.toString());
        } else {
            log.warn("Unsupported location field value: {}", location.toJSONString());
        }

        JSONObject contact = (JSONObject) jsObj.get("contact");
        input.addField("email_s", contact.get("email"));
        input.addField("address_s", contact.get("address"));

        input.addField("xmldoc", jsObj.get("xmldoc"));

        return input;
    }

    @Override
    protected String getConfigBasename() {
        return "solr";
    }
}