esiptestbed.mudrod.metadata.pre.ApiHarvester.java Source code

Introduction

Here is the source code for esiptestbed.mudrod.metadata.pre.ApiHarvester.java
Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package esiptestbed.mudrod.metadata.pre;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

import org.apache.commons.io.IOUtils;
import org.elasticsearch.action.index.IndexRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;

import esiptestbed.mudrod.discoveryengine.DiscoveryStepAbstract;
import esiptestbed.mudrod.driver.ESDriver;
import esiptestbed.mudrod.driver.SparkDriver;
import esiptestbed.mudrod.main.MudrodConstants;
import esiptestbed.mudrod.utils.HttpRequest;

/**
 * ClassName: ApiHarvester Function: Harvest metadata from the PO.DAAC web
 * service and index locally stored metadata records into Elasticsearch.
 *
 * <p>
 * {@link #execute()} only performs the indexing half: it installs the
 * metadata mapping and bulk-indexes every file found in the raw metadata
 * directory. The web harvesting routine is a private helper that is not
 * invoked from within this class.
 */
public class ApiHarvester extends DiscoveryStepAbstract {

    private static final long serialVersionUID = 1L;
    private static final Logger LOG = LoggerFactory.getLogger(ApiHarvester.class);

    /** Number of dataset entries requested per call to the PO.DAAC API. */
    private static final int PAGE_SIZE = 10;

    /** Pause between two consecutive PO.DAAC API requests, in milliseconds. */
    private static final long REQUEST_PAUSE_MILLIS = 100L;

    /**
     * Creates a new instance of ApiHarvester.
     *
     * @param props
     *          the Mudrod configuration
     * @param es
     *          the Elasticsearch driver
     * @param spark
     *          the spark driver
     */
    public ApiHarvester(Properties props, ESDriver es, SparkDriver spark) {
        super(props, es, spark);
    }

    /**
     * Indexes the locally stored metadata files into Elasticsearch: creates
     * the bulk processor, installs the metadata mapping, imports every file
     * of the raw metadata directory, destroys the bulk processor and finally
     * refreshes the index.
     *
     * @return always {@code null}
     */
    @Override
    public Object execute() {
        LOG.info("Starting Metadata harvesting.");
        startTime = System.currentTimeMillis();
        es.createBulkProcessor();
        addMetadataMapping();
        importToES();
        es.destroyBulkProcessor();
        endTime = System.currentTimeMillis();
        es.refreshIndex();
        // The elapsed time is reported in whole seconds (milliseconds / 1000).
        LOG.info("Metadata harvesting completed. Time elapsed: {}", (endTime - startTime) / 1000);
        return null;
    }

    /**
     * addMetadataMapping: Add mapping to index metadata in Elasticsearch. Please
     * invoke this method before import metadata to Elasticsearch.
     *
     * <p>
     * The mapping is a dynamic template that maps every string field to type
     * {@code string} analyzed with the {@code english} analyzer. It is put on
     * the index named by the {@code indexName} property for the type named by
     * the {@code raw_metadataType} property.
     */
    public void addMetadataMapping() {
        String mappingJson = "{\r\n   \"dynamic_templates\": " + "[\r\n      " + "{\r\n         \"strings\": "
                + "{\r\n            \"match_mapping_type\": \"string\","
                + "\r\n            \"mapping\": {\r\n               \"type\": \"string\","
                + "\r\n               \"analyzer\": \"english\"\r\n            }"
                + "\r\n         }\r\n      }\r\n   ]\r\n}";
        es.getClient().admin().indices().preparePutMapping(props.getProperty("indexName"))
                .setType(props.getProperty("raw_metadataType")).setSource(mappingJson).execute().actionGet();
    }

    /**
     * importToES: Index metadata into elasticsearch from local file directory.
     * Please make sure metadata have been harvest from web service before
     * invoking this method.
     *
     * <p>
     * Every entry of the directory configured under
     * {@link MudrodConstants#RAW_METADATA_PATH} is opened and handed to
     * {@link #importSingleFileToES(InputStream)}. Entries that cannot be
     * opened are logged and skipped. If the directory cannot be listed at all
     * an error is logged and nothing is imported.
     */
    private void importToES() {
        File directory = new File(props.getProperty(MudrodConstants.RAW_METADATA_PATH));
        File[] fList = directory.listFiles();
        if (fList == null) {
            // listFiles() yields null when the path is not a directory or an
            // I/O error occurs; iterating over it would raise an NPE.
            LOG.error("Unable to list metadata files in directory {}", directory.getAbsolutePath());
            return;
        }
        for (File file : fList) {
            // try-with-resources guarantees the stream is closed after each
            // file, so a large directory does not exhaust file handles.
            try (InputStream is = new FileInputStream(file)) {
                importSingleFileToES(is);
            } catch (FileNotFoundException e) {
                LOG.error("Error finding file!", e);
            } catch (IOException e) {
                LOG.error("Error closing metadata file!", e);
            }
        }
    }

    /**
     * Reads the whole stream as text, parses it as JSON and queues the
     * resulting document on the bulk processor for indexing into the index
     * and type named by the {@code indexName} and {@code raw_metadataType}
     * properties. The stream is not closed here; that is left to the caller.
     *
     * @param is
     *          stream providing a single JSON metadata record
     */
    private void importSingleFileToES(InputStream is) {
        try {
            String jsonTxt = IOUtils.toString(is);
            JsonParser parser = new JsonParser();
            JsonElement item = parser.parse(jsonTxt);
            IndexRequest ir = new IndexRequest(props.getProperty("indexName"),
                    props.getProperty("raw_metadataType")).source(item.toString());
            es.getBulkProcessor().add(ir);
        } catch (IOException e) {
            LOG.error("Error indexing metadata record!", e);
        }
    }

    /**
     * harvestMetadatafromWeb: Harvest metadata from PO.DAAC web service.
     *
     * <p>
     * Pages through the dataset search API {@value #PAGE_SIZE} entries at a
     * time and writes each returned document to
     * {@code <RAW_METADATA_PATH>/<n>.json}, where {@code n} is the running
     * position of the document in the result list. Harvesting stops at the
     * first page that contains no documents, or as soon as the current thread
     * is interrupted.
     */
    private void harvestMetadatafromWeb() {
        String metadataPath = props.getProperty(MudrodConstants.RAW_METADATA_PATH);

        // The target directory does not change between pages, so it is
        // checked once up front instead of on every iteration.
        File outputDir = new File(metadataPath);
        if (!outputDir.exists()) {
            if (outputDir.mkdir()) {
                LOG.info("Directory is created!");
            } else {
                LOG.error("Failed to create directory!");
            }
        }

        int startIndex = 0;
        int docLength;
        JsonParser parser = new JsonParser();
        do {
            String searchAPI = "https://podaac.jpl.nasa.gov/api/dataset?startIndex=" + startIndex + "&entries="
                    + PAGE_SIZE + "&sortField=Dataset-AllTimePopularity&sortOrder=asc&id=&value=&search=";
            HttpRequest http = new HttpRequest();
            String response = http.getRequest(searchAPI);

            JsonElement json = parser.parse(response);
            JsonObject responseObject = json.getAsJsonObject();
            JsonArray docs = responseObject.getAsJsonObject("response").getAsJsonArray("docs");

            docLength = docs.size();

            for (int i = 0; i < docLength; i++) {
                JsonElement item = docs.get(i);
                int docId = startIndex + i;
                File itemfile = new File(metadataPath + "/" + docId + ".json");

                // Opening the FileWriter already creates (or truncates) the
                // file, so no separate createNewFile() call is required.
                try (FileWriter fw = new FileWriter(itemfile.getAbsoluteFile());
                        BufferedWriter bw = new BufferedWriter(fw)) {
                    bw.write(item.toString());
                } catch (IOException e) {
                    LOG.error("Error writing metadata to local file!", e);
                }
            }

            startIndex += PAGE_SIZE;

            try {
                // Short pause between requests to throttle calls to the API.
                Thread.sleep(REQUEST_PAUSE_MILLIS);
            } catch (InterruptedException e) {
                LOG.error("Metadata harvesting was interrupted!", e);
                // Restore the interrupt flag for callers and stop: with the
                // flag set every further sleep would fail immediately and the
                // loop would keep requesting pages without any pause.
                Thread.currentThread().interrupt();
                return;
            }

        } while (docLength != 0);
    }

    /**
     * Not used by this step.
     *
     * @param o
     *          ignored
     * @return always {@code null}
     */
    @Override
    public Object execute(Object o) {
        return null;
    }

}