org.apache.beam.sdk.io.elasticsearch.ElasticSearchIOTestUtils.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.beam.sdk.io.elasticsearch.ElasticSearchIOTestUtils.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.beam.sdk.io.elasticsearch;

import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.ConnectionConfiguration;
import static org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.parseResponse;

import com.fasterxml.jackson.databind.JsonNode;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.http.HttpEntity;
import org.apache.http.entity.ContentType;
import org.apache.http.nio.entity.NStringEntity;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;

/**
 * Test utilities to use with {@link ElasticsearchIO}.
 *
 * <p>All helpers are static and issue their requests through a caller-supplied {@link RestClient}.
 */
class ElasticSearchIOTestUtils {
    static final String[] FAMOUS_SCIENTISTS = { "Einstein", "Darwin", "Copernicus", "Pasteur", "Curie", "Faraday",
            "Newton", "Bohr", "Galilei", "Maxwell" };
    static final int NUM_SCIENTISTS = FAMOUS_SCIENTISTS.length;

    /** Substring of an exception message that the helpers treat as "the index does not exist". */
    private static final String INDEX_NOT_FOUND_ERROR = "index_not_found_exception";

    /** Enumeration that specifies whether to insert malformed documents. */
    public enum InjectionMode {
        INJECT_SOME_INVALID_DOCS, DO_NOT_INJECT_INVALID_DOCS
    }

    /**
     * Deletes the index named in the connection configuration synchronously.
     *
     * @param connectionConfiguration providing the name of the index to delete
     * @param restClient To use for issuing the requests
     * @throws IOException On any error other than the index not existing
     */
    static void deleteIndex(ConnectionConfiguration connectionConfiguration, RestClient restClient)
            throws IOException {
        deleteIndex(restClient, connectionConfiguration.getIndex());
    }

    /** Issues a {@code POST /<index>/_close} request for the given index. */
    private static void closeIndex(RestClient restClient, String index) throws IOException {
        restClient.performRequest("POST", String.format("/%s/_close", index));
    }

    /**
     * Closes and then deletes the given index; a missing index is not treated as an error.
     *
     * @param restClient To use for issuing the requests
     * @param index The Elasticsearch index to delete
     * @throws IOException On any error other than the index not existing
     */
    private static void deleteIndex(RestClient restClient, String index) throws IOException {
        try {
            closeIndex(restClient, index);
            restClient.performRequest("DELETE", String.format("/%s", index));
        } catch (IOException e) {
            // It is fine to ignore this exception: deleteIndex occurs in @Before,
            // so when the first test is run, the index does not exist yet.
            if (!isIndexNotFound(e)) {
                throw e;
            }
        }
    }

    /**
     * Tells whether the given exception reports a missing index.
     *
     * <p>The check is null-safe: an exception without a message is never considered an
     * "index not found" error, so it gets rethrown by the callers instead of triggering a
     * {@link NullPointerException} that would hide the original failure.
     */
    private static boolean isIndexNotFound(IOException e) {
        String message = e.getMessage();
        return message != null && message.contains(INDEX_NOT_FOUND_ERROR);
    }

    /**
     * Synchronously deletes the target if it exists and then (re)creates it as a copy of source
     * synchronously.
     *
     * @param restClient To use for issuing the requests
     * @param source The index to copy from
     * @param target The index to (re)create
     * @throws IOException On error communicating with Elasticsearch
     */
    static void copyIndex(RestClient restClient, String source, String target) throws IOException {
        deleteIndex(restClient, target);
        HttpEntity entity = new NStringEntity(
                String.format("{\"source\" : { \"index\" : \"%s\" }, \"dest\" : { \"index\" : \"%s\" } }", source,
                        target),
                ContentType.APPLICATION_JSON);
        // Type-safe empty map (instead of the raw Collections.EMPTY_MAP): no query parameters.
        restClient.performRequest("POST", "/_reindex", Collections.emptyMap(), entity);
    }

    /**
     * Inserts the given number of well-formed test documents into Elasticsearch with a single bulk
     * request. Documents get the ids {@code 0 .. numDocs - 1}.
     *
     * @param connectionConfiguration providing the index and type to insert into
     * @param numDocs Number of documents to insert
     * @param restClient To use for issuing the bulk request
     * @throws IOException On error communicating with Elasticsearch or if the bulk response reports
     *     errors
     */
    static void insertTestDocuments(ConnectionConfiguration connectionConfiguration, long numDocs,
            RestClient restClient) throws IOException {
        List<String> data = createDocuments(numDocs, InjectionMode.DO_NOT_INJECT_INVALID_DOCS);
        String index = connectionConfiguration.getIndex();
        String type = connectionConfiguration.getType();

        // The bulk body alternates one action line and one document line per document.
        StringBuilder bulkRequest = new StringBuilder();
        int id = 0;
        for (String document : data) {
            bulkRequest.append(String.format(
                    "{ \"index\" : { \"_index\" : \"%s\", \"_type\" : \"%s\", \"_id\" : \"%s\" } }%n%s%n",
                    index, type, id++, document));
        }
        String endPoint = String.format("/%s/%s/_bulk", index, type);
        HttpEntity requestBody = new NStringEntity(bulkRequest.toString(), ContentType.APPLICATION_JSON);
        // refresh=true is passed as a query parameter of the bulk request.
        Response response = restClient.performRequest("POST", endPoint, Collections.singletonMap("refresh", "true"),
                requestBody);
        ElasticsearchIO.checkForErrors(response, ElasticsearchIO.getBackendVersion(connectionConfiguration));
    }

    /**
     * Forces a refresh of the given index to make recently inserted documents available for search
     * using the index and type named in the connectionConfiguration.
     *
     * @param connectionConfiguration providing the index and type
     * @param restClient To use for issuing queries
     * @return The number of docs in the index, or 0 if the index does not exist
     * @throws IOException On error communicating with Elasticsearch
     */
    static long refreshIndexAndGetCurrentNumDocs(ConnectionConfiguration connectionConfiguration,
            RestClient restClient) throws IOException {
        return refreshIndexAndGetCurrentNumDocs(restClient, connectionConfiguration.getIndex(),
                connectionConfiguration.getType());
    }

    /**
     * Forces a refresh of the given index to make recently inserted documents available for search.
     *
     * @param restClient To use for issuing queries
     * @param index The Elasticsearch index
     * @param type The Elasticsearch type
     * @return The total hit count of a search on index/type, or 0 if the index does not exist
     * @throws IOException On error communicating with Elasticsearch
     */
    static long refreshIndexAndGetCurrentNumDocs(RestClient restClient, String index, String type)
            throws IOException {
        try {
            restClient.performRequest("POST", String.format("/%s/_refresh", index));

            Response response = restClient.performRequest("GET", String.format("/%s/%s/_search", index, type));
            JsonNode searchResult = ElasticsearchIO.parseResponse(response);
            return searchResult.path("hits").path("total").asLong();
        } catch (IOException e) {
            // It is fine to ignore a missing index: in testWriteWithBatchSize* this method is
            // sometimes called before any doc has been written (when fewer docs have been
            // processed than batchSize). In that case the index/type has not been created yet
            // (it is created upon first doc insertion), so there are simply no documents.
            if (!isIndexNotFound(e)) {
                throw e;
            }
            return 0;
        }
    }

    /**
     * Generates a list of test documents for insertion.
     *
     * <p>Document {@code i} carries the id {@code i} and the scientist at position
     * {@code i % FAMOUS_SCIENTISTS.length}. With {@link InjectionMode#INJECT_SOME_INVALID_DOCS} the
     * documents 6 and 7 are malformed JSON.
     *
     * @param numDocs Number of docs to generate
     * @param injectionMode {@link InjectionMode} that specifies whether to insert malformed documents
     * @return the list of json String representing the documents
     */
    static List<String> createDocuments(long numDocs, InjectionMode injectionMode) {
        boolean injectInvalidDocs = injectionMode == InjectionMode.INJECT_SOME_INVALID_DOCS;
        List<String> data = new ArrayList<>();
        // A long counter matches the type of numDocs, so it cannot wrap around before reaching it.
        for (long i = 0; i < numDocs; i++) {
            String scientist = FAMOUS_SCIENTISTS[(int) (i % FAMOUS_SCIENTISTS.length)];
            // The malformed variant uses ';' instead of ':' after the first key.
            String template = (injectInvalidDocs && (i == 6 || i == 7))
                    ? "{\"scientist\";\"%s\", \"id\":%s}"
                    : "{\"scientist\":\"%s\", \"id\":%s}";
            data.add(String.format(template, scientist, i));
        }
        return data;
    }

    /**
     * Executes a query for the named scientist and returns the count from the result.
     *
     * @param connectionConfiguration Specifies the index and type
     * @param restClient To use to execute the call
     * @param scientistName The scientist to query for
     * @return The count of documents found
     * @throws IOException On error talking to Elasticsearch
     */
    static int countByScientistName(ConnectionConfiguration connectionConfiguration, RestClient restClient,
            String scientistName) throws IOException {
        return countByMatch(connectionConfiguration, restClient, "scientist", scientistName);
    }

    /**
     * Executes a match query for given field/value and returns the count of results.
     *
     * <p>{@code field} and {@code value} are inserted verbatim into the JSON request body, so they
     * must not contain characters that would need JSON escaping.
     *
     * @param connectionConfiguration Specifies the index and type
     * @param restClient To use to execute the call
     * @param field The field to query
     * @param value The value to match
     * @return The count of documents in the search result
     * @throws IOException On error communicating with Elasticsearch
     */
    static int countByMatch(ConnectionConfiguration connectionConfiguration, RestClient restClient, String field,
            String value) throws IOException {
        String requestBody = "{\n" + "  \"query\" : {\"match\": {\n" + "    \"" + field + "\": \"" + value + "\"\n"
                + "  }}\n" + "}\n";
        String endPoint = String.format("/%s/%s/_search", connectionConfiguration.getIndex(),
                connectionConfiguration.getType());
        HttpEntity httpEntity = new NStringEntity(requestBody, ContentType.APPLICATION_JSON);
        Response response = restClient.performRequest("GET", endPoint, Collections.emptyMap(), httpEntity);
        JsonNode searchResult = parseResponse(response);
        return searchResult.path("hits").path("total").asInt();
    }
}