com.stratio.ingestion.serializer.elasticsearch.ElasticSearchSerializerWithMapping.java Source code

Java tutorial

Introduction

Here is the source code for com.stratio.ingestion.serializer.elasticsearch.ElasticSearchSerializerWithMapping.java

Source

/**
 * Copyright (C) 2014 Stratio (http://stratio.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.stratio.ingestion.serializer.elasticsearch;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Date;
import java.util.Map;

import com.google.common.base.Charsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.conf.ComponentConfiguration;
import org.apache.flume.event.SimpleEvent;
import org.apache.flume.sink.elasticsearch.ContentBuilderUtil;
import org.apache.flume.sink.elasticsearch.ElasticSearchIndexRequestBuilderFactory;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.io.BytesStream;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.indices.IndexAlreadyExistsException;
import org.joda.time.DateTime;
import org.joda.time.DateTimeUtils;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Throwables;
import com.google.common.collect.Maps;

public class ElasticSearchSerializerWithMapping implements ElasticSearchIndexRequestBuilderFactory {

    private static final Logger logger = LoggerFactory.getLogger(ElasticSearchSerializerWithMapping.class);

    private static final String CONF_MAPPING_FILE = "mappingFile";

    private String jsonMapping = "";
    private String oldIndexName = "";

    @Override
    public void configure(Context context) {
        String mappingFile = context.getString(CONF_MAPPING_FILE);
        try {
            jsonMapping = readFile(new File(mappingFile));
        } catch (IOException e) {
            Throwables.propagate(e);
        }
    }

    /**
     * Gets the content of the mapping file
     * 
     * @return The content of the file in UTF-8
     * @param file
     *            File with the mapping
     */
    private static final String readFile(File file) throws IOException {
        FileInputStream inputStream;
        inputStream = new FileInputStream(file);
        return IOUtils.toString(inputStream, "UTF-8");
    }

    @Override
    public void configure(ComponentConfiguration conf) {
    }

    @Override
    public IndexRequestBuilder createIndexRequest(Client client, String indexPrefix, String indexType, Event event)
            throws IOException {
        IndexRequestBuilder request = prepareIndex(client);
        TimestampedEvent timestampedEvent = new TimestampedEvent(event);
        long timestamp = timestampedEvent.getTimestamp();
        String indexName = getIndexName(indexPrefix, timestamp);

        if (!jsonMapping.isEmpty() && !oldIndexName.equals(indexName)) {
            oldIndexName = indexName;
            createIndexWithMapping(client, indexName, indexType);
        }

        prepareIndexRequest(request, indexName, indexType, timestampedEvent);
        return request;
    }

    /**
     * Creates the index if no exists with the mapping defined by the user
     * 
     * @param client
     *            ElasticSearch {@link Client}
     * @param indexName
     *            Index name to use -- as per
     *            {@link #getIndexName(String, long)}
     * @param indexType
     *            Index type to use -- as configured on the sink
     */
    private void createIndexWithMapping(Client client, String indexName, String indexType) {
        try {
            client.admin().indices().create(new CreateIndexRequest(indexName).mapping(indexType, jsonMapping))
                    .actionGet();
        } catch (IndexAlreadyExistsException e) {
            logger.info("The index " + indexName + " already exists");
        }
    }

    @VisibleForTesting
    IndexRequestBuilder prepareIndex(Client client) {
        return client.prepareIndex();
    }

    /**
     * Gets the name of the index to use for an index request
     * 
     * @return index name of the form 'indexPrefix-formattedTimestamp'
     * @param indexPrefix
     *            Prefix of index name to use -- as configured on the sink
     * @param timestamp
     *            timestamp (millis) to format / use
     */
    private String getIndexName(String indexPrefix, long timestamp) {
        return new StringBuilder(indexPrefix).append('-')
                .append(ElasticSearchIndexRequestBuilderFactory.df.format(timestamp)).toString();
    }

    /**
     * Prepares an ElasticSearch {@link IndexRequestBuilder} instance
     * 
     * @param indexRequest
     *            The (empty) ElasticSearch {@link IndexRequestBuilder} to
     *            prepare
     * @param indexName
     *            Index name to use -- as per
     *            {@link #getIndexName(String, long)}
     * @param indexType
     *            Index type to use -- as configured on the sink
     * @param event
     *            Flume event to serialize and add to index request
     * @throws IOException
     *             If an error occurs e.g. during serialization
     */
    private void prepareIndexRequest(IndexRequestBuilder indexRequest, String indexName, String indexType,
            Event event) throws IOException {
        BytesStream contentBuilder = getContentBuilder(event);
        indexRequest.setIndex(indexName).setType(indexType).setSource(contentBuilder.bytes());
        final String _id = event.getHeaders().get("_id");
        if (_id != null) {
            indexRequest.setId(_id);
        }
    }

    private BytesStream getContentBuilder(Event event) throws IOException {
        XContentBuilder builder = jsonBuilder().startObject();
        appendBody(builder, event);
        appendHeaders(builder, event);
        return builder;
    }

    private void appendBody(XContentBuilder builder, Event event) throws IOException, UnsupportedEncodingException {
        byte[] body = event.getBody();
        ContentBuilderUtil.appendField(builder, "@message", body);
    }

    private void appendHeaders(XContentBuilder builder, Event event) throws IOException {
        Map<String, String> headers = Maps.newHashMap(event.getHeaders());

        for (Map.Entry<String, String> entry : headers.entrySet()) {
            byte[] val = entry.getValue().getBytes(Charsets.UTF_8);
            ContentBuilderUtil.appendField(builder, entry.getKey(), val);
        }
    }

}

/**
 * {@link Event} carrying a resolved timestamp in milliseconds. The value comes
 * from the "@timestamp" header, read first as a long and then as an ISO
 * date with optional time; when the header is blank or cannot be read either
 * way, the current time is used and written back to the header.
 */
final class TimestampedEvent extends SimpleEvent {

    private static final Logger log = LoggerFactory.getLogger(TimestampedEvent.class);

    private final long timestamp;

    /**
     * Copies body and headers from the given event and resolves its timestamp.
     *
     * @param base
     *            event to copy from
     */
    TimestampedEvent(Event base) {
        super();
        setBody(base.getBody());

        final Map<String, String> headerCopy = Maps.newHashMap(base.getHeaders());
        final String rawValue = headerCopy.get("@timestamp");

        Long resolved = null;
        if (StringUtils.isNotBlank(rawValue)) {
            try {
                resolved = Long.parseLong(rawValue);
                // A numeric header is rewritten in ISO date-time form (UTC).
                headerCopy.put("@timestamp", ISODateTimeFormat.dateTime().withZoneUTC().print(resolved));
            } catch (RuntimeException notEpochMillis) {
                log.trace("Could not parse timestamp as long: {}", rawValue);
                final Long isoMillis = parseIsoMillisOrNull(rawValue);
                if (isoMillis != null) {
                    resolved = isoMillis;
                }
            }
        }

        final long effective;
        if (resolved != null) {
            effective = resolved;
        } else {
            // Nothing usable in the header: stamp the event with the current time.
            final DateTime current = DateTime.now();
            effective = current.getMillis();
            headerCopy.put("@timestamp", ISODateTimeFormat.dateTime().withZoneUTC().print(current));
        }

        this.timestamp = effective;
        setHeaders(headerCopy);
    }

    /**
     * Reads the text as an ISO date with optional time, in UTC.
     *
     * @param text
     *            header value to parse
     * @return the parsed milliseconds, or {@code null} if parsing fails
     */
    private static Long parseIsoMillisOrNull(String text) {
        try {
            return ISODateTimeFormat.dateOptionalTimeParser().withZoneUTC().parseMillis(text);
        } catch (RuntimeException notIsoDate) {
            log.trace("Could not parse timestamp as dateOptionalTime: {}", text);
            return null;
        }
    }

    /**
     * @return the resolved timestamp in milliseconds
     */
    long getTimestamp() {
        return timestamp;
    }
}