de.saly.elasticsearch.importer.imap.maildestination.ElasticsearchMailDestination.java Source code

Java tutorial

Introduction

Here is the source code for de.saly.elasticsearch.importer.imap.maildestination.ElasticsearchMailDestination.java

Source

/***********************************************************************************************************************
 *
 * Elasticsearch IMAP/Pop3 E-Mail Importer
 * ==========================================
 *
 * Copyright (C) 2014 by Hendrik Saly (http://saly.de) and others.
 * 
 * Contains (partially) copied code from Jörg Prante's Elasticsearch JDBC river (https://github.com/jprante/elasticsearch-river-jdbc)
 *
 ***********************************************************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 *
 ***********************************************************************************************************************
 *
 * $Id:$
 *
 **********************************************************************************************************************/
package de.saly.elasticsearch.importer.imap.maildestination;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.mail.Folder;
import javax.mail.Message;
import javax.mail.MessagingException;

import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.get.GetField;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;

import com.sun.mail.imap.IMAPFolder;
import com.sun.mail.pop3.POP3Folder;

import de.saly.elasticsearch.importer.imap.impl.IMAPImporter;
import de.saly.elasticsearch.importer.imap.support.DeleteByQuery;
import de.saly.elasticsearch.importer.imap.support.IndexableMailMessage;

/**
 * {@link MailDestination} that stores mail messages as documents in an Elasticsearch index.
 * <p>
 * Every message becomes one document of type {@code type} in index {@code index}. The document id has the form
 * {@code <popId or uid>::<folderUri>} (see {@link #createIndexRequest(IndexableMailMessage)}). The index is created
 * lazily by the first operation that needs it, using the configured settings/mapping or, when no mapping is
 * configured, a built-in default mapping.
 * <p>
 * The instance is configured through its fluent setters ({@link #client(Client)}, {@link #setIndex(String)},
 * {@link #setType(String)}, ...). The state flags ({@code closed}, {@code error}, {@code started},
 * {@code initialized}) are {@code volatile}; {@link #close()}, {@link #startup()} and the lazy index creation are
 * additionally {@code synchronized} on this instance.
 */
public class ElasticsearchMailDestination implements MailDestination {

    /** Document field holding the URL of the folder a message belongs to. */
    private static final String FOLDER_URI_FIELD = "folderUri";

    /** Document field holding the full name of the folder a message belongs to. */
    private static final String FOLDER_FULL_NAME_FIELD = "folderFullName";

    /** Document field holding the hash code of the message flags. */
    private static final String FLAG_HASHCODE_FIELD = "flaghashcode";

    /** Separator between the message identifier and the folder URI inside a document id. */
    private static final String ID_SEPARATOR = "::";

    /** Keep-alive, in milliseconds, requested for every scroll round trip. */
    private static final long SCROLL_KEEP_ALIVE_MILLIS = 1000L;

    /** Page size requested for scan/scroll searches. */
    private static final int SCROLL_PAGE_SIZE = 1000;

    private Client client;

    private volatile boolean closed;

    private volatile boolean error;

    private String index;

    private Map<String, Object> mapping;

    private Map<String, Object> settings;

    private volatile boolean started;

    private volatile boolean initialized;

    private boolean stripTagsFromTextContent = true;

    private String type;

    private boolean withAttachments = false;

    private boolean withTextContent = true;

    private boolean withHtmlContent = false;

    private boolean preferHtmlContent = false;

    private List<String> headersToFields;

    protected final ESLogger logger = ESLoggerFactory.getLogger(this.getClass().getName());

    /**
     * Deletes every stored document whose {@code folderUri} equals the URL of the given folder.
     *
     * @param folder the folder whose locally stored messages are removed
     * @throws IOException if the index has to be created and creation is not acknowledged
     * @throws MessagingException declared by the {@link MailDestination} contract
     */
    @Override
    public void clearDataForFolder(final Folder folder) throws IOException, MessagingException {

        final String folderUri = folder.getURLName().toString();

        logger.info("Delete locally all messages for folder {} in {}/{}", folderUri, index, type);

        createIndexIfNotExists();
        refreshIndex();

        DeleteByQuery.deleteByQuery(client, index, new String[] { type },
                QueryBuilders.termQuery(FOLDER_URI_FIELD, folderUri));

    }

    /**
     * Sets the Elasticsearch client used for all index operations.
     *
     * @param client the client to use
     * @return this instance, for chaining
     */
    public ElasticsearchMailDestination client(final Client client) {
        this.client = client;
        return this;
    }

    /**
     * Marks this destination as closed. After that {@link #onMessage(Message)} ignores incoming messages. Calling
     * it again has no effect. The client itself is not closed here.
     */
    @Override
    public synchronized void close() {

        if (closed) {
            return;
        }

        closed = true;

        logger.info("Closed");
    }

    /**
     * Collects the identifiers of all messages currently stored for the given folder.
     * <p>
     * The identifier is the part of the document id in front of the first {@code ::}. For an {@link IMAPFolder} it
     * is parsed and returned as {@link Long}; for every other folder type it is returned as {@link String}.
     *
     * @param folder the folder to look up
     * @return a mutable set of {@code Long} (IMAP) or {@code String} (otherwise) identifiers, never {@code null}
     * @throws IOException if the index has to be created and creation is not acknowledged
     * @throws MessagingException declared by the {@link MailDestination} contract
     * @throws NumberFormatException if the folder is an IMAP folder and a stored id does not start with a number
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    @Override
    public Set getCurrentlyStoredMessageUids(final Folder folder) throws IOException, MessagingException {

        createIndexIfNotExists();
        refreshIndex();

        final Set<Object> uids = new HashSet<Object>();
        final boolean imapFolder = folder instanceof IMAPFolder;

        final TermQueryBuilder folderQuery = QueryBuilders.termQuery(FOLDER_URI_FIELD, folder.getURLName()
                .toString());

        // Serializing the query is only worth it when the message is actually going to be logged.
        if (logger.isDebugEnabled()) {
            logger.debug("Term query: " + folderQuery.buildAsBytes().toUtf8());
        }

        // A SCAN search returns no hits itself; the hits are delivered by the following scroll requests.
        SearchResponse page = client.prepareSearch().setIndices(index).setTypes(type)
                .setSearchType(SearchType.SCAN).setQuery(folderQuery)
                .setScroll(new TimeValue(SCROLL_KEEP_ALIVE_MILLIS)).setSize(SCROLL_PAGE_SIZE).execute().actionGet();

        int hitsInPage;
        do {
            page = nextScrollPage(page);
            hitsInPage = 0;

            for (final SearchHit hit : page.getHits()) {
                hitsInPage++;

                final String messageId = hit.getId().split(ID_SEPARATOR)[0];

                if (imapFolder) {
                    uids.add(Long.parseLong(messageId));
                } else {
                    uids.add(messageId);
                }

                logger.debug("Local: {}", hit.getId());
            }
        } while (hitsInPage > 0); // an empty page means the scroll is exhausted

        logger.debug("Currently locally stored messages for folder {}: {}", folder.getURLName(), uids.size());

        return uids;

    }

    /**
     * Reads the stored {@code flaghashcode} of the document with the given id.
     *
     * @param id the document id
     * @return the stored flag hash code, or {@code -1} if no document with that id exists
     * @throws IOException if the document exists but has no numeric {@code flaghashcode} field, or if the index has
     *             to be created and creation is not acknowledged
     * @throws MessagingException declared by the {@link MailDestination} contract
     */
    @Override
    public int getFlaghashcode(final String id) throws IOException, MessagingException {

        createIndexIfNotExists();
        refreshIndex();

        final GetResponse getResponse = client.prepareGet().setIndex(index).setType(type).setId(id)
                .setFields(new String[] { FLAG_HASHCODE_FIELD }).execute().actionGet();

        if (getResponse == null || !getResponse.isExists()) {
            return -1;
        }

        final GetField flaghashcodeField = getResponse.getField(FLAG_HASHCODE_FIELD);
        final Object value = flaghashcodeField == null ? null : flaghashcodeField.getValue();

        if (!(value instanceof Number)) {
            // Covers a missing field, a null value and a non-numeric value.
            throw new IOException("No flaghashcode field for id " + id + " ("
                    + (flaghashcodeField == null ? "null" : "Val: " + value) + ")");
        }

        return ((Number) value).intValue();

    }

    /**
     * Collects the distinct {@code folderFullName} values of all stored documents.
     * <p>
     * Documents that do not carry a {@code folderFullName} value are skipped.
     *
     * @return a mutable set of folder names, never {@code null}
     * @throws IOException if the index has to be created and creation is not acknowledged
     * @throws MessagingException declared by the {@link MailDestination} contract
     */
    @Override
    public Set<String> getFolderNames() throws IOException, MessagingException {

        createIndexIfNotExists();
        refreshIndex();

        final Set<String> folderNames = new HashSet<String>();

        // A SCAN search returns no hits itself; the hits are delivered by the following scroll requests.
        SearchResponse page = client.prepareSearch().setIndices(index).setTypes(type)
                .setSearchType(SearchType.SCAN).setQuery(QueryBuilders.matchAllQuery())
                .addField(FOLDER_FULL_NAME_FIELD).setScroll(new TimeValue(SCROLL_KEEP_ALIVE_MILLIS))
                .setSize(SCROLL_PAGE_SIZE).execute().actionGet();

        int hitsInPage;
        do {
            page = nextScrollPage(page);
            hitsInPage = 0;

            for (final SearchHit hit : page.getHits()) {
                hitsInPage++;

                if (hit.getFields().get(FOLDER_FULL_NAME_FIELD) == null) {
                    // The document has no folder name; there is nothing to report for it.
                    continue;
                }

                final Object folderName = hit.getFields().get(FOLDER_FULL_NAME_FIELD).getValue();

                if (folderName != null) {
                    folderNames.add((String) folderName);
                }
            }
        } while (hitsInPage > 0); // an empty page means the scroll is exhausted

        if (logger.isDebugEnabled()) {
            logger.debug("Currently locally stored folders: {}", folderNames);
        }

        return folderNames;

    }

    /**
     * @return the header names passed to {@link IndexableMailMessage#fromJavaMailMessage} when a message is
     *         converted, may be {@code null}
     */
    public List<String> getHeadersToFields() {
        return headersToFields;
    }

    /** @return the {@code stripTagsFromTextContent} option (default {@code true}) */
    public boolean isStripTagsFromTextContent() {
        return stripTagsFromTextContent;
    }

    /** @return the {@code withAttachments} option (default {@code false}) */
    public boolean isWithAttachments() {
        return withAttachments;
    }

    /** @return the {@code withTextContent} option (default {@code true}) */
    public boolean isWithTextContent() {
        return withTextContent;
    }

    /** @return the {@code withHtmlContent} option (default {@code false}) */
    public boolean isWithHtmlContent() {
        return withHtmlContent;
    }

    /** @return the {@code preferHtmlContent} option (default {@code false}) */
    public boolean isPreferHtmlContent() {
        return preferHtmlContent;
    }

    /**
     * Converts the given message and indexes it, waiting for the index operation to complete.
     * <p>
     * The message is silently ignored when this destination is closed or in error state.
     *
     * @param msg the message to index
     * @throws IOException if converting the message fails or the index cannot be created
     * @throws MessagingException if reading the message fails
     */
    @Override
    public void onMessage(final Message msg) throws IOException, MessagingException {
        if (closed) {
            if (logger.isTraceEnabled()) {
                logger.trace("Is closed, will not index");
            }
            return;
        }

        if (isError()) {
            if (logger.isTraceEnabled()) {
                logger.trace("error, not indexing");
            }
            return;
        }

        createIndexIfNotExists();

        final IndexableMailMessage imsg = IndexableMailMessage.fromJavaMailMessage(msg, withTextContent,
                withHtmlContent, preferHtmlContent, withAttachments, stripTagsFromTextContent, headersToFields);

        if (logger.isTraceEnabled()) {
            logger.trace("Process mail " + imsg.getUid() + "/" + imsg.getPopId() + " :: " + imsg.getSubject() + "/"
                    + imsg.getSentDate());
        }

        client.index(createIndexRequest(imsg)).actionGet();

    }

    /**
     * Deletes the stored documents of the given folder whose identifier is contained in {@code msgs}.
     * <p>
     * For a {@link POP3Folder} the identifiers are matched against the {@code popId} field, for every other folder
     * type against the {@code uid} field. A {@code null} or empty set is a no-op.
     *
     * @param msgs the identifiers of the messages to delete
     * @param folder the folder the messages belong to
     * @throws IOException if the index has to be created and creation is not acknowledged
     * @throws MessagingException declared by the {@link MailDestination} contract
     */
    @SuppressWarnings("rawtypes")
    @Override
    public void onMessageDeletes(final Set msgs, final Folder folder) throws IOException, MessagingException {

        if (msgs == null || msgs.isEmpty()) {
            return;
        }

        createIndexIfNotExists();
        refreshIndex();

        final String folderUri = folder.getURLName().toString();

        logger.info("Will delete " + msgs.size() + " messages locally for folder " + folderUri);

        final BoolQueryBuilder query = new BoolQueryBuilder();

        if (folder instanceof POP3Folder) {
            query.must(QueryBuilders.termsQuery("popId", msgs));
        } else {
            query.must(QueryBuilders.termsQuery("uid", msgs));
        }

        // Restrict the deletion to this folder; identifiers are only matched within it.
        query.must(QueryBuilders.termQuery(FOLDER_URI_FIELD, folderUri));

        DeleteByQuery.deleteByQuery(client, index, new String[] { type }, query);

    }

    /**
     * @param index name of the index the messages are stored in
     * @return this instance, for chaining
     */
    public ElasticsearchMailDestination setIndex(final String index) {
        this.index = index;
        return this;
    }

    /**
     * @param mapping type mapping applied when the index is created; {@code null} selects the default mapping
     * @return this instance, for chaining
     */
    public ElasticsearchMailDestination setMapping(final Map<String, Object> mapping) {
        this.mapping = mapping;
        return this;
    }

    /**
     * @param settings index settings applied when the index is created; {@code null} applies none
     * @return this instance, for chaining
     */
    public ElasticsearchMailDestination setSettings(final Map<String, Object> settings) {
        this.settings = settings;
        return this;
    }

    /**
     * @param stripTagsFromTextContent option handed to the message conversion
     * @return this instance, for chaining
     */
    public ElasticsearchMailDestination setStripTagsFromTextContent(final boolean stripTagsFromTextContent) {
        this.stripTagsFromTextContent = stripTagsFromTextContent;
        return this;
    }

    /**
     * @param type name of the document type the messages are stored as
     * @return this instance, for chaining
     */
    public ElasticsearchMailDestination setType(final String type) {
        this.type = type;
        return this;
    }

    /**
     * @param withAttachments option handed to the message conversion; also adds the attachment mapping to the
     *            default type mapping
     * @return this instance, for chaining
     */
    public ElasticsearchMailDestination setWithAttachments(final boolean withAttachments) {
        this.withAttachments = withAttachments;
        return this;
    }

    /**
     * @param withTextContent option handed to the message conversion
     * @return this instance, for chaining
     */
    public ElasticsearchMailDestination setWithTextContent(final boolean withTextContent) {
        this.withTextContent = withTextContent;
        return this;
    }

    /**
     * @param withHtmlContent option handed to the message conversion
     * @return this instance, for chaining
     */
    public ElasticsearchMailDestination setWithHtmlContent(final boolean withHtmlContent) {
        this.withHtmlContent = withHtmlContent;
        return this;
    }

    /**
     * @param preferHtmlContent option handed to the message conversion
     * @return this instance, for chaining
     */
    public ElasticsearchMailDestination setPreferHtmlContent(final boolean preferHtmlContent) {
        this.preferHtmlContent = preferHtmlContent;
        return this;
    }

    /**
     * Note that, unlike the other setters, this one is declared to return {@link MailDestination}.
     *
     * @param headersToFields header names handed to the message conversion
     * @return this instance, for chaining
     */
    public MailDestination setHeadersToFields(List<String> headersToFields) {
        this.headersToFields = headersToFields;
        return this;
    }

    /**
     * Marks this destination as started. Calling it again only logs that it is already started.
     *
     * @return this instance
     * @throws IOException declared by the {@link MailDestination} contract; not thrown by this implementation
     */
    @Override
    public synchronized ElasticsearchMailDestination startup() throws IOException {

        if (started) {
            logger.debug("Destination already started");
            return this;
        }
        started = true;
        logger.debug("Destination started");
        return this;
    }

    /**
     * Refreshes the destination index so that subsequent searches see previously indexed documents.
     * <p>
     * Only {@code index} is refreshed; a request without index names would refresh every index of the cluster.
     */
    private void refreshIndex() {
        client.admin().indices().refresh(new RefreshRequest(index)).actionGet();
    }

    /**
     * Fetches the next page of a scroll.
     *
     * @param previous the response that carries the scroll id to continue from
     * @return the response holding the next page of hits
     */
    private SearchResponse nextScrollPage(final SearchResponse previous) {
        return client.prepareSearchScroll(previous.getScrollId())
                .setScroll(new TimeValue(SCROLL_KEEP_ALIVE_MILLIS)).execute().actionGet();
    }

    /**
     * Creates the index on first use, unless it already exists.
     * <p>
     * Does nothing when this destination is in error state or the check has already succeeded once.
     *
     * @throws IOException if the create-index request is not acknowledged
     */
    private synchronized void createIndexIfNotExists() throws IOException {
        if (isError()) {
            if (logger.isTraceEnabled()) {
                logger.trace("error, not creating index");
            }
            return;
        }

        if (initialized) {
            return;
        }

        IMAPImporter.waitForYellowCluster(client);

        final boolean indexExists = client.admin().indices().prepareExists(index).execute().actionGet().isExists();

        if (indexExists) {
            logger.debug("Index {} already exists", index);
        } else {
            final CreateIndexRequestBuilder createIndexRequestBuilder = client.admin().indices()
                    .prepareCreate(index);

            if (settings != null) {
                logger.debug("index settings are provided, will apply them {}", settings);
                createIndexRequestBuilder.setSettings(settings);
            } else {
                logger.debug("no settings given for index '{}'", index);
            }

            if (mapping != null) {
                logger.debug("mapping for type '{}' is provided, will apply {}", type, mapping);
                createIndexRequestBuilder.addMapping(type, mapping);
            } else {
                logger.debug("no mapping given for type '{}', will apply default mapping", type);
                createIndexRequestBuilder.addMapping(type, getDefaultTypeMapping());
            }

            final CreateIndexResponse res = createIndexRequestBuilder.get();

            if (!res.isAcknowledged()) {
                throw new IOException("Could not create index " + index);
            }

            IMAPImporter.waitForYellowCluster(client);

            logger.info("Index {} created", index);
        }

        // Only reached when the index exists or was created successfully.
        initialized = true;
    }

    /**
     * Builds the mapping used when no explicit mapping is configured.
     * <p>
     * {@code folderFullName}, {@code folderUri} and {@code contentType} are not-analyzed strings,
     * {@code receivedDate} and {@code sentDate} are dates in {@code basic_date_time} format and
     * {@code flaghashcode} is an integer. When {@code withAttachments} is set, an {@code attachments.content}
     * property of type {@code attachment} with stored {@code content}, {@code title} and {@code content_type}
     * sub-fields is added as well.
     *
     * @return the builder containing the complete mapping
     * @throws IOException if building the JSON fails
     */
    private XContentBuilder getDefaultTypeMapping() throws IOException {

        final XContentBuilder mappingBuilder = jsonBuilder().startObject().startObject(type)
                .startObject("properties");

        addNotAnalyzedString(mappingBuilder, FOLDER_FULL_NAME_FIELD);
        addNotAnalyzedString(mappingBuilder, FOLDER_URI_FIELD);
        addNotAnalyzedString(mappingBuilder, "contentType");
        addBasicDateTime(mappingBuilder, "receivedDate");
        addBasicDateTime(mappingBuilder, "sentDate");
        mappingBuilder.startObject(FLAG_HASHCODE_FIELD).field("type", "integer").endObject();

        if (withAttachments) {
            logger.info("Configure Attachments Mapper Plugin");
            mappingBuilder.startObject("attachments").startObject("properties").startObject("content")
                    .field("type", "attachment").startObject("fields");
            mappingBuilder.startObject("content").field("store", true).field("index", "analyzed").endObject();
            mappingBuilder.startObject("title").field("store", true).endObject();
            mappingBuilder.startObject("content_type").field("store", true).endObject();
            // close "fields", "content", "properties" and "attachments"
            mappingBuilder.endObject().endObject().endObject().endObject();
        }

        // close "properties", the type object and the root object
        mappingBuilder.endObject().endObject().endObject();

        return mappingBuilder;

    }

    /** Appends a not-analyzed string property with the given name to the mapping under construction. */
    private static void addNotAnalyzedString(final XContentBuilder builder, final String name) throws IOException {
        builder.startObject(name).field("index", "not_analyzed").field("type", "string").endObject();
    }

    /** Appends a date property in {@code basic_date_time} format with the given name to the mapping. */
    private static void addBasicDateTime(final XContentBuilder builder, final String name) throws IOException {
        builder.startObject(name).field("type", "date").field("format", "basic_date_time").endObject();
    }

    /**
     * Creates the index request for a converted message.
     * <p>
     * The document id is {@code <popId>::<folderUri>} when the message has a non-empty POP id, otherwise
     * {@code <uid>::<folderUri>}.
     *
     * @param message the converted message
     * @return the request that indexes the message into {@code index}/{@code type}
     * @throws IOException if building the document source fails
     */
    protected IndexRequest createIndexRequest(final IndexableMailMessage message) throws IOException {

        final Object messageId = StringUtils.isEmpty(message.getPopId()) ? message.getUid() : message.getPopId();
        final String id = messageId + ID_SEPARATOR + message.getFolderUri();

        return Requests.indexRequest(index).type(type).id(id).source(message.build());

    }

    /** @return the configured Elasticsearch client */
    protected Client getClient() {
        return client;
    }

    /**
     * @return {@code true} once {@link #close()} has been called; the flag is volatile, so no lock is needed
     */
    protected boolean isClosed() {
        return closed;
    }

    /**
     * @return {@code true} when the error flag is set; the flag is volatile, so no lock is needed
     */
    protected boolean isError() {
        return error;
    }

    /**
     * Sets or clears the error flag. While it is set, messages are not indexed and the index is not created.
     *
     * @param error the new value of the error flag
     */
    protected void setError(final boolean error) {
        this.error = error;
    }

}