com.shmsoft.dmass.data.index.SolrIndex.java Source code

Java tutorial

Introduction

Here is the source code for com.shmsoft.dmass.data.index.SolrIndex.java

Source

/*
 *
 * Copyright SHMsoft, Inc. 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/
package com.shmsoft.dmass.data.index;

import java.util.concurrent.atomic.AtomicLong;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.protocol.HTTP;
import org.apache.tika.metadata.Metadata;
import com.shmsoft.dmass.services.History;
import com.shmsoft.dmass.services.Project;
import com.shmsoft.dmass.services.Settings;

/**
 * 
 * Create Solr index.
 * 
 * Currently implement only creation via HTTP.
 * 
 * @author ivanl
 *
 */
public abstract class SolrIndex implements ComponentLifecycle {
    private static final String SOLR_INSTANCE_DIR = "shmcloud";

    private static SolrIndex __instance;
    protected boolean supportMultipleProjects = true;
    protected boolean supportSolrCloud = false;
    protected String checkedSolrCloudEndpoint = null;
    protected boolean isInited = false;

    public static synchronized SolrIndex getInstance() {
        if (__instance == null) {
            if (Project.getProject().isSendIndexToSolrEnabled()) {
                System.out.println("SolrIndex Create HttpSolrIndex");
                __instance = new HttpSolrIndex();
            } else {
                System.out.println("SolrIndex Create DisabledSolrIndex");
                __instance = new DisabledSolrIndex();
            }
        }

        return __instance;
    }

    public abstract void addData(Metadata metadata);

    public abstract void addBatchData(Metadata metadata);

    public abstract void flushBatchData();

    @Override
    public abstract void init();

    @Override
    public void destroy() {
        synchronized (SolrIndex.class) {
            __instance = null;
        }
    }

    public boolean isSolrCloud() throws SolrException {
        String endpoint = getSolrEndpoint();
        if (checkedSolrCloudEndpoint == null || checkedSolrCloudEndpoint.equals(endpoint) == false) {
            checkedSolrCloudEndpoint = endpoint;
            HttpClient httpClient = new DefaultHttpClient();

            String command = checkedSolrCloudEndpoint
                    + "/solr/zookeeper?wt=json&detail=false&path=%2Fclusterstate.json";
            try {
                HttpGet request = new HttpGet(command);
                HttpResponse response = httpClient.execute(request);
                if (response.getStatusLine().getStatusCode() == 200) {
                    supportSolrCloud = true;
                } else {
                    supportSolrCloud = false;
                }
            } catch (Exception ex) {
                supportSolrCloud = false;
                checkedSolrCloudEndpoint = null;
                throw new SolrException("Problem Checking SolrCloud Status", ex);
            }
        }

        return supportSolrCloud;
    }

    protected void sendPostCommand(String point, String param) throws SolrException {
        HttpClient httpClient = new DefaultHttpClient();

        try {
            HttpPost request = new HttpPost(point);
            StringEntity params = new StringEntity(param, HTTP.UTF_8);
            params.setContentType("text/xml");

            request.setEntity(params);

            HttpResponse response = httpClient.execute(request);
            if (response.getStatusLine().getStatusCode() != 200) {
                History.appendToHistory("Solr Invalid Response: " + response.getStatusLine().getStatusCode());
            }

        } catch (Exception ex) {
            throw new SolrException("Problem sending request", ex);
        }
    }

    protected void sendGetCommand(String command) throws SolrException {
        HttpClient httpClient = new DefaultHttpClient();

        try {
            HttpGet request = new HttpGet(command);
            HttpResponse response = httpClient.execute(request);
            if (response.getStatusLine().getStatusCode() != 200) {
                History.appendToHistory("Solr Invalid Response: " + response.getStatusLine().getStatusCode());
                throw new SolrException("Invalid response");
            }
        } catch (Exception ex) {
            throw new SolrException("Problem sending request", ex);
        }
    }

    private static final class HttpSolrIndex extends SolrIndex {
        private static AtomicLong solrId = new AtomicLong(0);
        private String updateUrl;
        protected StringBuffer batchBuffer = new StringBuffer(1024 * 1024);

        @Override
        public synchronized void addBatchData(Metadata metadata) {

            Settings settings = Settings.getSettings();
            batchBuffer.append("<doc>");
            if (settings.containsKey("mapred.task.id")) {
                String[] idParts = settings.getProperty("mapred.task.id").split("_");
                String taskId = idParts[idParts.length - 2];
                batchBuffer.append("<field name=\"id\">SOLRID_" + taskId + "_");
            } else {
                batchBuffer.append("<field name=\"id\">SOLRID_");
            }
            String projectCode = Project.getProject().getProjectCode();
            batchBuffer.append(projectCode + "_");
            batchBuffer.append(solrId.incrementAndGet());
            batchBuffer.append("</field>");

            String[] metadataNames = metadata.names();
            for (String name : metadataNames) {
                String data = metadata.get(name);
                batchBuffer.append("<field name=\"");
                batchBuffer.append(name);
                batchBuffer.append("\">");
                batchBuffer.append("<![CDATA[");
                batchBuffer.append(filterNotCorrectCharacters(data));
                batchBuffer.append("]]></field>");
            }

            batchBuffer.append("</doc>");

            if (batchBuffer.length() > .9 * 1024 * 1024) {
                flushBatchData();
            }
        }

        @Override
        public synchronized void flushBatchData() {
            try {
                if (batchBuffer.length() > 0) {
                    if (updateUrl == null) {
                        if (isInited) {
                            System.err.println("No updateUrl set");
                            batchBuffer.delete(0, batchBuffer.length());
                            return;
                        }
                        resetUpdateUrl();
                    }
                    sendPostCommand(updateUrl, "<add>" + batchBuffer.toString() + "</add>");
                    batchBuffer.delete(0, batchBuffer.length());
                    sendPostCommand(updateUrl, "<commit/>");
                }

            } catch (SolrException e) {
                e.printStackTrace();
            }
        }

        @Override
        public void addData(Metadata metadata) {
            if (updateUrl == null) {
                if (isInited) {
                    System.err.println("No updateUrl set");
                    return;
                }
                resetUpdateUrl();
            }

            try {
                Settings settings = Settings.getSettings();
                StringBuffer param = new StringBuffer();
                param.append("<add>");
                param.append("<doc>");
                if (settings.containsKey("mapred.task.id")) {
                    String[] idParts = settings.getProperty("mapred.task.id").split("_");
                    String taskId = idParts[idParts.length - 2];
                    param.append("<field name=\"id\">SOLRID_" + taskId + "_");
                } else {
                    param.append("<field name=\"id\">SOLRID_");
                }
                String projectCode = Project.getProject().getProjectCode();
                param.append(projectCode + "_");
                param.append(solrId.incrementAndGet());
                param.append("</field>");

                String[] metadataNames = metadata.names();
                for (String name : metadataNames) {
                    String data = metadata.get(name);
                    param.append("<field name=\"");
                    param.append(name);
                    param.append("\">");
                    param.append("<![CDATA[");
                    param.append(filterNotCorrectCharacters(data));
                    param.append("]]></field>");
                }

                param.append("</doc></add>");

                sendPostCommand(updateUrl, param.toString());
                sendPostCommand(updateUrl, "<commit/>");
            } catch (SolrException e) {
                e.printStackTrace();
            }
        }

        private String filterNotCorrectCharacters(String data) {
            return data.replaceAll("[\\x00-\\x09\\x11\\x12\\x14-\\x1F\\x7F]", "");
        }

        @Override
        public void init() {
            isInited = true;
            String command = null;
            String projectCode = Project.getProject().getProjectCode();
            String projectName = Project.getProject().getProjectName();
            try {
                String endpoint = getSolrEndpoint();

                if (isSolrCloud()) {
                    Settings settings = Settings.getSettings();

                    command = endpoint + "solr/admin/collections?action=CREATE&name=" + SOLR_INSTANCE_DIR + "_"
                            + projectCode + "&collection.configName=" + SOLR_INSTANCE_DIR + "&numShards="
                            + settings.getSolrCloudShardCount() + "&replicationFactor="
                            + settings.getSolrCloudReplicaCount() + "&maxShardsPerNode="
                            + settings.getSolrCloudReplicaCount();
                    try {
                        sendGetCommand(command);
                    } catch (Exception ex) {
                        History.appendToHistory(
                                "Unable to create Collection: " + SOLR_INSTANCE_DIR + "_" + projectCode);
                        History.appendToHistory("Collection command: " + command);
                    }

                    command = endpoint + "solr/admin/collections?action=CREATEALIAS&name="
                            + projectName.replaceAll("[^A-Za-z0-9]", "_") + "_" + projectCode + "&collections="
                            + SOLR_INSTANCE_DIR + "_" + projectCode;
                    sendGetCommand(command);

                    this.updateUrl = endpoint + "solr/" + SOLR_INSTANCE_DIR + "_" + projectCode + "/update";
                } else if (supportMultipleProjects) {
                    command = endpoint + "solr/admin/cores?action=CREATE&name=" + SOLR_INSTANCE_DIR + "_"
                            + projectCode + "&instanceDir=" + SOLR_INSTANCE_DIR
                            + "&config=solrconfig.xml&dataDir=data_" + projectCode + "&schema=schema.xml";
                    try {
                        sendGetCommand(command);
                    } catch (Exception ex) {
                        History.appendToHistory("Unable to create Core: " + SOLR_INSTANCE_DIR + "_" + projectCode);
                        History.appendToHistory("Core command: " + command);
                    }

                    this.updateUrl = endpoint + "solr/" + SOLR_INSTANCE_DIR + "_" + projectCode + "/update";
                } else {
                    sendGetCommand(endpoint + "solr/admin/ping");

                    this.updateUrl = endpoint + "solr/update";
                }

                String deleteAll = "<delete><query>id:[*TO *]</query></delete>";
                sendPostCommand(updateUrl, deleteAll);
                sendPostCommand(updateUrl, "<commit/>");
            } catch (SolrException se) {
                History.appendToHistory("Problem with SOLR init: " + se.getMessage());
                //se.printStackTrace();
            }
        }

        protected void resetUpdateUrl() {
            String command = null;
            String projectCode = Project.getProject().getProjectCode();
            String projectName = Project.getProject().getProjectName();
            try {
                String endpoint = getSolrEndpoint();

                if (isSolrCloud()) {
                    Settings settings = Settings.getSettings();
                    this.updateUrl = endpoint + "solr/" + SOLR_INSTANCE_DIR + "_" + projectCode + "/update";
                } else if (supportMultipleProjects) {
                    this.updateUrl = endpoint + "solr/" + SOLR_INSTANCE_DIR + "_" + projectCode + "/update";
                } else {
                    this.updateUrl = endpoint + "solr/update";
                }
            } catch (SolrException se) {
                History.appendToHistory("Problem with SOLR resetUpdateUrl: " + se.getMessage());
                //se.printStackTrace();
            }
        }

    }

    protected String getSolrEndpoint() throws SolrException {
        String endpoint = Settings.getSettings().getSolrEndpoint();

        if (endpoint == null || endpoint.length() == 0) {
            throw new SolrException("Endpoint not configured");
        }

        if (endpoint.endsWith("/")) {
            return endpoint;
        }

        return endpoint + "/";
    }

    private static final class DisabledSolrIndex extends SolrIndex {

        @Override
        public void addData(Metadata metadata) {
            //do nothing
        }

        @Override
        public void addBatchData(Metadata metadata) {
            //do nothing
        }

        @Override
        public void flushBatchData() {
            //do nothing
        }

        @Override
        public void init() {
            //do nothing
        }
    }

    private static final class SolrException extends Exception {
        private static final long serialVersionUID = 5904372392164798773L;

        public SolrException(String message) {
            super(message);
        }

        public SolrException(String message, Exception e) {
            super(message, e);
        }
    }
}