org.apache.tajo.storage.elasticsearch.ElasticsearchScanner.java Source code

Introduction

Here is the source code for org.apache.tajo.storage.elasticsearch.ElasticsearchScanner.java, a Tajo Scanner implementation that reads rows from an Elasticsearch index.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tajo.storage.elasticsearch;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import net.minidev.json.JSONObject;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.tajo.catalog.Column;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.catalog.TableMeta;
import org.apache.tajo.catalog.statistics.TableStats;
import org.apache.tajo.conf.TajoConf;
import org.apache.tajo.storage.*;
import org.apache.tajo.storage.fragment.Fragment;
import org.apache.tajo.storage.json.JsonLineDeserializer;
import org.apache.tajo.storage.text.TextLineDeserializer;
import org.apache.tajo.storage.text.TextLineParsingError;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.ConstantScoreQueryBuilder;
import org.elasticsearch.index.query.MatchAllQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;

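/**
 * A Tajo Scanner that reads documents from an Elasticsearch index and turns each
 * hit into a Tuple. The fragment supplies the shard, offset, and fetch size to
 * read; the matching hits are fetched and buffered as JSON objects during init()
 * and deserialized one at a time by next().
 */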
public class ElasticsearchScanner implements Scanner {
    private static final Log LOG = LogFactory.getLog(ElasticsearchScanner.class);

    private TajoConf conf;
    private Schema schema;
    private TableMeta tableMeta;
    private ElasticsearchFragment fragment;
    private ElasticsearchWithOptionInfo optionInfo;
    private TableStats tableStats;
    private Client client;

    protected boolean inited = false;
    private AtomicBoolean finished = new AtomicBoolean(false);
    private float progress = 0.0f;

    private Column[] columns;
    private int[] columnIdxs;
    private TextLineDeserializer deserializer;
    private List<JSONObject> docs = null;
    private int docCount = 0;
    private boolean scanFlag = true;

    public ElasticsearchScanner(Configuration conf, Schema schema, TableMeta tableMeta, Fragment fragment)
            throws IOException {
        this.conf = (TajoConf) conf;
        this.schema = schema;
        this.tableMeta = tableMeta;
        this.fragment = (ElasticsearchFragment) fragment;
        this.tableStats = new TableStats();
        this.optionInfo = new ElasticsearchWithOptionInfo(tableMeta);
        this.client = null;
    }

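    /**
     * Prepares the scan: resolves the target columns (all columns of the schema
     * when no projection has been set), creates a JSON line deserializer, and
     * eagerly fetches this fragment's documents via documentScanner().
     */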
    @Override
    public void init() throws IOException {
        docs = new ArrayList<JSONObject>();
        inited = true;

        if (columns == null) {
            columns = schema.toArray();
        }
        //
        //    columnIdxs = new int[columns.length];
        //
        //    for (int i = 0; i < columns.length; i++) {
        //      columnIdxs[i] = schema.getColumnId(columns[i].getQualifiedName());
        //    }
        //
        //    Arrays.sort(columnIdxs);

        deserializer = new JsonLineDeserializer(schema, tableMeta, columns);
        deserializer.init();

        documentScanner();

    }

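    /**
     * Issues a single constant-score match-all search against the fragment's shard,
     * starting at the fragment's offset and limited to its fetch size, and buffers
     * each hit (augmented with its type, score, and id) as a JSON object.
     */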
    public void documentScanner() {
        String indexName = fragment.getIndexName();
        String indexType = fragment.getIndexType();
        int offset = fragment.getOffset();
        int fetchSize = fragment.getFetchSize();
        int shardId = fragment.getShardId();

        try {
            if (client == null) {
                client = ((ElasticsearchStorageManager) StorageManager.getStorageManager(conf, "ELASTICSEARCH"))
                        .getClient(optionInfo);
            }

            ConstantScoreQueryBuilder constantScoreQueryBuilder = QueryBuilders
                    .constantScoreQuery(new MatchAllQueryBuilder());

            SearchResponse res = client.prepareSearch(optionInfo.index()).setTypes(optionInfo.type())
                    .setSearchType(SearchType.DFS_QUERY_AND_FETCH)
                    .setPreference("_shards:" + shardId + ";_primary_first").setQuery(constantScoreQueryBuilder) // if a qual could be pushed down, it would have to be translated into Elasticsearch query DSL here.
                    .setFrom(offset).setSize(fetchSize).execute().actionGet(optionInfo.timeAction());

            for (SearchHit doc : res.getHits().getHits()) { // flatten each hit into Tajo's single-depth JSON line format.
                Map<String, Object> source = doc.sourceAsMap();
                source.put(ElasticsearchConstants.GLOBAL_FIELDS_TYPE, doc.getType());
                source.put(ElasticsearchConstants.GLOBAL_FIELDS_SCORE, doc.getScore());
                source.put(ElasticsearchConstants.GLOBAL_FIELDS_ID, doc.getId());

                docs.add(new JSONObject(source));
            }
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
        }
    }

    public void setClient(Client client) {
        this.client = client;
    }

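    /**
     * Deserializes the next buffered JSON document into a tuple. Returns null once
     * all buffered documents have been consumed or the scanner has been closed.
     */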
    @Override
    public Tuple next() throws IOException {
        if (finished.get()) {
            return null;
        }

        if (null == docs) {
            return null;
        }

        if (docs.size() <= docCount) {
            finished.set(true);
            progress = 1.0f;

            return null;
        }

        VTuple tuple;
        JSONObject doc = docs.get(docCount);
        ByteBuf buf = Unpooled.wrappedBuffer(doc.toString().getBytes(ElasticsearchConstants.CHARSET));

        if (buf == null) {
            return null;
        }

        if (columns.length == 0) {
            docCount++;
            return EmptyTuple.get();
        }

        tuple = new VTuple(schema.size());

        try {
            deserializer.deserialize(buf, tuple);
        } catch (TextLineParsingError tae) {
            throw new IOException(tae);
        } finally {
            docCount++;
        }

        return tuple;
    }

    @Override
    public void reset() throws IOException {
        progress = 0.0f;
        docCount = 0;
        finished.set(false);
    }

    @Override
    public void close() throws IOException {
        progress = 1.0f;
        finished.set(true);
        docs = null;

        if (tableStats != null) {
            tableStats.setNumRows(docCount);
        }
    }

    @Override
    public boolean isProjectable() {
        return true;
    }

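    /** Sets the columns to project; must be called before init(). */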
    @Override
    public void setTarget(Column[] targets) {
        if (inited) {
            throw new IllegalStateException("Should be called before init()");
        }

        this.columns = targets;
    }

    @Override
    public boolean isSelectable() {
        return false;
    }

    @Override
    public void setSearchCondition(Object expr) {
        // no-op: predicate push-down is not supported (isSelectable() returns false).
    }

    @Override
    public boolean isSplittable() {
        return false;
    }

    @Override
    public float getProgress() {
        return progress;
    }

    @Override
    public TableStats getInputStats() {
        if (tableStats != null) {
            tableStats.setNumRows(docCount);
        }

        return tableStats;
    }

    @Override
    public Schema getSchema() {
        return schema;
    }
}
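
Example

For context, here is a minimal, hypothetical sketch of how this scanner could be driven through its lifecycle (setTarget, init, next until null, close). The class and method names in the sketch (ElasticsearchScannerExample, scanAll) are illustrative only, and the TajoConf, Schema, TableMeta, and Fragment arguments are assumed to be prepared elsewhere by Tajo for an Elasticsearch-backed table.

import org.apache.tajo.catalog.Schema;
import org.apache.tajo.catalog.TableMeta;
import org.apache.tajo.conf.TajoConf;
import org.apache.tajo.storage.Tuple;
import org.apache.tajo.storage.elasticsearch.ElasticsearchScanner;
import org.apache.tajo.storage.fragment.Fragment;

import java.io.IOException;

public class ElasticsearchScannerExample {

    // Hypothetical driver: conf, schema, tableMeta, and fragment are assumed to be
    // provided by the surrounding Tajo machinery (fragment should be an ElasticsearchFragment).
    public static long scanAll(TajoConf conf, Schema schema, TableMeta tableMeta,
                               Fragment fragment) throws IOException {
        ElasticsearchScanner scanner = new ElasticsearchScanner(conf, schema, tableMeta, fragment);
        scanner.setTarget(schema.toArray()); // optional: init() defaults to all columns
        scanner.init();                      // fetches and buffers the fragment's documents

        long rows = 0;
        try {
            Tuple tuple;
            while ((tuple = scanner.next()) != null) {
                rows++;                      // process each tuple here
            }
        } finally {
            scanner.close();
        }
        return rows;
    }
}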