Source code

Java tutorial


Here is the source code for org.hbasene.index.HBaseIndexReader.


/**
 * Copyright 2010 Karthik Kumar
 *
 * Based off the original code by Lucandra project, (C): Jake Luciani
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.hbasene.index;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
import java.util.NavigableMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.index.TermVectorMapper;
import org.apache.lucene.search.DefaultSimilarity;

/**
 * Index Reader specific to HBase
 */
public class HBaseIndexReader extends IndexReader {

    private static final Log LOG = LogFactory.getLog(HBaseIndexReader.class);

     * The indexName represents the tableName as well.
    private final String indexName;

     * HTable is not thread-safe, but HTable caches the region locations within.
     * Rudimentary implementation of a table pool used.
    private final HTablePool tablePool;

     * The default norm as per the given field.
    static final byte DEFAULT_NORM = DefaultSimilarity.encodeNorm(1.0f);

     * Encoder of termPositions
    private final AbstractTermPositionsEncoder termPositionEncoder = new AlphaTermPositionsEncoder();
    //TODO: Use HBaseIndexStore to do this 

     * Primary Key Field
    private final byte[] primaryKeyField;

     * @param tablePool
     *    TablePool to be used by the index reader
     * @param indexName
     *    Name of the index to be read from.  
    public HBaseIndexReader(final HTablePool tablePool, final String indexName, final String primaryKeyField) {
        this.tablePool = tablePool;
        this.indexName = indexName;
        this.primaryKeyField = Bytes.toBytes(primaryKeyField);

    protected void doClose() throws IOException {
        // TODO: HBASE-2435 in place.
        // this.tablePool.closeTablePool(this.indexName);
        // for (int i = 0; i < this.tablePoolSize; ++i) {
        // this.tablePool.getTable(this.indexName).close();
        // }

    protected void doCommit(Map<String, String> commitUserData) throws IOException {
        // TODO Auto-generated method stub


    protected void doDelete(int docNum) throws CorruptIndexException, IOException {
        // TODO Auto-generated method stub


    protected void doSetNorm(int doc, String field, byte value) throws CorruptIndexException, IOException {
        // TODO Auto-generated method stub


    protected void doUndeleteAll() throws CorruptIndexException, IOException {
        // TODO Auto-generated method stub


    public int docFreq(Term t) throws IOException {
        // same as in TermEnum. Avoid duplication.
        final String rowKey = t.field() + "/" + t.text();
        Get get = new Get(Bytes.toBytes(rowKey));
        HTableInterface table = this.getTablePool().getTable(this.indexName);
        try {
            Result result = table.get(get);
            if (result == null) {
                return 0;
            NavigableMap<byte[], byte[]> map = result.getFamilyMap(HBaseneConstants.FAMILY_TERMVECTOR);
            if (map == null) {
                return 0;
            return map.size();
        } finally {

    public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
        final long index = (long) n; // internally, all row keys are long.
        Document doc = null;
        HTableInterface table = this.getTablePool().getTable(this.indexName);
        try {
            Get get = new Get(Bytes.toBytes(index));
            get.addColumn(HBaseneConstants.FAMILY_FIELDS, this.primaryKeyField);
            doc = new Document();

            Result result = table.get(get);
            byte[] docId = result.getValue(HBaseneConstants.FAMILY_FIELDS, this.primaryKeyField);
            // TODO: Get the document schema, for the given document.
            // Change the HBaseIndexWriter appropriately to enable easy
            // reconstruction.

            // For now, only the id is available for assertion back.
            doc.add(new Field("id", Bytes.toString(docId), Field.Store.YES, Field.Index.NO));

        } finally {
        return doc;

    /* @Override
     public Collection<String> getFieldNames(FieldOption fldOption) {
       return Arrays.asList(new String[] {});

    public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {

        return null;

    public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
        // TODO Auto-generated method stub


    public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
        // TODO Auto-generated method stub


    public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
        // TODO Auto-generated method stub
        return null;

    public boolean hasDeletions() {
        // TODO Auto-generated method stub
        return false;

    public boolean isDeleted(int n) {
        // TODO Auto-generated method stub
        return false;

    public int maxDoc() {
        return this.numDocs() + 1;

    public byte[] norms(String field) throws IOException {
        byte[] result = new byte[this.maxDoc()];
        Arrays.fill(result, DEFAULT_NORM);
        return result;

    public void norms(String field, byte[] bytes, int offset) throws IOException {
        // TODO Auto-generated method stub


    public int numDocs() {
        HTableInterface table = this.getTablePool().getTable(this.indexName);
        int numDocs = 0;
        try {
            Get get = new Get(HBaseneConstants.ROW_SEQUENCE_ID);
            get.addColumn(HBaseneConstants.FAMILY_SEQUENCE, HBaseneConstants.QUALIFIER_SEQUENCE);
            Result result = table.get(get);
            numDocs = (int) Bytes
                    .toLong(result.getValue(HBaseneConstants.FAMILY_SEQUENCE, HBaseneConstants.QUALIFIER_SEQUENCE));
        } catch (IOException e) {
            LOG.warn("Error in numDocs() ", e);
        } finally {
            try {
            } catch (IOException e) {
        return numDocs;

    public TermDocs termDocs() throws IOException {
        return new HBaseTermPositions(this, this.termPositionEncoder);

    public TermPositions termPositions() throws IOException {
        return new HBaseTermPositions(this, this.termPositionEncoder);


    public TermEnum terms() throws IOException {
        return new HBaseTermEnum(this);

    public TermEnum terms(Term t) throws IOException {
        HBaseTermEnum termEnum = (HBaseTermEnum) terms();
        return termEnum;

     * Create a reference to HTable to the index under consideration.
     * @return
     * @throws IOException
    public HTablePool getTablePool() {
        return this.tablePool;

     * Return the index Name of the given table.
     * @return
    public String getIndexName() {
        return this.indexName;

    public FieldInfos getFieldInfos() {
        // TODO Auto-generated method stub
        return null;