org.solbase.lucenehbase.IndexWriter.java Source code

Introduction

Here is the source code for org.solbase.lucenehbase.IndexWriter.java. This class is Solbase's HBase-backed Lucene index writer: it parses Lucene documents into per-term metadata and writes documents, uniqId-to-docId mappings, term vectors, and term vector version ids into their respective HBase tables.

Source

/**
 * 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.solbase.lucenehbase;

import java.io.IOException;
import java.io.StringReader;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentSkipListMap;

import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.EmbeddedSortField;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;
import org.solbase.SolbaseUtil;
import org.solbase.common.SolbaseException;
import org.solbase.indexer.ParsedDoc;
import org.solbase.indexer.SolbaseIndexUtil;

public class IndexWriter {
    private static final Logger logger = Logger.getLogger(IndexWriter.class);

    private static final InheritableThreadLocal<String> indexName = new InheritableThreadLocal<String>();

    private static SolbaseIndexUtil indexUtil;

    private Similarity similarity = Similarity.getDefault(); // how to normalize
    // going to hold onto puts until later
    public List<Put> puts = new ArrayList<Put>();

    public IndexWriter() {

    }

    public IndexWriter(String indexName) {
        setIndexName(indexName);
    }

    public void setIndexUtil(SolbaseIndexUtil indexUtil) {
        IndexWriter.indexUtil = indexUtil;
    }

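    /**
     * Writes the document Put to the doc table and records the uniqId -> docId mapping
     * in the docKeyIdMap table. If either id is missing, nothing is written and the
     * reason is logged.
     */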
    public void addDocument(Put documentPut, Document doc) {
        byte[] docId = documentPut.getRow();
        String uniqId = doc.get("global_uniq_id");

        if (uniqId != null && docId != null) {
            // for remote server update via solr update, we want to use
            // getDocTable(), but for now map/red can use local htable
            HTableInterface docTable = SolbaseUtil.getDocTable();
            // insert document to doctable
            try {
                documentPut.add(SolbaseUtil.timestampColumnFamilyName,
                        SolbaseUtil.tombstonedColumnFamilyQualifierBytes, Bytes.toBytes(0));
                docTable.put(documentPut);
            } catch (IOException e) {
                throw new SolbaseException(SolbaseException.ErrorCode.SERVER_ERROR, e.getMessage());
            } finally {
                SolbaseUtil.releaseTable(docTable);
            }

            // need to insert to docKeyIdMap
            Put mapping = new Put(Bytes.toBytes(uniqId));
            mapping.add(Bytes.toBytes("docId"), Bytes.toBytes(""), SolbaseUtil.randomize(docId));
            mapping.add(SolbaseUtil.docIdColumnFamilyName, SolbaseUtil.tombstonedColumnFamilyQualifierBytes,
                    Bytes.toBytes(0));
            updateDocKeyIdMap(mapping);

            logger.info("adding document: " + Bytes.toInt(SolbaseUtil.randomize(docId)) + " uniqId: " + uniqId);

        } else {
            if (uniqId == null && docId == null) {
                logger.info("both uniqId and docId are null: " + doc.toString());
            } else if (uniqId == null) {
                logger.info("uniqId is null: " + doc.toString());
            } else {
                logger.info("docId is null: " + doc.toString());
            }
        }
    }

    public void updateDocument(Put documentPut, Document doc) {
        String uniqId = doc.get("global_uniq_id");
        Put mappingPut = new Put(Bytes.toBytes(uniqId));
        mappingPut.add(SolbaseUtil.docIdColumnFamilyName, SolbaseUtil.tombstonedColumnFamilyQualifierBytes,
                Bytes.toBytes(0));
        updateDocKeyIdMap(mappingPut);

        // for remote server update via solr update, we want to use
        // getDocTable(), but for now map/red can use local htable
        HTableInterface docTable = SolbaseUtil.getDocTable();
        // insert document to doctable
        try {
            documentPut.add(SolbaseUtil.timestampColumnFamilyName, SolbaseUtil.tombstonedColumnFamilyQualifierBytes,
                    Bytes.toBytes(0));
            docTable.put(documentPut);
        } catch (IOException e) {
            throw new SolbaseException(SolbaseException.ErrorCode.SERVER_ERROR, e.getMessage());
        } finally {
            SolbaseUtil.releaseTable(docTable);
        }
    }

    public void deleteDocument(Put documentPut) {
        HTableInterface docTable = SolbaseUtil.getDocTable();
        try {
            documentPut.add(SolbaseUtil.timestampColumnFamilyName, SolbaseUtil.tombstonedColumnFamilyQualifierBytes,
                    Bytes.toBytes(1));
            docTable.put(documentPut);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            SolbaseUtil.releaseTable(docTable);
        }
    }

    public void updateDocKeyIdMap(Put docKeyIdPut) {
        HTableInterface docKeyIdMap = SolbaseUtil.getDocKeyIdMapTable();
        // write the uniqId -> docId mapping to the docKeyIdMap table
        try {
            docKeyIdMap.put(docKeyIdPut);
        } catch (IOException e) {
            throw new SolbaseException(SolbaseException.ErrorCode.SERVER_ERROR, e.getMessage());
        } finally {
            SolbaseUtil.releaseTable(docKeyIdMap);
        }
    }

    public void deleteDocKeyIdMap(Put mappingPut) {
        HTableInterface mappingTable = SolbaseUtil.getDocKeyIdMapTable();
        // delete the uniqId -> docId mapping from the docKeyIdMap table
        try {
            Delete delete = new Delete(mappingPut.getRow());
            mappingTable.delete(delete);
        } catch (IOException e) {
            throw new SolbaseException(SolbaseException.ErrorCode.SERVER_ERROR, e.getMessage());
        } finally {
            SolbaseUtil.releaseTable(mappingTable);
        }
    }

    public void updateTermVector(TermDocMetadata termDocMeta) throws CorruptIndexException, IOException {
        this.addTermVector(termDocMeta, 0, 0);
        int docNumber = termDocMeta.getDocId();
        logger.info("updating term vector: " + termDocMeta.getTerm().toString() + " docId: " + docNumber);
    }

    public void addTermVector(TermDocMetadata termDocMeta) throws CorruptIndexException, IOException {
        this.addTermVector(termDocMeta, 0, 0);
        int docNumber = termDocMeta.getDocId();
        logger.info("adding term vector: " + termDocMeta.getTerm().toString() + " docId: " + docNumber);
    }

    public void addTermVector(TermDocMetadata termDocMeta, int startDocId, int endDocId)
            throws CorruptIndexException, IOException {
        // getting the term vector table
        HTableInterface termVectorTable = SolbaseUtil.getTermVectorTable();
        try {

            byte[] key = termDocMeta.getFieldTermKey();
            ByteBuffer buf = termDocMeta.serialize();

            int docNumber = termDocMeta.getDocId();

            Put put = null;

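            // Row layout depends on the configured storage type:
            //   KEY_ONLY   - the serialized metadata is folded into the row key itself and
            //                the column value stays empty
            //   WIDE_ROW   - one row per (term, chunk of docIds), with one column per docId
            //                holding the serialized metadata
            //   NARROW_ROW - one row per (term, docId), with the serialized metadata as the value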
            switch (TermDocMetadataLoader.storageType) {
            case KEY_ONLY: {
                put = new Put(Bytes.add(Bytes.add(key, SolbaseUtil.delimiter, Bytes.toBytes(docNumber)),
                        Bytes.toBytes(buf)));
                put.add(SolbaseUtil.termVectorDocColumnFamilyName, Bytes.toBytes(""), Bytes.toBytes(""));
            }
                break;
            case WIDE_ROW:
                int chunkId = TermDocMetadataLoader.getChunkId(docNumber);
                put = new Put(Bytes.add(key, SolbaseUtil.delimiter, Bytes.toBytes(chunkId)));
                put.add(SolbaseUtil.termVectorDocColumnFamilyName, Bytes.toBytes(docNumber), Bytes.toBytes(buf));
                break;
            case NARROW_ROW:
            default: {
                put = new Put(Bytes.add(key, SolbaseUtil.delimiter, Bytes.toBytes(docNumber)));
                put.add(SolbaseUtil.termVectorDocColumnFamilyName, Bytes.toBytes(""), Bytes.toBytes(buf));
            }
            }

            termVectorTable.put(put);
        } catch (Exception e) {
            logger.error("failed to add term vector: " + termDocMeta.getTerm().toString() + " and docId: "
                    + termDocMeta.docId, e);
        } finally {
            SolbaseUtil.releaseTable(termVectorTable);
        }
    }

    public void updateTermVector(TermDocMetadata termDocMeta, int startDocId, int endDocId) {
        // to update, first delete the existing term doc metadata, then re-add it

        try {
            // TODO: what do we do with doc update? just update anyway?
            boolean deleted = deleteTermVector(termDocMeta, startDocId, endDocId, true);
            if (deleted) {
                updateTermVector(termDocMeta);
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }

    }

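    /**
     * Writes the version identifier for this term, keyed by the term key plus the
     * startDocId/endDocId pair; if the metadata carries no cached identifier, a fresh
     * one is loaded via TermDocMetadataLoader.
     */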
    public void updateTermVectorVersionId(TermDocMetadata termDocMeta, int startDocId, int endDocId) {
        HTableInterface versionIdTable = SolbaseUtil.getTermVectorVersionIDTable();

        Term term = termDocMeta.getTerm();
        byte[] fieldTermKey = SolbaseUtil.generateTermKey(term);

        Put updatePut = new Put(Bytes.add(fieldTermKey, Bytes.toBytes(startDocId), Bytes.toBytes(endDocId)));
        if (termDocMeta.versionIdentifier == null) {
            // we haven't loaded this term into the cache yet, but we still need to update its version id
            try {
                TermDocMetadataVersionIdentifier versionIdentifier = TermDocMetadataLoader
                        .getStaticVersionIdentifier(term, startDocId, endDocId);
                updatePut.add(SolbaseUtil.timestampColumnFamilyName, Bytes.toBytes(""),
                        Bytes.toBytes(versionIdentifier.getVersionIdentifier()));
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        } else {
            updatePut.add(SolbaseUtil.timestampColumnFamilyName, Bytes.toBytes(""),
                    Bytes.toBytes(termDocMeta.versionIdentifier.getVersionIdentifier()));
        }
        try {
            versionIdTable.put(updatePut);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            SolbaseUtil.releaseTable(versionIdTable);
        }
    }

    public void deleteTermVectorVersionId(TermDocMetadata termDocMeta) {
        HTableInterface versionIdTable = SolbaseUtil.getTermVectorVersionIDTable();

        Term term = termDocMeta.getTerm();
        byte[] fieldTermKey = SolbaseUtil.generateTermKey(term);
        Delete delete = new Delete(fieldTermKey);

        try {
            versionIdTable.delete(delete);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            SolbaseUtil.releaseTable(versionIdTable);
        }
    }

    /**
     * By default this does not compare the current term vector with what is already in the term vector table.
     * 
     * @return boolean - whether the term vector was deleted
     */
    public boolean deleteTermVector(TermDocMetadata termDocMeta, int startDocId, int endDocId) {
        return this.deleteTermVector(termDocMeta, startDocId, endDocId, false);
    }

    /**
     *
     * @param termDocMeta - term vector to be deleted
     * @param startDocId
     * @param endDocId
     * @param compare - if true, compare the new and old term vectors and skip the delete when they are identical
     * @return boolean - whether the term vector was deleted
     */
    public boolean deleteTermVector(TermDocMetadata termDocMeta, int startDocId, int endDocId, boolean compare) {
        // when updating, the existing term doc metadata has to be deleted first
        // getting the term vector table
        HTableInterface termVectorTable = SolbaseUtil.getTermVectorTable();

        ResultScanner fieldScanner = null;

        try {
            byte[] key = termDocMeta.getFieldTermKey();
            int docNumber = termDocMeta.getDocId();

            Delete delete = null;

            switch (TermDocMetadataLoader.storageType) {
            case KEY_ONLY: {
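                // with KEY_ONLY the serialized metadata is part of the row key, so the exact
                // row is not known up front; scan the [term|docId, term|docId+1) key range to
                // locate it before deleting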
                byte[] termBeginKey = Bytes.add(key, SolbaseUtil.delimiter, Bytes.toBytes(docNumber));
                byte[] termEndKey = Bytes.add(key, SolbaseUtil.delimiter, Bytes.toBytes(docNumber + 1));

                Scan fieldScan = new Scan(termBeginKey, termEndKey);
                fieldScan.addFamily(SolbaseUtil.termVectorDocColumnFamilyName);
                fieldScanner = termVectorTable.getScanner(fieldScan);

                Result termDoc;

                termDoc = fieldScanner.next();
                fieldScanner.close();

                if (termDoc != null && !termDoc.isEmpty()) {
                    if (compare) {
                        byte[] oldRow = termDoc.getRow();
                        ByteBuffer buf = termDocMeta.serialize();
                        byte[] newRow = Bytes.add(Bytes.add(key, SolbaseUtil.delimiter, Bytes.toBytes(docNumber)),
                                Bytes.toBytes(buf));

                        // if term vector hasn't changed, don't bother deleting
                        if (!ArrayUtils.isEquals(oldRow, newRow)) {
                            delete = new Delete(termDoc.getRow());
                        }
                    } else {
                        delete = new Delete(termDoc.getRow());
                    }
                }
            }
                break;
            case WIDE_ROW:
                int chunkId = TermDocMetadataLoader.getChunkId(docNumber);
                delete = new Delete(Bytes.add(key, SolbaseUtil.delimiter, Bytes.toBytes(chunkId)));
                break;
            case NARROW_ROW:
            default: {
                delete = new Delete(Bytes.add(key, SolbaseUtil.delimiter, Bytes.toBytes(docNumber)));
            }
            }

            if (delete != null) {
                termVectorTable.delete(delete);
                logger.info("deleting term vector: " + termDocMeta.getTerm().toString() + " docId: " + docNumber);
                return true;
            }
        } catch (IOException e) {
            throw new SolbaseException(SolbaseException.ErrorCode.SERVER_ERROR, e.getMessage());
        } finally {
            if (fieldScanner != null) {
                fieldScanner.close();
            }
            SolbaseUtil.releaseTable(termVectorTable);
        }
        return false;
    }

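    /**
     * Tokenizes the given document and converts it into Solbase's HBase representation:
     * one TermDocMetadata per indexed term (term frequency, optional positions, offsets,
     * norms and embedded sort values) plus a single Put that carries the stored fields,
     * the docId and the serialized list of all indexed terms (kept so the document can
     * be deleted later).
     */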
    @SuppressWarnings("unchecked")
    public ParsedDoc parseDoc(Document doc, Analyzer analyzer, String indexName, int docNumber,
            List<String> sortFieldNames) throws CorruptIndexException, IOException {
        // given the doc, collect all of the terms we indexed
        List<Term> allIndexedTerms = new ArrayList<Term>();
        Map<String, byte[]> fieldCache = new HashMap<String, byte[]>(1024);

        // need to hold onto each TermDocMetadata so the list can be returned in the ParsedDoc
        List<TermDocMetadata> metadatas = new ArrayList<TermDocMetadata>();

        byte[] docId = Bytes.toBytes(docNumber);
        int position = 0;

        for (Fieldable field : (List<Fieldable>) doc.getFields()) {
            // Indexed field
            if (field.isIndexed() && field.isTokenized()) {

                TokenStream tokens = field.tokenStreamValue();

                if (tokens == null) {
                    tokens = analyzer.tokenStream(field.name(), new StringReader(field.stringValue()));
                }

                // collect term information per field
                Map<Term, Map<ByteBuffer, List<Number>>> allTermInformation = new ConcurrentSkipListMap<Term, Map<ByteBuffer, List<Number>>>();

                int lastOffset = 0;
                if (position > 0) {
                    position += analyzer.getPositionIncrementGap(field.name());
                }

                tokens.reset(); // reset the TokenStream to the first token

                // offsets
                OffsetAttribute offsetAttribute = null;
                if (field.isStoreOffsetWithTermVector())
                    offsetAttribute = (OffsetAttribute) tokens.addAttribute(OffsetAttribute.class);

                // positions
                PositionIncrementAttribute posIncrAttribute = null;
                if (field.isStorePositionWithTermVector())
                    posIncrAttribute = (PositionIncrementAttribute) tokens
                            .addAttribute(PositionIncrementAttribute.class);

                TermAttribute termAttribute = (TermAttribute) tokens.addAttribute(TermAttribute.class);

                // store field norms per term per document rather than per field;
                // this adds more to write but less to read on the other side
                int tokensInField = 0;

                while (tokens.incrementToken()) {
                    tokensInField++;
                    Term term = new Term(field.name(), termAttribute.term());

                    allIndexedTerms.add(term);

                    // fetch all collected information for this term
                    Map<ByteBuffer, List<Number>> termInfo = allTermInformation.get(term);

                    if (termInfo == null) {
                        termInfo = new ConcurrentSkipListMap<ByteBuffer, List<Number>>();
                        allTermInformation.put(term, termInfo);
                    }

                    // term frequency
                    List<Number> termFrequency = termInfo.get(TermDocMetadata.termFrequencyKeyBytes);
                    if (termFrequency == null) {
                        termFrequency = new ArrayList<Number>();
                        termFrequency.add(new Integer(0));
                        termInfo.put(TermDocMetadata.termFrequencyKeyBytes, termFrequency);
                    }
                    // increment
                    termFrequency.set(0, termFrequency.get(0).intValue() + 1);

                    // position vector
                    if (field.isStorePositionWithTermVector()) {
                        position += (posIncrAttribute.getPositionIncrement() - 1);

                        List<Number> positionVector = termInfo.get(TermDocMetadata.positionVectorKeyBytes);

                        if (positionVector == null) {
                            positionVector = new ArrayList<Number>();
                            termInfo.put(TermDocMetadata.positionVectorKeyBytes, positionVector);
                        }

                        positionVector.add(++position);
                    }

                    // term offsets
                    if (field.isStoreOffsetWithTermVector()) {

                        List<Number> offsetVector = termInfo.get(TermDocMetadata.offsetVectorKeyBytes);
                        if (offsetVector == null) {
                            offsetVector = new ArrayList<Number>();
                            termInfo.put(TermDocMetadata.offsetVectorKeyBytes, offsetVector);
                        }

                        offsetVector.add(lastOffset + offsetAttribute.startOffset());
                        offsetVector.add(lastOffset + offsetAttribute.endOffset());

                    }

                    List<Number> sortValues = new ArrayList<Number>();
                    // init sortValues
                    for (int i = 0; i < Scorer.numSort; i++) {
                        sortValues.add(new Integer(-1));
                    }

                    int order = 0;

                    // extract sort field value and store it in term doc metadata obj
                    for (String fieldName : sortFieldNames) {
                        Fieldable fieldable = doc.getFieldable(fieldName);

                        if (fieldable instanceof EmbeddedSortField) {
                            EmbeddedSortField sortField = (EmbeddedSortField) fieldable;

                            int value = -1;
                            if (sortField.stringValue() != null) {
                                value = Integer.parseInt(sortField.stringValue());
                            }
                            int sortSlot = sortField.getSortSlot();

                            sortValues.set(sortSlot - 1, new Integer(value));
                        } else {
                            // TODO: this logic is used for real time indexing.
                            // hacky. depending on order of sort field names in array
                            int value = -1;
                            if (fieldable.stringValue() != null) {
                                value = Integer.parseInt(fieldable.stringValue());
                            }
                            sortValues.set(order++, new Integer(value));
                        }
                    }
                    termInfo.put(TermDocMetadata.sortFieldKeyBytes, sortValues);
                }

                List<Number> bnorm = null;
                if (!field.getOmitNorms()) {
                    bnorm = new ArrayList<Number>();
                    float norm = doc.getBoost();
                    norm *= field.getBoost();
                    norm *= similarity.lengthNorm(field.name(), tokensInField);
                    bnorm.add(Similarity.encodeNorm(norm));
                }

                for (Map.Entry<Term, Map<ByteBuffer, List<Number>>> term : allTermInformation.entrySet()) {
                    Term tempTerm = term.getKey();

                    byte[] fieldTermKeyBytes = SolbaseUtil.generateTermKey(tempTerm);

                    // Mix in the norm for this field alongside each term
                    // more writes but faster on read side.
                    if (!field.getOmitNorms()) {
                        term.getValue().put(TermDocMetadata.normsKeyBytes, bnorm);
                    }

                    TermDocMetadata data = new TermDocMetadata(docNumber, term.getValue(), fieldTermKeyBytes,
                            tempTerm);
                    metadatas.add(data);
                }
            }

            // Untokenized fields go in without a termPosition
            if (field.isIndexed() && !field.isTokenized()) {
                Term term = new Term(field.name(), field.stringValue());
                allIndexedTerms.add(term);

                byte[] fieldTermKeyBytes = SolbaseUtil.generateTermKey(term);

                Map<ByteBuffer, List<Number>> termMap = new ConcurrentSkipListMap<ByteBuffer, List<Number>>();
                termMap.put(TermDocMetadata.termFrequencyKeyBytes, Arrays.asList(new Number[] {}));
                termMap.put(TermDocMetadata.positionVectorKeyBytes, Arrays.asList(new Number[] {}));

                TermDocMetadata data = new TermDocMetadata(docNumber, termMap, fieldTermKeyBytes, term);
                metadatas.add(data);
            }

            // Stores each field as a column under this doc key
            if (field.isStored()) {

                byte[] _value = field.isBinary() ? field.getBinaryValue() : Bytes.toBytes(field.stringValue());

                // last byte flags whether the value is binary or not
                byte[] value = new byte[_value.length + 1];
                System.arraycopy(_value, 0, value, 0, _value.length);

                value[value.length - 1] = (byte) (field.isBinary() ? Byte.MAX_VALUE : Byte.MIN_VALUE);

                // logic to handle multiple fields w/ same name
                byte[] currentValue = fieldCache.get(field.name());
                if (currentValue == null) {
                    fieldCache.put(field.name(), value);
                } else {

                    // append new data
                    byte[] newValue = new byte[currentValue.length + SolbaseUtil.delimiter.length + value.length
                            - 1];
                    System.arraycopy(currentValue, 0, newValue, 0, currentValue.length - 1);
                    System.arraycopy(SolbaseUtil.delimiter, 0, newValue, currentValue.length - 1,
                            SolbaseUtil.delimiter.length);
                    System.arraycopy(value, 0, newValue, currentValue.length + SolbaseUtil.delimiter.length - 1,
                            value.length);

                    fieldCache.put(field.name(), newValue);
                }
            }
        }

        Put documentPut = new Put(SolbaseUtil.randomize(docNumber));

        // Store each field as a column under this docId
        for (Map.Entry<String, byte[]> field : fieldCache.entrySet()) {
            documentPut.add(Bytes.toBytes("field"), Bytes.toBytes(field.getKey()), field.getValue());
        }

        // in case of real time update, we need to add back docId field
        if (!documentPut.has(Bytes.toBytes("field"), Bytes.toBytes("docId"))) {

            byte[] docIdStr = Bytes.toBytes(new Integer(docNumber).toString());
            // last byte flags whether the value is binary or not (docId is never binary)
            byte[] value = new byte[docIdStr.length + 1];
            System.arraycopy(docIdStr, 0, value, 0, docIdStr.length);

            value[value.length - 1] = (byte) (Byte.MIN_VALUE);
            documentPut.add(Bytes.toBytes("field"), Bytes.toBytes("docId"), value);
        }

        // Finally, store metadata so we can delete this document later
        documentPut.add(Bytes.toBytes("allTerms"), Bytes.toBytes("allTerms"),
                SolbaseUtil.toBytes(allIndexedTerms).array());

        ParsedDoc parsedDoc = new ParsedDoc(metadatas, doc, documentPut, fieldCache.entrySet(), allIndexedTerms);
        return parsedDoc;

    }

    public void updateDocument(Term updateTerm, Document doc, Analyzer analyzer, int docNumber,
            List<String> sortFieldNames) throws CorruptIndexException, IOException {
        // we treat add/update the same
        throw new UnsupportedOperationException();
    }

    public int docCount() {
        throw new RuntimeException("not supported");
    }

    public String getIndexName() {
        return indexName.get();
    }

    public void setIndexName(String indexName) {
        IndexWriter.indexName.set(indexName);
    }

}
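
For context, here is a minimal sketch of how this writer might be driven from indexing code. It is not part of the listing above: it assumes a reachable HBase cluster with the Solbase tables already created, the Lucene 2.9/3.0-era API that matches the TermAttribute usage above, and ParsedDoc accessors named getTermDocMetadatas() and getDocumentPut(), which are illustrative guesses rather than the confirmed org.solbase.indexer.ParsedDoc API.

import java.util.Arrays;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.util.Version;
import org.solbase.indexer.ParsedDoc;
import org.solbase.lucenehbase.IndexWriter;
import org.solbase.lucenehbase.TermDocMetadata;

public class IndexWriterUsageSketch {
    public static void main(String[] args) throws Exception {
        // build a small Lucene document with the uniq id field Solbase expects
        Document doc = new Document();
        doc.add(new Field("global_uniq_id", "user-42", Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("body", "hello solbase", Field.Store.YES, Field.Index.ANALYZED));

        IndexWriter writer = new IndexWriter("exampleIndex");
        int docNumber = 42;

        // tokenize the document and build the per-term metadata plus the document Put
        ParsedDoc parsed = writer.parseDoc(doc, new StandardAnalyzer(Version.LUCENE_29),
                "exampleIndex", docNumber, Arrays.<String> asList());

        // accessor names below are assumptions; adjust them to the real ParsedDoc API
        for (TermDocMetadata meta : parsed.getTermDocMetadatas()) {
            writer.addTermVector(meta);
        }
        writer.addDocument(parsed.getDocumentPut(), doc);
    }
}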