org.paxle.se.index.lucene.impl.Converter.java Source code

Java tutorial

Introduction

Here is the source code for org.paxle.se.index.lucene.impl.Converter.java

Source

/**
 * This file is part of the Paxle project.
 * Visit http://www.paxle.net for more information.
 * Copyright 2007-2010 the original author or authors.
 *
 * Licensed under the terms of the Common Public License 1.0 ("CPL 1.0").
 * Any use, reproduction or distribution of this program constitutes the recipient's acceptance of this agreement.
 * The full license text is available under http://www.opensource.org/licenses/cpl1.0.txt
 * or in the file LICENSE.txt in the root directory of the Paxle distribution.
 *
 * Unless required by applicable law or agreed to in writing, this software is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

package org.paxle.se.index.lucene.impl;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.text.ParseException;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.paxle.core.doc.IIndexerDocument;
import org.paxle.se.index.IFieldManager;

/**
 * Converts between Paxle {@link IIndexerDocument}s and Lucene {@link Document}s.
 * <p>
 * The direction Paxle -&gt; Lucene ({@link #iindexerDoc2LuceneDoc(IIndexerDocument)},
 * {@link #any2field(org.paxle.core.doc.Field, Object)}) needs an instance because array
 * fields are wrapped with the filters of the configured {@link PaxleAnalyzer}. The
 * direction Lucene -&gt; Paxle ({@link #luceneDoc2IIndexerDoc(Document, IIndexerDocument)},
 * {@link #field2any(Fieldable, org.paxle.core.doc.Field)}) is static and resolves field
 * definitions through the static {@link #fieldManager}.
 */
public class Converter {

    private static final Log logger = LogFactory.getLog(Converter.class);

    /** Option bit for {@link #termVector(org.paxle.core.doc.Field, int)}: record term positions. */
    private static final int TV_POSITIONS = 1;
    /** Option bit for {@link #termVector(org.paxle.core.doc.Field, int)}: record term offsets. */
    private static final int TV_OFFSETS = 2;

    /**
     * Package private. Used by {@link #luceneDoc2IIndexerDoc(Document, IIndexerDocument)} to look
     * up the Paxle field definition for a Lucene field name. It is dereferenced there without a
     * null check, so it has to be assigned before that method is called.
     */
    static IFieldManager fieldManager = null;

    private final PaxleAnalyzer pa;

    /**
     * @param pa the analyzer whose default filters are applied to array fields and whose
     *           token counters are exposed by this converter
     */
    public Converter(final PaxleAnalyzer pa) {
        this.pa = pa;
    }

    /**
     * @return the analyzer this converter was created with
     */
    public PaxleAnalyzer getAnalyzer() {
        return pa;
    }

    /**
     * Sums up the token counts of all token counters of the analyzer.
     *
     * @return the accumulated token count, or <code>0</code> if the analyzer has no counters
     */
    public int getCountersAccumulated() {
        int total = 0;

        final Map<String, TokenCounter> counters = this.pa.getTokenCounters();
        if (counters != null) {
            for (TokenCounter counter : counters.values()) {
                total += counter.tokenCount;
            }
        }

        return total;
    }

    /**
     * Resets the token counters of the analyzer.
     */
    public void resetCounters() {
        this.pa.resetTokenCounters();
    }

    /**
     * Converts all fields of a Paxle indexer document into a new Lucene document.
     * Fields which could not be converted (see {@link #any2field(org.paxle.core.doc.Field, Object)})
     * are logged and left out.
     *
     * @param document the Paxle document to convert
     * @return a new Lucene document containing all convertible fields
     */
    public Document iindexerDoc2LuceneDoc(IIndexerDocument document) {
        final Document doc = new Document();
        for (final Map.Entry<org.paxle.core.doc.Field<?>, Object> entry : document) {
            final org.paxle.core.doc.Field<?> key = entry.getKey();
            final Fieldable field = any2field(key, entry.getValue());
            if (field == null) {
                logger.error("Found null-field: " + entry.getKey() + " / " + entry.getValue());
            } else {
                doc.add(field);
            }
        }
        return doc;
    }

    /**
     * Converts a single Paxle field value into a Lucene field. The conversion is chosen by
     * the declared type of the Paxle field, not by the runtime type of <code>data</code>.
     *
     * @param field the Paxle field definition
     * @param data  the value of the field
     * @return the Lucene field, or <code>null</code> if <code>data</code> is <code>null</code>,
     *         the field type is not supported, the data is an array of primitives that cannot
     *         be tokenized, or an I/O error occurred while accessing the data
     */
    public Fieldable any2field(org.paxle.core.doc.Field<?> field, Object data) {
        final Class<?> type = field.getType();
        if (data == null) {
            // The converters below (and Lucene's Field constructors) dereference the value;
            // a missing value is reported like any other non-convertible field.
            return null;
        }

        try {
            if (String.class.isAssignableFrom(type)) {
                return string2field(field, (String) data);

            } else if (Reader.class.isAssignableFrom(type)) {
                return reader2field(field, (Reader) data);

            } else if (File.class.isAssignableFrom(type)) {
                return file2field(field, (File) data);

            } else if (Date.class.isAssignableFrom(type)) {
                return date2field(field, (Date) data);

            } else if (Number.class.isAssignableFrom(type)) {
                return number2field(field, (Number) data);

            } else if (byte[].class.isAssignableFrom(type) && field.isSavePlain()) {
                return byteArray2field(field, (byte[]) data);

            } else if (type.isArray()) {
                if (!(data instanceof Object[])) {
                    // Primitive arrays (including a byte[] that is not stored plain) are not
                    // Object[] and would fail the cast below with a ClassCastException.
                    logger.error("Unsupported array data of type '" + data.getClass().getName()
                            + "' for field '" + field + "'");
                    return null;
                }
                return array2field(field, (Object[]) data);

            } else {
                // TODO: unsupported field type
                return null;
            }
        } catch (FileNotFoundException e) {
            logger.error("File '" + data + "' not found for field '" + field + "'", e);
            return null;
        } catch (IOException e) {
            logger.error("I/O exception during conversion of field '" + field + "'", e);
            return null;
        }
    }

    private static Fieldable string2field(org.paxle.core.doc.Field<?> field, String data) {
        /* ===========================================================
         * Strings
         * - may be stored (if so, then compressed)
         * - may be indexed (for tokenization see LuceneWriter.write(IIndexerDocument))
         * - position term vectors
         * =========================================================== */
        return new Field(field.getName(), data, store(field, true), index(field), termVector(field, TV_POSITIONS));
    }

    private Fieldable array2field(org.paxle.core.doc.Field<?> field, Object[] data) {
        /* ===========================================================
         * Arrays
         * - is not stored
         * - is indexed (for tokenization see LuceneWriter.write(IIndexerDocument))
         * - no term vectors
         * =========================================================== */
        final ArrayTokenStream ats = new ArrayTokenStream(data);
        return new Field(field.getName(), pa.wrapDefaultFilters(field.getName(), ats));
    }

    private static Fieldable number2field(org.paxle.core.doc.Field<?> field, Number data) {
        /* ===========================================================
         * Numbers
         * - may be stored (if so, not compressed)
         * - may be indexed (no tokenization)
         * - no term vectors
         * =========================================================== */
        // Floating point values are carried as their raw IEEE bit pattern so that
        // field2number can restore them exactly.
        final long num;
        if (data instanceof Double) {
            num = Double.doubleToLongBits(data.doubleValue());
        } else if (data instanceof Float) {
            num = Float.floatToIntBits(data.floatValue());
        } else {
            num = data.longValue();
        }

        if (field.isIndex()) {
            // indexed numbers are written in string form
            return new Field(field.getName(), PaxleNumberTools.longToString(num), store(field, false),
                    index(field));
        } else {
            // non-indexed numbers are written in binary form
            return new Field(field.getName(), PaxleNumberTools.toBytes(num), store(field, false));
        }
    }

    private static Fieldable date2field(org.paxle.core.doc.Field<?> field, Date data) {
        /* ===========================================================
         * Dates
         * - may be stored (if so, then not compressed)
         * - may be indexed (no tokenization)
         * - no term vectors
         * =========================================================== */
        return new Field(field.getName(), DateTools.dateToString(data, DateTools.Resolution.MILLISECOND),
                store(field, false), index(field));
    }

    private Fieldable reader2field(org.paxle.core.doc.Field<?> field, Reader data) throws IOException {
        /* ===========================================================
         * Readers
         * - not stored
         * - indexed (for tokenization see LuceneWriter.write(IIndexerDocument))
         * - position term vectors
         * =========================================================== */
        return new Field(field.getName(), data, termVector(field, TV_POSITIONS));
    }

    private Fieldable file2field(org.paxle.core.doc.Field<?> field, File data) throws IOException {
        /* ===========================================================
         * Files
         * - not stored
         * - indexed (for tokenization see LuceneWriter.write(IIndexerDocument))
         * - position term vectors
         * =========================================================== */
        // NOTE(review): FileReader decodes using the platform default charset - confirm that
        // this matches the encoding the backing files are written with.
        // The reader is handed over to the Lucene field and is not closed here.
        return reader2field(field, new FileReader(data));
    }

    private static Fieldable byteArray2field(org.paxle.core.doc.Field<?> field, byte[] data) {
        /* ===========================================================
         * byte[]-arrays
         * - is stored (not compressed)
         * - is not indexed
         * =========================================================== */
        return new Field(field.getName(), data, store(field, false));
    }

    /**
     * Maps the index/tokenize flags of a Paxle field to the Lucene index mode.
     */
    private static Field.Index index(org.paxle.core.doc.Field<?> field) {
        if (!field.isIndex()) {
            return Field.Index.NO;
        }
        return field.isTokenize() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED;
    }

    /**
     * Maps the save-plain flag of a Paxle field to the Lucene store mode.
     *
     * @param compress whether a stored value shall be compressed
     */
    private static Field.Store store(org.paxle.core.doc.Field<?> field, boolean compress) {
        if (!field.isSavePlain()) {
            return Field.Store.NO;
        }
        return compress ? Field.Store.COMPRESS : Field.Store.YES;
    }

    /**
     * Determines the Lucene term vector mode for a field.
     *
     * @param options <code>0</code> or a bitwise combination of {@link #TV_POSITIONS} and {@link #TV_OFFSETS}
     * @return {@link Field.TermVector#NO} if the field is not indexed, otherwise the mode
     *         selected by <code>options</code>
     * @throws IllegalArgumentException if <code>options</code> contains unknown bits
     */
    private static Field.TermVector termVector(org.paxle.core.doc.Field<?> field, int options) {
        if (!field.isIndex()) {
            return Field.TermVector.NO;
        }

        switch (options) {
        case 0:
            return Field.TermVector.NO;
        case TV_POSITIONS:
            return Field.TermVector.WITH_POSITIONS;
        case TV_OFFSETS:
            return Field.TermVector.WITH_OFFSETS;
        case (TV_OFFSETS | TV_POSITIONS):
            return Field.TermVector.WITH_POSITIONS_OFFSETS;
        default:
            throw new IllegalArgumentException(
                    "illegal value for options " + options + " during processing field '" + field + "'");
        }
    }

    /* ========================================================================== */
    /* ========================================================================== */
    /* ========================================================================== */

    /**
     * Copies all stored fields of a Lucene document into a Paxle indexer document.
     * Lucene fields which are not stored, or for which {@link #fieldManager} knows no
     * Paxle field, are skipped.
     *
     * @param ldoc the Lucene document to read from
     * @param idoc the Paxle document to fill
     * @throws ParseException if a date field could not be parsed
     * @throws IOException    if reading a field value failed
     */
    @SuppressWarnings("unchecked")
    public static void luceneDoc2IIndexerDoc(final Document ldoc, final IIndexerDocument idoc)
            throws ParseException, IOException {
        for (final Object next : ldoc.getFields()) {
            final Fieldable field = (Fieldable) next;
            if (!field.isStored()) {
                continue;
            }

            final org.paxle.core.doc.Field pfield = fieldManager.get(field.name());
            if (pfield != null) {
                idoc.set(pfield, field2any(field, pfield));
            }
        }
    }

    /**
     * Converts the value of a Lucene field back into the type declared by the Paxle field.
     *
     * @param lfield the Lucene field to read the value from
     * @param pfield the Paxle field definition determining the target type
     * @return the converted value, or <code>null</code> if the type is not supported or the
     *         Lucene field carries no value of the required kind
     * @throws ParseException if a date value could not be parsed
     * @throws IOException    if reading the field value failed
     */
    public static <E extends Serializable> E field2any(Fieldable lfield, org.paxle.core.doc.Field<E> pfield)
            throws ParseException, IOException {
        final Class<E> type = pfield.getType();

        if (String.class.isAssignableFrom(type)) {
            return type.cast(field2string(lfield, pfield));

        } else if (Reader.class.isAssignableFrom(type)) {
            return type.cast(field2reader(lfield, pfield));

        } else if (File.class.isAssignableFrom(type)) {
            return type.cast(field2file(lfield, pfield));

        } else if (Date.class.isAssignableFrom(type)) {
            return type.cast(field2date(lfield, pfield));

        } else if (Number.class.isAssignableFrom(type)) {
            return type.cast(field2number(lfield, pfield));

        } else if (byte[].class.isAssignableFrom(type) && pfield.isSavePlain()) {
            return type.cast(field2byteArray(lfield, pfield));

        } else if (type.isArray()) {
            return type.cast(field2array(lfield, pfield));

        } else {
            // TODO: unsupported field type
            return null;
        }
    }

    private static String field2string(Fieldable lfield, org.paxle.core.doc.Field<?> pfield) {
        return lfield.stringValue();
    }

    private static Reader field2reader(Fieldable lfield, org.paxle.core.doc.Field<?> pfield) {
        return lfield.readerValue();
    }

    private static File field2file(Fieldable lfield, org.paxle.core.doc.Field<?> pfield) throws IOException {
        // TODO: create temp file, copy reader value into and return it
        return null;
    }

    private static Date field2date(Fieldable lfield, org.paxle.core.doc.Field<?> pfield) throws ParseException {
        return DateTools.stringToDate(lfield.stringValue());
    }

    /**
     * Restores a number written by {@link #number2field(org.paxle.core.doc.Field, Number)}.
     * The result is narrowed to the declared type of the Paxle field so that the
     * <code>Class.cast</code> in {@link #field2any(Fieldable, org.paxle.core.doc.Field)}
     * succeeds for <code>Integer</code>, <code>Short</code> and <code>Byte</code> fields too;
     * all other non floating point types are returned as <code>Long</code>.
     */
    private static Number field2number(Fieldable lfield, org.paxle.core.doc.Field<?> pfield) {
        // mirror of number2field: indexed numbers are in string form, others in binary form
        final long num;
        if (pfield.isIndex()) {
            num = PaxleNumberTools.stringToLong(lfield.stringValue());
        } else {
            num = PaxleNumberTools.toLong(lfield.binaryValue());
        }

        final Class<?> type = pfield.getType();
        if (Double.class.isAssignableFrom(type)) {
            return Double.valueOf(Double.longBitsToDouble(num));
        } else if (Float.class.isAssignableFrom(type)) {
            return Float.valueOf(Float.intBitsToFloat((int) num));
        } else if (Integer.class.isAssignableFrom(type)) {
            return Integer.valueOf((int) num);
        } else if (Short.class.isAssignableFrom(type)) {
            return Short.valueOf((short) num);
        } else if (Byte.class.isAssignableFrom(type)) {
            return Byte.valueOf((byte) num);
        } else {
            return Long.valueOf(num);
        }
    }

    private static byte[] field2byteArray(Fieldable lfield, org.paxle.core.doc.Field<?> pfield) {
        return lfield.binaryValue();
    }

    /**
     * Collects the terms of the token stream of a Lucene field into an array of strings.
     * If the component type of the Paxle field's array type can hold strings, the returned
     * array is created with exactly that component type (e.g. <code>String[]</code>), so it
     * can be cast to the declared field type; otherwise a plain <code>Object[]</code> is returned.
     *
     * @return the terms, or <code>null</code> if the Lucene field has no token stream
     */
    private static Object[] field2array(Fieldable lfield, org.paxle.core.doc.Field<?> pfield) throws IOException {
        final TokenStream ts = lfield.tokenStreamValue();
        if (ts == null) {
            return null;
        }

        final LinkedList<Object> terms = new LinkedList<Object>();
        Token token = new Token();
        while ((token = ts.next(token)) != null) {
            terms.add(String.valueOf(token.termBuffer(), 0, token.termLength()));
        }

        final Class<?> componentType = pfield.getType().getComponentType();
        if (componentType != null && componentType.isAssignableFrom(String.class)) {
            // componentType is a reference type here, so the new array is an Object[]
            final Object[] typed = (Object[]) java.lang.reflect.Array.newInstance(componentType, terms.size());
            return terms.toArray(typed);
        }
        return terms.toArray(new Object[terms.size()]);
    }
}