org.ozsoft.xantippe.Indexer.java Source code

Java tutorial

Introduction

Here is the source code for org.ozsoft.xantippe.Indexer.java

Source

// This file is part of the Xantippe XML database.
//
// Copyright 2008 Oscar Stigter
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.ozsoft.xantippe;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Indexer for XML documents.
 *
 * Parses a document with SAX and, for every element whose path matches one of
 * the collection's indices, hands the element's (trimmed, non-empty) text to
 * the collection as an index value.
 *
 * An instance reuses a single SAX parser and a single content handler for all
 * calls to {@link #index(Document, File, Collection)}; the handler carries
 * per-parse state, so concurrent calls on the same instance would interfere
 * with each other.
 *
 * @author Oscar Stigter
 */
/* package */ class Indexer {

    /** Log */
    private static final Log LOG = LogFactory.getLog(Indexer.class);

    /** SAX parser. */
    private final SAXParser parser;

    /** Content handler to extract the index values. */
    private final IndexHandler handler = new IndexHandler();

    /**
     * Creates an indexer backed by a namespace-aware SAX parser.
     *
     * @throws RuntimeException
     *             if the SAX parser could not be instantiated; the original
     *             exception is attached as the cause
     */
    /* package */ Indexer() {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setNamespaceAware(true);
        try {
            parser = factory.newSAXParser();
        } catch (Exception e) {
            // Keep the original exception as the cause so its stack trace is not lost.
            throw new RuntimeException("Error instantiating SAX parser: " + e.getMessage(), e);
        }
    }

    /**
     * Indexes a document according to the indices of a collection.
     *
     * Does nothing when the collection has no indices. Parse errors are logged
     * and not propagated to the caller.
     *
     * @param doc
     *            the document being indexed; its ID is stored with each index value
     * @param file
     *            the file with the document's XML content
     * @param col
     *            the collection that supplies the indices and receives the values
     */
    public void index(Document doc, File file, Collection col) {
        if (col.getIndices(true).isEmpty()) {
            // Nothing to index against.
            return;
        }

        boolean debug = LOG.isDebugEnabled();
        if (debug) {
            LOG.debug(String.format("Indexing document '%s'", doc));
        }

        long startTime = System.currentTimeMillis();

        handler.setDocument(doc);
        handler.setCollection(col);

        try {
            parser.parse(file, handler);
        } catch (Exception e) {
            // Should never happen; document has already been validated.
            String msg = String.format("Error while indexing document '%s': %s", doc, e.getMessage());
            LOG.error(msg, e);
        }

        if (debug) {
            long duration = System.currentTimeMillis() - startTime;
            LOG.debug(String.format("Indexed document in %d ms.", duration));
        }
    }

    /**
     * SAX content handler that tracks the current element path and reports
     * matching element text to the collection as index values.
     */
    private static class IndexHandler extends DefaultHandler {

        /** Local names of the currently open elements, outermost first. */
        private final List<String> elements;

        /** Collection that receives the index values. */
        private Collection collection;

        /** Indices to match element paths against. */
        private Set<Index> indices;

        /** Document currently being indexed. */
        private Document document;

        /** Character data collected since the most recent start or end tag. */
        private StringBuilder sb;

        public IndexHandler() {
            elements = new ArrayList<String>();
        }

        /**
         * Sets the target collection and caches its indices for the next parse.
         *
         * @param col
         *            the collection to index into
         */
        public void setCollection(Collection col) {
            collection = col;
            indices = col.getIndices(true);
        }

        /**
         * Sets the document whose ID is recorded with each index value.
         *
         * @param doc
         *            the document being indexed
         */
        public void setDocument(Document doc) {
            document = doc;
        }

        /**
         * Resets the per-parse state. The handler is reused across documents,
         * and a parse that aborted halfway would otherwise leave its open
         * elements on the path stack and corrupt the paths of the next document.
         */
        @Override
        public void startDocument() throws SAXException {
            elements.clear();
            sb = new StringBuilder();
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attr) throws SAXException {
            elements.add(localName);
            // NOTE(review): the buffer is reset at every start and end tag, so
            // for mixed content only the text after the last child element is
            // seen for the parent. Confirm this is the intended index value.
            sb = new StringBuilder();
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            String path = getCurrentPath();
            String text = null; // Trimmed lazily, only if some index matches.
            for (Index index : indices) {
                String indexPath = index.getPath();
                // Either an exact path match, or a "//"-prefixed index path
                // whose remainder (with a single leading '/') ends the path.
                boolean matches = path.equals(indexPath)
                        || (indexPath.startsWith("//") && path.endsWith(indexPath.substring(1)));
                if (!matches) {
                    continue;
                }
                if (text == null) {
                    text = sb.toString().trim();
                }
                if (text.length() != 0) {
                    Object value = getIndexValue(text, index.getType());
                    if (value != null) {
                        collection.addIndexValue(index.getName(), value, document.getId());
                    }
                }
            }
            elements.remove(elements.size() - 1);
            sb = new StringBuilder();
        }

        @Override
        public void characters(char[] buffer, int offset, int length) throws SAXException {
            sb.append(buffer, offset, length);
        }

        /**
         * Builds the path of the current element from the open element names,
         * each prefixed with '/'.
         *
         * @return the path, or an empty string when no element is open
         */
        private String getCurrentPath() {
            StringBuilder sb2 = new StringBuilder();
            for (String element : elements) {
                sb2.append('/');
                sb2.append(element);
            }
            return sb2.toString();
        }

        /**
         * Converts element text to a value of the given index type.
         *
         * @param text
         *            the trimmed element text
         * @param type
         *            the type of the index
         * @return the converted value, or {@code null} if the text cannot be
         *         parsed as the requested type or the type is not supported
         *         (DATE is not implemented yet)
         */
        private Object getIndexValue(String text, IndexType type) {
            Object value = null;

            switch (type) {
            case STRING:
                value = text;
                break;
            case INTEGER:
                try {
                    value = Integer.parseInt(text);
                } catch (NumberFormatException e) {
                    // Not a valid integer; leave the value unindexed.
                }
                break;
            case LONG:
                try {
                    value = Long.parseLong(text);
                } catch (NumberFormatException e) {
                    // Not a valid long; leave the value unindexed.
                }
                break;
            case DOUBLE:
                try {
                    value = Double.parseDouble(text);
                } catch (NumberFormatException e) {
                    // Not a valid double; leave the value unindexed.
                }
                break;
            case FLOAT:
                try {
                    value = Float.parseFloat(text);
                } catch (NumberFormatException e) {
                    // Not a valid float; leave the value unindexed.
                }
                break;
            case DATE:
                //TODO: Parse Date index value
                break;
            default:
                // Invalid index type; ignore.
            }

            return value;
        }

    } // IndexHandler

}