com.useekm.indexing.postgis.IndexedStatement.java Source code

Java tutorial

Introduction

Here is the source code for com.useekm.indexing.postgis.IndexedStatement.java

Source

/*
 * Copyright 2010 by TalkingTrends (Amsterdam, The Netherlands)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://opensahara.com/licenses/apache-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
package com.useekm.indexing.postgis;

import info.aduna.collections.iterators.CloseableIterator;
import info.aduna.collections.iterators.EmptyIterator;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Date;
import java.util.Locale;
import java.util.NoSuchElementException;

import org.apache.commons.lang.Validate;
import org.openrdf.model.BNode;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.vocabulary.XMLSchema;
import org.postgis.PGgeometry;
import org.postgis.binary.BinaryParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.useekm.indexing.exception.IndexException;

import edu.ncsa.sstde.indexing.IndexingSail;
import edu.ncsa.sstde.indexing.IndexingSailConnection;
import edu.ncsa.sstde.indexing.postgis.PostgisIndexerSettings;

/**
 * An IndexedStatement is the indexed version of a statement/triple in the
 * {@link IndexingSailConnection}, and is stored in a Postgres database with
 * indexes designed for faster lookup or added search functionality.
 * <p>
 * IndexedStatement has support for geospatial (by using a Postgres based RTree)
 * and free text search (by using a Postgres based inverted index). Whether a
 * triple is stored in the Triple Store, in the Postgres database, or in both is
 * decided by the settings of the {@link IndexingSailConnection}.
 * <p>
 * Absent datatypes and languages are represented by the {@link #NULL} marker
 * string rather than by a Java/SQL {@code null}.
 * 
 * @see IndexingSailConnection
 * @see IndexingSail
 * @see PostgisIndexerSettings
 */
public class IndexedStatement {
    private static final Logger LOG = LoggerFactory.getLogger(IndexedStatement.class);

    private static final String EQQ = "=?";
    private static final String EQMD5 = "=md5(?)";
    private static final String WHERE = " WHERE ";
    private static final String DELETE_FROM_STS = "DELETE FROM ";
    // Surrounded by spaces so that a bind placeholder is never glued to the
    // keyword ("subject=? AND predicate=?" instead of "subject=?AND predicate=?").
    private static final String AND = " AND ";
    static final String SUBJECT = "subject";
    public static final String PREDICATE = "predicate";
    static final String OBJECT_URI = "objectUri";
    static final String OBJECT_STRING = "objectString";
    static final String OBJECT_TYPE = "objectType";
    public static final String OBJECT_LANGUAGE = "objectLanguage";
    static final String OBJECT_DATE = "objectDate";
    static final String OBJECT_SPATIAL = "objectSpatial";
    static final String OBJECT_TS_VECTOR_CONFIG = "objectTsVectorConfig";
    static final String OBJECT_TS_VECTOR = "objectTsVector";
    /** Marker stored instead of an absent datatype or language. */
    static final String NULL = "$NULL$";
    /** Initial capacity of the builder used to assemble the standard queries. */
    private static final int MAX_SELECT_LEN = 250;
    private static final String INSERT_INTO = "INSERT INTO ";
    // The column order here must match the binding order in bindInsertParameters().
    private static final String INSERT_VALUES = "(objectDate,objectLanguage,objectSpatial,objectString,objectTsVectorConfig,objectType,objectUri,predicate,subject)"
            + "VALUES(?,?,?,?,?,?,?,?,?)";
    // The column order here must match the read order in convert().
    private static final String SELECT = "SELECT objectDate,objectLanguage,objectSpatial,objectString,objectTsVectorConfig,objectType,objectUri,predicate,subject "
            + "FROM ";

    private static final EmptyIterator<IndexedStatement> EMPTY = new EmptyIterator<IndexedStatement>();

    private String subject;
    String predicate;
    private boolean objectUri;
    private String objectString;
    private Date objectDate;
    private String objectType;
    String objectLanguage;
    private PGgeometry objectSpatial;
    private String objectTsVectorConfig;

    /**
     * Creates an empty instance; used by {@link #convert(ResultSet)} to
     * populate the fields from a result row.
     */
    protected IndexedStatement() {
    }

    /**
     * Creates the indexed representation of a triple.
     * 
     * @param subject
     *            The subject, must not be null.
     * @param predicate
     *            The predicate, must not be null.
     * @param object
     *            The object, must be a {@link Literal} or a {@link URI}.
     * @param tsVectorConfig
     *            The text search configuration stored with literal objects;
     *            ignored for URI objects.
     * 
     * @throws IllegalArgumentException
     *             If subject or predicate is null, or if the object is neither
     *             a literal nor a URI.
     * @throws IndexException
     *             If a literal object can not be parsed as a geometry.
     */
    IndexedStatement(Resource subject, URI predicate, Value object, String tsVectorConfig) throws IndexException {
        // Fail with the documented IllegalArgumentException instead of a
        // NullPointerException on subject.stringValue().
        Validate.notNull(subject);
        Validate.notNull(predicate);
        Validate.isTrue(object instanceof Literal || object instanceof URI);
        this.subject = subject.stringValue();
        this.predicate = predicate.stringValue();
        this.objectUri = object instanceof URI;
        if (this.objectUri)
            initUriObject(object);
        else
            initLiteralObject((Literal) object, tsVectorConfig);
    }

    /** Initializes the object fields for a URI object: no datatype, no language. */
    private void initUriObject(Value object) {
        this.objectType = NULL;
        this.objectLanguage = NULL;
        this.objectString = object.stringValue();
    }

    /**
     * Initializes the object fields for a literal object, including its
     * geometry and, for xsd:date and xsd:dateTime literals, its date value.
     */
    private void initLiteralObject(Literal object, String tsVectorConfig) throws IndexException {
        this.objectType = getDataTypeAsString(object);
        this.objectLanguage = normalizeLang(object.getLanguage());
        // With spatial data we still need the literal string, because the
        // triple-store will consider "POINT(1, 2)" and "POINT (1,2)" as
        // different, therefore the index should too.
        this.objectString = object.stringValue();

        // Can't have both a type and a language in RDF:
        Validate.isTrue(NULL.equals(this.objectType) || NULL.equals(this.objectLanguage));
        // NOTE(review): every literal is parsed as a geometry here, so a
        // literal that is not valid geometry text presumably fails with an
        // IndexException - confirm that only spatial literals reach this code.
        this.objectSpatial = asGeometry(object, false);
        if (XMLSchema.DATETIME.stringValue().equals(this.objectType)
                || XMLSchema.DATE.stringValue().equals(this.objectType))
            this.objectDate = object.calendarValue().toGregorianCalendar().getTime();
        this.objectTsVectorConfig = tsVectorConfig;
    }

    /**
     * Returns the subject of this indexed statement, created as a URI by the
     * given value factory.
     */
    public Resource getSubject(ValueFactory vf) {
        return vf.createURI(subject);
    }

    /**
     * Returns the predicate of this indexed statement.
     */
    public URI getPredicate(ValueFactory vf) {
        return vf.createURI(predicate);
    }

    /**
     * Returns the value of this indexed statement: a URI if the object was
     * indexed as a URI, a literal otherwise.
     */
    public Value getObject(ValueFactory vf) {
        if (objectUri)
            return vf.createURI(objectString);
        return createLiteral(vf);
    }

    /**
     * Rebuilds the literal object: language-tagged when a language was stored,
     * otherwise typed with the stored datatype (or a null datatype when none
     * was stored).
     */
    private Value createLiteral(ValueFactory vf) {
        if (!NULL.equals(objectLanguage))
            return vf.createLiteral(objectString, objectLanguage);
        URI dataType = NULL.equals(objectType) ? null : vf.createURI(objectType);
        return vf.createLiteral(objectString, dataType);
    }

    /**
     * Returns the datatype URI of the given literal as a string, or
     * {@link #NULL} if it has no datatype.
     * 
     * @param object
     *            The value to inspect; it is cast to {@link Literal}.
     */
    static String getDataTypeAsString(Value object) {
        URI objectType = ((Literal) object).getDatatype();
        return objectType == null ? NULL : objectType.stringValue();
    }

    /**
     * Returns the lower-cased language tag, or {@link #NULL} if the language
     * is null.
     */
    static String normalizeLang(String language) {
        return language == null ? NULL : language.toLowerCase(Locale.ROOT);
    }

    /**
     * Converts the lexical value of a literal to a geometry.
     * <p>
     * The datatype of the literal is not inspected: the lexical value is
     * always parsed as geometry text by {@link #asGeometry(String)}.
     * 
     * @param literal
     *            The literal whose string value is parsed.
     * @param acceptNoType
     *            Currently ignored.
     * @return The parsed geometry.
     * @throws IndexException
     *             If the value can not be parsed.
     */
    public static PGgeometry asGeometry(Literal literal, boolean acceptNoType) throws IndexException {
        return asGeometry(literal.stringValue());
    }

    /**
     * Parses geometry text. The text is upper-cased before parsing. Unlike
     * {@link #asGeometry(byte[])}, no default SRID is applied.
     * 
     * @throws IndexException
     *             Wrapping the {@link SQLException} raised when the text can
     *             not be parsed.
     */
    static PGgeometry asGeometry(String value) throws IndexException {
        try {
            // Locale.ROOT keeps the upper-casing independent of the JVM default
            // locale (e.g. "point" must not become "POİNT" under a Turkish
            // locale), consistent with normalizeLang().
            return new PGgeometry(value.toUpperCase(Locale.ROOT));
        } catch (SQLException e) {
            throw new IndexException(e);
        }
    }

    /**
     * Parses a binary geometry, applying
     * {@link PostgisIndexerSettings#DEFAULT_SRID} when the parsed geometry
     * has an srid of -1.
     */
    static PGgeometry asGeometry(byte[] value) {
        org.postgis.Geometry geometry = new BinaryParser().parse(value);
        if (geometry.srid == -1)
            geometry.srid = PostgisIndexerSettings.DEFAULT_SRID;
        return new PGgeometry(geometry);
    }

    /**
     * Creates and stores an indexed version of the provided statement.
     * 
     * @param conn
     *            The connection used for the insert.
     * @param table
     *            The name of the index table; it is concatenated into the SQL
     *            text.
     * @param subject
     *            The subject
     * @param predicate
     *            The predicate
     * @param object
     *            The object/value, must be a literal or a URI.
     * @param tsVectorConfig
     *            The text search configuration stored with the statement.
     * 
     * @throws IllegalArgumentException
     *             If subject or predicate is null, or if the object is neither
     *             a {@link Literal} nor a {@link URI} (e.g. null or a
     *             {@link BNode}).
     * @throws IndexException
     *             If a literal object can not be parsed as a geometry.
     * @throws SQLException
     *             If the insert fails.
     */
    static void add(Connection conn, String table, Resource subject, URI predicate, Value object,
            String tsVectorConfig) throws IndexException, SQLException {
        add(conn, table, new IndexedStatement(subject, predicate, object, tsVectorConfig));
    }

    /**
     * Stores the provided indexed statement with a single insert. No check for
     * duplicates is done, so only use this function if it is safe to assume
     * that the statement is not yet in the database.
     * 
     * @param conn
     *            The connection used for the insert.
     * @param table
     *            The name of the index table; it is concatenated into the SQL
     *            text.
     * @param indexedStatement
     *            The statement to add to the index.
     * 
     * @throws IllegalArgumentException
     *             If the insert does not report exactly one affected row.
     * @throws SQLException
     *             If the insert fails.
     */
    static void add(Connection conn, String table, IndexedStatement indexedStatement) throws SQLException {
        PreparedStatement stat = addPrepare(conn, table);
        try {
            // Bind without addBatch(): the row is written by executeUpdate(),
            // so no batch entry should be queued on this statement.
            bindInsertParameters(stat, indexedStatement);
            int count = stat.executeUpdate();
            Validate.isTrue(count == 1);
        } finally {
            stat.close();
        }
    }

    /**
     * Prepares the insert statement for the given table. Bind rows with
     * {@link #addNewBatch(PreparedStatement, IndexedStatement)}.
     * 
     * @param tableName
     *            The name of the index table; it is concatenated into the SQL
     *            text.
     */
    public static PreparedStatement addPrepare(Connection conn, String tableName) throws SQLException {
        return conn.prepareStatement(INSERT_INTO + tableName + INSERT_VALUES);
    }

    /**
     * Binds the given statement to the insert prepared by
     * {@link #addPrepare(Connection, String)} and queues it as a batch entry.
     * Executing the batch is left to the caller.
     */
    public static void addNewBatch(PreparedStatement stat, IndexedStatement indexedStatement) throws SQLException {
        bindInsertParameters(stat, indexedStatement);
        stat.addBatch();
    }

    /**
     * Binds all insert parameters, in the column order of
     * {@link #INSERT_VALUES}.
     */
    private static void bindInsertParameters(PreparedStatement stat, IndexedStatement indexedStatement)
            throws SQLException {
        int idx = 1;
        Date date = indexedStatement.objectDate;
        // NOTE(review): java.sql.Date is meant to carry a date without a time
        // of day, so the time part of xsd:dateTime values is presumably not
        // stored - confirm against the column type of objectDate.
        stat.setDate(idx++, date == null ? null : new java.sql.Date(date.getTime()));
        stat.setString(idx++, indexedStatement.objectLanguage);
        stat.setObject(idx++, indexedStatement.objectSpatial);
        stat.setString(idx++, indexedStatement.objectString);
        stat.setString(idx++, indexedStatement.objectTsVectorConfig);
        stat.setString(idx++, indexedStatement.objectType);
        stat.setBoolean(idx++, indexedStatement.objectUri);
        stat.setString(idx++, indexedStatement.predicate);
        stat.setString(idx++, indexedStatement.subject);
    }

    /**
     * Search for a specific statement.
     * 
     * @return Null if that statement does not exists in the index, the first
     *         found {@link IndexedStatement} if it does exist. Note that there
     *         might be duplicated statements in the database, the uniqueness is
     *         therefore not validated.
     * 
     * @throws IllegalArgumentException
     *             If one of the arguments (other than the table) is null.
     */
    static IndexedStatement findUnique(Connection conn, String table, Resource subject, URI predicate, Value object)
            throws SQLException, IndexException {
        validateNotNull(conn, subject, predicate, object);
        CloseableIterator<IndexedStatement> results = find(conn, table, subject, predicate, object);
        try {
            return results.hasNext() ? results.next() : null;
        } finally {
            results.close();
        }
    }

    /**
     * Search for indexed statements. A null subject, predicate or object acts
     * as a wildcard: no condition is generated for it.
     * 
     * @return A closable iterator to the found results. The caller must close
     *         it to release the underlying JDBC resources.
     */
    static CloseableIterator<IndexedStatement> find(Connection conn, String table, Resource subject, URI predicate,
            Value object) throws SQLException, IndexException {
        if (subject instanceof BNode || object instanceof BNode)
            // BNodes are not indexed, so there is no result:
            return EMPTY;
        String sql = createQuery(SELECT + table, subject, predicate, object);
        PreparedStatement stat = conn.prepareStatement(sql);
        StatementIterator result = null;
        try {
            addBindings(stat, subject, predicate, object);
            result = new StatementIterator(stat);
        } finally {
            // On success the StatementIterator owns (and closes) the stat. On
            // failure an exception is already propagating, so close quietly to
            // avoid masking it with a secondary close failure.
            if (result == null)
                closeQuietly(stat);
        }
        return result;
    }

    /**
     * Search for a statements with a given text predicate. For testing purposes
     * only.
     * 
     * @param vectorConfig
     *            Name of the configuration to use for building the query.
     * @param query
     *            The search query, e.g. "apples & oranges".
     * @return An closeable iteration to the resulting {@link IndexedStatement}
     *         s.
     * 
     * @throws IllegalArgumentException
     *             If the query is empty or the configuration name is invalid.
     */
    static CloseableIterator<IndexedStatement> search(Connection conn, String table, String vectorConfig,
            String query) throws SQLException, IndexException {
        Validate.notEmpty(query);
        validateConfig(vectorConfig);
        // The config is inlined (see validateConfig), the query itself is bound.
        StringBuilder sql = new StringBuilder(MAX_SELECT_LEN).append(SELECT).append(table).append(WHERE);
        sql.append(OBJECT_TS_VECTOR);
        sql.append("@@to_tsquery('").append(vectorConfig).append("',?)");
        PreparedStatement stat = conn.prepareStatement(sql.toString());
        StatementIterator result = null;
        try {
            stat.setString(1, query);
            result = new StatementIterator(stat);
        } finally {
            // See find(): only close here when no iterator took ownership.
            if (result == null)
                closeQuietly(stat);
        }
        return result;
    }

    /**
     * Deletes matching indexed statements. A null subject, predicate or object
     * acts as a wildcard: no condition is generated for it.
     * 
     * @param subject
     *            The subject, BNode subjects are ignored.
     * @param predicate
     *            The predicate
     * @param object
     *            The object/value, BNode objects are ignored.
     * 
     * @return The number of matching (hence deleted) statements.
     * 
     * @throws SQLException
     *             If the delete fails.
     */
    static int delete(Connection conn, String table, Resource subject, URI predicate, Value object)
            throws SQLException {
        if (subject instanceof BNode || object instanceof BNode)
            return 0; // BNodes are not indexed, so there is nothing to delete
        String sql = createQuery(DELETE_FROM_STS + table, subject, predicate, object);
        PreparedStatement stat = conn.prepareStatement(sql);
        try {
            addBindings(stat, subject, predicate, object);
            return stat.executeUpdate();
        } finally {
            stat.close();
        }
    }

    /**
     * Since the configuration of text searches is inlined in the query (because
     * we can not bind a variable of type regconfig) this method checks that the
     * config is valid. This prevents SQL-injection security issues.
     * 
     * @param config
     *            The text search configuration
     * 
     * @throws IllegalArgumentException
     *             If the config is null or contains characters other than
     *             letters, digits and underscores.
     */
    protected static void validateConfig(String config) {
        Validate.isTrue(config != null && config.matches("[a-zA-Z0-9_]+"),
                "Textsearch config should match regex: [a-zA-Z0-9_]+");
    }

    /**
     * Binds the values for the conditions generated by
     * {@link #createQuery(String, Resource, URI, Value)}; the order of the
     * bindings must match the order of the generated clauses.
     */
    private static void addBindings(PreparedStatement stat, Resource subject, URI predicate, Value object)
            throws SQLException {
        int idx = 1;
        if (subject != null)
            stat.setString(idx++, subject.stringValue());
        if (predicate != null)
            stat.setString(idx++, predicate.stringValue());
        if (object == null)
            return;
        String objectValue = object.stringValue();
        stat.setString(idx++, objectValue); // md5(objectString)=md5(?)
        stat.setString(idx++, objectValue); // objectString=?
        boolean isUri = object instanceof URI;
        stat.setBoolean(idx++, isUri);
        if (!isUri) {
            stat.setString(idx++, getDataTypeAsString(object));
            stat.setString(idx++, normalizeLang(((Literal) object).getLanguage()));
        }
    }

    /**
     * Appends a WHERE clause to the base statement with one equality condition
     * per non-null argument. Without any non-null argument the base statement
     * is returned unchanged.
     */
    private static String createQuery(String base, Resource subject, URI predicate, Value object) {
        StringBuilder sql = new StringBuilder(MAX_SELECT_LEN).append(base);
        if (subject != null || predicate != null || object != null)
            sql.append(WHERE);
        // Anything appended beyond this length is a condition, which tells
        // append() when a separating AND is needed.
        int startLen = sql.length();
        if (subject != null)
            append(sql, startLen, SUBJECT, EQQ);
        if (predicate != null)
            append(sql, startLen, PREDICATE, EQQ);
        if (object != null)
            createObjectEqualClauses(sql, startLen, object);
        return sql.toString();
    }

    /**
     * Appends the conditions that match an object: its string value (compared
     * both by md5 and literally), whether it is a URI, and for non-URI objects
     * also its datatype and language.
     */
    private static void createObjectEqualClauses(StringBuilder sql, int startLen, Value object) {
        append(sql, startLen, "md5(" + OBJECT_STRING + ")", EQMD5);
        append(sql, startLen, OBJECT_STRING, EQQ);
        append(sql, startLen, OBJECT_URI, EQQ);
        if (!(object instanceof URI)) {
            append(sql, startLen, OBJECT_TYPE, EQQ);
            append(sql, startLen, OBJECT_LANGUAGE, EQQ);
        }
    }

    /**
     * Appends one condition made of the given fragments, preceded by AND if a
     * condition was already appended after startLen.
     */
    private static void append(StringBuilder sql, int startLen, String... values) {
        if (sql.length() > startLen)
            sql.append(AND);
        for (String value : values)
            sql.append(value);
    }

    /**
     * Creates an IndexedStatement from the current row of the result set,
     * reading the columns in the order of {@link #SELECT}.
     */
    private static IndexedStatement convert(ResultSet results) throws SQLException {
        IndexedStatement result = new IndexedStatement();
        int idx = 1;
        result.objectDate = results.getDate(idx++);
        result.objectLanguage = results.getString(idx++);
        result.objectSpatial = (PGgeometry) results.getObject(idx++);
        result.objectString = results.getString(idx++);
        result.objectTsVectorConfig = results.getString(idx++);
        result.objectType = results.getString(idx++);
        result.objectUri = results.getBoolean(idx++);
        result.predicate = results.getString(idx++);
        result.subject = results.getString(idx++);
        return result;
    }

    /**
     * @throws IllegalArgumentException
     *             If any of the arguments is null.
     */
    private static void validateNotNull(Connection conn, Resource subject, URI predicate, Value object) {
        Validate.isTrue(conn != null && subject != null && predicate != null && object != null);
    }

    /**
     * Close, but ignore null input and catch and log resulting
     * {@link SQLException}s.
     */
    public static void closeQuietly(Connection conn) {
        try {
            if (conn != null)
                conn.close();
        } catch (SQLException e) {
            LOG.error("could not close connection", e);
        }
    }

    /**
     * Close, but ignore null input and catch and log resulting
     * {@link SQLException}s.
     */
    public static void closeQuietly(PreparedStatement stat) {
        try {
            if (stat != null) // when preparedstatements are pooled, we can't
                              // check for isClosed()
                stat.close();
        } catch (SQLException e) {
            LOG.error("could not close jdbc statement", e);
        }
    }

    /**
     * Close, but ignore null input and catch and log resulting
     * {@link SQLException}s.
     */
    public static void closeQuietly(ResultSet results) {
        try {
            if (results != null) // can't check isClosed when using dbcp
                                 // connection pooling
                results.close();
        } catch (SQLException e) {
            LOG.error("could not close jdbc resultset", e);
        }
    }

    /**
     * Iterates over the rows produced by a prepared statement, converting each
     * row to an {@link IndexedStatement}. The iterator takes ownership of the
     * statement: {@link #close()} closes both the result set and the statement.
     */
    static class StatementIterator implements CloseableIterator<IndexedStatement> {
        private final ResultSet results;
        private final PreparedStatement stat;
        // True when hasNext holds the answer for the row the cursor is on.
        private boolean knowHasNext;
        private boolean hasNext;
        private boolean closed;

        /**
         * Executes the query of the given statement.
         * 
         * @throws IndexException
         *             If executing the query fails; the statement is closed in
         *             that case.
         */
        public StatementIterator(PreparedStatement stat) throws IndexException {
            this.stat = stat;
            try {
                this.results = stat.executeQuery();
            } catch (SQLException e) {
                closeQuietly(stat);
                throw new IndexException(e);
            }
        }

        /**
         * Advances the cursor at most once per element; repeated calls without
         * an intermediate {@link #next()} return the cached answer. Returns
         * false once the iterator is closed.
         */
        @Override
        public boolean hasNext() throws IndexException {
            if (closed)
                return false;
            if (!knowHasNext) {
                try {
                    hasNext = results.next();
                    knowHasNext = true;
                } catch (SQLException e) {
                    throw new IndexException(e);
                }
            }
            return hasNext;
        }

        /**
         * @throws NoSuchElementException
         *             If there are no more rows or the iterator is closed.
         */
        @Override
        public IndexedStatement next() {
            if (!hasNext())
                throw new NoSuchElementException();
            // The current row is consumed; the next hasNext() must advance.
            knowHasNext = false;
            try {
                return convert(results);
            } catch (SQLException e) {
                throw new IndexException(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("remove by StatementIterator not allowed");
        }

        /**
         * Closes the result set and the statement. Calling it again has no
         * effect, so the JDBC resources are never closed twice through this
         * iterator.
         */
        @Override
        public void close() {
            if (closed)
                return;
            closed = true;
            try {
                try {
                    results.close();
                } finally {
                    stat.close();
                }
            } catch (SQLException e) {
                throw new IndexException(e);
            }
        }
    }

    /**
     * Returns the string as a single-quoted SQL string literal, with embedded
     * single quotes doubled.
     */
    public static String escapedString(String string) {
        return '\'' + string.replace("'", "''") + '\'';
    }
}