Java tutorial
/* * Copyright 2010 by TalkingTrends (Amsterdam, The Netherlands) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at * * http://opensahara.com/licenses/apache-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. */ package com.useekm.indexing.postgis; import info.aduna.collections.iterators.CloseableIterator; import info.aduna.collections.iterators.EmptyIterator; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.util.Date; import java.util.Locale; import java.util.NoSuchElementException; import org.apache.commons.lang.Validate; import org.openrdf.model.BNode; import org.openrdf.model.Literal; import org.openrdf.model.Resource; import org.openrdf.model.URI; import org.openrdf.model.Value; import org.openrdf.model.ValueFactory; import org.openrdf.model.vocabulary.XMLSchema; import org.postgis.PGgeometry; import org.postgis.binary.BinaryParser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.useekm.indexing.exception.IndexException; import edu.ncsa.sstde.indexing.IndexingSail; import edu.ncsa.sstde.indexing.IndexingSailConnection; import edu.ncsa.sstde.indexing.postgis.PostgisIndexerSettings; /** * An IndexedStatement is the indexed version of a statement/triple in the * {@link IndexingSailConnection}, and is stored in a Postgres database with * indexes designed for faster lookup or added search functionality. * <p> * IndexedStatement has support for geospatial (by using a Postgres based RTree) * and free text search (by using a Postgres based inverted index). Whether a * triple is stored in the Triple Store, in the Postgres database, or in both is * decided by the settings of the {@link IndexingSailConnection}. * * @see IndexingSailConnection * @see IndexingSail * @see PostgisIndexerSettings */ public class IndexedStatement { private static final Logger LOG = LoggerFactory.getLogger(IndexedStatement.class); private static final String EQQ = "=?"; private static final String EQMD5 = "=md5(?)"; private static final String WHERE = " WHERE "; private static final String DELETE_FROM_STS = "DELETE FROM "; private static final String AND = "AND "; static final String SUBJECT = "subject"; public static final String PREDICATE = "predicate"; static final String OBJECT_URI = "objectUri"; static final String OBJECT_STRING = "objectString"; static final String OBJECT_TYPE = "objectType"; public static final String OBJECT_LANGUAGE = "objectLanguage"; static final String OBJECT_DATE = "objectDate"; static final String OBJECT_SPATIAL = "objectSpatial"; static final String OBJECT_TS_VECTOR_CONFIG = "objectTsVectorConfig"; static final String OBJECT_TS_VECTOR = "objectTsVector"; static final String NULL = "$NULL$"; private static final int MAX_SELECT_LEN = 250; // Maximum length for // standard queries (initial // memory for StringBuffer) private static final String INSERT_INTO = "INSERT INTO "; private static final String INSERT_VALUES = "(objectDate,objectLanguage,objectSpatial,objectString,objectTsVectorConfig,objectType,objectUri,predicate,subject)" + "VALUES(?,?,?,?,?,?,?,?,?)"; private static final String SELECT = "SELECT objectDate,objectLanguage,objectSpatial,objectString,objectTsVectorConfig,objectType,objectUri,predicate,subject " + "FROM "; private static final EmptyIterator<IndexedStatement> EMPTY = new EmptyIterator<IndexedStatement>(); private String subject; String predicate; private boolean objectUri; private String objectString; private Date objectDate; private String objectType; String objectLanguage; private PGgeometry objectSpatial; private String objectTsVectorConfig; protected IndexedStatement() { } IndexedStatement(Resource subject, URI predicate, Value object, String tsVectorConfig) throws IndexException { Validate.notNull(predicate); // Validate.isTrue(subject instanceof URI); Validate.isTrue(object instanceof Literal || object instanceof URI); this.subject = subject.stringValue(); this.predicate = predicate.stringValue(); this.objectUri = object instanceof URI; if (this.objectUri) initUriObject(object); else initLiteralObject((Literal) object, tsVectorConfig); } private void initUriObject(Value object) { this.objectType = NULL; this.objectLanguage = NULL; this.objectString = object.stringValue(); } private void initLiteralObject(Literal object, String tsVectorConfig) throws IndexException { this.objectType = getDataTypeAsString(object); this.objectLanguage = normalizeLang(object.getLanguage()); this.objectString = object.stringValue(); // with spatial data we still // need the literal string, // because the triple-store will consider "POINT(1, 2)" and // "POINT (1,2)" as different, // therefore the index should too... // Can't have both a type an a language in RDF: Validate.isTrue(NULL.equals(this.objectType) || NULL.equals(this.objectLanguage)); this.objectSpatial = asGeometry(object, false); if (XMLSchema.DATETIME.stringValue().equals(this.objectType) || XMLSchema.DATE.stringValue().equals(this.objectType)) this.objectDate = (object).calendarValue().toGregorianCalendar().getTime(); this.objectTsVectorConfig = tsVectorConfig; } /** * Returns the subject of this indexed statement. */ public Resource getSubject(ValueFactory vf) { return vf.createURI(subject); } /** * Returns the predicate of this indexed statement. */ public URI getPredicate(ValueFactory vf) { return vf.createURI(predicate); } /** * Returns the value of this indexed statement. */ public Value getObject(ValueFactory vf) { if (objectUri) return vf.createURI(objectString); return createLiteral(vf); } private Value createLiteral(ValueFactory vf) { if (NULL.equals(objectLanguage)) { URI dataType = NULL.equals(objectType) ? null : vf.createURI(objectType); return vf.createLiteral(objectString, dataType); } return vf.createLiteral(objectString, objectLanguage); } static String getDataTypeAsString(Value object) { URI objectType = ((Literal) object).getDatatype(); return objectType == null ? NULL : objectType.stringValue(); } static String normalizeLang(String language) { return language == null ? NULL : language.toLowerCase(Locale.ROOT); } public static PGgeometry asGeometry(Literal literal, boolean acceptNoType) throws IndexException { PGgeometry result = null; // URI type = literal.getDatatype(); // String typeString = type.stringValue(); // if (GeoConstants.XMLSCHEMA_SPATIAL_BIN.equals(type)) // result = // asGeometry(Base64.decodeBase64(literal.stringValue().getBytes())); // else if (GeoConstants.XMLSCHEMA_SPATIAL_BINGZ.equals(type)) // result = // asGeometry(AbstractGeo.gunzip(Base64.decodeBase64(literal.stringValue().getBytes()))); // else if (GeoConstants.XMLSCHEMA_SPATIAL_TEXT.equals(type) || (type == // null && acceptNoType)) // result = asGeometry(literal.stringValue()); // else if (GeoConstants.XMLSCHEMA_SPATIAL_TEXTGZ.equals(type)) // result = asGeometry(new // String(AbstractGeo.gunzip(Base64.decodeBase64(literal.stringValue().getBytes())))); result = asGeometry(literal.stringValue()); return result; } static PGgeometry asGeometry(String value) throws IndexException { try { PGgeometry result = new PGgeometry(value.toUpperCase()); // org.postgis.Geometry geometry = result.getGeometry(); // if (geometry.srid == -1) // geometry.srid = PostgisIndexerSettings.DEFAULT_SRID; return result; } catch (SQLException e) { throw new IndexException(e); } } static PGgeometry asGeometry(byte[] value) { org.postgis.Geometry geometry = new BinaryParser().parse(value); if (geometry.srid == -1) geometry.srid = PostgisIndexerSettings.DEFAULT_SRID; return new PGgeometry(geometry); } /** * Creates and stores an indexed version of the provided statement. * * @param subject * The subject, BNode subjects are not allowed. * @param predicate * The predicate * @param object * The object/value * * @throws IllegalArgumentException * If one of the arguments equals null or if one of the * arguments is a {@link BNode}. * @throws IndexException * @Throws SQLException */ static void add(Connection conn, String table, Resource subject, URI predicate, Value object, String tsVectorConfig) throws IndexException, SQLException { add(conn, table, new IndexedStatement(subject, predicate, object, tsVectorConfig)); } /** * Creates and stores an indexed version of the provided statement. Only use * this function if it is safe to assume that the statement is not yet in * the database. This method is faster than * {@link #addIfNew(Connection, Resource, URI, Value, String)}, but does not * check for duplicates. * * @param subject * The subject, BNode subjects are not allowed. * @param predicate * The predicate * @param object * The object/value * * @return The IndexedStatement to add to the index. * * @throws IllegalArgumentException * If one of the arguments equals null or if one of the * arguments is a {@link BNode}. * @throws IndexException * @Throws SQLException */ static void add(Connection conn, String table, IndexedStatement indexedStatement) throws SQLException { PreparedStatement stat = addPrepare(conn, table); try { addNewBatch(stat, indexedStatement); int count = stat.executeUpdate(); Validate.isTrue(count == 1); } finally { stat.close(); } } public static PreparedStatement addPrepare(Connection conn, String tableName) throws SQLException { return conn.prepareStatement(INSERT_INTO + tableName + INSERT_VALUES); } public static void addNewBatch(PreparedStatement stat, IndexedStatement indexedStatement) throws SQLException { int idx = 1; if (indexedStatement.objectDate != null) stat.setDate(idx++, new java.sql.Date(indexedStatement.objectDate.getTime())); else stat.setDate(idx++, null); stat.setString(idx++, indexedStatement.objectLanguage); stat.setObject(idx++, indexedStatement.objectSpatial); stat.setString(idx++, indexedStatement.objectString); stat.setString(idx++, indexedStatement.objectTsVectorConfig); stat.setString(idx++, indexedStatement.objectType); stat.setBoolean(idx++, indexedStatement.objectUri); stat.setString(idx++, indexedStatement.predicate); stat.setString(idx++, indexedStatement.subject); stat.addBatch(); } /** * Search for a specific statement. * * @return Null if that statement does not exists in the index, the first * found {@link IndexedStatement} if it does exist. Note that there * might be duplicated statements in the database, the uniqueness is * therefore not validated. */ static IndexedStatement findUnique(Connection conn, String table, Resource subject, URI predicate, Value object) throws SQLException, IndexException { validateNotNull(conn, subject, predicate, object); CloseableIterator<IndexedStatement> results = find(conn, table, subject, predicate, object); try { if (!results.hasNext()) return null; return results.next(); } finally { results.close(); } } /** * Search for indexed statements. * * @return A closable iterator to the found results. */ static CloseableIterator<IndexedStatement> find(Connection conn, String table, Resource subject, URI predicate, Value object) throws SQLException, IndexException { if (subject instanceof BNode || object instanceof BNode) // BNodes are not indexed, so there is no result: return EMPTY; String sql = createQuery(SELECT + table, subject, predicate, object); StatementIterator result = null; PreparedStatement stat = conn.prepareStatement(sql); try { addBindings(stat, subject, predicate, object); result = new StatementIterator(stat); } finally { if (result == null) // else StatementIterator should close the stat stat.close(); } return result; } /** * Search for a statements with a given text predicate. For testing purposes * only. * * @param vectorConfig * Name of the configuration to use for building the query. * @param query * The search query, e.g. "apples & oranges". * @return An closeable iteration to the resulting {@link IndexedStatement} * s. */ static CloseableIterator<IndexedStatement> search(Connection conn, String table, String vectorConfig, String query) throws SQLException, IndexException { Validate.notEmpty(query); validateConfig(vectorConfig); StringBuffer sql = new StringBuffer(MAX_SELECT_LEN).append(SELECT + table).append(WHERE); sql.append(OBJECT_TS_VECTOR); sql.append("@@to_tsquery('").append(vectorConfig).append("',?)"); PreparedStatement stat = conn.prepareStatement(sql.toString()); StatementIterator result = null; try { stat.setString(1, query); result = new StatementIterator(stat); } finally { if (result == null) // else StatementIterator should close the stat stat.close(); } return result; } /** * Deletes matching indexed statements. * * @param subject * The subject, BNode subjects are ignored. * @param predicate * The predicate * @param object * The object/value * * @return The number of matching (hence deleted) statements. * * @throws IndexException * @Throws SQLException */ static int delete(Connection conn, String table, Resource subject, URI predicate, Value object) throws SQLException { if (subject instanceof BNode || object instanceof BNode) return 0; // // BNodes are not indexed, so there is nothing to // delete String sql = createQuery(DELETE_FROM_STS + table, subject, predicate, object); PreparedStatement stat = conn.prepareStatement(sql); try { addBindings(stat, subject, predicate, object); return stat.executeUpdate(); } finally { stat.close(); } } /** * Since the configuration of text searches is inlined in the query (because * we can not bind a variable of type regconfig) this method checks that the * config is valid. This prevents SQL-injection security issues. * * @param config * The text search configuration */ protected static void validateConfig(String config) { Validate.isTrue(config != null && config.matches("[a-zA-Z0-9_]+"), "Textsearch config should match regex: [a-zA-Z0-9_]+"); } private static void addBindings(PreparedStatement stat, Resource subject, URI predicate, Value object) throws SQLException { int idx = 1; if (subject != null) stat.setString(idx++, subject.stringValue()); if (predicate != null) stat.setString(idx++, predicate.stringValue()); if (object != null) { stat.setString(idx++, object.stringValue()); stat.setString(idx++, object.stringValue()); if (object instanceof URI) stat.setBoolean(idx++, true); else { stat.setBoolean(idx++, false); stat.setString(idx++, getDataTypeAsString(object)); stat.setString(idx++, normalizeLang(((Literal) object).getLanguage())); } } } private static String createQuery(String base, Resource subject, URI predicate, Value object) { StringBuffer sql = new StringBuffer(MAX_SELECT_LEN).append(base); if (subject != null || predicate != null || object != null) sql.append(WHERE); int startLen = sql.length(); if (subject != null) append(sql, startLen, SUBJECT, EQQ); if (predicate != null) append(sql, startLen, PREDICATE, EQQ); if (object != null) createObjectEqualClauses(sql, startLen, object); return sql.toString(); } private static void createObjectEqualClauses(StringBuffer sql, int startLen, Value object) { append(sql, startLen, "md5(" + OBJECT_STRING + ")", EQMD5); append(sql, startLen, OBJECT_STRING, EQQ); append(sql, startLen, OBJECT_URI, EQQ); if (!(object instanceof URI)) { append(sql, startLen, OBJECT_TYPE, EQQ); append(sql, startLen, OBJECT_LANGUAGE, EQQ); } } private static void append(StringBuffer sql, int startLen, String... values) { if (sql.length() > startLen) sql.append(AND); for (int i = 0; i != values.length; ++i) sql.append(values[i]); } private static IndexedStatement convert(ResultSet results) throws SQLException { IndexedStatement result = new IndexedStatement(); int idx = 1; result.objectDate = results.getDate(idx++); result.objectLanguage = results.getString(idx++); result.objectSpatial = (PGgeometry) results.getObject(idx++); result.objectString = results.getString(idx++); result.objectTsVectorConfig = results.getString(idx++); result.objectType = results.getString(idx++); result.objectUri = results.getBoolean(idx++); result.predicate = results.getString(idx++); result.subject = results.getString(idx++); return result; } private static void validateNotNull(Connection conn, Resource subject, URI predicate, Value object) { Validate.isTrue(conn != null && subject != null && predicate != null && object != null); } /** * Close, but ignore null input and catch and log resulting * {@link SQLException}s. */ public static void closeQuietly(Connection conn) { try { if (conn != null) conn.close(); } catch (SQLException e) { LOG.error("could not close connection", e); } } /** * Close, but ignore null input and catch and log resulting * {@link SQLException}s. */ public static void closeQuietly(PreparedStatement stat) { try { if (stat != null) // when preparedstatements are pooled, we can't // check for isClosed() stat.close(); } catch (SQLException e) { LOG.error("could not close jdbc statement", e); } } /** * Close, but ignore null input and catch and log resulting * {@link SQLException}s. */ public static void closeQuietly(ResultSet results) { try { if (results != null) // can't check isClosed when using dbcp // connection pooling results.close(); } catch (SQLException e) { LOG.error("could not close jdbc resultset", e); } } static class StatementIterator implements CloseableIterator<IndexedStatement> { private ResultSet results; private PreparedStatement stat; private boolean knowHasNext; private boolean hasNext; private boolean closed; public StatementIterator(PreparedStatement stat) throws IndexException { this.stat = stat; try { this.results = stat.executeQuery(); } catch (SQLException e) { closeQuietly(stat); throw new IndexException(e); } } @Override public boolean hasNext() throws IndexException { if (closed) return false; if (!knowHasNext) { try { hasNext = results.next(); knowHasNext = true; } catch (SQLException e) { throw new IndexException(e); } } return hasNext; } @Override public IndexedStatement next() { if (hasNext()) { knowHasNext = false; try { return convert(results); } catch (SQLException e) { throw new IndexException(e); } } throw new NoSuchElementException(); } @Override public void remove() { throw new UnsupportedOperationException("remove by StatementIterator not allowed"); } @Override public void close() { closed = true; try { try { results.close(); } finally { stat.close(); } } catch (SQLException e) { throw new IndexException(e); } } } public static String escapedString(String string) { return '\'' + string.replace("'", "''") + '\''; } }