// Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.rya.indexing.accumulo.entity; import static org.apache.rya.api.RdfCloudTripleStoreConstants.DELIM_BYTE; import static org.apache.rya.api.RdfCloudTripleStoreConstants.TYPE_DELIM_BYTE; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map.Entry; import java.util.NoSuchElementException; import java.util.Set; import org.apache.accumulo.core.client.AccumuloException; import org.apache.accumulo.core.client.AccumuloSecurityException; import org.apache.accumulo.core.client.BatchScanner; import org.apache.accumulo.core.client.Connector; import org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.security.Authorizations; import org.apache.hadoop.io.Text; import org.apache.rya.accumulo.AccumuloRdfConfiguration; import 
org.apache.rya.accumulo.documentIndex.DocIndexIteratorUtil; import org.apache.rya.accumulo.documentIndex.DocumentIndexIntersectingIterator; import org.apache.rya.api.RdfCloudTripleStoreConfiguration; import org.apache.rya.api.domain.RyaURI; import org.apache.rya.api.resolver.RyaContext; import org.apache.rya.api.resolver.RyaToRdfConversions; import org.apache.rya.api.resolver.RyaTypeResolverException; import org.apache.rya.indexing.DocIdIndexer; import org.apache.rya.indexing.accumulo.ConfigUtils; import org.openrdf.query.BindingSet; import org.openrdf.query.MalformedQueryException; import org.openrdf.query.QueryEvaluationException; import org.openrdf.query.algebra.StatementPattern; import org.openrdf.query.algebra.TupleExpr; import org.openrdf.query.algebra.evaluation.QueryBindingSet; import org.openrdf.query.algebra.helpers.StatementPatternCollector; import org.openrdf.query.parser.ParsedQuery; import org.openrdf.query.parser.sparql.SPARQLParser; import com.google.common.base.Preconditions; import com.google.common.collect.HashMultimap; import com.google.common.collect.Sets; import com.google.common.primitives.Bytes; import info.aduna.iteration.CloseableIteration; public class AccumuloDocIdIndexer implements DocIdIndexer { private BatchScanner bs; private final AccumuloRdfConfiguration conf; public AccumuloDocIdIndexer(final RdfCloudTripleStoreConfiguration conf) throws AccumuloException, AccumuloSecurityException { Preconditions.checkArgument(conf instanceof RdfCloudTripleStoreConfiguration, "conf must be isntance of RdfCloudTripleStoreConfiguration"); this.conf = (AccumuloRdfConfiguration) conf; //Connector conn = ConfigUtils.getConnector(conf); } public CloseableIteration<BindingSet, QueryEvaluationException> queryDocIndex(final String sparqlQuery, final Collection<BindingSet> constraints) throws TableNotFoundException, QueryEvaluationException { final SPARQLParser parser = new SPARQLParser(); ParsedQuery pq1 = null; try { pq1 = parser.parseQuery(sparqlQuery, 
null); } catch (final MalformedQueryException e) { e.printStackTrace(); } final TupleExpr te1 = pq1.getTupleExpr(); final List<StatementPattern> spList1 = StatementPatternCollector.process(te1); if (StarQuery.isValidStarQuery(spList1)) { final StarQuery sq1 = new StarQuery(spList1); return queryDocIndex(sq1, constraints); } else { throw new IllegalArgumentException("Invalid star query!"); } } @Override public CloseableIteration<BindingSet, QueryEvaluationException> queryDocIndex(final StarQuery query, final Collection<BindingSet> constraints) throws TableNotFoundException, QueryEvaluationException { final StarQuery starQ = query; final Iterator<BindingSet> bs = constraints.iterator(); final Iterator<BindingSet> bs2 = constraints.iterator(); final Set<String> unCommonVarNames; final Set<String> commonVarNames; if (bs2.hasNext()) { final BindingSet currBs = bs2.next(); commonVarNames = StarQuery.getCommonVars(query, currBs); unCommonVarNames = Sets.difference(currBs.getBindingNames(), commonVarNames); } else { commonVarNames = Sets.newHashSet(); unCommonVarNames = Sets.newHashSet(); } if (commonVarNames.size() == 1 && !query.commonVarConstant() && commonVarNames.contains(query.getCommonVarName())) { final HashMultimap<String, BindingSet> map = HashMultimap.create(); final String commonVar = starQ.getCommonVarName(); final Iterator<Entry<Key, Value>> intersections; final BatchScanner scan; final Set<Range> ranges = Sets.newHashSet(); while (bs.hasNext()) { final BindingSet currentBs = bs.next(); if (currentBs.getBinding(commonVar) == null) { continue; } final String row = currentBs.getBinding(commonVar).getValue().stringValue(); ranges.add(new Range(row)); map.put(row, currentBs); } scan = runQuery(starQ, ranges); intersections = scan.iterator(); return new CloseableIteration<BindingSet, QueryEvaluationException>() { private QueryBindingSet currentSolutionBs = null; private boolean hasNextCalled = false; private boolean isEmpty = false; private Iterator<BindingSet> 
inputSet = new ArrayList<BindingSet>().iterator(); private BindingSet currentBs; private Key key; @Override public boolean hasNext() throws QueryEvaluationException { if (!hasNextCalled && !isEmpty) { while (inputSet.hasNext() || intersections.hasNext()) { if (!inputSet.hasNext()) { key = intersections.next().getKey(); inputSet = map.get(key.getRow().toString()).iterator(); } currentBs = inputSet.next(); currentSolutionBs = deserializeKey(key, starQ, currentBs, unCommonVarNames); if (currentSolutionBs.size() == unCommonVarNames.size() + starQ.getUnCommonVars().size() + 1) { hasNextCalled = true; return true; } } isEmpty = true; return false; } else if (isEmpty) { return false; } else { return true; } } @Override public BindingSet next() throws QueryEvaluationException { if (hasNextCalled) { hasNextCalled = false; } else if (isEmpty) { throw new NoSuchElementException(); } else { if (this.hasNext()) { hasNextCalled = false; } else { throw new NoSuchElementException(); } } return currentSolutionBs; } @Override public void remove() throws QueryEvaluationException { throw new UnsupportedOperationException(); } @Override public void close() throws QueryEvaluationException { scan.close(); } }; } else { return new CloseableIteration<BindingSet, QueryEvaluationException>() { @Override public void remove() throws QueryEvaluationException { throw new UnsupportedOperationException(); } private Iterator<Entry<Key, Value>> intersections = null; private QueryBindingSet currentSolutionBs = null; private boolean hasNextCalled = false; private boolean isEmpty = false; private boolean init = false; private BindingSet currentBs; private StarQuery sq = new StarQuery(starQ); private final Set<Range> emptyRangeSet = Sets.newHashSet(); private BatchScanner scan; @Override public BindingSet next() throws QueryEvaluationException { if (hasNextCalled) { hasNextCalled = false; } else if (isEmpty) { throw new NoSuchElementException(); } else { if (this.hasNext()) { hasNextCalled = false; } 
else { throw new NoSuchElementException(); } } return currentSolutionBs; } @Override public boolean hasNext() throws QueryEvaluationException { if (!init) { if (intersections == null && bs.hasNext()) { currentBs = bs.next(); sq = StarQuery.getConstrainedStarQuery(sq, currentBs); scan = runQuery(sq, emptyRangeSet); intersections = scan.iterator(); // binding set empty } else if (intersections == null && !bs.hasNext()) { currentBs = new QueryBindingSet(); scan = runQuery(starQ, emptyRangeSet); intersections = scan.iterator(); } init = true; } if (!hasNextCalled && !isEmpty) { while (intersections.hasNext() || bs.hasNext()) { if (!intersections.hasNext()) { scan.close(); currentBs = bs.next(); sq = StarQuery.getConstrainedStarQuery(sq, currentBs); scan = runQuery(sq, emptyRangeSet); intersections = scan.iterator(); } if (intersections.hasNext()) { currentSolutionBs = deserializeKey(intersections.next().getKey(), sq, currentBs, unCommonVarNames); } else { continue; } if (sq.commonVarConstant() && currentSolutionBs.size() == unCommonVarNames.size() + sq.getUnCommonVars().size()) { hasNextCalled = true; return true; } else if (currentSolutionBs.size() == unCommonVarNames.size() + sq.getUnCommonVars().size() + 1) { hasNextCalled = true; return true; } } isEmpty = true; return false; } else if (isEmpty) { return false; } else { return true; } } @Override public void close() throws QueryEvaluationException { scan.close(); } }; } } private QueryBindingSet deserializeKey(final Key key, final StarQuery sq, final BindingSet currentBs, final Set<String> unCommonVar) { final QueryBindingSet currentSolutionBs = new QueryBindingSet(); final Text row = key.getRow(); final Text cq = key.getColumnQualifier(); final String[] cqArray = cq.toString().split(DocIndexIteratorUtil.DOC_ID_INDEX_DELIM); boolean commonVarSet = false; //if common Var is constant there is no common variable to assign a value to if (sq.commonVarConstant()) { commonVarSet = true; } if (!commonVarSet && 
sq.isCommonVarURI()) { final RyaURI rURI = new RyaURI(row.toString()); currentSolutionBs.addBinding(sq.getCommonVarName(), RyaToRdfConversions.convertValue(rURI)); commonVarSet = true; } for (final String s : sq.getUnCommonVars()) { final byte[] cqBytes = cqArray[sq.getVarPos().get(s)].getBytes(StandardCharsets.UTF_8); final int firstIndex = Bytes.indexOf(cqBytes, DELIM_BYTE); final int secondIndex = Bytes.lastIndexOf(cqBytes, DELIM_BYTE); final int typeIndex = Bytes.indexOf(cqBytes, TYPE_DELIM_BYTE); final String tripleComponent = new String(Arrays.copyOfRange(cqBytes, firstIndex + 1, secondIndex), StandardCharsets.UTF_8); final byte[] cqContent = Arrays.copyOfRange(cqBytes, secondIndex + 1, typeIndex); final byte[] objType = Arrays.copyOfRange(cqBytes, typeIndex, cqBytes.length); if (tripleComponent.equals("object")) { final byte[] object = Bytes.concat(cqContent, objType); org.openrdf.model.Value v = null; try { v = RyaToRdfConversions.convertValue(RyaContext.getInstance().deserialize(object)); } catch (final RyaTypeResolverException e) { e.printStackTrace(); } currentSolutionBs.addBinding(s, v); } else if (tripleComponent.equals("subject")) { if (!commonVarSet) { final byte[] object = Bytes.concat(row.getBytes(), objType); org.openrdf.model.Value v = null; try { v = RyaToRdfConversions.convertValue(RyaContext.getInstance().deserialize(object)); } catch (final RyaTypeResolverException e) { e.printStackTrace(); } currentSolutionBs.addBinding(sq.getCommonVarName(), v); commonVarSet = true; } final RyaURI rURI = new RyaURI(new String(cqContent, StandardCharsets.UTF_8)); currentSolutionBs.addBinding(s, RyaToRdfConversions.convertValue(rURI)); } else { throw new IllegalArgumentException("Invalid row."); } } for (final String s : unCommonVar) { currentSolutionBs.addBinding(s, currentBs.getValue(s)); } return currentSolutionBs; } private BatchScanner runQuery(final StarQuery query, Collection<Range> ranges) throws QueryEvaluationException { try { if (ranges.size() == 
0) { final String rangeText = query.getCommonVarValue(); Range r; if (rangeText != null) { r = new Range(new Text(query.getCommonVarValue())); } else { r = new Range(); } ranges = Collections.singleton(r); } final Connector accCon = ConfigUtils.getConnector(conf); final IteratorSetting is = new IteratorSetting(30, "fii", DocumentIndexIntersectingIterator.class); DocumentIndexIntersectingIterator.setColumnFamilies(is, query.getColumnCond()); if (query.hasContext()) { DocumentIndexIntersectingIterator.setContext(is, query.getContextURI()); } final Authorizations auths; final String authsStr = conf.get(ConfigUtils.CLOUDBASE_AUTHS); if (authsStr == null || authsStr.isEmpty()) { auths = new Authorizations(); } else { auths = new Authorizations(authsStr); } bs = accCon.createBatchScanner(EntityCentricIndex.getTableName(conf), auths, 15); bs.addScanIterator(is); bs.setRanges(ranges); return bs; } catch (TableNotFoundException | AccumuloException | AccumuloSecurityException e) { throw new QueryEvaluationException(e); } } @Override public void close() throws IOException { //TODO generate an exception when BS passed in -- scanner closed // if (bs != null) { // bs.close(); // } } }