Java tutorial
package com.yahoo.glimmer.web; /* * Copyright (c) 2012 Yahoo! Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software distributed under the License is * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and limitations under the License. * See accompanying LICENSE file. */ import it.unimi.di.big.mg4j.index.Index; import it.unimi.di.big.mg4j.query.SelectedInterval; import it.unimi.di.big.mg4j.query.nodes.Query; import it.unimi.di.big.mg4j.query.nodes.QueryBuilderVisitorException; import it.unimi.di.big.mg4j.search.score.DocumentScoreInfo; import it.unimi.dsi.fastutil.objects.ObjectArrayList; import it.unimi.dsi.fastutil.objects.Reference2ObjectMap; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.apache.commons.lang.mutable.MutableInt; import org.apache.log4j.Logger; import org.semanticweb.yars.nx.Node; import org.semanticweb.yars.nx.parser.NxParser; import com.yahoo.glimmer.query.QueryLogger; import com.yahoo.glimmer.query.QueryLogger.QueryTimer; import com.yahoo.glimmer.query.RDFIndex; import com.yahoo.glimmer.util.BySubjectRecord; import com.yahoo.glimmer.util.BySubjectRecord.BySubjectRecordException; import com.yahoo.glimmer.util.Util; /** * Wraps the details of doing a query against an RDFIndex. * */ public class Querier { private final static Logger LOGGER = Logger.getLogger(Querier.class); private static final String DEFAULT_CONTEXT = "default:"; private static final int CACHE_SIZE = 10000; private final Map<String, Long> objectsSubjectsIdCache; private final Map<Long, String> objectLabelCache; private QueryLogger queryLogger = new QueryLogger(); public Querier() { LinkedHashMap<String, Long> idCache = new LinkedHashMap<String, Long>(CACHE_SIZE + 1, 1.1f, true) { private static final long serialVersionUID = -8171861525079261380L; protected boolean removeEldestEntry(java.util.Map.Entry<String, Long> eldest) { return size() > CACHE_SIZE; }; }; objectsSubjectsIdCache = Collections.synchronizedMap(idCache); LinkedHashMap<Long, String> labelCache = new LinkedHashMap<Long, String>(CACHE_SIZE + 1, 1.1f, true) { private static final long serialVersionUID = -6916960713013021549L; protected boolean removeEldestEntry(java.util.Map.Entry<Long, String> eldest) { return size() > CACHE_SIZE; }; }; objectLabelCache = Collections.synchronizedMap(labelCache); } public QueryResult doQuery(RDFIndex index, Query query, int startItem, int maxNumItems, boolean deref, Integer objectLengthLimit) throws QueryBuilderVisitorException, IOException { if (startItem < 0 || maxNumItems < 0 || maxNumItems > 10000) { throw new IllegalArgumentException( "Bad item range - start:" + startItem + " maxNumItems:" + maxNumItems); } QueryTimer timer = queryLogger.start(); ObjectArrayList<DocumentScoreInfo<Reference2ObjectMap<Index, SelectedInterval[]>>> results; int numResults; results = new ObjectArrayList<DocumentScoreInfo<Reference2ObjectMap<Index, SelectedInterval[]>>>(); numResults = index.process(startItem, maxNumItems, results, query); timer.endSearch(); ObjectArrayList<QueryResultItem> resultItems = new ObjectArrayList<QueryResultItem>(); if (!results.isEmpty()) { for (int i = 0; i < results.size(); i++) { DocumentScoreInfo<Reference2ObjectMap<Index, SelectedInterval[]>> dsi = results.get(i); LOGGER.debug("Intervals for item " + i); LOGGER.debug("score " + dsi.score); QueryResultItem item = createRdfResultItem(index, dsi.document, dsi.score, deref, objectLengthLimit); if (item == null) { LOGGER.error("Document id " + dsi.document + " isn't in collection(or has null content)."); } else { resultItems.add(item); } } } queryLogger.endQuery(timer, query.toString(), numResults); QueryResult result = new QueryResult(null, query != null ? query.toString() : "", numResults, startItem, maxNumItems, resultItems, timer.getDuration(), timer.getSearchDuration()); return result; } public QueryResult doQueryForDocId(RDFIndex index, long id, boolean deref, Integer objectLengthLimit) throws IOException { QueryTimer timer = queryLogger.start(); timer.endSearch(); QueryResultItem resultItem = createRdfResultItem(index, id, 1.0d, deref, objectLengthLimit); queryLogger.endQuery(timer, "getDoc " + Long.toString(id), 1); List<QueryResultItem> results; if (resultItem != null) { results = Collections.singletonList(resultItem); } else { results = Collections.emptyList(); } return new QueryResult("", null, results.size(), 0, 1, results, timer.getDuration(), timer.getSearchDuration()); } private QueryResultItem createRdfResultItem(RDFIndex index, long docId, double score, boolean lookupObjectLabels, Integer objectLengthLimit) throws IOException { InputStream docInputStream; try { docInputStream = index.getDocumentInputStream(docId); } catch (IOException e) { // TODO fix end of stream errors on BZip2. return null; } BySubjectRecord record = new BySubjectRecord(); try { record.readFrom(new InputStreamReader(docInputStream)); } catch (BySubjectRecordException e) { LOGGER.warn("Failed to read doc with ID " + docId, e); return null; // throw new RuntimeException("Couldn't parse doc with id:" + docId); } if (docId != record.getId()) { LOGGER.error("Wanted doc id:" + docId + " but got doc id:" + record.getId()); } QueryResultItem item = new QueryResultItem(); item.setSubjectId(record.getId()); item.setSubject(record.getSubject()); item.setScore(score); Map<String, MutableInt> predicateToAccumulatedOjectLengthMap = new HashMap<String, MutableInt>(); for (String relationString : record.getRelations()) { Node[] predicateObjectContext; try { predicateObjectContext = NxParser.parseNodes(relationString); } catch (Exception e) { throw new RuntimeException("Error parsing tuple: " + relationString); } String predicate = predicateObjectContext[0].toString(); String object = predicateObjectContext[1].toString().trim(); if (objectLengthLimit != null) { MutableInt accumulatedOjectLength = predicateToAccumulatedOjectLengthMap.get(predicate); if (accumulatedOjectLength == null) { accumulatedOjectLength = new MutableInt(0); predicateToAccumulatedOjectLengthMap.put(predicate, accumulatedOjectLength); } else if (accumulatedOjectLength.intValue() >= objectLengthLimit) { continue; } // If the new accumulated length will be more than the limit(plus a bit). if (accumulatedOjectLength.intValue() + object.length() > objectLengthLimit + 20) { object = object.substring(0, objectLengthLimit - accumulatedOjectLength.intValue()) + "..."; } accumulatedOjectLength.add(object.length()); } String context; if (predicateObjectContext.length > 2) { context = predicateObjectContext[2].toString(); } else { context = DEFAULT_CONTEXT; } boolean indexed = index.getIndexedPredicates().contains(Util.encodeFieldName(predicate)); String label = null; // if predicate is an rdfs:label or woo:label, assign the object as // the // items label // TODO. Consider ...name too. if (predicate.endsWith("label") || predicate.endsWith("name")) { item.setLabel(object); label = object; } Long subjectIdOfObject; if (objectsSubjectsIdCache.containsKey(object)) { subjectIdOfObject = objectsSubjectsIdCache.get(object); } else { subjectIdOfObject = index.getSubjectId(object); objectsSubjectsIdCache.put(object, subjectIdOfObject); } if (label == null && subjectIdOfObject != null && lookupObjectLabels) { if (objectLabelCache.containsKey(subjectIdOfObject)) { label = objectLabelCache.get(subjectIdOfObject); } else { // If the object is also a subject Resource/BNode this // will return that subjects id which is the same as the // docId. Parse the subject doc that this object refers // too.. QueryResultItem objectItem = createRdfResultItem(index, subjectIdOfObject, 0.0d, false, null); if (objectItem != null) { label = objectItem.getLabel(); } objectLabelCache.put(subjectIdOfObject, label); } } // If the final label is the same as the object we just use the object. if (object.equals(label)) { label = null; } item.addRelation(predicate, object, subjectIdOfObject, context, indexed, label); } docInputStream.close(); return item; } }