com.yahoo.glimmer.web.Querier.java Source code

Java tutorial

Introduction

Here is the source code for com.yahoo.glimmer.web.Querier.java

Source

package com.yahoo.glimmer.web;

/*
 * Copyright (c) 2012 Yahoo! Inc. All rights reserved.
 * 
 *  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
 *  Unless required by applicable law or agreed to in writing, software distributed under the License is 
 *  distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and limitations under the License.
 *  See accompanying LICENSE file.
 */

import it.unimi.di.big.mg4j.index.Index;
import it.unimi.di.big.mg4j.query.SelectedInterval;
import it.unimi.di.big.mg4j.query.nodes.Query;
import it.unimi.di.big.mg4j.query.nodes.QueryBuilderVisitorException;
import it.unimi.di.big.mg4j.search.score.DocumentScoreInfo;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.mutable.MutableInt;
import org.apache.log4j.Logger;
import org.semanticweb.yars.nx.Node;
import org.semanticweb.yars.nx.parser.NxParser;

import com.yahoo.glimmer.query.QueryLogger;
import com.yahoo.glimmer.query.QueryLogger.QueryTimer;
import com.yahoo.glimmer.query.RDFIndex;
import com.yahoo.glimmer.util.BySubjectRecord;
import com.yahoo.glimmer.util.BySubjectRecord.BySubjectRecordException;
import com.yahoo.glimmer.util.Util;

/**
 * Wraps the details of doing a query against an RDFIndex.
 * 
 */
/**
 * Wraps the details of doing a query against an RDFIndex.
 * <p>
 * Two bounded, access-ordered (LRU) caches are kept per instance, each wrapped
 * with {@link Collections#synchronizedMap(Map)}:
 * <ul>
 * <li>object string -&gt; subject id of that object (a <code>null</code> value
 * records that the lookup returned nothing);</li>
 * <li>subject id -&gt; label of that subject (a <code>null</code> value records
 * that no label was found).</li>
 * </ul>
 */
public class Querier {
    private final static Logger LOGGER = Logger.getLogger(Querier.class);
    private static final String DEFAULT_CONTEXT = "default:";
    /** Maximum number of entries held by each of the two caches. */
    private static final int CACHE_SIZE = 10000;
    /** Largest page size accepted by {@link #doQuery}. */
    private static final int MAX_ITEMS_PER_QUERY = 10000;

    private final Map<String, Long> objectsSubjectsIdCache;
    private final Map<Long, String> objectLabelCache;

    private QueryLogger queryLogger = new QueryLogger();

    /** Creates a querier with two empty caches. */
    public Querier() {
        objectsSubjectsIdCache = Querier.<String, Long> newLruCache();
        objectLabelCache = Querier.<Long, String> newLruCache();
    }

    /**
     * Builds a synchronized, access-ordered map that evicts its eldest entry
     * once it holds more than {@link #CACHE_SIZE} entries.
     */
    private static <K, V> Map<K, V> newLruCache() {
        // Capacity CACHE_SIZE + 1 with a load factor above 1 means the table
        // never needs to be resized before eviction kicks in.
        LinkedHashMap<K, V> cache = new LinkedHashMap<K, V>(CACHE_SIZE + 1, 1.1f, true) {
            private static final long serialVersionUID = -8171861525079261380L;

            @Override
            protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
                return size() > CACHE_SIZE;
            }
        };
        return Collections.synchronizedMap(cache);
    }

    /**
     * Runs the given query against the index and builds a result item for each
     * returned document.
     * 
     * @param index
     *            the index to query.
     * @param query
     *            the query to run.
     * @param startItem
     *            offset of the first item wanted; must not be negative.
     * @param maxNumItems
     *            maximum number of items wanted; must be between 0 and 10000
     *            inclusive.
     * @param deref
     *            if true, labels of objects that are themselves subjects are
     *            looked up.
     * @param objectLengthLimit
     *            per predicate limit on the accumulated length of object text,
     *            or <code>null</code> for no limit.
     * @return the query result. Documents for which no item could be built are
     *         logged and left out of the item list.
     * @throws IllegalArgumentException
     *             if startItem or maxNumItems is out of range.
     */
    public QueryResult doQuery(RDFIndex index, Query query, int startItem, int maxNumItems, boolean deref,
            Integer objectLengthLimit) throws QueryBuilderVisitorException, IOException {
        if (startItem < 0 || maxNumItems < 0 || maxNumItems > MAX_ITEMS_PER_QUERY) {
            throw new IllegalArgumentException(
                    "Bad item range - start:" + startItem + " maxNumItems:" + maxNumItems);
        }

        QueryTimer timer = queryLogger.start();

        ObjectArrayList<DocumentScoreInfo<Reference2ObjectMap<Index, SelectedInterval[]>>> results = new ObjectArrayList<DocumentScoreInfo<Reference2ObjectMap<Index, SelectedInterval[]>>>();
        int numResults = index.process(startItem, maxNumItems, results, query);

        timer.endSearch();

        ObjectArrayList<QueryResultItem> resultItems = new ObjectArrayList<QueryResultItem>();
        for (int i = 0; i < results.size(); i++) {
            DocumentScoreInfo<Reference2ObjectMap<Index, SelectedInterval[]>> dsi = results.get(i);
            LOGGER.debug("Intervals for item " + i);
            LOGGER.debug("score " + dsi.score);
            QueryResultItem item = createRdfResultItem(index, dsi.document, dsi.score, deref,
                    objectLengthLimit);
            if (item == null) {
                LOGGER.error("Document id " + dsi.document + " isn't in collection(or has null content).");
            } else {
                resultItems.add(item);
            }
        }

        // Computed once and null-safely so the logger and the result agree.
        String queryString = query != null ? query.toString() : "";
        queryLogger.endQuery(timer, queryString, numResults);
        return new QueryResult(null, queryString, numResults, startItem, maxNumItems, resultItems,
                timer.getDuration(), timer.getSearchDuration());
    }

    /**
     * Builds a result holding just the document with the given id.
     * 
     * @param index
     *            the index to read the document from.
     * @param id
     *            the document id.
     * @param deref
     *            if true, labels of objects that are themselves subjects are
     *            looked up.
     * @param objectLengthLimit
     *            per predicate limit on the accumulated length of object text,
     *            or <code>null</code> for no limit.
     * @return a result with one item, or with no items if the document could
     *         not be read.
     */
    public QueryResult doQueryForDocId(RDFIndex index, long id, boolean deref, Integer objectLengthLimit)
            throws IOException {
        QueryTimer timer = queryLogger.start();
        // There is no search phase when fetching by id.
        timer.endSearch();
        QueryResultItem resultItem = createRdfResultItem(index, id, 1.0d, deref, objectLengthLimit);
        queryLogger.endQuery(timer, "getDoc " + Long.toString(id), 1);

        List<QueryResultItem> results;
        if (resultItem != null) {
            results = Collections.singletonList(resultItem);
        } else {
            results = Collections.emptyList();
        }
        return new QueryResult("", null, results.size(), 0, 1, results, timer.getDuration(),
                timer.getSearchDuration());
    }

    /**
     * Reads the document with the given id from the index and converts it into
     * a result item with one relation per tuple of the document.
     * 
     * @param index
     *            the index to read from.
     * @param docId
     *            id of the document to read.
     * @param score
     *            score to set on the item.
     * @param lookupObjectLabels
     *            if true, an object that is also a subject gets the label of
     *            that subject's document (unless the predicate itself is a
     *            label/name predicate).
     * @param objectLengthLimit
     *            per predicate limit on the accumulated length of object text,
     *            or <code>null</code> for no limit.
     * @return the item, or <code>null</code> if the document could not be
     *         opened or parsed.
     * @throws RuntimeException
     *             if one of the document's tuples can't be parsed.
     */
    private QueryResultItem createRdfResultItem(RDFIndex index, long docId, double score,
            boolean lookupObjectLabels, Integer objectLengthLimit) throws IOException {
        InputStream docInputStream;
        try {
            docInputStream = index.getDocumentInputStream(docId);
        } catch (IOException e) {
            // TODO fix end of stream errors on BZip2.
            LOGGER.warn("Failed to open doc with ID " + docId, e);
            return null;
        }
        if (docInputStream == null) {
            // Defensive: treat a missing stream like an unreadable document.
            return null;
        }

        // The stream is closed on every exit path, including early returns and
        // exceptions.
        try {
            BySubjectRecord record = new BySubjectRecord();

            try {
                // NOTE(review): the reader uses the platform default charset -
                // confirm the encoding the documents are stored in.
                record.readFrom(new InputStreamReader(docInputStream));
            } catch (BySubjectRecordException e) {
                LOGGER.warn("Failed to read doc with ID " + docId, e);
                return null;
            }

            if (docId != record.getId()) {
                LOGGER.error("Wanted doc id:" + docId + " but got doc id:" + record.getId());
            }

            QueryResultItem item = new QueryResultItem();
            item.setSubjectId(record.getId());
            item.setSubject(record.getSubject());
            item.setScore(score);

            Map<String, MutableInt> predicateToAccumulatedOjectLengthMap = new HashMap<String, MutableInt>();

            for (String relationString : record.getRelations()) {
                Node[] predicateObjectContext;
                try {
                    predicateObjectContext = NxParser.parseNodes(relationString);
                } catch (Exception e) {
                    throw new RuntimeException("Error parsing tuple: " + relationString, e);
                }
                if (predicateObjectContext == null || predicateObjectContext.length < 2) {
                    // A predicate and an object are both needed below.
                    throw new RuntimeException("Error parsing tuple: " + relationString);
                }

                String predicate = predicateObjectContext[0].toString();
                String object = predicateObjectContext[1].toString().trim();

                if (objectLengthLimit != null) {
                    int limit = objectLengthLimit.intValue();
                    MutableInt accumulatedOjectLength = predicateToAccumulatedOjectLengthMap.get(predicate);
                    if (accumulatedOjectLength == null) {
                        accumulatedOjectLength = new MutableInt(0);
                        predicateToAccumulatedOjectLengthMap.put(predicate, accumulatedOjectLength);
                    } else if (accumulatedOjectLength.intValue() >= limit) {
                        // This predicate has already used up its allowance.
                        continue;
                    }

                    // If the new accumulated length will be more than the
                    // limit(plus a bit), cut the object at the limit.
                    if (accumulatedOjectLength.intValue() + object.length() > limit + 20) {
                        // Clamped so that a negative limit can't produce a
                        // negative end index.
                        int cutAt = Math.max(0, limit - accumulatedOjectLength.intValue());
                        object = object.substring(0, cutAt) + "...";
                    }
                    accumulatedOjectLength.add(object.length());
                }

                String context;
                if (predicateObjectContext.length > 2) {
                    context = predicateObjectContext[2].toString();
                } else {
                    context = DEFAULT_CONTEXT;
                }

                boolean indexed = index.getIndexedPredicates().contains(Util.encodeFieldName(predicate));

                String label = null;
                // If the predicate ends with "label" or "name" (e.g.
                // rdfs:label) the object becomes the item's label. When
                // several tuples match, the last one wins.
                if (predicate.endsWith("label") || predicate.endsWith("name")) {
                    item.setLabel(object);
                    label = object;
                }

                // NOTE(review): when the object was truncated above, the
                // truncated text is what gets looked up - confirm this is
                // intended.
                Long subjectIdOfObject = lookupSubjectId(index, object);

                if (label == null && subjectIdOfObject != null && lookupObjectLabels) {
                    label = lookupObjectLabel(index, subjectIdOfObject);
                }

                // If the final label is the same as the object we just use the
                // object.
                if (object.equals(label)) {
                    label = null;
                }

                item.addRelation(predicate, object, subjectIdOfObject, context, indexed, label);
            }

            return item;
        } finally {
            docInputStream.close();
        }
    }

    /**
     * Returns the subject id the index gives for the object string, consulting
     * and filling the id cache. Null results are cached too.
     */
    private Long lookupSubjectId(RDFIndex index, String object) throws IOException {
        if (objectsSubjectsIdCache.containsKey(object)) {
            return objectsSubjectsIdCache.get(object);
        }
        Long subjectId = index.getSubjectId(object);
        objectsSubjectsIdCache.put(object, subjectId);
        return subjectId;
    }

    /**
     * Returns the label of the document with the given subject id, consulting
     * and filling the label cache. Null results are cached too.
     */
    private String lookupObjectLabel(RDFIndex index, Long subjectId) throws IOException {
        if (objectLabelCache.containsKey(subjectId)) {
            return objectLabelCache.get(subjectId);
        }
        // If the object is also a subject Resource/BNode its subject id is the
        // same as its docId, so parse that doc to find its label. Labels of
        // that doc's own objects are not looked up, so this goes one level
        // deep only.
        String label = null;
        QueryResultItem objectItem = createRdfResultItem(index, subjectId, 0.0d, false, null);
        if (objectItem != null) {
            label = objectItem.getLabel();
        }
        objectLabelCache.put(subjectId, label);
        return label;
    }
}