annis.service.internal.QueryServiceImpl.java Source code

Introduction

Here is the source code for annis.service.internal.QueryServiceImpl.java
Source

/*
 * Copyright 2012 SFB 632.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package annis.service.internal;

import annis.CommonHelper;
import annis.GraphHelper;
import static java.util.Arrays.asList;
import annis.WekaHelper;
import annis.dao.AnnisDao;
import annis.examplequeries.ExampleQuery;
import annis.service.objects.Match;
import annis.model.QueryNode;
import annis.ql.parser.QueryData;
import annis.resolver.ResolverEntry;
import annis.resolver.SingleResolverRequest;
import annis.service.QueryService;
import annis.service.objects.AnnisAttribute;
import annis.service.objects.AnnisBinaryMetaData;
import annis.service.objects.AnnisCorpus;
import annis.service.objects.CorpusConfig;
import annis.service.objects.FrequencyTable;
import annis.service.objects.FrequencyTableEntry;
import annis.service.objects.FrequencyTableEntryType;
import annis.service.objects.CorpusConfigMap;
import annis.service.objects.DocumentBrowserConfig;
import annis.service.objects.MatchAndDocumentCount;
import annis.service.objects.MatchGroup;
import annis.service.objects.RawTextWrapper;
import annis.service.objects.SegmentationList;
import annis.service.objects.SubgraphFilter;
import annis.sqlgen.MatrixQueryData;
import annis.sqlgen.extensions.AnnotateQueryData;
import annis.service.objects.FrequencyTableQuery;
import annis.service.objects.OrderType;
import annis.sqlgen.extensions.LimitOffsetQueryData;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.common.io.ByteStreams;
import com.google.mimeparse.MIMEParse;
import com.sun.jersey.api.core.ResourceConfig;
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.SaltProject;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.TreeSet;
import javax.servlet.http.HttpServletRequest;
import javax.ws.rs.Consumes;
import javax.ws.rs.DefaultValue;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.GenericEntity;
import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.StreamingOutput;
import javax.ws.rs.core.UriInfo;
import org.apache.commons.lang3.StringUtils;
import org.apache.shiro.SecurityUtils;
import org.apache.shiro.subject.Subject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.springframework.stereotype.Component;

/**
 * Methods for querying the database.
 *
 * @author Thomas Krause <krauseto@hu-berlin.de>
 * @author Benjamin Weißenfels <b.pixeldrama@gmail.com>
 */
@Component
@Path("annis/query")
public class QueryServiceImpl implements QueryService {

    /** General-purpose logger of this service (errors, debug output, startup message). */
    private final static Logger log = LoggerFactory.getLogger(QueryServiceImpl.class);

    /** Separate logger named "QueryLog"; receives one line per executed query (see logQuery). */
    private final static Logger queryLog = LoggerFactory.getLogger("QueryLog");

    /** Data access object every database operation of this service is delegated to. */
    private AnnisDao annisDao;

    /** Only exposed through its getter/setter; not used by any method in this file. */
    private WekaHelper wekaHelper;

    /** Only exposed through its getter/setter; not used by any method in this file. */
    private int port = 5711;

    /** Configuration returned by the "corpora/default-config" resource and read by the context getters. */
    private CorpusConfig defaultCorpusConfig;

    /** Injected by JAX-RS; not read by any method in this file. */
    @Context
    private UriInfo uriInfo;

    /** Injected by JAX-RS; used to read the "Accept" header of the current request. */
    @Context
    private HttpServletRequest request;

    /** Injected by JAX-RS; not read by any method in this file. */
    @Context
    private ResourceConfig rc;

    /**
     * Initialization hook: checks the database version and afterwards logs
     * that the service has been loaded.
     *
     * <p> XXX: Ideally this would be a private method annotated with
     * <tt>@PostConstruct</tt>, but that doesn't seem to work. As a
     * work-around Spring invokes it as an init-method.
     */
    public void init() {
        // verify the PostgreSQL version before announcing anything
        annisDao.checkDatabaseVersion();

        // only reached when the check above did not throw
        log.info("ANNIS QueryService loaded.");
    }

    /**
     * Counts the matches and documents of an AQL query.
     *
     * @param query the AQL query (required)
     * @param rawCorpusNames comma separated list of corpus names (required)
     * @return an XML response wrapping the {@link MatchAndDocumentCount}
     */
    @GET
    @Path("search/count")
    @Produces("application/xml")
    @Override
    public Response count(@QueryParam("q") String query, @QueryParam("corpora") String rawCorpusNames) {

        requiredParameter(query, "q", "AnnisQL query");
        requiredParameter(rawCorpusNames, "corpora", "comma separated list of corpus names");

        // the caller needs the "count" permission on every requested corpus
        Subject subject = SecurityUtils.getSubject();
        for (String corpus : splitCorpusNamesFromRaw(rawCorpusNames)) {
            subject.checkPermission("query:count:" + corpus);
        }

        QueryData queryData = queryDataFromParameters(query, rawCorpusNames);

        long startedAt = System.currentTimeMillis();
        MatchAndDocumentCount result = annisDao.countMatchesAndDocuments(queryData);
        long elapsed = System.currentTimeMillis() - startedAt;

        logQuery("COUNT", query, splitCorpusNamesFromRaw(rawCorpusNames), elapsed);

        return Response.ok(result).type(MediaType.APPLICATION_XML_TYPE).build();
    }

    /**
     * Wraps a find query into a {@link StreamingOutput}. The query is executed
     * (and logged) when the returned object is asked to write itself.
     *
     * @param data the prepared query
     * @param rawCorpusNames comma separated corpus names, only used for logging
     * @param query the original AQL query, only used for logging
     * @return the streaming wrapper
     */
    private StreamingOutput findRaw(final QueryData data, final String rawCorpusNames, final String query)
            throws IOException {
        StreamingOutput streamedMatches = new StreamingOutput() {
            @Override
            public void write(OutputStream output) throws IOException, WebApplicationException {
                long startedAt = System.currentTimeMillis();
                annisDao.find(data, output);
                long elapsed = System.currentTimeMillis() - startedAt;
                logQuery("FIND", query, splitCorpusNamesFromRaw(rawCorpusNames), elapsed);
            }
        };
        return streamedMatches;
    }

    /**
     * Executes a find query right away and returns the matches as a list.
     *
     * @param data the prepared query
     * @param rawCorpusNames comma separated corpus names, only used for logging
     * @param query the original AQL query, only used for logging
     * @return the matches delivered by the DAO
     */
    private List<Match> findXml(QueryData data, final String rawCorpusNames, final String query)
            throws IOException {
        long startedAt = System.currentTimeMillis();
        List<Match> matches = annisDao.find(data);
        long elapsed = System.currentTimeMillis() - startedAt;

        logQuery("FIND", query, splitCorpusNamesFromRaw(rawCorpusNames), elapsed);
        return matches;
    }

    /**
     * Finds the matches of an AQL query. Depending on the "Accept" header of
     * the request the result is either streamed as plain text or returned as
     * an XML {@link MatchGroup}.
     *
     * @param query the AQL query (required)
     * @param rawCorpusNames comma separated list of corpus names (required)
     * @param offsetRaw integer offset of the first match to return
     * @param limitRaw integer limit of the number of matches
     * @param orderRaw name of an {@link OrderType} constant, case-insensitive
     * @return the response containing the matches
     * @throws WebApplicationException with status 400 if a parameter is
     * missing or if "offset", "limit" or "order" have an invalid value
     */
    @GET
    @Path("search/find")
    @Produces({ "application/xml", "text/plain" })
    @Override
    public Response find(@QueryParam("q") String query, @QueryParam("corpora") String rawCorpusNames,
            @DefaultValue("0") @QueryParam("offset") String offsetRaw,
            @DefaultValue("-1") @QueryParam("limit") String limitRaw,
            @DefaultValue("ascending") @QueryParam("order") String orderRaw) throws IOException {
        requiredParameter(query, "q", "AnnisQL query");
        requiredParameter(rawCorpusNames, "corpora", "comma separated list of corpus names");

        Subject user = SecurityUtils.getSubject();
        List<String> corpusNames = splitCorpusNamesFromRaw(rawCorpusNames);
        for (String c : corpusNames) {
            user.checkPermission("query:find:" + c);
        }

        // malformed numbers are a client error, not an internal server error
        int offset;
        int limit;
        try {
            offset = Integer.parseInt(offsetRaw);
            limit = Integer.parseInt(limitRaw);
        } catch (NumberFormatException ex) {
            throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST)
                    .type(MediaType.TEXT_PLAIN).entity("parameters 'offset' and 'limit' must be integers, but were '"
                            + offsetRaw + "' and '" + limitRaw + "'")
                    .build());
        }

        OrderType order;
        try {
            // Locale.ROOT keeps the lookup independent of the server's default
            // locale (e.g. a Turkish locale lower-cases "I" to a dotless i)
            order = OrderType.valueOf(orderRaw.toLowerCase(java.util.Locale.ROOT));
        } catch (IllegalArgumentException ex) {
            throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST)
                    .type(MediaType.TEXT_PLAIN).entity("parameter 'order' has the invalid value '" + orderRaw
                            + "'. It should be one of 'ascending', 'random' or 'descending'")
                    .build());
        }

        final QueryData data = queryDataFromParameters(query, rawCorpusNames);
        data.setCorpusConfiguration(annisDao.getCorpusConfiguration());
        data.addExtension(new LimitOffsetQueryData(offset, limit, order));

        // a missing or blank Accept header means "anything is fine"
        String acceptHeader = request.getHeader(HttpHeaders.ACCEPT);
        if (acceptHeader == null || acceptHeader.trim().isEmpty()) {
            acceptHeader = "*/*";
        }

        List<String> knownTypes = Lists.newArrayList("text/plain", "application/xml");

        // find the best matching mime type
        String bestMediaTypeMatch = MIMEParse.bestMatch(knownTypes, acceptHeader);

        if ("text/plain".equals(bestMediaTypeMatch)) {
            return Response.ok(findRaw(data, rawCorpusNames, query), "text/plain").build();
        } else {
            List<Match> result = findXml(data, rawCorpusNames, query);
            return Response.ok().type("application/xml")
                    .entity(new GenericEntity<MatchGroup>(new MatchGroup(result)) {
                    }).build();
        }

    }

    /**
     * Get result as matrix in WEKA (ARFF) format.
     *
     * @param query the AQL query (required)
     * @param rawCorpusNames comma separated list of corpus names (required)
     * @param rawMetaKeys optional comma separated list of qualified meta
     * annotation names
     * @param rawCsv parsed as boolean and handed to the DAO as CSV flag
     * @return a streaming output that executes the query when it is written
     */
    @GET
    @Path("search/matrix")
    @Produces("text/plain")
    public StreamingOutput matrix(final @QueryParam("q") String query,
            final @QueryParam("corpora") String rawCorpusNames, @QueryParam("metakeys") String rawMetaKeys,
            @DefaultValue("false") @QueryParam("csv") String rawCsv) {
        requiredParameter(query, "q", "AnnisQL query");
        requiredParameter(rawCorpusNames, "corpora", "comma separated list of corpus names");

        final boolean outputCsv = Boolean.parseBoolean(rawCsv);

        // the caller needs the "matrix" permission on every requested corpus
        Subject subject = SecurityUtils.getSubject();
        for (String corpus : splitCorpusNamesFromRaw(rawCorpusNames)) {
            subject.checkPermission("query:matrix:" + corpus);
        }

        final QueryData data = queryDataFromParameters(query, rawCorpusNames);

        MatrixQueryData matrixExtension = new MatrixQueryData();
        if (rawMetaKeys != null) {
            matrixExtension.setMetaKeys(splitMatrixKeysFromRaw(rawMetaKeys));
        }
        // an empty key list is replaced by null
        boolean hasEmptyKeyList = matrixExtension.getMetaKeys() != null
                && matrixExtension.getMetaKeys().isEmpty();
        if (hasEmptyKeyList) {
            matrixExtension.setMetaKeys(null);
        }
        data.addExtension(matrixExtension);

        return new StreamingOutput() {
            @Override
            public void write(OutputStream output) throws IOException, WebApplicationException {
                long startedAt = System.currentTimeMillis();
                annisDao.matrix(data, outputCsv, output);
                long elapsed = System.currentTimeMillis() - startedAt;
                logQuery("MATRIX", query, splitCorpusNamesFromRaw(rawCorpusNames), elapsed);
            }
        };
    }

    /**
     * Frequency analysis.
     *
     * @param query the AQL query (required)
     * @param rawCorpusNames comma separated list of corpus names (required)
     * @param rawFields Comma separated list of result vector elements.
     *                  Each element has the form {@code <node-nr>:<anno-name>}.
     *                  The annotation name can be set to "tok" to indicate
     *                  that the span should be used instead of an annotation.
     * @return the frequency table computed by the DAO
     */
    @GET
    @Path("search/frequency")
    @Produces("application/xml")
    public FrequencyTable frequency(@QueryParam("q") String query, @QueryParam("corpora") String rawCorpusNames,
            @QueryParam("fields") String rawFields) {
        requiredParameter(query, "q", "AnnisQL query");
        requiredParameter(rawCorpusNames, "corpora", "comma separated list of corpus names");
        requiredParameter(rawFields, "fields", "Comma seperated list of result vector elements.");

        // NOTE(review): this endpoint checks the "matrix" permission, there is
        // no dedicated "frequency" permission here -- confirm this is intended
        Subject subject = SecurityUtils.getSubject();
        for (String corpus : splitCorpusNamesFromRaw(rawCorpusNames)) {
            subject.checkPermission("query:matrix:" + corpus);
        }

        QueryData queryData = queryDataFromParameters(query, rawCorpusNames);
        queryData.addExtension(FrequencyTableQuery.parse(rawFields));

        long startedAt = System.currentTimeMillis();
        FrequencyTable table = annisDao.frequency(queryData);
        long elapsed = System.currentTimeMillis() - startedAt;
        logQuery("FREQUENCY", query, splitCorpusNamesFromRaw(rawCorpusNames), elapsed);

        return table;
    }

    /**
     * Retrieves the annotation graphs for a group of matches.
     *
     * @param matches the matches sent as request body (required)
     * @param segmentation optional segmentation name passed on to the query
     * @param leftRaw integer size of the left context
     * @param rightRaw integer size of the right context
     * @param filterRaw name of a {@link SubgraphFilter} constant
     * @return the Salt project delivered by the DAO
     * @throws WebApplicationException with status 400 if the body is missing,
     * if "left", "right" or "filter" have an invalid value or if the matches
     * do not resolve to any corpus
     */
    @POST
    @Path("search/subgraph")
    @Consumes({ "application/xml", "text/plain" })
    @Produces({ "application/xml", "application/xmi+xml", "application/xmi+binary" })
    @Override
    public SaltProject subgraph(MatchGroup matches, @QueryParam("segmentation") String segmentation,
            @DefaultValue("0") @QueryParam("left") String leftRaw,
            @DefaultValue("0") @QueryParam("right") String rightRaw,
            @DefaultValue("all") @QueryParam("filter") String filterRaw) {

        Subject user = SecurityUtils.getSubject();

        // some robustness stuff
        if (matches == null) {
            throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST)
                    .type(MediaType.TEXT_PLAIN).entity("missing required request body").build());
        }

        // malformed parameter values are a client error, not a server error
        int left;
        int right;
        try {
            left = Integer.parseInt(leftRaw);
            right = Integer.parseInt(rightRaw);
        } catch (NumberFormatException ex) {
            throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST)
                    .type(MediaType.TEXT_PLAIN).entity("parameters 'left' and 'right' must be integers, but were '"
                            + leftRaw + "' and '" + rightRaw + "'")
                    .build());
        }

        SubgraphFilter filter;
        try {
            filter = SubgraphFilter.valueOf(filterRaw);
        } catch (IllegalArgumentException ex) {
            throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST)
                    .type(MediaType.TEXT_PLAIN)
                    .entity("parameter 'filter' has the invalid value '" + filterRaw + "'").build());
        }

        QueryData data = GraphHelper.createQueryData(matches, annisDao);

        data.addExtension(new AnnotateQueryData(left, right, segmentation, filter));

        // collect the (sorted, duplicate free) names of all used corpora
        Set<String> corpusNames = new TreeSet<>();
        for (Match singleMatch : matches.getMatches()) {
            for (java.net.URI u : singleMatch.getSaltIDs()) {
                corpusNames.add(CommonHelper.getCorpusPath(u).get(0));
            }
        }

        for (String c : corpusNames) {
            user.checkPermission("query:subgraph:" + c);
        }

        if (data.getCorpusList() == null || data.getCorpusList().isEmpty()) {
            throw new WebApplicationException(Response.Status.BAD_REQUEST.getStatusCode());
        }

        long start = new Date().getTime();
        SaltProject p = annisDao.graph(data);
        long end = new Date().getTime();

        List<String> corpusNamesList = new ArrayList<>(corpusNames);
        String options = "matches: " + matches.toString() + ", seg: " + segmentation + ", left: " + left
                + ", right: " + right + ", filter: " + filter;
        logQuery("SUBGRAPH", "", corpusNamesList, end - start, options);

        return p;
    }

    /**
     * Retrieves the annotation graph of a single document.
     *
     * @param toplevelCorpusName name of the toplevel corpus
     * @param documentName name of the document
     * @param filternodeanno optional comma separated list of node annotation
     * names; entries are trimmed and empty entries are dropped
     * @return the Salt project delivered by the DAO
     * @throws WebApplicationException with status 500 if retrieving or logging
     * fails
     */
    @GET
    @Path("graph/{top}/{doc}")
    @Produces({ "application/xml", "application/xmi+xml", "application/xmi+binary" })
    @Override
    public SaltProject graph(@PathParam("top") String toplevelCorpusName, @PathParam("doc") String documentName,
            @QueryParam("filternodeanno") String filternodeanno) {

        SecurityUtils.getSubject().checkPermission("query:subgraph:" + toplevelCorpusName);

        List<String> nodeAnnotationFilter = (filternodeanno == null) ? null
                : Splitter.on(',').trimResults().omitEmptyStrings().splitToList(filternodeanno);

        try {
            long startedAt = System.currentTimeMillis();
            SaltProject project = annisDao.retrieveAnnotationGraph(toplevelCorpusName, documentName,
                    nodeAnnotationFilter);
            long elapsed = System.currentTimeMillis() - startedAt;

            logQuery("GRAPH", toplevelCorpusName, documentName, elapsed);
            return project;
        } catch (Exception ex) {
            log.error("error when accessing graph " + toplevelCorpusName + "/" + documentName, ex);
            throw new WebApplicationException(ex, 500);
        }
    }

    /**
     * Looks up the resolver entries for a corpus, namespace and element type.
     *
     * @param corpusName name of the corpus
     * @param namespace the namespace to resolve
     * @param type name of a {@link ResolverEntry.ElementType} constant
     * @return the resolver entries delivered by the DAO
     */
    @GET
    @Path("resolver/{corpusName}/{namespace}/{type}")
    @Produces("application/xml")
    public List<ResolverEntry> resolver(@PathParam("corpusName") String corpusName,
            @PathParam("namespace") String namespace, @PathParam("type") String type) {
        SingleResolverRequest resolverRequest = new SingleResolverRequest(corpusName, namespace,
                ResolverEntry.ElementType.valueOf(type));
        return annisDao.getResolverEntries(resolverRequest);
    }

    /**
     * Lists all corpora for which the current user has the
     * {@code query:*:<corpus>} permission.
     *
     * @return the permitted corpora, in the order delivered by the DAO
     */
    @GET
    @Path("corpora")
    @Produces("application/xml")
    public List<AnnisCorpus> corpora() {
        List<AnnisCorpus> everyCorpus = annisDao.listCorpora();
        List<AnnisCorpus> permitted = new LinkedList<>();

        // drop everything the user may not access
        Subject subject = SecurityUtils.getSubject();
        for (AnnisCorpus corpus : everyCorpus) {
            if (!subject.isPermitted("query:*:" + corpus.getName())) {
                continue;
            }
            permitted.add(corpus);
        }

        return permitted;
    }

    /**
     * Returns the corpus configurations of all corpora for which the current
     * user has the {@code query:*:<corpus>} permission.
     *
     * @return a (possibly empty) map, never {@code null}
     */
    @GET
    @Path("corpora/config")
    @Produces("application/xml")
    public CorpusConfigMap corpusConfigs() {
        CorpusConfigMap allConfigs = annisDao.getCorpusConfigurations();
        CorpusConfigMap permittedConfigs = new CorpusConfigMap();
        Subject subject = SecurityUtils.getSubject();

        // nothing configured at all: answer with the empty map
        if (allConfigs == null) {
            return permittedConfigs;
        }

        for (String corpusName : allConfigs.getCorpusConfigs().keySet()) {
            if (subject.isPermitted("query:*:" + corpusName)) {
                permittedConfigs.put(corpusName, allConfigs.get(corpusName));
            }
        }
        return permittedConfigs;
    }

    /**
     * Returns the default corpus configuration of this service.
     *
     * @return the value of the {@code defaultCorpusConfig} field
     */
    @GET
    @Path("corpora/default-config")
    @Produces("application/xml")
    public CorpusConfig corpusDefaultConfig() {
        return this.defaultCorpusConfig;
    }

    /**
     * Lists the corpora whose name or alias equals the given name and for
     * which the current user has the {@code query:*:<corpus>} permission.
     *
     * @param toplevelName corpus name or alias
     * @return the permitted corpora, empty if nothing matches
     */
    @GET
    @Path("corpora/{top}")
    @Produces("application/xml")
    public List<AnnisCorpus> singleCorpus(@PathParam("top") String toplevelName) {
        List<AnnisCorpus> permitted = new ArrayList<>();

        Subject subject = SecurityUtils.getSubject();

        List<String> requestedNames = new LinkedList<>();
        requestedNames.add(toplevelName);
        List<Long> corpusIds = annisDao.mapCorpusNamesToIds(requestedNames);

        // also add all corpora that match the alias name
        corpusIds.addAll(annisDao.mapCorpusAliasToIds(toplevelName));
        if (corpusIds.isEmpty()) {
            return permitted;
        }

        for (AnnisCorpus corpus : annisDao.listCorpora(corpusIds)) {
            if (subject.isPermitted("query:*:" + corpus.getName())) {
                permitted.add(corpus);
            }
        }
        return permitted;
    }

    /**
     * Returns the configuration of a single toplevel corpus.
     *
     * @param toplevelName name of the toplevel corpus
     * @return a new {@link CorpusConfig} wrapping the properties from the DAO
     * @throws WebApplicationException with status 500 if the configuration
     * cannot be read
     */
    @GET
    @Path("corpora/{top}/config")
    @Produces("application/xml")
    public CorpusConfig corpusConfig(@PathParam("top") String toplevelName) {
        SecurityUtils.getSubject().checkPermission("query:config:" + toplevelName);

        try {
            Properties properties = annisDao.getCorpusConfigurationSave(toplevelName);

            CorpusConfig wrapped = new CorpusConfig();
            wrapped.setConfig(properties);
            return wrapped;
        } catch (Exception ex) {
            log.error("problems with reading config", ex);
            throw new WebApplicationException(ex, 500);
        }
    }

    /**
     * Lists the annotations of a toplevel corpus.
     *
     * @param toplevelCorpus name of the toplevel corpus
     * @param fetchValues parsed as boolean and passed on to the DAO
     * @param onlyMostFrequentValues parsed as boolean and passed on to the DAO
     * @return the annotation attributes delivered by the DAO
     */
    @GET
    @Path("corpora/{top}/annotations")
    @Produces("application/xml")
    public List<AnnisAttribute> annotations(@PathParam("top") String toplevelCorpus,
            @DefaultValue("false") @QueryParam("fetchvalues") String fetchValues,
            @DefaultValue("false") @QueryParam("onlymostfrequentvalues") String onlyMostFrequentValues)
            throws WebApplicationException {
        SecurityUtils.getSubject().checkPermission("query:annotations:" + toplevelCorpus);

        List<Long> corpusIds = new ArrayList<>();
        corpusIds.add(annisDao.mapCorpusNameToId(toplevelCorpus));

        boolean withValues = Boolean.parseBoolean(fetchValues);
        boolean mostFrequentOnly = Boolean.parseBoolean(onlyMostFrequentValues);
        return annisDao.listAnnotations(corpusIds, withValues, mostFrequentOnly);
    }

    /**
     * Lists the segmentation names of a toplevel corpus. Requires the
     * {@code query:annotations:<corpus>} permission.
     *
     * @param toplevelCorpus name of the toplevel corpus
     * @return the names delivered by the DAO, wrapped in a
     * {@link SegmentationList}
     */
    @GET
    @Path("corpora/{top}/segmentation-names")
    @Produces("application/xml")
    public SegmentationList segmentationNames(@PathParam("top") String toplevelCorpus)
            throws WebApplicationException {

        SecurityUtils.getSubject().checkPermission("query:annotations:" + toplevelCorpus);

        List<Long> corpusIds = new ArrayList<>();
        corpusIds.add(annisDao.mapCorpusNameToId(toplevelCorpus));

        SegmentationList names = new SegmentationList(annisDao.listSegmentationNames(corpusIds));
        return names;
    }

    /**
     * Checks a query by letting the DAO parse it against an empty corpus
     * list. Whatever the parser throws is passed on to the caller.
     *
     * @param query Query to check for validity
     * @return the string "ok" if parsing did not throw
     */
    @GET
    @Produces("text/plain")
    @Path("check")
    public String check(@QueryParam("q") String query) {
        List<Long> noCorpora = new LinkedList<Long>();
        annisDao.parseAQL(query, noCorpora);
        return "ok";
    }

    /**
     * Parses a query and returns all of its query nodes as one flat list.
     * Each node is tagged with the zero-based index of the alternative it
     * belongs to. Whatever the parser throws is passed on to the caller.
     *
     * @param query Query to get the query nodes for
     * @return an OK response wrapping the list of query nodes
     */
    @GET
    @Path("parse/nodes")
    @Produces("application/xml")
    public Response parseNodes(@QueryParam("q") String query) {
        QueryData parsed = annisDao.parseAQL(query, new LinkedList<Long>());

        List<QueryNode> collected = new LinkedList<>();
        int alternativeIndex = 0;
        for (List<QueryNode> alternative : parsed.getAlternatives()) {
            for (QueryNode node : alternative) {
                node.setAlternativeNumber(alternativeIndex);
                collected.add(node);
            }
            alternativeIndex++;
        }

        GenericEntity<List<QueryNode>> entity = new GenericEntity<List<QueryNode>>(collected) {
        };
        return Response.ok(entity).build();
    }

    /**
     * Resource variant of {@code binary} with an explicit offset and length
     * but without a file name.
     */
    @GET
    @Path("corpora/{top}/{document}/binary/{offset}/{length}")
    public Response binary1(@PathParam("top") String toplevelCorpusName, @PathParam("document") String corpusName,
            @PathParam("offset") String rawOffset, @PathParam("length") String rawLength) {
        final String noFileName = null;
        return binary(toplevelCorpusName, corpusName, rawOffset, rawLength, noFileName);
    }

    /**
     * Resource variant of {@code binary} without offset, length and file name.
     */
    @GET
    @Path("corpora/{top}/{document}/binary")
    public Response binary2(@PathParam("top") String toplevelCorpusName, @PathParam("document") String corpusName) {
        final String notGiven = null;
        return binary(toplevelCorpusName, corpusName, notGiven, notGiven, notGiven);
    }

    /**
     * Resource variant of {@code binary} with a file name, an offset and a
     * length.
     */
    @GET
    @Path("corpora/{top}/{document}/binary/{file}/{offset}/{length}")
    public Response binary3(@PathParam("top") String toplevelCorpusName, @PathParam("document") String corpusName,
            @PathParam("file") String file, @PathParam("offset") String rawOffset,
            @PathParam("length") String rawLength) {
        // note the different parameter order of the delegate
        Response response = binary(toplevelCorpusName, corpusName, rawOffset, rawLength, file);
        return response;
    }

    /**
     * Resource variant of {@code binary} with a file name but without offset
     * and length.
     */
    @GET
    @Path("corpora/{top}/{document}/binary/{file}")
    public Response binary4(@PathParam("top") String toplevelCorpusName, @PathParam("document") String corpusName,
            @PathParam("file") String file) {
        final String notGiven = null;
        return binary(toplevelCorpusName, corpusName, notGiven, notGiven, file);
    }

    /**
     * Streams (a part of) a binary file attached to a document. The media type
     * is negotiated between the "Accept" header of the request and the mime
     * types of the available binaries.
     *
     * @param toplevelCorpusName name of the toplevel corpus
     * @param corpusName name of the document the binary belongs to
     * @param rawOffset the part we want to start from, we start from 0; if
     * this or {@code rawLength} is {@code null} the offset is 0 and the
     * length is taken from the meta data of the matched binary
     * @param rawLength how many bytes we take
     * @param fileName restricts the candidates to binaries with this file
     * name; {@code null} means any file
     * @return a streaming response; status 404 if no binary matches, 406 if
     * the client accepts none of the available media types and 400 if offset
     * or length are not integers
     */
    @Override
    public Response binary(String toplevelCorpusName, String corpusName, String rawOffset, String rawLength,
            String fileName) {
        Subject user = SecurityUtils.getSubject();
        user.checkPermission("query:binary:" + toplevelCorpusName);

        // a missing or blank Accept header means "anything is fine"
        String acceptHeader = request.getHeader(HttpHeaders.ACCEPT);
        if (acceptHeader == null || acceptHeader.trim().isEmpty()) {
            acceptHeader = "*/*";
        }

        List<AnnisBinaryMetaData> meta = annisDao.getBinaryMeta(toplevelCorpusName, corpusName);
        // insertion order is kept on purpose (LinkedHashMap)
        HashMap<String, AnnisBinaryMetaData> matchedMetaByType = new LinkedHashMap<>();

        for (AnnisBinaryMetaData m : meta) {
            if (fileName == null) {
                // just add all available media types, the first binary of each type wins
                if (!matchedMetaByType.containsKey(m.getMimeType())) {
                    matchedMetaByType.put(m.getMimeType(), m);
                }
            } else if (fileName.equals(m.getFileName())) {
                // this binary has the right title/file name
                matchedMetaByType.put(m.getMimeType(), m);
            }
        }

        if (matchedMetaByType.isEmpty()) {
            return Response.status(Response.Status.NOT_FOUND).entity("Requested binary not found").build();
        }

        // find the best matching mime type
        String bestMediaTypeMatch = MIMEParse.bestMatch(matchedMetaByType.keySet(), acceptHeader);
        if (bestMediaTypeMatch.isEmpty()) {
            return Response.status(Response.Status.NOT_ACCEPTABLE)
                    .entity("Client must accept one of the following media types: "
                            + StringUtils.join(matchedMetaByType.keySet(), ", "))
                    .build();
        }
        MediaType mediaType = MediaType.valueOf(bestMediaTypeMatch);

        int offset = 0;
        int length = 0;

        if (rawLength == null || rawOffset == null) {
            // use matched binary meta data to get the complete file size
            AnnisBinaryMetaData matchedBinary = matchedMetaByType.get(mediaType.toString());
            if (matchedBinary != null) {
                length = matchedBinary.getLength();
            }
        } else {
            // use the provided information; malformed numbers are a client error
            try {
                offset = Integer.parseInt(rawOffset);
                length = Integer.parseInt(rawLength);
            } catch (NumberFormatException ex) {
                return Response.status(Response.Status.BAD_REQUEST).type(MediaType.TEXT_PLAIN)
                        .entity("offset and length must be integers, but were '" + rawOffset + "' and '"
                                + rawLength + "'")
                        .build();
            }
        }

        if (log.isDebugEnabled()) {
            log.debug("fetching  " + (length / 1024) + "kb (" + offset + "-" + (offset + length)
                    + ") from binary " + toplevelCorpusName + "/" + corpusName
                    + (fileName == null ? "" : "/" + fileName) + " " + mediaType.toString());
        }

        final InputStream stream = annisDao.getBinary(toplevelCorpusName, corpusName, mediaType.toString(),
                fileName, offset, length);

        log.debug("fetch successfully");
        StreamingOutput result = new StreamingOutput() {
            @Override
            public void write(OutputStream output) throws IOException, WebApplicationException {
                // the DAO stream is closed even if copying fails
                try (InputStream in = stream) {
                    ByteStreams.copy(in, output);
                    output.close();
                }
            }
        };

        return Response.ok(result, mediaType).build();
    }

    /**
     * Fetches the example queries for a set of corpora. Corpora for which the
     * current user lacks the {@code query:*:<corpus>} permission are silently
     * skipped.
     *
     * @param rawCorpusNames comma separated names of the corpora the examples
     * are fetched from; if {@code null} all corpora known to the DAO are used.
     * @return the example queries delivered by the DAO
     * @throws WebApplicationException with status 500 if anything goes wrong
     */
    @GET
    @Path("corpora/example-queries/")
    @Produces(MediaType.APPLICATION_XML)
    public List<ExampleQuery> getExampleQueries(@QueryParam("corpora") String rawCorpusNames)
            throws WebApplicationException {

        Subject subject = SecurityUtils.getSubject();

        try {
            // step 1: determine the candidate corpus names
            List<String> candidates = new ArrayList<>();
            if (rawCorpusNames == null) {
                for (AnnisCorpus corpus : annisDao.listCorpora()) {
                    candidates.add(corpus.getName());
                }
            } else {
                candidates.addAll(Arrays.asList(rawCorpusNames.split(",")));
            }

            // step 2: filter by which corpora the user is allowed to access
            List<String> permitted = new ArrayList<>();
            for (String name : candidates) {
                if (subject.isPermitted("query:*:" + name)) {
                    permitted.add(name);
                }
            }

            // step 3: resolve the names and fetch the examples
            return annisDao.getExampleQueries(annisDao.mapCorpusNamesToIds(permitted));
        } catch (Exception ex) {
            log.error("Problem accessing example queries", ex);
            throw new WebApplicationException(ex, 500);
        }
    }

    /**
     * Logs an operation on a single document: there is no AQL query, the
     * corpus list consists of the toplevel corpus only and the document name
     * is reported as option.
     */
    private void logQuery(String queryFunction, String toplevelCorpus, String documentName, long runtime) {
        List<String> singleCorpus = Arrays.asList(toplevelCorpus);
        String documentOption = "document: " + documentName;
        logQuery(queryFunction, null, singleCorpus, runtime, documentOption);
    }

    /**
     * Logs a query that has no additional options.
     */
    private void logQuery(String queryFunction, String annisQuery, List<String> corpusNames, long runtime) {
        final String noOptions = null;
        logQuery(queryFunction, annisQuery, corpusNames, runtime, noOptions);
    }

    /**
     * Writes one line to the query log. The line has the shape
     * {@code function: F, query: Q, corpus: [..], runtime: N ms, OPTIONS}
     * where the query part and the options part are left out if the
     * respective value is {@code null} or empty.
     *
     * @param queryFunction name of the executed function
     * @param annisQuery the AQL query, may be {@code null} or empty
     * @param corpusNames the involved corpora
     * @param runtime the runtime in milliseconds
     * @param options free-form additional information, may be {@code null}
     * or empty
     */
    private void logQuery(String queryFunction, String annisQuery, List<String> corpusNames, long runtime,
            String options) {
        boolean hasQuery = annisQuery != null && !annisQuery.isEmpty();
        boolean hasOptions = options != null && !options.isEmpty();

        StringBuilder line = new StringBuilder();
        line.append("function: ").append(queryFunction).append(", ");
        if (hasQuery) {
            line.append("query: ").append(annisQuery).append(", ");
        }
        line.append("corpus: ").append(corpusNames).append(", ");
        line.append("runtime: ").append(runtime).append(" ms");
        if (hasOptions) {
            line.append(", ").append(options);
        }

        queryLog.info(line.toString());
    }

    /**
     * Makes sure that a required parameter has been given.
     *
     * @param value Value which is checked for null.
     * @param name The short name of the parameter.
     * @param description A one line description of the meaning of the
     * parameter.
     * @throws WebApplicationException with status 400 and a plain text
     * message if the value is {@code null}
     */
    private void requiredParameter(String value, String name, String description) throws WebApplicationException {
        if (value != null) {
            return;
        }

        String message = "missing required parameter '" + name + "' (" + description + ")";
        Response badRequest = Response.status(Response.Status.BAD_REQUEST).type(MediaType.TEXT_PLAIN)
                .entity(message).build();
        throw new WebApplicationException(badRequest);
    }

    /**
     * Builds the {@link QueryData} for a query and a list of corpus names.
     *
     * @param query The AQL query.
     * @param rawCorpusNames The names of the toplevel corpora separated by
     * ",".
     * @return the {@link QueryData} parsed by the DAO for the given
     * parameters.
     *
     * @throws WebApplicationException with status 404 if the number of
     * resolved corpus ids differs from the number of given names.
     */
    private QueryData queryDataFromParameters(String query, String rawCorpusNames) throws WebApplicationException {
        List<String> sortedNames = splitCorpusNamesFromRaw(rawCorpusNames);

        // the order in which the corpora were given is irrelevant; sorting
        // the names gives a stable ordering regardless of what the UI sends
        Collections.sort(sortedNames);

        List<Long> corpusIDs = annisDao.mapCorpusNamesToIds(sortedNames);
        boolean everyNameResolved = corpusIDs.size() == sortedNames.size();
        if (!everyNameResolved) {
            Response notFound = Response.status(Response.Status.NOT_FOUND).type("text/plain")
                    .entity("one ore more corpora are unknown to the system").build();
            throw new WebApplicationException(notFound);
        }

        return annisDao.parseAQL(query, corpusIDs);
    }

    /**
     * Turns a comma separated string of corpus names into a list. The names
     * are taken as they are, i.e. they are not trimmed.
     *
     * @param rawCorpusNames The corpus names separated by ",".
     * @return a fixed-size list backed by the array of split names
     */
    private List<String> splitCorpusNamesFromRaw(String rawCorpusNames) {
        String[] names = rawCorpusNames.split(",");
        return Arrays.asList(names);
    }

    /**
     * Splits a list of qualified (meta-) annotation names into a proper java
     * list. Each entry has the form {@code name} or {@code namespace:name};
     * only the first ":" separates namespace and name. Entries are trimmed
     * and blank entries are skipped, so an empty input results in an empty
     * list.
     *
     * @param raw The qualified names separated by ",".
     * @return the parsed names in the order they were given, never
     * {@code null}
     */
    private List<MatrixQueryData.QName> splitMatrixKeysFromRaw(String raw) {
        LinkedList<MatrixQueryData.QName> result = new LinkedList<>();

        for (String s : raw.split(",")) {
            String entry = s.trim();
            if (entry.isEmpty()) {
                // "".split(",") yields one empty string; without this check a
                // blank parameter would produce a key with an empty name
                continue;
            }

            String[] nameSplit = entry.split(":", 2);
            MatrixQueryData.QName qname = new MatrixQueryData.QName();
            if (nameSplit.length == 2) {
                qname.namespace = nameSplit[0].trim();
                qname.name = nameSplit[1].trim();
            } else {
                qname.name = nameSplit[0].trim();
            }
            result.add(qname);
        }

        return result;
    }

    /**
     * @return the DAO this service delegates to
     */
    public AnnisDao getAnnisDao() {
        return this.annisDao;
    }

    /**
     * @param annisDao the DAO this service delegates to
     */
    public void setAnnisDao(AnnisDao annisDao) {
        this.annisDao = annisDao;
    }

    /**
     * Retrieves the max right context.
     *
     * @return the value of the "max-context-right" key of the default corpus
     * configuration; it is guaranteed to be >= 0.
     */
    public int getContextRight() {
        final String key = "max-context-right";
        return getCorpusConfigIntValues(key);
    }

    /**
     * Retrieves the max left context.
     *
     * @return the value of the "max-context-left" key of the default corpus
     * configuration; it is guaranteed to be >= 0.
     */
    public int getContextLeft() {
        final String key = "max-context-left";
        return getCorpusConfigIntValues(key);
    }

    /**
     * Reads a non-negative integer value from the default corpus
     * configuration.
     *
     * @param context the key of the value inside the default corpus
     * configuration.
     * @return the value parsed as int, always >= 0.
     * @throws NumberFormatException if the key is missing or its value is not
     * an integer.
     * @throws IllegalStateException if the value is negative.
     */
    private int getCorpusConfigIntValues(String context) {
        int value = Integer.parseInt(defaultCorpusConfig.getConfig().getProperty(context));

        // zero is explicitly allowed, only negative values are rejected
        if (value < 0) {
            throw new IllegalStateException("the value of '" + context + "' must be >= 0, but is " + value);
        }

        return value;
    }

    /**
     * @return the configured WEKA helper
     */
    public WekaHelper getWekaHelper() {
        return this.wekaHelper;
    }

    /**
     * @param wekaHelper the WEKA helper to set
     */
    public void setWekaHelper(WekaHelper wekaHelper) {
        this.wekaHelper = wekaHelper;
    }

    /**
     * @return the configured port (5711 unless changed via the setter)
     */
    public int getPort() {
        return this.port;
    }

    /**
     * @param port the port to set
     */
    public void setPort(int port) {
        this.port = port;
    }

    /**
     * Plain accessor; the same object is served by the
     * "corpora/default-config" resource.
     *
     * @return the defaultCorpusConfig
     */
    public CorpusConfig getDefaultCorpusConfig() {
        return this.defaultCorpusConfig;
    }

    /**
     * Plain mutator for the default corpus configuration.
     *
     * @param defaultCorpusConfig the defaultCorpusConfig to set
     */
    public void setDefaultCorpusConfig(CorpusConfig defaultCorpusConfig) {
        this.defaultCorpusConfig = defaultCorpusConfig;
    }

    /**
     * Fetches the raw text of a document. Requires the
     * {@code query:raw_text:<top>} permission.
     *
     * @param top the name of the top level corpus.
     * @param docname the name of the document.
     *
     * @return a wrapper around the texts delivered by the DAO. Can be empty,
     * if the corpus only contains media data or segmentations.
     */
    @GET
    @Path("rawtext/{top}/{docname}")
    @Produces(MediaType.APPLICATION_XML)
    public RawTextWrapper getRawText(@PathParam("top") String top, @PathParam("docname") String docname) {
        SecurityUtils.getSubject().checkPermission("query:raw_text:" + top);

        RawTextWrapper wrapper = new RawTextWrapper();
        wrapper.setTexts(annisDao.getRawText(top, docname));
        return wrapper;
    }

    /**
     * Returns the document browser configuration of a corpus and falls back
     * to the default configuration if the corpus has none.
     *
     * @param corpus name of the corpus
     * @return the configuration, or {@code null} if neither a corpus specific
     * nor a default configuration exists
     */
    @GET
    @Path("corpora/doc_browser_config/{corpus}")
    @Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
    public DocumentBrowserConfig getDocumentBrowserConfig(@PathParam("corpus") String corpus) {
        DocumentBrowserConfig browserConfig = annisDao.getDocBrowserConfiguration(corpus);
        if (browserConfig != null) {
            return browserConfig;
        }

        // may still be null, which is passed on unchanged
        return annisDao.getDefaultDocBrowserConfiguration();
    }
}