de.tudarmstadt.ukp.csniper.webapp.search.cqp.CqpEngine.java Source code

Java tutorial

Introduction

Here is the source code for de.tudarmstadt.ukp.csniper.webapp.search.cqp.CqpEngine.java

Source

/*******************************************************************************
 * Copyright 2013
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tudarmstadt.ukp.csniper.webapp.search.cqp;

import static org.apache.commons.io.IOUtils.closeQuietly;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.springframework.beans.factory.annotation.Required;
import org.springframework.dao.DataAccessResourceFailureException;

import de.tudarmstadt.ukp.csniper.webapp.search.CorpusService;
import de.tudarmstadt.ukp.csniper.webapp.search.SearchEngine;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceUtils;

/**
 * {@link SearchEngine} implementation that creates {@link CqpQuery} instances and gives access
 * to the corpus registry directory and to the macro definitions file configured for CQP.
 */
public class CqpEngine implements SearchEngine, Serializable {
    private static final long serialVersionUID = -4853538198064093163L;

    /** Name of the registry directory below the repository path of the corpus service. */
    private static final String REGISTRY = "registry";

    /**
     * Header line of a macro definition, e.g. {@code MACRO name(2)}: group 1 is the macro name,
     * group 2 the parameter count. Case-insensitive so that it accepts every line the
     * (case-insensitive) header detection in {@link #parseMacros(List)} hands to it.
     */
    private static final Pattern MACRO_HEADER = Pattern.compile("MACRO\\s+(\\w+)\\s*\\((\\d+)\\)",
            Pattern.CASE_INSENSITIVE);

    private String name;
    private File cqpExecutable;
    /**
     * Location of the macro definitions. This field is static, i.e. shared by all engine
     * instances, because the static {@link #getMacros()} reads it.
     */
    private static String macrosLocation;
    private CorpusService corpusService;

    /**
     * Sets the CQP executable.
     *
     * @param aCqpExecutable
     *            the executable file.
     */
    @Required
    public void setCqpExecutable(File aCqpExecutable) {
        cqpExecutable = aCqpExecutable;
    }

    /**
     * @return the CQP executable set via {@link #setCqpExecutable(File)}.
     */
    public File getCqpExecutable() {
        return cqpExecutable;
    }

    /**
     * Sets the location of the macro definitions. The value is stored in a static field, so it
     * applies to all instances of this class.
     *
     * @param aMacrosLocation
     *            the location; it is resolved via {@code ResourceUtils.resolveLocation} when the
     *            macros are read.
     */
    public void setMacrosLocation(String aMacrosLocation) {
        macrosLocation = aMacrosLocation;
    }

    /**
     * @return the location of the macro definitions, or {@code null} if none has been set.
     */
    public String getMacrosLocation() {
        return macrosLocation;
    }

    /**
     * Stores the given name; it is returned by {@link #getName()}.
     */
    @Override
    public void setBeanName(String aName) {
        name = aName;
    }

    @Override
    public String getName() {
        return name;
    }

    @Override
    public void setCorpusService(CorpusService aCorpusService) {
        corpusService = aCorpusService;
    }

    /**
     * Creates a {@link CqpQuery} for the given type and collection, configures it with a context
     * of one sentence on each side and with the macros location, and runs the given query.
     *
     * @param aType
     *            the type passed on to the {@link CqpQuery} constructor.
     * @param aCollection
     *            the collection passed on to the {@link CqpQuery} constructor.
     * @param aQuery
     *            the query to run.
     * @return the query object after the query has been run.
     * @throws RuntimeException
     *             any runtime exception raised while setting up or running the query; the query
     *             object is closed before the exception is rethrown.
     */
    @Override
    public CqpQuery createQuery(String aType, String aCollection, String aQuery) {
        CqpQuery query = null;
        try {
            query = new CqpQuery(this, aType, aCollection);
            query.setContext(1, 1, ContextUnit.SENTENCE);
            query.setMacrosLocation(macrosLocation);
            query.runQuery(aQuery);
            return query;
        } catch (RuntimeException e) {
            // Do not leave a half-initialized query open when setup or execution fails.
            if (query != null) {
                IOUtils.closeQuietly(query);
            }
            throw e;
        }
    }

    /**
     * @return the {@value #REGISTRY} directory below the repository path of the corpus service.
     */
    public File getRegistryPath() {
        return new File(corpusService.getRepositoryPath(), REGISTRY);
    }

    /**
     * Determines the encoding of a collection from its registry file. The registry file is the
     * file in {@link #getRegistryPath()} named like the lower-cased collection id. The first line
     * starting with {@code ##:: charset} decides: if it mentions {@code iso-8859-1} or
     * {@code latin1}, the result is {@code ISO-8859-1}; in all other cases it is {@code UTF-8}.
     *
     * @param aCollectionId
     *            the collection id.
     * @return {@code "ISO-8859-1"} or {@code "UTF-8"}.
     * @throws DataAccessResourceFailureException
     *             if the registry file cannot be read.
     */
    public String getEncoding(String aCollectionId) {
        try {
            // Locale.ROOT: the default locale must not influence case mapping (e.g. under a
            // Turkish locale "ISO" would be lower-cased to a dotless i and never match).
            File registryFile = new File(getRegistryPath(), aCollectionId.toLowerCase(Locale.ROOT));
            List<String> lines = FileUtils.readLines(registryFile, "UTF-8");
            for (String line : lines) {
                line = line.toLowerCase(Locale.ROOT);
                if (line.startsWith("##:: charset")) {
                    if (line.contains("iso-8859-1") || line.contains("latin1")) {
                        return "ISO-8859-1";
                    }
                    // Only the first charset line is considered.
                    break;
                }
            }
            return "UTF-8";
        } catch (IOException e) {
            throw new DataAccessResourceFailureException("Unable to read registry file", e);
        }
    }

    /**
     * Reads and parses the macro definitions from the configured macros location.
     *
     * @return the parsed macros; an empty list if no macros location has been set.
     * @throws DataAccessResourceFailureException
     *             if the macros location cannot be read.
     */
    public static List<CqpMacro> getMacros() {
        // The macros location has no @Required setter, so treat "not set" as "no macros".
        if (macrosLocation == null) {
            return new ArrayList<CqpMacro>();
        }

        InputStream is = null;
        try {
            is = ResourceUtils.resolveLocation(macrosLocation, null, null).openStream();
            return parseMacros(IOUtils.readLines(is, "UTF-8"));
        } catch (IOException e) {
            // Same error handling as getEncoding() instead of printing the stack trace and
            // returning a partial result.
            throw new DataAccessResourceFailureException(
                    "Unable to read macros from [" + macrosLocation + "]", e);
        } finally {
            closeQuietly(is);
        }
    }

    /**
     * Parses macro definitions from the lines of a macro file. Recognized layout:
     *
     * <pre>
     * # comment
     * MACRO name(2)
     * (
     *   body line
     *   ...
     * );
     * </pre>
     *
     * The terminating {@code ;} may also stand at the start of the line following the closing
     * {@code )}. Each macro gets the most recent comment line seen outside of a macro body.
     * A macro whose header does not have the form {@code MACRO name(count)} is skipped together
     * with its body. A body that is not terminated before the end of the input is dropped.
     */
    private static List<CqpMacro> parseMacros(List<String> aLines) {
        List<CqpMacro> macros = new ArrayList<CqpMacro>();

        boolean open = false;
        // null while no valid header has been seen for the current body.
        CqpMacro currentMacro = null;
        String lastComment = "";

        for (int i = 0; i < aLines.size(); i++) {
            String line = aLines.get(i);
            String n = line.toLowerCase(Locale.ROOT).trim();

            if (!open) {
                if (n.startsWith("#")) {
                    lastComment = line;
                } else if (n.startsWith("macro")) {
                    Matcher m = MACRO_HEADER.matcher(line.trim());
                    if (m.matches()) {
                        currentMacro = new CqpMacro();
                        currentMacro.setName(m.group(1));
                        currentMacro.setParamCount(Integer.parseInt(m.group(2)));
                        currentMacro.setComment(lastComment);
                        currentMacro.setBody(new ArrayList<String>());
                    } else {
                        // Unsupported header: the body that follows must not be attributed to
                        // any macro.
                        currentMacro = null;
                    }
                } else if (n.startsWith("(")) {
                    open = true;
                }
                continue;
            }

            if (n.startsWith(")")) {
                // Peek at the next line without consuming it, so that it is not lost when it
                // turns out not to be the terminator.
                boolean terminatorOnNextLine = !n.startsWith(");") && i + 1 < aLines.size()
                        && aLines.get(i + 1).trim().startsWith(";");
                if (n.startsWith(");") || terminatorOnNextLine) {
                    if (terminatorOnNextLine) {
                        // The ";" line belongs to the terminator; skip it.
                        i++;
                    }
                    open = false;
                    if (currentMacro != null) {
                        macros.add(currentMacro);
                    }
                    // A following "(" block without its own header must not extend this macro.
                    currentMacro = null;
                    continue;
                }
            }

            // Inside a body: a ")" that does not terminate the macro is ordinary body text.
            if (currentMacro != null) {
                currentMacro.getBody().add(line.trim());
            }
        }

        return macros;
    }
}