ie.pars.nlp.sketchengine.interactions.ViewSKEInteractions.java Source code

Java tutorial

Introduction

Here is the source code for ie.pars.nlp.sketchengine.interactions.ViewSKEInteractions.java

Source

/* 
 * Copyright (C) 2016 Behrang QasemiZadeh <zadeh at phil.hhu.de>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package ie.pars.nlp.sketchengine.interactions;

import ie.pars.noske.json.parsers.ViewMethodJsonParser;
import ie.pars.noske.parse.obj.ConcordanceLine;
import ie.pars.noske.parse.obj.WLTGD;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.httpclient.HttpClient;
import org.json.JSONObject;

/**
 * Fetches the concordances of a query page by page from the {@code view}
 * method under the configured CGI path and writes them to an output file,
 * either as the raw JSON responses or as parsed concordance lines.
 *
 * @author Behrang QasemiZadeh @ One cold winter day
 */
public class ViewSKEInteractions extends SKEInteractionsBase {

    private static final Logger LOGGER = Logger.getLogger(ViewSKEInteractions.class.getName());

    /** Character encoding used when URL-encoding request parameters. */
    private static final String URL_ENCODING = "UTF-8";

    /**
     * Attributes requested for both the KWIC and its context. "word,word" is
     * listed twice on purpose: it is added to ease parsing.
     */
    private static final String DEFAULT_ATTRIBUTES = "word,word,lemma,tag,gword,glemma,gtag,depr,dist";

    /** Value sent as the {@code pagesize} request parameter. */
    private static final int DEFAULT_MAX_PAGE_SIZE = 10000;

    private final String leftContextSize;
    private final String rightContextSize;
    private final String attribKwic;
    private final String attribCtx;
    private final int maxPageSize;
    private final String runCGIPath;
    private final File outputFile;
    private final SkeJsonResultParser parser;
    private int async = 0; // def value 0;
    private final boolean writeParsedData;

    /**
     * Creates an interaction that dumps the fetched concordances into a file.
     *
     * @param file the file the results are written to
     * @param baseurl passed to the superclass constructor
     * @param runCGIPath path prefix placed in front of {@code /view?...}
     * @param corpus passed to the superclass constructor; sent as {@code corpname}
     * @param query passed to the superclass constructor; sent (prefixed by {@code q}) as {@code q}
     * @param leftContextSize value sent as {@code kwicleftctx}
     * @param rightContextSize value sent as {@code kwicrightctx}
     * @param writeParsedData {@code true} to write parsed concordance lines,
     * {@code false} to write the raw JSON responses wrapped in a
     * {@code {"results": [...]}} object
     */
    public ViewSKEInteractions(File file, String baseurl, String runCGIPath, String corpus, String query,
            String leftContextSize, String rightContextSize, boolean writeParsedData) {
        this((SkeJsonResultParser) null, baseurl, runCGIPath, file, corpus, query, leftContextSize,
                rightContextSize, writeParsedData);
    }

    /**
     * Creates an interaction with an explicit result parser. When
     * {@code parser} is not {@code null}, {@link #run()} takes the parsing
     * path, which is not implemented yet.
     *
     * @param parser the result parser, or {@code null} to dump results to {@code file}
     * @param baseurl passed to the superclass constructor
     * @param runCGIPath path prefix placed in front of {@code /view?...}
     * @param file the file the results are written to
     * @param corpus passed to the superclass constructor; sent as {@code corpname}
     * @param query passed to the superclass constructor; sent (prefixed by {@code q}) as {@code q}
     * @param leftContextSize value sent as {@code kwicleftctx}
     * @param rightContextSize value sent as {@code kwicrightctx}
     * @param writeParsedData {@code true} to write parsed concordance lines,
     * {@code false} to write the raw JSON responses
     */
    public ViewSKEInteractions(SkeJsonResultParser parser, String baseurl, String runCGIPath, File file,
            String corpus, String query, String leftContextSize, String rightContextSize, boolean writeParsedData) {
        super(baseurl, corpus, query);
        this.runCGIPath = runCGIPath;
        this.parser = parser;
        this.leftContextSize = leftContextSize;
        this.rightContextSize = rightContextSize;
        // Both constructors share one attribute list; an embedded space would
        // otherwise end up URL-encoded inside the request.
        this.attribKwic = DEFAULT_ATTRIBUTES;
        this.attribCtx = DEFAULT_ATTRIBUTES;
        this.maxPageSize = DEFAULT_MAX_PAGE_SIZE;
        this.writeParsedData = writeParsedData;
        this.outputFile = file;
    }

    /**
     * Set async value 0 or 1
     *
     * @param async the value sent as the {@code async} request parameter
     * @throws Exception (an {@link IllegalArgumentException}) if the value is
     * neither 0 nor 1
     */
    public void setAsync(int async) throws Exception {
        if (async != 0 && async != 1) {
            throw new IllegalArgumentException("Unexpected value for async, 0 or 1 is allows");
        }
        this.async = async;
    }

    /**
     * URL-encodes a single request parameter value.
     */
    private static String encode(String value) throws UnsupportedEncodingException {
        return URLEncoder.encode(value, URL_ENCODING);
    }

    /**
     * Builds the request string of the {@code view} method for one result page.
     *
     * @param fromPage the page number sent as {@code fromp}
     * @return the CGI path followed by the encoded query string
     * @throws UnsupportedEncodingException if the URL encoding is not supported
     */
    private String encodeQuery(int fromPage) throws UnsupportedEncodingException {
        StringBuilder request = new StringBuilder(runCGIPath);
        request.append("/view?").append("corpname=").append(encode(this.corpus));
        request.append("&q=q").append(encode(this.query)).append(";");
        request.append("&attrs=").append(encode(attribKwic));
        request.append("&ctxattrs=").append(encode(attribCtx));
        request.append("&kwicleftctx=").append(encode(this.leftContextSize));
        request.append("&kwicrightctx=").append(encode(this.rightContextSize));
        request.append("&pagesize=").append(maxPageSize);
        request.append("&fromp=").append(fromPage);
        request.append("&async=").append(async);
        request.append("&format=json");
        return request.toString();
    }

    /**
     * Fetches all result pages and dumps them into the output file. This
     * method can be changed so that it resembles a stream on demand.
     *
     * @throws UnsupportedEncodingException if a request cannot be URL-encoded
     * @throws IOException if the output file cannot be opened or written
     * @throws Exception if a request fails or the server reports an unexpected error
     */
    private void getStroConcordanceFile() throws UnsupportedEncodingException, IOException, Exception {
        HttpClient sessionID = super.getSessionID();
        // try-with-resources: the file is closed even when a request or a
        // parse step throws half way through.
        try (OutputStreamWriter outputStreamWriter = new OutputStreamWriter(
                new FileOutputStream(this.outputFile), StandardCharsets.UTF_8);
                PrintWriter printer = new PrintWriter(outputStreamWriter)) {
            if (!writeParsedData) {
                printer.println("{\"results\": [");
            }
            int pageNumber = 0;
            boolean morePages = true;
            while (morePages) {
                pageNumber++;
                JSONObject page = super.getHTTP(sessionID, encodeQuery(pageNumber));
                if (this.writeParsedData) {
                    ViewMethodJsonParser vmParser = new ViewMethodJsonParser(page.toString());
                    ConcordanceLine cl;
                    while ((cl = vmParser.getNext()) != null) {
                        printer.println(cl.toStringLine());
                    }
                } else {
                    printer.println(page.toString(1));
                }
                morePages = loopCondition(page, pageNumber);
                if (morePages && !writeParsedData) {
                    // separator between two consecutive raw JSON pages
                    printer.println(" ,");
                }
            }

            if (!writeParsedData) {
                printer.println("]" + "}"); // to the end the json file}
            }
            // PrintWriter never throws on write errors; ask explicitly so a
            // truncated output file does not go unnoticed.
            if (printer.checkError()) {
                throw new IOException("Failed to write concordances to " + this.outputFile);
            }
        }
    }

    /**
     * Decides from a response whether another page has to be requested.
     *
     * @param jsonObjP the response of the last request
     * @param pageNumer the number of the page that was just fetched
     * @return {@code true} if fetching should continue, {@code false} if the
     * result was empty or all pages were fetched
     * @throws Exception if the response carries an error other than
     * "Empty result", or lacks the expected numeric fields
     */
    private boolean loopCondition(JSONObject jsonObjP, int pageNumer) throws Exception {
        if (jsonObjP.has("error")) {
            String message = String.valueOf(jsonObjP.get("error"));
            if ("Empty result".equals(message)) {
                System.out.println("No result for current query ..."); // retrun null etc
                return false;
            }
            // keep the server's text so the failure can be diagnosed
            throw new Exception("Unexpected error in server response: " + message);
        }

        // that is there is no error
        // fetch the rest page by page
        long concsize = jsonObjP.getLong("concsize");
        int finished = jsonObjP.getInt("finished");
        int pageToGo = jsonObjP.has("numofpages") ? jsonObjP.getInt("numofpages") : 0;

        // NOTE(review): with a strict '<' the loop requests one page past
        // "numofpages" before it stops; confirm against the server's
        // behaviour whether '<=' is what is intended.
        if (pageToGo < pageNumer && finished == 1) {
            System.out.println(
                    "* Done fetching concordances of total size " + concsize + " for query " + this.query);
            return false;
        }
        return true;
    }

    /**
     * Parsing method; not implemented yet.
     *
     * @return never returns normally
     * @throws UnsupportedEncodingException declared for the future implementation
     * @throws IOException declared for the future implementation
     * @throws Exception always (an {@link UnsupportedOperationException})
     */
    private List<WLTGD> getParseConcordance() throws UnsupportedEncodingException, IOException, Exception {
        throw new UnsupportedOperationException("to implement");
    }

    /**
     * Runs the interaction: the parsing path when a parser was supplied,
     * otherwise the dump-to-file path. Failures are logged, not rethrown.
     */
    @Override
    public void run() {
        System.out.println("Q: " + this.query);
        try {
            if (this.parser != null) {
                this.getParseConcordance();
            } else {
                this.getStroConcordanceFile();
            }
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, "Failed to fetch concordances for query " + this.query, ex);
        }
    }

}