Java tutorial
/*
 * Copyright (C) 2016 Behrang QasemiZadeh <zadeh at phil.hhu.de>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package ie.pars.nlp.sketchengine.interactions;

import ie.pars.noske.json.parsers.FreqMethodJsonParser;
import ie.pars.noske.parse.obj.FrequencyLine;
import ie.pars.noske.parse.obj.WLTGD;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.httpclient.HttpClient;
import org.json.JSONObject;

/**
 * Runs a {@code freqs} query against a (No)SketchEngine endpoint and writes
 * the paged result to an output.
 *
 * <p>
 * Apart from {@link #run()} and {@link #getFrequencyContextSingle()}, the
 * constructors are the only public entry points. Three usage modes were
 * planned by the original author:
 * <ol>
 * <li>fetch the result and dump the raw JSON into a file;</li>
 * <li>fetch the result, parse it, and dump the parsed lines into a file;</li>
 * <li>fetch the result and keep it in an object for a later processing step
 * (not implemented yet).</li>
 * </ol>
 *
 * @author Behrang QasemiZadeh @ One cold winter day
 */
public class FreqSKEInteraction extends SKEInteractionsBase {

    private static final Logger LOGGER = Logger.getLogger(FreqSKEInteraction.class.getName());

    private final static int DEFAULT_MAX_MAX_PAGE_SIZE = 100000;

    private final int maxPageSize;
    private final SkeJsonResultParser parser;
    private final String fcritQuery;
    private final int minFreqFilter;
    private final int async = 0;
    private final int exampleNumber;
    private final List returnResutlWhatEver;
    private final String runCGIPath;
    private final boolean writeParsed;
    private final boolean appendWritingMode;
    private final Writer writer;
    private File fileOutput;

    /**
     * Creates an interaction that writes its result to a caller-supplied
     * writer.
     *
     * @param writer destination of the output; parsed lines are written while
     * holding this object's monitor
     * @param baseURL passed to the super class constructor
     * @param runCGIPath path prefix placed in front of {@code /freqs?} when the
     * request is built
     * @param corpus corpus name, sent as the {@code corpname} parameter
     * @param query query, sent (prefixed with {@code q}) as the {@code q}
     * parameter
     * @param fcritQuery value of the {@code fcrit} parameter
     * @param minFreqFilter value of the {@code flimit} parameter
     * @param exampleNumner value of the {@code examples} parameter
     * @param writeParsed if {@code true} every response is parsed and written
     * line by line; otherwise the raw JSON responses are written
     * @param appendWritingMode append mode flag; only allowed together with
     * {@code writeParsed}
     * @throws Exception if {@code appendWritingMode} is set while
     * {@code writeParsed} is not
     */
    public FreqSKEInteraction(Writer writer,
            String baseURL, String runCGIPath, String corpus, String query,
            String fcritQuery, int minFreqFilter, int exampleNumner,
            boolean writeParsed, boolean appendWritingMode) throws Exception {
        super(baseURL, corpus, query);
        this.writer = writer;
        this.runCGIPath = runCGIPath;
        this.maxPageSize = DEFAULT_MAX_MAX_PAGE_SIZE;
        this.parser = null;
        this.fcritQuery = fcritQuery;
        this.exampleNumber = exampleNumner;
        this.minFreqFilter = minFreqFilter;
        this.returnResutlWhatEver = null;
        this.writeParsed = writeParsed;
        this.appendWritingMode = appendWritingMode;
        if (appendWritingMode && !writeParsed) {
            throw new Exception("You can only append parsed data to files ... no json! ");
        }
    }

    /**
     * Creates an interaction that writes its result to its own file, opened
     * when {@link #getFrequencyContextSingle()} runs.
     *
     * @param file output file
     * @param rootURL passed to the super class constructor
     * @param runCGIPath path prefix placed in front of {@code /freqs?} when the
     * request is built
     * @param corpus corpus name, sent as the {@code corpname} parameter
     * @param query query, sent (prefixed with {@code q}) as the {@code q}
     * parameter
     * @param fcritQuery value of the {@code fcrit} parameter
     * @param minFreqFilter value of the {@code flimit} parameter
     * @param exampleNumner value of the {@code examples} parameter
     * @param writeParsed if {@code true} every response is parsed and written
     * line by line; otherwise the raw JSON responses are written
     * @param appendWritingMode if {@code true} the output is appended to
     * {@code file} instead of replacing its content; only allowed together with
     * {@code writeParsed}
     * @throws Exception if {@code appendWritingMode} is set while
     * {@code writeParsed} is not
     */
    public FreqSKEInteraction(
            File file,
            String rootURL, String runCGIPath, String corpus, String query,
            String fcritQuery, int minFreqFilter, int exampleNumner,
            boolean writeParsed, boolean appendWritingMode) throws Exception {
        super(rootURL, corpus, query);
        this.writer = null;
        this.fileOutput = file;
        this.runCGIPath = runCGIPath;
        this.maxPageSize = DEFAULT_MAX_MAX_PAGE_SIZE;
        this.fcritQuery = fcritQuery;
        this.parser = null;
        this.exampleNumber = exampleNumner;
        this.minFreqFilter = minFreqFilter;
        this.returnResutlWhatEver = null;
        this.writeParsed = writeParsed;
        this.appendWritingMode = appendWritingMode;
        if (appendWritingMode && !writeParsed) {
            throw new Exception("to implement ... the proble was"
                    + " that I have wanted to write in the same "
                    + "file in a multi-threaded setting"
                    + "this constructor is going to be used for the time"
                    + " to implement for the time that this class is do its IO"
                    + " within the thread in its own dedicated IO channel"
                    + " ... no json! ");
        }
    }

    /**
     * Placeholder for the "keep the result in memory" mode. Always throws.
     *
     * @param maxPageSize value of the {@code pagesize} parameter
     * @param parser parser for the result
     * @param rootURL passed to the super class constructor
     * @param corpus corpus name
     * @param query query
     * @param fcritQuery value of the {@code fcrit} parameter
     * @param minFreqFilter value of the {@code flimit} parameter
     * @param exampleNumner value of the {@code examples} parameter
     * @throws UnsupportedOperationException always
     */
    public FreqSKEInteraction(int maxPageSize,
            SkeJsonResultParser parser,
            String rootURL, String corpus, String query,
            String fcritQuery, int minFreqFilter, int exampleNumner) {
        super(rootURL, corpus, query);
        this.maxPageSize = maxPageSize;
        this.fcritQuery = fcritQuery;
        this.parser = parser;
        this.exampleNumber = exampleNumner;
        this.minFreqFilter = minFreqFilter;
        this.returnResutlWhatEver = new ArrayList();
        this.writeParsed = false;
        // The throw must stay the last statement: the remaining final fields
        // are intentionally left unassigned, which only compiles because the
        // constructor can never complete normally.
        throw new UnsupportedOperationException("to be implemented in the future");
    }

    /**
     * Builds the request path and query string for one result page.
     *
     * @param fromPage zero-based page counter of the paging loop
     * @return {@code runCGIPath} followed by the encoded {@code freqs} request
     * @throws UnsupportedEncodingException declared by
     * {@link URLEncoder#encode(String, String)}
     */
    private String encodeFreqQuery(int fromPage) throws UnsupportedEncodingException {
        // NOTE(review): "fpage" is sent twice (fromPage and fromPage + 1).
        // Presumably the server honours the last one, i.e. a one-based page
        // number - confirm against the server before removing either.
        return runCGIPath + "/freqs?"
                + "q=q" + URLEncoder.encode(this.query, "UTF-8") + ";"
                + "corpname=" + URLEncoder.encode(this.corpus, "UTF-8") + ";"
                + "fcrit=" + URLEncoder.encode(this.fcritQuery, "UTF-8") + ";"
                + "flimit=" + this.minFreqFilter + ";"
                + "fpage=" + fromPage + ";"
                + "fpage=" + (fromPage + 1) + ";"
                + "examples=" + this.exampleNumber + ";"
                + "&pagesize=" + maxPageSize
                + "&async=" + async
                + "&format=json";
    }

    /**
     * Fetches every result page and writes it to the writer given at
     * construction time, then flushes and closes that writer.
     *
     * @throws UnsupportedEncodingException if the request cannot be encoded
     * @throws IOException if writing fails
     * @throws Exception if the server reports an error other than
     * {@code "Empty list"}, or if fetching/parsing a page fails
     */
    private void getFrequencyContext() throws UnsupportedEncodingException, IOException, Exception {
        HttpClient session = super.getSessionID();
        writeAllPages(session, writer);
        writer.flush();
        // NOTE(review): this closes a writer that was supplied by the caller;
        // if the same writer is shared between several interactions the first
        // one to finish closes it for all of them. Behaviour kept as it was.
        writer.close();
    }

    /**
     * Not implemented yet.
     *
     * @return never returns normally
     * @throws UnsupportedEncodingException never thrown at the moment
     * @throws IOException never thrown at the moment
     * @throws Exception always
     */
    private List<WLTGD> getParseFreqList() throws UnsupportedEncodingException, IOException, Exception {
        throw new Exception("to implement");
    }

    /**
     * Executes the query in the mode selected by the constructor that was
     * used. Failures are reported on {@code System.err} and logged; they are
     * not propagated.
     */
    @Override
    public void run() {
        System.out.println("\tQ: " + this.query + " for " + this.fcritQuery);
        if (this.writer != null && this.parser == null) {
            // everything is written through the one writer handed in by the caller
            try {
                this.getFrequencyContext();
            } catch (Exception ex) {
                System.err.println(ex);
                LOGGER.log(Level.SEVERE, null, ex);
            }
        } else if (this.writer == null && this.fileOutput != null) {
            // one query, one dedicated output file
            try {
                getFrequencyContextSingle();
            } catch (Exception ex) {
                System.err.println(ex);
                LOGGER.log(Level.SEVERE, null, ex);
            }
        } else {
            System.err.println("NOT IMPLEMENT YET");
        }
    }

    /**
     * Fetches every result page and writes it, UTF-8 encoded, to the file
     * given at construction time. The file is appended to when the
     * interaction was created with {@code appendWritingMode}, otherwise its
     * content is replaced. The file is closed even when fetching or writing
     * fails.
     *
     * @throws UnsupportedEncodingException if the request cannot be encoded
     * @throws IOException if the file cannot be opened or written
     * @throws Exception if the server reports an error other than
     * {@code "Empty list"}, or if fetching/parsing a page fails
     */
    public void getFrequencyContextSingle() throws UnsupportedEncodingException, IOException, Exception {
        HttpClient session = super.getSessionID();
        try (BufferedWriter out = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(this.fileOutput, this.appendWritingMode),
                StandardCharsets.UTF_8))) {
            writeAllPages(session, out);
            out.flush();
        }
    }

    /**
     * Requests result pages one after another and writes each of them to
     * {@code out} until the server signals the last page, reports
     * {@code "Empty list"}, or an error occurs. In raw JSON mode the pages are
     * wrapped into a single {@code {"results": [ ... ]}} document.
     *
     * @param session HTTP client used for all page requests
     * @param out destination of the output; neither flushed nor closed here
     * @throws Exception if the server reports an error other than
     * {@code "Empty list"}, or if fetching, parsing or writing fails
     */
    private void writeAllPages(HttpClient session, Writer out) throws Exception {
        if (!writeParsed) {
            out.append("{\"results\": [\n");
        }
        int page = 0;
        while (true) {
            JSONObject response = super.getHTTP(session, encodeFreqQuery(page));
            writeResponse(out, response);

            if (response.has("error")) {
                String message = String.valueOf(response.get("error"));
                if ("Empty list".equals(message)) {
                    System.out.println("No result for current query: " + this.query);
                    break;
                }
                System.out.println("* NOT SEEN * " + response.toString(1));
                throw new Exception("not seen " + response.toString(1));
            }

            // A missing "lastpage" is treated as "this was the last page";
            // otherwise the same page would be requested again forever.
            if (response.optInt("lastpage", 1) != 0) {
                break;
            }
            if (!writeParsed) {
                out.append(",");
            }
            page++;
        }
        if (!writeParsed) {
            out.append("]" + "}");
        }
    }

    /**
     * Writes a single response, either as indented JSON or as one line per
     * parsed {@link FrequencyLine}.
     *
     * @param out destination of the output
     * @param response response of one page request
     * @throws Exception if parsing or writing fails
     */
    private void writeResponse(Writer out, JSONObject response) throws Exception {
        if (!writeParsed) {
            out.append(response.toString(1));
            return;
        }
        FreqMethodJsonParser pageParser = new FreqMethodJsonParser(response.toString());
        // Hold the writer's monitor for the whole page so that lines written
        // by other threads to the same writer are not interleaved with it.
        synchronized (out) {
            FrequencyLine line;
            while ((line = pageParser.getNext()) != null) {
                out.append(line.toStringLine()).append("\n");
            }
        }
    }
}