org.ssc.txtproc.TextProcessor.java Source code

Java tutorial

Introduction

Here is the source code for org.ssc.txtproc.TextProcessor.java

Source

/*
   jtxtproc
   Copyright (C) 2014  Edward Sargsyan
   jtxtproc is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
   jtxtproc is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   You should have received a copy of the GNU General Public License
   along with jtxtproc.  If not, see <http://www.gnu.org/licenses/>.
*/

package org.ssc.txtproc;

import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.sun.istack.internal.NotNull;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClientBuilder;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;

/**
 * Client for a remote text-processing HTTP service.
 *
 * <p>Each call to {@link #process(String, Options)} issues a single HTTP GET whose headers
 * carry the text and the selected processing options, then maps the JSON reply onto a
 * {@link Result}.
 *
 * Created by eduards on 5/26/2015.
 */
public class TextProcessor {

    /** Scheme prepended to the host when the caller supplies a bare host name. */
    private static final String DEFAULT_SCHEME = "http://";

    /** Charset used to decode the response body instead of the platform default. */
    private static final String RESPONSE_CHARSET = "UTF-8";

    private final String mHost;
    private final Integer mPort;

    /**
     * Creates a processor bound to one service endpoint.
     *
     * @param host service host, either bare ("localhost") or with a scheme ("http://localhost")
     * @param port service port
     * @throws NullPointerException if {@code host} or {@code port} is null
     */
    public TextProcessor(@NotNull String host, @NotNull Integer port) {
        // @NotNull is only an annotation here; fail fast instead of building a bogus URI later.
        if (host == null) {
            throw new NullPointerException("host must not be null");
        }
        if (port == null) {
            throw new NullPointerException("port must not be null");
        }
        mHost = host;
        mPort = port;
    }

    /**
     * Builds the endpoint URI from the configured host and port.
     *
     * <p>A bare host such as "localhost" would otherwise produce "localhost:8080", which
     * {@link URI} parses as an opaque URI with scheme "localhost" and no host at all, so a
     * default scheme is added when none is present.
     */
    private URI buildServiceUri() {
        String base = mHost.contains("://") ? mHost : DEFAULT_SCHEME + mHost;
        return URI.create(base + ":" + mPort);
    }

    /**
     * Creates the GET request and translates the given options into request headers.
     *
     * @param opt processing options; each enabled flag becomes one header set to "true"
     * @return the request, without the "doc" header
     */
    private HttpGet buildRequest(@NotNull Options opt) {
        HttpGet connMethod = new HttpGet(buildServiceUri());

        if (opt.GetOriginalText) {
            connMethod.setHeader("include_original_doc", "true");
        }

        if (opt.GetTokens) {
            connMethod.setHeader("include_tokens", "true");
        }

        // Inverted on purpose: the header is sent when stops are NOT requested.
        if (!opt.GetStops) {
            connMethod.setHeader("exclude_stops", "true");
        }

        if (opt.GetNonStops) {
            connMethod.setHeader("include_non_stops", "true");
        }

        if (opt.StemByPorter) {
            connMethod.setHeader("run_porter", "true");
        }

        if (opt.StemByLancaster) {
            connMethod.setHeader("run_lancaster", "true");
        }

        if (opt.MeasureByLeveshtineDistance) {
            connMethod.setHeader("calculate_levenshtine", "true");
        }

        if (opt.MeasureByJaroWinkler) {
            connMethod.setHeader("calculate_jaro_winkler", "true");
        }

        if (opt.GetNGrams > 0) {
            connMethod.setHeader("include_n_grams", Integer.toString(opt.GetNGrams));
        }

        return connMethod;
    }

    /**
     * Sends {@code text} to the service and parses the JSON reply.
     *
     * @param text text to process; it is transmitted in the "doc" request header
     * @param opt  processing options
     * @return the parsed result, or {@code null} when the response has no body or the body
     *         is not a JSON object
     * @throws IOException if the request fails or the response cannot be read
     */
    public Result process(@NotNull String text, @NotNull Options opt) throws IOException {
        HttpGet connMethod = buildRequest(opt);

        connMethod.setHeader("doc", text);

        String output = fetchBody(connMethod);
        if (output == null) {
            return null;
        }

        JsonElement rootElement = new JsonParser().parse(output);
        // getAsJsonObject() throws instead of returning null, so test the type first.
        if (rootElement == null || !rootElement.isJsonObject()) {
            return null;
        }

        return toResult(rootElement.getAsJsonObject());
    }

    /**
     * Executes the request and returns the response body with line terminators removed.
     *
     * @return the body text, or {@code null} when the response carries no entity
     * @throws IOException if the request fails or the body cannot be read
     */
    private String fetchBody(HttpGet request) throws IOException {
        HttpClient client = HttpClientBuilder.create().build();
        try {
            HttpResponse response = client.execute(request);
            if (response.getEntity() == null) {
                return null;
            }

            BufferedReader rd = new BufferedReader(
                    new InputStreamReader(response.getEntity().getContent(), RESPONSE_CHARSET));
            try {
                StringBuilder output = new StringBuilder();
                String line;
                while ((line = rd.readLine()) != null) {
                    output.append(line);
                }
                return output.toString();
            } finally {
                // Closing the content stream hands the connection back to the client.
                rd.close();
            }
        } finally {
            // A client is created per call, so it has to be released per call as well.
            if (client instanceof Closeable) {
                ((Closeable) client).close();
            }
        }
    }

    /** Copies the members of the response object onto a new {@link Result}. */
    private Result toResult(JsonObject obj) {
        Result res = new Result();

        JsonElement origMbr = obj.get("original");
        if (origMbr != null) {
            res.setOriginalText(origMbr.getAsString());
        }

        // NOTE(review): all three arrays are written through setTokens, so "stops" and then
        // "non_stops" overwrite the token list (and reset it to null when they are absent).
        // This looks like a copy-paste slip; Result's other setters are not visible here, so
        // the calls are left unchanged. Confirm the intended setters and give each array
        // its own.
        res.setTokens(getAsStringArray(obj.get("tokens")));
        res.setTokens(getAsStringArray(obj.get("stops")));
        res.setTokens(getAsStringArray(obj.get("non_stops")));

        JsonObject stemmedMbr = obj.getAsJsonObject("stemmed");
        if (stemmedMbr != null) {
            res.setPorterStems(getAsStringArray(stemmedMbr.get("porter")));
            res.setLancasterStems(getAsStringArray(stemmedMbr.get("lancaster")));
        }

        JsonObject normalizedMbr = obj.getAsJsonObject("normalized");
        if (normalizedMbr != null) {
            JsonElement porterChild = normalizedMbr.get("porter");
            if (porterChild != null) {
                res.setNormalizedByPorter(porterChild.getAsString());
            }
            JsonElement lancasterChild = normalizedMbr.get("lancaster");
            if (lancasterChild != null) {
                res.setNormalizedByLancaster(lancasterChild.getAsString());
            }
        }

        JsonObject distancesMbr = obj.getAsJsonObject("distances");
        if (distancesMbr != null) {
            res.setLevenshtineDist(getAsDoubleArray(distancesMbr.get("levenshtine")));
            res.setJaroWinkler(getAsDoubleArray(distancesMbr.get("jaro_winkler")));
        }

        JsonArray ngramsMbr = obj.getAsJsonArray("ngrams");
        if (ngramsMbr != null) {
            List<List<String>> arrayOfNGrams = new ArrayList<List<String>>(ngramsMbr.size());
            for (JsonElement singleNGram : ngramsMbr) {
                arrayOfNGrams.add(getAsStringArray(singleNGram));
            }
            res.setNGrams(arrayOfNGrams);
        }

        return res;
    }

    /**
     * Converts a JSON array element into a list of strings.
     *
     * @param element the element to convert; may be null
     * @return the element's values as strings, or {@code null} when {@code element} is null
     *         or is not a JSON array
     */
    private List<String> getAsStringArray(JsonElement element) {
        // getAsJsonArray() throws on non-arrays rather than returning null.
        if (element == null || !element.isJsonArray()) {
            return null;
        }
        JsonArray childElems = element.getAsJsonArray();
        List<String> values = new ArrayList<String>(childElems.size());
        for (JsonElement elem : childElems) {
            values.add(elem.getAsString());
        }

        return values;
    }

    /**
     * Converts a JSON array element into a list of doubles.
     *
     * @param element the element to convert; may be null
     * @return the element's values as doubles, or {@code null} when {@code element} is null
     *         or is not a JSON array
     */
    private List<Double> getAsDoubleArray(JsonElement element) {
        // getAsJsonArray() throws on non-arrays rather than returning null.
        if (element == null || !element.isJsonArray()) {
            return null;
        }
        JsonArray childElems = element.getAsJsonArray();
        List<Double> values = new ArrayList<Double>(childElems.size());
        for (JsonElement elem : childElems) {
            values.add(Double.valueOf(elem.getAsDouble()));
        }

        return values;
    }
}