com.isi.master.meaningcloudAPI.topicsextraction.TopicsClient.java Source code

Java tutorial

Introduction

Here is the source code for com.isi.master.meaningcloudAPI.topicsextraction.TopicsClient.java

Source

package com.isi.master.meaningcloudAPI.topicsextraction;
/**
 * Topics Extraction 2.0 starting client for Java.
 *
 * In order to run this example, the license key must be included in the key variable.
 * If you don't know your key, check your personal area at MeaningCloud (https://www.meaningcloud.com/developer/account/licenses)
 *
 * Once you have the key, edit the parameters and call "javac *.java; java TopicsClient"
 *
 * You can find more information at http://www.meaningcloud.com/developer/topics-extraction/doc/2.0
 *
 * @author     MeaningCloud
 * @contact    http://www.meaningcloud.com 
 * @copyright  Copyright (c) 2015, MeaningCloud LLC All rights reserved.
 */

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.List;
//import javax.xml.parsers.DocumentBuilder;
//import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
//import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import com.isi.master.funciones.Funciones;
import twitter4j.JSONArray;
import twitter4j.JSONException;
import twitter4j.JSONObject;
//import java.io.ByteArrayInputStream;
import org.w3c.dom.*;

/**
 * This class implements a starting client for Topics Extraction 
 */
public class TopicsClient {

    /**
     * print the general information of a topic
     * @param response main element of the response
     * @param nameNode name of the node to process
     * @return the information of the nodes
     */
    public static String printInfoGeneral(Element response, String nameNode) {
        String output = "";
        NodeList topic_list = response.getElementsByTagName(nameNode + "_list");
        if (topic_list.getLength() > 0) {
            Node topics = topic_list.item(0);
            NodeList topic = topics.getChildNodes();
            for (int i = 0; i < topic.getLength(); i++) {
                Node topic_item = topic.item(i);
                NodeList child_topic = topic_item.getChildNodes();
                String form = "";
                String type = "";
                for (int j = 0; j < child_topic.getLength(); j++) {
                    Node info_topic = child_topic.item(j);
                    if (info_topic.getNodeName().equals("form"))
                        form = info_topic.getTextContent();
                    else if (info_topic.getNodeName().equals("type"))
                        type = info_topic.getTextContent();
                }
                output += " - " + form;
                if (!type.isEmpty())
                    output += " (" + type + ")";
                output += "\n";
            }
        }
        if (output.isEmpty())
            output = "Not found\n";
        return output;
    } //printInfoGeneral

    /**
     * print the information of the entities and concepts
     * @param response main element of the response
     * @param nameNode name of the node to process (entity or concept)
     * @return the information of the nodes
     */
    public static String printInfoEntityConcept(Element response, String nameNode) {
        String output = "";
        NodeList topic_list = response.getElementsByTagName(nameNode + "_list");
        if (topic_list.getLength() > 0) {
            Node topics = topic_list.item(0);
            NodeList topic = topics.getChildNodes();
            for (int i = 0; i < topic.getLength(); i++) {
                Node topic_item = topic.item(i);
                NodeList child_topic = topic_item.getChildNodes();
                String form = "";
                String type = "";
                for (int j = 0; j < child_topic.getLength(); j++) {
                    Node info_topic = child_topic.item(j);
                    String name = info_topic.getNodeName();
                    if (name.equals("form"))
                        form = info_topic.getTextContent();
                    else if (name.equals("sementity")) {
                        NodeList child_sementity = info_topic.getChildNodes();
                        for (int l = 0; l < child_sementity.getLength(); l++) {
                            Node info_sementity = child_sementity.item(l);
                            if (info_sementity.getNodeName().equals("type")) {
                                type += info_sementity.getTextContent() + "|";
                            }
                        }
                    }
                }
                output += " - " + form;
                if (!type.isEmpty())
                    output += " (" + type.substring(0, type.length() - 1) + ")";
                output += "\n";
            }
        }
        if (output.isEmpty())
            output = "Not found\n";
        return output;
    } //printInfoEntityConcept

    /**
     * print the information of the quotes
     * @param response main element of the response
     * @return the information of the nodes
     */
    public static String printInfoQuotes(Element response) {
        String output = "";
        NodeList quotation_list = response.getElementsByTagName("quotation_list");
        if (quotation_list.getLength() > 0) {
            Node quotations = quotation_list.item(0);
            NodeList quotation = quotations.getChildNodes();
            for (int i = 0; i < quotation.getLength(); i++) {
                Node quotation_item = quotation.item(i);
                NodeList child_quotation = quotation_item.getChildNodes();
                String form = "";
                String who = "";
                String who_lemma = "";
                String verb = "";
                String verb_lemma = "";
                for (int j = 0; j < child_quotation.getLength(); j++) {
                    Node info_quotation = child_quotation.item(j);
                    String name = info_quotation.getNodeName();
                    if (name.equals("form"))
                        form = info_quotation.getTextContent();
                    else if (name.equals("who")) {
                        NodeList childWho = info_quotation.getChildNodes();
                        for (int k = 0; k < childWho.getLength(); k++) {
                            Node infoWho = childWho.item(k);
                            String nameWho = infoWho.getNodeName();
                            if (nameWho.equals("form"))
                                who = infoWho.getTextContent();
                            else if (nameWho.equals("lemma"))
                                who_lemma = infoWho.getTextContent();
                        }
                    } else if (name.equals("verb")) {
                        NodeList childVerb = info_quotation.getChildNodes();
                        for (int k = 0; k < childVerb.getLength(); k++) {
                            Node infoVerb = childVerb.item(k);
                            String nameVerb = infoVerb.getNodeName();
                            if (nameVerb.equals("form"))
                                verb = infoVerb.getTextContent();
                            else if (nameVerb.equals("lemma"))
                                verb_lemma = infoVerb.getTextContent();
                        }
                    }
                }
                output += " - " + form;
                output += "\n";
                if (!who.isEmpty())
                    output += "   + who: " + who + " (" + who_lemma + ")\n";
                if (!verb.isEmpty())
                    output += "   + verb: " + verb + " (" + verb_lemma + ")\n";
            }
        }
        if (output.isEmpty())
            output = "Not found\n";
        return output;
    } //printInfoQuotes

    /**
     * print the information of the relations
     * @param response main element of the response
     * @return the information of the nodes
     */
    public static String printInfoRelation(Element response) {
        String output = "";
        NodeList relation_list = response.getElementsByTagName("relation_list");
        if (relation_list.getLength() > 0) {
            Node relations = relation_list.item(0);
            NodeList relation = relations.getChildNodes();
            for (int i = 0; i < relation.getLength(); i++) {
                Node relation_item = relation.item(i);
                NodeList child_relation = relation_item.getChildNodes();
                String form = "";
                String subject = "";
                String subject_lemmas = "";
                String verb = "";
                String verb_lemmas = "";
                String complement_info = "";
                for (int j = 0; j < child_relation.getLength(); j++) {
                    Node info_relation = child_relation.item(j);
                    String name = info_relation.getNodeName();
                    if (name.equals("form"))
                        form = info_relation.getTextContent();
                    else if (name.equals("subject")) {
                        NodeList child_subject = info_relation.getChildNodes();
                        for (int k = 0; k < child_subject.getLength(); k++) {
                            Node info_subject = child_subject.item(k);
                            if (info_subject.getNodeName() == "form") {
                                subject = info_subject.getTextContent();
                            } else if (info_subject.getNodeName() == "lemma_list") {
                                NodeList child_lemma_subject = info_subject.getChildNodes();
                                for (int m = 0; m < child_lemma_subject.getLength(); m++) {
                                    Node info_lemma_subject = child_lemma_subject.item(m);
                                    if (info_lemma_subject.getNodeName() == "lemma")
                                        subject_lemmas = info_lemma_subject.getTextContent() + "|";
                                }
                            }
                        }
                    } else if (name.equals("verb")) {
                        NodeList child_verb = info_relation.getChildNodes();
                        for (int k = 0; k < child_verb.getLength(); k++) {
                            Node info_verb = child_verb.item(k);
                            if (info_verb.getNodeName() == "form") {
                                verb = info_verb.getTextContent();
                            } else if (info_verb.getNodeName() == "lemma_list") {
                                NodeList child_lemma_verb = info_verb.getChildNodes();
                                for (int m = 0; m < child_lemma_verb.getLength(); m++) {
                                    Node info_lemma_verb = child_lemma_verb.item(m);
                                    if (info_lemma_verb.getNodeName() == "lemma")
                                        verb_lemmas = info_lemma_verb.getTextContent() + "|";
                                }
                            }
                        }
                    } else if (name.equals("complement_list")) {
                        NodeList child_complement = info_relation.getChildNodes();
                        for (int k = 0; k < child_complement.getLength(); k++) {
                            Node complement_item = child_complement.item(k);
                            if (complement_item.getNodeName() == "complement") {
                                NodeList complement = complement_item.getChildNodes();
                                for (int m = 0; m < complement.getLength(); m++) {
                                    Node info_complement = complement.item(m);
                                    if (info_complement.getNodeName() == "form")
                                        complement_info += "    * " + info_complement.getTextContent();
                                    if (info_complement.getNodeName() == "type")
                                        complement_info += " (" + info_complement.getTextContent() + ")";
                                }
                                complement_info += "\n";
                            }
                        }
                    }
                }
                output += " - " + form;
                output += "\n";
                if (!subject.isEmpty()) {
                    output += "   + subject: " + subject;
                    if (!subject_lemmas.isEmpty())
                        output += " (" + subject_lemmas.substring(0, subject_lemmas.length() - 1) + ")";
                    output += "\n";
                }
                if (!verb.isEmpty()) {
                    output += "   + verb: " + verb;
                    if (!verb_lemmas.isEmpty())
                        output += " (" + verb_lemmas.substring(0, verb_lemmas.length() - 1) + ")";
                    output += "\n";
                }
                if (!complement_info.isEmpty())
                    output += "   + complements: \n" + complement_info + "\n";
            }
        }
        if (output.isEmpty())
            output = "Not found\n";
        return output;
    } //printInfoRelation

    /**
     * 
     * @param contenido
     * @param body
     * @return
     */
    public static List<String> recibirTweet(String contenido, boolean body) {
        String api = "http://api.meaningcloud.com/topics-2.0";
        String key = "c4b916eee1c44d9d6cd670b19ce6470e";
        String txt = contenido;
        String lang = "es"; // es/en/fr/it/pt/ca
        List<String> provincia = new ArrayList<String>();
        try {
            Post post = new Post(api);
            post.addParameter("key", key);
            post.addParameter("txt", txt);
            post.addParameter("lang", lang);
            if (body) {
                post.addParameter("tt", "ec");
            } else {
                post.addParameter("tt", "e");
            }
            post.addParameter("uw", "y");
            post.addParameter("cont", "City");
            post.addParameter("of", "json");

            JSONObject jsonObj = null;
            try {
                jsonObj = new JSONObject(post.getResponse());
                boolean seguir = false;
                if (body) {
                    seguir = conceptTopics(jsonObj);
                    if (!seguir) {//quitamos '#' de los hashtag
                        Post post2 = new Post(api);
                        post2.addParameter("key", key);
                        post2.addParameter("txt", txt.replaceAll("#", ""));
                        post2.addParameter("lang", lang);
                        post2.addParameter("tt", "ec");
                        post2.addParameter("uw", "y");
                        post2.addParameter("cont", "City");
                        post2.addParameter("of", "json");
                        JSONObject jsonObj2 = new JSONObject(post.getResponse());
                        seguir = conceptTopics(jsonObj2);
                    }
                }
                if (seguir || !body) {//la parte del tweet analizada es el cuerpo y tiene contenido relacionado con contaminacion o no es el cuerpo y se quiere buscar localizacion
                    provincia = entidadTopics(jsonObj);
                    if (provincia.size() == 0) {
                        if (body) {
                            provincia = analisisRegex(txt, true);
                            if (provincia.size() == 0) {
                                Post post2 = new Post(api);
                                post2.addParameter("key", key);
                                post2.addParameter("txt", txt.replaceAll("#", ""));
                                post2.addParameter("lang", lang);
                                post2.addParameter("tt", "ec");
                                post2.addParameter("uw", "y");
                                post2.addParameter("cont", "City");
                                post2.addParameter("of", "json");
                                JSONObject jsonObj2 = new JSONObject(post.getResponse());
                                provincia = entidadTopics(jsonObj2);
                            }
                        } else {
                            provincia = analisisRegex(txt, false);
                        }
                    }
                } else {//la parte del tweet analizada es el cuerpo y no tiene ningun contenido relacionado con contaminacion
                    return null;
                }
            } catch (JSONException e) {
                // TODO Auto-generated catch block
                //            System.err.println("No es posible analizar '"+contenido+"' en base a entidades\n");
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        } catch (MalformedURLException e1) {
            // TODO Auto-generated catch block
            e1.printStackTrace();
        } catch (UnsupportedEncodingException e1) {
            // TODO Auto-generated catch block
            e1.printStackTrace();
        }
        return provincia;
    }

    /**
     * 
     * @param provincia
     * @param txt
     * @param body
     */
    private static List<String> analisisRegex(String txt, boolean body) {
        String sCurrentLine;//string que almacena los paises y zonas de habla hispana de los que puede haber tweets
        BufferedReader br = null;
        List<String> provincia = new ArrayList<String>();
        try {
            br = new BufferedReader(new FileReader("./documentos/Personas_/zonas.txt"));//cogemos los paises y zonas de habla hispana
            while ((sCurrentLine = br.readLine()) != null) {//borramos tweets de fuera de Espana
                if (body) {
                    if (Funciones.quitarTildes(txt).matches(".*#" + sCurrentLine.toUpperCase().split("-")[0])) {
                        provincia.add(sCurrentLine.split("-")[1]);
                    }
                } else {
                    if (Funciones.quitarTildes(txt).matches(sCurrentLine.toUpperCase().split("-")[0])) {
                        provincia.add(sCurrentLine.split("-")[1]);
                    }
                }
            }
        } catch (FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        return provincia;
    }

    /**
     * 
     * @param jsonObj
     * @return
     */
    private static boolean conceptTopics(JSONObject jsonObj) {
        try {
            JSONArray array = jsonObj.getJSONArray("concept_list");

            for (int i = 0; i < array.length(); i++) {
                JSONObject doc = (JSONObject) array.getJSONObject(i);
                JSONObject doc1 = (JSONObject) doc.get("sementity");
                if (doc1.getString("type").contains("Top>")) {
                    return true;
                }
            }
        } catch (JSONException e) {
            // TODO Auto-generated catch block
            //         e.printStackTrace();
        }
        return false;
    }

    private static List<String> entidadTopics(JSONObject jsonObj) {
        List<String> provincia = new ArrayList<String>();
        try {
            JSONArray array = jsonObj.getJSONArray("entity_list");
            for (int i = 0; i < array.length(); i++) {
                try {
                    JSONObject doc = (JSONObject) array.getJSONObject(i);
                    JSONObject doc1 = (JSONObject) doc.get("sementity");
                    if (doc1.getString("id").equals("ODENTITY_CITY")
                            && doc1.getString("type").equals("Top>Location>GeoPoliticalEntity>City")) {
                        JSONArray doc2 = (JSONArray) doc.get("semgeo_list");
                        JSONObject doc21 = (JSONObject) doc2.get(0);
                        if (((JSONObject) doc21.get("country")).getString("form").equals("Espaa")) {
                            try {
                                provincia.add(((JSONObject) doc21.get("adm2")).getString("form"));
                            } catch (JSONException e) {
                                provincia.add(((JSONObject) doc21.get("adm1")).getString("form"));
                            }
                        } else {
                            //                     System.err.println(((JSONObject)array.get(i)).get("form")+" en el texto se refiere a un lugar de "+((JSONObject)doc21.get("country")).getString("form"));
                        }
                    } else if (doc1.getString("id").equals("ODENTITY_ADM2")
                            && doc1.getString("type").equals("Top>Location>GeoPoliticalEntity>Adm2")) {
                        JSONArray doc2 = (JSONArray) doc.get("semgeo_list");
                        JSONObject doc21 = (JSONObject) doc2.get(0);
                        if (((JSONObject) doc21.get("country")).getString("form").equals("Espaa")) {//insertamos la entidad
                            provincia.add(doc.getString("form"));
                        } else {
                            //                     System.err.println(((JSONObject)array.get(i)).get("form")+" en el texto se refiere a un lugar de "+((JSONObject)doc21.get("country")).getString("form"));
                        }
                    } else {
                        //                  System.err.println(((JSONObject)array.get(i)).get("form")+" no es una ciudad\n");
                    }
                } catch (JSONException e) {
                    //               System.err.println(((JSONObject)array.get(i)).get("form")+" no es una ciudad\n");
                }
            }
        } catch (JSONException e1) {
            // TODO Auto-generated catch block
            //         e1.printStackTrace();
        }
        return provincia;
    }

    public static void main(String[] args) throws IOException, ParserConfigurationException, SAXException {
        // We define the variables needed to call the API
        String api = "http://api.meaningcloud.com/topics-2.0";
        String key = "67d2d31e37c2ba1d032188b1233f19bf";
        String txt = "Los vehculos AUDI con tres ocupantes podrn circular por Real Madrid, Alcobendas y Miranda de Ebro los das de contaminacin porque Carmena no les deja debido al aire sucio";
        //      String txt = "Madrid est con altos niveles de contaminacin, como el NO2";
        //      String txt = "La ciudad Madrid est con altos niveles de contaminacin, como el NO2";
        //      String txt = "Avils";
        String lang = "es"; // es/en/fr/it/pt/ca

        Post post = new Post(api);
        post.addParameter("key", key);
        post.addParameter("txt", txt);
        post.addParameter("lang", lang);
        post.addParameter("tt", "ec");
        post.addParameter("uw", "y");
        post.addParameter("cont", "City");
        post.addParameter("of", "json");

        //      String response = post.getResponse();
        JSONObject jsonObj = null;
        try {
            jsonObj = new JSONObject(post.getResponse());
            System.out.println("AAAAAAAAAAAAAAAAAAAAAAAA");
            System.out.println(jsonObj.toString());
            JSONArray array2 = jsonObj.getJSONArray("concept_list");
            System.out.println(array2);
        } catch (JSONException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        // Show response
        System.out.println("Response");
        System.out.println("============");
        try {
            System.out.println(jsonObj);
            JSONArray array = jsonObj.getJSONArray("entity_list");
            System.out.println(array);
            for (int i = 0; i < array.length(); i++) {
                try {
                    //            System.out.println("_--------------------_");
                    JSONObject doc = (JSONObject) array.getJSONObject(i);
                    System.out.println(doc);
                    //                           System.out.println("_---------------------_");
                    JSONObject doc1 = (JSONObject) doc.get("sementity");
                    System.out.println(doc1);
                    //               System.out.println("_A---------------------_");
                    if (doc1.getString("id").equals("ODENTITY_CITY")
                            && doc1.getString("type").equals("Top>Location>GeoPoliticalEntity>City")) {
                        JSONArray doc2 = (JSONArray) doc.get("semgeo_list");
                        JSONObject doc21 = (JSONObject) doc2.get(0);
                        if (((JSONObject) doc21.get("country")).getString("form").equals("Espaa")) {
                            //                     System.out.println("Entidad_: "+((JSONObject)array.get(i)).get("form"));
                            //                     System.out.println("IDENTIFICADORES DE ENTIDAD CIUDAD_: "+doc1.getString("id")+" - "+doc1.getString("type"));
                            //                     System.out.println("PAIS_: "+((JSONObject)doc21.get("country")).get("form"));
                            try {
                                System.out.println(
                                        "PROVINCIA_: " + ((JSONObject) doc21.get("adm2")).get("form") + "\n");
                            } catch (JSONException e) {
                                System.out.println(
                                        "PROVINCIA_: " + ((JSONObject) doc21.get("adm1")).get("form") + "\n");
                            }
                        } else {
                            System.err.println(((JSONObject) array.get(i)).get("form")
                                    + " en el texto se refiere a un lugar de "
                                    + ((JSONObject) doc21.get("country")).getString("form"));
                        }
                    } else if (doc1.getString("id").equals("ODENTITY_ADM2")
                            && doc1.getString("type").equals("Top>Location>GeoPoliticalEntity>Adm1")) {
                        System.out.println(doc.get("form"));
                        JSONArray doc2 = (JSONArray) doc.get("semgeo_list");
                        JSONObject doc21 = (JSONObject) doc2.get(0);
                        if (((JSONObject) doc21.get("country")).getString("form").equals("Espaa")) {
                            System.out.println("PAIS_: " + ((JSONObject) doc21.get("country")).get("form"));
                        }
                    } else {
                        System.err.println(((JSONObject) array.get(i)).get("form") + " no es una ciudad\n");
                    }
                } catch (JSONException e) {
                    System.err.println(((JSONObject) array.get(i)).get("form") + " no es una ciudad\n");
                }

            }

        } catch (JSONException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        // Prints the specific fields in the response (topics)
        //      DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
        //      DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
        //      Document doc = docBuilder.parse(new ByteArrayInputStream(response.getBytes("UTF-8")));
        //      doc.getDocumentElement().normalize();
        //      Element response_node = doc.getDocumentElement();
        //      System.out.println("\nInformation:");
        //      System.out.println("----------------\n");
        //      try {
        //        NodeList status_list = response_node.getElementsByTagName("status");
        //        Node status = status_list.item(0);
        //        NamedNodeMap attributes = status.getAttributes();
        //        Node code = attributes.item(0);
        //        if(!code.getTextContent().equals("0")) {
        //          System.out.println("Not found");
        //        } else {
        //          String output = "";
        //          output += "Entities:\n";
        //          output += "=============\n";
        //          output += printInfoEntityConcept(response_node, "entity");
        ////          output += "\n";
        ////          output += "Concepts:\n";
        ////          output += "============\n";
        ////          output += printInfoEntityConcept(response_node, "concept");
        ////          output += "\n";
        ////          output += "Time expressions:\n";
        ////          output += "==========\n";
        ////          output += printInfoGeneral(response_node, "time_expression");
        ////          output += "\n";
        ////          output += "Money expressions:\n";
        ////          output += "===========\n";
        ////          output += printInfoGeneral(response_node, "money_expression");
        ////          output += "\n";
        ////          output += "Quantity expressions:\n";
        ////          output += "======================\n";
        ////          output += printInfoGeneral(response_node, "quantity_expression");
        ////          output += "\n";
        ////          output += "Other expressions:\n";
        ////          output += "====================\n";
        ////          output += printInfoGeneral(response_node, "other_expression");
        ////          output += "\n";
        ////          output += "Quotations:\n";
        ////          output += "====================\n";
        ////          output += printInfoQuotes(response_node);
        ////          output += "\n";
        ////          output += "Relations:\n";
        ////          output += "====================\n";
        ////          output += printInfoRelation(response_node);
        //          output += "\n";
        //          if(output.isEmpty())
        //            System.out.println("Not found");
        //          else
        //            System.out.print(output);
        //        }
        //      } catch (Exception e) {
        //        System.out.println("Not found");
        //      }
    }
}