epsi.i5.datamining.Traitement.java Source code

Java tutorial

Introduction

Here is the source code for epsi.i5.datamining.Traitement.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package epsi.i5.datamining;

import epsi.i5.datamining.repustate.RepustateClient;
import epsi.i5.datamining.repustate.RepustateException;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;

/**
 * Derives, from comments that already carry categories, which words are most
 * frequent in each category, then uses those words to assign categories to the
 * comments returned by {@code builder.getSimpleCommentaires()} and measures how
 * often the assigned categories agree with the expected ones.
 *
 * <p>The internal collections are never cleared, so an instance is intended for
 * a single call to {@link #traitement()}.
 *
 * @author Sinys
 */
public class Traitement {

    JsonBuilder builder = new JsonBuilder();
    StopWords stopWords = new StopWords();
    /** Category name -> (word -> number of occurrences counted for that category). */
    private final Map<String, Map<String, Integer>> mapCategorie = new HashMap<>();
    /** Every distinct retained word, in order of first appearance. */
    private final List<String> words = new ArrayList<>();
    /** Expected category list per categorised comment; {@code null} for single-category entries. */
    private final List<List<String>> jsonListCat = new ArrayList<>();
    /** Expected single category per categorised comment; index-aligned with {@link #jsonListCat}. */
    private final List<String> jsonSimpleCat = new ArrayList<>();
    /** Polarity score (as text) of each comment, filled by {@link #calculAllpolarite()}. */
    private final List<String> polarites = new ArrayList<>();
    /** Categories found for each comment of {@code builder.getSimpleCommentaires()}. */
    private final List<List<String>> findCom = new ArrayList<>();
    /** Percentage of comments whose found categories agree with the expected ones. */
    private float fiabilite = 0;

    Weka arff = new Weka();
    List<DataEntity> ListDE = new ArrayList<>();

    /**
     * Runs the whole pipeline: word counting per category, pruning of each word
     * to the categories where it is most frequent, categorisation of the simple
     * comments, polarity computation, comparison with the expected categories
     * and ARFF generation.
     *
     * @throws IOException propagated from the collaborators (builder, Weka, Repustate client)
     * @throws MalformedURLException propagated from the collaborators
     * @throws RepustateException propagated from the collaborators
     * @throws ParseException propagated from the collaborators
     */
    public void traitement() throws IOException, MalformedURLException, RepustateException, ParseException {
        StopWords stopword = new StopWords();
        // Built once instead of once per word.
        // NOTE(review): assumes the StopWords getters return the same value on every call - confirm.
        String[] aposPatterns = splitAlternatives(stopword.getRegExApos());
        String[] stops = splitAlternatives(stopword.getRegEx());

        for (DataEntity entity : builder.getFullCommentaires()) {
            learnFrom(entity, aposPatterns, stops);
            ListDE.add(entity);
        }
        arff.chargementData(ListDE);

        keepDominantCategories();

        for (DataEntity commentaire : builder.getSimpleCommentaires()) {
            categorise(commentaire, aposPatterns, stops);
            List<String> found = commentaire.getListeCategorie();
            // Never store null so that the evaluation below cannot fail on it.
            findCom.add(found == null ? new ArrayList<String>() : found);
        }

        calculAllpolarite();

        evaluate();
        arff.generationArff("Step_One");
    }

    /**
     * Records the expected categories of one categorised comment, counts its
     * retained words for each of its categories and stores the filtered text
     * on the entity.
     */
    private void learnFrom(DataEntity entity, String[] aposPatterns, String[] stops) {
        // Both lists always grow together so that index i describes the same comment in
        // each of them; evaluate() picks the list form when it is non-null.
        jsonListCat.add(entity.getListeCategorie());
        jsonSimpleCat.add(entity.getSimpleCategorie());

        boolean multiCategorie = entity.getSimpleCategorie() == null;
        if (multiCategorie) {
            for (String cat : entity.getListeCategorie()) {
                if (!mapCategorie.containsKey(cat)) {
                    mapCategorie.put(cat, new HashMap<String, Integer>());
                }
            }
        } else if (!mapCategorie.containsKey(entity.getSimpleCategorie())) {
            mapCategorie.put(entity.getSimpleCategorie(), new HashMap<String, Integer>());
        }

        StringBuilder trie = new StringBuilder();
        for (String brut : entity.getCommentaires().split(" ")) {
            String word = cleanWord(brut, aposPatterns);
            if (word.isEmpty() || isStopWord(word, stops)) {
                continue;
            }
            // Each retained word is preceded by a space, including the first one.
            trie.append(' ').append(word);

            // NOTE(review): words of single-category comments are not counted (the
            // counting code for that case was disabled in the original) - confirm intent.
            if (multiCategorie) {
                for (String cat : entity.getListeCategorie()) {
                    Map<String, Integer> counts = mapCategorie.get(cat);
                    Integer previous = counts.get(word);
                    counts.put(word, previous == null ? 1 : previous + 1);
                }
            }

            if (!words.contains(word)) {
                words.add(word);
            }
        }
        entity.setCommentaireTrie(trie.toString());
    }

    /**
     * For every known word, keeps it only in the categories where its count
     * equals the highest count observed across all categories.
     */
    private void keepDominantCategories() {
        for (String word : words) {
            int max = 0;
            for (Map<String, Integer> counts : mapCategorie.values()) {
                Integer count = counts.get(word);
                if (count != null && count > max) {
                    max = count;
                }
            }
            for (Map<String, Integer> counts : mapCategorie.values()) {
                Integer count = counts.get(word);
                // Compare primitive values: comparing boxed Integers with != tests object
                // identity and wrongly drops tied categories once counts leave the cache range.
                if (count != null && count.intValue() != max) {
                    counts.remove(word);
                }
            }
        }
    }

    /**
     * Adds to the comment every category whose word map contains one of the
     * comment's retained words.
     */
    private void categorise(DataEntity commentaire, String[] aposPatterns, String[] stops) {
        for (String brut : commentaire.getCommentaires().split(" ")) {
            String word = cleanWord(brut, aposPatterns);
            if (word.isEmpty() || isStopWord(word, stops)) {
                continue;
            }
            for (Map.Entry<String, Map<String, Integer>> entry : mapCategorie.entrySet()) {
                if (!entry.getValue().containsKey(word)) {
                    continue;
                }
                List<String> categorie = commentaire.getListeCategorie();
                if (categorie == null) {
                    categorie = new ArrayList<>();
                }
                if (!categorie.contains(entry.getKey())) {
                    categorie.add(entry.getKey());
                }
                commentaire.setListeCategorie(categorie);
            }
        }
    }

    /**
     * Prints expected versus found categories for each comment and stores the
     * percentage of agreeing comments in {@link #fiabilite}.
     *
     * <p>NOTE(review): entry i of the found list is compared with entry i of the
     * expected lists, which presumes both builder calls return the same comments
     * in the same order - confirm.
     */
    private void evaluate() {
        int hits = 0;
        for (int i = 0; i < findCom.size(); i++) {
            List<String> attendues = jsonListCat.get(i);
            List<String> trouvees = findCom.get(i);
            boolean ok;
            if (attendues == null) {
                String attendue = jsonSimpleCat.get(i);
                System.out.println("Expected : " + attendue);
                System.out.println("Found    : " + trouvees);
                ok = trouvees.contains(attendue);
            } else {
                System.out.println("Expected : " + attendues);
                System.out.println("Found    : " + trouvees);
                // Counts as a hit when nothing outside the expected categories was found.
                ok = attendues.containsAll(trouvees);
            }
            System.out.println(ok);
            if (ok) {
                hits++;
            }
        }
        // Avoid 0/0 = NaN when there is nothing to evaluate.
        fiabilite = findCom.isEmpty() ? 0 : ((float) hits * 100) / findCom.size();
    }

    /** Splits a {@code a|b|c} alternation into its individual alternatives. */
    private static String[] splitAlternatives(String alternation) {
        return alternation.replace("|", " ").split(" ");
    }

    /**
     * Removes every match of the given patterns from the raw token, then
     * replaces or strips punctuation and trims the result.
     */
    private static String cleanWord(String brut, String[] aposPatterns) {
        String word = brut;
        for (String pattern : aposPatterns) {
            word = word.replaceAll(pattern, "");
        }
        return word.replace(".", " ").replace(",", " ").replace("!", " ").replace("(", "").replace(")", "")
                .replace("'", "").replace(":", "").trim();
    }

    /** Tells whether the (non-empty) word equals one of the stop words, ignoring case. */
    private static boolean isStopWord(String word, String[] stops) {
        for (String stop : stops) {
            if (word.equalsIgnoreCase(stop)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Asks Repustate for the sentiment of one comment and returns its score
     * multiplied by ten, as text.
     *
     * @param commentaire the text to analyse
     * @return the textual form of the last score found in the {@code results} array, times 10
     * @throws IOException propagated from the Repustate client
     * @throws MalformedURLException propagated from the Repustate client
     * @throws RepustateException propagated from the Repustate client
     * @throws ParseException if the response is not valid JSON
     * @throws IllegalStateException if the response contains no result
     */
    @SuppressWarnings({"rawtypes", "unchecked"}) // the client's parameter type is not visible here; kept raw as before
    public String calculPolarite(String commentaire)
            throws IOException, MalformedURLException, RepustateException, ParseException {
        Map map = new HashMap();
        map.put("text1", commentaire);

        JSONParser jp = new JSONParser();
        JSONObject json = (JSONObject) jp.parse(RepustateClient.getSentimentBulk(map));
        JSONArray jsonArray = (JSONArray) json.get("results");

        boolean scoreFound = false;
        double score = 0;
        if (jsonArray != null) {
            for (Object obj : jsonArray) {
                JSONObject jsonObject = (JSONObject) obj;
                score = Double.parseDouble(jsonObject.get("score").toString()) * 10;
                scoreFound = true;
            }
        }
        if (!scoreFound) {
            // Previously surfaced as an unexplained NullPointerException.
            throw new IllegalStateException("Repustate response contains no score: " + json);
        }
        return Double.toString(score);
    }

    /**
     * Computes the polarity of every comment returned by
     * {@code builder.getSimpleCommentaires()} and appends it to the polarity list.
     *
     * @throws IOException propagated from {@link #calculPolarite(String)} or the builder
     * @throws MalformedURLException propagated from {@link #calculPolarite(String)} or the builder
     * @throws RepustateException propagated from {@link #calculPolarite(String)} or the builder
     * @throws ParseException propagated from {@link #calculPolarite(String)} or the builder
     */
    public void calculAllpolarite() throws IOException, MalformedURLException, RepustateException, ParseException {
        for (DataEntity de : builder.getSimpleCommentaires()) {
            polarites.add(calculPolarite(de.getCommentaires()));
        }
    }

}