twitterprojectwithoutmaven.Statistics.java Source code

Java tutorial

Introduction

Here is the source code for twitterprojectwithoutmaven.Statistics.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package twitterprojectwithoutmaven;

import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;

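/**
 * Computes per-user tweet statistics for the stalked users, stores them
 * through {@code dbAdapter}, and exports them to an Excel workbook.
 *
 * @author Chris
 */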
public class Statistics {

    // Scratch list of distances for the tweet currently being compared;
    // cleared by StatisticsB before each search and filled by findDublicates.
    private ArrayList<TweetDist> list;
    // Near-duplicate pairs kept for the user currently being processed;
    // recreated per user in StatisticsB and appended to by NormalizeAndCut.
    private ArrayList<TweetDist> dublist;
    // When true: the constructor creates no file, StatisticsB stops after
    // MaxTweets tweets per user, and findDublicates returns immediately.
    public boolean testMode = false;
    // Per-user tweet cap; only consulted while testMode is true.
    public int MaxTweets = 1000;
    // Row index handed to writeRow for the next row; advanced after every
    // write and once for every user skipped in StatisticsB.
    private int lastrow = 0;
    // Workbook written by the constructor and by StatisticsB.
    private final String fileName = "UserStats.xls";
    // Names of the three sheets created by CreateFile, in creation order.
    private final String sheet1Name = "allUsers";
    private final String sheet2Name = "Stalked users";
    private final String sheet3Name = "Sources";
    // Number of stalked users StatisticsB skips before it starts analyzing.
    private int start = 0;

    /**
     * Creates the statistics runner.
     *
     * <p>In test mode nothing is written at construction time. Otherwise a
     * run that begins at the first user ({@code startFrom == 0}) creates a
     * fresh workbook and writes the header row, while a resumed run
     * ({@code startFrom > 0}) leaves the existing workbook untouched.
     *
     * @param test      {@code true} to enable test mode
     * @param startFrom number of stalked users {@link #StatisticsB()} skips
     *                  before it starts analyzing
     */
    public Statistics(boolean test, int startFrom) {
        testMode = test;
        this.start = startFrom;

        if (testMode) {
            return;
        }

        if (start == 0) {
            this.CreateFile(fileName);
            this.writeRow(fileName, 1, lastrow++, DBUserStat.header);
        } else if (start > 0) {
            // Resumed run: a fresh run puts the header in row 0 and user i in
            // row i + 1. StatisticsB adds one to lastrow per skipped user, so
            // the counter must start after the header; otherwise the first
            // new row would overwrite the last row of the previous run.
            // NOTE(review): assumes the existing file was produced by a run
            // that started at 0 — confirm.
            lastrow = 1;
        }
    }

    /**
     * Placeholder for the first statistics pass; the body is intentionally
     * empty.
     */
    public void StatisticsA() {

        // Nothing to compute here: per the original author's note, this data
        // is already available through DBUser.
    }

    /**
     * Builds one {@code DBUserStat} per stalked user from that user's tweets,
     * stores it through {@code dbAdapter} and appends it as a row to the
     * workbook.
     *
     * <p>The first {@code start} users are skipped (the row counter advances
     * for each of them). In test mode at most {@code MaxTweets} tweets are
     * processed per user. Both cursors are closed even when processing fails
     * part-way through.
     */
    public void StatisticsB() {
        DBCursor users = dbAdapter.getInstance().getStalkedUsers();
        try {
            System.out.println("users: " + users.size());

            // Skip the users an earlier run already handled, keeping the
            // spreadsheet row counter in step with the user position.
            for (int skipped = 0; skipped < start && users.hasNext(); skipped++) {
                users.next();
                lastrow++;
            }

            while (users.hasNext()) {
                DBObject obj = users.next();
                DBUser user = new DBUser(obj);
                DBUserStat stat = new DBUserStat(user.getID());

                // Near-duplicate pairs collected for this user only.
                dublist = new ArrayList<>();

                DBCursor userTweets = dbAdapter.getInstance().getStalkedUserTweets(user.getID());
                try {
                    System.out.println("user: " + user.getID() + " tweets:" + userTweets.size());

                    int processed = 0; // tweets handled so far (test-mode cap)
                    int position = 0;  // index of the current tweet in cursor order
                    while (userTweets.hasNext()) {
                        DBObject next = userTweets.next();
                        DBTweet tweet = new DBTweet(next);

                        if (testMode) {
                            if (processed >= MaxTweets) {
                                break;
                            }
                            processed++;
                        }

                        // Fold this tweet's counters into the user's totals.
                        stat.setNum_simple_tweets(stat.getNum_simple_tweets() + tweet.isASimpleTweet());
                        stat.setNum_reTweets(stat.getNum_reTweets() + tweet.isaReTweet());
                        stat.setNum_replies(stat.getNum_replies() + tweet.isAReply());
                        stat.setNum_mentions(stat.getNum_mentions() + tweet.getMentions());
                        stat.setNum_reTweets_recieved(stat.getNum_reTweets_recieved() + tweet.getReTweets());
                        stat.setNum_hashTag(stat.getNum_hashTag() + tweet.getHashTags());
                        stat.setNum_Urls(stat.getNum_Urls() + tweet.getUrls());
                        stat.AddSources(tweet.getSource());
                        stat.AddUrls(tweet.getURLS());

                        if (tweet.isASimpleTweet() == 1) {
                            // Simple tweets are compared against the tweets
                            // that precede them; the scratch list is reset
                            // before every search.
                            if (list == null) {
                                list = new ArrayList<>();
                            }
                            list.clear();
                            findDublicates(tweet, user.getID(), position);
                        }
                        position++;
                    }
                } finally {
                    userTweets.close();
                }

                stat.setSameTweets(dublist);

                BasicDBObject dbObject = stat.getDBObject2();
                dbAdapter.getInstance().insertUserStats(dbObject);

                this.writeRow(fileName, 1, lastrow++, stat.getExelRow());
            }
        } finally {
            users.close();
        }
    }

    /**
     * Compares {@code tweet} with the simple tweets among the first {@code c}
     * tweets of user {@code id} and records one {@code TweetDist} per
     * comparison in the scratch list, then hands that list to
     * {@link #NormalizeAndCut} together with the largest distance seen.
     * Does nothing in test mode.
     *
     * @param tweet the reference tweet
     * @param id    the user whose tweets are scanned
     * @param c     how many tweets, from the start of the cursor, to examine
     */
    private void findDublicates(DBTweet tweet, String id, int c) {

        if (testMode) {
            return;
        }

        int max = -1; // largest distance seen; stays -1 when nothing was compared

        DBCursor tweets = dbAdapter.getInstance().getStalkedUserTweets(id);
        try {
            for (int seen = 0; seen < c && tweets.hasNext(); seen++) {
                DBObject obj = tweets.next();
                DBTweet tweet2 = new DBTweet(obj);

                if (tweet2.isASimpleTweet() != 1) {
                    continue;
                }

                // Null-safe identity check: never compare a tweet with itself.
                boolean sameTweet = tweet.getID() == null
                        ? tweet2.getID() == null
                        : tweet.getID().equals(tweet2.getID());
                if (sameTweet) {
                    continue;
                }

                int dist = distance(tweet.getLevText(), tweet2.getLevText());
                if (max < dist) {
                    max = dist;
                }

                list.add(new TweetDist(tweet.getID(), tweet2.getID(), dist));
            }
        } finally {
            // Release the cursor even if building or comparing a tweet fails.
            tweets.close();
        }

        // NOTE(review): max is 0 when every compared tweet is identical to the
        // reference; confirm TweetDist.normalize copes with a zero maximum.
        NormalizeAndCut(list, max);

    }

    /**
     * Computes the case-insensitive Levenshtein (edit) distance between two
     * strings: the minimum number of single-character insertions, deletions
     * and substitutions that turn one into the other.
     *
     * <p>Case is folded with {@link Locale#ROOT} so the result does not
     * depend on the JVM's default locale (for example, "I" and "i" are
     * treated as equal even under a Turkish default locale).
     *
     * @param a first string, must not be {@code null}
     * @param b second string, must not be {@code null}
     * @return the edit distance between the lower-cased strings
     */
    public static int distance(String a, String b) {
        a = a.toLowerCase(Locale.ROOT);
        b = b.toLowerCase(Locale.ROOT);

        // Single-row dynamic programme: costs[j] holds the distance between
        // the processed prefix of a and the first j characters of b.
        int[] costs = new int[b.length() + 1];
        for (int j = 0; j < costs.length; j++) {
            costs[j] = j;
        }
        for (int i = 1; i <= a.length(); i++) {
            costs[0] = i;
            int nw = i - 1; // value of the diagonal (previous row, previous column)
            for (int j = 1; j <= b.length(); j++) {
                int cj = Math.min(1 + Math.min(costs[j], costs[j - 1]),
                        a.charAt(i - 1) == b.charAt(j - 1) ? nw : nw + 1);
                nw = costs[j];
                costs[j] = cj;
            }
        }
        return costs[b.length()];
    }

    /**
     * Normalizes every entry of {@code list} against {@code max} and copies
     * the entries whose resulting distance is at most 10 into the per-user
     * duplicate list, skipping entries that list already contains.
     *
     * @param list distances collected for one reference tweet
     * @param max  largest raw distance in {@code list}
     */
    private void NormalizeAndCut(ArrayList<TweetDist> list, int max) {
        Iterator<TweetDist> entries = list.iterator();
        while (entries.hasNext()) {
            TweetDist candidate = entries.next();

            // Per the original author's note, this rescales the distance to
            // a percentage of the maximum.
            candidate.normalize(max);

            boolean closeEnough = candidate.getDist() <= 10;
            if (closeEnough && !dublist.contains(candidate)) {
                dublist.add(candidate);
            }
        }
    }

    /**
     * Creates (or overwrites) the workbook file {@code name} containing the
     * three empty sheets used by this class.
     *
     * <p>Any I/O failure, including being unable to open the file, is logged
     * and the method returns without throwing. The workbook and the output
     * stream are always closed.
     *
     * @param name path of the workbook file to create
     */
    private void CreateFile(String name) {
        try (Workbook wb = new HSSFWorkbook(); // or new XSSFWorkbook();
                FileOutputStream fileOut = new FileOutputStream(name)) {
            wb.createSheet(sheet1Name);
            wb.createSheet(sheet2Name);
            wb.createSheet(sheet3Name);

            wb.write(fileOut);
        } catch (IOException ex) {
            // FileNotFoundException is an IOException, so a failed open ends
            // up here instead of leading to a write on a null stream.
            Logger.getLogger(Statistics.class.getName()).log(Level.SEVERE, null, ex);
        }

    }

    /**
     * Loads the workbook {@code name}, writes {@code obj} into row
     * {@code rowCounter} of sheet {@code sh} (one cell per element, starting
     * at column 0) and saves the workbook back to the same file.
     *
     * <p>Strings are written as text, numbers as numeric cells, booleans as
     * boolean cells, {@code null} leaves the cell blank and any other value
     * is written via {@code toString()}. I/O failures and an out-of-range
     * sheet index are logged; the method then returns without writing.
     *
     * @param name       path of the workbook file
     * @param sh         zero-based sheet index
     * @param rowCounter zero-based row index
     * @param obj        cell values for the row
     */
    private void writeRow(String name, int sh, int rowCounter, Object[] obj) {
        Workbook loaded;
        // Close the input before the same path is reopened for writing below.
        // NOTE(review): relies on WorkbookFactory.create(InputStream) having
        // read the whole document by the time it returns — confirm for the
        // POI version in use.
        try (InputStream inp = new FileInputStream(name)) {
            loaded = WorkbookFactory.create(inp);
        } catch (IOException | InvalidFormatException ex) {
            Logger.getLogger(Statistics.class.getName()).log(Level.SEVERE, null, ex);
            return;
        }

        try (Workbook wb = loaded) {
            // getSheetAt throws for an unknown index instead of returning
            // null, so the range is checked up front.
            if (sh < 0 || sh >= wb.getNumberOfSheets()) {
                Logger.getLogger(Statistics.class.getName()).log(Level.WARNING,
                        "Sheet index {0} does not exist in {1}", new Object[]{sh, name});
                return;
            }
            Sheet sheet = wb.getSheetAt(sh);

            Row row = sheet.getRow(rowCounter);
            if (row == null) {
                // Pass the int through unchanged: narrowing to short would
                // corrupt row indices above 32767.
                row = sheet.createRow(rowCounter);
            }

            int col = 0;
            for (Object o : obj) {
                Cell cell = row.createCell(col++);
                if (o == null) {
                    continue; // leave the cell blank
                }
                if (o instanceof String) {
                    cell.setCellValue((String) o);
                } else if (o instanceof Number) {
                    cell.setCellValue(((Number) o).doubleValue());
                } else if (o instanceof Boolean) {
                    cell.setCellValue((Boolean) o);
                } else {
                    cell.setCellValue(o.toString());
                }
            }

            try (FileOutputStream fileOut = new FileOutputStream(name)) {
                wb.write(fileOut);
            }
        } catch (IOException ex) {
            Logger.getLogger(Statistics.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Writes a "tweets per source" table to sheet {@code sh} of workbook
     * {@code name}: a header row with one column per distinct source found
     * in the stored statistics, followed by one row per user.
     *
     * <p>For the header, a source string is reduced to its first
     * double-quoted segment; a source that contains no such segment is
     * written unchanged. Cursors are closed even if processing fails.
     *
     * @param name path of the workbook file
     * @param sh   zero-based sheet index
     */
    private void WriteSources(String name, int sh) {

        // Pass 1: collect every distinct source over all users.
        HashSet<String> sources = new HashSet<>();
        DBCursor cursor = dbAdapter.getInstance().getAllStatistics();
        try {
            while (cursor.hasNext()) {
                DBUserStat stat = new DBUserStat(cursor.next());
                sources.addAll(stat.getSources());
            }
        } finally {
            cursor.close();
        }

        Object[] header = new Object[sources.size() + 1];
        header[0] = "User ID";
        int h = 1;
        for (String source : sources) {
            String[] split = source.split("\"");
            // A source without a quoted part yields fewer than two pieces;
            // fall back to the raw text instead of indexing out of bounds.
            header[h++] = split.length > 1 ? split[1] : source;
        }

        int theRow = 0;
        this.writeRow(name, sh, theRow++, header);

        // Pass 2: one row per user. The set is not modified between the two
        // loops, so it is iterated in the same order as for the header.
        cursor = dbAdapter.getInstance().getAllStatistics();
        try {
            while (cursor.hasNext()) {
                DBUserStat stat = new DBUserStat(cursor.next());

                Object[] row = new Object[sources.size() + 1];
                row[0] = stat.getUserId();

                int i = 1;
                for (String s : sources) {
                    row[i++] = stat.getNumofTweetsofSource(s);
                }
                this.writeRow(name, sh, theRow++, row);
            }
        } finally {
            cursor.close();
        }

    }

    /**
     * Writes the statistics header followed by one row per stored user
     * statistic to sheet {@code sh} of workbook {@code name}. The cursor is
     * closed even if writing fails part-way through.
     *
     * @param name path of the workbook file
     * @param sh   zero-based sheet index
     */
    private void WriteUserStats(String name, int sh) {
        int row = 0;
        this.writeRow(name, sh, row++, DBUserStat.header);

        DBCursor cursor = dbAdapter.getInstance().getAllStatistics();
        try {
            while (cursor.hasNext()) {
                DBObject next = cursor.next();
                DBUserStat stat = new DBUserStat(next);

                this.writeRow(name, sh, row++, stat.getExelRow());
            }
        } finally {
            cursor.close();
        }
    }

    /**
     * Exports the stored statistics to a new workbook: creates the file,
     * then fills sheet 1 with the per-user statistics and sheet 2 with the
     * per-source tweet counts.
     *
     * @param name path of the workbook file to create
     */
    public void ExtractXLS(String name) {
        final int userStatsSheet = 1;
        final int sourcesSheet = 2;

        CreateFile(name);
        WriteUserStats(name, userStatsSheet);
        WriteSources(name, sourcesSheet);
    }
}