edu.uga.cs.fluxbuster.analytics.ClusterSimilarityCalculator.java Source code

Java tutorial

Introduction

Here is the source code for edu.uga.cs.fluxbuster.analytics.ClusterSimilarityCalculator.java

Source

/*
* Copyright (C) 2012 Chris Neasbitt
* Author: Chris Neasbitt
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

package edu.uga.cs.fluxbuster.analytics;

import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Formatter;
import java.util.List;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.joda.time.DateTime;

import edu.uga.cs.fluxbuster.db.DBInterface;
import edu.uga.cs.fluxbuster.db.DBInterfaceFactory;
import edu.uga.cs.fluxbuster.utils.PropertiesUtils;

/**
 * Calculates the ip and domainname based similarities between the clusters
 * generated by two runs of hierarchical clustering and stores the results
 * through the configured {@link DBInterface}.
 *
 * <p>The similarity queries are templates read from this class's properties
 * file; the run dates are substituted into them as {@code yyyyMMdd} strings.
 *
 * @author Chris Neasbitt
 */
public class ClusterSimilarityCalculator {

    /** Property key of the ip-based intersection query template. */
    private static final String IPKEY = "INTERSECTION_QUERY_IP";

    /** Property key of the domainname-based intersection query template. */
    private static final String DOMAINSKEY = "INTERSECTION_QUERY_DOMAINNAME";

    /** Pattern used to render run dates before substituting them into a query. */
    private static final String DATE_PATTERN = "yyyyMMdd";

    private static final Log log = LogFactory.getLog(ClusterSimilarityCalculator.class);

    /** Query templates loaded for this class. */
    private final Properties properties;

    /** Database interface used for every query and store operation. */
    private final DBInterface db;

    /**
     * The Enum SIM_TYPE represents a type of similarity.
     */
    public static enum SIM_TYPE {
        /** The IP. */
        IP,
        /** The DOMAINNAME. */
        DOMAINNAME
    };

    /**
     * Instantiates a new cluster similarity calculator.
     *
     * @throws IOException if the ClusterSimilarityCalculator.properties file
     *       can not be loaded
     */
    public ClusterSimilarityCalculator() throws IOException {
        properties = PropertiesUtils.loadProperties(this.getClass());
        db = DBInterfaceFactory.loadDBInterface();
    }

    /**
     * Update all cluster similarities in the database between the run on the
     * supplied date and the run one day previous.
     *
     * @param adate the date of the first clustering run
     */
    public void updateClusterSimilarities(Date adate) {
        DateTime adt = new DateTime(adate.getTime());
        Date bdate = new Date(adt.minusDays(1).getMillis());
        this.updateClusterSimilarities(adate, bdate);
    }

    /**
     * Update all cluster similarities in the database between the runs on the
     * two supplied dates.
     *
     * <p>Any exception raised while initializing the similarity tables or while
     * calculating or storing the similarities is logged and not rethrown.
     *
     * @param adate the date of the first clustering run
     * @param bdate the date of the second clustering run
     */
    public void updateClusterSimilarities(Date adate, Date bdate) {
        // Resolved unconditionally so the "Finished" line never prints "null"
        // if the log level changes while this method is running.
        String simplename = this.getClass().getSimpleName();
        if (log.isInfoEnabled()) {
            log.info(simplename + " Started: " + Calendar.getInstance().getTime());
            log.info("a-date: " + adate + " b-date: " + bdate);
            log.info("Updating ip based cluster similarities.");
        }
        try {
            db.initSimilarityTables(adate);
            updateIpClusterSimilarities(adate, bdate);
            if (log.isInfoEnabled()) {
                log.info("Ip based cluster similarities updated.");
                log.info("Updating domainname based cluster similarities.");
            }
            updateDomainnameClusterSimilarities(adate, bdate);
            if (log.isInfoEnabled()) {
                log.info("Domainname based cluster similarities updated.");
            }
        } catch (Exception e) {
            if (log.isErrorEnabled()) {
                log.error("Error calculating cluster similarities.", e);
            }
        }
        if (log.isInfoEnabled()) {
            log.info(simplename + " Finished: " + Calendar.getInstance().getTime());
        }
    }

    /**
     * Update ip-based cluster similarities in the database between the runs on
     * the two supplied dates.
     *
     * @param adate the date of the first clustering run
     * @param bdate the date of the second clustering run
     * @throws Exception if unable to calculate or store the similarity results
     */
    public void updateIpClusterSimilarities(Date adate, Date bdate) throws Exception {
        storeIpClusterSimiliarities(calculateIpSimilarities(adate, bdate));
    }

    /**
     * Update domainname-based cluster similarities in the database between the runs
     * on the two supplied dates.
     *
     * @param adate the date of the first clustering run
     * @param bdate the date of the second clustering run
     * @throws Exception if unable to calculate or store the similarity results
     */
    public void updateDomainnameClusterSimilarities(Date adate, Date bdate) throws Exception {
        storeDomainnameClusterSimiliarities(calculateDomainnameSimilarities(adate, bdate));
    }

    /**
     * Store ip-based cluster similarities in the database using this
     * calculator's database interface.
     *
     * @param sims the cluster similarities
     */
    public void storeIpClusterSimiliarities(List<ClusterSimilarity> sims) {
        db.storeIpClusterSimilarities(sims);
    }

    /**
     * Store domainname-based cluster similarities in the database using this
     * calculator's database interface.
     *
     * @param sims the cluster similarities
     */
    public void storeDomainnameClusterSimiliarities(List<ClusterSimilarity> sims) {
        db.storeDomainnameClusterSimilarities(sims);
    }

    /**
     * Calculate ip-based cluster similarities between all of the clusters generated
     * during the runs on the two supplied dates.
     *
     * @param adate the date of the first clustering run
     * @param bdate the date of the second clustering run
     * @return the list of ip-based cluster similarities
     * @throws IOException if the ip query template is not configured
     */
    public List<ClusterSimilarity> calculateIpSimilarities(Date adate, Date bdate) throws IOException {
        // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
        SimpleDateFormat df = new SimpleDateFormat(DATE_PATTERN);
        String adatestr = df.format(adate);
        String bdatestr = df.format(bdate);

        // The ip template takes the a-date twice followed by the b-date.
        String query = buildQuery(IPKEY, adatestr, adatestr, bdatestr);
        return this.executeSimilarityQuery(query, adate, bdate);
    }

    /**
     * Calculate domainname-based cluster similarities between all of the clusters generated
     * during the runs on the two supplied dates.
     *
     * @param adate the date of the first clustering run
     * @param bdate the date of the second clustering run
     * @return the list of domainname-based cluster similarities
     * @throws IOException if the domainname query template is not configured
     */
    public List<ClusterSimilarity> calculateDomainnameSimilarities(Date adate, Date bdate) throws IOException {
        // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
        SimpleDateFormat df = new SimpleDateFormat(DATE_PATTERN);
        String adatestr = df.format(adate);
        String bdatestr = df.format(bdate);

        // The domainname template takes the a-date four times, then the b-date twice.
        String query = buildQuery(DOMAINSKEY, adatestr, adatestr, adatestr, adatestr,
                bdatestr, bdatestr);
        return this.executeSimilarityQuery(query, adate, bdate);
    }

    /**
     * Looks up a query template by property key and substitutes the supplied
     * arguments into it.
     *
     * @param key the property key of the query template
     * @param args the values to substitute into the template, in order
     * @return the formatted query
     * @throws IOException if no template is configured under the key
     */
    private String buildQuery(String key, Object... args) throws IOException {
        String template = properties.getProperty(key);
        if (template == null) {
            throw new IOException("Query property '" + key + "' is not defined.");
        }
        return String.format(template, args);
    }

    /**
     * Executes the similarity query and converts each result row into a
     * {@link ClusterSimilarity}. Columns 1 and 2 are read as ints and column 3
     * as a double.
     *
     * <p>A {@link SQLException} raised while running the query or reading rows is
     * logged and not rethrown; the rows read up to that point are returned.
     *
     * @param query the query to execute.
     * @param adate the date of the first clustering run
     * @param bdate the date of the second clustering run
     * @return the list of cluster similarities, possibly empty
     */
    private List<ClusterSimilarity> executeSimilarityQuery(String query, Date adate, Date bdate) {
        List<ClusterSimilarity> retval = new ArrayList<ClusterSimilarity>();
        ResultSet rs = null;
        try {
            rs = db.executeQueryWithResult(query);
            while (rs.next()) {
                retval.add(new ClusterSimilarity(adate, bdate, rs.getInt(1), rs.getInt(2), rs.getDouble(3)));
            }
        } catch (SQLException e) {
            if (log.isErrorEnabled()) {
                log.error("Error executing similarity query.", e);
            }
        } finally {
            // rs is still null when the query itself failed; closing it blindly
            // would raise a NullPointerException that hides the original error.
            if (rs != null) {
                try {
                    rs.close();
                } catch (SQLException e) {
                    if (log.isErrorEnabled()) {
                        log.error("Error closing similarity query result set.", e);
                    }
                }
            }
        }
        return retval;
    }
}