// Fluxbuster — hierarchical clustering driver (ClusterGenerator)
/* * Copyright (C) 2012 Chris Neasbitt * Author: Chris Neasbitt * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package edu.uga.cs.fluxbuster.clustering; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.Collections; import java.util.Comparator; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Properties; import java.util.Set; import java.util.Vector; import java.util.concurrent.Executors; import java.util.concurrent.ThreadFactory; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.zip.GZIPInputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.joda.time.DateTime; import edu.uga.cs.fluxbuster.classification.ClusterClass; import edu.uga.cs.fluxbuster.clustering.hierarchicalclustering.Dendrogram; import edu.uga.cs.fluxbuster.clustering.hierarchicalclustering.DistanceMatrix; import edu.uga.cs.fluxbuster.clustering.hierarchicalclustering.HCluster; import edu.uga.cs.fluxbuster.clustering.hierarchicalclustering.HierarchicalClustering; import 
edu.uga.cs.fluxbuster.clustering.hierarchicalclustering.HierarchicalClustering.LinkageType; import edu.uga.cs.fluxbuster.db.DBInterface; import edu.uga.cs.fluxbuster.db.DBInterfaceFactory; import edu.uga.cs.fluxbuster.utils.PropertiesUtils; import edu.uga.cs.fluxbuster.utils.DomainNameUtils; /** * This class initiates the hierarchical clustering process. * * @author Chris Neasbitt */ public class ClusterGenerator { private ArrayList<String> domainWhitelist = null; private Properties localprops = null, appprops = null; private static final String WHITELISTKEY = "WHITELIST_FILE"; private static final String GAMMAKEY = "GAMMA"; private static final String FLUXFILEREGEXKEY = "CANDIDATE_FLUX_FILE_REGEX"; private static final String FLUXFILEPARSEREGEXKEY = "CANDIDATE_FLUX_FILE_PARSING_REGEX"; private static final String FLUXDIRKEY = "CANDIDATE_FLUX_DIR"; private static final String MINRRSETSIZEKEY = "MIN_TOTAL_RRSET_SIZE"; private static final String MINDIVERSITYKEY = "MIN_TOTAL_DIVERSITY"; private static final String SHORTTTLKEY = "VERY_SHORT_TTL"; private static final String CANDIDATETHRESHKEY = "GOOD_CANDIDATE_THRESHOLD"; private static final String MAXDOMAINSKEY = "MAX_CANDIDATE_DOMAINS"; private static final String LINKAGETYPEKEY = "LINKAGE_TYPE"; private static final String MAXCUTHEIGHTKEY = "MAX_CUT_HEIGHT"; private static final String DISTMATRIXKEY = "DIST_MATRIX_MULTITHREADED"; private static final String DISTNUMTHREADSKEY = "DIST_MATRIX_NUMTHREADS"; private static final String SELECTEDCFDFILEKEY = "SELECTED_CFD_FILE"; private static Log log = LogFactory.getLog(ClusterGenerator.class); /** * Instantiates a new cluster generator. 
* * @throws IOException if the ClusterGenerator.localprops file can * not be read */ public ClusterGenerator() throws IOException { localprops = PropertiesUtils.loadProperties(this.getClass()); appprops = PropertiesUtils.loadAppWideProperties(); try { loadWhitelist(); } catch (IOException e) { if (log.isErrorEnabled()) { log.error("Error loading domain whitelist.", e); } } } /** * Load the domain whitelist. * * @throws IOException if the whitelist file can not be read */ private void loadWhitelist() throws IOException { domainWhitelist = new ArrayList<String>(); String whitelistfile = localprops.getProperty(WHITELISTKEY); BufferedReader br = new BufferedReader(new FileReader(whitelistfile)); String line; while ((line = br.readLine()) != null) { domainWhitelist.add(DomainNameUtils.stripDots(line.trim())); } br.close(); } /** * Compute a distance matrix from a list of candidate flux domains. * * @param cfds the candidate flux domains * @return the vector of values in the distance matrix in row major * order */ private Vector<Float> computeDistanceMatrix(List<CandidateFluxDomain> cfds) { boolean multithread = Boolean.parseBoolean(appprops.getProperty(DISTMATRIXKEY)); if (multithread) { int numthreads = Integer.parseInt(appprops.getProperty(DISTNUMTHREADSKEY)); if (numthreads < 1) { numthreads = 1; } return computeDistanceMatrixMultiThreaded(cfds, numthreads); } else { return computeDistanceMatrixMultiThreaded(cfds, 1); } } /** * Compute a distance matrix from a list of candidate flux domains with * a maximum number of calculation threads. 
* * @param cfds the list of candidate flux domains * @param maxnumthreads the thread ceiling * @return the vector of values in the distance matrix in row major * order */ private Vector<Float> computeDistanceMatrixMultiThreaded(List<CandidateFluxDomain> cfds, int maxnumthreads) { Vector<Float> retval = new Vector<Float>(); ThreadFactory tf = Executors.defaultThreadFactory(); double gamma = Double.parseDouble(localprops.getProperty(GAMMAKEY)); ArrayList<Thread> threads = new ArrayList<Thread>(); ArrayList<HashSet<Integer>> threadrows = new ArrayList<HashSet<Integer>>(); int interval = (int) Math.ceil((cfds.size() - 1) / (double) maxnumthreads); int left = 0; int right = cfds.size() - 2; HashSet<Integer> curset = null; boolean addLeftFirst = true; while (left <= right) { if (curset == null) { curset = new HashSet<Integer>(); } if (curset.size() == interval) { threadrows.add(curset); curset = null; } else { if (addLeftFirst) { curset.add(left++); } else { curset.add(right--); } addLeftFirst = !addLeftFirst; if (curset.size() == interval) { continue; } if (addLeftFirst) { curset.add(left++); } else { curset.add(right--); } } } if (curset != null && curset.size() > 0) { threadrows.add(curset); } ArrayList<Vector<Float>> resultsList = new ArrayList<Vector<Float>>(cfds.size()); // this is necessary to make sure that the proper indexes exist in // resultsList before being accessed by the threads for (int i = 0; i < cfds.size() - 1; i++) { resultsList.add(null); } for (int i = 0; i < threadrows.size(); i++) { Thread t = tf.newThread(new DistanceMatrixCalculator(gamma, threadrows.get(i), cfds, resultsList)); threads.add(t); } for (Thread t : threads) { t.start(); } for (Thread t : threads) { try { t.join(); } catch (InterruptedException e) { e.printStackTrace(); } } for (int i = 0; i < resultsList.size(); i++) { retval.addAll(resultsList.get(i)); } return retval; } /** * Determines if a domain name is in the whitelist. 
* * @param domainname the domain name * @return true, if the domain name is on the whitelist */ private boolean isWhiteListable(String domainname) { for (String d : domainWhitelist) { if (domainname.endsWith(d)) { return true; } } return false; } // TODO improve the candidate score algorithm /** * Calculates the candidate flux domains clustering potential. This * value is used to sort which candidate flux domains are the best * candidates for clustering. * * @param cfd the candidate flux domain * @return the candidate score */ public double calcCandidateScore(CandidateFluxDomain cfd) { int minTotalRrsetSize = Integer.parseInt(localprops.getProperty(MINRRSETSIZEKEY)); double minTotalDiversity = Double.parseDouble(localprops.getProperty(MINDIVERSITYKEY)); double veryShortTTL = Double.parseDouble(localprops.getProperty(SHORTTTLKEY)); double ipDiv = IPDiversityCalculator.ipDiversity(IPDiversityCalculator.getV4Ips(cfd.getIps())); if (cfd.getNumIPs() >= minTotalRrsetSize && ipDiv > minTotalDiversity) { return 1.0; } else if (cfd.getNumIPs() == 1 && cfd.getAvgTTL() <= veryShortTTL) { return 1.0; } return 0.0; } /** * Load candidate flux domains from the data files for the time period * between the start and end times. * * @param startTime the start time in sec. * @param endTime the end time in sec. * @param domainfile a file containing the list of domains that should * be clustered regardless of the candidate score. If null the list * is ignored. 
* @return the list of candidate flux domains * @throws Exception if there is an error reading the ClusterGenerator.localprops * or data files */ public List<CandidateFluxDomain> loadCandidateFluxDomains(long startTime, long endTime, String domainfile) throws Exception { ArrayList<CandidateFluxDomain> retval = new ArrayList<CandidateFluxDomain>(); HashMap<String, CandidateFluxDomain> seenDomains = new HashMap<String, CandidateFluxDomain>(); Set<String> recentFluxDomains = loadRecentFluxDomains(startTime); String dirPath = appprops.getProperty(FLUXDIRKEY); double goodCandidateThreshold = Double.parseDouble(appprops.getProperty(CANDIDATETHRESHKEY)); int maxCandidateDomains = Integer.parseInt(appprops.getProperty(MAXDOMAINSKEY)); for (String filename : getFileNames(dirPath, startTime, endTime)) { BufferedReader br = null; try { GZIPInputStream gis = new GZIPInputStream(new FileInputStream(filename)); br = new BufferedReader(new InputStreamReader(gis)); String line; while ((line = br.readLine()) != null) { CandidateFluxDomain cfd = CandidateFluxDomain.parseFromLog(line); if (isWhiteListable(cfd.getDomainName())) { if (log.isDebugEnabled()) { log.debug(cfd.getDomainName() + " is whitelisted."); } continue; } String domainname = cfd.getDomainName(); if (seenDomains.containsKey(domainname)) { CandidateFluxDomain prev = seenDomains.get(domainname); seenDomains.put(domainname, prev.merge(cfd)); } else { seenDomains.put(domainname, cfd); } } } catch (Exception e) { e.printStackTrace(); } finally { if (br != null) { br.close(); } } } //add all domains from a file if (domainfile != null) { addDomainsFromFile(domainfile, maxCandidateDomains, retval, seenDomains); } ArrayList<String> allDomains = new ArrayList<String>(); allDomains.addAll(seenDomains.keySet()); // add all domains from recently seen flux domains if (retval.size() < maxCandidateDomains && recentFluxDomains.size() > 0) { addRecentFluxDomains(recentFluxDomains, maxCandidateDomains, retval, seenDomains, allDomains); } 
// then add the non-recent ones that meet the score threshold if (retval.size() < maxCandidateDomains) { addThresholdMeetingDomains(maxCandidateDomains, goodCandidateThreshold, retval, seenDomains, allDomains); } // then fill the rest randomly from what's left over if (retval.size() < maxCandidateDomains) { Collections.shuffle(allDomains); for (String domainname : allDomains) { if (retval.size() == maxCandidateDomains) { break; } retval.add(seenDomains.get(domainname)); } } return retval; } /** * Copies candidate flux domains into a list if its candidate score is greater * than a threshold up to a limit on the size of the list. The candidate flux * domains are copied from a map of candidate flux domains. Domains are only * considered if they appear in the all domains list. Once a candidate flux * domain is copied it's corresponding domain name is removed from the all * domains list. * * @param maxCandidateDomains the limit on the total number of domains to add * @param goodCandidateThreshold the candidate score threshold * @param resultBuf the list in which to store the candidate flux domains * @param seenDomains the map of candidate flux domains. 
* @param allDomains this list of domains to consider */ private void addThresholdMeetingDomains(int maxCandidateDomains, double goodCandidateThreshold, List<CandidateFluxDomain> resultBuf, HashMap<String, CandidateFluxDomain> seenDomains, ArrayList<String> allDomains) { ArrayList<CandidateFluxDomain> sortedDomains = new ArrayList<CandidateFluxDomain>(); ArrayList<String> removeDomains = new ArrayList<String>(); // get all cfd's whose score is over the threshold for (String domain : allDomains) { CandidateFluxDomain temp = seenDomains.get(domain); if (this.calcCandidateScore(temp) > goodCandidateThreshold) { sortedDomains.add(temp); } } // sort them in descending order by score Collections.sort(sortedDomains, new Comparator<CandidateFluxDomain>() { @Override public int compare(CandidateFluxDomain o1, CandidateFluxDomain o2) { Double o1score = calcCandidateScore(o1); Double o2score = calcCandidateScore(o2); return o2score.compareTo(o1score); // Descending // order } }); for (CandidateFluxDomain cfd2 : sortedDomains) { if (resultBuf.size() == maxCandidateDomains) { break; } resultBuf.add(cfd2); removeDomains.add(cfd2.getDomainName()); } allDomains.removeAll(removeDomains); } /** * Copies candidate flux domains into a list if its corresponding 2LD is present * in a list of recent flux domains up to a limit on the size of the list. The * candidate flux domains are copied from a map of candidate flux domains. Domains * are only considered if they appear in the all domains list. Once a candidate flux * domain is copied it's corresponding domain name is removed from the all domains list. * * @param recentFluxDomains the list of recent flux 2LD's * @param maxCandidateDomains the limit on the total number of domains to add * @param resultBuf the list in which to store the candidate flux domains * @param seenDomains the map of candidate flux domains. 
* @param allDomains this list of domains to consider */ private void addRecentFluxDomains(Set<String> recentFluxDomains, int maxCandidateDomains, List<CandidateFluxDomain> resultBuf, HashMap<String, CandidateFluxDomain> seenDomains, ArrayList<String> allDomains) { ArrayList<String> removeDomains = new ArrayList<String>(); Collections.shuffle(allDomains); // this is probably not necessary for (String domainname : allDomains) { if (resultBuf.size() == maxCandidateDomains) { break; } String domainname2LD = DomainNameUtils.extractEffective2LD(domainname); if (domainname2LD != null && recentFluxDomains.contains(domainname2LD)) { resultBuf.add(seenDomains.get(domainname)); removeDomains.add(domainname); } } allDomains.removeAll(removeDomains); } /** * Copies candidate flux domains into a list if they appear in a domain file up * to a limit on the size of the list. The candidate flux domains are copied * from a map of candidate flux domains. Once a candidate flux domain is copied * it is removed from the map. * * @param domainfile the file from which to read the domains * @param maxCandidateDomains the limit on the total number of domains to add * @param resultBuf the list in which to store the candidate flux domains * @param seenDomains the map of candidate flux domains. 
* @throws IOException */ private void addDomainsFromFile(String domainfile, int maxCandidateDomains, List<CandidateFluxDomain> resultBuf, HashMap<String, CandidateFluxDomain> seenDomains) throws IOException { BufferedReader br = new BufferedReader(new FileReader(new File(domainfile))); String line = null; while ((line = br.readLine()) != null) { if (resultBuf.size() == maxCandidateDomains) { break; } line = DomainNameUtils.stripDots(line.trim()); CandidateFluxDomain d = seenDomains.get(line); if (d != null) { if (log.isDebugEnabled()) { log.debug("Adding domain " + line + " from domains file."); } resultBuf.add(d); seenDomains.remove(line); } else { if (log.isDebugEnabled()) { log.debug("Unable to load domain " + line + " from domains file."); } } } br.close(); } /** * Load recent flux 2LD's. * * @param startTime unix epoch in sec. * @return the list of recent flux 2LD's */ private Set<String> loadRecentFluxDomains(long startTime) { Set<String> retval = new HashSet<String>(); DBInterface iface = DBInterfaceFactory.loadDBInterface(); DateTime startDateTime = new DateTime(startTime * 1000); for (int i = 1; i < 8; i++) { Date prevdate = new Date(startDateTime.minusDays(i).getMillis()); try { for (StoredDomainCluster fluxCluster : iface.getClusters(prevdate, ClusterClass.FLUX)) { for (String domain : fluxCluster.getDomains()) { retval.add(DomainNameUtils.extractEffective2LD(domain)); } } } catch (Exception e) { if (log.isErrorEnabled()) { SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMdd"); log.error("Uable to load previous flux domains for " + dateFormat.format(prevdate), e); } } } return retval; } /** * Gets the names of the data input files from a specific * directory for the time period between the start and end times. 
* * @param dirPath the data file directory * @param startTime the start time * @param endTime the end time * @return the list of input file names */ private List<String> getFileNames(String dirPath, long startTime, long endTime) { ArrayList<String> retval = new ArrayList<String>(); ArrayList<File> selectedFiles = new ArrayList<File>(); String fileregex = localprops.getProperty(FLUXFILEREGEXKEY); String parseregx = localprops.getProperty(FLUXFILEPARSEREGEXKEY); Pattern parsepattern = Pattern.compile(parseregx); File fluxdir = new File(dirPath); if (fluxdir.isDirectory()) { File[] posFluxFiles = fluxdir.listFiles(); if (posFluxFiles != null) { for (File posFluxFile : posFluxFiles) { if (posFluxFile.getName().matches(fileregex)) { Matcher parsematcher = parsepattern.matcher(posFluxFile.getName()); parsematcher.find(); long timestamp = Long.parseLong(parsematcher.group(0)); if (timestamp >= startTime && timestamp < endTime) { selectedFiles.add(posFluxFile); } } } } } // sorts in ascending order by filename Collections.sort(selectedFiles, new Comparator<File>() { @Override public int compare(File o1, File o2) { return o1.getName().compareTo(o2.getName()); } }); for (File selectedFile : selectedFiles) { if (log.isDebugEnabled()) { log.debug("Loading file: " + selectedFile.getName()); } retval.add(selectedFile.getAbsolutePath()); } return retval; } /** * Runs the clustering process on the data files for the time period * between the start and end times. 
The linkage type and max cut height * are read from the ClusterGenerator.localprops file * * @param startTime the start time * @param endTime the end time * @param selcfds if true then the file with a list of domains to cluster regardless * of candidate score is used for clustering * @return the list of clusters * @throws Exception if there is an error reading the ClusterGenerator.localprops * or data files */ public List<DomainCluster> generateClusters(long startTime, long endTime, boolean selcfds) throws Exception { if (selcfds) { String selcfdfilepath = appprops.getProperty(SELECTEDCFDFILEKEY); if (new File(selcfdfilepath).exists()) { return this.generateClusters(startTime, endTime, selcfdfilepath); } } return this.generateClusters(startTime, endTime, null); } /** * Runs the clustering process on the data files for the time period * between the start and end times. The linkage type and max cut height * are read from the ClusterGenerator.localprops file * * @param startTime the start time * @param endTime the end time * @param domainfile a list of domains to cluster regardless of candidate * score * @return the list of clusters * @throws Exception if there is an error reading the ClusterGenerator.localprops * or data files */ public List<DomainCluster> generateClusters(long startTime, long endTime, String domainfile) throws Exception { double maxCutHeight = Double.parseDouble(appprops.getProperty(MAXCUTHEIGHTKEY)); String linkageTypeStr = appprops.getProperty(LINKAGETYPEKEY); LinkageType linkage = LinkageType.COMPLETE_LINKAGE; if (linkageTypeStr.toLowerCase().trim().equals("single")) { linkage = LinkageType.SINGLE_LINKAGE; } return this.generateClusters(startTime, endTime, domainfile, linkage, maxCutHeight); } /** * Runs the clustering process on the data files for the time period * between the start and end times. 
* * @param startTime the start time * @param endTime the end time * @param domainfile a list of domains to cluster regardless of candidate * score * @param linkage the linkage type * @param maxCutHeight the max cut height * @return the list of clusters * @throws Exception if there is an error reading the ClusterGenerator.localprops * or data files */ private List<DomainCluster> generateClusters(long startTime, long endTime, String domainfile, LinkageType linkage, double maxCutHeight) throws Exception { ArrayList<DomainCluster> retval = new ArrayList<DomainCluster>(); if (log.isInfoEnabled()) { log.info(this.getClass().getSimpleName() + " Started: " + Calendar.getInstance().getTime()); log.info("Loading Candidate Flux Domains."); } List<CandidateFluxDomain> cfdList = loadCandidateFluxDomains(startTime, endTime, domainfile); if (log.isInfoEnabled()) { log.info("Loaded " + cfdList.size() + " Candidate Flux Domains."); } if (cfdList.size() > 0) { if (log.isInfoEnabled()) { log.info("Computing Distance Matrix."); } Vector<Float> utDistValues = this.computeDistanceMatrix(cfdList); DistanceMatrix distMatrix = new DistanceMatrix(utDistValues); if (log.isInfoEnabled()) { log.info("Distance Matrix Calculated."); } HierarchicalClustering hc = new HierarchicalClustering(linkage); if (log.isInfoEnabled()) { log.info("Running Clusterer."); } hc.runClusterer(distMatrix, maxCutHeight); if (log.isInfoEnabled()) { log.info("Clustering Completed."); } Dendrogram dgram = hc.getDendrogram(); if (log.isInfoEnabled()) { log.info("Creating Domain Clusters."); } Vector<HCluster> hclusters = dgram.getClusters(maxCutHeight); for (HCluster hcluster : hclusters) { DomainCluster dm = new DomainCluster(); for (int index : hcluster.getIndexes()) { dm.addCandidateFluxDomain(cfdList.get(index)); } retval.add(dm); } if (log.isInfoEnabled()) { for (DomainCluster d : retval) { log.info(d.toString()); } log.info("Created " + retval.size() + " Domain Clusters."); log.info(this.getClass().getSimpleName() 
+ " Finished: " + Calendar.getInstance().getTime()); } } return retval; } /** * Store clusters through a db interface loaded by the DBInterfaceFactory. * * @param clusters the list of clusters to store. * @param log_date the clustering run date * @throws Exception if the database interface could not be loaded. */ public void storeClusters(List<DomainCluster> clusters, Date log_date) throws Exception { DBInterface dbiface = DBInterfaceFactory.loadDBInterface(); if (dbiface == null) { throw new Exception("Could not load DB interface."); } if (log.isInfoEnabled()) { log.info(this.getClass().getSimpleName() + " Started: " + Calendar.getInstance().getTime()); log.info("Storing " + clusters.size() + " Clusters."); } dbiface.initClusterTables(log_date); dbiface.storeClusters(clusters, "SIE", log_date); if (log.isInfoEnabled()) { log.info("Clusters stored."); log.info(this.getClass().getSimpleName() + " Finished: " + Calendar.getInstance().getTime()); } } /** * Prints each each cluster in the list to stdout. * * @param clusters the list clusters to print */ public void printClusters(List<DomainCluster> clusters) { for (DomainCluster cluster : clusters) { System.out.println(cluster); } } }