edu.txstate.dmlab.clusteringwiki.cluster.ClusterEditor.java Source code

Java tutorial

Introduction

Here is the source code for edu.txstate.dmlab.clusteringwiki.cluster.ClusterEditor.java

Source

package edu.txstate.dmlab.clusteringwiki.cluster;

/**
 *  ClusteringWiki - personalized and collaborative clustering of search results
 *  Copyright (C) 2010  Texas State University-San Marcos
 *  
 *  Contact: http://dmlab.cs.txstate.edu
 * 
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 * 
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang.StringUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import edu.txstate.dmlab.clusteringwiki.entity.ClusterEdit;
import edu.txstate.dmlab.clusteringwiki.preprocess.ICollectionContext;

/**
 * Apply cluster edits retrieved from data store to T-init cluster
 * 
 * @author David C. Anastasiu
 *
 */
public class ClusterEditor implements IClusterEditor {

    /**
     * Index of result nodes, key = result label, value = page index in allDocs
     */
    public final Map<String, Set<Integer>> pages;

    /**
     * Index of result node clusters (clusters pages are in)
     * key = page index in allDocs
     * value = set of cluster paths
     */
    public final Map<Integer, Set<String>> pageClusters;

    /**
     * List of edits to be applied
     */
    public final List<ClusterEdit> edits;

    /**
     * Some results have the same URL + title, causing non-result set duplicate paths problems
     * customResultLabels identifies duplicates and adds a prefix for copies of result lables
     */
    public final Map<Integer, String> customResultLabels;

    /**
     * Reference to the doc context
     */
    public final ICollectionContext context;

    /**
     * JSON data encoding of T-init
     */
    protected JSONObject root;

    /**
     * Next id to be assigned to new path clusters
     */
    protected int nextClusterId = 0;

    /**
     * Constructor
     * @param cluster - root cluster
     * @param edits - retrieved edits to be merged
     * @throws Exception 
     */
    public ClusterEditor(ICluster theCluster, List<ClusterEdit> theEdits, ICollectionContext theContext)
            throws Exception {

        edits = theEdits;
        context = theContext;

        //get indexes of pages
        pages = theContext.getResultLabelIndex();
        customResultLabels = theContext.getCustomResultLabels();

        pageClusters = new HashMap<Integer, Set<String>>();
        populatePageClusters(theCluster);
        nextClusterId++;

        root = theCluster.toJSON();
    }

    /**
     * Apply edits
     * @return
     */
    public JSONObject applyUserEdits() {
        for (ClusterEdit e : edits) {
            if (e.getCardinality() > 0)
                addPath(e);
            else if (e.getCardinality() < 0)
                subtractPath(e);
        }

        return root;
    }

    /**
     * Method to allow different application of aggregated edits
     * @return
     */
    public JSONObject applyAggregatedEdits() {
        return applyUserEdits();
    }

    /**
     * add data to pageClusters from given cluster
     * @param cluster
     */
    protected void populatePageClusters(ICluster cluster) {
        if (Integer.valueOf(cluster.getId()) > nextClusterId)
            nextClusterId = Integer.valueOf(cluster.getId());
        Map<Integer, IClusterDocument> docs = cluster.getDocs();
        if (docs != null) {
            for (Integer docIndex : docs.keySet()) {
                //build cluster page indexes
                Set<String> clusters = pageClusters.get(docIndex);
                if (clusters == null)
                    clusters = new HashSet<String>();
                clusters.add(cluster.getId());
                pageClusters.put(docIndex, clusters);
            }
        }
        List<ICluster> children = cluster.getChildren();
        if (children != null)
            for (ICluster c : children)
                populatePageClusters(c);
    }

    /**
     * Get the path for the edit as a list of cluster label strings
     * @param edit
     * @return
     */
    protected List<String> getPathList(ClusterEdit edit) {
        List<String> path = new ArrayList<String>(5);
        if (!StringUtils.isBlank(edit.getPath1()))
            path.add(edit.getPath1());
        if (!StringUtils.isBlank(edit.getPath2()))
            path.add(edit.getPath2());
        if (!StringUtils.isBlank(edit.getPath3()))
            path.add(edit.getPath3());
        if (!StringUtils.isBlank(edit.getPath4()))
            path.add(edit.getPath4());
        if (!StringUtils.isBlank(edit.getPath5()))
            path.add(edit.getPath5());
        return path;
    }

    /**
     * Get the result id for given path
     * @param path
     * @return
     */
    protected Integer getResultId(List<String> path) {
        String resultLabel = path.get(path.size() - 1);
        Set<Integer> p = pages.get(resultLabel);
        if (p != null)
            return p.iterator().next();
        return null;
    }

    /**
     * Get a set of result ids for the given list of paths
     * @param path
     * @return
     */
    protected Set<Integer> getResultIds(List<String> path) {
        String resultLabel = path.get(path.size() - 1);
        return pages.get(resultLabel);
    }

    /**
     * Get child cluster JSON structure from parent JSON cluster structure
     * given label of child cluster
     * @param cluster
     * @param label
     * @return
     */
    protected JSONObject getChildCluster(JSONObject cluster, String label) {
        try {
            JSONArray children = cluster.getJSONArray("children");
            for (int i = 0; i < children.length(); i++) {
                String l = children.getJSONObject(i).getString("label");
                if (StringUtils.equalsIgnoreCase(l.trim(), label))
                    return children.getJSONObject(i);
            }
            return null;
        } catch (JSONException e) {
            return null;
        }
    }

    /**
     * Create a new cluster as JSON structure given cluster label
     * @param label
     * @return
     */
    protected JSONObject newCluster(String label) {
        JSONObject c = new JSONObject();
        try {
            c.put("id", nextClusterId++);
            c.put("label", label);
            c.put("pages", JSONObject.NULL);
            c.put("children", JSONObject.NULL);
            c.put("addedPath", true);
        } catch (JSONException e) {
            /* do nothing */ }

        return c;
    }

    /**
     * Add path from cluster edit
     * @param edit
     */
    protected void addPath(ClusterEdit edit) {
        List<String> path = getPathList(edit);
        Set<Integer> pageIds = getResultIds(path);
        if (pageIds == null)
            return;

        JSONObject c = root;
        for (int i = 0; i < path.size() - 1; i++) { //path contains result label too
            JSONObject child = getChildCluster(c, path.get(i));
            if (child == null) {
                //if child cluster does not exist we add it
                child = newCluster(path.get(i));
                try {
                    JSONArray children = c.getJSONArray("children");
                    children.put(child);
                } catch (JSONException e) {
                    JSONArray children = new JSONArray();
                    try {
                        children.put(child);
                        c.put("children", children);
                    } catch (JSONException e1) {
                        /* do nothing */ }
                }
            }
            c = child;
        }
        JSONArray pages = null;
        try {
            pages = c.getJSONArray("pages");
        } catch (JSONException e) {
            pages = new JSONArray();
        }
        boolean found = false;
        for (int j = 0; j < pages.length(); j++) {
            try {
                if (pageIds.contains(pages.getInt(j)))
                    found = true;
            } catch (JSONException e) {
                /* do nothing */ }
        }

        if (!found) {
            Integer pageId = pageIds.iterator().next();
            pages.put(pageId);
            //get cluster id
            String cid;
            try {
                cid = c.getString("id");
            } catch (JSONException e) {
                cid = null;
            }
            //update indexes for page that was added
            Set<String> clusters = pageClusters.get(pageId);
            clusters.add(cid);
            pageClusters.put(pageId, clusters);

            //signal added pages
            JSONArray addedPages = null;
            try {
                addedPages = c.getJSONArray("addedPages");
            } catch (JSONException e) {
                addedPages = new JSONArray();
            }
            addedPages.put(pageId);
            try {
                c.put("pages", pages);
                c.put("addedPages", addedPages);
                c.put("addedPath", true);
            } catch (JSONException e) {
                /* do nothing */ }
        }
    }

    /**
     * Remove path from cluster
     * @param edit
     */
    protected void subtractPath(ClusterEdit edit) {
        List<String> path = getPathList(edit);
        Set<Integer> pageIds = getResultIds(path);
        if (pageIds == null)
            return;

        //page cannot be removed if there is only one copy in cluster
        Set<Integer> validPages = new HashSet<Integer>();
        for (Integer pageId : pageIds) {
            Set<String> clusters = pageClusters.get(pageId);
            if (clusters != null && clusters.size() > 1)
                validPages.add(pageId);
        }
        if (validPages.size() == 0)
            return; //no page ids that can be removed

        JSONObject c = root;
        for (int i = 0; i < path.size() - 1; i++) { //path contains result label too
            c = getChildCluster(c, path.get(i));
            if (c == null)
                return; //path does not exist
        }
        try {
            JSONArray pages = c.getJSONArray("pages");
            JSONArray p = new JSONArray();
            for (int j = 0; j < pages.length(); j++) {
                Integer pageId = pages.getInt(j);
                if (!validPages.contains(pageId))
                    p.put(pageId);
                else {
                    //update indexes of removed pages
                    Set<String> clusters = pageClusters.get(pageId);
                    clusters.remove(c.getString("id"));
                    pageClusters.put(pageId, clusters);
                }
            }
            c.put("pages", p);
            if (p.length() == 0)
                deleteNode(path.subList(0, path.size() - 1));
        } catch (JSONException e) {
            /* do nothing */ }
    }

    /**
     * Delete a node in the cluster structure associated with given path
     * @param path
     */
    protected void deleteNode(List<String> path) {

        JSONObject c = root;
        JSONObject parent = null;
        for (int i = 0; i < path.size(); i++) {
            parent = c;
            c = getChildCluster(c, path.get(i));
            if (c == null)
                return; //path does not exist
        }
        if (parent != null)
            try {
                try {
                    JSONArray pages = c.getJSONArray("pages");
                    if (pages != null && pages.length() > 0)
                        return;
                } catch (JSONException e) {
                    /* do nothing */ }
                JSONArray children = null;
                try {
                    children = c.getJSONArray("children");
                    if (children != null && children.length() > 0)
                        return;
                } catch (JSONException e) {
                    /* do nothing */ }
                //trim node from parent's children
                children = parent.getJSONArray("children");
                JSONArray newch = new JSONArray();
                for (int j = 0; j < children.length(); j++)
                    if (!StringUtils.equals(children.getJSONObject(j).getString("id"), c.getString("id")))
                        newch.put(children.getJSONObject(j));
                parent.put("children", newch);
                if (newch.length() == 0) {
                    if (path.size() > 1)
                        deleteNode(path.subList(0, path.size() - 1));
                }

            } catch (JSONException e) {
                System.out.println(e.getMessage());
            }

    }
}