org.ossmeter.metricprovider.trans.newsgroups.threads.Threader.java Source code

Java tutorial

Introduction

Here is the source code for org.ossmeter.metricprovider.trans.newsgroups.threads.Threader.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.ossmeter.metricprovider.trans.newsgroups.threads;

/**
 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
 * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
 *
 * @author rwinston <rwinston@checkfree.com>
 *
 */

import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.net.nntp.Threadable;

public class Threader {
    private ThreadContainer root;
    private HashMap<String, ThreadContainer> idTable;
    private int bogusIdCount = 0;

    /**
     * The client passes in a list of Threadable objects, and
     * the Threader constructs a connected 'graph' of messages
     * @param messages list of messages to thread
     * @return null if messages == null or root.child == null
     * @since 2.2
     */
    public Threadable thread(List<? extends Threadable> messages) {
        return thread((Iterable<? extends Threadable>) messages);
    }

    /**
     * The client passes in a list of Iterable objects, and
     * the Threader constructs a connected 'graph' of messages
     * @param messages iterable of messages to thread
     * @return null if messages == null or root.child == null
     * @since 3.0
     */
    public Threadable thread(Iterable<? extends Threadable> messages) {
        if (messages == null) {
            return null;
        }

        idTable = new HashMap<String, ThreadContainer>();

        // walk through each Threadable element
        for (Threadable t : messages) {
            if (!t.isDummy()) {
                buildContainer(t);
            }
        }

        root = findRootSet();
        idTable.clear();
        idTable = null;

        pruneEmptyContainers(root);

        root.reverseChildren();
        gatherSubjects();

        if (root.next != null) {
            throw new RuntimeException("root node has a next:" + root);
        }

        for (ThreadContainer r = root.child; r != null; r = r.next) {
            if (r.threadable == null) {
                r.threadable = r.child.threadable.makeDummy();
            }
        }

        Threadable result = (root.child == null ? null : root.child.threadable);
        root.flush();
        root = null;

        return result;
    }

    /**
     *
     * @param threadable
     */
    private void buildContainer(Threadable threadable) {
        String id = threadable.messageThreadId();
        ThreadContainer container = idTable.get(id);

        // A ThreadContainer exists for this id already. This should be a forward reference, but may
        // be a duplicate id, in which case we will need to generate a bogus placeholder id
        if (container != null) {
            if (container.threadable != null) { // oops! duplicate ids...
                id = "<Bogus-id:" + (bogusIdCount++) + ">";
                container = null;
            } else {
                // The container just contained a forward reference to this message, so let's
                // fill in the threadable field of the container with this message
                container.threadable = threadable;
            }
        }

        // No container exists for that message Id. Create one and insert it into the hash table.
        if (container == null) {
            container = new ThreadContainer();
            container.threadable = threadable;
            idTable.put(id, container);
        }

        // Iterate through all of the references and create ThreadContainers for any references that
        // don't have them.
        ThreadContainer parentRef = null;
        {
            String[] references = threadable.messageThreadReferences();
            for (String refString : references) {
                ThreadContainer ref = idTable.get(refString);

                // if this id doesnt have a container, create one
                if (ref == null) {
                    ref = new ThreadContainer();
                    idTable.put(refString, ref);
                }

                // Link references together in the order they appear in the References: header,
                // IF they dont have a have a parent already &&
                // IF it will not cause a circular reference
                if ((parentRef != null) && (ref.parent == null) && (parentRef != ref)
                        && !(ref.findChild(parentRef))) {
                    // Link ref into the parent's child list
                    ref.parent = parentRef;
                    ref.next = parentRef.child;
                    parentRef.child = ref;
                }
                parentRef = ref;
            }
        }

        // parentRef is now set to the container of the last element in the references field. make that
        // be the parent of this container, unless doing so causes a circular reference
        if (parentRef != null && (parentRef == container || container.findChild(parentRef))) {
            parentRef = null;
        }

        // if it has a parent already, its because we saw this message in a References: field, and presumed
        // a parent based on the other entries in that field. Now that we have the actual message, we can
        // throw away the old parent and use this new one
        if (container.parent != null) {
            ThreadContainer rest, prev;

            for (prev = null, rest = container.parent.child; rest != null; prev = rest, rest = rest.next) {
                if (rest == container) {
                    break;
                }
            }

            if (rest == null) {
                throw new RuntimeException("Didnt find " + container + " in parent" + container.parent);
            }

            // Unlink this container from the parent's child list
            if (prev == null) {
                container.parent.child = container.next;
            } else {
                prev.next = container.next;
            }

            container.next = null;
            container.parent = null;
        }

        // If we have a parent, link container into the parents child list
        if (parentRef != null) {
            container.parent = parentRef;
            container.next = parentRef.child;
            parentRef.child = container;
        }
    }

    /**
     * Find the root set of all existing ThreadContainers
     * @return root the ThreadContainer representing the root node
     */
    private ThreadContainer findRootSet() {
        ThreadContainer root = new ThreadContainer();
        Iterator<String> iter = idTable.keySet().iterator();

        while (iter.hasNext()) {
            Object key = iter.next();
            ThreadContainer c = idTable.get(key);
            if (c.parent == null) {
                if (c.next != null) {
                    throw new RuntimeException("c.next is " + c.next.toString());
                }
                c.next = root.child;
                root.child = c;
            }
        }
        return root;
    }

    /**
     * Delete any empty or dummy ThreadContainers
     * @param parent
     */
    private void pruneEmptyContainers(ThreadContainer parent) {
        ThreadContainer container, prev, next;
        for (prev = null, container = parent.child, next = container.next; container != null; prev = container, container = next, next = (container == null
                ? null
                : container.next)) {

            // Is it empty and without any children? If so,delete it
            if (container.threadable == null && container.child == null) {
                if (prev == null) {
                    parent.child = container.next;
                } else {
                    prev.next = container.next;
                }

                // Set container to prev so that prev keeps its same value the next time through the loop
                container = prev;
            }

            // Else if empty, with kids, and (not at root or only one kid)
            else if (container.threadable == null && container.child != null
                    && (container.parent != null || container.child.next == null)) {
                // We have an invalid/expired message with kids. Promote the kids to this level.
                ThreadContainer tail;
                ThreadContainer kids = container.child;

                // Remove this container and replace with 'kids'.
                if (prev == null) {
                    parent.child = kids;
                } else {
                    prev.next = kids;
                }

                // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next
                // i.e. splice kids into the list in place of container
                for (tail = kids; tail.next != null; tail = tail.next) {
                    tail.parent = container.parent;
                }

                tail.parent = container.parent;
                tail.next = container.next;

                // next currently points to the item after the inserted items in the chain - reset that so we process the newly
                // promoted items next time round
                next = kids;

                // Set container to prev so that prev keeps its same value the next time through the loop
                container = prev;
            } else if (container.child != null) {
                // A real message , with kids
                // Iterate over the children
                pruneEmptyContainers(container);
            }
        }
    }

    /**
     *  If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers.
     */
    private void gatherSubjects() {

        int count = 0;

        for (ThreadContainer c = root.child; c != null; c = c.next) {
            count++;
        }

        // TODO verify this will avoid rehashing
        HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2),
                (float) 0.9);
        count = 0;

        for (ThreadContainer c = root.child; c != null; c = c.next) {
            Threadable threadable = c.threadable;

            // No threadable? If so, it is a dummy node in the root set.
            // Only root set members may be dummies, and they alway have at least 2 kids
            // Take the first kid as representative of the subject
            if (threadable == null) {
                threadable = c.child.threadable;
            }

            String subj = threadable.simplifiedSubject();

            if (subj == null || subj.length() == 0) {
                continue;
            }

            ThreadContainer old = subjectTable.get(subj);

            // Add this container to the table iff:
            // - There exists no container with this subject
            // - or this is a dummy container and the old one is not - the dummy one is
            // more interesting as a root, so put it in the table instead
            // - The container in the table has a "Re:" version of this subject, and
            // this container has a non-"Re:" version of this subject. The non-"Re:" version
            // is the more interesting of the two.
            if (old == null || (c.threadable == null && old.threadable != null) || (old.threadable != null
                    && old.threadable.subjectIsReply() && c.threadable != null && !c.threadable.subjectIsReply())) {
                subjectTable.put(subj, c);
                count++;
            }
        }

        // If the table is empty, we're done
        if (count == 0) {
            return;
        }

        // subjectTable is now populated with one entry for each subject which occurs in the
        // root set. Iterate over the root set, and gather together the difference.
        ThreadContainer prev, c, rest;
        for (prev = null, c = root.child, rest = c.next; c != null; prev = c, c = rest, rest = (rest == null ? null
                : rest.next)) {
            Threadable threadable = c.threadable;

            // is it a dummy node?
            if (threadable == null) {
                threadable = c.child.threadable;
            }

            String subj = threadable.simplifiedSubject();

            // Dont thread together all subjectless messages
            if (subj == null || subj.length() == 0) {
                continue;
            }

            ThreadContainer old = subjectTable.get(subj);

            if (old == c) { // That's us
                continue;
            }

            // We have now found another container in the root set with the same subject
            // Remove the "second" message from the root set
            if (prev == null) {
                root.child = c.next;
            } else {
                prev.next = c.next;
            }
            c.next = null;

            if (old.threadable == null && c.threadable == null) {
                // both dummies - merge them
                ThreadContainer tail;
                for (tail = old.child; tail != null && tail.next != null; tail = tail.next) {
                    // do nothing
                }

                if (tail != null) { // protect against possible NPE
                    tail.next = c.child;
                }

                for (tail = c.child; tail != null; tail = tail.next) {
                    tail.parent = old;
                }

                c.child = null;
            } else if (old.threadable == null || (c.threadable != null && c.threadable.subjectIsReply()
                    && !old.threadable.subjectIsReply())) {
                // Else if old is empty, or c has "Re:" and old does not  ==> make this message a child of old
                c.parent = old;
                c.next = old.child;
                old.child = c;
            } else {
                // else make the old and new messages be children of a new dummy container.
                // We create a new container object for old.msg and empty the old container
                ThreadContainer newc = new ThreadContainer();
                newc.threadable = old.threadable;
                newc.child = old.child;

                for (ThreadContainer tail = newc.child; tail != null; tail = tail.next) {
                    tail.parent = newc;
                }

                old.threadable = null;
                old.child = null;

                c.parent = old;
                newc.parent = old;

                // Old is now a dummy- give it 2 kids , c and newc
                old.child = c;
                c.next = newc;
            }
            // We've done a merge, so keep the same prev
            c = prev;
        }

        subjectTable.clear();
        subjectTable = null;

    }

    // DEPRECATED METHODS - for API compatibility only - DO NOT USE

    /**
     * The client passes in an array of Threadable objects, and
     * the Threader constructs a connected 'graph' of messages
     * @param messages array of messages to thread
     * @return null if messages == null or root.child == null
     * @deprecated (2.2) prefer {@link #thread(List)}
     */
    @Deprecated
    public Threadable thread(Threadable[] messages) {
        return thread(java.util.Arrays.asList(messages));
    }

}