info.zhoumin.dat.AbstractDoubleArrayTrie.java Source code

Java tutorial

Introduction

Here is the source code for info.zhoumin.dat.AbstractDoubleArrayTrie.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package info.zhoumin.dat;

import info.zhoumin.dat.analyzer.Analyzer;
import info.zhoumin.dat.util.ResizableIntArray;
import io.netty.buffer.ByteBuf;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * @author Min Zhou (coderplay AT gmail.com)
 */
abstract class AbstractDoubleArrayTrie<K, V> extends AbstractTrie<K, V> {
    private static final long serialVersionUID = -6999526694361413605L;

    /** Maximum trie index value */
    private static final int TRIE_INDEX_MAX = 0x7fffffff;

    private static final int DA_SIGNATURE = 0xDAFCDAFC;

    private static final int TRIE_INDEX_ERROR = 0;

    private static final int FREE_LIST_BEGIN = 1;

    private static final int DOUBLE_ARRAY_ROOT = 2;

    private static final int DA_POOL_BEGIN = 3;

    protected ResizableIntArray base;

    protected ResizableIntArray check;

    protected Tail tail;

    protected boolean isDirty;

    AbstractDoubleArrayTrie(Analyzer<? super K> keyAnalyzer) {
        super(keyAnalyzer);
        this.base = new ResizableIntArray(3);
        this.check = new ResizableIntArray(3);

        /*
         * DA Header: - Cell 0: SIGNATURE, number of cells - Cell 1: free
         * circular-list pointers - Cell 2: root node - Cell 3: DA pool begin
         */
        base.set(0, DA_SIGNATURE);
        check.set(0, DA_POOL_BEGIN);
        base.set(1, -1);
        check.set(1, -1);
        base.set(2, DA_POOL_BEGIN);
        check.set(2, 0);

        this.tail = new Tail();

        this.isDirty = true;
    }

    private int getCheck(int s) {
        return (s < check.capacity()) ? check.get(s) : TRIE_INDEX_ERROR;
    }

    private boolean daIsSeperate(int s) {
        return base.get(s) < 0;
    }

    /**
     * Walk the double-array trie from state s, using input character c. If there
     * exists an edge from @a s with arc labeled @a c, this function returns TRUE
     * and @a *s is updated to the new state. Otherwise, it returns FALSE and @a
     * *s is left unchanged.
     * 
     * @param s current state
     * @param c the input character
     * 
     * @return boolean indicating success
     */
    private int getNext(int s, int c) {
        int next = base.get(s) + c;
        if (next < check.capacity() && check.get(next) == s) {
            return next;
        }
        return -1;
    }

    private boolean branchInBranch(int sepNode, int c, ByteBuf suffix, V data) {
        int newDA = insertBranch(sepNode, c);

        int newTail = tail.addSuffix(suffix);
        // tail_set_data (trie->tail, new_tail, data);
        base.set(newDA, -newTail);

        this.isDirty = true;
        return true;
    }

    private boolean branchInTail(int sepNode, int c, ByteBuf suffix, V data) {
        /* adjust separate point in old path */
        int oldTail = getTailIndex(sepNode);
        ByteBuf oldSuffix = tail.getSuffix(oldTail);
        if (oldSuffix == null)
            return false;

        byte oldByte, newByte;
        int s = sepNode;
        suffix.resetReaderIndex();
        oldSuffix.resetReaderIndex();
        do {
            newByte = suffix.readByte();
            oldByte = oldSuffix.readByte();
            if (newByte != oldByte)
                break;
            int t = insertBranch(s, newByte + 1);
            if (TRIE_INDEX_ERROR == t) {
                // /* failed, undo previous insertions and return error */
                // da_prune_upto (trie->da, sep_node, s);
                // trie_da_set_tail_index (trie->da, sep_node, old_tail);
                // throw new RuntimeException("error happened!");
                return false;
            }
            s = t;
        } while (suffix.isReadable() && oldSuffix.isReadable());

        int oldDA = insertBranch(s, oldByte + 1);
        if (TRIE_INDEX_ERROR == oldDA) {
            // /* failed, undo previous insertions and return error */
            // da_prune_upto (trie->da, sep_node, s);
            // trie_da_set_tail_index (trie->da, sep_node, old_tail);
            // throw new RuntimeException("error happened!");
            return false;
        }

        tail.setSuffix(oldTail, oldSuffix.discardReadBytes().copy());
        setTailIndex(oldDA, oldTail);

        /* insert the new branch at the new separate point */
        return branchInBranch(s, newByte + 1, suffix.discardReadBytes().copy(), data);
    }

    /**
     * @brief Insert a branch from trie node
     * 
     * @param d : the double-array structure
     * @param s : the state to add branch to
     * @param c : the character for the branch label
     * 
     * @return the index of the new node
     * 
     *         Insert a new arc labelled with character @a c from the trie node
     *         represented by index @a s in double-array structure @a d. Note that
     *         it assumes that no such arc exists before inserting.
     */
    private int insertBranch(int s, int c) {
        int bs = base.get(s);
        int next;
        if (bs > 0) {
            next = bs + c;
            /* if already there, do not actually insert */
            if (next < check.capacity() && check.get(next) == s)
                return next;

            /*
             * if (base + c) > TRIE_INDEX_MAX which means 'next' is overflow, or cell
             * [next] is not free, relocate to a free slot
             */
            if (bs > TRIE_INDEX_MAX - c || !checkFreeCell(next)) {
                List<Integer> symbols = outputSymbols(s);
                int insertIndex = Collections.binarySearch(symbols, c);
                if (insertIndex < 0)
                    symbols.add(-(insertIndex + 1), c);
                int newBase = findFreeBase(symbols);

                if (TRIE_INDEX_ERROR == newBase)
                    return TRIE_INDEX_ERROR;

                relocateBase(s, newBase);
                next = newBase + c;
            }
        } else {
            List<Integer> symbols = new ArrayList<Integer>();
            symbols.add(c);
            int newBase = findFreeBase(symbols);

            if (TRIE_INDEX_ERROR == newBase)
                return TRIE_INDEX_ERROR;

            base.set(s, newBase);
            next = newBase + c;
        }
        allocateCell(next);
        check.set(next, s);
        return next;
    }

    private void relocateBase(int s, int newBase) {

        int oldBase = base.get(s);
        List<Integer> symbols = outputSymbols(s);

        for (int sym : symbols) {

            int oldNext = oldBase + sym;
            int newNext = newBase + sym;
            int oldNextBase = base.get(oldNext);

            /* allocate new next node and copy BASE value */
            allocateCell(newNext);
            check.set(newNext, s);
            base.set(newNext, oldNextBase);

            /*
             * old_next node is now moved to new_next so, all cells belonging to
             * old_next must be given to new_next
             */
            /* preventing the case of TAIL pointer */
            if (oldNextBase > 0) {

                int max = Math.min((1 << Byte.SIZE) + 1, TRIE_INDEX_MAX - oldNextBase);
                for (int c = 1; c < max; c++) {
                    if (check.get(oldNextBase + c) == oldNext)
                        check.set(oldNextBase + c, newNext);
                }
            }

            /* free old_next node */
            freeCell(oldNext);
        }

        /* finally, make BASE[s] point to new_base */
        base.set(s, newBase);
    }

    private List<Integer> outputSymbols(int s) {
        List<Integer> syms = new ArrayList<Integer>();

        int bs = base.get(s);
        int max = Math.min((1 << Byte.SIZE) + 1, TRIE_INDEX_MAX - bs);
        for (int c = 1; c < max; c++) {
            if (getCheck(bs + c) == s)
                syms.add(c);
        }

        return syms;
    }

    private void freeCell(int cell) {
        /* find insertion point */
        int i = -check.get(FREE_LIST_BEGIN);
        while (i != FREE_LIST_BEGIN && i < cell)
            i = -check.get(i);

        int prev = -base.get(i);

        /* insert cell before i */
        check.set(cell, -i);
        base.set(cell, -prev);
        check.set(prev, -cell);
        base.set(i, -cell);
    }

    private int findFreeBase(List<Integer> symbols) {
        /* find first free cell that is beyond the first symbol */
        int firstSym = symbols.get(0);
        int s = -check.get(FREE_LIST_BEGIN);
        while (s != FREE_LIST_BEGIN && s < (firstSym + DA_POOL_BEGIN)) {
            s = -check.get(s);
        }
        if (s == FREE_LIST_BEGIN) {
            for (s = firstSym + DA_POOL_BEGIN;; ++s) {
                if (!extendDoubleArray(s))
                    return TRIE_INDEX_ERROR;
                if (check.get(s) < 0)
                    break;
            }
        }

        /* search for next free cell that fits the symbols set */
        while (!fitSymbols(s - firstSym, symbols)) {
            /* extend pool before getting exhausted */
            if (-check.get(s) == FREE_LIST_BEGIN) {
                if (!extendDoubleArray(base.capacity()))
                    return TRIE_INDEX_ERROR;
            }

            s = -check.get(s);
        }

        return s - firstSym;
    }

    private boolean fitSymbols(int base, List<Integer> symbols) {
        for (int sym : symbols) {
            /*
             * if (base + sym) > TRIE_INDEX_MAX which means it's overflow, or
             * cell[base + sym] is not free, the symbol is not fit.
             */
            if (base > TRIE_INDEX_MAX - sym || !checkFreeCell(base + sym))
                return false;
        }
        return true;
    }

    private boolean checkFreeCell(int s) {
        return extendDoubleArray(s) && check.get(s) < 0;
    }

    private boolean extendDoubleArray(int toIndex) {
        if (toIndex <= 0 || TRIE_INDEX_MAX <= toIndex)
            return false;

        if (toIndex < base.capacity())
            return true;

        int newBegin = base.capacity();
        base.capacity(toIndex + 1);
        check.capacity(toIndex + 1);

        /* initialize new free list */
        for (int i = newBegin; i < toIndex; i++) {
            check.set(i, -(i + 1));
            base.set(i + 1, -i);
        }

        /* merge the new circular list to the old */
        int freeTail = -base.get(FREE_LIST_BEGIN);
        check.set(freeTail, -newBegin);
        base.set(newBegin, -freeTail);
        check.set(toIndex, -FREE_LIST_BEGIN);
        base.set(FREE_LIST_BEGIN, -toIndex);

        return true;
    }

    private void allocateCell(int cell) {
        int prev = -base.get(cell);
        int next = -check.get(cell);

        /* remove the cell from free list */
        check.set(prev, -next);
        base.set(next, -prev);
    }

    @Override
    public V put(K key, V value) {
        if (key == null) {
            throw new NullPointerException("Key cannot be null");
        }

        int s = DOUBLE_ARRAY_ROOT;
        keyAnalyzer.setValue(key);
        while (keyAnalyzer.hasNext()) {
            if (daIsSeperate(s))
                break;
            int c = (int) keyAnalyzer.next() + 1;
            int next = getNext(s, c);
            if (next < 0) {
                branchInBranch(s, c, keyAnalyzer.rest(), value);
                return value;
            }
            s = next;
        }

        /* walk through tail */
        ByteBuf suffix = tail.getSuffix(getTailIndex(s));
        suffix.resetReaderIndex();
        ByteBuf tailBytes = keyAnalyzer.rest();

        int c = (keyAnalyzer.hasNext() ? keyAnalyzer.next() : 0) + 1;
        while (suffix.isReadable() && tailBytes.isReadable()) {
            byte b = suffix.readByte();
            if (b != tailBytes.readByte()) {
                branchInTail(s, c, tailBytes, value);
                return value;
            }
        }

        // , ?
        this.isDirty = true;
        return null;
    }

    private int getTailIndex(int s) {
        return -base.get(s);
    }

    private void setTailIndex(int s, int v) {
        base.set(s, -v);
    }

    @Override
    public V get(Object key) {
        // if (key == null) {
        // throw new NullPointerException("Key cannot be null");
        // }
        //
        // int s = getRoot();
        // keyAnalyzer.setValue(key);
        // while(keyAnalyzer.hasNext()) {
        // int c = (int) keyAnalyzer.next() + 1;
        // if((s = walkDoubleArray(s, c)) < 0) {
        //
        // }
        // }
        return null;
    }

}