com.weblyzard.lib.string.nilsimsa.Nilsimsa.java Source code

Java tutorial

Introduction

Here is the source code for com.weblyzard.lib.string.nilsimsa.Nilsimsa.java

Source

package com.weblyzard.lib.string.nilsimsa;

import java.util.*;

import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.ArrayUtils;

/**
 * Computes the Nilsimsa hash for the given string.
 * @author Albert Weichselbraun <albert.weichselbraun@htwchur.ch>
 *                              <weichselbraun@weblyzard.com>
 * 
 * This class is a Java translation of the Python implementation by
 * Michael Itz <http://code.google.com/p/py-nilsimsa>.
 * 
 * Original C nilsimsa-0.2.4 implementation by cmeclax:
 * <http://ixazon.dynip.com/~cmeclax/nilsimsa.html>
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 3 dated June, 2007.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program;  if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
 * MA 02110-1301, USA.
 * 
 */
public class Nilsimsa {

    private int count = 0; // number of characters seen so far
    private final int[] acc = new int[256]; // accumulators for computing the digest
    private final int[] lastch = new int[4]; // the last four seen characters (-1 = not seen yet)

    // pre-defined transformation table (256 entries, stored as signed bytes;
    // all users mask the final result back to 0..255)
    private static final byte[] TRAN = Nilsimsa
            ._getByteArray("02D69E6FF91D04ABD022161FD873A1AC" + "3B7062961E6E8F399D05144AA6BEAE0E"
                    + "CFB99C9AC76813E12DA4EB518D646B50" + "23800341ECBB71CC7A867F98F2365EEE"
                    + "8ECE4FB832B65F59DC1B314C7BF06301" + "6CBA07E81277493CDA46FE2F791C9B30"
                    + "E300067E2E0F383321ADA554CAA729FC" + "5A47697DC595B5F40B90A3816D255535"
                    + "F575740A26BF195C1AC6FF995D84AA66" + "3EAF78B32043C1ED24EAE63F18F3A042"
                    + "57085360C3C0834082D709BD442A67A8" + "93E0C2569FD9DD8515B48A27289276DE"
                    + "EFF8B2B7C93D45944B110D65D5348B91" + "0CFA87E97C5BB14DE5D4CB10A21789BC"
                    + "DBB0E2978852F748D3612C3A2BD18CFB" + "F1CDE46AE7A9FDC437C8D2F6DF58724E");

    // pre-defined population-count table (number of set bits for every value
    // 0..255) used to compute the bitwise difference between two digests.
    private static final byte[] POPC = Nilsimsa
            ._getByteArray("00010102010202030102020302030304" + "01020203020303040203030403040405"
                    + "01020203020303040203030403040405" + "02030304030404050304040504050506"
                    + "01020203020303040203030403040405" + "02030304030404050304040504050506"
                    + "02030304030404050304040504050506" + "03040405040505060405050605060607"
                    + "01020203020303040203030403040405" + "02030304030404050304040504050506"
                    + "02030304030404050304040504050506" + "03040405040505060405050605060607"
                    + "02030304030404050304040504050506" + "03040405040505060405050605060607"
                    + "03040405040505060405050605060607" + "04050506050606070506060706070708");

    /**
     * Creates a Nilsimsa object with an empty state.
     */
    public Nilsimsa() {
        reset();
    }

    /**
     * Updates the Nilsimsa digest using the given String.
     * Characters are processed one UTF-16 code unit at a time; only the
     * low 8 bits of each code unit influence the accumulators because
     * every table index is masked to the range 0..255.
     * @param s: the String data to consider in the update
     */
    public void update(String s) {
        for (int ch : s.toCharArray()) {
            count++;

            // increment the accumulators for every triplet that can be formed
            // from the current character and the (up to four) previous ones
            if (lastch[1] > -1) {
                acc[_tran3(ch, lastch[0], lastch[1], 0)]++;
            }
            if (lastch[2] > -1) {
                acc[_tran3(ch, lastch[0], lastch[2], 1)]++;
                acc[_tran3(ch, lastch[1], lastch[2], 2)]++;
            }
            if (lastch[3] > -1) {
                acc[_tran3(ch, lastch[0], lastch[3], 3)]++;
                acc[_tran3(ch, lastch[1], lastch[3], 4)]++;
                acc[_tran3(ch, lastch[2], lastch[3], 5)]++;
                acc[_tran3(lastch[3], lastch[0], ch, 6)]++;
                acc[_tran3(lastch[3], lastch[2], ch, 7)]++;
            }

            // slide the window: drop the oldest character, insert the newest
            System.arraycopy(lastch, 0, lastch, 1, lastch.length - 1);
            lastch[0] = ch;
        }
    }

    /**
     * Resets the hash computation: clears the character count and all
     * accumulators and marks the window of previous characters as empty.
     */
    private void reset() {
        count = 0;
        Arrays.fill(acc, 0);
        Arrays.fill(lastch, -1);
    }

    /**
     * Converts the given hexString to a byte array.
     * @param hexString: the hexString to convert
     * @return the corresponding byte array
     * @throws IllegalArgumentException if hexString is not valid hexadecimal;
     *         failing fast here avoids a later NullPointerException on the tables
     */
    private static byte[] _getByteArray(String hexString) {
        try {
            return Hex.decodeHex(hexString.toCharArray());
        } catch (DecoderException e) {
            throw new IllegalArgumentException("Invalid hex string: " + hexString, e);
        }
    }

    /**
     * Accumulator for a transition n between the chars a, b, c.
     * @return the index (0..255) of the accumulator to increment
     */
    private static int _tran3(int a, int b, int c, int n) {
        // TRAN holds signed bytes; sign extension only affects bits above the
        // low eight, which the final mask discards.
        int i = c ^ TRAN[n];
        return ((TRAN[(a + n) & 0xff] ^ TRAN[b & 0xff] * (n + n + 1)) + TRAN[i & 0xff]) & 0xff;
    }

    /**
     * @return the 32 byte (256 bit) digest for the current Nilsimsa object.
     */
    public byte[] digest() {
        // total number of accumulator increments performed for `count` characters
        int total = 0;
        if (count == 3) {
            total = 1;
        } else if (count == 4) {
            total = 4;
        } else if (count > 4) {
            total = 8 * count - 28;
        }
        // mean number of increments per accumulator (integer division)
        int threshold = total / 256;

        byte[] digest = new byte[32]; // zero-initialised by the JVM
        for (int i = 0; i < 256; i++) {
            if (acc[i] > threshold) {
                // set bit (i mod 8) of byte (i / 8)
                digest[i >> 3] |= 1 << (i & 7);
            }
        }
        ArrayUtils.reverse(digest);
        return digest;
    }

    /**
     * @return a hexadecimal String representation of the digest for the
     *          current state of the Nilsimsa object.
     */
    public String hexdigest() {
        return Hex.encodeHexString(digest());
    }

    /**
     * Compute the Nilsimsa digest for the given String.
     * Any state accumulated by previous updates is discarded first.
     * @param s: the String to hash
     * @return the Nilsimsa digest.
     */
    public byte[] digest(String s) {
        reset();
        update(s);
        return digest();
    }

    /**
     * Compute the Nilsimsa hexDigest for the given String.
     * Any state accumulated by previous updates is discarded first.
     * @param s: the String to hash
     * @return the Nilsimsa hexdigest.
     */
    public String hexdigest(String s) {
        return Hex.encodeHexString(digest(s));
    }

    /**
     * Compares a Nilsimsa object to the current one.
     * @param cmp: the comparison object
     * @return 128 minus the number of bits in which the two digests differ,
     *         i.e. 128 for identical digests down to -128 when all 256 bits
     *         differ.
     */
    public int compare(Nilsimsa cmp) {
        byte[] n1 = digest();
        byte[] n2 = cmp.digest();

        // an int is required: up to 256 bits may differ, which overflows a byte
        int differingBits = 0;
        for (int i = 0; i < n1.length; i++) {
            // mask AFTER the xor; '&' binds tighter than '^', so masking only
            // one operand would leave a negative index for negative bytes
            differingBits += POPC[(n1[i] ^ n2[i]) & 0xff];
        }
        return 128 - differingBits;
    }
}