/*
 * org.apache.hadoop.hashtable.HashTableBenchmark.java — source code.
 * (Originally published as part of a Java tutorial / source-listing page;
 * the page boilerplate is preserved here as a comment so the file compiles.)
 */

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hashtable;

import java.util.HashMap;
import java.util.Map;
import java.io.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Micro-benchmark harness comparing several hash-table implementations
 * (linear/quadratic probing, double hashing, cuckoo hashing, and the
 * LightWeightGSet variants) on a set of block ids that is either read
 * from a file or generated pseudo-randomly.
 *
 * <p>Not thread-safe; intended to be driven single-threaded from a main().
 */
public class HashTableBenchmark {
    private static final Log LOG = LogFactory.getLog(HashTableBenchmark.class);

    // Table capacity; overwritten by the constructor. Must be a power of two
    // so that (hash & hash_mask) is equivalent to (hash % capacity).
    private int capacity = 64 * 1024 * 1024;
    private int maxsize = 90000000;       // number of ids to load/generate
    private Long[] ids;                   // boxed ids, used when linkedElements == false
    private LongInfo[] idsLI;             // linked-element ids, used when linkedElements == true
    private int divider = 100000000;      // progress-log interval while reading the block file

    private int NUM_NODES = 0;            // count of ids actually loaded/generated
    private int hash_mask = capacity - 1; // valid only because capacity is a power of two
    private String blockFile;
    private boolean linkedElements = false;
    private RandomGen rg;

    /**
     * Builds the benchmark fixture and eagerly populates the id array.
     *
     * @param filename       path of the block-id file, one decimal long per line;
     *                       only read when {@code which == 0}
     * @param which          0 to read ids from {@code filename}; any other value
     *                       is passed to {@link RandomGen} as its seed/mode and
     *                       ids are generated instead
     * @param capacity       hash-table capacity; must be a power of two
     * @param count          number of ids to load or generate
     * @param linkedElements true to store ids as {@link LongInfo} elements (for
     *                       the LightWeightGSet tests), false for boxed Longs
     */
    public HashTableBenchmark(String filename, int which, int capacity, int count, boolean linkedElements) {
        this.capacity = capacity;
        this.hash_mask = capacity - 1;
        this.maxsize = count;
        this.blockFile = filename;
        this.linkedElements = linkedElements;
        switch (which) {
        case 0:
            readBlockFile();
            break;
        default:
            rg = new RandomGen(which);
            generateRandom();
        }
    }

    /**
     * Summarizes bucket occupancy: maps each occupancy count to the number of
     * buckets having that count.
     *
     * @param entries per-bucket entry counts
     * @return human-readable histogram string for logging
     */
    private String getHistogram(int[] entries) {
        Map<Integer, Integer> hist = new HashMap<Integer, Integer>();

        for (int i = 0; i < entries.length; i++) {
            Integer count = hist.get(entries[i]);
            if (count == null) {
                hist.put(entries[i], 1);
            } else {
                hist.put(entries[i], count + 1);
            }
        }
        return "HISTOGRAM: entriesLen: " + entries.length + " -- " + hist.toString();
    }

    // //////////////// READ + RANDOM GENERATORS

    /**
     * Reads up to {@code maxsize} ids from {@code blockFile} (one decimal long
     * per line) into the id array, setting {@code NUM_NODES} to the number read.
     * Failures are logged and swallowed, preserving the original best-effort
     * behavior.
     */
    private void readBlockFile() {
        LOG.info("----> READ BLOCK FILE : START");
        initArray();
        NUM_NODES = 0;
        // try-with-resources replaces the old leak-prone manual close: the
        // original wrapped the FileInputStream in two DataInputStreams, closed
        // only one of them, and leaked the reader on any exception path.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(blockFile)))) {
            String strLine;
            while ((strLine = br.readLine()) != null) {
                if (NUM_NODES % divider == 0)
                    LOG.info("Processed : " + NUM_NODES);
                updateArray(NUM_NODES, Long.parseLong(strLine));
                NUM_NODES++;
            }
            LOG.info("----> READ BLOCK FILE : DONE: Read " + NUM_NODES + " block ids");
        } catch (Exception e) {
            // Do not propagate (callers expect best-effort loading), but report
            // through the class logger instead of printStackTrace().
            LOG.error("Failed to read block file " + blockFile, e);
        }
    }

    /**
     * Fills the id array with {@code maxsize} values from {@link RandomGen}
     * and sets {@code NUM_NODES} accordingly, logging the elapsed time.
     */
    private void generateRandom() {
        initArray();
        long start, stop;
        LOG.info("---------->GENERATING RANDOM IDS ---------->");
        start = System.currentTimeMillis();
        for (int i = 0; i < maxsize; i++) {
            updateArray(i, rg.next());
        }
        NUM_NODES = maxsize;
        stop = System.currentTimeMillis();
        LOG.info("---------->GENERATING RANDOM IDS DONE -- TIME: " + ((stop - start) / 1000.0) + " GENERATED: "
                + NUM_NODES + " ids ");
    }

    // //////////////////////////////////////////////////////////

    /** Allocates whichever backing array the current mode requires. */
    private void initArray() {
        if (linkedElements) {
            idsLI = new LongInfo[maxsize];
        } else {
            ids = new Long[maxsize];
        }
    }

    /** Stores {@code id} at index {@code i} in the active backing array. */
    private void updateArray(int i, long id) {
        if (linkedElements) {
            idsLI[i] = new LongInfo(id);
        } else {
            // Long.valueOf instead of the deprecated new Long(id): allows the
            // JDK's boxed-value cache and avoids a guaranteed allocation.
            ids[i] = Long.valueOf(id);
        }
    }

    // //////////////////////////////////////////////////////////

    /**
     * Benchmarks put/get over one open-addressing table, logging elapsed times,
     * failed inserts, and lookup misses. Requires {@code linkedElements == false}
     * (uses the boxed {@code ids} array).
     *
     * @param mode 0 = linear probing, 1 = quadratic probing, 2 = double hashing,
     *             3 = cuckoo hashing
     * @throws IllegalArgumentException if {@code mode} is not in [0, 3]
     *         (the original code would NPE on the first put instead)
     */
    public void testMultiHashing(int mode) {
        LOG.info("+++++++++++++++++++++++++++++++++++++++++++++++++++++");
        LOG.info("-------------------->MULTIHASHING------------------->");
        long start, stop;
        THashSet c;
        switch (mode) {
        case 0:
            c = new QuadHash(capacity, 0);
            LOG.info("LINEAR COLLISION RESOLUTION");
            break;
        case 1:
            c = new QuadHash(capacity, 1);
            LOG.info("QUAD COLLISION RESOLUTION");
            break;
        case 2:
            c = new DoubleHash(capacity);
            LOG.info("DOUBLE HASH COLLISION RESOLUTION");
            break;
        case 3:
            c = new CuckooHash(capacity);
            LOG.info("CUCKOO HASH COLLISION RESOLUTION");
            break;
        default:
            throw new IllegalArgumentException("Unknown multi-hashing mode: " + mode);
        }

        start = System.currentTimeMillis();
        for (int i = 0; i < NUM_NODES; i++) {
            c.put(ids[i]);
        }
        stop = System.currentTimeMillis();

        LOG.info("--------------->MULTIHASHING PUT DONE--------------->");
        LOG.info(" TIME: " + ((stop - start) / 1000.0));
        LOG.info(" FAILED : " + c.getFailed());

        start = System.currentTimeMillis();
        int present = 0;
        for (int i = 0; i < NUM_NODES; i++) {
            Long getElem = c.get(ids[i]);
            if (getElem != null && getElem.equals(ids[i])) {
                present++;
            }
        }

        stop = System.currentTimeMillis();
        LOG.info("--------------->MULTIHASHING GET DONE--------------->");
        LOG.info(" TIME: " + ((stop - start) / 1000.0));
        LOG.info(" NOT PRESENT: " + (NUM_NODES - present));
    }

    /**
     * Benchmarks put/get over a LightWeightGSet variant, logging elapsed times
     * and lookup misses. Requires {@code linkedElements == true} (uses the
     * {@code idsLI} array).
     *
     * @param mode 0 = single-hash {@link LightWeightGSet}; any other value =
     *             double-hash {@link LightWeightGSetMulti}
     */
    public void testLightweightSetHashing(int mode) {
        LOG.info("+++++++++++++++++++++++++++++++++++++++++++++++++++++");
        LOG.info("------------------>LIGHTWEIGHTGSET------------------>");
        long start, stop;
        LightWeightSet c;
        if (mode == 0) {
            c = new LightWeightGSet(capacity);
            LOG.info("SET VERSION: ONE HASH");
        } else {
            c = new LightWeightGSetMulti(capacity);
            LOG.info("SET VERSION: DOUBLE HASH");
        }
        start = System.currentTimeMillis();
        for (int i = 0; i < NUM_NODES; i++) {
            c.put(idsLI[i]);
        }
        stop = System.currentTimeMillis();
        LOG.info("------------->LIGHTWEIGHTGSET PUT DONE-------------->");
        LOG.info(" TIME: " + ((stop - start) / 1000.0));
        start = System.currentTimeMillis();
        int present = 0;

        // Reuse one probe element for all lookups so GET timing measures the
        // set, not per-iteration allocation.
        LongInfo tempi = new LongInfo();
        for (int i = 0; i < NUM_NODES; i++) {
            tempi.setData(idsLI[i].data);
            LongInfo getElem = c.get(tempi);
            if (getElem != null && getElem.equals(tempi))
                present++;
        }

        stop = System.currentTimeMillis();
        LOG.info("------------->LIGHTWEIGHTGSET GET DONE-------------->");
        LOG.info(" TIME: " + ((stop - start) / 1000.0));
        LOG.info(" NOT PRESENT: " + (NUM_NODES - present));
    }

    /**
     * Benchmarks raw hash-function distribution: for each of the hash functions
     * exposed by {@link Hashes}, hashes every loaded id into a bucket-count
     * array and logs timing plus a bucket-occupancy histogram. Finishes with a
     * two-function "double hashing" pass (second hash used only on collision).
     * Requires {@code linkedElements == false}.
     */
    public void testHashFunctions() {
        long start, stop;
        int[] map;

        LOG.info("+++++++++++++++++++++++++++++++++++++++++++++++++++++");
        for (int hash = 0; hash < 7; hash++) {
            LOG.info("------------------>" + Hashes.getHashDesc(hash) + "------------------>");
            map = new int[capacity];
            start = System.currentTimeMillis();
            for (int i = 0; i < NUM_NODES; i++) {
                map[Hashes.getHash(ids[i], hash) & hash_mask]++;
            }
            stop = System.currentTimeMillis();
            LOG.info("TIME: " + ((stop - start) / 1000.0));
            LOG.info("HIST :" + getHistogram(map));
        }

        LOG.info("================> Double Hashing ================>");

        map = new int[capacity];
        start = System.currentTimeMillis();
        for (int i = 0; i < NUM_NODES; i++) {
            int hash1 = Hashes.getHash32ShiftMul((int) (ids[i].longValue())) & hash_mask;
            if (map[hash1] == 0)
                map[hash1]++;
            else
                map[Hashes.getHash6432shift(ids[i]) & hash_mask]++;
        }
        stop = System.currentTimeMillis();
        LOG.info("TIME: " + ((stop - start) / 1000.0));
        LOG.info("HIST :" + getHistogram(map));

    }
}