org.fnlp.nlp.similarity.train.WordClusterM.java Source code

Java tutorial

Introduction

Here is the source code for org.fnlp.nlp.similarity.train.WordClusterM.java

Source

/**
*  This file is part of FNLP (formerly FudanNLP).
*  
*  FNLP is free software: you can redistribute it and/or modify
*  it under the terms of the GNU Lesser General Public License as published by
*  the Free Software Foundation, either version 3 of the License, or
*  (at your option) any later version.
*  
*  FNLP is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU Lesser General Public License for more details.
*  
*  You should have received a copy of the GNU General Public License
*  along with FudanNLP.  If not, see <http://www.gnu.org/licenses/>.
*  
*  Copyright 2009-2014 www.fnlp.org. All rights reserved. 
*/

package org.fnlp.nlp.similarity.train;

import gnu.trove.iterator.TIntIterator;

import java.util.Date;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;

/**
 * Multi-threaded variant of {@link WordCluster}.
 *
 * The original description read "Brown ??" (its non-ASCII part was lost to an
 * encoding problem), so this is presumably Brown word clustering -- TODO confirm
 * against WordCluster.
 *
 * Each call to {@link #mergeCluster()} scores every unordered pair of slots on a
 * fixed-size thread pool and then merges the best-scoring pair.
 *
 * NOTE(review): every field declared here is transient. If
 * {@code WordCluster.loadFrom} uses Java serialization, a reloaded instance has
 * no thread pool and no task counter, so {@link #mergeCluster()} must not be
 * called on it -- confirm.
 *
 * @author xpqiu
 * @since FudanNLP 1.5
 */
public class WordClusterM extends WordCluster {

    private static final long serialVersionUID = 58160232476872689L;

    /** Number of worker threads used by {@link #pool}. */
    transient int numThread = 4;
    /** Executor that runs the pair-scoring tasks. */
    transient private ExecutorService pool;
    /** Best score reported so far during the current {@link #mergeCluster()} pass. */
    transient float maxL;
    /** First slot of the best pair so far, or -1 if none has been reported. */
    transient int maxc1;
    /** Second slot of the best pair so far, or -1 if none has been reported. */
    transient int maxc2;
    /** Number of submitted scoring tasks that have not finished yet. */
    transient AtomicInteger count = new AtomicInteger();

    /**
     * Creates a clusterer backed by a fixed pool of worker threads.
     *
     * The pool threads are non-daemon; they keep the JVM alive until the pool is
     * shut down or {@code System.exit} is called.
     *
     * @param threads number of worker threads
     * @throws IllegalArgumentException if {@code threads <= 0}
     *         (raised by {@link Executors#newFixedThreadPool(int)})
     */
    public WordClusterM(int threads) {
        this.numThread = threads;
        pool = Executors.newFixedThreadPool(numThread);
    }

    /**
     * Records the pair (i, j) as the current best if its score is strictly greater
     * than the best score seen so far.
     *
     * Synchronized because the worker tasks call it concurrently. Because the
     * comparison is strict, the first reported pair wins among equal scores, so
     * the winner of a tie depends on the order in which tasks finish. A NaN score
     * never replaces the current best.
     *
     * @param f score of the pair
     * @param i first slot of the pair
     * @param j second slot of the pair
     */
    public synchronized void getmax(float f, int i, int j) {
        if (f > maxL) {
            maxL = f;
            maxc1 = i;
            maxc2 = j;
        }
    }

    /**
     * Task that scores one pair of slots with {@code calcL} and reports the
     * result through {@link WordClusterM#getmax(float, int, int)}.
     */
    class Multiplesolve implements Runnable {

        int c1, c2;

        /**
         * @param c1 first slot of the pair to score
         * @param c2 second slot of the pair to score
         */
        public Multiplesolve(int c1, int c2) {
            this.c1 = c1;
            this.c2 = c2;
        }

        @Override
        public void run() {
            try {
                // NOTE(review): calcL runs concurrently on several threads; this
                // assumes it only reads shared state -- confirm in WordCluster.
                float l = calcL(c1, c2);
                getmax(l, c1, c2);
            } finally {
                // Always signal completion. If a failing task skipped this,
                // mergeCluster() would wait for the counter to reach zero forever.
                count.decrementAndGet();
            }
        }

    }

    /**
     * Scores every pair (i, j) of slots with {@code i < j} in parallel, waits for
     * all scoring tasks to finish, and merges the best-scoring pair.
     *
     * If no pair was scored (or none scored above negative infinity), {@code merge}
     * is called with (-1, -1); how that is handled is up to {@code merge}.
     *
     * If the calling thread is interrupted while waiting, the wait continues until
     * all tasks are done and the interrupt status is restored before returning.
     */
    public void mergeCluster() {
        maxc1 = -1;
        maxc2 = -1;
        maxL = Float.NEGATIVE_INFINITY;
        TIntIterator it1 = slots.iterator();

        while (it1.hasNext()) {
            int i = it1.next();
            TIntIterator it2 = slots.iterator();
            while (it2.hasNext()) {
                int j = it2.next();

                // Score each unordered pair exactly once.
                if (i >= j)
                    continue;
                Multiplesolve c = new Multiplesolve(i, j);
                // Count the task before submitting it so the counter can never be
                // observed below the number of tasks still in flight.
                count.incrementAndGet();
                pool.execute(c);
            }
        }

        awaitPendingTasks();

        merge(maxc1, maxc2);
    }

    /**
     * Blocks until every submitted scoring task has finished, i.e. until
     * {@link #count} drops back to zero.
     */
    private void awaitPendingTasks() {
        // Poll interval grows with the number of pairs. Computed in long so that a
        // large slot size cannot overflow into a negative sleep time. For small
        // slot sizes this is 0 ms, i.e. the loop re-checks almost immediately.
        final long pollMillis = (long) slotsize * slotsize / 1000;
        boolean interrupted = false;

        while (count.get() != 0) {
            try {
                Thread.sleep(pollMillis);
            } catch (InterruptedException e) {
                // Keep waiting: merge() must only run once maxc1/maxc2 are final.
                // Remember the interrupt so it is not lost to the caller.
                interrupted = true;
            }
        }

        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Command-line entry point: reads a corpus, clusters it, saves the model and a
     * text dump, then reloads the saved model and dumps it again.
     *
     * Options (all take a value):
     * {@code -path} input corpus file (default {@code ./tmp/SogouCA.mini.txt}),
     * {@code -res} result file (default {@code ./tmp/cluster.txt}),
     * {@code -slot} slot size (default 20),
     * {@code -thd} number of worker threads (default 3).
     *
     * @param args command-line arguments
     * @throws Exception if an option value is not a valid integer, or if reading,
     *         clustering or saving fails
     */
    public static void main(String[] args) throws Exception {

        // Option definitions. The original descriptions were lost to an encoding
        // problem; the literals are kept exactly as found.
        Options opt = new Options();

        opt.addOption("path", true, "?");
        opt.addOption("res", true, "?");
        opt.addOption("slot", true, "?");
        opt.addOption("thd", true, "");

        BasicParser parser = new BasicParser();
        CommandLine cl;
        try {
            cl = parser.parse(opt, args);
        } catch (Exception e) {
            System.err.println("Parameters format error");
            return;
        }

        int threads = Integer.parseInt(cl.getOptionValue("thd", "3"));
        System.out.println("?:" + threads);

        int slotsize = Integer.parseInt(cl.getOptionValue("slot", "20"));
        System.out.println("?:" + slotsize);

        String file = cl.getOptionValue("path", "./tmp/SogouCA.mini.txt");
        System.out.println("?:" + file);

        String resfile = cl.getOptionValue("res", "./tmp/cluster.txt");
        System.out.println(":" + resfile);

        long starttime = System.currentTimeMillis();
        SougouCA sca = new SougouCA(file);

        WordClusterM wc = new WordClusterM(threads);
        try {
            wc.slotsize = slotsize;
            wc.read(sca);

            wc.startClustering();
            wc.saveModel(resfile + ".m");
            wc.saveTxt(resfile);
        } finally {
            // The pool threads are non-daemon. Without this, an exception above
            // would leave them running and the JVM would never exit.
            wc.pool.shutdown();
        }

        // Round-trip check: reload the saved model and dump it again.
        wc = (WordClusterM) WordCluster.loadFrom(resfile + ".m");
        wc.saveTxt(resfile + "1");
        long endtime = System.currentTimeMillis();
        // Elapsed time in whole minutes.
        System.out.println("Total Time:" + (endtime - starttime) / 60000);
        System.out.println("Done");
        System.exit(0);
    }
}