Example usage for the org.apache.commons.codec.language.DoubleMetaphone constructor DoubleMetaphone()

List of usage examples for the org.apache.commons.codec.language.DoubleMetaphone constructor DoubleMetaphone()

Introduction

On this page you can find example usages of the org.apache.commons.codec.language.DoubleMetaphone constructor DoubleMetaphone().

Prototype

public DoubleMetaphone() 

Source Link

Document

Creates an instance of this DoubleMetaphone encoder

Usage

From source file:query.Amostra.java

/**
 * Builds a block index from the selected records of the given data source.
 *
 * <p>A record is selected when {@code encontraKeySelecionada} returns
 * {@code true} for its {@code "id"} attribute. Each selected record becomes a
 * {@code Vertice} tagged {@code "cora"}, is inserted into the index under the
 * Double Metaphone code of its {@code "titulo"} attribute, and is also added
 * to {@code dadosEntrada}.
 *
 * @param dataSource source whose records are iterated from start to end
 * @return a newly created index holding the selected records
 */
public BlockIndex blocaDadosDaAmostraConsultaCora(CSVSource dataSource) {

    BlockIndex bi2 = new BlockIndex();
    DoubleMetaphone db = new DoubleMetaphone();

    for (Iterator<DuDeObject> iterator = dataSource.iterator(); iterator.hasNext();) {
        DuDeObject next = iterator.next();

        // Read the id once: it serves as both the selection key and the vertex id.
        String pk = next.getAttributeValue("id").toString();
        if (encontraKeySelecionada(pk)) {
            String titulo = next.getAttributeValue("titulo").toString();
            String autor = next.getAttributeValue("autor").toString();

            String keyBlock = db.doubleMetaphone(titulo);
            // NOTE(review): author is passed before title here, whereas the "cd"
            // variant passes title first — confirm the intended Vertice argument order.
            Vertice v1 = new Vertice(pk, "cora", -1, autor, titulo);
            bi2.insertVertice(keyBlock, v1);
            this.dadosEntrada.add(v1);
        }
    }

    return bi2;

}

From source file:query.Amostra.java

/**
 * Builds a block index from the selected records of the given data source.
 *
 * <p>A record is selected when {@code encontraKeySelecionada} returns
 * {@code true} for its {@code "key"} attribute. Each selected record becomes a
 * {@code Vertice} tagged {@code "cd"}, is inserted into the index under the
 * Double Metaphone code of its {@code "title"} attribute, and is also added
 * to {@code dadosEntrada}.
 *
 * @param dataSource source whose records are iterated from start to end
 * @return a newly created index holding the selected records
 */
public BlockIndex blocaDadosDaAmostraConsulta(CSVSource dataSource) {
    final BlockIndex index = new BlockIndex();
    final DoubleMetaphone encoder = new DoubleMetaphone();

    Iterator<DuDeObject> rows = dataSource.iterator();
    while (rows.hasNext()) {
        DuDeObject row = rows.next();

        // Skip everything that is not part of the selected sample.
        String selectionKey = row.getAttributeValue("key").toString();
        if (!encontraKeySelecionada(selectionKey)) {
            continue;
        }

        String id = row.getAttributeValue("pk").toString();
        String title = row.getAttributeValue("title").toString();
        String artist = row.getAttributeValue("artist").toString();

        String blockingKey = encoder.encode(title);
        Vertice vertex = new Vertice(id, "cd", -1, title, artist);
        index.insertVertice(blockingKey, vertex);
        this.dadosEntrada.add(vertex);
    }

    return index;
}

From source file:query.QueryExperimento.java

/**
 * Runs every record of {@code cd.csv} against the {@code bi} index and
 * reports the outcome on standard error.
 *
 * <p>For each record the Double Metaphone code of its {@code "artist"}
 * attribute is used as the blocking key passed to {@code bi.getId}. The
 * method prints the elapsed wall-clock time in milliseconds followed by the
 * total, not-found and found counts.
 *
 * @throws FileNotFoundException declared by this method; presumably raised
 *         when {@code cd.csv} cannot be opened — confirm against CSVSource
 */
public void query() throws FileNotFoundException {
    GlobalConfig.getInstance().setInMemoryObjectThreshold(1000);

    // sets the CSV data source
    CSVSource dataSource = new CSVSource("cd", new File("cd.csv"));
    dataSource.enableHeader();
    dataSource.addIdAttributes("pk");
    long start = System.currentTimeMillis();

    DoubleMetaphone db = new DoubleMetaphone();
    int achou = 0;
    int nAchou = 0;
    int total = 0;

    for (Iterator<DuDeObject> iterator = dataSource.iterator(); iterator.hasNext();) {
        DuDeObject next = iterator.next();

        String pk = next.getAttributeValue("pk").toString();
        String block = next.getAttributeValue("artist").toString();

        String keyBlock = db.encode(block);

        boolean clusterId = bi.getId(pk, keyBlock, "cd");
        total++;
        if (clusterId) {
            achou++;
        } else {
            nAchou++;
        }
    }
    System.err.println((System.currentTimeMillis() - start) + " ms");
    // A separator before "Achou" keeps the not-found count from running into the label.
    System.err.println("total " + total + " n achou " + nAchou + " Achou " + achou);
}

From source file:query.QueryExperimento.java

/**
 * Builds a block index from the leading records of the given data source.
 *
 * <p>Only the first {@code tamanho} records are turned into {@code Vertice}
 * objects (tagged {@code "cd"}) and inserted under the Double Metaphone code
 * of their {@code "title"} attribute. The remaining records are still
 * iterated but otherwise ignored. The index size is printed to standard
 * output before returning.
 *
 * @param dataSource source whose records are iterated from start to end
 * @param tamanho number of leading records to index
 * @return a newly created index holding the indexed records
 */
public BlockIndex blocaConsultaReduzidaFixa(CSVSource dataSource, int tamanho) {
    final BlockIndex index = new BlockIndex();
    final DoubleMetaphone encoder = new DoubleMetaphone();

    int seen = 0;
    Iterator<DuDeObject> rows = dataSource.iterator();
    while (rows.hasNext()) {
        DuDeObject row = rows.next();

        // The counter advances for every record, whether it is indexed or not.
        boolean withinLimit = seen < tamanho;
        seen++;
        if (!withinLimit) {
            continue;
        }

        String id = row.getAttributeValue("pk").toString();
        String title = row.getAttributeValue("title").toString();
        String artist = row.getAttributeValue("artist").toString();

        String blockingKey = encoder.encode(title);
        index.insertVertice(blockingKey, new Vertice(id, "cd", -1, title, artist));
    }

    System.out.println(" Tamanho bloco " + index.getNumeroElementos());
    return index;
}

From source file:query.QueryExperimento.java

/**
 * Builds a block index from every second record of the given data source.
 *
 * <p>Records are counted from zero; those at even positions are skipped and
 * those at odd positions are turned into {@code Vertice} objects (tagged
 * {@code "cd"}) and inserted under the Double Metaphone code of their
 * {@code "title"} attribute.
 *
 * @param dataSource source whose records are iterated from start to end
 * @param tamanho not read by this method
 * @return a newly created index holding the indexed records
 */
public BlockIndex blocaConsultaReduzida(CSVSource dataSource, int tamanho) {
    final BlockIndex index = new BlockIndex();
    final DoubleMetaphone encoder = new DoubleMetaphone();

    int position = 0;
    Iterator<DuDeObject> rows = dataSource.iterator();
    while (rows.hasNext()) {
        DuDeObject row = rows.next();

        // Even positions (0, 2, 4, ...) are dropped; the counter always advances.
        boolean evenPosition = position % 2 == 0;
        position++;
        if (evenPosition) {
            continue;
        }

        String id = row.getAttributeValue("pk").toString();
        String title = row.getAttributeValue("title").toString();
        String artist = row.getAttributeValue("artist").toString();

        String blockingKey = encoder.encode(title);
        index.insertVertice(blockingKey, new Vertice(id, "cd", -1, title, artist));
    }

    return index;
}

From source file:query.QueryExperimento.java

/**
 * Builds a block index from every record of the given data source.
 *
 * <p>Each record becomes a {@code Vertice} tagged {@code "cd"} and is
 * inserted under the Double Metaphone code of its {@code "title"} attribute.
 *
 * @param dataSource source whose records are iterated from start to end
 * @return a newly created index holding all records
 */
public BlockIndex blocaConsultaCD(CSVSource dataSource) {
    final BlockIndex index = new BlockIndex();
    final DoubleMetaphone encoder = new DoubleMetaphone();

    Iterator<DuDeObject> rows = dataSource.iterator();
    while (rows.hasNext()) {
        DuDeObject row = rows.next();

        String id = row.getAttributeValue("pk").toString();
        String title = row.getAttributeValue("title").toString();
        String artist = row.getAttributeValue("artist").toString();

        String blockingKey = encoder.doubleMetaphone(title);
        index.insertVertice(blockingKey, new Vertice(id, "cd", -1, title, artist));
    }

    return index;
}

From source file:query.QueryExperimento.java

/**
 * Builds a block index from the records of the given data source that have a
 * {@code "TITLE"} attribute.
 *
 * <p>Records whose {@code "TITLE"} lookup returns {@code null} are skipped.
 * Every other record becomes a {@code Vertice} tagged {@code "cora"}, with an
 * empty string as its last constructor argument, and is inserted under the
 * Double Metaphone code of the title's string form.
 *
 * @param dataSource source whose records are iterated from start to end
 * @return a newly created index holding the titled records
 */
public BlockIndex blocaConsultaCora(CSVSource dataSource) {
    final BlockIndex index = new BlockIndex();
    final DoubleMetaphone encoder = new DoubleMetaphone();

    Iterator<DuDeObject> rows = dataSource.iterator();
    while (rows.hasNext()) {
        DuDeObject row = rows.next();

        // The id is read before the title check, as in the original flow.
        String id = row.getAttributeValue("ID").toString();
        JsonValue title = row.getAttributeValue("TITLE");
        if (title == null) {
            continue;
        }

        String blockingKey = encoder.encode(title.toString());
        Vertice vertex = new Vertice(id, "cora", -1, title.toString(), "");
        index.insertVertice(blockingKey, vertex);
    }

    return index;
}

From source file:query.QueryLevensteinExperimentCora.java

/**
 * Runs every record of the configured CSV source against the {@code bi}
 * index and reports the outcome on standard error.
 *
 * <p>For each record the Double Metaphone code of its {@code "artist"}
 * attribute is used as the blocking key passed to {@code bi.getId}. The
 * method prints the elapsed wall-clock time in milliseconds followed by the
 * total, not-found and found counts.
 *
 * @throws FileNotFoundException declared by this method; presumably raised
 *         when the CSV file cannot be opened — confirm against CSVSource
 */
public void query() throws FileNotFoundException {
    GlobalConfig.getInstance().setInMemoryObjectThreshold(1000);

    // sets the CSV data source
    // NOTE(review): the source is named "cora" and points at ".csv", yet the loop
    // reads "pk"/"artist" and queries the index with "cd" — this looks copied from
    // the CD experiment; confirm the intended file name, attributes and source tag.
    CSVSource dataSource = new CSVSource("cora", new File(".csv"));
    dataSource.enableHeader();
    dataSource.addIdAttributes("pk");
    long start = System.currentTimeMillis();

    DoubleMetaphone db = new DoubleMetaphone();
    int achou = 0;
    int nAchou = 0;
    int total = 0;
    for (Iterator<DuDeObject> iterator = dataSource.iterator(); iterator.hasNext();) {
        DuDeObject next = iterator.next();

        String pk = next.getAttributeValue("pk").toString();
        String block = next.getAttributeValue("artist").toString();

        String keyBlock = db.encode(block);
        boolean clusterId = bi.getId(pk, keyBlock, "cd");
        total++;
        if (clusterId) {
            achou++;
        } else {
            nAchou++;
        }
    }
    System.err.println((System.currentTimeMillis() - start) + " ms");
    // A separator before "Achou" keeps the not-found count from running into the label.
    System.err.println("total " + total + " n achou " + nAchou + " Achou " + achou);
}