List of usage examples for the org.apache.commons.codec.language.DoubleMetaphone constructor DoubleMetaphone()
public DoubleMetaphone()
From source file:query.Amostra.java
public BlockIndex blocaDadosDaAmostraConsultaCora(CSVSource dataSource) { BlockIndex bi2 = new BlockIndex(); //SoundEx db = new SoundEx(); DoubleMetaphone db = new DoubleMetaphone(); for (Iterator<DuDeObject> iterator = dataSource.iterator(); iterator.hasNext();) { DuDeObject next = iterator.next(); if (encontraKeySelecionada(next.getAttributeValue("id").toString())) { String pk = next.getAttributeValue("id").toString(); String block = next.getAttributeValue("titulo").toString(); String block2 = next.getAttributeValue("titulo").toString(); String block3 = next.getAttributeValue("autor").toString(); //String keyBlock = block; String keyBlock = db.doubleMetaphone(block2); Vertice v1 = new Vertice(pk, "cora", -1, block3, block2); bi2.insertVertice(keyBlock, v1); this.dadosEntrada.add(v1); // System.out.println(" Pegou Id " + next.getAttributeValue("title").toString() ); }/* w ww . ja v a 2 s . co m*/ } return bi2; }
From source file:query.Amostra.java
public BlockIndex blocaDadosDaAmostraConsulta(CSVSource dataSource) { BlockIndex bi2 = new BlockIndex(); //SoundEx db = new SoundEx(); DoubleMetaphone db = new DoubleMetaphone(); for (Iterator<DuDeObject> iterator = dataSource.iterator(); iterator.hasNext();) { DuDeObject next = iterator.next(); if (encontraKeySelecionada(next.getAttributeValue("key").toString())) { String pk = next.getAttributeValue("pk").toString(); String block = next.getAttributeValue("title").toString(); String block2 = next.getAttributeValue("artist").toString(); String keyBlock = db.encode(block); //String keyBlock = db.getSoundEx(block2); Vertice v1 = new Vertice(pk, "cd", -1, block, block2); bi2.insertVertice(keyBlock, v1); this.dadosEntrada.add(v1); // System.out.println(" Pegou Id " + next.getAttributeValue("title").toString() ); }/*from ww w.j av a 2 s . c o m*/ } return bi2; }
From source file:query.QueryExperimento.java
public void query() throws FileNotFoundException { GlobalConfig.getInstance().setInMemoryObjectThreshold(1000); // sets the CSV data source CSVSource dataSource = new CSVSource("cd", new File("cd.csv")); dataSource.enableHeader();//from w w w . j av a2 s. c om dataSource.addIdAttributes("pk"); long start = System.currentTimeMillis(); DoubleMetaphone db = new DoubleMetaphone(); int achou = 0; int nAchou = 0; int total = 0; for (Iterator<DuDeObject> iterator = dataSource.iterator(); iterator.hasNext();) { DuDeObject next = iterator.next(); String pk = next.getAttributeValue("pk").toString(); String block = next.getAttributeValue("artist").toString(); String keyBlock = db.encode(block); boolean clusterId = bi.getId(pk, keyBlock, "cd"); total++; if (clusterId) { achou++; } else { nAchou++; } } System.err.println((System.currentTimeMillis() - start) + " ms"); System.err.println("total " + total + " n achou " + nAchou + "Achou " + achou); }
From source file:query.QueryExperimento.java
/**
 * Builds a block index from the first {@code tamanho} records of the source.
 *
 * <p>Records beyond that count are still read from the source but are not
 * indexed. Each indexed record becomes a {@code Vertice} tagged {@code "cd"}
 * and is stored under the Double Metaphone code of its {@code title}
 * attribute. The number of elements reported by the index is printed to
 * standard output before returning.
 *
 * @param dataSource source whose records are scanned
 * @param tamanho number of leading records to index
 * @return the index holding the records that were kept
 */
public BlockIndex blocaConsultaReduzidaFixa(CSVSource dataSource, int tamanho) {
    final BlockIndex index = new BlockIndex();
    final DoubleMetaphone encoder = new DoubleMetaphone();
    int seen = 0;
    final Iterator<DuDeObject> rows = dataSource.iterator();
    while (rows.hasNext()) {
        final DuDeObject row = rows.next();
        // Decide before counting, so the test uses the zero-based position of the record.
        final boolean withinLimit = seen < tamanho;
        seen++;
        if (!withinLimit) {
            continue;
        }
        final String pk = row.getAttributeValue("pk").toString();
        final String title = row.getAttributeValue("title").toString();
        final String artist = row.getAttributeValue("artist").toString();
        final String blockKey = encoder.encode(title);
        final Vertice vertex = new Vertice(pk, "cd", -1, title, artist);
        index.insertVertice(blockKey, vertex);
    }
    System.out.println(" Tamanho bloco " + index.getNumeroElementos());
    return index;
}
From source file:query.QueryExperimento.java
/**
 * Builds a block index from every second record of the source.
 *
 * <p>Records at even zero-based positions (0, 2, 4, ...) are skipped; those
 * at odd positions become a {@code Vertice} tagged {@code "cd"} stored under
 * the Double Metaphone code of their {@code title} attribute.
 *
 * @param dataSource source whose records are scanned
 * @param tamanho not read by this method
 * @return the index holding the records that were kept
 */
public BlockIndex blocaConsultaReduzida(CSVSource dataSource, int tamanho) {
    final BlockIndex index = new BlockIndex();
    final DoubleMetaphone encoder = new DoubleMetaphone();
    int position = 0;
    final Iterator<DuDeObject> rows = dataSource.iterator();
    while (rows.hasNext()) {
        final DuDeObject row = rows.next();
        // Test the parity of the current position, then advance the counter.
        final boolean skip = position % 2 == 0;
        position++;
        if (skip) {
            continue;
        }
        final String pk = row.getAttributeValue("pk").toString();
        final String title = row.getAttributeValue("title").toString();
        final String artist = row.getAttributeValue("artist").toString();
        final String blockKey = encoder.encode(title);
        final Vertice vertex = new Vertice(pk, "cd", -1, title, artist);
        index.insertVertice(blockKey, vertex);
    }
    return index;
}
From source file:query.QueryExperimento.java
public BlockIndex blocaConsultaCD(CSVSource dataSource) { BlockIndex bi2 = new BlockIndex(); // SoundEx db = new SoundEx(); DoubleMetaphone db = new DoubleMetaphone(); for (Iterator<DuDeObject> iterator = dataSource.iterator(); iterator.hasNext();) { DuDeObject next = iterator.next(); String pk = next.getAttributeValue("pk").toString(); String block = next.getAttributeValue("title").toString(); String block2 = next.getAttributeValue("artist").toString(); String keyBlock = db.doubleMetaphone(block); //String keyBlock = db.getSoundEx(block2); Vertice v1 = new Vertice(pk, "cd", -1, block, block2); bi2.insertVertice(keyBlock, v1); }/* w w w . j a va 2 s . c o m*/ return bi2; }
From source file:query.QueryExperimento.java
public BlockIndex blocaConsultaCora(CSVSource dataSource) { BlockIndex bi2 = new BlockIndex(); DoubleMetaphone db = new DoubleMetaphone(); for (Iterator<DuDeObject> iterator = dataSource.iterator(); iterator.hasNext();) { DuDeObject next = iterator.next(); String pk = next.getAttributeValue("ID").toString(); JsonValue block = next.getAttributeValue("TITLE"); //JsonValue block2 = next.getAttributeValue("title"); if (block == null) { continue; } else {/*from www . ja va 2 s . com*/ String block2 = ""; String keyBlock = db.encode(block.toString()); Vertice v1 = new Vertice(pk, "cora", -1, block.toString(), block2.toString()); bi2.insertVertice(keyBlock, v1); } } return bi2; }
From source file:query.QueryLevensteinExperimentCora.java
public void query() throws FileNotFoundException { GlobalConfig.getInstance().setInMemoryObjectThreshold(1000); // sets the CSV data source CSVSource dataSource = new CSVSource("cora", new File(".csv")); dataSource.enableHeader();/* w w w . j a v a 2 s. c o m*/ dataSource.addIdAttributes("pk"); long start = System.currentTimeMillis(); DoubleMetaphone db = new DoubleMetaphone(); int achou = 0; int nAchou = 0; int total = 0; for (Iterator<DuDeObject> iterator = dataSource.iterator(); iterator.hasNext();) { DuDeObject next = iterator.next(); String pk = next.getAttributeValue("pk").toString(); String block = next.getAttributeValue("artist").toString(); String keyBlock = db.encode(block); boolean clusterId = bi.getId(pk, keyBlock, "cd"); total++; if (clusterId) { achou++; // System.err.println(" Imprime " + pk + " " + clusterId); } else { nAchou++; //System.err.println(" Imprime " + pk + " " + clusterId); } } System.err.println((System.currentTimeMillis() - start) + " ms"); System.err.println("total " + total + " n achou " + nAchou + "Achou " + achou); }