Usage examples for org.apache.commons.codec.language.DoubleMetaphone#encode
public String encode(String value)
From source file:query.QueryExperimento.java
/** * /*from w w w .j av a 2s. com*/ * @param dataSource * @param tamanho porcentagem de elementos que n]ao se deseja guardar informaes * @return as tuplas que se deseja ter informaes em um blooco que sera processado */ public BlockIndex blocaConsultaReduzida(CSVSource dataSource, int tamanho) { BlockIndex bi2 = new BlockIndex(); DoubleMetaphone db = new DoubleMetaphone(); int numeroElementos = 0; for (Iterator<DuDeObject> iterator = dataSource.iterator(); iterator.hasNext();) { DuDeObject next = iterator.next(); if (numeroElementos % 2 == 0) { numeroElementos++; } else { String pk = next.getAttributeValue("pk").toString(); String block = next.getAttributeValue("title").toString(); String block2 = next.getAttributeValue("artist").toString(); String keyBlock = db.encode(block); Vertice v1 = new Vertice(pk, "cd", -1, block, block2); bi2.insertVertice(keyBlock, v1); numeroElementos++; } } return bi2; }
From source file:query.QueryExperimento.java
public BlockIndex blocaConsultaCora(CSVSource dataSource) { BlockIndex bi2 = new BlockIndex(); DoubleMetaphone db = new DoubleMetaphone(); for (Iterator<DuDeObject> iterator = dataSource.iterator(); iterator.hasNext();) { DuDeObject next = iterator.next(); String pk = next.getAttributeValue("ID").toString(); JsonValue block = next.getAttributeValue("TITLE"); //JsonValue block2 = next.getAttributeValue("title"); if (block == null) { continue; } else {//from w w w .jav a 2 s .c om String block2 = ""; String keyBlock = db.encode(block.toString()); Vertice v1 = new Vertice(pk, "cora", -1, block.toString(), block2.toString()); bi2.insertVertice(keyBlock, v1); } } return bi2; }
From source file:query.QueryLevensteinExperimentCora.java
public void query() throws FileNotFoundException { GlobalConfig.getInstance().setInMemoryObjectThreshold(1000); // sets the CSV data source CSVSource dataSource = new CSVSource("cora", new File(".csv")); dataSource.enableHeader();// w ww .ja v a 2s . c om dataSource.addIdAttributes("pk"); long start = System.currentTimeMillis(); DoubleMetaphone db = new DoubleMetaphone(); int achou = 0; int nAchou = 0; int total = 0; for (Iterator<DuDeObject> iterator = dataSource.iterator(); iterator.hasNext();) { DuDeObject next = iterator.next(); String pk = next.getAttributeValue("pk").toString(); String block = next.getAttributeValue("artist").toString(); String keyBlock = db.encode(block); boolean clusterId = bi.getId(pk, keyBlock, "cd"); total++; if (clusterId) { achou++; // System.err.println(" Imprime " + pk + " " + clusterId); } else { nAchou++; //System.err.println(" Imprime " + pk + " " + clusterId); } } System.err.println((System.currentTimeMillis() - start) + " ms"); System.err.println("total " + total + " n achou " + nAchou + "Achou " + achou); }