Example usage for java.util Set size

List of usage examples for java.util Set size

Introduction

In this page you can find the example usage for java.util Set size.

Prototype

int size();

Source Link

Document

Returns the number of elements in this set (its cardinality).

Usage

From source file:fr.inria.atlanmod.kyanos.benchmarks.ase2015.NeoEMFMapQueryGetBranchStatements.java

public static void main(String[] args) {
    Options options = new Options();

    Option inputOpt = OptionBuilder.create(IN);
    inputOpt.setArgName("INPUT");
    inputOpt.setDescription("Input Kyanos resource directory");
    inputOpt.setArgs(1);// w ww .  j a v a 2  s .  c  o  m
    inputOpt.setRequired(true);

    Option inClassOpt = OptionBuilder.create(EPACKAGE_CLASS);
    inClassOpt.setArgName("CLASS");
    inClassOpt.setDescription("FQN of EPackage implementation class");
    inClassOpt.setArgs(1);
    inClassOpt.setRequired(true);

    Option optFileOpt = OptionBuilder.create(OPTIONS_FILE);
    optFileOpt.setArgName("FILE");
    optFileOpt.setDescription("Properties file holding the options to be used in the Kyanos Resource");
    optFileOpt.setArgs(1);

    options.addOption(inputOpt);
    options.addOption(inClassOpt);
    options.addOption(optFileOpt);

    CommandLineParser parser = new PosixParser();

    try {
        PersistenceBackendFactoryRegistry.getFactories().put(NeoMapURI.NEO_MAP_SCHEME,
                new MapPersistenceBackendFactory());

        CommandLine commandLine = parser.parse(options, args);

        URI uri = NeoMapURI.createNeoMapURI(new File(commandLine.getOptionValue(IN)));

        Class<?> inClazz = NeoEMFMapQueryGetBranchStatements.class.getClassLoader()
                .loadClass(commandLine.getOptionValue(EPACKAGE_CLASS));
        inClazz.getMethod("init").invoke(null);

        ResourceSet resourceSet = new ResourceSetImpl();
        resourceSet.getResourceFactoryRegistry().getProtocolToFactoryMap().put(NeoMapURI.NEO_MAP_SCHEME,
                PersistentResourceFactory.eINSTANCE);

        Resource resource = resourceSet.createResource(uri);

        Map<String, Object> loadOpts = new HashMap<String, Object>();

        if (commandLine.hasOption(OPTIONS_FILE)) {
            Properties properties = new Properties();
            properties.load(new FileInputStream(new File(commandLine.getOptionValue(OPTIONS_FILE))));
            for (final Entry<Object, Object> entry : properties.entrySet()) {
                loadOpts.put((String) entry.getKey(), (String) entry.getValue());
            }
        }
        // Add the LoadedObjectCounter store
        List<StoreOption> storeOptions = new ArrayList<StoreOption>();
        //         storeOptions.add(PersistentResourceOptions.EStoreOption.LOADED_OBJECT_COUNTER_LOGGING);
        storeOptions.add(MapResourceOptions.EStoreMapOption.AUTOCOMMIT);
        storeOptions.add(PersistentResourceOptions.EStoreOption.ESTRUCUTRALFEATURE_CACHING);
        storeOptions.add(PersistentResourceOptions.EStoreOption.IS_SET_CACHING);
        storeOptions.add(PersistentResourceOptions.EStoreOption.SIZE_CACHING);
        loadOpts.put(PersistentResourceOptions.STORE_OPTIONS, storeOptions);
        resource.load(loadOpts);
        {
            Runtime.getRuntime().gc();
            long initialUsedMemory = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
            LOG.log(Level.INFO, MessageFormat.format("Used memory before query: {0}",
                    MessageUtil.byteCountToDisplaySize(initialUsedMemory)));
            LOG.log(Level.INFO, "Start query");
            long begin = System.currentTimeMillis();
            Set<TextElement> list = ASE2015JavaQueries.getCommentsTagContent(resource);
            long end = System.currentTimeMillis();
            LOG.log(Level.INFO, "End query");
            LOG.log(Level.INFO, MessageFormat.format("Query result contains {0} elements", list.size()));
            LOG.log(Level.INFO, MessageFormat.format("Time spent: {0}", MessageUtil.formatMillis(end - begin)));
            Runtime.getRuntime().gc();
            long finalUsedMemory = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
            LOG.log(Level.INFO, MessageFormat.format("Used memory after query: {0}",
                    MessageUtil.byteCountToDisplaySize(finalUsedMemory)));
            LOG.log(Level.INFO, MessageFormat.format("Memory use increase: {0}",
                    MessageUtil.byteCountToDisplaySize(finalUsedMemory - initialUsedMemory)));
        }

        if (resource instanceof PersistentResourceImpl) {
            PersistentResourceImpl.shutdownWithoutUnload((PersistentResourceImpl) resource);
        } else {
            resource.unload();
        }

    } catch (ParseException e) {
        MessageUtil.showError(e.toString());
        MessageUtil.showError("Current arguments: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("java -jar <this-file.jar>", options, true);
    } catch (Throwable e) {
        MessageUtil.showError(e.toString());
    }
}

From source file:Main.java

public static void main(String[] args) {

    Set<String> s1 = new HashSet<>();

    // Add a few elements
    s1.add("HTML");
    s1.add("CSS");
    s1.add("XML");
    s1.add("XML"); // Duplicate

    // Create another set by copying s1
    Set<String> s2 = new HashSet<>(s1);
    // Add a few more elements 
    s2.add("Java");
    s2.add("SQL");
    s2.add(null); // one null is fine
    s2.add(null); // Duplicate

    System.out.println("s1: " + s1);
    System.out.println("s1.size(): " + s1.size());

    System.out.println("s2: " + s2);
    System.out.println("s2.size(): " + s2.size());
}

From source file:ISMAGS.CommandLineInterface.java

public static void main(String[] args) throws IOException {
    String folder = null, files = null, motifspec = null, output = null;

    Options opts = new Options();
    opts.addOption("folder", true, "Folder name");
    opts.addOption("linkfiles", true,
            "Link files seperated by spaces (format: linktype[char] directed[d/u] filename)");
    opts.addOption("motif", true, "Motif description by two strings (format: linktypes)");
    opts.addOption("output", true, "Output file name");

    CommandLineParser parser = new PosixParser();
    try {/*from   w w  w .j  a  va2 s  .c o  m*/
        CommandLine cmd = parser.parse(opts, args);
        if (cmd.hasOption("folder")) {
            folder = cmd.getOptionValue("folder");
        }
        if (cmd.hasOption("linkfiles")) {
            files = cmd.getOptionValue("linkfiles");
        }
        if (cmd.hasOption("motif")) {
            motifspec = cmd.getOptionValue("motif");
        }
        if (cmd.hasOption("output")) {
            output = cmd.getOptionValue("output");
        }
    } catch (ParseException e) {
        Die("Error: Parsing error");
    }

    if (print) {
        printBanner(folder, files, motifspec, output);
    }

    if (folder == null || files == null || motifspec == null || output == null) {
        Die("Error: not all options are provided");
    } else {
        ArrayList<String> linkfiles = new ArrayList<String>();
        ArrayList<String> linkTypes = new ArrayList<String>();
        ArrayList<String> sourcenetworks = new ArrayList<String>();
        ArrayList<String> destinationnetworks = new ArrayList<String>();
        ArrayList<Boolean> directed = new ArrayList<Boolean>();
        StringTokenizer st = new StringTokenizer(files, " ");
        while (st.hasMoreTokens()) {
            linkTypes.add(st.nextToken());
            directed.add(st.nextToken().equals("d"));
            sourcenetworks.add(st.nextToken());
            destinationnetworks.add(st.nextToken());
            linkfiles.add(folder + st.nextToken());
        }
        ArrayList<LinkType> allLinkTypes = new ArrayList<LinkType>();
        HashMap<Character, LinkType> typeTranslation = new HashMap<Character, LinkType>();
        for (int i = 0; i < linkTypes.size(); i++) {
            String n = linkTypes.get(i);
            char nn = n.charAt(0);
            LinkType t = typeTranslation.get(nn);
            if (t == null) {
                t = new LinkType(directed.get(i), n, i, nn, sourcenetworks.get(i), destinationnetworks.get(i));
            }
            allLinkTypes.add(t);
            typeTranslation.put(nn, t);
        }
        if (print) {
            System.out.println("Reading network..");
        }
        Network network = Network.readNetworkFromFiles(linkfiles, allLinkTypes);

        Motif motif = getMotif(motifspec, typeTranslation);

        if (print) {
            System.out.println("Starting the search..");
        }
        MotifFinder mf = new MotifFinder(network);
        long tijd = System.nanoTime();
        Set<MotifInstance> motifs = mf.findMotif(motif, false);
        tijd = System.nanoTime() - tijd;
        if (print) {
            System.out.println("Completed search in " + tijd / 1000000 + " milliseconds");
        }
        if (print) {
            System.out.println("Found " + motifs.size() + " instances of " + motifspec + " motif");
        }
        if (print) {
            System.out.println("Writing instances to file: " + output);
        }
        printMotifs(motifs, output);
        if (print) {
            System.out.println("Done.");
        }
        //            Set<MotifInstance> motifs=null;
        //            MotifFinder mf=null;
        //            System.out.println("Starting the search..");
        //            long tstart = System.nanoTime();
        //            for (int i = 0; i < it; i++) {
        //
        //                mf = new MotifFinder(network, allLinkTypes, true);
        //                motifs = mf.findMotif(motif);
        //            }
        //
        //            long tend = System.nanoTime();
        //            double time_in_ms = (tend - tstart) / 1000000.0;
        //            System.out.println("Found " + mf.totalFound + " motifs, " + time_in_ms + " ms");
        ////        System.out.println("Evaluated " + mf.totalNrMappedNodes+ " search nodes");
        ////        System.out.println("Found " + motifs.size() + " motifs, " + time_in_ms + " ms");
        //            printMotifs(motifs, output);

    }

}

From source file:hyperloglog.tools.HyperLogLogCLI.java

public static void main(String[] args) {
    Options options = new Options();
    addOptions(options);//from w w w  .  j  a  v a2 s. com

    CommandLineParser parser = new BasicParser();
    CommandLine cli = null;
    long n = 0;
    long seed = 123;
    EncodingType enc = EncodingType.SPARSE;
    int p = 14;
    int hb = 64;
    boolean bitPack = true;
    boolean noBias = true;
    int unique = -1;
    String filePath = null;
    BufferedReader br = null;
    String outFile = null;
    String inFile = null;
    FileOutputStream fos = null;
    DataOutputStream out = null;
    FileInputStream fis = null;
    DataInputStream in = null;
    try {
        cli = parser.parse(options, args);

        if (!(cli.hasOption('n') || cli.hasOption('f') || cli.hasOption('d'))) {
            System.out.println("Example usage: hll -n 1000 " + "<OR> hll -f /tmp/input.txt "
                    + "<OR> hll -d -i /tmp/out.hll");
            usage(options);
            return;
        }

        if (cli.hasOption('n')) {
            n = Long.parseLong(cli.getOptionValue('n'));
        }

        if (cli.hasOption('e')) {
            String value = cli.getOptionValue('e');
            if (value.equals(EncodingType.DENSE.name())) {
                enc = EncodingType.DENSE;
            }
        }

        if (cli.hasOption('p')) {
            p = Integer.parseInt(cli.getOptionValue('p'));
            if (p < 4 && p > 16) {
                System.out.println("Warning! Out-of-range value specified for p. Using to p=14.");
                p = 14;
            }
        }

        if (cli.hasOption('h')) {
            hb = Integer.parseInt(cli.getOptionValue('h'));
        }

        if (cli.hasOption('c')) {
            noBias = Boolean.parseBoolean(cli.getOptionValue('c'));
        }

        if (cli.hasOption('b')) {
            bitPack = Boolean.parseBoolean(cli.getOptionValue('b'));
        }

        if (cli.hasOption('f')) {
            filePath = cli.getOptionValue('f');
            br = new BufferedReader(new FileReader(new File(filePath)));
        }

        if (filePath != null && cli.hasOption('n')) {
            System.out.println("'-f' (input file) specified. Ignoring -n.");
        }

        if (cli.hasOption('s')) {
            if (cli.hasOption('o')) {
                outFile = cli.getOptionValue('o');
                fos = new FileOutputStream(new File(outFile));
                out = new DataOutputStream(fos);
            } else {
                System.err.println("Specify output file. Example usage: hll -s -o /tmp/out.hll");
                usage(options);
                return;
            }
        }

        if (cli.hasOption('d')) {
            if (cli.hasOption('i')) {
                inFile = cli.getOptionValue('i');
                fis = new FileInputStream(new File(inFile));
                in = new DataInputStream(fis);
            } else {
                System.err.println("Specify input file. Example usage: hll -d -i /tmp/in.hll");
                usage(options);
                return;
            }
        }

        // return after deserialization
        if (fis != null && in != null) {
            long start = System.currentTimeMillis();
            HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
            long end = System.currentTimeMillis();
            System.out.println(deserializedHLL.toString());
            System.out.println("Count after deserialization: " + deserializedHLL.count());
            System.out.println("Deserialization time: " + (end - start) + " ms");
            return;
        }

        // construct hll and serialize it if required
        HyperLogLog hll = HyperLogLog.builder().enableBitPacking(bitPack).enableNoBias(noBias).setEncoding(enc)
                .setNumHashBits(hb).setNumRegisterIndexBits(p).build();

        if (br != null) {
            Set<String> hashset = new HashSet<String>();
            String line;
            while ((line = br.readLine()) != null) {
                hll.addString(line);
                hashset.add(line);
            }
            n = hashset.size();
        } else {
            Random rand = new Random(seed);
            for (int i = 0; i < n; i++) {
                if (unique < 0) {
                    hll.addLong(rand.nextLong());
                } else {
                    int val = rand.nextInt(unique);
                    hll.addLong(val);
                }
            }
        }

        long estCount = hll.count();
        System.out.println("Actual count: " + n);
        System.out.println(hll.toString());
        System.out.println("Relative error: " + HyperLogLogUtils.getRelativeError(n, estCount) + "%");
        if (fos != null && out != null) {
            long start = System.currentTimeMillis();
            HyperLogLogUtils.serializeHLL(out, hll);
            long end = System.currentTimeMillis();
            System.out.println("Serialized hyperloglog to " + outFile);
            System.out.println("Serialized size: " + out.size() + " bytes");
            System.out.println("Serialization time: " + (end - start) + " ms");
            out.close();
        }
    } catch (ParseException e) {
        System.err.println("Invalid parameter.");
        usage(options);
    } catch (NumberFormatException e) {
        System.err.println("Invalid type for parameter.");
        usage(options);
    } catch (FileNotFoundException e) {
        System.err.println("Specified file not found.");
        usage(options);
    } catch (IOException e) {
        System.err.println("Exception occured while reading file.");
        usage(options);
    }
}

From source file:act.installer.bing.BingSearchRanker.java

public static void main(final String[] args) throws Exception {

    // Parse the command line options
    Options opts = new Options();
    for (Option.Builder b : OPTION_BUILDERS) {
        opts.addOption(b.build());/* w  ww  . j a va2s . c  om*/
    }

    CommandLine cl = null;
    try {
        CommandLineParser parser = new DefaultParser();
        cl = parser.parse(opts, args);
    } catch (ParseException e) {
        System.err.format("Argument parsing failed: %s\n", e.getMessage());
        HELP_FORMATTER.printHelp(BingSearchRanker.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
        System.exit(1);
    }

    if (cl.hasOption("help")) {
        HELP_FORMATTER.printHelp(BingSearchRanker.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
        return;
    }

    String inputPath = cl.getOptionValue(OPTION_INPUT_FILEPATH);
    String outputPath = cl.getOptionValue(OPTION_OUTPUT_FILEPATH);
    Boolean isTSVInput = cl.hasOption(OPTION_TSV_INPUT);

    // Read the molecule corpus
    LOGGER.info("Reading the input molecule corpus");
    MoleculeCorpus moleculeCorpus = new MoleculeCorpus();
    if (isTSVInput) {
        LOGGER.info("Input format is TSV");
        moleculeCorpus.buildCorpusFromTSVFile(inputPath);
    } else {
        LOGGER.info("Input format is raw InChIs");
        moleculeCorpus.buildCorpusFromRawInchis(inputPath);
    }

    // Get the inchi set
    Set<String> inchis = moleculeCorpus.getMolecules();
    LOGGER.info("Found %d molecules in the input corpus", inchis.size());

    // Update the Bing Search results in the Installer database
    BingSearchRanker bingSearchRanker = new BingSearchRanker(cl.hasOption(OPTION_INCLUDE_CHEBI_APPLICATIONS),
            cl.hasOption(OPTION_INCLUDE_WIKIPEDIA_URL), cl.hasOption(OPTION_INCLUDE_USAGE_EXPLORER_URL),
            cl.hasOption(OPTION_FORCE_UPDATE));
    LOGGER.info("Updating the Bing Search results in the Installer database");
    bingSearchRanker.addBingSearchResults(inchis);
    LOGGER.info("Done updating the Bing Search results");

    // Write the results in a TSV file
    LOGGER.info("Writing results to output file");
    bingSearchRanker.writeBingSearchRanksAsTSV(inchis, outputPath);
    LOGGER.info("Bing Search ranker is done. \"I'm tired, boss.\"");
}

From source file:SetTest.java

public static void main(String[] args) {
    Set<String> words = new HashSet<String>(); // HashSet implements Set
    long totalTime = 0;

    Scanner in = new Scanner(System.in);
    while (in.hasNext()) {
        String word = in.next();//from ww w. j  a  va2s  .  c om
        long callTime = System.currentTimeMillis();
        words.add(word);
        callTime = System.currentTimeMillis() - callTime;
        totalTime += callTime;
    }

    Iterator<String> iter = words.iterator();
    for (int i = 1; i <= 20 && iter.hasNext(); i++)
        System.out.println(iter.next());
    System.out.println(". . .");
    System.out.println(words.size() + " distinct words. " + totalTime + " milliseconds.");
}

From source file:edu.umass.cs.gigapaxos.PaxosPacketBatcher.java

/**
 * @param args/*from www  .  j  a  v  a2s  .  c  o m*/
 */
public static void main(String[] args) {
    Util.assertAssertionsEnabled();
    Ballot b1 = new Ballot(23, 456);
    Ballot b2 = new Ballot(23, 456);
    assert (b1.equals(b2));
    Set<Ballot> bset = new HashSet<Ballot>();
    bset.add(b1);
    assert (bset.contains(b1));
    assert (bset.contains(b2));
    bset.add(b2);
    assert (bset.size() == 1) : bset.size();
}

From source file:com.termmed.sampling.ConceptsWithMoreThanThreeRoleGroups.java

/**
 * The main method./*from w  w w. j  a v a2  s.  c  o m*/
 *
 * @param args the arguments
 * @throws Exception the exception
 */
public static void main(String[] args) throws Exception {
    System.out.println("Starting...");
    Map<String, Set<String>> groupsMap = new HashMap<String, Set<String>>();
    File relsFile = new File(
            "/Users/alo/Downloads/SnomedCT_RF2Release_INT_20160131-1/Snapshot/Terminology/sct2_Relationship_Snapshot_INT_20160131.txt");
    BufferedReader br2 = new BufferedReader(new FileReader(relsFile));
    String line2;
    int count2 = 0;
    while ((line2 = br2.readLine()) != null) {
        // process the line.
        count2++;
        if (count2 % 10000 == 0) {
            //System.out.println(count2);
        }
        List<String> columns = Arrays.asList(line2.split("\t", -1));
        if (columns.size() >= 6) {
            if (columns.get(2).equals("1") && !columns.get(6).equals("0")) {
                if (!groupsMap.containsKey(columns.get(4))) {
                    groupsMap.put(columns.get(4), new HashSet<String>());
                }
                groupsMap.get(columns.get(4)).add(columns.get(6));
            }
        }
    }
    System.out.println("Relationship groups loaded");
    Gson gson = new Gson();
    System.out.println("Reading JSON 1");
    File crossoverFile1 = new File("/Users/alo/Downloads/crossover_role_to_group.json");
    String contents = FileUtils.readFileToString(crossoverFile1, "utf-8");
    Type collectionType = new TypeToken<Collection<ControlResultLine>>() {
    }.getType();
    List<ControlResultLine> lineObject = gson.fromJson(contents, collectionType);
    Set<String> crossovers1 = new HashSet<String>();
    for (ControlResultLine loopResult : lineObject) {
        crossovers1.add(loopResult.conceptId);
    }
    System.out.println("Crossovers 1 loaded, " + lineObject.size() + " Objects");

    System.out.println("Reading JSON 2");
    File crossoverFile2 = new File("/Users/alo/Downloads/crossover_group_to_group.json");
    String contents2 = FileUtils.readFileToString(crossoverFile2, "utf-8");
    List<ControlResultLine> lineObject2 = gson.fromJson(contents2, collectionType);
    Set<String> crossovers2 = new HashSet<String>();
    for (ControlResultLine loopResult : lineObject2) {
        crossovers2.add(loopResult.conceptId);
    }
    System.out.println("Crossovers 2 loaded, " + lineObject2.size() + " Objects");

    Set<String> foundConcepts = new HashSet<String>();
    int count3 = 0;
    BufferedWriter writer = new BufferedWriter(
            new FileWriter(new File("ConceptsWithMoreThanThreeRoleGroups.csv")));
    ;
    for (String loopConcept : groupsMap.keySet()) {
        if (groupsMap.get(loopConcept).size() > 3) {
            writer.write(loopConcept);
            writer.newLine();
            foundConcepts.add(loopConcept);
            count3++;
        }
    }
    writer.close();
    System.out.println("Found " + foundConcepts.size() + " concepts");

    int countCrossover1 = 0;
    for (String loopConcept : foundConcepts) {
        if (crossovers1.contains(loopConcept)) {
            countCrossover1++;
        }
    }
    System.out.println(countCrossover1 + " are present in crossover_role_to_group");

    int countCrossover2 = 0;
    for (String loopConcept : foundConcepts) {
        if (crossovers2.contains(loopConcept)) {
            countCrossover2++;
        }
    }
    System.out.println(countCrossover2 + " are present in crossover_group_to_group");

    System.out.println("Done");
}

From source file:it.units.malelab.ege.MappingPropertiesExperimenter.java

public static void main(String[] args) throws IOException, InterruptedException, ExecutionException {
    final int n = 10000;
    final int nDist = 10000;
    //prepare problems and methods
    List<String> problems = Lists.newArrayList("bool-parity5", "bool-mopm3", "sr-keijzer6", "sr-nguyen7",
            "sr-pagie1", "sr-vladislavleva4", "other-klandscapes3", "other-klandscapes7", "other-text");
    List<String> mappers = new ArrayList<>();
    for (int gs : new int[] { 64, 128, 256, 512, 1024 }) {
        mappers.add("ge-" + gs + "-2");
        mappers.add("ge-" + gs + "-4");
        mappers.add("ge-" + gs + "-8");
        mappers.add("ge-" + gs + "-12");
        mappers.add("pige-" + gs + "-4");
        mappers.add("pige-" + gs + "-8");
        mappers.add("pige-" + gs + "-16");
        mappers.add("pige-" + gs + "-24");
        mappers.add("hge-" + gs + "-0");
        mappers.add("whge-" + gs + "-2");
        mappers.add("whge-" + gs + "-3");
        mappers.add("whge-" + gs + "-5");
    }// w  ww  . j a va2 s .  c om
    mappers.add("sge-0-5");
    mappers.add("sge-0-6");
    mappers.add("sge-0-7");
    mappers.add("sge-0-8");
    mappers.clear();
    mappers.addAll(Lists.newArrayList("ge-1024-8", "pige-1024-16", "hge-1024-0", "whge-1024-3", "sge-0-6"));
    PrintStream filePrintStream = null;
    if (args.length > 0) {
        filePrintStream = new PrintStream(args[0]);
    } else {
        filePrintStream = System.out;
    }
    filePrintStream.printf("problem;mapper;genotypeSize;param;property;value%n");
    //prepare distances
    Distance<Node<String>> phenotypeDistance = new CachedDistance<>(new LeavesEdit<String>());
    Distance<Sequence> genotypeDistance = new CachedDistance<>(new Hamming());
    //iterate
    for (String problemName : problems) {
        for (String mapperName : mappers) {
            System.out.printf("%20.20s, %20.20s", problemName, mapperName);
            //build problem
            Problem<String, NumericFitness> problem = null;
            if (problemName.equals("bool-parity5")) {
                problem = new Parity(5);
            } else if (problemName.equals("bool-mopm3")) {
                problem = new MultipleOutputParallelMultiplier(3);
            } else if (problemName.equals("sr-keijzer6")) {
                problem = new HarmonicCurve();
            } else if (problemName.equals("sr-nguyen7")) {
                problem = new Nguyen7(1);
            } else if (problemName.equals("sr-pagie1")) {
                problem = new Pagie1();
            } else if (problemName.equals("sr-vladislavleva4")) {
                problem = new Vladislavleva4(1);
            } else if (problemName.equals("other-klandscapes3")) {
                problem = new KLandscapes(3);
            } else if (problemName.equals("other-klandscapes7")) {
                problem = new KLandscapes(7);
            } else if (problemName.equals("other-text")) {
                problem = new Text();
            }
            //build configuration and evolver
            Mapper mapper = null;
            int genotypeSize = Integer.parseInt(mapperName.split("-")[1]);
            int mapperMainParam = Integer.parseInt(mapperName.split("-")[2]);
            if (mapperName.split("-")[0].equals("ge")) {
                mapper = new StandardGEMapper<>(mapperMainParam, 1, problem.getGrammar());
            } else if (mapperName.split("-")[0].equals("pige")) {
                mapper = new PiGEMapper<>(mapperMainParam, 1, problem.getGrammar());
            } else if (mapperName.split("-")[0].equals("sge")) {
                mapper = new SGEMapper<>(mapperMainParam, problem.getGrammar());
            } else if (mapperName.split("-")[0].equals("hge")) {
                mapper = new HierarchicalMapper<>(problem.getGrammar());
            } else if (mapperName.split("-")[0].equals("whge")) {
                mapper = new WeightedHierarchicalMapper<>(mapperMainParam, false, true, problem.getGrammar());
            }
            //prepare things
            Random random = new Random(1);
            Set<Sequence> genotypes = new LinkedHashSet<>(n);
            //build genotypes
            if (mapperName.split("-")[0].equals("sge")) {
                SGEGenotypeFactory<String> factory = new SGEGenotypeFactory<>((SGEMapper) mapper);
                while (genotypes.size() < n) {
                    genotypes.add(factory.build(random));
                }
                genotypeSize = factory.getBitSize();
            } else {
                BitsGenotypeFactory factory = new BitsGenotypeFactory(genotypeSize);
                while (genotypes.size() < n) {
                    genotypes.add(factory.build(random));
                }
            }
            //build and fill map
            Multimap<Node<String>, Sequence> multimap = HashMultimap.create();
            int progress = 0;
            for (Sequence genotype : genotypes) {
                Node<String> phenotype;
                try {
                    if (mapperName.split("-")[0].equals("sge")) {
                        phenotype = mapper.map((SGEGenotype<String>) genotype, new HashMap<>());
                    } else {
                        phenotype = mapper.map((BitsGenotype) genotype, new HashMap<>());
                    }
                } catch (MappingException e) {
                    phenotype = Node.EMPTY_TREE;
                }
                multimap.put(phenotype, genotype);
                progress = progress + 1;
                if (progress % Math.round(n / 10) == 0) {
                    System.out.print(".");
                }
            }
            System.out.println();
            //compute distances
            List<Pair<Double, Double>> allDistances = new ArrayList<>();
            List<Pair<Double, Double>> allValidDistances = new ArrayList<>();
            Multimap<Node<String>, Double> genotypeDistances = ArrayListMultimap.create();
            for (Node<String> phenotype : multimap.keySet()) {
                for (Sequence genotype1 : multimap.get(phenotype)) {
                    for (Sequence genotype2 : multimap.get(phenotype)) {
                        double gDistance = genotypeDistance.d(genotype1, genotype2);
                        genotypeDistances.put(phenotype, gDistance);
                        if (genotypeDistances.get(phenotype).size() > nDist) {
                            break;
                        }
                    }
                    if (genotypeDistances.get(phenotype).size() > nDist) {
                        break;
                    }
                }
            }
            List<Map.Entry<Node<String>, Sequence>> entries = new ArrayList<>(multimap.entries());
            Collections.shuffle(entries, random);
            for (Map.Entry<Node<String>, Sequence> entry1 : entries) {
                for (Map.Entry<Node<String>, Sequence> entry2 : entries) {
                    double gDistance = genotypeDistance.d(entry1.getValue(), entry2.getValue());
                    double pDistance = phenotypeDistance.d(entry1.getKey(), entry2.getKey());
                    allDistances.add(new Pair<>(gDistance, pDistance));
                    if (!Node.EMPTY_TREE.equals(entry1.getKey()) && !Node.EMPTY_TREE.equals(entry2.getKey())) {
                        allValidDistances.add(new Pair<>(gDistance, pDistance));
                    }
                    if (allDistances.size() > nDist) {
                        break;
                    }
                }
                if (allDistances.size() > nDist) {
                    break;
                }
            }
            //compute properties
            double invalidity = (double) multimap.get(Node.EMPTY_TREE).size() / (double) genotypes.size();
            double redundancy = 1 - (double) multimap.keySet().size() / (double) genotypes.size();
            double validRedundancy = redundancy;
            if (multimap.keySet().contains(Node.EMPTY_TREE)) {
                validRedundancy = 1 - ((double) multimap.keySet().size() - 1d)
                        / (double) (genotypes.size() - multimap.get(Node.EMPTY_TREE).size());
            }
            double locality = Utils.pearsonCorrelation(allDistances);
            double validLocality = Utils.pearsonCorrelation(allValidDistances);
            double[] sizes = new double[multimap.keySet().size()];
            double[] meanGenotypeDistances = new double[multimap.keySet().size()];
            int invalidIndex = -1;
            int c = 0;
            for (Node<String> phenotype : multimap.keySet()) {
                if (Node.EMPTY_TREE.equals(phenotype)) {
                    invalidIndex = c;
                }
                sizes[c] = multimap.get(phenotype).size();
                double[] distances = new double[genotypeDistances.get(phenotype).size()];
                int k = 0;
                for (Double distance : genotypeDistances.get(phenotype)) {
                    distances[k] = distance;
                    k = k + 1;
                }
                meanGenotypeDistances[c] = StatUtils.mean(distances);
                c = c + 1;
            }
            double nonUniformity = Math.sqrt(StatUtils.variance(sizes)) / StatUtils.mean(sizes);
            double nonSynonymousity = StatUtils.mean(meanGenotypeDistances)
                    / StatUtils.mean(firsts(allDistances));
            double validNonUniformity = nonUniformity;
            double validNonSynonymousity = nonSynonymousity;
            if (invalidIndex != -1) {
                double[] validSizes = new double[multimap.keySet().size() - 1];
                double[] validMeanGenotypeDistances = new double[multimap.keySet().size() - 1];
                if (invalidIndex > 0) {
                    System.arraycopy(sizes, 0, validSizes, 0, invalidIndex);
                    System.arraycopy(meanGenotypeDistances, 0, validMeanGenotypeDistances, 0, invalidIndex);
                }
                System.arraycopy(sizes, invalidIndex + 1, validSizes, invalidIndex,
                        sizes.length - invalidIndex - 1);
                System.arraycopy(meanGenotypeDistances, invalidIndex + 1, validMeanGenotypeDistances,
                        invalidIndex, meanGenotypeDistances.length - invalidIndex - 1);
                validNonUniformity = Math.sqrt(StatUtils.variance(validSizes)) / StatUtils.mean(validSizes);
                validNonSynonymousity = StatUtils.mean(validMeanGenotypeDistances)
                        / StatUtils.mean(firsts(allValidDistances));
            }
            //compute locality
            filePrintStream.printf("%s;%s;%d;%d;invalidity;%f %n", problemName, mapperName.split("-")[0],
                    genotypeSize, mapperMainParam, invalidity);
            filePrintStream.printf("%s;%s;%d;%d;redundancy;%f %n", problemName, mapperName.split("-")[0],
                    genotypeSize, mapperMainParam, redundancy);
            filePrintStream.printf("%s;%s;%d;%d;validRedundancy;%f %n", problemName, mapperName.split("-")[0],
                    genotypeSize, mapperMainParam, validRedundancy);
            filePrintStream.printf("%s;%s;%d;%d;locality;%f %n", problemName, mapperName.split("-")[0],
                    genotypeSize, mapperMainParam, locality);
            filePrintStream.printf("%s;%s;%d;%d;validLLocality;%f %n", problemName, mapperName.split("-")[0],
                    genotypeSize, mapperMainParam, validLocality);
            filePrintStream.printf("%s;%s;%d;%d;nonUniformity;%f %n", problemName, mapperName.split("-")[0],
                    genotypeSize, mapperMainParam, nonUniformity);
            filePrintStream.printf("%s;%s;%d;%d;validNonUniformity;%f %n", problemName,
                    mapperName.split("-")[0], genotypeSize, mapperMainParam, validNonUniformity);
            filePrintStream.printf("%s;%s;%d;%d;nonSynonymousity;%f %n", problemName, mapperName.split("-")[0],
                    genotypeSize, mapperMainParam, nonSynonymousity);
            filePrintStream.printf("%s;%s;%d;%d;validNonSynonymousity;%f %n", problemName,
                    mapperName.split("-")[0], genotypeSize, mapperMainParam, validNonSynonymousity);
        }
    }
    if (filePrintStream != null) {
        filePrintStream.close();
    }
}

From source file:dkpro.similarity.algorithms.vsm.store.convert.ConvertLuceneToVectorIndex.java

public static void main(String[] args) throws Exception {
    File inputPath = new File(args[0]);
    File outputPath = new File(args[1]);

    deleteQuietly(outputPath);/*from w w  w . j a  va2  s  . c o  m*/
    outputPath.mkdirs();

    boolean ignoreNumerics = true;
    boolean ignoreCardinal = true;
    boolean ignoreMonetary = true;
    int minTermLength = 3;
    int minDocFreq = 5;

    System.out.println("Quality criteria");
    System.out.println("Minimum term length            : " + minTermLength);
    System.out.println("Minimum document frequency     : " + minDocFreq);
    System.out.println("Ignore numeric tokens          : " + ignoreNumerics);
    System.out.println("Ignore cardinal numeric tokens : " + ignoreNumerics);
    System.out.println("Ignore money values            : " + ignoreMonetary);

    System.out.print("Fetching terms list... ");

    IndexReader reader = IndexReader.open(FSDirectory.open(inputPath));
    TermEnum termEnum = reader.terms();
    Set<String> terms = new HashSet<String>();
    int ignoredTerms = 0;
    while (termEnum.next()) {
        String term = termEnum.term().text();
        if (((minTermLength > 0) && (term.length() < minTermLength)) || (ignoreCardinal && isCardinal(term))
                || (ignoreMonetary && isMonetary(term)) || (ignoreNumerics && isNumericSpace(term))
                || ((minDocFreq > 0) && (termEnum.docFreq() < minDocFreq))) {
            ignoredTerms++;
            continue;
        }

        terms.add(term);
    }
    reader.close();

    System.out.println(terms.size() + " terms found. " + ignoredTerms + " terms ignored.");

    System.out.println("Opening source ESA index " + inputPath);
    VectorReader source = new LuceneVectorReader(inputPath);
    System.out.println("Opening destination ESA index " + inputPath);
    VectorIndexWriter esaWriter = new VectorIndexWriter(outputPath, source.getConceptCount());

    ProgressMeter p = new ProgressMeter(terms.size());
    for (String term : terms) {
        Vector vector = source.getVector(term);
        esaWriter.put(term, vector);

        p.next();
        System.out.println("[" + term + "] " + p);
    }

    esaWriter.close();
}