Example usage of java.util.Set.size()

Introduction

On this page you can find example usages of java.util.Set.size().

Prototype

int size();

Source Link

Document

Returns the number of elements in this set (its cardinality).

Usage

From source file:fr.inria.atlanmod.kyanos.benchmarks.ase2015.NeoEMFMapQueryGetBranchStatements.java

/**
 * Benchmark entry point: loads a NeoEMF/Map (Kyanos) resource, runs
 * {@code ASE2015JavaQueries.getCommentsTagContent} on it and logs the size of the
 * result, the elapsed time and the used-memory figures sampled before and after
 * the query.
 *
 * <p>Command-line options: {@code IN} (required, resource directory),
 * {@code EPACKAGE_CLASS} (required, class whose static {@code init()} method is
 * invoked reflectively) and {@code OPTIONS_FILE} (optional properties file whose
 * entries are passed on as load options).
 *
 * @param args the command-line arguments
 */
public static void main(String[] args) {
    Options options = new Options();

    Option inputOpt = OptionBuilder.create(IN);
    inputOpt.setArgName("INPUT");
    inputOpt.setDescription("Input Kyanos resource directory");
    inputOpt.setArgs(1);
    inputOpt.setRequired(true);

    Option inClassOpt = OptionBuilder.create(EPACKAGE_CLASS);
    inClassOpt.setArgName("CLASS");
    inClassOpt.setDescription("FQN of EPackage implementation class");
    inClassOpt.setArgs(1);
    inClassOpt.setRequired(true);

    Option optFileOpt = OptionBuilder.create(OPTIONS_FILE);
    optFileOpt.setArgName("FILE");
    optFileOpt.setDescription("Properties file holding the options to be used in the Kyanos Resource");
    optFileOpt.setArgs(1);

    options.addOption(inputOpt);
    options.addOption(inClassOpt);
    options.addOption(optFileOpt);

    CommandLineParser parser = new PosixParser();

    try {
        PersistenceBackendFactoryRegistry.getFactories().put(NeoMapURI.NEO_MAP_SCHEME,
                new MapPersistenceBackendFactory());

        CommandLine commandLine = parser.parse(options, args);

        URI uri = NeoMapURI.createNeoMapURI(new File(commandLine.getOptionValue(IN)));

        // Initialise the EPackage by calling its static init() method reflectively.
        Class<?> inClazz = NeoEMFMapQueryGetBranchStatements.class.getClassLoader()
                .loadClass(commandLine.getOptionValue(EPACKAGE_CLASS));
        inClazz.getMethod("init").invoke(null);

        ResourceSet resourceSet = new ResourceSetImpl();
        resourceSet.getResourceFactoryRegistry().getProtocolToFactoryMap().put(NeoMapURI.NEO_MAP_SCHEME,
                PersistentResourceFactory.eINSTANCE);

        Resource resource = resourceSet.createResource(uri);

        Map<String, Object> loadOpts = new HashMap<String, Object>();

        if (commandLine.hasOption(OPTIONS_FILE)) {
            Properties properties = new Properties();
            // Close the stream as soon as the properties are read so the file handle is not leaked.
            FileInputStream optionsStream = new FileInputStream(
                    new File(commandLine.getOptionValue(OPTIONS_FILE)));
            try {
                properties.load(optionsStream);
            } finally {
                optionsStream.close();
            }
            for (final Entry<Object, Object> entry : properties.entrySet()) {
                loadOpts.put((String) entry.getKey(), (String) entry.getValue());
            }
        }
        // Store options applied on load (loaded-object-counter logging is intentionally left disabled).
        List<StoreOption> storeOptions = new ArrayList<StoreOption>();
        //         storeOptions.add(PersistentResourceOptions.EStoreOption.LOADED_OBJECT_COUNTER_LOGGING);
        storeOptions.add(MapResourceOptions.EStoreMapOption.AUTOCOMMIT);
        storeOptions.add(PersistentResourceOptions.EStoreOption.ESTRUCUTRALFEATURE_CACHING);
        storeOptions.add(PersistentResourceOptions.EStoreOption.IS_SET_CACHING);
        storeOptions.add(PersistentResourceOptions.EStoreOption.SIZE_CACHING);
        loadOpts.put(PersistentResourceOptions.STORE_OPTIONS, storeOptions);
        resource.load(loadOpts);
        {
            // Request a GC before sampling so the "before" figure is as stable as the JVM allows.
            Runtime.getRuntime().gc();
            long initialUsedMemory = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
            LOG.log(Level.INFO, MessageFormat.format("Used memory before query: {0}",
                    MessageUtil.byteCountToDisplaySize(initialUsedMemory)));
            LOG.log(Level.INFO, "Start query");
            long begin = System.currentTimeMillis();
            Set<TextElement> list = ASE2015JavaQueries.getCommentsTagContent(resource);
            long end = System.currentTimeMillis();
            LOG.log(Level.INFO, "End query");
            LOG.log(Level.INFO, MessageFormat.format("Query result contains {0} elements", list.size()));
            LOG.log(Level.INFO, MessageFormat.format("Time spent: {0}", MessageUtil.formatMillis(end - begin)));
            Runtime.getRuntime().gc();
            long finalUsedMemory = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
            LOG.log(Level.INFO, MessageFormat.format("Used memory after query: {0}",
                    MessageUtil.byteCountToDisplaySize(finalUsedMemory)));
            LOG.log(Level.INFO, MessageFormat.format("Memory use increase: {0}",
                    MessageUtil.byteCountToDisplaySize(finalUsedMemory - initialUsedMemory)));
        }

        if (resource instanceof PersistentResourceImpl) {
            PersistentResourceImpl.shutdownWithoutUnload((PersistentResourceImpl) resource);
        } else {
            resource.unload();
        }

    } catch (ParseException e) {
        // Bad command line: report it together with the usage help.
        MessageUtil.showError(e.toString());
        MessageUtil.showError("Current arguments: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("java -jar <this-file.jar>", options, true);
    } catch (Throwable e) {
        // Top-level boundary: any other failure is reported and the program ends normally.
        MessageUtil.showError(e.toString());
    }
}

From source file:Main.java

/**
 * Demonstrates that a {@code HashSet} ignores duplicate additions (including a
 * duplicate {@code null}) by printing two sets together with their sizes.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // "XML" appears twice on purpose: the second add is a no-op.
    Set<String> s1 = new HashSet<>();
    for (String markup : new String[] { "HTML", "CSS", "XML", "XML" }) {
        s1.add(markup);
    }

    // s2 starts as a copy of s1; a single null is allowed, the repeated null is ignored.
    Set<String> s2 = new HashSet<>(s1);
    for (String extra : new String[] { "Java", "SQL", null, null }) {
        s2.add(extra);
    }

    int firstSize = s1.size();
    System.out.println("s1: " + s1);
    System.out.println("s1.size(): " + firstSize);

    int secondSize = s2.size();
    System.out.println("s2: " + s2);
    System.out.println("s2.size(): " + secondSize);
}

From source file:ISMAGS.CommandLineInterface.java

/**
 * Command-line entry point: parses the {@code folder}, {@code linkfiles},
 * {@code motif} and {@code output} options, reads the network from the listed
 * link files, searches it for the requested motif and writes the instances found
 * to the output file.
 *
 * @param args the command-line arguments
 * @throws IOException declared by the original signature; presumably raised while
 *         reading the network or writing the result (not visible here)
 */
public static void main(String[] args) throws IOException {
    String folder = null;
    String files = null;
    String motifspec = null;
    String output = null;

    Options opts = new Options();
    opts.addOption("folder", true, "Folder name");
    opts.addOption("linkfiles", true,
            "Link files seperated by spaces (format: linktype[char] directed[d/u] filename)");
    opts.addOption("motif", true, "Motif description by two strings (format: linktypes)");
    opts.addOption("output", true, "Output file name");

    CommandLineParser parser = new PosixParser();
    try {
        CommandLine parsed = parser.parse(opts, args);
        if (parsed.hasOption("folder")) {
            folder = parsed.getOptionValue("folder");
        }
        if (parsed.hasOption("linkfiles")) {
            files = parsed.getOptionValue("linkfiles");
        }
        if (parsed.hasOption("motif")) {
            motifspec = parsed.getOptionValue("motif");
        }
        if (parsed.hasOption("output")) {
            output = parsed.getOptionValue("output");
        }
    } catch (ParseException e) {
        Die("Error: Parsing error");
    }

    if (print) {
        printBanner(folder, files, motifspec, output);
    }

    // All four options are mandatory.
    boolean incomplete = folder == null || files == null || motifspec == null || output == null;
    if (incomplete) {
        Die("Error: not all options are provided");
        return;
    }

    ArrayList<String> linkfiles = new ArrayList<String>();
    ArrayList<String> linkTypes = new ArrayList<String>();
    ArrayList<String> sourcenetworks = new ArrayList<String>();
    ArrayList<String> destinationnetworks = new ArrayList<String>();
    ArrayList<Boolean> directed = new ArrayList<Boolean>();

    // Each link file is described by five consecutive tokens:
    // link type, d/u flag, source network, destination network, file name.
    StringTokenizer tokens = new StringTokenizer(files, " ");
    while (tokens.hasMoreTokens()) {
        String typeName = tokens.nextToken();
        boolean isDirected = tokens.nextToken().equals("d");
        String source = tokens.nextToken();
        String destination = tokens.nextToken();
        String fileName = tokens.nextToken();

        linkTypes.add(typeName);
        directed.add(isDirected);
        sourcenetworks.add(source);
        destinationnetworks.add(destination);
        linkfiles.add(folder + fileName);
    }

    // Link types are keyed by the first character of their name; a repeated key reuses the first LinkType.
    ArrayList<LinkType> allLinkTypes = new ArrayList<LinkType>();
    HashMap<Character, LinkType> typeTranslation = new HashMap<Character, LinkType>();
    for (int idx = 0; idx < linkTypes.size(); idx++) {
        String typeName = linkTypes.get(idx);
        char key = typeName.charAt(0);
        LinkType linkType = typeTranslation.get(key);
        if (linkType == null) {
            linkType = new LinkType(directed.get(idx), typeName, idx, key, sourcenetworks.get(idx),
                    destinationnetworks.get(idx));
        }
        allLinkTypes.add(linkType);
        typeTranslation.put(key, linkType);
    }

    if (print) {
        System.out.println("Reading network..");
    }
    Network network = Network.readNetworkFromFiles(linkfiles, allLinkTypes);

    Motif motif = getMotif(motifspec, typeTranslation);

    if (print) {
        System.out.println("Starting the search..");
    }
    MotifFinder finder = new MotifFinder(network);
    long startNanos = System.nanoTime();
    Set<MotifInstance> motifs = finder.findMotif(motif, false);
    long elapsedNanos = System.nanoTime() - startNanos;
    if (print) {
        System.out.println("Completed search in " + elapsedNanos / 1000000 + " milliseconds");
        System.out.println("Found " + motifs.size() + " instances of " + motifspec + " motif");
        System.out.println("Writing instances to file: " + output);
    }
    printMotifs(motifs, output);
    if (print) {
        System.out.println("Done.");
    }
}

From source file:hyperloglog.tools.HyperLogLogCLI.java

/**
 * Command-line driver for the HyperLogLog tool.
 *
 * <p>Depending on the options it either deserializes a stored HyperLogLog
 * ({@code -d -i <file>}) and prints its count, or builds a new one from the lines
 * of a file ({@code -f <file>}) or from {@code -n} pseudo-random longs, prints the
 * estimate next to the actual count, and optionally serializes it
 * ({@code -s -o <file>}). {@code -e}, {@code -p}, {@code -h}, {@code -c} and
 * {@code -b} tune encoding, register index bits, hash bits, bias correction and
 * bit packing.
 *
 * <p>All files opened here are closed before the method returns, whichever path
 * is taken.
 *
 * @param args the command-line arguments
 */
public static void main(String[] args) {
    Options options = new Options();
    addOptions(options);

    CommandLineParser parser = new BasicParser();
    CommandLine cli = null;
    long n = 0;
    long seed = 123;
    EncodingType enc = EncodingType.SPARSE;
    int p = 14;
    int hb = 64;
    boolean bitPack = true;
    boolean noBias = true;
    int unique = -1;
    String filePath = null;
    BufferedReader br = null;
    String outFile = null;
    String inFile = null;
    FileOutputStream fos = null;
    DataOutputStream out = null;
    FileInputStream fis = null;
    DataInputStream in = null;
    try {
        cli = parser.parse(options, args);

        // At least one of -n, -f or -d must be given to select a mode.
        if (!(cli.hasOption('n') || cli.hasOption('f') || cli.hasOption('d'))) {
            System.out.println("Example usage: hll -n 1000 " + "<OR> hll -f /tmp/input.txt "
                    + "<OR> hll -d -i /tmp/out.hll");
            usage(options);
            return;
        }

        if (cli.hasOption('n')) {
            n = Long.parseLong(cli.getOptionValue('n'));
        }

        if (cli.hasOption('e')) {
            String value = cli.getOptionValue('e');
            if (value.equals(EncodingType.DENSE.name())) {
                enc = EncodingType.DENSE;
            }
        }

        if (cli.hasOption('p')) {
            p = Integer.parseInt(cli.getOptionValue('p'));
            // Out of range means below 4 OR above 16; "&&" here could never be true.
            if (p < 4 || p > 16) {
                System.out.println("Warning! Out-of-range value specified for p. Using to p=14.");
                p = 14;
            }
        }

        if (cli.hasOption('h')) {
            hb = Integer.parseInt(cli.getOptionValue('h'));
        }

        if (cli.hasOption('c')) {
            noBias = Boolean.parseBoolean(cli.getOptionValue('c'));
        }

        if (cli.hasOption('b')) {
            bitPack = Boolean.parseBoolean(cli.getOptionValue('b'));
        }

        if (cli.hasOption('f')) {
            filePath = cli.getOptionValue('f');
            br = new BufferedReader(new FileReader(new File(filePath)));
        }

        if (filePath != null && cli.hasOption('n')) {
            System.out.println("'-f' (input file) specified. Ignoring -n.");
        }

        if (cli.hasOption('s')) {
            if (cli.hasOption('o')) {
                outFile = cli.getOptionValue('o');
                fos = new FileOutputStream(new File(outFile));
                out = new DataOutputStream(fos);
            } else {
                System.err.println("Specify output file. Example usage: hll -s -o /tmp/out.hll");
                usage(options);
                return;
            }
        }

        if (cli.hasOption('d')) {
            if (cli.hasOption('i')) {
                inFile = cli.getOptionValue('i');
                fis = new FileInputStream(new File(inFile));
                in = new DataInputStream(fis);
            } else {
                System.err.println("Specify input file. Example usage: hll -d -i /tmp/in.hll");
                usage(options);
                return;
            }
        }

        // return after deserialization
        if (fis != null && in != null) {
            long start = System.currentTimeMillis();
            HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
            long end = System.currentTimeMillis();
            System.out.println(deserializedHLL.toString());
            System.out.println("Count after deserialization: " + deserializedHLL.count());
            System.out.println("Deserialization time: " + (end - start) + " ms");
            return;
        }

        // construct hll and serialize it if required
        HyperLogLog hll = HyperLogLog.builder().enableBitPacking(bitPack).enableNoBias(noBias).setEncoding(enc)
                .setNumHashBits(hb).setNumRegisterIndexBits(p).build();

        if (br != null) {
            // The exact distinct count of the file's lines is kept for comparison with the estimate.
            Set<String> hashset = new HashSet<String>();
            String line;
            while ((line = br.readLine()) != null) {
                hll.addString(line);
                hashset.add(line);
            }
            n = hashset.size();
        } else {
            Random rand = new Random(seed);
            // n is a long: a long counter avoids overflow (and a non-terminating loop) above Integer.MAX_VALUE.
            for (long i = 0; i < n; i++) {
                if (unique < 0) {
                    hll.addLong(rand.nextLong());
                } else {
                    int val = rand.nextInt(unique);
                    hll.addLong(val);
                }
            }
        }

        long estCount = hll.count();
        System.out.println("Actual count: " + n);
        System.out.println(hll.toString());
        System.out.println("Relative error: " + HyperLogLogUtils.getRelativeError(n, estCount) + "%");
        if (fos != null && out != null) {
            long start = System.currentTimeMillis();
            HyperLogLogUtils.serializeHLL(out, hll);
            long end = System.currentTimeMillis();
            System.out.println("Serialized hyperloglog to " + outFile);
            System.out.println("Serialized size: " + out.size() + " bytes");
            System.out.println("Serialization time: " + (end - start) + " ms");
            out.close();
        }
    } catch (ParseException e) {
        System.err.println("Invalid parameter.");
        usage(options);
    } catch (NumberFormatException e) {
        System.err.println("Invalid type for parameter.");
        usage(options);
    } catch (FileNotFoundException e) {
        System.err.println("Specified file not found.");
        usage(options);
    } catch (IOException e) {
        System.err.println("Exception occured while reading file.");
        usage(options);
    } finally {
        // Release every file handle on all exit paths, including the early returns above.
        closeIgnoringErrors(br);
        closeIgnoringErrors(fis);
        closeIgnoringErrors(fos);
    }
}

/** Closes {@code resource} when it is non-null; a failure while closing is deliberately ignored. */
private static void closeIgnoringErrors(java.io.Closeable resource) {
    if (resource == null) {
        return;
    }
    try {
        resource.close();
    } catch (IOException ignored) {
        // Best-effort cleanup at program end: nothing useful can be done about a failed close.
    }
}

From source file:act.installer.bing.BingSearchRanker.java

/**
 * Command-line entry point: reads a molecule corpus (TSV or raw InChIs), updates
 * the Bing Search results for those molecules in the Installer database and writes
 * the resulting ranks to a TSV file.
 *
 * @param args the command-line arguments
 * @throws Exception propagated from corpus loading, the search update or the TSV export
 */
public static void main(final String[] args) throws Exception {

    // Assemble the supported options from the shared builders.
    Options opts = new Options();
    for (Option.Builder builder : OPTION_BUILDERS) {
        opts.addOption(builder.build());
    }

    CommandLine cl = null;
    try {
        cl = new DefaultParser().parse(opts, args);
    } catch (ParseException e) {
        System.err.format("Argument parsing failed: %s\n", e.getMessage());
        HELP_FORMATTER.printHelp(BingSearchRanker.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
        System.exit(1);
    }

    // An explicit help request prints the usage and stops.
    if (cl.hasOption("help")) {
        HELP_FORMATTER.printHelp(BingSearchRanker.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
        return;
    }

    final String inputPath = cl.getOptionValue(OPTION_INPUT_FILEPATH);
    final String outputPath = cl.getOptionValue(OPTION_OUTPUT_FILEPATH);
    final boolean tsvInput = cl.hasOption(OPTION_TSV_INPUT);

    // Load the corpus in whichever format was requested.
    LOGGER.info("Reading the input molecule corpus");
    MoleculeCorpus corpus = new MoleculeCorpus();
    if (!tsvInput) {
        LOGGER.info("Input format is raw InChIs");
        corpus.buildCorpusFromRawInchis(inputPath);
    } else {
        LOGGER.info("Input format is TSV");
        corpus.buildCorpusFromTSVFile(inputPath);
    }

    Set<String> inchis = corpus.getMolecules();
    LOGGER.info("Found %d molecules in the input corpus", inchis.size());

    // Read the four feature switches in the order the constructor expects them.
    final boolean includeChebiApplications = cl.hasOption(OPTION_INCLUDE_CHEBI_APPLICATIONS);
    final boolean includeWikipediaUrl = cl.hasOption(OPTION_INCLUDE_WIKIPEDIA_URL);
    final boolean includeUsageExplorerUrl = cl.hasOption(OPTION_INCLUDE_USAGE_EXPLORER_URL);
    final boolean forceUpdate = cl.hasOption(OPTION_FORCE_UPDATE);
    BingSearchRanker ranker = new BingSearchRanker(includeChebiApplications, includeWikipediaUrl,
            includeUsageExplorerUrl, forceUpdate);

    LOGGER.info("Updating the Bing Search results in the Installer database");
    ranker.addBingSearchResults(inchis);
    LOGGER.info("Done updating the Bing Search results");

    LOGGER.info("Writing results to output file");
    ranker.writeBingSearchRanksAsTSV(inchis, outputPath);
    LOGGER.info("Bing Search ranker is done. \"I'm tired, boss.\"");
}

From source file:SetTest.java

/**
 * Reads whitespace-separated words from standard input into a {@code HashSet},
 * timing only the {@code add} calls, then prints up to 20 of the distinct words
 * followed by the distinct-word count and the accumulated time.
 *
 * @param args unused
 */
public static void main(String[] args) {
    Set<String> words = new HashSet<String>(); // HashSet implements Set
    long totalTime = 0;

    Scanner in = new Scanner(System.in);
    while (in.hasNext()) {
        String token = in.next();
        // Only the insertion itself is measured, not the reading of the token.
        long started = System.currentTimeMillis();
        words.add(token);
        totalTime += System.currentTimeMillis() - started;
    }

    // Show at most the first 20 words in the set's iteration order.
    Iterator<String> cursor = words.iterator();
    int shown = 0;
    while (shown < 20 && cursor.hasNext()) {
        System.out.println(cursor.next());
        shown++;
    }
    System.out.println(". . .");
    System.out.println(words.size() + " distinct words. " + totalTime + " milliseconds.");
}

From source file:edu.umass.cs.gigapaxos.PaxosPacketBatcher.java

/**
 * @param args/*from www  .  j  a  v  a2s  .  c  o m*/
 */
public static void main(String[] args) {
    Util.assertAssertionsEnabled();
    Ballot b1 = new Ballot(23, 456);
    Ballot b2 = new Ballot(23, 456);
    assert (b1.equals(b2));
    Set<Ballot> bset = new HashSet<Ballot>();
    bset.add(b1);
    assert (bset.contains(b1));
    assert (bset.contains(b2));
    bset.add(b2);
    assert (bset.size() == 1) : bset.size();
}

From source file:com.termmed.sampling.ConceptsWithMoreThanThreeRoleGroups.java

/**
 * The main method./*from w  w w. j  a v a2  s.  c  o m*/
 *
 * @param args the arguments
 * @throws Exception the exception
 */
public static void main(String[] args) throws Exception {
    System.out.println("Starting...");
    Map<String, Set<String>> groupsMap = new HashMap<String, Set<String>>();
    File relsFile = new File(
            "/Users/alo/Downloads/SnomedCT_RF2Release_INT_20160131-1/Snapshot/Terminology/sct2_Relationship_Snapshot_INT_20160131.txt");
    BufferedReader br2 = new BufferedReader(new FileReader(relsFile));
    String line2;
    int count2 = 0;
    while ((line2 = br2.readLine()) != null) {
        // process the line.
        count2++;
        if (count2 % 10000 == 0) {
            //System.out.println(count2);
        }
        List<String> columns = Arrays.asList(line2.split("\t", -1));
        if (columns.size() >= 6) {
            if (columns.get(2).equals("1") && !columns.get(6).equals("0")) {
                if (!groupsMap.containsKey(columns.get(4))) {
                    groupsMap.put(columns.get(4), new HashSet<String>());
                }
                groupsMap.get(columns.get(4)).add(columns.get(6));
            }
        }
    }
    System.out.println("Relationship groups loaded");
    Gson gson = new Gson();
    System.out.println("Reading JSON 1");
    File crossoverFile1 = new File("/Users/alo/Downloads/crossover_role_to_group.json");
    String contents = FileUtils.readFileToString(crossoverFile1, "utf-8");
    Type collectionType = new TypeToken<Collection<ControlResultLine>>() {
    }.getType();
    List<ControlResultLine> lineObject = gson.fromJson(contents, collectionType);
    Set<String> crossovers1 = new HashSet<String>();
    for (ControlResultLine loopResult : lineObject) {
        crossovers1.add(loopResult.conceptId);
    }
    System.out.println("Crossovers 1 loaded, " + lineObject.size() + " Objects");

    System.out.println("Reading JSON 2");
    File crossoverFile2 = new File("/Users/alo/Downloads/crossover_group_to_group.json");
    String contents2 = FileUtils.readFileToString(crossoverFile2, "utf-8");
    List<ControlResultLine> lineObject2 = gson.fromJson(contents2, collectionType);
    Set<String> crossovers2 = new HashSet<String>();
    for (ControlResultLine loopResult : lineObject2) {
        crossovers2.add(loopResult.conceptId);
    }
    System.out.println("Crossovers 2 loaded, " + lineObject2.size() + " Objects");

    Set<String> foundConcepts = new HashSet<String>();
    int count3 = 0;
    BufferedWriter writer = new BufferedWriter(
            new FileWriter(new File("ConceptsWithMoreThanThreeRoleGroups.csv")));
    ;
    for (String loopConcept : groupsMap.keySet()) {
        if (groupsMap.get(loopConcept).size() > 3) {
            writer.write(loopConcept);
            writer.newLine();
            foundConcepts.add(loopConcept);
            count3++;
        }
    }
    writer.close();
    System.out.println("Found " + foundConcepts.size() + " concepts");

    int countCrossover1 = 0;
    for (String loopConcept : foundConcepts) {
        if (crossovers1.contains(loopConcept)) {
            countCrossover1++;
        }
    }
    System.out.println(countCrossover1 + " are present in crossover_role_to_group");

    int countCrossover2 = 0;
    for (String loopConcept : foundConcepts) {
        if (crossovers2.contains(loopConcept)) {
            countCrossover2++;
        }
    }
    System.out.println(countCrossover2 + " are present in crossover_group_to_group");

    System.out.println("Done");
}

From source file:it.units.malelab.ege.MappingPropertiesExperimenter.java

/**
 * Measures properties of genotype-to-phenotype mappers (invalidity, redundancy,
 * locality, non-uniformity, non-synonymousity, plus "valid" variants that leave out
 * the empty tree) for every problem/mapper pair and prints them as
 * semicolon-separated rows.
 *
 * <p>Rows go to the file named by {@code args[0]} when an argument is given,
 * otherwise to standard output. Progress is always printed to standard output.
 *
 * @param args optional: {@code args[0]} is the output file path
 */
public static void main(String[] args) throws IOException, InterruptedException, ExecutionException {
    // n: number of distinct genotypes sampled per pair; nDist: cap on the number of distance samples.
    final int n = 10000;
    final int nDist = 10000;
    //prepare problems and methods
    List<String> problems = Lists.newArrayList("bool-parity5", "bool-mopm3", "sr-keijzer6", "sr-nguyen7",
            "sr-pagie1", "sr-vladislavleva4", "other-klandscapes3", "other-klandscapes7", "other-text");
    // Mapper names have the form "<kind>-<genotypeSize>-<mainParam>" (see the split("-") calls below).
    List<String> mappers = new ArrayList<>();
    for (int gs : new int[] { 64, 128, 256, 512, 1024 }) {
        mappers.add("ge-" + gs + "-2");
        mappers.add("ge-" + gs + "-4");
        mappers.add("ge-" + gs + "-8");
        mappers.add("ge-" + gs + "-12");
        mappers.add("pige-" + gs + "-4");
        mappers.add("pige-" + gs + "-8");
        mappers.add("pige-" + gs + "-16");
        mappers.add("pige-" + gs + "-24");
        mappers.add("hge-" + gs + "-0");
        mappers.add("whge-" + gs + "-2");
        mappers.add("whge-" + gs + "-3");
        mappers.add("whge-" + gs + "-5");
    }
    mappers.add("sge-0-5");
    mappers.add("sge-0-6");
    mappers.add("sge-0-7");
    mappers.add("sge-0-8");
    // NOTE: the full grid built above is discarded here; only the five mappers below are evaluated.
    mappers.clear();
    mappers.addAll(Lists.newArrayList("ge-1024-8", "pige-1024-16", "hge-1024-0", "whge-1024-3", "sge-0-6"));
    PrintStream filePrintStream = null;
    if (args.length > 0) {
        filePrintStream = new PrintStream(args[0]);
    } else {
        filePrintStream = System.out;
    }
    filePrintStream.printf("problem;mapper;genotypeSize;param;property;value%n");
    //prepare distances
    Distance<Node<String>> phenotypeDistance = new CachedDistance<>(new LeavesEdit<String>());
    Distance<Sequence> genotypeDistance = new CachedDistance<>(new Hamming());
    //iterate
    for (String problemName : problems) {
        for (String mapperName : mappers) {
            System.out.printf("%20.20s, %20.20s", problemName, mapperName);
            //build problem
            // NOTE(review): problem stays null for an unrecognized name, and problem.getGrammar()
            // below would then throw a NullPointerException.
            Problem<String, NumericFitness> problem = null;
            if (problemName.equals("bool-parity5")) {
                problem = new Parity(5);
            } else if (problemName.equals("bool-mopm3")) {
                problem = new MultipleOutputParallelMultiplier(3);
            } else if (problemName.equals("sr-keijzer6")) {
                problem = new HarmonicCurve();
            } else if (problemName.equals("sr-nguyen7")) {
                problem = new Nguyen7(1);
            } else if (problemName.equals("sr-pagie1")) {
                problem = new Pagie1();
            } else if (problemName.equals("sr-vladislavleva4")) {
                problem = new Vladislavleva4(1);
            } else if (problemName.equals("other-klandscapes3")) {
                problem = new KLandscapes(3);
            } else if (problemName.equals("other-klandscapes7")) {
                problem = new KLandscapes(7);
            } else if (problemName.equals("other-text")) {
                problem = new Text();
            }
            //build configuration and evolver
            Mapper mapper = null;
            int genotypeSize = Integer.parseInt(mapperName.split("-")[1]);
            int mapperMainParam = Integer.parseInt(mapperName.split("-")[2]);
            if (mapperName.split("-")[0].equals("ge")) {
                mapper = new StandardGEMapper<>(mapperMainParam, 1, problem.getGrammar());
            } else if (mapperName.split("-")[0].equals("pige")) {
                mapper = new PiGEMapper<>(mapperMainParam, 1, problem.getGrammar());
            } else if (mapperName.split("-")[0].equals("sge")) {
                mapper = new SGEMapper<>(mapperMainParam, problem.getGrammar());
            } else if (mapperName.split("-")[0].equals("hge")) {
                mapper = new HierarchicalMapper<>(problem.getGrammar());
            } else if (mapperName.split("-")[0].equals("whge")) {
                mapper = new WeightedHierarchicalMapper<>(mapperMainParam, false, true, problem.getGrammar());
            }
            //prepare things
            // Fixed seed: the same genotypes are generated for every problem/mapper pair and run.
            Random random = new Random(1);
            // A set, so the loops below keep generating until n DISTINCT genotypes exist.
            Set<Sequence> genotypes = new LinkedHashSet<>(n);
            //build genotypes
            if (mapperName.split("-")[0].equals("sge")) {
                SGEGenotypeFactory<String> factory = new SGEGenotypeFactory<>((SGEMapper) mapper);
                while (genotypes.size() < n) {
                    genotypes.add(factory.build(random));
                }
                // For sge the size in the mapper name is ignored; the factory's bit size is reported instead.
                genotypeSize = factory.getBitSize();
            } else {
                BitsGenotypeFactory factory = new BitsGenotypeFactory(genotypeSize);
                while (genotypes.size() < n) {
                    genotypes.add(factory.build(random));
                }
            }
            //build and fill map
            // phenotype -> all genotypes that map to it; failed mappings are filed under Node.EMPTY_TREE.
            Multimap<Node<String>, Sequence> multimap = HashMultimap.create();
            int progress = 0;
            for (Sequence genotype : genotypes) {
                Node<String> phenotype;
                try {
                    if (mapperName.split("-")[0].equals("sge")) {
                        phenotype = mapper.map((SGEGenotype<String>) genotype, new HashMap<>());
                    } else {
                        phenotype = mapper.map((BitsGenotype) genotype, new HashMap<>());
                    }
                } catch (MappingException e) {
                    phenotype = Node.EMPTY_TREE;
                }
                multimap.put(phenotype, genotype);
                progress = progress + 1;
                // n / 10 is integer division: one dot is printed per tenth of the genotypes.
                if (progress % Math.round(n / 10) == 0) {
                    System.out.print(".");
                }
            }
            System.out.println();
            //compute distances
            List<Pair<Double, Double>> allDistances = new ArrayList<>();
            List<Pair<Double, Double>> allValidDistances = new ArrayList<>();
            // Per phenotype: pairwise distances among its genotypes (self-pairs included),
            // stopping once more than nDist samples were collected for that phenotype.
            Multimap<Node<String>, Double> genotypeDistances = ArrayListMultimap.create();
            for (Node<String> phenotype : multimap.keySet()) {
                for (Sequence genotype1 : multimap.get(phenotype)) {
                    for (Sequence genotype2 : multimap.get(phenotype)) {
                        double gDistance = genotypeDistance.d(genotype1, genotype2);
                        genotypeDistances.put(phenotype, gDistance);
                        if (genotypeDistances.get(phenotype).size() > nDist) {
                            break;
                        }
                    }
                    if (genotypeDistances.get(phenotype).size() > nDist) {
                        break;
                    }
                }
            }
            // Across phenotypes: (genotype distance, phenotype distance) pairs over shuffled entries,
            // stopping once more than nDist pairs were collected.
            List<Map.Entry<Node<String>, Sequence>> entries = new ArrayList<>(multimap.entries());
            Collections.shuffle(entries, random);
            for (Map.Entry<Node<String>, Sequence> entry1 : entries) {
                for (Map.Entry<Node<String>, Sequence> entry2 : entries) {
                    double gDistance = genotypeDistance.d(entry1.getValue(), entry2.getValue());
                    double pDistance = phenotypeDistance.d(entry1.getKey(), entry2.getKey());
                    allDistances.add(new Pair<>(gDistance, pDistance));
                    if (!Node.EMPTY_TREE.equals(entry1.getKey()) && !Node.EMPTY_TREE.equals(entry2.getKey())) {
                        allValidDistances.add(new Pair<>(gDistance, pDistance));
                    }
                    if (allDistances.size() > nDist) {
                        break;
                    }
                }
                if (allDistances.size() > nDist) {
                    break;
                }
            }
            //compute properties
            // invalidity: share of genotypes mapped to the empty tree;
            // redundancy: 1 - (distinct phenotypes / genotypes).
            double invalidity = (double) multimap.get(Node.EMPTY_TREE).size() / (double) genotypes.size();
            double redundancy = 1 - (double) multimap.keySet().size() / (double) genotypes.size();
            double validRedundancy = redundancy;
            if (multimap.keySet().contains(Node.EMPTY_TREE)) {
                validRedundancy = 1 - ((double) multimap.keySet().size() - 1d)
                        / (double) (genotypes.size() - multimap.get(Node.EMPTY_TREE).size());
            }
            double locality = Utils.pearsonCorrelation(allDistances);
            double validLocality = Utils.pearsonCorrelation(allValidDistances);
            // Per-phenotype arrays, indexed in multimap.keySet() iteration order.
            double[] sizes = new double[multimap.keySet().size()];
            double[] meanGenotypeDistances = new double[multimap.keySet().size()];
            // Position of the empty tree in those arrays, or -1 when every mapping succeeded.
            int invalidIndex = -1;
            int c = 0;
            for (Node<String> phenotype : multimap.keySet()) {
                if (Node.EMPTY_TREE.equals(phenotype)) {
                    invalidIndex = c;
                }
                sizes[c] = multimap.get(phenotype).size();
                double[] distances = new double[genotypeDistances.get(phenotype).size()];
                int k = 0;
                for (Double distance : genotypeDistances.get(phenotype)) {
                    distances[k] = distance;
                    k = k + 1;
                }
                meanGenotypeDistances[c] = StatUtils.mean(distances);
                c = c + 1;
            }
            // nonUniformity: standard deviation of the phenotype class sizes divided by their mean.
            double nonUniformity = Math.sqrt(StatUtils.variance(sizes)) / StatUtils.mean(sizes);
            double nonSynonymousity = StatUtils.mean(meanGenotypeDistances)
                    / StatUtils.mean(firsts(allDistances));
            double validNonUniformity = nonUniformity;
            double validNonSynonymousity = nonSynonymousity;
            if (invalidIndex != -1) {
                // Recompute both statistics with the empty-tree entry cut out of the arrays.
                double[] validSizes = new double[multimap.keySet().size() - 1];
                double[] validMeanGenotypeDistances = new double[multimap.keySet().size() - 1];
                if (invalidIndex > 0) {
                    System.arraycopy(sizes, 0, validSizes, 0, invalidIndex);
                    System.arraycopy(meanGenotypeDistances, 0, validMeanGenotypeDistances, 0, invalidIndex);
                }
                System.arraycopy(sizes, invalidIndex + 1, validSizes, invalidIndex,
                        sizes.length - invalidIndex - 1);
                System.arraycopy(meanGenotypeDistances, invalidIndex + 1, validMeanGenotypeDistances,
                        invalidIndex, meanGenotypeDistances.length - invalidIndex - 1);
                validNonUniformity = Math.sqrt(StatUtils.variance(validSizes)) / StatUtils.mean(validSizes);
                validNonSynonymousity = StatUtils.mean(validMeanGenotypeDistances)
                        / StatUtils.mean(firsts(allValidDistances));
            }
            //print one output row per property
            filePrintStream.printf("%s;%s;%d;%d;invalidity;%f %n", problemName, mapperName.split("-")[0],
                    genotypeSize, mapperMainParam, invalidity);
            filePrintStream.printf("%s;%s;%d;%d;redundancy;%f %n", problemName, mapperName.split("-")[0],
                    genotypeSize, mapperMainParam, redundancy);
            filePrintStream.printf("%s;%s;%d;%d;validRedundancy;%f %n", problemName, mapperName.split("-")[0],
                    genotypeSize, mapperMainParam, validRedundancy);
            filePrintStream.printf("%s;%s;%d;%d;locality;%f %n", problemName, mapperName.split("-")[0],
                    genotypeSize, mapperMainParam, locality);
            // NOTE(review): the property name "validLLocality" (double L) looks like a typo, but it is
            // part of the emitted data; confirm with consumers before changing it.
            filePrintStream.printf("%s;%s;%d;%d;validLLocality;%f %n", problemName, mapperName.split("-")[0],
                    genotypeSize, mapperMainParam, validLocality);
            filePrintStream.printf("%s;%s;%d;%d;nonUniformity;%f %n", problemName, mapperName.split("-")[0],
                    genotypeSize, mapperMainParam, nonUniformity);
            filePrintStream.printf("%s;%s;%d;%d;validNonUniformity;%f %n", problemName,
                    mapperName.split("-")[0], genotypeSize, mapperMainParam, validNonUniformity);
            filePrintStream.printf("%s;%s;%d;%d;nonSynonymousity;%f %n", problemName, mapperName.split("-")[0],
                    genotypeSize, mapperMainParam, nonSynonymousity);
            filePrintStream.printf("%s;%s;%d;%d;validNonSynonymousity;%f %n", problemName,
                    mapperName.split("-")[0], genotypeSize, mapperMainParam, validNonSynonymousity);
        }
    }
    // NOTE: when no output file was given, filePrintStream is System.out and is closed here.
    if (filePrintStream != null) {
        filePrintStream.close();
    }
}

From source file:dkpro.similarity.algorithms.vsm.store.convert.ConvertLuceneToVectorIndex.java

/**
 * Converts a Lucene-backed ESA index into a vector index.
 *
 * <p>The terms of the Lucene index at {@code args[0]} are enumerated and filtered by the
 * quality criteria defined below (minimum term length, minimum document frequency, and the
 * numeric / cardinal / monetary checks). For each remaining term, the vector obtained from
 * the source index is written to a new vector index at {@code args[1]}. Whatever exists at
 * the output path is removed via {@code deleteQuietly} before the directory is recreated.
 *
 * @param args {@code args[0]} is the input Lucene index path, {@code args[1]} is the output
 *             path for the vector index
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 * @throws Exception if reading the source index or writing the destination index fails
 */
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        throw new IllegalArgumentException(
                "Expected two arguments: <input index path> <output index path>");
    }

    File inputPath = new File(args[0]);
    File outputPath = new File(args[1]);

    // Start from an empty output directory.
    deleteQuietly(outputPath);
    outputPath.mkdirs();

    boolean ignoreNumerics = true;
    boolean ignoreCardinal = true;
    boolean ignoreMonetary = true;
    int minTermLength = 3;
    int minDocFreq = 5;

    System.out.println("Quality criteria");
    System.out.println("Minimum term length            : " + minTermLength);
    System.out.println("Minimum document frequency     : " + minDocFreq);
    System.out.println("Ignore numeric tokens          : " + ignoreNumerics);
    // Report the cardinal flag itself (previously the numeric flag was printed here).
    System.out.println("Ignore cardinal numeric tokens : " + ignoreCardinal);
    System.out.println("Ignore money values            : " + ignoreMonetary);

    System.out.print("Fetching terms list... ");

    Set<String> terms = new HashSet<String>();
    int ignoredTerms = 0;
    IndexReader reader = IndexReader.open(FSDirectory.open(inputPath));
    try {
        TermEnum termEnum = reader.terms();
        while (termEnum.next()) {
            String term = termEnum.term().text();
            // A term is skipped as soon as it violates any single quality criterion.
            if (((minTermLength > 0) && (term.length() < minTermLength))
                    || (ignoreCardinal && isCardinal(term))
                    || (ignoreMonetary && isMonetary(term))
                    || (ignoreNumerics && isNumericSpace(term))
                    || ((minDocFreq > 0) && (termEnum.docFreq() < minDocFreq))) {
                ignoredTerms++;
                continue;
            }

            terms.add(term);
        }
    } finally {
        // Release the Lucene reader even when term enumeration fails.
        reader.close();
    }

    System.out.println(terms.size() + " terms found. " + ignoredTerms + " terms ignored.");

    System.out.println("Opening source ESA index " + inputPath);
    VectorReader source = new LuceneVectorReader(inputPath);
    // Report the actual destination (previously the input path was printed here).
    System.out.println("Opening destination ESA index " + outputPath);
    VectorIndexWriter esaWriter = new VectorIndexWriter(outputPath, source.getConceptCount());
    try {
        ProgressMeter p = new ProgressMeter(terms.size());
        for (String term : terms) {
            Vector vector = source.getVector(term);
            esaWriter.put(term, vector);

            p.next();
            System.out.println("[" + term + "] " + p);
        }
    } finally {
        // Close the writer even when copying a vector fails.
        esaWriter.close();
    }
}