List of usage examples for java.util Set size
int size();
From source file:fr.inria.atlanmod.kyanos.benchmarks.ase2015.NeoEMFMapQueryGetBranchStatements.java
public static void main(String[] args) { Options options = new Options(); Option inputOpt = OptionBuilder.create(IN); inputOpt.setArgName("INPUT"); inputOpt.setDescription("Input Kyanos resource directory"); inputOpt.setArgs(1);// w ww . j a v a 2 s . c o m inputOpt.setRequired(true); Option inClassOpt = OptionBuilder.create(EPACKAGE_CLASS); inClassOpt.setArgName("CLASS"); inClassOpt.setDescription("FQN of EPackage implementation class"); inClassOpt.setArgs(1); inClassOpt.setRequired(true); Option optFileOpt = OptionBuilder.create(OPTIONS_FILE); optFileOpt.setArgName("FILE"); optFileOpt.setDescription("Properties file holding the options to be used in the Kyanos Resource"); optFileOpt.setArgs(1); options.addOption(inputOpt); options.addOption(inClassOpt); options.addOption(optFileOpt); CommandLineParser parser = new PosixParser(); try { PersistenceBackendFactoryRegistry.getFactories().put(NeoMapURI.NEO_MAP_SCHEME, new MapPersistenceBackendFactory()); CommandLine commandLine = parser.parse(options, args); URI uri = NeoMapURI.createNeoMapURI(new File(commandLine.getOptionValue(IN))); Class<?> inClazz = NeoEMFMapQueryGetBranchStatements.class.getClassLoader() .loadClass(commandLine.getOptionValue(EPACKAGE_CLASS)); inClazz.getMethod("init").invoke(null); ResourceSet resourceSet = new ResourceSetImpl(); resourceSet.getResourceFactoryRegistry().getProtocolToFactoryMap().put(NeoMapURI.NEO_MAP_SCHEME, PersistentResourceFactory.eINSTANCE); Resource resource = resourceSet.createResource(uri); Map<String, Object> loadOpts = new HashMap<String, Object>(); if (commandLine.hasOption(OPTIONS_FILE)) { Properties properties = new Properties(); properties.load(new FileInputStream(new File(commandLine.getOptionValue(OPTIONS_FILE)))); for (final Entry<Object, Object> entry : properties.entrySet()) { loadOpts.put((String) entry.getKey(), (String) entry.getValue()); } } // Add the LoadedObjectCounter store List<StoreOption> storeOptions = new ArrayList<StoreOption>(); // storeOptions.add(PersistentResourceOptions.EStoreOption.LOADED_OBJECT_COUNTER_LOGGING); storeOptions.add(MapResourceOptions.EStoreMapOption.AUTOCOMMIT); storeOptions.add(PersistentResourceOptions.EStoreOption.ESTRUCUTRALFEATURE_CACHING); storeOptions.add(PersistentResourceOptions.EStoreOption.IS_SET_CACHING); storeOptions.add(PersistentResourceOptions.EStoreOption.SIZE_CACHING); loadOpts.put(PersistentResourceOptions.STORE_OPTIONS, storeOptions); resource.load(loadOpts); { Runtime.getRuntime().gc(); long initialUsedMemory = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory(); LOG.log(Level.INFO, MessageFormat.format("Used memory before query: {0}", MessageUtil.byteCountToDisplaySize(initialUsedMemory))); LOG.log(Level.INFO, "Start query"); long begin = System.currentTimeMillis(); Set<TextElement> list = ASE2015JavaQueries.getCommentsTagContent(resource); long end = System.currentTimeMillis(); LOG.log(Level.INFO, "End query"); LOG.log(Level.INFO, MessageFormat.format("Query result contains {0} elements", list.size())); LOG.log(Level.INFO, MessageFormat.format("Time spent: {0}", MessageUtil.formatMillis(end - begin))); Runtime.getRuntime().gc(); long finalUsedMemory = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory(); LOG.log(Level.INFO, MessageFormat.format("Used memory after query: {0}", MessageUtil.byteCountToDisplaySize(finalUsedMemory))); LOG.log(Level.INFO, MessageFormat.format("Memory use increase: {0}", MessageUtil.byteCountToDisplaySize(finalUsedMemory - initialUsedMemory))); } if (resource instanceof PersistentResourceImpl) { PersistentResourceImpl.shutdownWithoutUnload((PersistentResourceImpl) resource); } else { resource.unload(); } } catch (ParseException e) { MessageUtil.showError(e.toString()); MessageUtil.showError("Current arguments: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("java -jar <this-file.jar>", options, true); } catch (Throwable e) { MessageUtil.showError(e.toString()); } }
From source file:Main.java
public static void main(String[] args) { Set<String> s1 = new HashSet<>(); // Add a few elements s1.add("HTML"); s1.add("CSS"); s1.add("XML"); s1.add("XML"); // Duplicate // Create another set by copying s1 Set<String> s2 = new HashSet<>(s1); // Add a few more elements s2.add("Java"); s2.add("SQL"); s2.add(null); // one null is fine s2.add(null); // Duplicate System.out.println("s1: " + s1); System.out.println("s1.size(): " + s1.size()); System.out.println("s2: " + s2); System.out.println("s2.size(): " + s2.size()); }
From source file:ISMAGS.CommandLineInterface.java
public static void main(String[] args) throws IOException { String folder = null, files = null, motifspec = null, output = null; Options opts = new Options(); opts.addOption("folder", true, "Folder name"); opts.addOption("linkfiles", true, "Link files seperated by spaces (format: linktype[char] directed[d/u] filename)"); opts.addOption("motif", true, "Motif description by two strings (format: linktypes)"); opts.addOption("output", true, "Output file name"); CommandLineParser parser = new PosixParser(); try {/*from w w w .j a va2 s .c o m*/ CommandLine cmd = parser.parse(opts, args); if (cmd.hasOption("folder")) { folder = cmd.getOptionValue("folder"); } if (cmd.hasOption("linkfiles")) { files = cmd.getOptionValue("linkfiles"); } if (cmd.hasOption("motif")) { motifspec = cmd.getOptionValue("motif"); } if (cmd.hasOption("output")) { output = cmd.getOptionValue("output"); } } catch (ParseException e) { Die("Error: Parsing error"); } if (print) { printBanner(folder, files, motifspec, output); } if (folder == null || files == null || motifspec == null || output == null) { Die("Error: not all options are provided"); } else { ArrayList<String> linkfiles = new ArrayList<String>(); ArrayList<String> linkTypes = new ArrayList<String>(); ArrayList<String> sourcenetworks = new ArrayList<String>(); ArrayList<String> destinationnetworks = new ArrayList<String>(); ArrayList<Boolean> directed = new ArrayList<Boolean>(); StringTokenizer st = new StringTokenizer(files, " "); while (st.hasMoreTokens()) { linkTypes.add(st.nextToken()); directed.add(st.nextToken().equals("d")); sourcenetworks.add(st.nextToken()); destinationnetworks.add(st.nextToken()); linkfiles.add(folder + st.nextToken()); } ArrayList<LinkType> allLinkTypes = new ArrayList<LinkType>(); HashMap<Character, LinkType> typeTranslation = new HashMap<Character, LinkType>(); for (int i = 0; i < linkTypes.size(); i++) { String n = linkTypes.get(i); char nn = n.charAt(0); LinkType t = typeTranslation.get(nn); if (t == null) { t = new LinkType(directed.get(i), n, i, nn, sourcenetworks.get(i), destinationnetworks.get(i)); } allLinkTypes.add(t); typeTranslation.put(nn, t); } if (print) { System.out.println("Reading network.."); } Network network = Network.readNetworkFromFiles(linkfiles, allLinkTypes); Motif motif = getMotif(motifspec, typeTranslation); if (print) { System.out.println("Starting the search.."); } MotifFinder mf = new MotifFinder(network); long tijd = System.nanoTime(); Set<MotifInstance> motifs = mf.findMotif(motif, false); tijd = System.nanoTime() - tijd; if (print) { System.out.println("Completed search in " + tijd / 1000000 + " milliseconds"); } if (print) { System.out.println("Found " + motifs.size() + " instances of " + motifspec + " motif"); } if (print) { System.out.println("Writing instances to file: " + output); } printMotifs(motifs, output); if (print) { System.out.println("Done."); } // Set<MotifInstance> motifs=null; // MotifFinder mf=null; // System.out.println("Starting the search.."); // long tstart = System.nanoTime(); // for (int i = 0; i < it; i++) { // // mf = new MotifFinder(network, allLinkTypes, true); // motifs = mf.findMotif(motif); // } // // long tend = System.nanoTime(); // double time_in_ms = (tend - tstart) / 1000000.0; // System.out.println("Found " + mf.totalFound + " motifs, " + time_in_ms + " ms"); //// System.out.println("Evaluated " + mf.totalNrMappedNodes+ " search nodes"); //// System.out.println("Found " + motifs.size() + " motifs, " + time_in_ms + " ms"); // printMotifs(motifs, output); } }
From source file:hyperloglog.tools.HyperLogLogCLI.java
public static void main(String[] args) { Options options = new Options(); addOptions(options);//from w w w . j a v a2 s. com CommandLineParser parser = new BasicParser(); CommandLine cli = null; long n = 0; long seed = 123; EncodingType enc = EncodingType.SPARSE; int p = 14; int hb = 64; boolean bitPack = true; boolean noBias = true; int unique = -1; String filePath = null; BufferedReader br = null; String outFile = null; String inFile = null; FileOutputStream fos = null; DataOutputStream out = null; FileInputStream fis = null; DataInputStream in = null; try { cli = parser.parse(options, args); if (!(cli.hasOption('n') || cli.hasOption('f') || cli.hasOption('d'))) { System.out.println("Example usage: hll -n 1000 " + "<OR> hll -f /tmp/input.txt " + "<OR> hll -d -i /tmp/out.hll"); usage(options); return; } if (cli.hasOption('n')) { n = Long.parseLong(cli.getOptionValue('n')); } if (cli.hasOption('e')) { String value = cli.getOptionValue('e'); if (value.equals(EncodingType.DENSE.name())) { enc = EncodingType.DENSE; } } if (cli.hasOption('p')) { p = Integer.parseInt(cli.getOptionValue('p')); if (p < 4 && p > 16) { System.out.println("Warning! Out-of-range value specified for p. Using to p=14."); p = 14; } } if (cli.hasOption('h')) { hb = Integer.parseInt(cli.getOptionValue('h')); } if (cli.hasOption('c')) { noBias = Boolean.parseBoolean(cli.getOptionValue('c')); } if (cli.hasOption('b')) { bitPack = Boolean.parseBoolean(cli.getOptionValue('b')); } if (cli.hasOption('f')) { filePath = cli.getOptionValue('f'); br = new BufferedReader(new FileReader(new File(filePath))); } if (filePath != null && cli.hasOption('n')) { System.out.println("'-f' (input file) specified. Ignoring -n."); } if (cli.hasOption('s')) { if (cli.hasOption('o')) { outFile = cli.getOptionValue('o'); fos = new FileOutputStream(new File(outFile)); out = new DataOutputStream(fos); } else { System.err.println("Specify output file. Example usage: hll -s -o /tmp/out.hll"); usage(options); return; } } if (cli.hasOption('d')) { if (cli.hasOption('i')) { inFile = cli.getOptionValue('i'); fis = new FileInputStream(new File(inFile)); in = new DataInputStream(fis); } else { System.err.println("Specify input file. Example usage: hll -d -i /tmp/in.hll"); usage(options); return; } } // return after deserialization if (fis != null && in != null) { long start = System.currentTimeMillis(); HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in); long end = System.currentTimeMillis(); System.out.println(deserializedHLL.toString()); System.out.println("Count after deserialization: " + deserializedHLL.count()); System.out.println("Deserialization time: " + (end - start) + " ms"); return; } // construct hll and serialize it if required HyperLogLog hll = HyperLogLog.builder().enableBitPacking(bitPack).enableNoBias(noBias).setEncoding(enc) .setNumHashBits(hb).setNumRegisterIndexBits(p).build(); if (br != null) { Set<String> hashset = new HashSet<String>(); String line; while ((line = br.readLine()) != null) { hll.addString(line); hashset.add(line); } n = hashset.size(); } else { Random rand = new Random(seed); for (int i = 0; i < n; i++) { if (unique < 0) { hll.addLong(rand.nextLong()); } else { int val = rand.nextInt(unique); hll.addLong(val); } } } long estCount = hll.count(); System.out.println("Actual count: " + n); System.out.println(hll.toString()); System.out.println("Relative error: " + HyperLogLogUtils.getRelativeError(n, estCount) + "%"); if (fos != null && out != null) { long start = System.currentTimeMillis(); HyperLogLogUtils.serializeHLL(out, hll); long end = System.currentTimeMillis(); System.out.println("Serialized hyperloglog to " + outFile); System.out.println("Serialized size: " + out.size() + " bytes"); System.out.println("Serialization time: " + (end - start) + " ms"); out.close(); } } catch (ParseException e) { System.err.println("Invalid parameter."); usage(options); } catch (NumberFormatException e) { System.err.println("Invalid type for parameter."); usage(options); } catch (FileNotFoundException e) { System.err.println("Specified file not found."); usage(options); } catch (IOException e) { System.err.println("Exception occured while reading file."); usage(options); } }
From source file:act.installer.bing.BingSearchRanker.java
public static void main(final String[] args) throws Exception { // Parse the command line options Options opts = new Options(); for (Option.Builder b : OPTION_BUILDERS) { opts.addOption(b.build());/* w ww . j a va2s . c om*/ } CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { System.err.format("Argument parsing failed: %s\n", e.getMessage()); HELP_FORMATTER.printHelp(BingSearchRanker.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } if (cl.hasOption("help")) { HELP_FORMATTER.printHelp(BingSearchRanker.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); return; } String inputPath = cl.getOptionValue(OPTION_INPUT_FILEPATH); String outputPath = cl.getOptionValue(OPTION_OUTPUT_FILEPATH); Boolean isTSVInput = cl.hasOption(OPTION_TSV_INPUT); // Read the molecule corpus LOGGER.info("Reading the input molecule corpus"); MoleculeCorpus moleculeCorpus = new MoleculeCorpus(); if (isTSVInput) { LOGGER.info("Input format is TSV"); moleculeCorpus.buildCorpusFromTSVFile(inputPath); } else { LOGGER.info("Input format is raw InChIs"); moleculeCorpus.buildCorpusFromRawInchis(inputPath); } // Get the inchi set Set<String> inchis = moleculeCorpus.getMolecules(); LOGGER.info("Found %d molecules in the input corpus", inchis.size()); // Update the Bing Search results in the Installer database BingSearchRanker bingSearchRanker = new BingSearchRanker(cl.hasOption(OPTION_INCLUDE_CHEBI_APPLICATIONS), cl.hasOption(OPTION_INCLUDE_WIKIPEDIA_URL), cl.hasOption(OPTION_INCLUDE_USAGE_EXPLORER_URL), cl.hasOption(OPTION_FORCE_UPDATE)); LOGGER.info("Updating the Bing Search results in the Installer database"); bingSearchRanker.addBingSearchResults(inchis); LOGGER.info("Done updating the Bing Search results"); // Write the results in a TSV file LOGGER.info("Writing results to output file"); bingSearchRanker.writeBingSearchRanksAsTSV(inchis, outputPath); LOGGER.info("Bing Search ranker is done. \"I'm tired, boss.\""); }
From source file:SetTest.java
public static void main(String[] args) { Set<String> words = new HashSet<String>(); // HashSet implements Set long totalTime = 0; Scanner in = new Scanner(System.in); while (in.hasNext()) { String word = in.next();//from ww w. j a va2s . c om long callTime = System.currentTimeMillis(); words.add(word); callTime = System.currentTimeMillis() - callTime; totalTime += callTime; } Iterator<String> iter = words.iterator(); for (int i = 1; i <= 20 && iter.hasNext(); i++) System.out.println(iter.next()); System.out.println(". . ."); System.out.println(words.size() + " distinct words. " + totalTime + " milliseconds."); }
From source file:edu.umass.cs.gigapaxos.PaxosPacketBatcher.java
/** * @param args/*from www . j a v a2s . c o m*/ */ public static void main(String[] args) { Util.assertAssertionsEnabled(); Ballot b1 = new Ballot(23, 456); Ballot b2 = new Ballot(23, 456); assert (b1.equals(b2)); Set<Ballot> bset = new HashSet<Ballot>(); bset.add(b1); assert (bset.contains(b1)); assert (bset.contains(b2)); bset.add(b2); assert (bset.size() == 1) : bset.size(); }
From source file:com.termmed.sampling.ConceptsWithMoreThanThreeRoleGroups.java
/** * The main method./*from w w w. j a v a2 s. c o m*/ * * @param args the arguments * @throws Exception the exception */ public static void main(String[] args) throws Exception { System.out.println("Starting..."); Map<String, Set<String>> groupsMap = new HashMap<String, Set<String>>(); File relsFile = new File( "/Users/alo/Downloads/SnomedCT_RF2Release_INT_20160131-1/Snapshot/Terminology/sct2_Relationship_Snapshot_INT_20160131.txt"); BufferedReader br2 = new BufferedReader(new FileReader(relsFile)); String line2; int count2 = 0; while ((line2 = br2.readLine()) != null) { // process the line. count2++; if (count2 % 10000 == 0) { //System.out.println(count2); } List<String> columns = Arrays.asList(line2.split("\t", -1)); if (columns.size() >= 6) { if (columns.get(2).equals("1") && !columns.get(6).equals("0")) { if (!groupsMap.containsKey(columns.get(4))) { groupsMap.put(columns.get(4), new HashSet<String>()); } groupsMap.get(columns.get(4)).add(columns.get(6)); } } } System.out.println("Relationship groups loaded"); Gson gson = new Gson(); System.out.println("Reading JSON 1"); File crossoverFile1 = new File("/Users/alo/Downloads/crossover_role_to_group.json"); String contents = FileUtils.readFileToString(crossoverFile1, "utf-8"); Type collectionType = new TypeToken<Collection<ControlResultLine>>() { }.getType(); List<ControlResultLine> lineObject = gson.fromJson(contents, collectionType); Set<String> crossovers1 = new HashSet<String>(); for (ControlResultLine loopResult : lineObject) { crossovers1.add(loopResult.conceptId); } System.out.println("Crossovers 1 loaded, " + lineObject.size() + " Objects"); System.out.println("Reading JSON 2"); File crossoverFile2 = new File("/Users/alo/Downloads/crossover_group_to_group.json"); String contents2 = FileUtils.readFileToString(crossoverFile2, "utf-8"); List<ControlResultLine> lineObject2 = gson.fromJson(contents2, collectionType); Set<String> crossovers2 = new HashSet<String>(); for (ControlResultLine loopResult : lineObject2) { crossovers2.add(loopResult.conceptId); } System.out.println("Crossovers 2 loaded, " + lineObject2.size() + " Objects"); Set<String> foundConcepts = new HashSet<String>(); int count3 = 0; BufferedWriter writer = new BufferedWriter( new FileWriter(new File("ConceptsWithMoreThanThreeRoleGroups.csv"))); ; for (String loopConcept : groupsMap.keySet()) { if (groupsMap.get(loopConcept).size() > 3) { writer.write(loopConcept); writer.newLine(); foundConcepts.add(loopConcept); count3++; } } writer.close(); System.out.println("Found " + foundConcepts.size() + " concepts"); int countCrossover1 = 0; for (String loopConcept : foundConcepts) { if (crossovers1.contains(loopConcept)) { countCrossover1++; } } System.out.println(countCrossover1 + " are present in crossover_role_to_group"); int countCrossover2 = 0; for (String loopConcept : foundConcepts) { if (crossovers2.contains(loopConcept)) { countCrossover2++; } } System.out.println(countCrossover2 + " are present in crossover_group_to_group"); System.out.println("Done"); }
From source file:it.units.malelab.ege.MappingPropertiesExperimenter.java
public static void main(String[] args) throws IOException, InterruptedException, ExecutionException { final int n = 10000; final int nDist = 10000; //prepare problems and methods List<String> problems = Lists.newArrayList("bool-parity5", "bool-mopm3", "sr-keijzer6", "sr-nguyen7", "sr-pagie1", "sr-vladislavleva4", "other-klandscapes3", "other-klandscapes7", "other-text"); List<String> mappers = new ArrayList<>(); for (int gs : new int[] { 64, 128, 256, 512, 1024 }) { mappers.add("ge-" + gs + "-2"); mappers.add("ge-" + gs + "-4"); mappers.add("ge-" + gs + "-8"); mappers.add("ge-" + gs + "-12"); mappers.add("pige-" + gs + "-4"); mappers.add("pige-" + gs + "-8"); mappers.add("pige-" + gs + "-16"); mappers.add("pige-" + gs + "-24"); mappers.add("hge-" + gs + "-0"); mappers.add("whge-" + gs + "-2"); mappers.add("whge-" + gs + "-3"); mappers.add("whge-" + gs + "-5"); }// w ww . j a va2 s . c om mappers.add("sge-0-5"); mappers.add("sge-0-6"); mappers.add("sge-0-7"); mappers.add("sge-0-8"); mappers.clear(); mappers.addAll(Lists.newArrayList("ge-1024-8", "pige-1024-16", "hge-1024-0", "whge-1024-3", "sge-0-6")); PrintStream filePrintStream = null; if (args.length > 0) { filePrintStream = new PrintStream(args[0]); } else { filePrintStream = System.out; } filePrintStream.printf("problem;mapper;genotypeSize;param;property;value%n"); //prepare distances Distance<Node<String>> phenotypeDistance = new CachedDistance<>(new LeavesEdit<String>()); Distance<Sequence> genotypeDistance = new CachedDistance<>(new Hamming()); //iterate for (String problemName : problems) { for (String mapperName : mappers) { System.out.printf("%20.20s, %20.20s", problemName, mapperName); //build problem Problem<String, NumericFitness> problem = null; if (problemName.equals("bool-parity5")) { problem = new Parity(5); } else if (problemName.equals("bool-mopm3")) { problem = new MultipleOutputParallelMultiplier(3); } else if (problemName.equals("sr-keijzer6")) { problem = new HarmonicCurve(); } else if (problemName.equals("sr-nguyen7")) { problem = new Nguyen7(1); } else if (problemName.equals("sr-pagie1")) { problem = new Pagie1(); } else if (problemName.equals("sr-vladislavleva4")) { problem = new Vladislavleva4(1); } else if (problemName.equals("other-klandscapes3")) { problem = new KLandscapes(3); } else if (problemName.equals("other-klandscapes7")) { problem = new KLandscapes(7); } else if (problemName.equals("other-text")) { problem = new Text(); } //build configuration and evolver Mapper mapper = null; int genotypeSize = Integer.parseInt(mapperName.split("-")[1]); int mapperMainParam = Integer.parseInt(mapperName.split("-")[2]); if (mapperName.split("-")[0].equals("ge")) { mapper = new StandardGEMapper<>(mapperMainParam, 1, problem.getGrammar()); } else if (mapperName.split("-")[0].equals("pige")) { mapper = new PiGEMapper<>(mapperMainParam, 1, problem.getGrammar()); } else if (mapperName.split("-")[0].equals("sge")) { mapper = new SGEMapper<>(mapperMainParam, problem.getGrammar()); } else if (mapperName.split("-")[0].equals("hge")) { mapper = new HierarchicalMapper<>(problem.getGrammar()); } else if (mapperName.split("-")[0].equals("whge")) { mapper = new WeightedHierarchicalMapper<>(mapperMainParam, false, true, problem.getGrammar()); } //prepare things Random random = new Random(1); Set<Sequence> genotypes = new LinkedHashSet<>(n); //build genotypes if (mapperName.split("-")[0].equals("sge")) { SGEGenotypeFactory<String> factory = new SGEGenotypeFactory<>((SGEMapper) mapper); while (genotypes.size() < n) { genotypes.add(factory.build(random)); } genotypeSize = factory.getBitSize(); } else { BitsGenotypeFactory factory = new BitsGenotypeFactory(genotypeSize); while (genotypes.size() < n) { genotypes.add(factory.build(random)); } } //build and fill map Multimap<Node<String>, Sequence> multimap = HashMultimap.create(); int progress = 0; for (Sequence genotype : genotypes) { Node<String> phenotype; try { if (mapperName.split("-")[0].equals("sge")) { phenotype = mapper.map((SGEGenotype<String>) genotype, new HashMap<>()); } else { phenotype = mapper.map((BitsGenotype) genotype, new HashMap<>()); } } catch (MappingException e) { phenotype = Node.EMPTY_TREE; } multimap.put(phenotype, genotype); progress = progress + 1; if (progress % Math.round(n / 10) == 0) { System.out.print("."); } } System.out.println(); //compute distances List<Pair<Double, Double>> allDistances = new ArrayList<>(); List<Pair<Double, Double>> allValidDistances = new ArrayList<>(); Multimap<Node<String>, Double> genotypeDistances = ArrayListMultimap.create(); for (Node<String> phenotype : multimap.keySet()) { for (Sequence genotype1 : multimap.get(phenotype)) { for (Sequence genotype2 : multimap.get(phenotype)) { double gDistance = genotypeDistance.d(genotype1, genotype2); genotypeDistances.put(phenotype, gDistance); if (genotypeDistances.get(phenotype).size() > nDist) { break; } } if (genotypeDistances.get(phenotype).size() > nDist) { break; } } } List<Map.Entry<Node<String>, Sequence>> entries = new ArrayList<>(multimap.entries()); Collections.shuffle(entries, random); for (Map.Entry<Node<String>, Sequence> entry1 : entries) { for (Map.Entry<Node<String>, Sequence> entry2 : entries) { double gDistance = genotypeDistance.d(entry1.getValue(), entry2.getValue()); double pDistance = phenotypeDistance.d(entry1.getKey(), entry2.getKey()); allDistances.add(new Pair<>(gDistance, pDistance)); if (!Node.EMPTY_TREE.equals(entry1.getKey()) && !Node.EMPTY_TREE.equals(entry2.getKey())) { allValidDistances.add(new Pair<>(gDistance, pDistance)); } if (allDistances.size() > nDist) { break; } } if (allDistances.size() > nDist) { break; } } //compute properties double invalidity = (double) multimap.get(Node.EMPTY_TREE).size() / (double) genotypes.size(); double redundancy = 1 - (double) multimap.keySet().size() / (double) genotypes.size(); double validRedundancy = redundancy; if (multimap.keySet().contains(Node.EMPTY_TREE)) { validRedundancy = 1 - ((double) multimap.keySet().size() - 1d) / (double) (genotypes.size() - multimap.get(Node.EMPTY_TREE).size()); } double locality = Utils.pearsonCorrelation(allDistances); double validLocality = Utils.pearsonCorrelation(allValidDistances); double[] sizes = new double[multimap.keySet().size()]; double[] meanGenotypeDistances = new double[multimap.keySet().size()]; int invalidIndex = -1; int c = 0; for (Node<String> phenotype : multimap.keySet()) { if (Node.EMPTY_TREE.equals(phenotype)) { invalidIndex = c; } sizes[c] = multimap.get(phenotype).size(); double[] distances = new double[genotypeDistances.get(phenotype).size()]; int k = 0; for (Double distance : genotypeDistances.get(phenotype)) { distances[k] = distance; k = k + 1; } meanGenotypeDistances[c] = StatUtils.mean(distances); c = c + 1; } double nonUniformity = Math.sqrt(StatUtils.variance(sizes)) / StatUtils.mean(sizes); double nonSynonymousity = StatUtils.mean(meanGenotypeDistances) / StatUtils.mean(firsts(allDistances)); double validNonUniformity = nonUniformity; double validNonSynonymousity = nonSynonymousity; if (invalidIndex != -1) { double[] validSizes = new double[multimap.keySet().size() - 1]; double[] validMeanGenotypeDistances = new double[multimap.keySet().size() - 1]; if (invalidIndex > 0) { System.arraycopy(sizes, 0, validSizes, 0, invalidIndex); System.arraycopy(meanGenotypeDistances, 0, validMeanGenotypeDistances, 0, invalidIndex); } System.arraycopy(sizes, invalidIndex + 1, validSizes, invalidIndex, sizes.length - invalidIndex - 1); System.arraycopy(meanGenotypeDistances, invalidIndex + 1, validMeanGenotypeDistances, invalidIndex, meanGenotypeDistances.length - invalidIndex - 1); validNonUniformity = Math.sqrt(StatUtils.variance(validSizes)) / StatUtils.mean(validSizes); validNonSynonymousity = StatUtils.mean(validMeanGenotypeDistances) / StatUtils.mean(firsts(allValidDistances)); } //compute locality filePrintStream.printf("%s;%s;%d;%d;invalidity;%f %n", problemName, mapperName.split("-")[0], genotypeSize, mapperMainParam, invalidity); filePrintStream.printf("%s;%s;%d;%d;redundancy;%f %n", problemName, mapperName.split("-")[0], genotypeSize, mapperMainParam, redundancy); filePrintStream.printf("%s;%s;%d;%d;validRedundancy;%f %n", problemName, mapperName.split("-")[0], genotypeSize, mapperMainParam, validRedundancy); filePrintStream.printf("%s;%s;%d;%d;locality;%f %n", problemName, mapperName.split("-")[0], genotypeSize, mapperMainParam, locality); filePrintStream.printf("%s;%s;%d;%d;validLLocality;%f %n", problemName, mapperName.split("-")[0], genotypeSize, mapperMainParam, validLocality); filePrintStream.printf("%s;%s;%d;%d;nonUniformity;%f %n", problemName, mapperName.split("-")[0], genotypeSize, mapperMainParam, nonUniformity); filePrintStream.printf("%s;%s;%d;%d;validNonUniformity;%f %n", problemName, mapperName.split("-")[0], genotypeSize, mapperMainParam, validNonUniformity); filePrintStream.printf("%s;%s;%d;%d;nonSynonymousity;%f %n", problemName, mapperName.split("-")[0], genotypeSize, mapperMainParam, nonSynonymousity); filePrintStream.printf("%s;%s;%d;%d;validNonSynonymousity;%f %n", problemName, mapperName.split("-")[0], genotypeSize, mapperMainParam, validNonSynonymousity); } } if (filePrintStream != null) { filePrintStream.close(); } }
From source file:dkpro.similarity.algorithms.vsm.store.convert.ConvertLuceneToVectorIndex.java
public static void main(String[] args) throws Exception { File inputPath = new File(args[0]); File outputPath = new File(args[1]); deleteQuietly(outputPath);/*from w w w . j a va2 s . c o m*/ outputPath.mkdirs(); boolean ignoreNumerics = true; boolean ignoreCardinal = true; boolean ignoreMonetary = true; int minTermLength = 3; int minDocFreq = 5; System.out.println("Quality criteria"); System.out.println("Minimum term length : " + minTermLength); System.out.println("Minimum document frequency : " + minDocFreq); System.out.println("Ignore numeric tokens : " + ignoreNumerics); System.out.println("Ignore cardinal numeric tokens : " + ignoreNumerics); System.out.println("Ignore money values : " + ignoreMonetary); System.out.print("Fetching terms list... "); IndexReader reader = IndexReader.open(FSDirectory.open(inputPath)); TermEnum termEnum = reader.terms(); Set<String> terms = new HashSet<String>(); int ignoredTerms = 0; while (termEnum.next()) { String term = termEnum.term().text(); if (((minTermLength > 0) && (term.length() < minTermLength)) || (ignoreCardinal && isCardinal(term)) || (ignoreMonetary && isMonetary(term)) || (ignoreNumerics && isNumericSpace(term)) || ((minDocFreq > 0) && (termEnum.docFreq() < minDocFreq))) { ignoredTerms++; continue; } terms.add(term); } reader.close(); System.out.println(terms.size() + " terms found. " + ignoredTerms + " terms ignored."); System.out.println("Opening source ESA index " + inputPath); VectorReader source = new LuceneVectorReader(inputPath); System.out.println("Opening destination ESA index " + inputPath); VectorIndexWriter esaWriter = new VectorIndexWriter(outputPath, source.getConceptCount()); ProgressMeter p = new ProgressMeter(terms.size()); for (String term : terms) { Vector vector = source.getVector(term); esaWriter.put(term, vector); p.next(); System.out.println("[" + term + "] " + p); } esaWriter.close(); }