List of usage examples for java.util.Set.contains
boolean contains(Object o);
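Before the examples, a minimal self-contained sketch of the contract (the values are arbitrary): contains returns true exactly when the set holds an element equal to the argument, where equality is decided by equals()/hashCode(), not object identity.

import java.util.HashSet;
import java.util.Set;

public class SetContainsDemo {
    public static void main(String[] args) {
        Set<String> colors = new HashSet<>();
        colors.add("red");
        colors.add("green");

        // Membership is decided by equals()/hashCode(), not identity.
        System.out.println(colors.contains("red"));                // true
        System.out.println(colors.contains("blue"));               // false
        System.out.println(colors.contains(new String("green")));  // true: equal value, different object
    }
}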
From source file:de.citec.sc.matoll.process.Matoll_CreateMax.java
public static void main(String[] args) throws IOException, ParserConfigurationException, SAXException,
        InstantiationException, IllegalAccessException, ClassNotFoundException, Exception {
    String directory;
    String gold_standard_lexicon;
    String output_lexicon;
    String configFile;
    Language language;
    String output;

    Stopwords stopwords = new Stopwords();

    HashMap<String, Double> maxima;
    maxima = new HashMap<String, Double>();

    if (args.length < 3) {
        System.out.print("Usage: Matoll --mode=train/test <DIRECTORY> <CONFIG>\n");
        return;
    }

    // Classifier classifier;

    directory = args[1];
    configFile = args[2];

    final Config config = new Config();
    config.loadFromFile(configFile);

    gold_standard_lexicon = config.getGoldStandardLexicon();
    String model_file = config.getModel();
    output_lexicon = config.getOutputLexicon();
    output = config.getOutput();
    language = config.getLanguage();

    LexiconLoader loader = new LexiconLoader();
    Lexicon gold = loader.loadFromFile(gold_standard_lexicon);

    Set<String> uris = new HashSet<>();
    // Map<Integer,String> sentence_list = new HashMap<>();
    Map<Integer, Set<Integer>> mapping_words_sentences = new HashMap<>();

    // Consider only properties.
    for (LexicalEntry entry : gold.getEntries()) {
        try {
            for (Sense sense : entry.getSenseBehaviours().keySet()) {
                String tmp_uri = sense.getReference().getURI().replace("http://dbpedia.org/ontology/", "");
                if (!Character.isUpperCase(tmp_uri.charAt(0))) {
                    uris.add(sense.getReference().getURI());
                }
            }
        } catch (Exception e) {
        }
    }

    ModelPreprocessor preprocessor = new ModelPreprocessor(language);
    preprocessor.setCoreferenceResolution(false);
    Set<String> dep = new HashSet<>();
    dep.add("prep");
    dep.add("appos");
    dep.add("nn");
    dep.add("dobj");
    dep.add("pobj");
    dep.add("num");
    preprocessor.setDEP(dep);

    List<File> list_files = new ArrayList<>();
    if (config.getFiles().isEmpty()) {
        File folder = new File(directory);
        File[] files = folder.listFiles();
        for (File file : files) {
            if (file.toString().contains(".ttl"))
                list_files.add(file);
        }
    } else {
        list_files.addAll(config.getFiles());
    }
    System.out.println(list_files.size());

    int sentence_counter = 0;
    Map<String, Set<Integer>> bag_words_uri = new HashMap<>();
    Map<String, Integer> mapping_word_id = new HashMap<>();

    for (File file : list_files) {
        Model model = RDFDataMgr.loadModel(file.toString());
        for (Model sentence : getSentences(model)) {
            String reference = getReference(sentence);
            reference = reference.replace("http://dbpedia/", "http://dbpedia.org/");
            if (uris.contains(reference)) {
                sentence_counter += 1;
                Set<Integer> words_ids = getBagOfWords(sentence, stopwords, mapping_word_id);
                // TODO: add sentence preprocessing
                String obj = getObject(sentence);
                String subj = getSubject(sentence);
                preprocessor.preprocess(sentence, subj, obj, language);
                // TODO: also return marker if object or subject of property
                // (in SPARQL this has to be optional of course)
                String parsed_sentence = getParsedSentence(sentence);
                try (FileWriter fw = new FileWriter("mapping_sentences_to_ids_goldstandard.tsv", true);
                        BufferedWriter bw = new BufferedWriter(fw);
                        PrintWriter out = new PrintWriter(bw)) {
                    out.println(sentence_counter + "\t" + parsed_sentence);
                } catch (IOException e) {
                    e.printStackTrace();
                }
                for (Integer word_id : words_ids) {
                    if (mapping_words_sentences.containsKey(word_id)) {
                        Set<Integer> tmp_set = mapping_words_sentences.get(word_id);
                        tmp_set.add(sentence_counter);
                        mapping_words_sentences.put(word_id, tmp_set);
                    } else {
                        Set<Integer> tmp_set = new HashSet<>();
                        tmp_set.add(sentence_counter);
                        mapping_words_sentences.put(word_id, tmp_set);
                    }
                }
                if (bag_words_uri.containsKey(reference)) {
                    Set<Integer> tmp = bag_words_uri.get(reference);
                    for (Integer w : words_ids) {
                        tmp.add(w);
                    }
                    bag_words_uri.put(reference, tmp);
                } else {
                    Set<Integer> tmp = new HashSet<>();
                    for (Integer w : words_ids) {
                        tmp.add(w);
                    }
                    bag_words_uri.put(reference, tmp);
                }
            }
        }
        model.close();
    }

    PrintWriter writer = new PrintWriter("bag_of_words_only_goldstandard.tsv");
    StringBuilder string_builder = new StringBuilder();
    for (String r : bag_words_uri.keySet()) {
        string_builder.append(r);
        for (Integer i : bag_words_uri.get(r)) {
            string_builder.append("\t");
            string_builder.append(i);
        }
        string_builder.append("\n");
    }
    writer.write(string_builder.toString());
    writer.close();

    writer = new PrintWriter("mapping_words_to_sentenceids_goldstandard.tsv");
    string_builder = new StringBuilder();
    for (Integer w : mapping_words_sentences.keySet()) {
        string_builder.append(w);
        for (int i : mapping_words_sentences.get(w)) {
            string_builder.append("\t");
            string_builder.append(i);
        }
        string_builder.append("\n");
    }
    writer.write(string_builder.toString());
    writer.close();
}
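Both containsKey-then-put branches above maintain a map from a key to a Set of ids. On Java 8+, Map.computeIfAbsent collapses each branch pair to a single line. A self-contained sketch with the same shape; the names mirror the example and the values are made up:

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class ComputeIfAbsentSketch {
    public static void main(String[] args) {
        Map<Integer, Set<Integer>> mapping_words_sentences = new HashMap<>();
        Map<String, Set<Integer>> bag_words_uri = new HashMap<>();
        Set<Integer> words_ids = new HashSet<>();
        words_ids.add(7);
        words_ids.add(42);
        int sentence_counter = 1;
        String reference = "http://dbpedia.org/ontology/spouse"; // made-up value

        // computeIfAbsent creates the empty set only on first sight of a key and
        // returns the set stored in the map, so mutating it in place is sufficient.
        for (Integer word_id : words_ids) {
            mapping_words_sentences.computeIfAbsent(word_id, k -> new HashSet<>()).add(sentence_counter);
        }
        bag_words_uri.computeIfAbsent(reference, k -> new HashSet<>()).addAll(words_ids);

        System.out.println(mapping_words_sentences); // e.g. {7=[1], 42=[1]}
        System.out.println(bag_words_uri);           // e.g. {http://dbpedia.org/ontology/spouse=[7, 42]}
    }
}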
From source file:com.act.analysis.surfactant.AnalysisDriver.java
public static void main(String[] args) throws Exception {
    Options opts = new Options();
    for (Option.Builder b : OPTION_BUILDERS) {
        opts.addOption(b.build());
    }

    CommandLine cl = null;
    try {
        CommandLineParser parser = new DefaultParser();
        cl = parser.parse(opts, args);
    } catch (ParseException e) {
        System.err.format("Argument parsing failed: %s\n", e.getMessage());
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        System.exit(1);
    }

    if (cl.hasOption("help")) {
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        return;
    }

    Set<String> seenOutputIds = new HashSet<>();

    TSVWriter<String, String> tsvWriter = null;
    if (cl.hasOption(OPTION_OUTPUT_FILE)) {
        File outputFile = new File(cl.getOptionValue(OPTION_OUTPUT_FILE));
        List<Map<String, String>> oldResults = null;
        if (outputFile.exists()) {
            System.err
                    .format("Output file already exists, reading old results and skipping processed molecules.\n");
            TSVParser outputParser = new TSVParser();
            outputParser.parse(outputFile);
            oldResults = outputParser.getResults();
            for (Map<String, String> row : oldResults) {
                // TODO: verify that the last row was written cleanly/completely.
                seenOutputIds.add(row.get("id"));
            }
        }

        List<String> header = new ArrayList<>();
        header.add("name");
        header.add("id");
        header.add("inchi");
        header.add("label");
        for (SurfactantAnalysis.FEATURES f : SurfactantAnalysis.FEATURES.values()) {
            header.add(f.toString());
        }
        // TODO: make this API more auto-closable friendly.
        tsvWriter = new TSVWriter<>(header);
        tsvWriter.open(outputFile);
        if (oldResults != null) {
            System.out.format("Re-writing %d existing result rows\n", oldResults.size());
            tsvWriter.append(oldResults);
        }
    }

    try {
        Map<SurfactantAnalysis.FEATURES, Double> analysisFeatures;
        LicenseManager.setLicenseFile(cl.getOptionValue(OPTION_LICENSE_FILE));
        if (cl.hasOption(OPTION_INCHI)) {
            analysisFeatures = SurfactantAnalysis.performAnalysis(cl.getOptionValue(OPTION_INCHI),
                    cl.hasOption(OPTION_DISPLAY));
            Map<String, String> tsvFeatures = new HashMap<>();
            // Convert features to strings to avoid some weird formatting issues. It's ugly, but it works.
            for (Map.Entry<SurfactantAnalysis.FEATURES, Double> entry : analysisFeatures.entrySet()) {
                tsvFeatures.put(entry.getKey().toString(), String.format("%.6f", entry.getValue()));
            }
            tsvFeatures.put("name", "direct-inchi-input");
            if (tsvWriter != null) {
                tsvWriter.append(tsvFeatures);
            }
        } else if (cl.hasOption(OPTION_INPUT_FILE)) {
            TSVParser parser = new TSVParser();
            parser.parse(new File(cl.getOptionValue(OPTION_INPUT_FILE)));
            int i = 0;
            List<Map<String, String>> inputRows = parser.getResults();

            for (Map<String, String> row : inputRows) {
                i++; // Just for warning messages.
                if (!row.containsKey("name") || !row.containsKey("id") || !row.containsKey("inchi")) {
                    System.err.format(
                            "WARNING: TSV rows must contain at least name, id, and inchi, skipping row %d\n", i);
                    continue;
                }
                if (seenOutputIds.contains(row.get("id"))) {
                    System.out.format("Skipping input row with id already in output: %s\n", row.get("id"));
                    continue;
                }

                System.out.format("Analysis for chemical %s\n", row.get("name"));
                try {
                    analysisFeatures = SurfactantAnalysis.performAnalysis(row.get("inchi"), false);
                } catch (Exception e) {
                    // Ignore exceptions for now. Sometimes the regression analysis or Chemaxon processing
                    // chokes unexpectedly.
                    System.err.format("ERROR caught exception while processing '%s':\n", row.get("name"));
                    System.err.format("%s\n", e.getMessage());
                    e.printStackTrace(System.err);
                    System.err.println("Skipping...");
                    continue;
                }
                System.out.format("--- Done analysis for chemical %s\n", row.get("name"));

                // This is a duplicate of the OPTION_INCHI block code, but it's inside of a tight loop, so...
                Map<String, String> tsvFeatures = new HashMap<>();
                for (Map.Entry<SurfactantAnalysis.FEATURES, Double> entry : analysisFeatures.entrySet()) {
                    tsvFeatures.put(entry.getKey().toString(), String.format("%.6f", entry.getValue()));
                }
                tsvFeatures.put("name", row.get("name"));
                tsvFeatures.put("id", row.get("id"));
                tsvFeatures.put("inchi", row.get("inchi"));
                tsvFeatures.put("label", row.containsKey("label") ? row.get("label") : "?");
                if (tsvWriter != null) {
                    tsvWriter.append(tsvFeatures);
                    // Flush every time in case we crash or get interrupted. The features must flow!
                    tsvWriter.flush();
                }
            }
        } else {
            throw new RuntimeException("Must specify inchi or input file");
        }
    } finally {
        if (tsvWriter != null) {
            tsvWriter.close();
        }
    }
}
From source file:com.github.ansell.shp.SHPDump.java
public static void main(String... args) throws Exception {
    final OptionParser parser = new OptionParser();
    final OptionSpec<Void> help = parser.accepts("help").forHelp();
    final OptionSpec<File> input = parser.accepts("input").withRequiredArg().ofType(File.class).required()
            .describedAs("The input SHP file");
    final OptionSpec<File> output = parser.accepts("output").withRequiredArg().ofType(File.class).required()
            .describedAs("The output directory to use for debugging files");
    final OptionSpec<String> outputPrefix = parser.accepts("prefix").withRequiredArg().ofType(String.class)
            .defaultsTo("shp-debug").describedAs("The output prefix to use for debugging files");
    final OptionSpec<File> outputMappingTemplate = parser.accepts("output-mapping").withRequiredArg()
            .ofType(File.class).describedAs("The output mapping template file if it needs to be generated.");
    final OptionSpec<Integer> resolution = parser.accepts("resolution").withRequiredArg().ofType(Integer.class)
            .defaultsTo(2048).describedAs("The output image file resolution");
    final OptionSpec<String> format = parser.accepts("format").withRequiredArg().ofType(String.class)
            .defaultsTo("png").describedAs("The output image format");
    final OptionSpec<String> removeIfEmpty = parser.accepts("remove-if-empty").withRequiredArg()
            .ofType(String.class).describedAs(
                    "The name of an attribute to remove if its value is empty before outputting the resulting shapefile. Use multiple times to specify multiple fields to check");

    OptionSet options = null;
    try {
        options = parser.parse(args);
    } catch (final OptionException e) {
        System.out.println(e.getMessage());
        parser.printHelpOn(System.out);
        throw e;
    }

    if (options.has(help)) {
        parser.printHelpOn(System.out);
        return;
    }

    final Path inputPath = input.value(options).toPath();
    if (!Files.exists(inputPath)) {
        throw new FileNotFoundException("Could not find input SHP file: " + inputPath.toString());
    }

    final Path outputPath = output.value(options).toPath();
    if (!Files.exists(outputPath)) {
        throw new FileNotFoundException("Output directory does not exist: " + outputPath.toString());
    }

    final Path outputMappingPath = options.has(outputMappingTemplate)
            ? outputMappingTemplate.value(options).toPath()
            : null;
    if (options.has(outputMappingTemplate) && Files.exists(outputMappingPath)) {
        throw new FileNotFoundException(
                "Output mapping template file already exists: " + outputMappingPath.toString());
    }

    final Set<String> filterFields = ConcurrentHashMap.newKeySet();
    if (options.has(removeIfEmpty)) {
        for (String nextFilterField : removeIfEmpty.values(options)) {
            System.out.println("Will filter field if empty value found: " + nextFilterField);
            filterFields.add(nextFilterField);
        }
    }

    if (!filterFields.isEmpty()) {
        System.out.println("Full set of filter fields: " + filterFields);
    }

    final String prefix = outputPrefix.value(options);

    FileDataStore store = FileDataStoreFinder.getDataStore(inputPath.toFile());

    if (store == null) {
        throw new RuntimeException("Could not read the given input as an ESRI Shapefile: "
                + inputPath.toAbsolutePath().toString());
    }

    for (String typeName : new LinkedHashSet<>(Arrays.asList(store.getTypeNames()))) {
        System.out.println("");
        System.out.println("Type: " + typeName);
        SimpleFeatureSource featureSource = store.getFeatureSource(typeName);
        SimpleFeatureType schema = featureSource.getSchema();

        Name outputSchemaName = new NameImpl(schema.getName().getNamespaceURI(),
                schema.getName().getLocalPart().replace(" ", "").replace("%20", ""));
        System.out.println("Replacing name on schema: " + schema.getName() + " with " + outputSchemaName);
        SimpleFeatureType outputSchema = SHPUtils.changeSchemaName(schema, outputSchemaName);

        List<String> attributeList = new ArrayList<>();
        for (AttributeDescriptor attribute : schema.getAttributeDescriptors()) {
            System.out.println("Attribute: " + attribute.getName().toString());
            attributeList.add(attribute.getName().toString());
        }
        CsvSchema csvSchema = CSVUtil.buildSchema(attributeList);

        SimpleFeatureCollection collection = featureSource.getFeatures();
        int featureCount = 0;
        Path nextCSVFile = outputPath.resolve(prefix + ".csv");
        Path nextSummaryCSVFile = outputPath
                .resolve(prefix + "-" + outputSchema.getTypeName() + "-Summary.csv");
        List<SimpleFeature> outputFeatureList = new CopyOnWriteArrayList<>();

        try (SimpleFeatureIterator iterator = collection.features();
                Writer bufferedWriter = Files.newBufferedWriter(nextCSVFile, StandardCharsets.UTF_8,
                        StandardOpenOption.CREATE_NEW);
                SequenceWriter csv = CSVUtil.newCSVWriter(bufferedWriter, csvSchema);) {
            List<String> nextLine = new ArrayList<>();
            while (iterator.hasNext()) {
                SimpleFeature feature = iterator.next();
                featureCount++;
                if (featureCount <= 2) {
                    System.out.println("");
                    System.out.println(feature.getIdentifier());
                } else if (featureCount % 100 == 0) {
                    System.out.print(".");
                }
                boolean filterThisFeature = false;
                for (AttributeDescriptor attribute : schema.getAttributeDescriptors()) {
                    String featureString = Optional.ofNullable(feature.getAttribute(attribute.getName()))
                            .orElse("").toString();
                    nextLine.add(featureString);
                    if (filterFields.contains(attribute.getName().toString())
                            && featureString.trim().isEmpty()) {
                        filterThisFeature = true;
                    }
                    if (featureString.length() > 100) {
                        featureString = featureString.substring(0, 100) + "...";
                    }
                    if (featureCount <= 2) {
                        System.out.print(attribute.getName() + "=");
                        System.out.println(featureString);
                    }
                }
                if (!filterThisFeature) {
                    outputFeatureList.add(SHPUtils.changeSchemaName(feature, outputSchema));
                    csv.write(nextLine);
                }
                nextLine.clear();
            }
        }
        try (Reader csvReader = Files.newBufferedReader(nextCSVFile, StandardCharsets.UTF_8);
                Writer summaryOutput = Files.newBufferedWriter(nextSummaryCSVFile, StandardCharsets.UTF_8,
                        StandardOpenOption.CREATE_NEW);
                final Writer mappingWriter = options.has(outputMappingTemplate)
                        ? Files.newBufferedWriter(outputMappingPath)
                        : NullWriter.NULL_WRITER) {
            CSVSummariser.runSummarise(csvReader, summaryOutput, mappingWriter,
                    CSVSummariser.DEFAULT_SAMPLE_COUNT, false);
        }
        if (featureCount > 100) {
            System.out.println("");
        }
        System.out.println("");
        System.out.println("Feature count: " + featureCount);

        SimpleFeatureCollection outputCollection = new ListFeatureCollection(outputSchema, outputFeatureList);
        Path outputShapefilePath = outputPath.resolve(prefix + "-" + outputSchema.getTypeName() + "-dump");
        if (!Files.exists(outputShapefilePath)) {
            Files.createDirectory(outputShapefilePath);
        }
        SHPUtils.writeShapefile(outputCollection, outputShapefilePath);

        // Create ZIP file from the contents to keep the subfiles together
        Path outputShapefileZipPath = outputPath
                .resolve(prefix + "-" + outputSchema.getTypeName() + "-dump.zip");
        try (final OutputStream out = Files.newOutputStream(outputShapefileZipPath,
                StandardOpenOption.CREATE_NEW);
                final ZipOutputStream zip = new ZipOutputStream(out, StandardCharsets.UTF_8);) {
            Files.list(outputShapefilePath).forEachOrdered(Unchecked.consumer(e -> {
                zip.putNextEntry(new ZipEntry(e.getFileName().toString()));
                Files.copy(e, zip);
                zip.closeEntry();
            }));
        }

        try (final OutputStream outputStream = Files.newOutputStream(
                outputPath.resolve(prefix + "." + format.value(options)), StandardOpenOption.CREATE_NEW);) {
            MapContent map = new MapContent();
            map.setTitle(prefix + "-" + outputSchema.getTypeName());
            Style style = SLD.createSimpleStyle(featureSource.getSchema());
            Layer layer = new FeatureLayer(new CollectionFeatureSource(outputCollection), style);
            map.addLayer(layer);
            SHPUtils.renderImage(map, outputStream, resolution.value(options), format.value(options));
        }
    }
}
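The filterFields set above comes from ConcurrentHashMap.newKeySet(), which returns a thread-safe Set view backed by a ConcurrentHashMap, so contains() is safe to call while other threads may still be adding elements. A minimal sketch of that choice (the field names are invented):

import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

public class ConcurrentSetSketch {
    public static void main(String[] args) {
        // Thread-safe Set<String>: contains() and add() may be called concurrently.
        Set<String> filterFields = ConcurrentHashMap.newKeySet();
        filterFields.add("NAME");
        System.out.println(filterFields.contains("NAME")); // true
        System.out.println(filterFields.contains("AREA")); // false
    }
}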
From source file:Main.java
protected static boolean isAnyPolicy(Set policySet) {
    return policySet == null || policySet.contains(ANY_POLICY) || policySet.isEmpty();
}
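The helper treats a null set, an empty set, or a set holding the wildcard policy as unconstrained. A self-contained sketch of the same behavior, with generics added; the ANY_POLICY constant is a stand-in defined locally (in certificate-path code it is typically the anyPolicy OID), since the original constant's definition is not shown here:

import java.util.HashSet;
import java.util.Set;

public class PolicyCheckSketch {
    // Stand-in for the constant used by the example above (assumed value).
    static final String ANY_POLICY = "2.5.29.32.0";

    static boolean isAnyPolicy(Set<String> policySet) {
        return policySet == null || policySet.contains(ANY_POLICY) || policySet.isEmpty();
    }

    public static void main(String[] args) {
        System.out.println(isAnyPolicy(null));            // true: no constraint at all
        System.out.println(isAnyPolicy(new HashSet<>())); // true: empty set means unconstrained
        Set<String> policies = new HashSet<>();
        policies.add(ANY_POLICY);
        System.out.println(isAnyPolicy(policies));        // true: contains the wildcard policy
        policies.clear();
        policies.add("1.2.3.4");
        System.out.println(isAnyPolicy(policies));        // false: a specific policy is required
    }
}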
From source file:Main.java
private static void appendTypeName(StringBuffer sb, Set typeNamesAppended, String name) {
    if (!typeNamesAppended.contains(name)) {
        if (sb.length() != 0)
            sb.append("+");
        sb.append(name);
        typeNamesAppended.add(name);
    }
}
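A usage sketch showing how the contains() check deduplicates names while joining them with "+"; the type names are invented and the helper is reproduced (with generics added) so the demo compiles on its own:

import java.util.HashSet;
import java.util.Set;

public class TypeNameJoinDemo {
    public static void main(String[] args) {
        StringBuffer sb = new StringBuffer();
        Set<String> appended = new HashSet<>();
        appendTypeName(sb, appended, "A");
        appendTypeName(sb, appended, "B");
        appendTypeName(sb, appended, "A"); // Duplicate: the contains() check skips it.
        System.out.println(sb); // A+B
    }

    private static void appendTypeName(StringBuffer sb, Set<String> typeNamesAppended, String name) {
        if (!typeNamesAppended.contains(name)) {
            if (sb.length() != 0)
                sb.append("+");
            sb.append(name);
            typeNamesAppended.add(name);
        }
    }
}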
From source file:Main.java
public static <T> Collection<T> union(Collection<T> a, Collection<T> b) {
    Set<T> set = new HashSet<T>(a);
    for (T obj : b) {
        if (!set.contains(obj)) {
            set.add(obj);
        }
    }
    return set;
}
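Note that the contains() guard above is redundant: Set.add() already refuses duplicates and simply returns false instead of inserting. A behaviorally equivalent sketch using addAll() (List.of needs Java 9+):

import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class UnionSketch {
    // Same result as the example above: addAll() silently ignores elements already present.
    public static <T> Collection<T> union(Collection<T> a, Collection<T> b) {
        Set<T> set = new HashSet<>(a);
        set.addAll(b);
        return set;
    }

    public static void main(String[] args) {
        System.out.println(union(List.of(1, 2), List.of(2, 3))); // [1, 2, 3]
    }
}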
From source file:Main.java
public static <A> Set<A> setIntersection(Set<A> a, Set<A> b) {
    Set<A> set = new HashSet<A>();
    for (A x : a)
        if (b.contains(x))
            set.add(x);
    return set;
}
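An equivalent sketch using retainAll(), which performs the same contains()-based filtering internally; copying a first leaves both inputs unmodified (Set.of needs Java 9+):

import java.util.HashSet;
import java.util.Set;

public class IntersectionSketch {
    public static <A> Set<A> setIntersection(Set<A> a, Set<A> b) {
        Set<A> result = new HashSet<>(a);
        result.retainAll(b); // Drops every element of the copy that b does not contain.
        return result;
    }

    public static void main(String[] args) {
        System.out.println(setIntersection(Set.of(1, 2, 3), Set.of(2, 3, 4))); // e.g. [2, 3]
    }
}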
From source file:Main.java
/**
 * Returns whether speech synthesis using a specific language on the
 * specified TTS requires a network connection.
 *
 * @param tts The TTS engine to check.
 * @param language The language to check.
 * @return {@code true} if the language requires a network connection.
 */
public static boolean isNetworkRequiredForSynthesis(TextToSpeech tts, Locale language) {
    final Set<String> features = tts.getFeatures(language);
    return features.contains(Engine.KEY_FEATURE_NETWORK_SYNTHESIS)
            && !features.contains(Engine.KEY_FEATURE_EMBEDDED_SYNTHESIS);
}
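One caveat: TextToSpeech.getFeatures() may return null on some engines or error conditions, which would make the expression above throw a NullPointerException. A defensive variant, sketched under the assumption that the same Android Engine constants are in scope (Android-only code, so it runs on a device rather than a plain JVM):

import java.util.Collections;
import java.util.Locale;
import java.util.Set;

import android.speech.tts.TextToSpeech;
import android.speech.tts.TextToSpeech.Engine;

public final class TtsFeatureCheck {
    // Defensive variant: treat a null feature set as "no network synthesis available".
    public static boolean isNetworkRequiredForSynthesis(TextToSpeech tts, Locale language) {
        Set<String> features = tts.getFeatures(language);
        if (features == null) {
            features = Collections.emptySet();
        }
        return features.contains(Engine.KEY_FEATURE_NETWORK_SYNTHESIS)
                && !features.contains(Engine.KEY_FEATURE_EMBEDDED_SYNTHESIS);
    }
}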
From source file:Main.java
@SuppressWarnings({ "rawtypes", "unchecked" }) public static boolean containsOnly(Collection collection, Object object) { if (collection == null) return false; Set duplicatesRemoved = new HashSet(collection); return duplicatesRemoved.size() == 1 && duplicatesRemoved.contains(object); }
From source file:com.gistlabs.mechanize.document.html.JsoupDataUtil.java
private static void filterElementsByTag(List<Element> results, Element element, Set<String> tagSet) {
    if (tagSet.contains(element.tag().getName().toLowerCase()))
        results.add(element);
    for (Element child : element.children())
        filterElementsByTag(results, child, tagSet);
}
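A usage sketch against jsoup (the HTML snippet is invented, and the helper is reproduced so the demo compiles on its own); the Set gives O(1) membership tests while the recursion walks the whole element tree:

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

public class TagFilterDemo {
    public static void main(String[] args) {
        Document doc = Jsoup.parse("<div><p>one</p><span>two</span><p>three</p></div>");
        Set<String> tagSet = new HashSet<>();
        tagSet.add("p");
        List<Element> results = new ArrayList<>();
        filterElementsByTag(results, doc.body(), tagSet);
        System.out.println(results.size()); // 2: both <p> elements, found recursively
    }

    private static void filterElementsByTag(List<Element> results, Element element, Set<String> tagSet) {
        if (tagSet.contains(element.tag().getName().toLowerCase()))
            results.add(element);
        for (Element child : element.children())
            filterElementsByTag(results, child, tagSet);
    }
}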