Example usage for java.util Set contains

List of usage examples for java.util Set contains

Introduction

In this page you can find the example usage for java.util Set contains.

Prototype

boolean contains(Object o);

Source Link

Document

Returns true if this set contains the specified element.

Usage

From source file:de.citec.sc.matoll.process.Matoll_CreateMax.java

public static void main(String[] args) throws IOException, ParserConfigurationException, SAXException,
        InstantiationException, IllegalAccessException, ClassNotFoundException, Exception {

    String directory;// w  w  w  . j  a va 2 s.  c  om
    String gold_standard_lexicon;
    String output_lexicon;
    String configFile;
    Language language;
    String output;

    Stopwords stopwords = new Stopwords();

    HashMap<String, Double> maxima;
    maxima = new HashMap<String, Double>();

    if (args.length < 3) {
        System.out.print("Usage: Matoll --mode=train/test <DIRECTORY> <CONFIG>\n");
        return;

    }

    //      Classifier classifier;

    directory = args[1];
    configFile = args[2];

    final Config config = new Config();

    config.loadFromFile(configFile);

    gold_standard_lexicon = config.getGoldStandardLexicon();

    String model_file = config.getModel();

    output_lexicon = config.getOutputLexicon();
    output = config.getOutput();

    language = config.getLanguage();

    LexiconLoader loader = new LexiconLoader();
    Lexicon gold = loader.loadFromFile(gold_standard_lexicon);

    Set<String> uris = new HashSet<>();
    //        Map<Integer,String> sentence_list = new HashMap<>();
    Map<Integer, Set<Integer>> mapping_words_sentences = new HashMap<>();

    //consider only properties
    for (LexicalEntry entry : gold.getEntries()) {
        try {
            for (Sense sense : entry.getSenseBehaviours().keySet()) {
                String tmp_uri = sense.getReference().getURI().replace("http://dbpedia.org/ontology/", "");
                if (!Character.isUpperCase(tmp_uri.charAt(0))) {
                    uris.add(sense.getReference().getURI());
                }
            }
        } catch (Exception e) {
        }
        ;
    }

    ModelPreprocessor preprocessor = new ModelPreprocessor(language);
    preprocessor.setCoreferenceResolution(false);
    Set<String> dep = new HashSet<>();
    dep.add("prep");
    dep.add("appos");
    dep.add("nn");
    dep.add("dobj");
    dep.add("pobj");
    dep.add("num");
    preprocessor.setDEP(dep);

    List<File> list_files = new ArrayList<>();

    if (config.getFiles().isEmpty()) {
        File folder = new File(directory);
        File[] files = folder.listFiles();
        for (File file : files) {
            if (file.toString().contains(".ttl"))
                list_files.add(file);
        }
    } else {
        list_files.addAll(config.getFiles());
    }
    System.out.println(list_files.size());

    int sentence_counter = 0;
    Map<String, Set<Integer>> bag_words_uri = new HashMap<>();
    Map<String, Integer> mapping_word_id = new HashMap<>();
    for (File file : list_files) {
        Model model = RDFDataMgr.loadModel(file.toString());
        for (Model sentence : getSentences(model)) {
            String reference = getReference(sentence);
            reference = reference.replace("http://dbpedia/", "http://dbpedia.org/");
            if (uris.contains(reference)) {
                sentence_counter += 1;
                Set<Integer> words_ids = getBagOfWords(sentence, stopwords, mapping_word_id);
                //TODO: add sentence preprocessing
                String obj = getObject(sentence);
                String subj = getSubject(sentence);
                preprocessor.preprocess(sentence, subj, obj, language);
                //TODO: also return marker if object or subject of property (in SPARQL this has to be optional of course)
                String parsed_sentence = getParsedSentence(sentence);
                try (FileWriter fw = new FileWriter("mapping_sentences_to_ids_goldstandard.tsv", true);
                        BufferedWriter bw = new BufferedWriter(fw);
                        PrintWriter out = new PrintWriter(bw)) {
                    out.println(sentence_counter + "\t" + parsed_sentence);
                } catch (IOException e) {
                    e.printStackTrace();
                }
                for (Integer word_id : words_ids) {
                    if (mapping_words_sentences.containsKey(word_id)) {
                        Set<Integer> tmp_set = mapping_words_sentences.get(word_id);
                        tmp_set.add(sentence_counter);
                        mapping_words_sentences.put(word_id, tmp_set);

                    } else {
                        Set<Integer> tmp_set = new HashSet<>();
                        tmp_set.add(sentence_counter);
                        mapping_words_sentences.put(word_id, tmp_set);
                    }

                }
                if (bag_words_uri.containsKey(reference)) {
                    Set<Integer> tmp = bag_words_uri.get(reference);
                    for (Integer w : words_ids) {
                        tmp.add(w);

                    }
                    bag_words_uri.put(reference, tmp);
                } else {
                    Set<Integer> tmp = new HashSet<>();
                    for (Integer w : words_ids) {
                        tmp.add(w);
                    }
                    bag_words_uri.put(reference, tmp);
                }
            }

        }
        model.close();

    }

    PrintWriter writer = new PrintWriter("bag_of_words_only_goldstandard.tsv");
    StringBuilder string_builder = new StringBuilder();
    for (String r : bag_words_uri.keySet()) {
        string_builder.append(r);
        for (Integer i : bag_words_uri.get(r)) {
            string_builder.append("\t");
            string_builder.append(i);
        }
        string_builder.append("\n");
    }
    writer.write(string_builder.toString());
    writer.close();

    writer = new PrintWriter("mapping_words_to_sentenceids_goldstandard.tsv");
    string_builder = new StringBuilder();
    for (Integer w : mapping_words_sentences.keySet()) {
        string_builder.append(w);
        for (int i : mapping_words_sentences.get(w)) {
            string_builder.append("\t");
            string_builder.append(i);
        }
        string_builder.append("\n");
    }
    writer.write(string_builder.toString());
    writer.close();

}

From source file:com.act.analysis.surfactant.AnalysisDriver.java

public static void main(String[] args) throws Exception {
    Options opts = new Options();
    for (Option.Builder b : OPTION_BUILDERS) {
        opts.addOption(b.build());/*www .java2 s .c  o m*/
    }

    CommandLine cl = null;
    try {
        CommandLineParser parser = new DefaultParser();
        cl = parser.parse(opts, args);
    } catch (ParseException e) {
        System.err.format("Argument parsing failed: %s\n", e.getMessage());
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        System.exit(1);
    }

    if (cl.hasOption("help")) {
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        return;
    }

    Set<String> seenOutputIds = new HashSet<>();

    TSVWriter<String, String> tsvWriter = null;
    if (cl.hasOption(OPTION_OUTPUT_FILE)) {
        File outputFile = new File(cl.getOptionValue(OPTION_OUTPUT_FILE));
        List<Map<String, String>> oldResults = null;
        if (outputFile.exists()) {
            System.err.format(
                    "Output file already exists, reading old results and skipping processed molecules.\n");
            TSVParser outputParser = new TSVParser();
            outputParser.parse(outputFile);
            oldResults = outputParser.getResults();
            for (Map<String, String> row : oldResults) {
                // TODO: verify that the last row was written cleanly/completely.
                seenOutputIds.add(row.get("id"));
            }
        }

        List<String> header = new ArrayList<>();
        header.add("name");
        header.add("id");
        header.add("inchi");
        header.add("label");
        for (SurfactantAnalysis.FEATURES f : SurfactantAnalysis.FEATURES.values()) {
            header.add(f.toString());
        }
        // TODO: make this API more auto-closable friendly.
        tsvWriter = new TSVWriter<>(header);
        tsvWriter.open(outputFile);
        if (oldResults != null) {
            System.out.format("Re-writing %d existing result rows\n", oldResults.size());
            tsvWriter.append(oldResults);
        }
    }

    try {
        Map<SurfactantAnalysis.FEATURES, Double> analysisFeatures;

        LicenseManager.setLicenseFile(cl.getOptionValue(OPTION_LICENSE_FILE));
        if (cl.hasOption(OPTION_INCHI)) {
            analysisFeatures = SurfactantAnalysis.performAnalysis(cl.getOptionValue(OPTION_INCHI),
                    cl.hasOption(OPTION_DISPLAY));
            Map<String, String> tsvFeatures = new HashMap<>();
            // Convert features to strings to avoid some weird formatting issues.  It's ugly, but it works.
            for (Map.Entry<SurfactantAnalysis.FEATURES, Double> entry : analysisFeatures.entrySet()) {
                tsvFeatures.put(entry.getKey().toString(), String.format("%.6f", entry.getValue()));
            }
            tsvFeatures.put("name", "direct-inchi-input");
            if (tsvWriter != null) {
                tsvWriter.append(tsvFeatures);
            }
        } else if (cl.hasOption(OPTION_INPUT_FILE)) {
            TSVParser parser = new TSVParser();
            parser.parse(new File(cl.getOptionValue(OPTION_INPUT_FILE)));
            int i = 0;
            List<Map<String, String>> inputRows = parser.getResults();

            for (Map<String, String> row : inputRows) {
                i++; // Just for warning messages.
                if (!row.containsKey("name") || !row.containsKey("id") || !row.containsKey("inchi")) {
                    System.err.format(
                            "WARNING: TSV rows must contain at least name, id, and inchi, skipping row %d\n",
                            i);
                    continue;
                }
                if (seenOutputIds.contains(row.get("id"))) {
                    System.out.format("Skipping input row with id already in output: %s\n", row.get("id"));
                    continue;
                }

                System.out.format("Analysis for chemical %s\n", row.get("name"));
                try {
                    analysisFeatures = SurfactantAnalysis.performAnalysis(row.get("inchi"), false);
                } catch (Exception e) {
                    // Ignore exceptions for now.  Sometimes the regression analysis or Chemaxon processing chokes unexpectedly.
                    System.err.format("ERROR caught exception while processing '%s':\n", row.get("name"));
                    System.err.format("%s\n", e.getMessage());
                    e.printStackTrace(System.err);
                    System.err.println("Skipping...");
                    continue;
                }
                System.out.format("--- Done analysis for chemical %s\n", row.get("name"));

                // This is a duplicate of the OPTION_INCHI block code, but it's inside of a tight loop, so...
                Map<String, String> tsvFeatures = new HashMap<>();
                for (Map.Entry<SurfactantAnalysis.FEATURES, Double> entry : analysisFeatures.entrySet()) {
                    tsvFeatures.put(entry.getKey().toString(), String.format("%.6f", entry.getValue()));
                }
                tsvFeatures.put("name", row.get("name"));
                tsvFeatures.put("id", row.get("id"));
                tsvFeatures.put("inchi", row.get("inchi"));
                tsvFeatures.put("label", row.containsKey("label") ? row.get("label") : "?");
                if (tsvWriter != null) {
                    tsvWriter.append(tsvFeatures);
                    // Flush every time in case we crash or get interrupted.  The features must flow!
                    tsvWriter.flush();
                }
            }
        } else {
            throw new RuntimeException("Must specify inchi or input file");
        }
    } finally {
        if (tsvWriter != null) {
            tsvWriter.close();
        }
    }
}

From source file:com.github.ansell.shp.SHPDump.java

public static void main(String... args) throws Exception {
    final OptionParser parser = new OptionParser();

    final OptionSpec<Void> help = parser.accepts("help").forHelp();
    final OptionSpec<File> input = parser.accepts("input").withRequiredArg().ofType(File.class).required()
            .describedAs("The input SHP file");
    final OptionSpec<File> output = parser.accepts("output").withRequiredArg().ofType(File.class).required()
            .describedAs("The output directory to use for debugging files");
    final OptionSpec<String> outputPrefix = parser.accepts("prefix").withRequiredArg().ofType(String.class)
            .defaultsTo("shp-debug").describedAs("The output prefix to use for debugging files");
    final OptionSpec<File> outputMappingTemplate = parser.accepts("output-mapping").withRequiredArg()
            .ofType(File.class).describedAs("The output mapping template file if it needs to be generated.");
    final OptionSpec<Integer> resolution = parser.accepts("resolution").withRequiredArg().ofType(Integer.class)
            .defaultsTo(2048).describedAs("The output image file resolution");
    final OptionSpec<String> format = parser.accepts("format").withRequiredArg().ofType(String.class)
            .defaultsTo("png").describedAs("The output image format");
    final OptionSpec<String> removeIfEmpty = parser.accepts("remove-if-empty").withRequiredArg()
            .ofType(String.class).describedAs(
                    "The name of an attribute to remove if its value is empty before outputting the resulting shapefile. Use multiple times to specify multiple fields to check");

    OptionSet options = null;/*from  w w  w . j av  a 2  s  .  com*/

    try {
        options = parser.parse(args);
    } catch (final OptionException e) {
        System.out.println(e.getMessage());
        parser.printHelpOn(System.out);
        throw e;
    }

    if (options.has(help)) {
        parser.printHelpOn(System.out);
        return;
    }

    final Path inputPath = input.value(options).toPath();
    if (!Files.exists(inputPath)) {
        throw new FileNotFoundException("Could not find input SHP file: " + inputPath.toString());
    }

    final Path outputPath = output.value(options).toPath();
    if (!Files.exists(outputPath)) {
        throw new FileNotFoundException("Output directory does not exist: " + outputPath.toString());
    }

    final Path outputMappingPath = options.has(outputMappingTemplate)
            ? outputMappingTemplate.value(options).toPath()
            : null;
    if (options.has(outputMappingTemplate) && Files.exists(outputMappingPath)) {
        throw new FileNotFoundException(
                "Output mapping template file already exists: " + outputMappingPath.toString());
    }

    final Set<String> filterFields = ConcurrentHashMap.newKeySet();
    if (options.has(removeIfEmpty)) {
        for (String nextFilterField : removeIfEmpty.values(options)) {
            System.out.println("Will filter field if empty value found: " + nextFilterField);
            filterFields.add(nextFilterField);
        }
    }

    if (!filterFields.isEmpty()) {
        System.out.println("Full set of filter fields: " + filterFields);
    }

    final String prefix = outputPrefix.value(options);

    FileDataStore store = FileDataStoreFinder.getDataStore(inputPath.toFile());

    if (store == null) {
        throw new RuntimeException("Could not read the given input as an ESRI Shapefile: "
                + inputPath.toAbsolutePath().toString());
    }

    for (String typeName : new LinkedHashSet<>(Arrays.asList(store.getTypeNames()))) {
        System.out.println("");
        System.out.println("Type: " + typeName);
        SimpleFeatureSource featureSource = store.getFeatureSource(typeName);
        SimpleFeatureType schema = featureSource.getSchema();

        Name outputSchemaName = new NameImpl(schema.getName().getNamespaceURI(),
                schema.getName().getLocalPart().replace(" ", "").replace("%20", ""));
        System.out.println("Replacing name on schema: " + schema.getName() + " with " + outputSchemaName);
        SimpleFeatureType outputSchema = SHPUtils.changeSchemaName(schema, outputSchemaName);

        List<String> attributeList = new ArrayList<>();
        for (AttributeDescriptor attribute : schema.getAttributeDescriptors()) {
            System.out.println("Attribute: " + attribute.getName().toString());
            attributeList.add(attribute.getName().toString());
        }
        CsvSchema csvSchema = CSVUtil.buildSchema(attributeList);

        SimpleFeatureCollection collection = featureSource.getFeatures();
        int featureCount = 0;
        Path nextCSVFile = outputPath.resolve(prefix + ".csv");
        Path nextSummaryCSVFile = outputPath
                .resolve(prefix + "-" + outputSchema.getTypeName() + "-Summary.csv");
        List<SimpleFeature> outputFeatureList = new CopyOnWriteArrayList<>();

        try (SimpleFeatureIterator iterator = collection.features();
                Writer bufferedWriter = Files.newBufferedWriter(nextCSVFile, StandardCharsets.UTF_8,
                        StandardOpenOption.CREATE_NEW);
                SequenceWriter csv = CSVUtil.newCSVWriter(bufferedWriter, csvSchema);) {
            List<String> nextLine = new ArrayList<>();
            while (iterator.hasNext()) {
                SimpleFeature feature = iterator.next();
                featureCount++;
                if (featureCount <= 2) {
                    System.out.println("");
                    System.out.println(feature.getIdentifier());
                } else if (featureCount % 100 == 0) {
                    System.out.print(".");
                }
                boolean filterThisFeature = false;
                for (AttributeDescriptor attribute : schema.getAttributeDescriptors()) {
                    String featureString = Optional.ofNullable(feature.getAttribute(attribute.getName()))
                            .orElse("").toString();
                    nextLine.add(featureString);
                    if (filterFields.contains(attribute.getName().toString())
                            && featureString.trim().isEmpty()) {
                        filterThisFeature = true;
                    }
                    if (featureString.length() > 100) {
                        featureString = featureString.substring(0, 100) + "...";
                    }
                    if (featureCount <= 2) {
                        System.out.print(attribute.getName() + "=");
                        System.out.println(featureString);
                    }
                }
                if (!filterThisFeature) {
                    outputFeatureList.add(SHPUtils.changeSchemaName(feature, outputSchema));
                    csv.write(nextLine);
                }
                nextLine.clear();
            }
        }
        try (Reader csvReader = Files.newBufferedReader(nextCSVFile, StandardCharsets.UTF_8);
                Writer summaryOutput = Files.newBufferedWriter(nextSummaryCSVFile, StandardCharsets.UTF_8,
                        StandardOpenOption.CREATE_NEW);
                final Writer mappingWriter = options.has(outputMappingTemplate)
                        ? Files.newBufferedWriter(outputMappingPath)
                        : NullWriter.NULL_WRITER) {
            CSVSummariser.runSummarise(csvReader, summaryOutput, mappingWriter,
                    CSVSummariser.DEFAULT_SAMPLE_COUNT, false);
        }
        if (featureCount > 100) {
            System.out.println("");
        }
        System.out.println("");
        System.out.println("Feature count: " + featureCount);

        SimpleFeatureCollection outputCollection = new ListFeatureCollection(outputSchema, outputFeatureList);
        Path outputShapefilePath = outputPath.resolve(prefix + "-" + outputSchema.getTypeName() + "-dump");
        if (!Files.exists(outputShapefilePath)) {
            Files.createDirectory(outputShapefilePath);
        }
        SHPUtils.writeShapefile(outputCollection, outputShapefilePath);

        // Create ZIP file from the contents to keep the subfiles together
        Path outputShapefileZipPath = outputPath
                .resolve(prefix + "-" + outputSchema.getTypeName() + "-dump.zip");
        try (final OutputStream out = Files.newOutputStream(outputShapefileZipPath,
                StandardOpenOption.CREATE_NEW);
                final ZipOutputStream zip = new ZipOutputStream(out, StandardCharsets.UTF_8);) {
            Files.list(outputShapefilePath).forEachOrdered(Unchecked.consumer(e -> {
                zip.putNextEntry(new ZipEntry(e.getFileName().toString()));
                Files.copy(e, zip);
                zip.closeEntry();
            }));
        }

        try (final OutputStream outputStream = Files.newOutputStream(
                outputPath.resolve(prefix + "." + format.value(options)), StandardOpenOption.CREATE_NEW);) {
            MapContent map = new MapContent();
            map.setTitle(prefix + "-" + outputSchema.getTypeName());
            Style style = SLD.createSimpleStyle(featureSource.getSchema());
            Layer layer = new FeatureLayer(new CollectionFeatureSource(outputCollection), style);
            map.addLayer(layer);
            SHPUtils.renderImage(map, outputStream, resolution.value(options), format.value(options));
        }
    }
}

From source file:Main.java

protected static boolean isAnyPolicy(Set policySet) {
    return policySet == null || policySet.contains(ANY_POLICY) || policySet.isEmpty();
}

From source file:Main.java

private static void appendTypeName(StringBuffer sb, Set typeNamesAppended, String name) {
    if (!typeNamesAppended.contains(name)) {
        if (sb.length() != 0)
            sb.append("+");
        sb.append(name);// www.  ja  va2  s  . c om
        typeNamesAppended.add(name);
    }
}

From source file:Main.java

public static <T> Collection<T> union(Collection<T> a, Collection<T> b) {
    Set<T> set = new HashSet<T>(a);
    for (T obj : b) {
        if (!set.contains(obj)) {
            set.add(obj);/*from   w w  w.j a v a  2s.  c  o m*/
        }
    }
    return set;
}

From source file:Main.java

public static <A> Set<A> setIntersection(Set<A> a, Set<A> b) {
    Set<A> set = new HashSet<A>();
    for (A x : a)
        if (b.contains(x))
            set.add(x);/* w ww.jav a  2s  .  c o m*/
    return set;
}

From source file:Main.java

/**
 * Returns whether speech synthesis using a specific language on the
 * specified TTS requires a network connection.
 *
 * @param tts The TTS engine to check./*www .ja  v  a 2 s.  c o  m*/
 * @param language The language to check.
 * @return {@code true} if the language requires a network connection.
 */
public static boolean isNetworkRequiredForSynthesis(TextToSpeech tts, Locale language) {
    final Set<String> features = tts.getFeatures(language);
    return features.contains(Engine.KEY_FEATURE_NETWORK_SYNTHESIS)
            && !features.contains(Engine.KEY_FEATURE_EMBEDDED_SYNTHESIS);
}

From source file:Main.java

@SuppressWarnings({ "rawtypes", "unchecked" })
public static boolean containsOnly(Collection collection, Object object) {
    if (collection == null)
        return false;
    Set duplicatesRemoved = new HashSet(collection);
    return duplicatesRemoved.size() == 1 && duplicatesRemoved.contains(object);
}

From source file:com.gistlabs.mechanize.document.html.JsoupDataUtil.java

private static void filterElementsByTag(List<Element> results, Element element, Set<String> tagSet) {
    if (tagSet.contains(element.tag().getName().toLowerCase()))
        results.add(element);/*from   www . ja va  2  s.c  om*/

    for (Element child : element.children())
        filterElementsByTag(results, child, tagSet);
}