Example usage for java.util.Set.add

List of usage examples for java.util.Set.add

Introduction

This page collects usage examples for the java.util.Set.add method.

Prototype

boolean add(E e);

Document

Adds the specified element to this set if it is not already present (optional operation). The call returns true if the set did not already contain the element; otherwise the set is left unchanged and the call returns false.
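
For reference, a minimal self-contained sketch of this contract (the class name and element values are illustrative):

import java.util.HashSet;
import java.util.Set;

public class SetAddDemo {
    public static void main(String[] args) {
        Set<String> names = new HashSet<>();
        System.out.println(names.add("alice")); // true: "alice" was not present
        System.out.println(names.add("alice")); // false: duplicate, set unchanged
        System.out.println(names.size());       // 1
    }
}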

Usage

From source file:HSqlPrimerDesign.java
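
This example deduplicates primer sequences: each string read from the JDBC ResultSet is added to a HashSet<CharSequence>.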

public static void main(String[] args) throws NoSuchFieldException, IllegalAccessException,
        ClassNotFoundException, InstantiationException, SQLException, FileNotFoundException {
    Class.forName(JDBC_DRIVER_HSQL).newInstance();
    conn = DriverManager.getConnection(DB_SERVER_URL, USER, PASS);
    PrintWriter log = new PrintWriter(new File("javalog.log"));
    DpalLoad.main(args);
    Dpal_Inst = DpalLoad.INSTANCE_WIN64;
    //        int[][] arr =calcHairpin("GGGGGGCCCCCCCCCCCCGGGGGGG",4);
    //        if(arr.length<=1){
    //            System.out.println("No Hairpin's found");
    //        }else{
    //            System.out.println("Hairpin(s) found");
    //        }
    Statement stat = conn.createStatement();
    ResultSet call = stat.executeQuery(
            "Select * From " + "Primerdb.primers where Cluster ='A1' and UniqueP =True and Bp = 20");
    Set<CharSequence> primers = new HashSet<>();
    while (call.next()) {
        primers.add(call.getString("Sequence"));
    }
    //        primers.stream().forEach(x->{
    //            log.println(x);
    //            log.println(complementarity(x, x, Dpal_Inst));
    //            int[][] arr =calcHairpin((String)x,4);
    //            if(arr.length<=1){
    //                log.println("No Hairpin's found");
    //            }else{
    //                log.println("Hairpin(s) found");
    //            }
    //            log.println();
    //            log.flush();
    //        });
}

From source file:de.jetwick.snacktory.HtmlFetcher.java
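
This example tracks domains already written to disk in a LinkedHashSet, so a URL from a previously seen domain gets a "2" suffix on its output file name.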

public static void main(String[] args) throws Exception {
    BufferedReader reader = new BufferedReader(new FileReader("urls.txt"));
    String line = null;
    Set<String> existing = new LinkedHashSet<String>();
    while ((line = reader.readLine()) != null) {
        int index1 = line.indexOf("\"");
        int index2 = line.indexOf("\"", index1 + 1);
        String url = line.substring(index1 + 1, index2);
        String domainStr = SHelper.extractDomain(url, true);
        String counterStr = "";
        // TODO more similarities
        if (existing.contains(domainStr))
            counterStr = "2";
        else
            existing.add(domainStr);

        String html = new HtmlFetcher().fetchAsString(url, 20000);
        String outFile = domainStr + counterStr + ".html";
        BufferedWriter writer = new BufferedWriter(new FileWriter(outFile));
        writer.write(html);
        writer.close();
    }
    reader.close();
}

From source file:Main.java
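
Here each parsed Car is added to both a List and a HashSet; iterating the deduplicated set then gives a count of how often each distinct car occurs in the list.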

public static void main(String[] args) throws Exception {
    List<Car> carsList = new ArrayList<Car>();
    Set<Car> carsset = new HashSet<Car>();
    File fXmlFile = new File("cars.xml");
    DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
    Document doc = dBuilder.parse(fXmlFile);
    doc.getDocumentElement().normalize();
    NodeList nList = doc.getElementsByTagName("cars");
    Car tempCar = null;
    for (int temp = 0; temp < nList.getLength(); temp++) {
        Node nNode = nList.item(temp);
        if (nNode.getNodeType() == Node.ELEMENT_NODE) {
            tempCar = new Car();
            Element eElement = (Element) nNode;
            tempCar.setModel(eElement.getElementsByTagName("model").item(0).getTextContent());
            tempCar.setVersion(eElement.getElementsByTagName("version").item(0).getTextContent());
            carsList.add(tempCar);
            carsset.add(tempCar);

        }
    }
    for (Car cs : carsset) {
        int count = 0;
        for (Car cl : carsList) {
            if (cs.equals(cl)) {
                count = count + 1;
            }
        }
        System.out.println(cs + "\t" + count);
    }
}

From source file:com.act.lcms.db.analysis.IonDetectionAnalysis.java
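
This example gathers the ion names to search for into a HashSet<String>, adding a single default ion when none are supplied on the command line.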

public static void main(String[] args) throws Exception {

    Options opts = new Options();
    for (Option.Builder b : OPTION_BUILDERS) {
        opts.addOption(b.build());
    }

    CommandLine cl = null;
    try {
        CommandLineParser parser = new DefaultParser();
        cl = parser.parse(opts, args);
    } catch (ParseException e) {
        System.err.format("Argument parsing failed: %s", e.getMessage());
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        System.exit(1);
    }

    if (cl.hasOption("help")) {
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        return;
    }

    File lcmsDir = new File(cl.getOptionValue(OPTION_LCMS_FILE_DIRECTORY));
    if (!lcmsDir.isDirectory()) {
        System.err.format("File at %s is not a directory", lcmsDir.getAbsolutePath());
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        System.exit(1);
    }

    String plottingDirectory = cl.getOptionValue(OPTION_PLOTTING_DIR);

    // Get the ions to include in the search from the command line
    Set<String> includeIons;
    if (cl.hasOption(OPTION_INCLUDE_IONS)) {
        includeIons = new HashSet<>(Arrays.asList(cl.getOptionValues(OPTION_INCLUDE_IONS)));
        LOGGER.info("Including ions in search: %s", StringUtils.join(includeIons, ", "));
    } else {
        includeIons = new HashSet<>();
        includeIons.add(DEFAULT_ION);
    }

    try (DB db = DB.openDBFromCLI(cl)) {
        ScanFile.insertOrUpdateScanFilesInDirectory(db, lcmsDir);

        File inputPredictionCorpus = new File(cl.getOptionValue(OPTION_INPUT_PREDICTION_CORPUS));

        Map<Double, Set<Pair<String, String>>> massChargeToChemicalAndIon = constructMassChargeToChemicalIonsFromInputFile(
                inputPredictionCorpus, includeIons, cl.hasOption(OPTION_LIST_OF_INCHIS_INPUT_FILE));

        Pair<Set<Pair<String, Double>>, Map<String, Double>> values = constructFakeNameToMassChargeAndSetOfMassChargePairs(
                massChargeToChemicalAndIon.keySet());
        Set<Pair<String, Double>> searchMZs = values.getLeft();
        Map<String, Double> chemIDToMassCharge = values.getRight();

        LOGGER.info("The number of mass charges are: %d", searchMZs.size());

        Map<ScanData.KIND, List<LCMSWell>> wellTypeToLCMSWells = readInputExperimentalSetup(db,
                new File(cl.getOptionValue(OPTION_INPUT_POSITIVE_AND_NEGATIVE_CONTROL_WELLS_FILE)));

        // Get the experimental setup, i.e. positive and negative wells, from the config file
        List<LCMSWell> positiveWells = wellTypeToLCMSWells.get(ScanData.KIND.POS_SAMPLE);
        List<LCMSWell> negativeWells = wellTypeToLCMSWells.get(ScanData.KIND.NEG_CONTROL);

        LOGGER.info("Number of positive wells is: %d", positiveWells.size());
        LOGGER.info("Number of negative wells is: %d", negativeWells.size());

        HashMap<Integer, Plate> plateCache = new HashMap<>();
        String outputPrefix = cl.getOptionValue(OPTION_OUTPUT_PREFIX);

        IonDetectionAnalysis<LCMSWell> ionDetectionAnalysis = new IonDetectionAnalysis<LCMSWell>(lcmsDir,
                positiveWells, negativeWells, plottingDirectory, plateCache, searchMZs, db);

        ionDetectionAnalysis.runLCMSMiningAnalysisAndPlotResults(chemIDToMassCharge, massChargeToChemicalAndIon,
                outputPrefix, cl.hasOption(OPTION_NON_REPLICATE_ANALYSIS));
    }
}

From source file:com.lightboxtechnologies.spectrum.SequenceFileExport.java
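
Here file extensions are accumulated into a HashSet<String>, read either from a file or from trailing arguments, before being handed to the Hadoop job configuration.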

public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();

    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    String imageID;
    String outpath;
    String friendlyname;
    final Set<String> exts = new HashSet<String>();

    if ("-f".equals(otherArgs[0])) {
        // five args expected: -f <extensions file> <imageID> <friendly name> <outpath>
        if (otherArgs.length != 5) {
            die();
        }

        // load extensions from file
        final Path extpath = new Path(otherArgs[1]);

        InputStream in = null;
        try {
            in = extpath.getFileSystem(conf).open(extpath);

            Reader r = null;
            try {
                r = new InputStreamReader(in);

                BufferedReader br = null;
                try {
                    br = new BufferedReader(r);

                    String line;
                    while ((line = br.readLine()) != null) {
                        exts.add(line.trim().toLowerCase());
                    }

                    br.close();
                } finally {
                    IOUtils.closeQuietly(br);
                }

                r.close();
            } finally {
                IOUtils.closeQuietly(r);
            }

            in.close();
        } finally {
            IOUtils.closeQuietly(in);
        }

        imageID = otherArgs[2];
        friendlyname = otherArgs[3];
        outpath = otherArgs[4];
    } else {
        if (otherArgs.length < 3) {
            die();
        }

        // read extensions from trailing args
        imageID = otherArgs[0];
        friendlyname = otherArgs[1];
        outpath = otherArgs[2];

        // lowercase all file extensions from the trailing args (they start at index 3, after outpath)
        for (int i = 3; i < otherArgs.length; ++i) {
            exts.add(otherArgs[i].toLowerCase());
        }
    }

    conf.setStrings("extensions", exts.toArray(new String[exts.size()]));

    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyname, "SequenceFileExport", conf);
    job.setJarByClass(SequenceFileExport.class);
    job.setMapperClass(SequenceFileExportMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(MapWritable.class);

    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    FsEntryHBaseInputFormat.setupJob(job, imageID);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    FileOutputFormat.setOutputPath(job, new Path(outpath));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:gr.iti.mklab.bubing.parser.ITIHTMLParser.java
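
This example adds each extracted link URI to an ObjectOpenHashSet as a string, then compares the set's size against the URI collection to detect distinct URIs that collapse to equal strings.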

public static void main(String arg[]) throws IllegalArgumentException, IOException, URISyntaxException,
        JSAPException, NoSuchAlgorithmException {

    final SimpleJSAP jsap = new SimpleJSAP(ITIHTMLParser.class.getName(),
            "Produce the digest of a page: the page is downloaded or passed as argument by specifying a file",
            new Parameter[] {
                    new UnflaggedOption("url", JSAP.STRING_PARSER, JSAP.REQUIRED, "The url of the page."),
                    new Switch("crossAuthorityDuplicates", 'c', "cross-authority-duplicates"),
                    new FlaggedOption("charBufferSize", JSAP.INTSIZE_PARSER, Integer.toString(CHAR_BUFFER_SIZE),
                            JSAP.NOT_REQUIRED, 'b', "buffer",
                            "The size of the parser character buffer (0 for dynamic sizing)."),
                    new FlaggedOption("file", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'f',
                            "file", "The page to be processed."),
                    new FlaggedOption("digester", JSAP.STRING_PARSER, "MD5", JSAP.NOT_REQUIRED, 'd', "digester",
                            "The digester to be used.") });

    JSAPResult jsapResult = jsap.parse(arg);
    if (jsap.messagePrinted())
        System.exit(1);

    final String url = jsapResult.getString("url");
    final String digester = jsapResult.getString("digester");
    final boolean crossAuthorityDuplicates = jsapResult.userSpecified("crossAuthorityDuplicates");
    final int charBufferSize = jsapResult.getInt("charBufferSize");

    final ITIHTMLParser<Void> htmlParser = new ITIHTMLParser<Void>(BinaryParser.forName(digester),
            (TextProcessor<Void>) null, crossAuthorityDuplicates, charBufferSize);
    final SetLinkReceiver linkReceiver = new SetLinkReceiver();
    final byte[] digest;

    if (!jsapResult.userSpecified("file")) {
        final URI uri = new URI(url);
        final HttpGet request = new HttpGet(uri);
        request.setConfig(RequestConfig.custom().setRedirectsEnabled(false).build());
        digest = htmlParser.parse(uri, HttpClients.createDefault().execute(request), linkReceiver);
    } else {
        final String file = jsapResult.getString("file");
        String content = IOUtils.toString(new InputStreamReader(new FileInputStream(file)));
        digest = htmlParser.parse(BURL.parse(url), new StringHttpMessages.HttpResponse(content), linkReceiver);
    }

    System.out.println("DigestHexString: " + Hex.encodeHexString(digest));
    System.out.println("Links: " + linkReceiver.urls);

    Set<String> urlStrings = new ObjectOpenHashSet<String>();
    for (URI link : linkReceiver.urls)
        urlStrings.add(link.toString());
    if (urlStrings.size() != linkReceiver.urls.size())
        System.out.println(
                "There are " + linkReceiver.urls.size() + " URIs but " + urlStrings.size() + " strings");

}

From source file:edu.msu.cme.rdp.kmer.cli.FastKmerFilter.java
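
Here every reference k-mer is mapped to a Set<RefKmer>, and add accumulates one RefKmer per reference sequence position while the index is built.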

public static void main(String[] args) throws Exception {
    final KmerSet<Set<RefKmer>> kmerSet;
    final SeqReader queryReader;
    final SequenceType querySeqType;
    final File queryFile;
    final KmerStartsWriter out;
    final boolean translQuery;
    final int wordSize;
    final int translTable;
    final boolean alignedSeqs;
    final List<String> refLabels = new ArrayList<>();
    final int maxThreads;
    final int trieWordSize;

    try {
        CommandLine cmdLine = new PosixParser().parse(options, args);
        args = cmdLine.getArgs();

        if (args.length < 3) {
            throw new Exception("Unexpected number of arguments");
        }

        if (cmdLine.hasOption("out")) {
            out = new KmerStartsWriter(cmdLine.getOptionValue("out"));
        } else {
            out = new KmerStartsWriter(System.out);
        }

        if (cmdLine.hasOption("aligned")) {
            alignedSeqs = true;
        } else {
            alignedSeqs = false;
        }

        if (cmdLine.hasOption("transl-table")) {
            translTable = Integer.valueOf(cmdLine.getOptionValue("transl-table"));
        } else {
            translTable = 11;
        }

        if (cmdLine.hasOption("threads")) {
            maxThreads = Integer.valueOf(cmdLine.getOptionValue("threads"));
        } else {
            maxThreads = Runtime.getRuntime().availableProcessors();
        }

        queryFile = new File(args[1]);
        wordSize = Integer.valueOf(args[0]);
        SequenceType refSeqType = null;

        querySeqType = SeqUtils.guessSequenceType(queryFile);
        queryReader = new SequenceReader(queryFile);

        if (querySeqType == SequenceType.Protein) {
            throw new Exception("Expected nucl query sequences");
        }

        refSeqType = SeqUtils
                .guessSequenceType(new File(args[2].contains("=") ? args[2].split("=")[1] : args[2]));

        translQuery = refSeqType == SequenceType.Protein;

        if (translQuery && wordSize % 3 != 0) {
            throw new Exception("Word size must be a multiple of 3 for nucl ref seqs");
        }

        if (translQuery) {
            trieWordSize = wordSize / 3;
        } else {
            trieWordSize = wordSize;
        }
        kmerSet = new KmerSet<Set<RefKmer>>(); // alternative: new KmerTrie(trieWordSize, translQuery)

        for (int index = 2; index < args.length; index++) {
            String refName;
            String refFileName = args[index];
            if (refFileName.contains("=")) {
                String[] lexemes = refFileName.split("=");
                refName = lexemes[0];
                refFileName = lexemes[1];
            } else {
                String tmpName = new File(refFileName).getName();
                if (tmpName.contains(".")) {
                    refName = tmpName.substring(0, tmpName.lastIndexOf("."));
                } else {
                    refName = tmpName;
                }
            }

            File refFile = new File(refFileName);

            if (refSeqType != SeqUtils.guessSequenceType(refFile)) {
                throw new Exception(
                        "Reference file " + refFile + " contains " + SeqUtils.guessFileFormat(refFile)
                                + " sequences but expected " + refSeqType + " sequences");
            }

            SequenceReader seqReader = new SequenceReader(refFile);
            Sequence seq;

            while ((seq = seqReader.readNextSequence()) != null) {
                if (seq.getSeqName().startsWith("#")) {
                    continue;
                }

                KmerGenerator kmers;
                try {
                    if (translQuery) { //protein ref
                        kmers = new ProtKmerGenerator(seq.getSeqString(), trieWordSize, alignedSeqs);
                    } else {
                        kmers = new NuclKmerGenerator(seq.getSeqString(), trieWordSize, alignedSeqs);
                    }
                    while (kmers.hasNext()) {
                        Kmer temp = kmers.next();
                        long[] next = temp.getLongKmers();
                        Set<RefKmer> refKmers = kmerSet.get(next);
                        if (refKmers == null) {
                            refKmers = new HashSet<>();
                            kmerSet.add(next, refKmers);
                        }

                        RefKmer kmerRef = new RefKmer();
                        kmerRef.modelPos = kmers.getPosition();
                        kmerRef.refFileIndex = refLabels.size();
                        kmerRef.refSeqid = seq.getSeqName();
                        refKmers.add(kmerRef);
                    }
                } catch (IllegalArgumentException ex) {
                    //System.err.println(seq.getSeqName()+ " " + ex.getMessage());
                }
            }
            seqReader.close();

            refLabels.add(refName);
        }

    } catch (Exception e) {
        new HelpFormatter().printHelp(
                "KmerSearch <kmerSize> <query_file> [name=]<ref_file> ...\nkmerSize should be multiple of 3, (recommend 45, minimum 30, maximum 63) ",
                options);
        e.printStackTrace();
        System.exit(1);
        throw new RuntimeException("Stupid jvm"); //While this will never get thrown it is required to make sure javac doesn't get confused about uninitialized variables
    }

    long startTime = System.currentTimeMillis();
    long seqCount = 0;
    final int maxTasks = 25000;

    System.err.println("Starting kmer mapping at " + new Date());
    System.err.println("*  Number of threads:       " + maxThreads);
    System.err.println("*  References:              " + refLabels);
    System.err.println("*  Reads file:              " + queryFile);
    System.err.println("*  Kmer length:             " + trieWordSize);
    System.err.println("*  Kmer Refset Size:        " + kmerSet.size());

    final AtomicInteger processed = new AtomicInteger();
    final AtomicInteger outstandingTasks = new AtomicInteger();

    ExecutorService service = Executors.newFixedThreadPool(maxThreads);

    Sequence querySeq;

    while ((querySeq = queryReader.readNextSequence()) != null) {
        seqCount++;

        String seqString = querySeq.getSeqString();

        if ((!translQuery && seqString.length() < wordSize)
                || (translQuery && seqString.length() < wordSize + 2)) {
            //System.err.println(querySeq.getSeqName() + "\t" + seqString.length());
            continue;
        }

        final Sequence threadSeq = querySeq;

        Runnable r = new Runnable() {

            public void run() {
                try {
                    processSeq(threadSeq, refLabels, kmerSet, out, wordSize, translQuery, translTable, false);
                    processSeq(threadSeq, refLabels, kmerSet, out, wordSize, translQuery, translTable, true);

                    processed.incrementAndGet();
                    outstandingTasks.decrementAndGet();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        };

        outstandingTasks.incrementAndGet();
        service.submit(r);

        // Busy-wait until the backlog drops below maxTasks, bounding queued work
        while (outstandingTasks.get() >= maxTasks)
            ;

        if ((processed.get() + 1) % 1000000 == 0) {
            System.err.println("Processed " + processed + " sequences in "
                    + (System.currentTimeMillis() - startTime) + " ms");
        }
    }

    service.shutdown();
    service.awaitTermination(1, TimeUnit.DAYS);

    System.err.println("Finished Processed " + processed + " sequences in "
            + (System.currentTimeMillis() - startTime) + " ms");

    out.close();
}

From source file:com.act.analysis.surfactant.AnalysisDriver.java
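
This example records the ids found in an existing output file in a HashSet so that molecules already processed can be skipped on a rerun.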

public static void main(String[] args) throws Exception {
    Options opts = new Options();
    for (Option.Builder b : OPTION_BUILDERS) {
        opts.addOption(b.build());
    }

    CommandLine cl = null;
    try {
        CommandLineParser parser = new DefaultParser();
        cl = parser.parse(opts, args);
    } catch (ParseException e) {
        System.err.format("Argument parsing failed: %s\n", e.getMessage());
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        System.exit(1);
    }

    if (cl.hasOption("help")) {
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        return;
    }

    Set<String> seenOutputIds = new HashSet<>();

    TSVWriter<String, String> tsvWriter = null;
    if (cl.hasOption(OPTION_OUTPUT_FILE)) {
        File outputFile = new File(cl.getOptionValue(OPTION_OUTPUT_FILE));
        List<Map<String, String>> oldResults = null;
        if (outputFile.exists()) {
            System.err.format(
                    "Output file already exists, reading old results and skipping processed molecules.\n");
            TSVParser outputParser = new TSVParser();
            outputParser.parse(outputFile);
            oldResults = outputParser.getResults();
            for (Map<String, String> row : oldResults) {
                // TODO: verify that the last row was written cleanly/completely.
                seenOutputIds.add(row.get("id"));
            }
        }

        List<String> header = new ArrayList<>();
        header.add("name");
        header.add("id");
        header.add("inchi");
        header.add("label");
        for (SurfactantAnalysis.FEATURES f : SurfactantAnalysis.FEATURES.values()) {
            header.add(f.toString());
        }
        // TODO: make this API more auto-closable friendly.
        tsvWriter = new TSVWriter<>(header);
        tsvWriter.open(outputFile);
        if (oldResults != null) {
            System.out.format("Re-writing %d existing result rows\n", oldResults.size());
            tsvWriter.append(oldResults);
        }
    }

    try {
        Map<SurfactantAnalysis.FEATURES, Double> analysisFeatures;

        LicenseManager.setLicenseFile(cl.getOptionValue(OPTION_LICENSE_FILE));
        if (cl.hasOption(OPTION_INCHI)) {
            analysisFeatures = SurfactantAnalysis.performAnalysis(cl.getOptionValue(OPTION_INCHI),
                    cl.hasOption(OPTION_DISPLAY));
            Map<String, String> tsvFeatures = new HashMap<>();
            // Convert features to strings to avoid some weird formatting issues.  It's ugly, but it works.
            for (Map.Entry<SurfactantAnalysis.FEATURES, Double> entry : analysisFeatures.entrySet()) {
                tsvFeatures.put(entry.getKey().toString(), String.format("%.6f", entry.getValue()));
            }
            tsvFeatures.put("name", "direct-inchi-input");
            if (tsvWriter != null) {
                tsvWriter.append(tsvFeatures);
            }
        } else if (cl.hasOption(OPTION_INPUT_FILE)) {
            TSVParser parser = new TSVParser();
            parser.parse(new File(cl.getOptionValue(OPTION_INPUT_FILE)));
            int i = 0;
            List<Map<String, String>> inputRows = parser.getResults();

            for (Map<String, String> row : inputRows) {
                i++; // Just for warning messages.
                if (!row.containsKey("name") || !row.containsKey("id") || !row.containsKey("inchi")) {
                    System.err.format(
                            "WARNING: TSV rows must contain at least name, id, and inchi, skipping row %d\n",
                            i);
                    continue;
                }
                if (seenOutputIds.contains(row.get("id"))) {
                    System.out.format("Skipping input row with id already in output: %s\n", row.get("id"));
                    continue;
                }

                System.out.format("Analysis for chemical %s\n", row.get("name"));
                try {
                    analysisFeatures = SurfactantAnalysis.performAnalysis(row.get("inchi"), false);
                } catch (Exception e) {
                    // Ignore exceptions for now.  Sometimes the regression analysis or Chemaxon processing chokes unexpectedly.
                    System.err.format("ERROR caught exception while processing '%s':\n", row.get("name"));
                    System.err.format("%s\n", e.getMessage());
                    e.printStackTrace(System.err);
                    System.err.println("Skipping...");
                    continue;
                }
                System.out.format("--- Done analysis for chemical %s\n", row.get("name"));

                // This is a duplicate of the OPTION_INCHI block code, but it's inside of a tight loop, so...
                Map<String, String> tsvFeatures = new HashMap<>();
                for (Map.Entry<SurfactantAnalysis.FEATURES, Double> entry : analysisFeatures.entrySet()) {
                    tsvFeatures.put(entry.getKey().toString(), String.format("%.6f", entry.getValue()));
                }
                tsvFeatures.put("name", row.get("name"));
                tsvFeatures.put("id", row.get("id"));
                tsvFeatures.put("inchi", row.get("inchi"));
                tsvFeatures.put("label", row.containsKey("label") ? row.get("label") : "?");
                if (tsvWriter != null) {
                    tsvWriter.append(tsvFeatures);
                    // Flush every time in case we crash or get interrupted.  The features must flow!
                    tsvWriter.flush();
                }
            }
        } else {
            throw new RuntimeException("Must specify inchi or input file");
        }
    } finally {
        if (tsvWriter != null) {
            tsvWriter.close();
        }
    }
}

From source file:de.citec.sc.matoll.process.Matoll_CreateMax.java
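
Here add builds several working sets: property URIs from a gold-standard lexicon, dependency labels for preprocessing, and per-word and per-URI sets of sentence ids.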

public static void main(String[] args) throws IOException, ParserConfigurationException, SAXException,
        InstantiationException, IllegalAccessException, ClassNotFoundException, Exception {

    String directory;
    String gold_standard_lexicon;
    String output_lexicon;
    String configFile;
    Language language;
    String output;

    Stopwords stopwords = new Stopwords();

    HashMap<String, Double> maxima;
    maxima = new HashMap<String, Double>();

    if (args.length < 3) {
        System.out.print("Usage: Matoll --mode=train/test <DIRECTORY> <CONFIG>\n");
        return;

    }

    //      Classifier classifier;

    directory = args[1];
    configFile = args[2];

    final Config config = new Config();

    config.loadFromFile(configFile);

    gold_standard_lexicon = config.getGoldStandardLexicon();

    String model_file = config.getModel();

    output_lexicon = config.getOutputLexicon();
    output = config.getOutput();

    language = config.getLanguage();

    LexiconLoader loader = new LexiconLoader();
    Lexicon gold = loader.loadFromFile(gold_standard_lexicon);

    Set<String> uris = new HashSet<>();
    //        Map<Integer,String> sentence_list = new HashMap<>();
    Map<Integer, Set<Integer>> mapping_words_sentences = new HashMap<>();

    //consider only properties
    for (LexicalEntry entry : gold.getEntries()) {
        try {
            for (Sense sense : entry.getSenseBehaviours().keySet()) {
                String tmp_uri = sense.getReference().getURI().replace("http://dbpedia.org/ontology/", "");
                if (!Character.isUpperCase(tmp_uri.charAt(0))) {
                    uris.add(sense.getReference().getURI());
                }
            }
        } catch (Exception e) {
            // skip entries whose sense lacks a resolvable reference
        }
    }

    ModelPreprocessor preprocessor = new ModelPreprocessor(language);
    preprocessor.setCoreferenceResolution(false);
    Set<String> dep = new HashSet<>();
    dep.add("prep");
    dep.add("appos");
    dep.add("nn");
    dep.add("dobj");
    dep.add("pobj");
    dep.add("num");
    preprocessor.setDEP(dep);

    List<File> list_files = new ArrayList<>();

    if (config.getFiles().isEmpty()) {
        File folder = new File(directory);
        File[] files = folder.listFiles();
        for (File file : files) {
            if (file.toString().contains(".ttl"))
                list_files.add(file);
        }
    } else {
        list_files.addAll(config.getFiles());
    }
    System.out.println(list_files.size());

    int sentence_counter = 0;
    Map<String, Set<Integer>> bag_words_uri = new HashMap<>();
    Map<String, Integer> mapping_word_id = new HashMap<>();
    for (File file : list_files) {
        Model model = RDFDataMgr.loadModel(file.toString());
        for (Model sentence : getSentences(model)) {
            String reference = getReference(sentence);
            reference = reference.replace("http://dbpedia/", "http://dbpedia.org/");
            if (uris.contains(reference)) {
                sentence_counter += 1;
                Set<Integer> words_ids = getBagOfWords(sentence, stopwords, mapping_word_id);
                //TODO: add sentence preprocessing
                String obj = getObject(sentence);
                String subj = getSubject(sentence);
                preprocessor.preprocess(sentence, subj, obj, language);
                //TODO: also return marker if object or subject of property (in SPARQL this has to be optional of course)
                String parsed_sentence = getParsedSentence(sentence);
                try (FileWriter fw = new FileWriter("mapping_sentences_to_ids_goldstandard.tsv", true);
                        BufferedWriter bw = new BufferedWriter(fw);
                        PrintWriter out = new PrintWriter(bw)) {
                    out.println(sentence_counter + "\t" + parsed_sentence);
                } catch (IOException e) {
                    e.printStackTrace();
                }
                for (Integer word_id : words_ids) {
                    if (mapping_words_sentences.containsKey(word_id)) {
                        Set<Integer> tmp_set = mapping_words_sentences.get(word_id);
                        tmp_set.add(sentence_counter);
                        mapping_words_sentences.put(word_id, tmp_set);

                    } else {
                        Set<Integer> tmp_set = new HashSet<>();
                        tmp_set.add(sentence_counter);
                        mapping_words_sentences.put(word_id, tmp_set);
                    }

                }
                if (bag_words_uri.containsKey(reference)) {
                    Set<Integer> tmp = bag_words_uri.get(reference);
                    for (Integer w : words_ids) {
                        tmp.add(w);

                    }
                    bag_words_uri.put(reference, tmp);
                } else {
                    Set<Integer> tmp = new HashSet<>();
                    for (Integer w : words_ids) {
                        tmp.add(w);
                    }
                    bag_words_uri.put(reference, tmp);
                }
            }

        }
        model.close();

    }

    PrintWriter writer = new PrintWriter("bag_of_words_only_goldstandard.tsv");
    StringBuilder string_builder = new StringBuilder();
    for (String r : bag_words_uri.keySet()) {
        string_builder.append(r);
        for (Integer i : bag_words_uri.get(r)) {
            string_builder.append("\t");
            string_builder.append(i);
        }
        string_builder.append("\n");
    }
    writer.write(string_builder.toString());
    writer.close();

    writer = new PrintWriter("mapping_words_to_sentenceids_goldstandard.tsv");
    string_builder = new StringBuilder();
    for (Integer w : mapping_words_sentences.keySet()) {
        string_builder.append(w);
        for (int i : mapping_words_sentences.get(w)) {
            string_builder.append("\t");
            string_builder.append(i);
        }
        string_builder.append("\n");
    }
    writer.write(string_builder.toString());
    writer.close();

}

From source file:org.ala.hbase.RepoDataLoader.java
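
This example adds infosource ids resolved from a biocache facet query to a LinkedHashSet, removing duplicates while preserving insertion order.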

/**
 * This takes a list of infosource ids...
 * <p/>
 * Usage: -stats or -reindex or -gList followed by a list of infosource ids
 *
 * @param args
 */
public static void main(String[] args) throws Exception {
    //RepoDataLoader loader = new RepoDataLoader();
    ApplicationContext context = SpringUtils.getContext();
    RepoDataLoader loader = (RepoDataLoader) context.getBean(RepoDataLoader.class);
    long start = System.currentTimeMillis();
    loader.loadInfoSources();
    String filePath = repositoryDir;
    if (args.length > 0) {
        if (args[0].equalsIgnoreCase("-stats")) {
            loader.statsOnly = true;
            args = (String[]) ArrayUtils.subarray(args, 1, args.length);
        }
        if (args[0].equalsIgnoreCase("-reindex")) {
            loader.reindex = true;
            loader.indexer = context.getBean(PartialIndex.class);
            args = (String[]) ArrayUtils.subarray(args, 1, args.length);
            logger.info("**** -reindex: " + loader.reindex);
            logger.debug("reindex url: " + loader.reindexUrl);
        }
        if (args[0].equalsIgnoreCase("-gList")) {
            loader.gList = true;
            args = (String[]) ArrayUtils.subarray(args, 1, args.length);
            logger.info("**** -gList: " + loader.gList);
        }
        if (args[0].equalsIgnoreCase("-biocache")) {
            Hashtable<String, String> hashTable = new Hashtable<String, String>();
            hashTable.put("accept", "application/json");
            ObjectMapper mapper = new ObjectMapper();
            mapper.getDeserializationConfig().set(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES,
                    false);
            RestfulClient restfulClient = new RestfulClient(0);
            String fq = "&fq=";
            if (args.length > 1) {
                java.util.Date date = new java.util.Date();
                if (args[1].equals("-lastWeek")) {
                    date = DateUtils.addWeeks(date, -1);
                } else if (args[1].equals("-lastMonth")) {
                    date = DateUtils.addMonths(date, -1);
                } else if (args[1].equals("-lastYear")) {
                    date = DateUtils.addYears(date, -1);
                } else
                    date = null;
                if (date != null) {
                    SimpleDateFormat sfd = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");

                    fq += "last_load_date:%5B" + sfd.format(date) + "%20TO%20*%5D";
                }
            }

            Object[] resp = restfulClient
                    .restGet("http://biocache.ala.org.au/ws/occurrences/search?q=multimedia:Image" + fq
                            + "&facets=data_resource_uid&pageSize=0", hashTable);
            logger.info("The URL: " + "http://biocache.ala.org.au/ws/occurrences/search?q=multimedia:Image" + fq
                    + "&facets=data_resource_uid&pageSize=0");
            if ((Integer) resp[0] == HttpStatus.SC_OK) {
                String content = resp[1].toString();
                logger.debug(resp[1]);
                if (content != null && content.length() > "[]".length()) {
                    Map map = mapper.readValue(content, Map.class);
                    try {
                        List<java.util.LinkedHashMap<String, String>> list = ((List<java.util.LinkedHashMap<String, String>>) ((java.util.LinkedHashMap) ((java.util.ArrayList) map
                                .get("facetResults")).get(0)).get("fieldResult"));
                        Set<String> arg = new LinkedHashSet<String>();
                        for (int i = 0; i < list.size(); i++) {
                            java.util.LinkedHashMap<String, String> value = list.get(i);
                            String dataResource = getDataResource(value.get("fq"));
                            Object provider = (loader.getUidInfoSourceMap().get(dataResource));
                            if (provider != null) {
                                arg.add(provider.toString());
                            }
                        }
                        logger.info("Set of biocache infosource ids to load: " + arg);
                        args = new String[] {};
                        args = arg.toArray(args);
                        //handle the situation where biocache-service reports no data resources
                        if (args.length < 1) {
                            logger.error("No biocache data resources found. Unable to load.");
                            System.exit(0);
                        }
                    } catch (Exception e) {
                        logger.error("ERROR: exit process....." + e);
                        e.printStackTrace();
                        System.exit(0);
                    }
                }
            } else {
                logger.warn("Unable to process url: ");
            }
        }
    }
    int filesRead = loader.load(filePath, args); //FIX ME - move to config
    long finish = System.currentTimeMillis();
    logger.info(filesRead + " files scanned/loaded in: " + ((finish - start) / 60000) + " minutes "
            + (((finish - start) / 1000) % 60) + " seconds.");
    System.exit(1);
}