Example usage for java.util.Set.add

List of usage examples for java.util.Set.add

Introduction

This page collects usage examples for the java.util.Set.add method.

Prototype

boolean add(E e);

Document

Adds the specified element to this set if it is not already present (optional operation). The call returns true if the set did not already contain the element; otherwise the set is left unchanged and the call returns false.
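
For reference, a minimal self-contained sketch of this contract (the class name and element values are illustrative):

import java.util.HashSet;
import java.util.Set;

public class SetAddDemo {
    public static void main(String[] args) {
        Set<String> names = new HashSet<>();
        System.out.println(names.add("alice")); // true: "alice" was not present
        System.out.println(names.add("alice")); // false: duplicate, set unchanged
        System.out.println(names.size());       // 1
    }
}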

Usage

From source file:HSqlPrimerDesign.java
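
This example deduplicates primer sequences: each string read from the JDBC ResultSet is added to a HashSet<CharSequence>.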

public static void main(String[] args) throws NoSuchFieldException, IllegalAccessException,
        ClassNotFoundException, InstantiationException, SQLException, FileNotFoundException {
    Class.forName(JDBC_DRIVER_HSQL).newInstance();
    conn = DriverManager.getConnection(DB_SERVER_URL, USER, PASS);
    PrintWriter log = new PrintWriter(new File("javalog.log"));
    DpalLoad.main(args);
    Dpal_Inst = DpalLoad.INSTANCE_WIN64;
    //        int[][] arr =calcHairpin("GGGGGGCCCCCCCCCCCCGGGGGGG",4);
    //        if(arr.length<=1){
    //            System.out.println("No Hairpin's found");
    //        }else{
    //            System.out.println("Hairpin(s) found");
    //        }
    Statement stat = conn.createStatement();
    ResultSet call = stat.executeQuery(
            "Select * From " + "Primerdb.primers where Cluster ='A1' and UniqueP =True and Bp = 20");
    Set<CharSequence> primers = new HashSet<>();
    while (call.next()) {
        primers.add(call.getString("Sequence"));
    }
    //        primers.stream().forEach(x->{
    //            log.println(x);
    //            log.println(complementarity(x, x, Dpal_Inst));
    //            int[][] arr =calcHairpin((String)x,4);
    //            if(arr.length<=1){
    //                log.println("No Hairpin's found");
    //            }else{
    //                log.println("Hairpin(s) found");
    //            }
    //            log.println();
    //            log.flush();
    //        });
}

From source file:de.jetwick.snacktory.HtmlFetcher.java
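
This example tracks domains already written to disk in a LinkedHashSet, so a URL from a previously seen domain gets a "2" suffix on its output file name.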

public static void main(String[] args) throws Exception {
    BufferedReader reader = new BufferedReader(new FileReader("urls.txt"));
    String line = null;
    Set<String> existing = new LinkedHashSet<String>();
    while ((line = reader.readLine()) != null) {
        int index1 = line.indexOf("\"");
        int index2 = line.indexOf("\"", index1 + 1);
        String url = line.substring(index1 + 1, index2);
        String domainStr = SHelper.extractDomain(url, true);
        String counterStr = "";
        // TODO more similarities
        if (existing.contains(domainStr))
            counterStr = "2";
        else
            existing.add(domainStr);

        String html = new HtmlFetcher().fetchAsString(url, 20000);
        String outFile = domainStr + counterStr + ".html";
        BufferedWriter writer = new BufferedWriter(new FileWriter(outFile));
        writer.write(html);
        writer.close();
    }
    reader.close();
}

From source file:Main.java
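
Here each parsed Car is added to both a List and a HashSet; iterating the deduplicated set then gives a count of how often each distinct car occurs in the list.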

public static void main(String[] args) throws Exception {
    List<Car> carsList = new ArrayList<Car>();
    Set<Car> carsset = new HashSet<Car>();
    File fXmlFile = new File("cars.xml");
    DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
    Document doc = dBuilder.parse(fXmlFile);
    doc.getDocumentElement().normalize();
    NodeList nList = doc.getElementsByTagName("cars");
    Car tempCar = null;
    for (int temp = 0; temp < nList.getLength(); temp++) {
        Node nNode = nList.item(temp);
        if (nNode.getNodeType() == Node.ELEMENT_NODE) {
            tempCar = new Car();
            Element eElement = (Element) nNode;
            tempCar.setModel(eElement.getElementsByTagName("model").item(0).getTextContent());
            tempCar.setVersion(eElement.getElementsByTagName("version").item(0).getTextContent());
            carsList.add(tempCar);
            carsset.add(tempCar);

        }
    }
    for (Car cs : carsset) {
        int count = 0;
        for (Car cl : carsList) {
            if (cs.equals(cl)) {
                count = count + 1;
            }
        }
        System.out.println(cs + "\t" + count);
    }
}

From source file:com.act.lcms.db.analysis.IonDetectionAnalysis.java
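
This example gathers the ion names to search for into a HashSet<String>, adding a single default ion when none are supplied on the command line.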

public static void main(String[] args) throws Exception {

    Options opts = new Options();
    for (Option.Builder b : OPTION_BUILDERS) {
        opts.addOption(b.build());
    }

    CommandLine cl = null;
    try {
        CommandLineParser parser = new DefaultParser();
        cl = parser.parse(opts, args);
    } catch (ParseException e) {
        System.err.format("Argument parsing failed: %s", e.getMessage());
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        System.exit(1);
    }

    if (cl.hasOption("help")) {
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        return;
    }

    File lcmsDir = new File(cl.getOptionValue(OPTION_LCMS_FILE_DIRECTORY));
    if (!lcmsDir.isDirectory()) {
        System.err.format("File at %s is not a directory", lcmsDir.getAbsolutePath());
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        System.exit(1);
    }

    String plottingDirectory = cl.getOptionValue(OPTION_PLOTTING_DIR);

    // Get the ions to include in the search from the command line
    Set<String> includeIons;
    if (cl.hasOption(OPTION_INCLUDE_IONS)) {
        includeIons = new HashSet<>(Arrays.asList(cl.getOptionValues(OPTION_INCLUDE_IONS)));
        LOGGER.info("Including ions in search: %s", StringUtils.join(includeIons, ", "));
    } else {
        includeIons = new HashSet<>();
        includeIons.add(DEFAULT_ION);
    }

    try (DB db = DB.openDBFromCLI(cl)) {
        ScanFile.insertOrUpdateScanFilesInDirectory(db, lcmsDir);

        File inputPredictionCorpus = new File(cl.getOptionValue(OPTION_INPUT_PREDICTION_CORPUS));

        Map<Double, Set<Pair<String, String>>> massChargeToChemicalAndIon = constructMassChargeToChemicalIonsFromInputFile(
                inputPredictionCorpus, includeIons, cl.hasOption(OPTION_LIST_OF_INCHIS_INPUT_FILE));

        Pair<Set<Pair<String, Double>>, Map<String, Double>> values = constructFakeNameToMassChargeAndSetOfMassChargePairs(
                massChargeToChemicalAndIon.keySet());
        Set<Pair<String, Double>> searchMZs = values.getLeft();
        Map<String, Double> chemIDToMassCharge = values.getRight();

        LOGGER.info("The number of mass charges are: %d", searchMZs.size());

        Map<ScanData.KIND, List<LCMSWell>> wellTypeToLCMSWells = readInputExperimentalSetup(db,
                new File(cl.getOptionValue(OPTION_INPUT_POSITIVE_AND_NEGATIVE_CONTROL_WELLS_FILE)));

        // Get the experimental setup, i.e. positive and negative wells, from the config file
        List<LCMSWell> positiveWells = wellTypeToLCMSWells.get(ScanData.KIND.POS_SAMPLE);
        List<LCMSWell> negativeWells = wellTypeToLCMSWells.get(ScanData.KIND.NEG_CONTROL);

        LOGGER.info("Number of positive wells is: %d", positiveWells.size());
        LOGGER.info("Number of negative wells is: %d", negativeWells.size());

        HashMap<Integer, Plate> plateCache = new HashMap<>();
        String outputPrefix = cl.getOptionValue(OPTION_OUTPUT_PREFIX);

        IonDetectionAnalysis<LCMSWell> ionDetectionAnalysis = new IonDetectionAnalysis<LCMSWell>(lcmsDir,
                positiveWells, negativeWells, plottingDirectory, plateCache, searchMZs, db);

        ionDetectionAnalysis.runLCMSMiningAnalysisAndPlotResults(chemIDToMassCharge, massChargeToChemicalAndIon,
                outputPrefix, cl.hasOption(OPTION_NON_REPLICATE_ANALYSIS));
    }
}

From source file:com.lightboxtechnologies.spectrum.SequenceFileExport.java
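
Here file extensions are accumulated into a HashSet<String>, read either from a file or from trailing arguments, before being handed to the Hadoop job configuration.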

public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();

    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    String imageID;
    String outpath;
    String friendlyname;
    final Set<String> exts = new HashSet<String>();

    if ("-f".equals(otherArgs[0])) {
        // five args expected: -f <extensions file> <imageID> <friendly name> <outpath>
        if (otherArgs.length != 5) {
            die();
        }

        // load extensions from file
        final Path extpath = new Path(otherArgs[1]);

        InputStream in = null;
        try {
            in = extpath.getFileSystem(conf).open(extpath);

            Reader r = null;
            try {
                r = new InputStreamReader(in);

                BufferedReader br = null;
                try {
                    br = new BufferedReader(r);

                    String line;
                    while ((line = br.readLine()) != null) {
                        exts.add(line.trim().toLowerCase());
                    }

                    br.close();
                } finally {
                    IOUtils.closeQuietly(br);
                }

                r.close();
            } finally {
                IOUtils.closeQuietly(r);
            }

            in.close();
        } finally {
            IOUtils.closeQuietly(in);
        }

        imageID = otherArgs[2];
        friendlyname = otherArgs[3];
        outpath = otherArgs[4];
    } else {
        if (otherArgs.length < 3) {
            die();
        }

        // read extensions from trailing args
        imageID = otherArgs[0];
        friendlyname = otherArgs[1];
        outpath = otherArgs[2];

        // lowercase all file extensions from the trailing args (they start at index 3, after outpath)
        for (int i = 3; i < otherArgs.length; ++i) {
            exts.add(otherArgs[i].toLowerCase());
        }
    }

    conf.setStrings("extensions", exts.toArray(new String[exts.size()]));

    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyname, "SequenceFileExport", conf);
    job.setJarByClass(SequenceFileExport.class);
    job.setMapperClass(SequenceFileExportMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(MapWritable.class);

    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    FsEntryHBaseInputFormat.setupJob(job, imageID);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    FileOutputFormat.setOutputPath(job, new Path(outpath));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:gr.iti.mklab.bubing.parser.ITIHTMLParser.java
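
This example adds each extracted link URI to an ObjectOpenHashSet as a string, then compares the set's size against the URI collection to detect distinct URIs that collapse to equal strings.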

public static void main(String arg[]) throws IllegalArgumentException, IOException, URISyntaxException,
        JSAPException, NoSuchAlgorithmException {

    final SimpleJSAP jsap = new SimpleJSAP(ITIHTMLParser.class.getName(),
            "Produce the digest of a page: the page is downloaded or passed as argument by specifying a file",
            new Parameter[] {
                    new UnflaggedOption("url", JSAP.STRING_PARSER, JSAP.REQUIRED, "The url of the page."),
                    new Switch("crossAuthorityDuplicates", 'c', "cross-authority-duplicates"),
                    new FlaggedOption("charBufferSize", JSAP.INTSIZE_PARSER, Integer.toString(CHAR_BUFFER_SIZE),
                            JSAP.NOT_REQUIRED, 'b', "buffer",
                            "The size of the parser character buffer (0 for dynamic sizing)."),
                    new FlaggedOption("file", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'f',
                            "file", "The page to be processed."),
                    new FlaggedOption("digester", JSAP.STRING_PARSER, "MD5", JSAP.NOT_REQUIRED, 'd', "digester",
                            "The digester to be used.") });

    JSAPResult jsapResult = jsap.parse(arg);
    if (jsap.messagePrinted())
        System.exit(1);

    final String url = jsapResult.getString("url");
    final String digester = jsapResult.getString("digester");
    final boolean crossAuthorityDuplicates = jsapResult.userSpecified("crossAuthorityDuplicates");
    final int charBufferSize = jsapResult.getInt("charBufferSize");

    final ITIHTMLParser<Void> htmlParser = new ITIHTMLParser<Void>(BinaryParser.forName(digester),
            (TextProcessor<Void>) null, crossAuthorityDuplicates, charBufferSize);
    final SetLinkReceiver linkReceiver = new SetLinkReceiver();
    final byte[] digest;

    if (!jsapResult.userSpecified("file")) {
        final URI uri = new URI(url);
        final HttpGet request = new HttpGet(uri);
        request.setConfig(RequestConfig.custom().setRedirectsEnabled(false).build());
        digest = htmlParser.parse(uri, HttpClients.createDefault().execute(request), linkReceiver);
    } else {
        final String file = jsapResult.getString("file");
        String content = IOUtils.toString(new InputStreamReader(new FileInputStream(file)));
        digest = htmlParser.parse(BURL.parse(url), new StringHttpMessages.HttpResponse(content), linkReceiver);
    }

    System.out.println("DigestHexString: " + Hex.encodeHexString(digest));
    System.out.println("Links: " + linkReceiver.urls);

    Set<String> urlStrings = new ObjectOpenHashSet<String>();
    for (URI link : linkReceiver.urls)
        urlStrings.add(link.toString());
    if (urlStrings.size() != linkReceiver.urls.size())
        System.out.println(
                "There are " + linkReceiver.urls.size() + " URIs but " + urlStrings.size() + " strings");

}

From source file:edu.msu.cme.rdp.kmer.cli.FastKmerFilter.java
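
Here every reference k-mer is mapped to a Set<RefKmer>, and add accumulates one RefKmer per reference sequence position while the index is built.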

public static void main(String[] args) throws Exception {
    final KmerSet<Set<RefKmer>> kmerSet;
    final SeqReader queryReader;
    final SequenceType querySeqType;
    final File queryFile;
    final KmerStartsWriter out;
    final boolean translQuery;
    final int wordSize;
    final int translTable;
    final boolean alignedSeqs;
    final List<String> refLabels = new ArrayList<>();
    final int maxThreads;
    final int trieWordSize;

    try {
        CommandLine cmdLine = new PosixParser().parse(options, args);
        args = cmdLine.getArgs();

        if (args.length < 3) {
            throw new Exception("Unexpected number of arguments");
        }

        if (cmdLine.hasOption("out")) {
            out = new KmerStartsWriter(cmdLine.getOptionValue("out"));
        } else {
            out = new KmerStartsWriter(System.out);
        }

        if (cmdLine.hasOption("aligned")) {
            alignedSeqs = true;
        } else {
            alignedSeqs = false;
        }

        if (cmdLine.hasOption("transl-table")) {
            translTable = Integer.valueOf(cmdLine.getOptionValue("transl-table"));
        } else {
            translTable = 11;
        }

        if (cmdLine.hasOption("threads")) {
            maxThreads = Integer.valueOf(cmdLine.getOptionValue("threads"));
        } else {
            maxThreads = Runtime.getRuntime().availableProcessors();
        }

        queryFile = new File(args[1]);
        wordSize = Integer.valueOf(args[0]);
        SequenceType refSeqType = null;

        querySeqType = SeqUtils.guessSequenceType(queryFile);
        queryReader = new SequenceReader(queryFile);

        if (querySeqType == SequenceType.Protein) {
            throw new Exception("Expected nucl query sequences");
        }

        refSeqType = SeqUtils
                .guessSequenceType(new File(args[2].contains("=") ? args[2].split("=")[1] : args[2]));

        translQuery = refSeqType == SequenceType.Protein;

        if (translQuery && wordSize % 3 != 0) {
            throw new Exception("Word size must be a multiple of 3 for nucl ref seqs");
        }

        if (translQuery) {
            trieWordSize = wordSize / 3;
        } else {
            trieWordSize = wordSize;
        }
        kmerSet = new KmerSet<Set<RefKmer>>(); // alternative: new KmerTrie(trieWordSize, translQuery)

        for (int index = 2; index < args.length; index++) {
            String refName;
            String refFileName = args[index];
            if (refFileName.contains("=")) {
                String[] lexemes = refFileName.split("=");
                refName = lexemes[0];
                refFileName = lexemes[1];
            } else {
                String tmpName = new File(refFileName).getName();
                if (tmpName.contains(".")) {
                    refName = tmpName.substring(0, tmpName.lastIndexOf("."));
                } else {
                    refName = tmpName;
                }
            }

            File refFile = new File(refFileName);

            if (refSeqType != SeqUtils.guessSequenceType(refFile)) {
                throw new Exception(
                        "Reference file " + refFile + " contains " + SeqUtils.guessFileFormat(refFile)
                                + " sequences but expected " + refSeqType + " sequences");
            }

            SequenceReader seqReader = new SequenceReader(refFile);
            Sequence seq;

            while ((seq = seqReader.readNextSequence()) != null) {
                if (seq.getSeqName().startsWith("#")) {
                    continue;
                }

                KmerGenerator kmers;
                try {
                    if (translQuery) { //protein ref
                        kmers = new ProtKmerGenerator(seq.getSeqString(), trieWordSize, alignedSeqs);
                    } else {
                        kmers = new NuclKmerGenerator(seq.getSeqString(), trieWordSize, alignedSeqs);
                    }
                    while (kmers.hasNext()) {
                        Kmer temp = kmers.next();
                        long[] next = temp.getLongKmers();
                        Set<RefKmer> refKmers = kmerSet.get(next);
                        if (refKmers == null) {
                            refKmers = new HashSet<>();
                            kmerSet.add(next, refKmers);
                        }

                        RefKmer kmerRef = new RefKmer();
                        kmerRef.modelPos = kmers.getPosition();
                        kmerRef.refFileIndex = refLabels.size();
                        kmerRef.refSeqid = seq.getSeqName();
                        refKmers.add(kmerRef);
                    }
                } catch (IllegalArgumentException ex) {
                    //System.err.println(seq.getSeqName()+ " " + ex.getMessage());
                }
            }
            seqReader.close();

            refLabels.add(refName);
        }

    } catch (Exception e) {
        new HelpFormatter().printHelp(
                "KmerSearch <kmerSize> <query_file> [name=]<ref_file> ...\nkmerSize should be multiple of 3, (recommend 45, minimum 30, maximum 63) ",
                options);
        e.printStackTrace();
        System.exit(1);
        throw new RuntimeException("Stupid jvm"); //While this will never get thrown it is required to make sure javac doesn't get confused about uninitialized variables
    }

    long startTime = System.currentTimeMillis();
    long seqCount = 0;
    final int maxTasks = 25000;

    System.err.println("Starting kmer mapping at " + new Date());
    System.err.println("*  Number of threads:       " + maxThreads);
    System.err.println("*  References:              " + refLabels);
    System.err.println("*  Reads file:              " + queryFile);
    System.err.println("*  Kmer length:             " + trieWordSize);
    System.err.println("*  Kmer Refset Size:        " + kmerSet.size());

    final AtomicInteger processed = new AtomicInteger();
    final AtomicInteger outstandingTasks = new AtomicInteger();

    ExecutorService service = Executors.newFixedThreadPool(maxThreads);

    Sequence querySeq;

    while ((querySeq = queryReader.readNextSequence()) != null) {
        seqCount++;

        String seqString = querySeq.getSeqString();

        if ((!translQuery && seqString.length() < wordSize)
                || (translQuery && seqString.length() < wordSize + 2)) {
            //System.err.println(querySeq.getSeqName() + "\t" + seqString.length());
            continue;
        }

        final Sequence threadSeq = querySeq;

        Runnable r = new Runnable() {

            public void run() {
                try {
                    processSeq(threadSeq, refLabels, kmerSet, out, wordSize, translQuery, translTable, false);
                    processSeq(threadSeq, refLabels, kmerSet, out, wordSize, translQuery, translTable, true);

                    processed.incrementAndGet();
                    outstandingTasks.decrementAndGet();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        };

        outstandingTasks.incrementAndGet();
        service.submit(r);

        // Busy-wait until the backlog drops below maxTasks, bounding queued work
        while (outstandingTasks.get() >= maxTasks)
            ;

        if ((processed.get() + 1) % 1000000 == 0) {
            System.err.println("Processed " + processed + " sequences in "
                    + (System.currentTimeMillis() - startTime) + " ms");
        }
    }

    service.shutdown();
    service.awaitTermination(1, TimeUnit.DAYS);

    System.err.println("Finished Processed " + processed + " sequences in "
            + (System.currentTimeMillis() - startTime) + " ms");

    out.close();
}

From source file:com.act.analysis.surfactant.AnalysisDriver.java
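
This example records the ids found in an existing output file in a HashSet so that molecules already processed can be skipped on a rerun.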

public static void main(String[] args) throws Exception {
    Options opts = new Options();
    for (Option.Builder b : OPTION_BUILDERS) {
        opts.addOption(b.build());
    }

    CommandLine cl = null;
    try {
        CommandLineParser parser = new DefaultParser();
        cl = parser.parse(opts, args);
    } catch (ParseException e) {
        System.err.format("Argument parsing failed: %s\n", e.getMessage());
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        System.exit(1);
    }

    if (cl.hasOption("help")) {
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        return;
    }

    Set<String> seenOutputIds = new HashSet<>();

    TSVWriter<String, String> tsvWriter = null;
    if (cl.hasOption(OPTION_OUTPUT_FILE)) {
        File outputFile = new File(cl.getOptionValue(OPTION_OUTPUT_FILE));
        List<Map<String, String>> oldResults = null;
        if (outputFile.exists()) {
            System.err.format(
                    "Output file already exists, reading old results and skipping processed molecules.\n");
            TSVParser outputParser = new TSVParser();
            outputParser.parse(outputFile);
            oldResults = outputParser.getResults();
            for (Map<String, String> row : oldResults) {
                // TODO: verify that the last row was written cleanly/completely.
                seenOutputIds.add(row.get("id"));
            }
        }

        List<String> header = new ArrayList<>();
        header.add("name");
        header.add("id");
        header.add("inchi");
        header.add("label");
        for (SurfactantAnalysis.FEATURES f : SurfactantAnalysis.FEATURES.values()) {
            header.add(f.toString());
        }
        // TODO: make this API more auto-closable friendly.
        tsvWriter = new TSVWriter<>(header);
        tsvWriter.open(outputFile);
        if (oldResults != null) {
            System.out.format("Re-writing %d existing result rows\n", oldResults.size());
            tsvWriter.append(oldResults);
        }
    }

    try {
        Map<SurfactantAnalysis.FEATURES, Double> analysisFeatures;

        LicenseManager.setLicenseFile(cl.getOptionValue(OPTION_LICENSE_FILE));
        if (cl.hasOption(OPTION_INCHI)) {
            analysisFeatures = SurfactantAnalysis.performAnalysis(cl.getOptionValue(OPTION_INCHI),
                    cl.hasOption(OPTION_DISPLAY));
            Map<String, String> tsvFeatures = new HashMap<>();
            // Convert features to strings to avoid some weird formatting issues.  It's ugly, but it works.
            for (Map.Entry<SurfactantAnalysis.FEATURES, Double> entry : analysisFeatures.entrySet()) {
                tsvFeatures.put(entry.getKey().toString(), String.format("%.6f", entry.getValue()));
            }
            tsvFeatures.put("name", "direct-inchi-input");
            if (tsvWriter != null) {
                tsvWriter.append(tsvFeatures);
            }
        } else if (cl.hasOption(OPTION_INPUT_FILE)) {
            TSVParser parser = new TSVParser();
            parser.parse(new File(cl.getOptionValue(OPTION_INPUT_FILE)));
            int i = 0;
            List<Map<String, String>> inputRows = parser.getResults();

            for (Map<String, String> row : inputRows) {
                i++; // Just for warning messages.
                if (!row.containsKey("name") || !row.containsKey("id") || !row.containsKey("inchi")) {
                    System.err.format(
                            "WARNING: TSV rows must contain at least name, id, and inchi, skipping row %d\n",
                            i);
                    continue;
                }
                if (seenOutputIds.contains(row.get("id"))) {
                    System.out.format("Skipping input row with id already in output: %s\n", row.get("id"));
                    continue;
                }

                System.out.format("Analysis for chemical %s\n", row.get("name"));
                try {
                    analysisFeatures = SurfactantAnalysis.performAnalysis(row.get("inchi"), false);
                } catch (Exception e) {
                    // Ignore exceptions for now.  Sometimes the regression analysis or Chemaxon processing chokes unexpectedly.
                    System.err.format("ERROR caught exception while processing '%s':\n", row.get("name"));
                    System.err.format("%s\n", e.getMessage());
                    e.printStackTrace(System.err);
                    System.err.println("Skipping...");
                    continue;
                }
                System.out.format("--- Done analysis for chemical %s\n", row.get("name"));

                // This is a duplicate of the OPTION_INCHI block code, but it's inside of a tight loop, so...
                Map<String, String> tsvFeatures = new HashMap<>();
                for (Map.Entry<SurfactantAnalysis.FEATURES, Double> entry : analysisFeatures.entrySet()) {
                    tsvFeatures.put(entry.getKey().toString(), String.format("%.6f", entry.getValue()));
                }
                tsvFeatures.put("name", row.get("name"));
                tsvFeatures.put("id", row.get("id"));
                tsvFeatures.put("inchi", row.get("inchi"));
                tsvFeatures.put("label", row.containsKey("label") ? row.get("label") : "?");
                if (tsvWriter != null) {
                    tsvWriter.append(tsvFeatures);
                    // Flush every time in case we crash or get interrupted.  The features must flow!
                    tsvWriter.flush();
                }
            }
        } else {
            throw new RuntimeException("Must specify inchi or input file");
        }
    } finally {
        if (tsvWriter != null) {
            tsvWriter.close();
        }
    }
}

From source file:de.citec.sc.matoll.process.Matoll_CreateMax.java
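
Here add builds several working sets: property URIs from a gold-standard lexicon, dependency labels for preprocessing, and per-word and per-URI sets of sentence ids.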

public static void main(String[] args) throws IOException, ParserConfigurationException, SAXException,
        InstantiationException, IllegalAccessException, ClassNotFoundException, Exception {

    String directory;
    String gold_standard_lexicon;
    String output_lexicon;
    String configFile;
    Language language;
    String output;

    Stopwords stopwords = new Stopwords();

    HashMap<String, Double> maxima;
    maxima = new HashMap<String, Double>();

    if (args.length < 3) {
        System.out.print("Usage: Matoll --mode=train/test <DIRECTORY> <CONFIG>\n");
        return;

    }

    //      Classifier classifier;

    directory = args[1];
    configFile = args[2];

    final Config config = new Config();

    config.loadFromFile(configFile);

    gold_standard_lexicon = config.getGoldStandardLexicon();

    String model_file = config.getModel();

    output_lexicon = config.getOutputLexicon();
    output = config.getOutput();

    language = config.getLanguage();

    LexiconLoader loader = new LexiconLoader();
    Lexicon gold = loader.loadFromFile(gold_standard_lexicon);

    Set<String> uris = new HashSet<>();
    //        Map<Integer,String> sentence_list = new HashMap<>();
    Map<Integer, Set<Integer>> mapping_words_sentences = new HashMap<>();

    //consider only properties
    for (LexicalEntry entry : gold.getEntries()) {
        try {
            for (Sense sense : entry.getSenseBehaviours().keySet()) {
                String tmp_uri = sense.getReference().getURI().replace("http://dbpedia.org/ontology/", "");
                if (!Character.isUpperCase(tmp_uri.charAt(0))) {
                    uris.add(sense.getReference().getURI());
                }
            }
        } catch (Exception e) {
            // skip entries whose sense lacks a resolvable reference
        }
    }

    ModelPreprocessor preprocessor = new ModelPreprocessor(language);
    preprocessor.setCoreferenceResolution(false);
    Set<String> dep = new HashSet<>();
    dep.add("prep");
    dep.add("appos");
    dep.add("nn");
    dep.add("dobj");
    dep.add("pobj");
    dep.add("num");
    preprocessor.setDEP(dep);

    List<File> list_files = new ArrayList<>();

    if (config.getFiles().isEmpty()) {
        File folder = new File(directory);
        File[] files = folder.listFiles();
        for (File file : files) {
            if (file.toString().contains(".ttl"))
                list_files.add(file);
        }
    } else {
        list_files.addAll(config.getFiles());
    }
    System.out.println(list_files.size());

    int sentence_counter = 0;
    Map<String, Set<Integer>> bag_words_uri = new HashMap<>();
    Map<String, Integer> mapping_word_id = new HashMap<>();
    for (File file : list_files) {
        Model model = RDFDataMgr.loadModel(file.toString());
        for (Model sentence : getSentences(model)) {
            String reference = getReference(sentence);
            reference = reference.replace("http://dbpedia/", "http://dbpedia.org/");
            if (uris.contains(reference)) {
                sentence_counter += 1;
                Set<Integer> words_ids = getBagOfWords(sentence, stopwords, mapping_word_id);
                //TODO: add sentence preprocessing
                String obj = getObject(sentence);
                String subj = getSubject(sentence);
                preprocessor.preprocess(sentence, subj, obj, language);
                //TODO: also return marker if object or subject of property (in SPARQL this has to be optional of course)
                String parsed_sentence = getParsedSentence(sentence);
                try (FileWriter fw = new FileWriter("mapping_sentences_to_ids_goldstandard.tsv", true);
                        BufferedWriter bw = new BufferedWriter(fw);
                        PrintWriter out = new PrintWriter(bw)) {
                    out.println(sentence_counter + "\t" + parsed_sentence);
                } catch (IOException e) {
                    e.printStackTrace();
                }
                for (Integer word_id : words_ids) {
                    if (mapping_words_sentences.containsKey(word_id)) {
                        Set<Integer> tmp_set = mapping_words_sentences.get(word_id);
                        tmp_set.add(sentence_counter);
                        mapping_words_sentences.put(word_id, tmp_set);

                    } else {
                        Set<Integer> tmp_set = new HashSet<>();
                        tmp_set.add(sentence_counter);
                        mapping_words_sentences.put(word_id, tmp_set);
                    }

                }
                if (bag_words_uri.containsKey(reference)) {
                    Set<Integer> tmp = bag_words_uri.get(reference);
                    for (Integer w : words_ids) {
                        tmp.add(w);

                    }
                    bag_words_uri.put(reference, tmp);
                } else {
                    Set<Integer> tmp = new HashSet<>();
                    for (Integer w : words_ids) {
                        tmp.add(w);
                    }
                    bag_words_uri.put(reference, tmp);
                }
            }

        }
        model.close();

    }

    PrintWriter writer = new PrintWriter("bag_of_words_only_goldstandard.tsv");
    StringBuilder string_builder = new StringBuilder();
    for (String r : bag_words_uri.keySet()) {
        string_builder.append(r);
        for (Integer i : bag_words_uri.get(r)) {
            string_builder.append("\t");
            string_builder.append(i);
        }
        string_builder.append("\n");
    }
    writer.write(string_builder.toString());
    writer.close();

    writer = new PrintWriter("mapping_words_to_sentenceids_goldstandard.tsv");
    string_builder = new StringBuilder();
    for (Integer w : mapping_words_sentences.keySet()) {
        string_builder.append(w);
        for (int i : mapping_words_sentences.get(w)) {
            string_builder.append("\t");
            string_builder.append(i);
        }
        string_builder.append("\n");
    }
    writer.write(string_builder.toString());
    writer.close();

}

From source file:org.ala.hbase.RepoDataLoader.java
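
This example adds infosource ids resolved from a biocache facet query to a LinkedHashSet, removing duplicates while preserving insertion order.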

/**
 * This takes a list of infosource ids...
 * <p/>
 * Usage: -stats or -reindex or -gList followed by a list of infosource ids
 *
 * @param args
 */
public static void main(String[] args) throws Exception {
    //RepoDataLoader loader = new RepoDataLoader();
    ApplicationContext context = SpringUtils.getContext();
    RepoDataLoader loader = (RepoDataLoader) context.getBean(RepoDataLoader.class);
    long start = System.currentTimeMillis();
    loader.loadInfoSources();
    String filePath = repositoryDir;
    if (args.length > 0) {
        if (args[0].equalsIgnoreCase("-stats")) {
            loader.statsOnly = true;
            args = (String[]) ArrayUtils.subarray(args, 1, args.length);
        }
        if (args[0].equalsIgnoreCase("-reindex")) {
            loader.reindex = true;
            loader.indexer = context.getBean(PartialIndex.class);
            args = (String[]) ArrayUtils.subarray(args, 1, args.length);
            logger.info("**** -reindex: " + loader.reindex);
            logger.debug("reindex url: " + loader.reindexUrl);
        }
        if (args[0].equalsIgnoreCase("-gList")) {
            loader.gList = true;
            args = (String[]) ArrayUtils.subarray(args, 1, args.length);
            logger.info("**** -gList: " + loader.gList);
        }
        if (args[0].equalsIgnoreCase("-biocache")) {
            Hashtable<String, String> hashTable = new Hashtable<String, String>();
            hashTable.put("accept", "application/json");
            ObjectMapper mapper = new ObjectMapper();
            mapper.getDeserializationConfig().set(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES,
                    false);
            RestfulClient restfulClient = new RestfulClient(0);
            String fq = "&fq=";
            if (args.length > 1) {
                java.util.Date date = new java.util.Date();
                if (args[1].equals("-lastWeek")) {
                    date = DateUtils.addWeeks(date, -1);
                } else if (args[1].equals("-lastMonth")) {
                    date = DateUtils.addMonths(date, -1);
                } else if (args[1].equals("-lastYear")) {
                    date = DateUtils.addYears(date, -1);
                } else
                    date = null;
                if (date != null) {
                    SimpleDateFormat sfd = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");

                    fq += "last_load_date:%5B" + sfd.format(date) + "%20TO%20*%5D";
                }
            }

            Object[] resp = restfulClient
                    .restGet("http://biocache.ala.org.au/ws/occurrences/search?q=multimedia:Image" + fq
                            + "&facets=data_resource_uid&pageSize=0", hashTable);
            logger.info("The URL: " + "http://biocache.ala.org.au/ws/occurrences/search?q=multimedia:Image" + fq
                    + "&facets=data_resource_uid&pageSize=0");
            if ((Integer) resp[0] == HttpStatus.SC_OK) {
                String content = resp[1].toString();
                logger.debug(resp[1]);
                if (content != null && content.length() > "[]".length()) {
                    Map map = mapper.readValue(content, Map.class);
                    try {
                        List<java.util.LinkedHashMap<String, String>> list = ((List<java.util.LinkedHashMap<String, String>>) ((java.util.LinkedHashMap) ((java.util.ArrayList) map
                                .get("facetResults")).get(0)).get("fieldResult"));
                        Set<String> arg = new LinkedHashSet<String>();
                        for (int i = 0; i < list.size(); i++) {
                            java.util.LinkedHashMap<String, String> value = list.get(i);
                            String dataResource = getDataResource(value.get("fq"));
                            Object provider = (loader.getUidInfoSourceMap().get(dataResource));
                            if (provider != null) {
                                arg.add(provider.toString());
                            }
                        }
                        logger.info("Set of biocache infosource ids to load: " + arg);
                        args = new String[] {};
                        args = arg.toArray(args);
                        //handle the situation where biocache-service reports no data resources
                        if (args.length < 1) {
                            logger.error("No biocache data resources found. Unable to load.");
                            System.exit(0);
                        }
                    } catch (Exception e) {
                        logger.error("ERROR: exit process....." + e);
                        e.printStackTrace();
                        System.exit(0);
                    }
                }
            } else {
                logger.warn("Unable to process url: ");
            }
        }
    }
    int filesRead = loader.load(filePath, args); //FIX ME - move to config
    long finish = System.currentTimeMillis();
    logger.info(filesRead + " files scanned/loaded in: " + ((finish - start) / 60000) + " minutes "
            + (((finish - start) / 1000) % 60) + " seconds.");
    System.exit(1);
}