List of usage examples for java.lang.String.contains(CharSequence)
public boolean contains(CharSequence s)
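Returns true if and only if this string contains the specified sequence of char values; matching is case-sensitive, and a NullPointerException is thrown if s is null. Before the longer real-world examples below, here is a minimal self-contained sketch (the class name and string values are hypothetical; only the standard JDK is used):

public class ContainsDemo {
  public static void main(String[] args) {
    String haystack = "java.lang.String";
    System.out.println(haystack.contains("lang")); // true
    System.out.println(haystack.contains("LANG")); // false: matching is case-sensitive
    System.out.println(haystack.contains(""));     // true: every string contains the empty sequence
    // In OpenJDK, contains simply delegates to indexOf:
    // s.contains(t) is equivalent to s.indexOf(t.toString()) > -1.
  }
}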
From source file:imp.lstm.main.Driver.java
public static void main(String[] args)
    throws FileNotFoundException, IOException, ConfigurationException, InvalidParametersException {
  FileBasedConfigurationBuilder<PropertiesConfiguration> builder = new FileBasedConfigurationBuilder<>(
      PropertiesConfiguration.class).configure(new Parameters().properties()
          .setFileName(args[0]).setThrowExceptionOnMissing(true)
          .setListDelimiterHandler(new DefaultListDelimiterHandler(';'))
          .setIncludesAllowed(false));
  Configuration config = builder.getConfiguration();
  String inputSongPath = config.getString("input_song");
  String outputFolderPath = config.getString("output_folder");
  String autoEncoderParamsPath = config.getString("auto_encoder_params");
  String nameGeneratorParamsPath = config.getString("name_generator_params");
  String queueFolderPath = config.getString("queue_folder");
  String referenceQueuePath = config.getString("reference_queue", "nil");
  String inputCorpusFolder = config.getString("input_corpus_folder");
  boolean shouldWriteQueue = config.getBoolean("should_write_generated_queue");
  boolean frankensteinTest = config.getBoolean("queue_tests_frankenstein");
  boolean interpolateTest = config.getBoolean("queue_tests_interpolation");
  boolean iterateOverCorpus = config.getBoolean("iterate_over_corpus", false);
  boolean shouldGenerateSongTitle = config.getBoolean("generate_song_title");
  boolean shouldGenerateSong = config.getBoolean("generate_leadsheet");
  // NOTE: advanceDecoding is used in the encoding loop below but its declaration was
  // missing from the scraped snippet; reading it from the config here (and the key name)
  // is an assumption made to keep the example compilable.
  boolean advanceDecoding = config.getBoolean("advance_decoding", false);
  LogTimer.initStartTime(); // start our logging timer to keep track of our execution time
  LogTimer.log("Creating name generator..."); // here is just silly code for generating name based on an LSTM lol $wag
  LSTM lstm = new LSTM();
  FullyConnectedLayer fullLayer = new FullyConnectedLayer(Operations.None);
  Loadable titleNetLoader = new Loadable() {
    @Override
    public boolean load(INDArray array, String path) {
      String car = pathCar(path);
      String cdr = pathCdr(path);
      switch (car) {
        case "full":
          return fullLayer.load(array, cdr);
        case "lstm":
          return lstm.load(array, cdr);
        default:
          return false;
      }
    }
  };
  LogTimer.log("Packing name generator from files...");
  (new NetworkConnectomeLoader()).load(nameGeneratorParamsPath, titleNetLoader);
  String characterString = " !\"'[],-.01245679:?ABCDEFGHIJKLMNOPQRSTUVWYZabcdefghijklmnopqrstuvwxyz";
  // Initialization
  LogTimer.log("Creating autoencoder...");
  int inputSize = 34;
  int outputSize = EncodingParameters.noteEncoder.getNoteLength();
  int featureVectorSize = 100;
  ProductCompressingAutoencoder autoencoder = new ProductCompressingAutoencoder(24, 48, 84 + 1, false); // create our network
  int numInterpolationDivisions = 5;
  // "pack" the network from weights and biases file directory
  LogTimer.log("Packing autoencoder from files");
  (new NetworkConnectomeLoader()).load(autoEncoderParamsPath, autoencoder);
  File[] songFiles;
  if (iterateOverCorpus) {
    songFiles = new File(inputCorpusFolder).listFiles();
  } else {
    songFiles = new File[] { new File(inputSongPath) };
  }
  for (File inputFile : songFiles) {
    (new NetworkConnectomeLoader()).refresh(autoEncoderParamsPath, autoencoder, "initialstate");
    String songTitle;
    if (shouldGenerateSong) {
      Random rand = new Random();
      AVector charOut = Vector.createLength(characterString.length());
      GroupedSoftMaxSampler sampler = new GroupedSoftMaxSampler(
          new Group[] { new Group(0, characterString.length(), true) });
      songTitle = "";
      for (int i = 0; i < 50; i++) {
        charOut = fullLayer.forward(lstm.step(charOut));
        charOut = sampler.filter(charOut);
        int charIndex = 0;
        for (; charIndex < charOut.length(); charIndex++) {
          if (charOut.get(charIndex) == 1.0) {
            break;
          }
        }
        songTitle += characterString.substring(charIndex, charIndex + 1);
      }
      songTitle = songTitle.trim();
      LogTimer.log("Generated song name: " + songTitle);
    } else {
      songTitle = "The Song We Never Name";
    }
    LogTimer.log("Reading file...");
    LeadSheetDataSequence inputSequence = LeadSheetIO.readLeadSheet(inputFile); // read our leadsheet to get a data vessel as retrieved in rbm-provisor
    LeadSheetDataSequence outputSequence = inputSequence.copy();
    outputSequence.clearMelody();
    if (interpolateTest) {
      LeadSheetDataSequence additionalOutput = outputSequence.copy();
      for (int i = 0; i < numInterpolationDivisions; i++) {
        outputSequence.concat(additionalOutput.copy());
      }
    }
    LeadSheetDataSequence decoderInputSequence = outputSequence.copy();
    LogTimer.startLog("Encoding data...");
    //TradingTimer.initStart(); // start our trading timer to keep track of our generation versus realtime play
    while (inputSequence.hasNext()) { // iterate through time steps in input data
      //TradingTimer.waitForNextTimedInput();
      autoencoder.encodeStep(inputSequence.retrieve()); // feed the resultant input vector into the network
      if (advanceDecoding) { // if we are using advance decoding (we start decoding as soon as we can)
        if (autoencoder.canDecode()) { // if queue has enough data to decode from
          outputSequence.pushStep(null, null,
              autoencoder.decodeStep(decoderInputSequence.retrieve())); // take sampled data for a timestep from autoencoder
          //TradingTimer.logTimestep(); // log our time to TradingTimer so we can know how far ahead of realtime we are
        }
      }
    }
    LogTimer.endLog();
    if (shouldWriteQueue) {
      String queueFilePath = queueFolderPath + java.io.File.separator
          + inputFile.getName().replace(".ls", ".q");
      FragmentedNeuralQueue currQueue = autoencoder.getQueue();
      currQueue.writeToFile(queueFilePath);
      LogTimer.log("Wrote queue " + inputFile.getName().replace(".ls", ".q") + " to file...");
    }
    if (shouldGenerateSong) {
      if (interpolateTest) {
        FragmentedNeuralQueue refQueue = new FragmentedNeuralQueue();
        refQueue.initFromFile(referenceQueuePath);
        FragmentedNeuralQueue currQueue = autoencoder.getQueue();
        //currQueue.writeToFile(queueFilePath);
        autoencoder.setQueue(currQueue.copy());
        while (autoencoder.hasDataStepsLeft()) { // we are done encoding all time steps, so just finish decoding!
          outputSequence.pushStep(null, null,
              autoencoder.decodeStep(decoderInputSequence.retrieve()));
        }
        for (int i = 1; i <= numInterpolationDivisions; i++) {
          System.out.println("Starting interpolation " + ((1.0 / numInterpolationDivisions) * (i)));
          (new NetworkConnectomeLoader()).refresh(autoEncoderParamsPath, autoencoder, "initialstate");
          FragmentedNeuralQueue currCopy = currQueue.copy();
          currCopy.basicInterpolate(refQueue, (1.0 / numInterpolationDivisions) * (i));
          autoencoder.setQueue(currCopy);
          int timeStep = 0;
          while (autoencoder.hasDataStepsLeft()) {
            System.out.println("interpolation " + i + " step " + ++timeStep);
            outputSequence.pushStep(null, null,
                autoencoder.decodeStep(decoderInputSequence.retrieve()));
          }
        }
      }
      if (frankensteinTest) {
        LogTimer.startLog("Loading queues");
        File queueFolder = new File(queueFolderPath);
        int numComponents = config.getInt("frankenstein_num_components", 5);
        int numCombinations = config.getInt("frankenstein_num_combinations", 6);
        double interpolationMagnitude = config.getDouble("frankenstein_magnitude", 2.0);
        if (queueFolder.isDirectory()) {
          File[] queueFiles = queueFolder.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
              return name.contains(".q");
            }
          });
          List<File> fileList = new ArrayList<>();
          for (File file : queueFiles) {
            fileList.add(file);
          }
          Collections.shuffle(fileList);
          int numSelectedFiles = (numComponents > queueFiles.length) ? queueFiles.length : numComponents;
          for (int i = 0; i < queueFiles.length - numSelectedFiles; i++) {
            fileList.remove(fileList.size() - 1);
          }
          List<FragmentedNeuralQueue> queuePopulation = new ArrayList<>(fileList.size());
          songTitle += " - a mix of ";
          for (File file : fileList) {
            FragmentedNeuralQueue newQueue = new FragmentedNeuralQueue();
            newQueue.initFromFile(file.getPath());
            queuePopulation.add(newQueue);
            songTitle += file.getName().replaceAll(".ls", "") + ", ";
          }
          LogTimer.endLog();
          LeadSheetDataSequence additionalOutput = outputSequence.copy();
          for (int i = 1; i < numCombinations; i++) {
            outputSequence.concat(additionalOutput.copy());
          }
          decoderInputSequence = outputSequence.copy();
          FragmentedNeuralQueue origQueue = autoencoder.getQueue();
          for (int i = 0; i < numCombinations; i++) {
            LogTimer.startLog("Performing queue interpolation...");
            AVector combinationStrengths = Vector.createLength(queuePopulation.size());
            Random vectorRand = new Random(i);
            for (int j = 0; j < combinationStrengths.length(); j++) {
              combinationStrengths.set(j, vectorRand.nextDouble());
            }
            combinationStrengths.divide(combinationStrengths.elementSum());
            FragmentedNeuralQueue currQueue = origQueue.copy();
            for (int k = 0; k < combinationStrengths.length(); k++) {
              currQueue.basicInterpolate(queuePopulation.get(k),
                  combinationStrengths.get(k) * interpolationMagnitude);
            }
            LogTimer.endLog();
            autoencoder.setQueue(currQueue);
            LogTimer.startLog("Refreshing autoencoder state...");
            (new NetworkConnectomeLoader()).refresh(autoEncoderParamsPath, autoencoder, "initialstate");
            LogTimer.endLog();
            LogTimer.startLog("Decoding segment...");
            while (autoencoder.hasDataStepsLeft()) {
              outputSequence.pushStep(null, null,
                  autoencoder.decodeStep(decoderInputSequence.retrieve()));
            }
            LogTimer.endLog();
          }
        }
      }
      while (autoencoder.hasDataStepsLeft()) { // we are done encoding all time steps, so just finish decoding!
        outputSequence.pushStep(null, null,
            autoencoder.decodeStep(decoderInputSequence.retrieve()));
      }
      LogTimer.log("Writing file...");
      String outputFilename = outputFolderPath + java.io.File.separator
          + inputFile.getName().replace(".ls", "_Output"); // we'll write our generated file with the same name plus "_Output"
      LeadSheetIO.writeLeadSheet(outputSequence, outputFilename, songTitle);
      System.out.println(outputFilename);
    } else {
      autoencoder.setQueue(new FragmentedNeuralQueue());
    }
  }
  LogTimer.log("Process finished"); // Done!
}
From source file:org.eclipse.swt.snippets.SnippetLauncher.java
public static void main(String[] args) {
  File sourceDir = SnippetsConfig.SNIPPETS_SOURCE_DIR;
  boolean hasSource = sourceDir.exists();
  int count = 500;
  if (hasSource) {
    File[] files = sourceDir.listFiles();
    if (files.length > 0)
      count = files.length;
  }
  for (int i = 1; i < count; i++) {
    if (SnippetsConfig.isPrintingSnippet(i))
      continue; // avoid printing to printer
    String className = "Snippet" + i;
    Class<?> clazz = null;
    try {
      clazz = Class.forName(SnippetsConfig.SNIPPETS_PACKAGE + "." + className);
    } catch (ClassNotFoundException e) {
    }
    if (clazz != null) {
      System.out.println("\n" + clazz.getName());
      if (hasSource) {
        File sourceFile = new File(sourceDir, className + ".java");
        try (FileReader reader = new FileReader(sourceFile)) {
          char[] buffer = new char[(int) sourceFile.length()];
          reader.read(buffer);
          String source = String.valueOf(buffer);
          int start = source.indexOf("package");
          start = source.indexOf("/*", start);
          int end = source.indexOf("* For a list of all");
          System.out.println(source.substring(start, end - 3));
          boolean skip = false;
          String platform = SWT.getPlatform();
          if (source.contains("PocketPC")) {
            platform = "PocketPC";
            skip = true;
          } else if (source.contains("OpenGL")) {
            platform = "OpenGL";
            skip = true;
          } else if (source.contains("JavaXPCOM")) {
            platform = "JavaXPCOM";
            skip = true;
          } else {
            String[] platforms = { "win32", "gtk" };
            for (int p = 0; p < platforms.length; p++) {
              if (!platforms[p].equals(platform) && source.contains("." + platforms[p])) {
                platform = platforms[p];
                skip = true;
                break;
              }
            }
          }
          if (skip) {
            System.out.println("...skipping " + platform + " example...");
            continue;
          }
        } catch (Exception e) {
        }
      }
      Method method = null;
      String[] param = SnippetsConfig.getSnippetArguments(i);
      try {
        method = clazz.getMethod("main", param.getClass());
      } catch (NoSuchMethodException e) {
        System.out.println(" Did not find main(String [])");
      }
      if (method != null) {
        try {
          method.invoke(clazz, new Object[] { param });
        } catch (IllegalAccessException e) {
          System.out.println(" Failed to launch (illegal access)");
        } catch (IllegalArgumentException e) {
          System.out.println(" Failed to launch (illegal argument to main)");
        } catch (InvocationTargetException e) {
          System.out.println(" Exception in Snippet: " + e.getTargetException());
        }
      }
    }
  }
}
From source file:edu.msu.cme.rdp.kmer.cli.FastKmerFilter.java
public static void main(String[] args) throws Exception {
  final KmerSet<Set<RefKmer>> kmerSet;
  final SeqReader queryReader;
  final SequenceType querySeqType;
  final File queryFile;
  final KmerStartsWriter out;
  final boolean translQuery;
  final int wordSize;
  final int translTable;
  final boolean alignedSeqs;
  final List<String> refLabels = new ArrayList();
  final int maxThreads;
  final int trieWordSize;
  try {
    CommandLine cmdLine = new PosixParser().parse(options, args);
    args = cmdLine.getArgs();
    if (args.length < 3) {
      throw new Exception("Unexpected number of arguments");
    }
    if (cmdLine.hasOption("out")) {
      out = new KmerStartsWriter(cmdLine.getOptionValue("out"));
    } else {
      out = new KmerStartsWriter(System.out);
    }
    if (cmdLine.hasOption("aligned")) {
      alignedSeqs = true;
    } else {
      alignedSeqs = false;
    }
    if (cmdLine.hasOption("transl-table")) {
      translTable = Integer.valueOf(cmdLine.getOptionValue("transl-table"));
    } else {
      translTable = 11;
    }
    if (cmdLine.hasOption("threads")) {
      maxThreads = Integer.valueOf(cmdLine.getOptionValue("threads"));
    } else {
      maxThreads = Runtime.getRuntime().availableProcessors();
    }
    queryFile = new File(args[1]);
    wordSize = Integer.valueOf(args[0]);
    SequenceType refSeqType = null;
    querySeqType = SeqUtils.guessSequenceType(queryFile);
    queryReader = new SequenceReader(queryFile);
    if (querySeqType == SequenceType.Protein) {
      throw new Exception("Expected nucl query sequences");
    }
    refSeqType = SeqUtils.guessSequenceType(
        new File(args[2].contains("=") ? args[2].split("=")[1] : args[2]));
    translQuery = refSeqType == SequenceType.Protein;
    if (translQuery && wordSize % 3 != 0) {
      throw new Exception("Word size must be a multiple of 3 for nucl ref seqs");
    }
    if (translQuery) {
      trieWordSize = wordSize / 3;
    } else {
      trieWordSize = wordSize;
    }
    kmerSet = new KmerSet<Set<RefKmer>>(); //new KmerTrie(trieWordSize, translQuery);
    for (int index = 2; index < args.length; index++) {
      String refName;
      String refFileName = args[index];
      if (refFileName.contains("=")) {
        String[] lexemes = refFileName.split("=");
        refName = lexemes[0];
        refFileName = lexemes[1];
      } else {
        String tmpName = new File(refFileName).getName();
        if (tmpName.contains(".")) {
          refName = tmpName.substring(0, tmpName.lastIndexOf("."));
        } else {
          refName = tmpName;
        }
      }
      File refFile = new File(refFileName);
      if (refSeqType != SeqUtils.guessSequenceType(refFile)) {
        throw new Exception("Reference file " + refFile + " contains " + SeqUtils.guessFileFormat(refFile)
            + " sequences but expected " + refSeqType + " sequences");
      }
      SequenceReader seqReader = new SequenceReader(refFile);
      Sequence seq;
      while ((seq = seqReader.readNextSequence()) != null) {
        if (seq.getSeqName().startsWith("#")) {
          continue;
        }
        KmerGenerator kmers;
        try {
          if (translQuery) { // protein ref
            kmers = new ProtKmerGenerator(seq.getSeqString(), trieWordSize, alignedSeqs);
          } else {
            kmers = new NuclKmerGenerator(seq.getSeqString(), trieWordSize, alignedSeqs);
          }
          while (kmers.hasNext()) {
            Kmer temp = kmers.next();
            long[] next = temp.getLongKmers();
            Set<RefKmer> refKmers = kmerSet.get(next);
            if (refKmers == null) {
              refKmers = new HashSet();
              kmerSet.add(next, refKmers);
            }
            RefKmer kmerRef = new RefKmer();
            kmerRef.modelPos = kmers.getPosition();
            kmerRef.refFileIndex = refLabels.size();
            kmerRef.refSeqid = seq.getSeqName();
            refKmers.add(kmerRef);
          }
        } catch (IllegalArgumentException ex) {
          //System.err.println(seq.getSeqName() + " " + ex.getMessage());
        }
      }
      seqReader.close();
      refLabels.add(refName);
    }
  } catch (Exception e) {
    new HelpFormatter().printHelp(
        "KmerSearch <kmerSize> <query_file> [name=]<ref_file> ...\nkmerSize should be multiple of 3, (recommend 45, minimum 30, maximum 63)",
        options);
    e.printStackTrace();
    System.exit(1);
    throw new RuntimeException("Stupid jvm"); // While this will never get thrown it is required to make sure javac doesn't get confused about uninitialized variables
  }
  long startTime = System.currentTimeMillis();
  long seqCount = 0;
  final int maxTasks = 25000;
  System.err.println("Starting kmer mapping at " + new Date());
  System.err.println("* Number of threads: " + maxThreads);
  System.err.println("* References: " + refLabels);
  System.err.println("* Reads file: " + queryFile);
  System.err.println("* Kmer length: " + trieWordSize);
  System.err.println("* Kmer Refset Size: " + kmerSet.size());
  final AtomicInteger processed = new AtomicInteger();
  final AtomicInteger outstandingTasks = new AtomicInteger();
  ExecutorService service = Executors.newFixedThreadPool(maxThreads);
  Sequence querySeq;
  while ((querySeq = queryReader.readNextSequence()) != null) {
    seqCount++;
    String seqString = querySeq.getSeqString();
    if ((!translQuery && seqString.length() < wordSize)
        || (translQuery && seqString.length() < wordSize + 2)) {
      //System.err.println(querySeq.getSeqName() + "\t" + seqString.length());
      continue;
    }
    final Sequence threadSeq = querySeq;
    Runnable r = new Runnable() {
      public void run() {
        try {
          processSeq(threadSeq, refLabels, kmerSet, out, wordSize, translQuery, translTable, false);
          processSeq(threadSeq, refLabels, kmerSet, out, wordSize, translQuery, translTable, true);
          processed.incrementAndGet();
          outstandingTasks.decrementAndGet();
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
    };
    outstandingTasks.incrementAndGet();
    service.submit(r);
    while (outstandingTasks.get() >= maxTasks)
      ; // busy-wait throttle: block submission until the queue of outstanding tasks drains below maxTasks
    if ((processed.get() + 1) % 1000000 == 0) {
      System.err.println("Processed " + processed + " sequences in "
          + (System.currentTimeMillis() - startTime) + " ms");
    }
  }
  service.shutdown();
  service.awaitTermination(1, TimeUnit.DAYS);
  System.err.println("Finished Processed " + processed + " sequences in "
      + (System.currentTimeMillis() - startTime) + " ms");
  out.close();
}
From source file:gov.nih.nci.ncicb.tcga.dcc.dam.util.TempClinicalDataLoader.java
public static void main(String[] args) {
  // first get the db connection properties
  String url = urlSet.get(args[1]);
  String user = args[2];
  String word = args[3]; // the database password (passed to DriverManager.getConnection below)
  // make sure we have the Oracle driver somewhere
  try {
    Class.forName("oracle.jdbc.OracleDriver");
    Class.forName("org.postgresql.Driver");
  } catch (Exception x) {
    System.out.println("Unable to load the driver class!");
    System.exit(0);
  }
  // connect to the database
  try {
    dbConnection = DriverManager.getConnection(url, user, word);
    ClinicalBean.setDBConnection(dbConnection);
  } catch (SQLException x) {
    x.printStackTrace();
    System.exit(1);
  }
  final String xmlList = args[0];
  BufferedReader br = null;
  try {
    final Map<String, String> clinicalFiles = new HashMap<String, String>();
    final Map<String, String> biospecimenFiles = new HashMap<String, String>();
    final Map<String, String> fullFiles = new HashMap<String, String>();
    //noinspection IOResourceOpenedButNotSafelyClosed
    br = new BufferedReader(new FileReader(xmlList));
    // read the file list to get all the files to load
    while (br.ready()) {
      final String[] in = br.readLine().split("\\t");
      String xmlfile = in[0];
      String archive = in[1];
      if (xmlfile.contains("_clinical")) {
        clinicalFiles.put(xmlfile, archive);
      } else if (xmlfile.contains("_biospecimen")) {
        biospecimenFiles.put(xmlfile, archive);
      } else {
        fullFiles.put(xmlfile, archive);
      }
    }
    Date dateAdded = Calendar.getInstance().getTime();
    // NOTE!!! This deletes all data before the load starts, assuming we are re-loading everything.
    // a better way would be to figure out what has changed and load that, or to be able to load
    // multiple versions of the data in the schema
    emptyClinicalTables(user);
    // load any "full" files first -- in case some archives aren't split yet
    for (final String file : fullFiles.keySet()) {
      String archive = fullFiles.get(file);
      System.out.println("Full file " + file + " in " + archive);
      // need to re-instantiate the disease-specific beans for each file
      createDiseaseSpecificBeans(xmlList);
      String disease = getDiseaseName(archive);
      processFullXmlFile(file, archive, disease, dateAdded);
      // memory leak or something... have to commit and close all connections and re-get connection
      // after each file to keep from using too much heap space. this troubles me, but I have never
      // had the time to figure out why it happens
      resetConnections(url, user, word);
    }
    // now process all clinical files, and insert patients and clinical data
    for (final String clinicalFile : clinicalFiles.keySet()) {
      createDiseaseSpecificBeans(xmlList);
      String archive = clinicalFiles.get(clinicalFile);
      System.out.println("Clinical file " + clinicalFile + " in " + archive);
      String disease = getDiseaseName(archive);
      processClinicalXmlFile(clinicalFile, archive, disease, dateAdded);
      resetConnections(url, user, word);
    }
    // now process biospecimen files
    for (final String biospecimenFile : biospecimenFiles.keySet()) {
      createDiseaseSpecificBeans(xmlList);
      String archive = biospecimenFiles.get(biospecimenFile);
      String disease = getDiseaseName(archive);
      System.out.println("Biospecimen file " + biospecimenFile);
      processBiospecimenXmlFile(biospecimenFile, archive, disease, dateAdded);
      resetConnections(url, user, word);
    }
    // this sets relationships between these clinical tables and data browser tables, since we delete
    // and reload every time
    setForeignKeys();
    dbConnection.commit();
    dbConnection.close();
  } catch (Exception e) {
    e.printStackTrace();
    System.exit(-1);
  } finally {
    IOUtils.closeQuietly(br);
  }
}
From source file:cc.wikitools.lucene.IndexWikipediaDump.java
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("bz2 Wikipedia XML dump file") .create(INPUT_OPTION));//from w w w.jav a 2s . com options.addOption( OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg() .withDescription("maximum number of documents to index").create(MAX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of indexing threads") .create(THREADS_OPTION)); options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(IndexWikipediaDump.class.getCanonicalName(), options); System.exit(-1); } String indexPath = cmdline.getOptionValue(INDEX_OPTION); int maxdocs = cmdline.hasOption(MAX_OPTION) ? Integer.parseInt(cmdline.getOptionValue(MAX_OPTION)) : Integer.MAX_VALUE; int threads = cmdline.hasOption(THREADS_OPTION) ? Integer.parseInt(cmdline.getOptionValue(THREADS_OPTION)) : DEFAULT_NUM_THREADS; long startTime = System.currentTimeMillis(); String path = cmdline.getOptionValue(INPUT_OPTION); PrintStream out = new PrintStream(System.out, true, "UTF-8"); WikiClean cleaner = new WikiCleanBuilder().withTitle(true).build(); Directory dir = FSDirectory.open(new File(indexPath)); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, ANALYZER); config.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, config); LOG.info("Creating index at " + indexPath); LOG.info("Indexing with " + threads + " threads"); try { WikipediaBz2DumpInputStream stream = new WikipediaBz2DumpInputStream(path); ExecutorService executor = Executors.newFixedThreadPool(threads); int cnt = 0; String page; while ((page = stream.readNext()) != null) { String title = cleaner.getTitle(page); // These are heuristic specifically for filtering out non-articles in enwiki-20120104. if (title.startsWith("Wikipedia:") || title.startsWith("Portal:") || title.startsWith("File:")) { continue; } if (page.contains("#REDIRECT") || page.contains("#redirect") || page.contains("#Redirect")) { continue; } Runnable worker = new AddDocumentRunnable(writer, cleaner, page); executor.execute(worker); cnt++; if (cnt % 10000 == 0) { LOG.info(cnt + " articles added"); } if (cnt >= maxdocs) { break; } } executor.shutdown(); // Wait until all threads are finish while (!executor.isTerminated()) { } LOG.info("Total of " + cnt + " articles indexed."); if (cmdline.hasOption(OPTIMIZE_OPTION)) { LOG.info("Merging segments..."); writer.forceMerge(1); LOG.info("Done!"); } LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms"); } catch (Exception e) { e.printStackTrace(); } finally { writer.close(); dir.close(); out.close(); } }
From source file:edu.toronto.cs.xcurator.cli.CLIRunner.java
public static void main(String[] args) {
  Options options = setupOptions();
  CommandLineParser parser = new BasicParser();
  try {
    CommandLine line = parser.parse(options, args);
    if (line.hasOption('t')) {
      fileType = line.getOptionValue('t');
    } else {
      fileType = XML;
    }
    if (line.hasOption('o')) {
      tdbDirectory = line.getOptionValue('o');
      File d = new File(tdbDirectory);
      if (!d.exists() || !d.isDirectory()) {
        throw new Exception("TDB directory does not exist, please create.");
      }
    }
    if (line.hasOption('h')) {
      domain = line.getOptionValue('h');
      try {
        URL url = new URL(domain);
      } catch (MalformedURLException ex) {
        throw new Exception("The domain name is ill-formed");
      }
    } else {
      printHelpAndExit(options);
    }
    if (line.hasOption('m')) {
      serializeMapping = true;
      mappingFilename = line.getOptionValue('m');
    }
    if (line.hasOption('d')) {
      dirLocation = line.getOptionValue('d');
      inputStreams = new ArrayList<>();
      final List<String> files = Util.getFiles(dirLocation);
      for (String inputfile : files) {
        File f = new File(inputfile);
        if (f.isFile() && f.exists()) {
          System.out.println("Adding document to mapping discoverer: " + inputfile);
          inputStreams.add(new FileInputStream(f));
        }
        // If it is a URL download link for the document from SEC
        else if (inputfile.startsWith("http") && inputfile.contains("://")) {
          // Download
          System.out.println("Adding remote document to mapping discoverer: " + inputfile);
          try {
            URL url = new URL(inputfile);
            InputStream remoteDocumentStream = url.openStream();
            inputStreams.add(remoteDocumentStream);
          } catch (MalformedURLException ex) {
            throw new Exception("The document URL is ill-formed: " + inputfile);
          } catch (IOException ex) {
            throw new Exception("Error in downloading remote document: " + inputfile);
          }
        } else {
          throw new Exception("Cannot open XBRL document: " + f.getName());
        }
      }
    }
    if (line.hasOption('f')) {
      fileLocation = line.getOptionValue('f');
      inputStreams = new ArrayList<>();
      File f = new File(fileLocation);
      if (f.isFile() && f.exists()) {
        System.out.println("Adding document to mapping discoverer: " + fileLocation);
        inputStreams.add(new FileInputStream(f));
      }
      // If it is a URL download link for the document from SEC
      else if (fileLocation.startsWith("http") && fileLocation.contains("://")) {
        // Download
        System.out.println("Adding remote document to mapping discoverer: " + fileLocation);
        try {
          URL url = new URL(fileLocation);
          InputStream remoteDocumentStream = url.openStream();
          inputStreams.add(remoteDocumentStream);
        } catch (MalformedURLException ex) {
          throw new Exception("The document URL is ill-formed: " + fileLocation);
        } catch (IOException ex) {
          throw new Exception("Error in downloading remote document: " + fileLocation);
        }
      } else {
        throw new Exception("Cannot open XBRL document: " + f.getName());
      }
    }
    setupDocumentBuilder();
    RdfFactory rdfFactory = new RdfFactory(new RunConfig(domain));
    List<Document> documents = new ArrayList<>();
    for (InputStream inputStream : inputStreams) {
      Document dataDocument = null;
      if (fileType.equals(JSON)) {
        String json = IOUtils.toString(inputStream);
        final String xml = Util.json2xml(json);
        final InputStream xmlInputStream = IOUtils.toInputStream(xml);
        dataDocument = createDocument(xmlInputStream);
      } else {
        dataDocument = createDocument(inputStream);
      }
      documents.add(dataDocument);
    }
    if (serializeMapping) {
      System.out.println("Mapping file will be saved to: " + new File(mappingFilename).getAbsolutePath());
      rdfFactory.createRdfs(documents, tdbDirectory, mappingFilename);
    } else {
      rdfFactory.createRdfs(documents, tdbDirectory);
    }
  } catch (Exception ex) {
    ex.printStackTrace();
    System.err.println("Unexpected exception: " + ex.getMessage());
    System.exit(1);
  }
}
From source file:main.DOORS_Service.java
/**
 * Login to the DWA server and perform some OSLC actions
 * @param args
 * @throws ParseException
 */
public static void main(String[] args) throws ParseException {
  Options options = new Options();
  options.addOption("url", true, "url");
  options.addOption("user", true, "user ID");
  options.addOption("password", true, "password");
  options.addOption("project", true, "project area");
  CommandLineParser cliParser = new GnuParser();
  // Parse the command line
  CommandLine cmd = cliParser.parse(options, args);
  if (!validateOptions(cmd)) {
    logger.severe("Syntax: java <class_name> -url https://<server>:port/<context>/ -user <user> -password <password> -project \"<project_area>\"");
    logger.severe("Example: java DoorsOauthSample -url https://exmple.com:9443/dwa -user ADMIN -password ADMIN -project \"JKE Banking (Requirements Management)\"");
    return;
  }
  String webContextUrl = cmd.getOptionValue("url");
  String user = cmd.getOptionValue("user");
  String passwd = cmd.getOptionValue("password");
  String projectArea = cmd.getOptionValue("project");
  try {
    // STEP 1: Initialize a Jazz rootservices helper and indicate we're looking for the
    // RequirementManagement catalog. The root services for DOORs is found at /public level
    JazzRootServicesHelper helper = new JazzRootServicesHelper(webContextUrl + "/public", OSLCConstants.OSLC_RM);
    // STEP 2: Create a new OSLC OAuth capable client
    OslcOAuthClient client = helper.initOAuthClient("JIRA", "JIRA");
    if (client != null) {
      // STEP 3: Try to access the context URL to trigger the OAuth dance and login
      try {
        client.getResource(webContextUrl, OSLCConstants.CT_RDF);
      } catch (OAuthRedirectException oauthE) {
        validateTokens(client, oauthE.getRedirectURL() + "?oauth_token=" + oauthE.getAccessor().requestToken,
            user, passwd, webContextUrl + "/j_acegi_security_check");
        // Try to access again
        ClientResponse response = client.getResource(webContextUrl, OSLCConstants.CT_RDF);
        response.getEntity(InputStream.class).close();
      }
      // STEP 4: Get our requirements collection that we want
      // TODO: Replace with option from startup
      String serviceProviderUrl = "http://usnx47:8080/dwa/rm/urn:rational::1-4d2b67b464226e12-M-0000048a";
      ClientResponse response = client.getResource(serviceProviderUrl,
          "application/x-oslc-rm-requirement-collection-1.0+xml");
      // build the rdf
      Model rdfModel = ModelFactory.createDefaultModel();
      rdfModel.read(response.getEntity(InputStream.class), serviceProviderUrl);
      response.consumeContent();
      // get the statements
      List<Statement> reqs = rdfModel.getResource(serviceProviderUrl).listProperties().toList();
      HashMap<String, String> requirements = new HashMap<String, String>();
      for (Statement s : reqs) {
        String reqURI = s.getObject().toString();
        if (reqURI.contains("http")) {
          response = client.getResource(reqURI, "application/x-oslc-rm-requirement-1.0+xml");
          if (response.getStatusCode() == 200) {
            InputStream in = response.getEntity(InputStream.class);
            Model model = ModelFactory.createDefaultModel();
            try {
              model.read(in, reqURI);
            } catch (Exception sa) {
              System.out.println(reqURI);
            }
            // Properties to traverse on
            Property attrDef = model.createProperty("http://jazz.net/doors/xmlns/prod/jazz/doors/1.0/attrDef");
            Property name = model.createProperty("http://jazz.net/doors/xmlns/prod/jazz/doors/1.0/name");
            // Flags we use for parsing
            int count = 0;
            boolean isText = false;
            boolean isID = false;
            boolean done = false;
            // Text of the DOORS Object and its ID are what we are going to extract
            String text = "";
            String id = "";
            // Look through all of the possible fields
            StmtIterator statementIter = model.listStatements();
            while (statementIter.hasNext() && done != true) {
              Statement field = statementIter.next();
              // Get the attrDef property to find out what kind of value we have
              StmtIterator props = field.getSubject().listProperties(attrDef);
              while (props.hasNext() && done != true) {
                Statement kind = props.next();
                RDFNode propertyNode = kind.getObject();
                StmtIterator propIt = propertyNode.asResource().listProperties(name);
                // Check all of the properties for our desired fields
                while (propIt.hasNext()) {
                  Statement node = propIt.next();
                  if (node.getObject().isLiteral()) {
                    if (node.getObject().toString().contains("Object+Text") && field.getObject().isLiteral()) {
                      text = field.getLiteral().toString();
                      text = text.substring(0, text.indexOf("^"));
                      count++;
                    }
                    if (node.getObject().toString().contains("Absolute+Number") && field.getObject().isLiteral()) {
                      id = field.getLiteral().toString();
                      id = id.substring(0, id.indexOf("^"));
                      count++;
                    }
                  }
                }
                if (count == 2) {
                  if (!text.isEmpty()) {
                    //System.out.println("Req: " + id);
                    //System.out.println(text);
                    requirements.put(id, text);
                    count = 0;
                    done = true;
                    break;
                  }
                }
              }
            }
          }
        }
        response.consumeContent();
      }
      // check if already in JIRA
      // post to jira
      for (Entry<String, String> e : requirements.entrySet()) {
      }
    }
  } catch (Exception e) {
    logger.log(Level.SEVERE, e.getMessage(), e);
  }
}
From source file:edu.nyu.vida.data_polygamy.pre_processing.PreProcessing.java
/**
 * @param args
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  Options options = new Options();

  Option nameOption = new Option("dn", "name", true, "the name of the dataset");
  nameOption.setRequired(true);
  nameOption.setArgName("DATASET NAME");
  options.addOption(nameOption);

  Option headerOption = new Option("dh", "header", true, "the file that contains the header of the dataset");
  headerOption.setRequired(true);
  headerOption.setArgName("DATASET HEADER FILE");
  options.addOption(headerOption);

  Option defaultsOption = new Option("dd", "defaults", true, "the file that contains the default values of the dataset");
  defaultsOption.setRequired(true);
  defaultsOption.setArgName("DATASET DEFAULTS FILE");
  options.addOption(defaultsOption);

  Option tempResOption = new Option("t", "temporal", true, "desired temporal resolution (hour, day, week, or month)");
  tempResOption.setRequired(true);
  tempResOption.setArgName("TEMPORAL RESOLUTION");
  options.addOption(tempResOption);

  Option spatialResOption = new Option("s", "spatial", true, "desired spatial resolution (points, nbhd, zip, grid, or city)");
  spatialResOption.setRequired(true);
  spatialResOption.setArgName("SPATIAL RESOLUTION");
  options.addOption(spatialResOption);

  Option currentSpatialResOption = new Option("cs", "current-spatial", true, "current spatial resolution (points, nbhd, zip, grid, or city)");
  currentSpatialResOption.setRequired(true);
  currentSpatialResOption.setArgName("CURRENT SPATIAL RESOLUTION");
  options.addOption(currentSpatialResOption);

  Option indexResOption = new Option("i", "index", true, "indexes of the temporal and spatial attributes");
  indexResOption.setRequired(true);
  indexResOption.setArgName("INDEX OF SPATIO-TEMPORAL RESOLUTIONS");
  indexResOption.setArgs(Option.UNLIMITED_VALUES);
  options.addOption(indexResOption);

  Option machineOption = new Option("m", "machine", true, "machine identifier");
  machineOption.setRequired(true);
  machineOption.setArgName("MACHINE");
  machineOption.setArgs(1);
  options.addOption(machineOption);

  Option nodesOption = new Option("n", "nodes", true, "number of nodes");
  nodesOption.setRequired(true);
  nodesOption.setArgName("NODES");
  nodesOption.setArgs(1);
  options.addOption(nodesOption);

  Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
  s3Option.setRequired(false);
  options.addOption(s3Option);

  Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
      "aws access key id; this is required if the execution is on aws");
  awsAccessKeyIdOption.setRequired(false);
  awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
  awsAccessKeyIdOption.setArgs(1);
  options.addOption(awsAccessKeyIdOption);

  // NOTE: the long name "aws-id" below duplicates the previous option's long name;
  // "aws-key" was presumably intended. Kept as in the scraped source.
  Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true,
      "aws secret access key; this is required if the execution is on aws");
  awsSecretAccessKeyOption.setRequired(false);
  awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
  awsSecretAccessKeyOption.setArgs(1);
  options.addOption(awsSecretAccessKeyOption);

  Option bucketOption = new Option("b", "s3-bucket", true,
      "bucket on s3; this is required if the execution is on aws");
  bucketOption.setRequired(false);
  bucketOption.setArgName("S3-BUCKET");
  bucketOption.setArgs(1);
  options.addOption(bucketOption);

  Option helpOption = new Option("h", "help", false, "display this message");
  helpOption.setRequired(false);
  options.addOption(helpOption);

  HelpFormatter formatter = new HelpFormatter();
  CommandLineParser parser = new PosixParser();
  CommandLine cmd = null;
  try {
    cmd = parser.parse(options, args);
  } catch (ParseException e) {
    formatter.printHelp(
        "hadoop jar data-polygamy.jar edu.nyu.vida.data_polygamy.pre_processing.PreProcessing",
        options, true);
    System.exit(0);
  }
  if (cmd.hasOption("h")) {
    formatter.printHelp(
        "hadoop jar data-polygamy.jar edu.nyu.vida.data_polygamy.pre_processing.PreProcessing",
        options, true);
    System.exit(0);
  }
  boolean s3 = cmd.hasOption("s3");
  String s3bucket = "";
  String awsAccessKeyId = "";
  String awsSecretAccessKey = "";
  if (s3) {
    if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
      System.out.println("Arguments 'aws_id', 'aws_key', and 'b' are mandatory if execution is on AWS.");
      formatter.printHelp(
          "hadoop jar data-polygamy.jar edu.nyu.vida.data_polygamy.pre_processing.PreProcessing",
          options, true);
      System.exit(0);
    }
    s3bucket = cmd.getOptionValue("b");
    awsAccessKeyId = cmd.getOptionValue("aws_id");
    awsSecretAccessKey = cmd.getOptionValue("aws_key");
  }
  boolean snappyCompression = false;
  boolean bzip2Compression = false;
  String machine = cmd.getOptionValue("m");
  int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));
  Configuration s3conf = new Configuration();
  if (s3) {
    s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
    s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
    s3conf.set("bucket", s3bucket);
  }
  Configuration conf = new Configuration();
  Machine machineConf = new Machine(machine, nbNodes);
  String dataset = cmd.getOptionValue("dn");
  String header = cmd.getOptionValue("dh");
  String defaults = cmd.getOptionValue("dd");
  String temporalResolution = cmd.getOptionValue("t");
  String spatialResolution = cmd.getOptionValue("s");
  String gridResolution = "";
  String currentSpatialResolution = cmd.getOptionValue("cs");
  if (spatialResolution.contains("grid")) {
    String[] res = spatialResolution.split("-");
    spatialResolution = res[0];
    gridResolution = res[1];
  }
  conf.set("header", s3bucket + FrameworkUtils.dataDir + "/" + header);
  conf.set("defaults", s3bucket + FrameworkUtils.dataDir + "/" + defaults);
  conf.set("temporal-resolution", temporalResolution);
  conf.set("spatial-resolution", spatialResolution);
  conf.set("grid-resolution", gridResolution);
  conf.set("current-spatial-resolution", currentSpatialResolution);
  String[] indexes = cmd.getOptionValues("i");
  String temporalPos = "";
  Integer sizeSpatioTemp = 0;
  if (!(currentSpatialResolution.equals("points"))) {
    String spatialPos = "";
    for (int i = 0; i < indexes.length; i++) {
      temporalPos += indexes[i] + ",";
      spatialPos += indexes[++i] + ",";
      sizeSpatioTemp++;
    }
    conf.set("spatial-pos", spatialPos);
  } else {
    String xPositions = "", yPositions = "";
    for (int i = 0; i < indexes.length; i++) {
      temporalPos += indexes[i] + ",";
      xPositions += indexes[++i] + ",";
      yPositions += indexes[++i] + ",";
      sizeSpatioTemp++;
    }
    conf.set("xPositions", xPositions);
    conf.set("yPositions", yPositions);
  }
  conf.set("temporal-pos", temporalPos);
  conf.set("size-spatio-temporal", sizeSpatioTemp.toString());
  // checking resolutions
  if (utils.spatialResolution(spatialResolution) < 0) {
    System.out.println("Invalid spatial resolution: " + spatialResolution);
    System.exit(-1);
  }
  if (utils.spatialResolution(spatialResolution) == FrameworkUtils.POINTS) {
    System.out.println("The data needs to be reduced at least to neighborhoods or grid.");
    System.exit(-1);
  }
  if (utils.spatialResolution(currentSpatialResolution) < 0) {
    System.out.println("Invalid spatial resolution: " + currentSpatialResolution);
    System.exit(-1);
  }
  if (utils.spatialResolution(currentSpatialResolution) > utils.spatialResolution(spatialResolution)) {
    System.out.println("The current spatial resolution is coarser than the desired one. "
        + "You can only navigate from a fine resolution to a coarser one.");
    System.exit(-1);
  }
  if (utils.temporalResolution(temporalResolution) < 0) {
    System.out.println("Invalid temporal resolution: " + temporalResolution);
    System.exit(-1);
  }
  String fileName = s3bucket + FrameworkUtils.preProcessingDir + "/" + dataset + "-"
      + temporalResolution + "-" + spatialResolution + gridResolution;
  conf.set("aggregates", fileName + ".aggregates");
  // making sure both files are removed, if they exist
  FrameworkUtils.removeFile(fileName, s3conf, s3);
  FrameworkUtils.removeFile(fileName + ".aggregates", s3conf, s3);
  /**
   * Hadoop Parameters
   * sources: http://www.slideshare.net/ImpetusInfo/ppt-on-advanced-hadoop-tuning-n-optimisation
   * https://cloudcelebrity.wordpress.com/2013/08/14/12-key-steps-to-keep-your-hadoop-cluster-running-strong-and-performing-optimum/
   */
  conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
  conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
  conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
  conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
  conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
  conf.set("mapreduce.task.io.sort.mb", "200");
  conf.set("mapreduce.task.io.sort.factor", "100");
  // using SnappyCodec for intermediate and output data?
  // TODO: for now, using SnappyCodec -- what about LZO + Protocol Buffer serialization?
  // LZO - http://www.oberhumer.com/opensource/lzo/#download
  // Hadoop-LZO - https://github.com/twitter/hadoop-lzo
  // Protocol Buffer - https://github.com/twitter/elephant-bird
  // General Info - http://www.devx.com/Java/Article/47913
  // Compression - http://comphadoop.weebly.com/index.html
  if (snappyCompression) {
    conf.set("mapreduce.map.output.compress", "true");
    conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
  }
  if (bzip2Compression) {
    conf.set("mapreduce.map.output.compress", "true");
    conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
  }
  // TODO: this is dangerous!
  if (s3) {
    conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
    conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
  }
  Job job = new Job(conf);
  job.setJobName(dataset + "-" + temporalResolution + "-" + spatialResolution);
  job.setMapOutputKeyClass(MultipleSpatioTemporalWritable.class);
  job.setMapOutputValueClass(AggregationArrayWritable.class);
  job.setOutputKeyClass(MultipleSpatioTemporalWritable.class);
  job.setOutputValueClass(AggregationArrayWritable.class);
  job.setMapperClass(PreProcessingMapper.class);
  job.setCombinerClass(PreProcessingCombiner.class);
  job.setReducerClass(PreProcessingReducer.class);
  job.setNumReduceTasks(machineConf.getNumberReduces());
  //job.setNumReduceTasks(1);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setCompressOutput(job, true);
  SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
  FileInputFormat.setInputPaths(job, new Path(s3bucket + FrameworkUtils.dataDir + "/" + dataset));
  FileOutputFormat.setOutputPath(job, new Path(fileName));
  job.setJarByClass(PreProcessing.class);
  long start = System.currentTimeMillis();
  job.submit();
  job.waitForCompletion(true);
  System.out.println(fileName + "\t" + (System.currentTimeMillis() - start));
}
From source file:com.thesmartweb.vivliocrawlermaven.VivlioCrawlerMavenMain.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
  // TODO code application logic here
  try {
    OaiPmhServer server = new OaiPmhServer("http://vivliothmmy.ee.auth.gr/cgi/oai2");
    RecordsList listRecords = server.listRecords("oai_dc"); // we capture all the records in oai dc format
    List<VivlioCrawlerMavenMain> listtotal = new ArrayList<VivlioCrawlerMavenMain>();
    // we capture all the names of the professors and former professors of ECE of AUTH from a txt file
    // change the directory to yours
    List<String> profs = Files.readAllLines(Paths.get(
        "/home/themis/NetBeansProjects/VivlioCrawlerMaven/src/main/java/com/thesmartweb/vivliocrawlermaven/profs.txt"));
    boolean more = true; // flag used if we encounter more entries than the initial capture
    JSONArray array = new JSONArray(); // it is going to be our final total json array
    JSONObject jsonObject = new JSONObject(); // it is going to be our final total json object
    while (more) {
      for (Record rec : listRecords.asList()) {
        VivlioCrawlerMavenMain vc = new VivlioCrawlerMavenMain();
        Element metadata = rec.getMetadata();
        if (metadata != null) {
          //System.out.println(rec.getMetadataAsString());
          List<Element> elements = metadata.elements();
          //System.out.println(metadata.getStringValue());
          for (Element element : elements) {
            String name = element.getName();
            // we get the title, remove \r, \n and beginning and trailing whitespace
            if (name.equalsIgnoreCase("title")) {
              vc.title = element.getStringValue();
              vc.title = vc.title.trim();
              vc.title = vc.title.replaceAll("(\\r|\\n)", "");
              if (!(vc.title.endsWith("."))) {
                vc.title = vc.title + "."; // we also add a dot in the end for the titles to be uniform
              }
            }
            if (name.equalsIgnoreCase("creator")) {
              vc.creators.add(element.getStringValue()); // we capture the students' names
            }
            if (name.equalsIgnoreCase("subject")) {
              vc.subjects.add(element.getStringValue()); // we capture the subjects
            }
            if (name.equalsIgnoreCase("description")) {
              vc.description = element.getStringValue(); // we capture the abstract
            }
            if (name.equalsIgnoreCase("date")) {
              vc.datestring = element.getStringValue();
            }
            if (name.equalsIgnoreCase("identifier")) {
              if (element.getStringValue().contains("http://")) {
                vc.thesisFiles.add(element.getStringValue()); // we capture the url of the thesis whole file
                if (vc.thesisURL == null) {
                  vc.thesisURL = element.getStringValue().substring(0, 32);
                }
              }
              // if the identifier contains the title then it must be the citation;
              // out of the citation we need to extract the supervisor's name
              if (element.getStringValue().contains(vc.title.substring(0, 10))) {
                vc.citation = element.getStringValue();
                vc.supervisor = element.getStringValue();
                Iterator profsIterator = profs.iterator();
                vc.supervisor = vc.supervisor.replace(vc.title, ""); // we remove the title out of the citation
                // NOTE: the Greek string literals in this snippet (the words for "and",
                // "Thessaloniki", "Greece", etc.) were lost when the page was scraped,
                // leaving the empty "" literals below; the structure is kept as found.
                // if we have two students we remove the first occurrence of the Greek word for "and"
                if (vc.creators.size() == 2) {
                  vc.supervisor = vc.supervisor.replaceFirst("", "");
                }
                // we remove the students' names
                Iterator creatorsIterator = vc.creators.iterator();
                while (creatorsIterator.hasNext()) {
                  vc.supervisor = vc.supervisor.replace(creatorsIterator.next().toString(), "");
                }
                boolean profFlag = false; // flag that declares that we found the professor that was supervisor
                while (profsIterator.hasNext() && !profFlag) {
                  String prof = profsIterator.next().toString();
                  // we split the professor's name to surname and name,
                  // because some entries have first the surname and others first the name
                  String[] profSplitted = prof.split("\\s+");
                  String supervisorCleared = vc.supervisor;
                  supervisorCleared = supervisorCleared.replaceAll("\\s+", ""); // we clear the white space
                  supervisorCleared = supervisorCleared.replaceAll("(\\r|\\n)", ""); // we remove the \r\n
                  // now we check if the citation includes any name of the professors from the txt
                  if (supervisorCleared.contains(profSplitted[0]) && supervisorCleared.contains(profSplitted[1])) {
                    vc.supervisor = prof;
                    profFlag = true;
                  }
                }
                // if we don't find the name of the supervisor, we have to perform string manipulation to extract it
                if (!profFlag) {
                  vc.supervisor = vc.supervisor.trim();
                  // we remove the (Greek) words for "Thessaloniki" and "Greece"
                  if (vc.supervisor.contains("")) {
                    vc.supervisor = vc.supervisor.replaceFirst("", "");
                  }
                  if (vc.supervisor.contains("")) {
                    vc.supervisor = vc.supervisor.replaceFirst("", "");
                  }
                  if (vc.supervisor.contains("")) {
                    vc.supervisor = vc.supervisor.replaceFirst("", "");
                  }
                  if (vc.supervisor.contains("")) {
                    vc.supervisor = vc.supervisor.replaceFirst("", "");
                  }
                  // we remove the year and then we should be left only with the supervisor's name
                  vc.supervisor = vc.supervisor.replace("(", "");
                  vc.supervisor = vc.supervisor.trim();
                  vc.supervisor = vc.supervisor.replace(")", "");
                  vc.supervisor = vc.supervisor.trim();
                  vc.supervisor = vc.supervisor.replace(",", "");
                  vc.supervisor = vc.supervisor.trim();
                  vc.supervisor = vc.supervisor.replace(".", "");
                  vc.supervisor = vc.supervisor.trim();
                  vc.supervisor = vc.supervisor.replace(vc.datestring.substring(0, 4), "");
                  vc.supervisor = vc.supervisor.trim();
                }
                // we put everything in a json object
                JSONObject obj = new JSONObject();
                obj.put("title", vc.title);
                obj.put("description", vc.description);
                JSONArray creatorsArray = new JSONArray();
                creatorsArray.add(vc.creators);
                obj.put("creators", creatorsArray);
                JSONArray subjectsArray = new JSONArray();
                List<String> subjectsList = new ArrayList<String>(vc.subjects);
                subjectsArray.add(subjectsList);
                obj.put("subjects", subjectsArray);
                obj.put("datestring", vc.datestring);
                JSONArray thesisFilesArray = new JSONArray();
                thesisFilesArray.add(vc.thesisFiles);
                obj.put("thesisFiles", thesisFilesArray);
                obj.put("thesisURL", vc.thesisURL);
                obj.put("supervisor", vc.supervisor);
                obj.put("citation", vc.citation);
                // if you are using JSON.simple do this
                array.add(obj);
              }
            }
          }
          listtotal.add(vc); // a list containing all the objects;
          // not used for now, but created for potential extension of the work
        }
      }
      // the following if clause searches for new records
      if (listRecords.getResumptionToken() != null) {
        listRecords = server.listRecords(listRecords.getResumptionToken());
      } else {
        more = false;
      }
    }
    // we print which records did not have a supervisor
    for (VivlioCrawlerMavenMain vctest : listtotal) {
      if (vctest.supervisor == null) {
        System.out.println(vctest.title);
        System.out.println(vctest.citation);
      }
    }
    // we create a pretty json with GSON and we write it into a file
    jsonObject.put("VivliothmmyOldArray", array);
    JsonParser parser = new JsonParser();
    JsonObject json = parser.parse(jsonObject.toJSONString()).getAsJsonObject();
    Gson gson = new GsonBuilder().setPrettyPrinting().create();
    String prettyJson = gson.toJson(json);
    try {
      FileWriter file = new FileWriter(
          "/home/themis/NetBeansProjects/VivlioCrawlerMaven/src/main/java/com/thesmartweb/vivliocrawlermaven/VivliothmmyOldRecords.json");
      file.write(prettyJson);
      file.flush();
      file.close();
    } catch (IOException e) {
      System.out.println("Exception: " + e);
    }
    //System.out.print(prettyJson);
    //int j=0;
  } catch (OAIException | IOException e) {
    System.out.println("Exception: " + e);
  }
}
From source file:bear.core.BearMain.java
/**
 * -VbearMain.appConfigDir=src/main/groovy/examples -VbearMain.buildDir=.bear/classes
 * -VbearMain.script=dumpSampleGrid -VbearMain.projectClass=SecureSocialDemoProject
 * -VbearMain.propertiesFile=.bear/test.properties
 */
public static void main(String[] args) throws Exception {
  int i = ArrayUtils.indexOf(args, "--log-level");
  if (i != -1) {
    LoggingBooter.changeLogLevel(LogManager.ROOT_LOGGER_NAME, Level.toLevel(args[i + 1]));
  }
  i = ArrayUtils.indexOf(args, "-q");
  if (i != -1) {
    LoggingBooter.changeLogLevel(LogManager.ROOT_LOGGER_NAME, Level.WARN);
  }
  GlobalContext global = GlobalContext.getInstance();
  BearMain bearMain = null;
  try {
    bearMain = new BearMain(global, getCompilerManager(), args);
  } catch (Exception e) {
    if (e.getClass().getSimpleName().equals("MissingRequiredOptionException")) {
      System.out.println(e.getMessage());
    } else {
      Throwables.getRootCause(e).printStackTrace();
    }
    System.exit(-1);
  }
  if (bearMain.checkHelpAndVersion()) {
    return;
  }
  AppOptions2 options2 = bearMain.options;
  if (options2.has(AppOptions2.UNPACK_DEMOS)) {
    String filesAsText = ProjectGenerator.readResource("/demoFiles.txt");
    int count = 0;
    for (String resource : filesAsText.split("::")) {
      File dest = new File(BEAR_DIR + resource);
      System.out.printf("copying %s to %s...%n", resource, dest);
      writeStringToFile(dest, ProjectGenerator.readResource(resource));
      count++;
    }
    System.out.printf("extracted %d files%n", count);
    return;
  }
  if (options2.has(AppOptions2.CREATE_NEW)) {
    String dashedTitle = options2.get(AppOptions2.CREATE_NEW);
    String user = options2.get(AppOptions2.USER);
    String pass = options2.get(AppOptions2.PASSWORD);
    List<String> hosts = options2.getList(AppOptions2.HOSTS);
    List<String> template;
    if (options2.has(AppOptions2.TEMPLATE)) {
      template = options2.getList(AppOptions2.TEMPLATE);
    } else {
      template = emptyList();
    }
    ProjectGenerator g = new ProjectGenerator(dashedTitle, user, pass, hosts, template);
    if (options2.has(AppOptions2.ORACLE_USER)) {
      g.oracleUser = options2.get(AppOptions2.ORACLE_USER);
    }
    if (options2.has(AppOptions2.ORACLE_PASSWORD)) {
      g.oraclePassword = options2.get(AppOptions2.ORACLE_PASSWORD);
    }
    File projectFile = new File(BEAR_DIR, g.getProjectTitle() + ".groovy");
    File pomFile = new File(BEAR_DIR, "pom.xml");
    writeStringToFile(projectFile, g.processTemplate("TemplateProject.template"));
    writeStringToFile(new File(BEAR_DIR, dashedTitle + ".properties"),
        g.processTemplate("project-properties.template"));
    writeStringToFile(new File(BEAR_DIR, "demos.properties"),
        g.processTemplate("project-properties.template"));
    writeStringToFile(new File(BEAR_DIR, "bear-fx.properties"),
        g.processTemplate("bear-fx.properties.template"));
    writeStringToFile(pomFile, g.generatePom(dashedTitle));
    System.out.printf("Created project file: %s%n", projectFile.getPath());
    System.out.printf("Created maven pom: %s%n", pomFile.getPath());
    System.out.println("\nProject files have been created. You may now: "
        + "\n a) Run `bear " + g.getShortName() + ".ls` to quick-test your minimal setup"
        + "\n b) Import the project to IDE or run smoke tests, find more details at the project wiki: "
        + "https://github.com/chaschev/bear/wiki/.");
    return;
  }
  Bear bear = global.bear;
  if (options2.has(AppOptions2.QUIET)) {
    global.put(bear.quiet, true);
    LoggingBooter.changeLogLevel(LogManager.ROOT_LOGGER_NAME, Level.WARN);
  }
  if (options2.has(AppOptions2.USE_UI)) {
    global.put(bear.useUI, true);
  }
  if (options2.has(AppOptions2.NO_UI)) {
    global.put(bear.useUI, false);
  }
  List<?> list = options2.getOptionSet().nonOptionArguments();
  if (list.size() > 1) {
    throw new IllegalArgumentException("too many arguments: " + list + ", "
        + "please specify an invoke line, project.method(arg1, arg2)");
  }
  if (list.isEmpty()) {
    throw new UnsupportedOperationException("todo implement running a single project");
  }
  String invokeLine = (String) list.get(0);
  String projectName;
  String method;
  if (invokeLine.contains(".")) {
    projectName = StringUtils.substringBefore(invokeLine, ".");
    method = StringUtils.substringAfter(invokeLine, ".");
  } else {
    projectName = invokeLine;
    method = null;
  }
  if (method == null || method.isEmpty()) method = "deploy()";
  if (!method.contains("(")) method += "()";
  Optional<CompiledEntry<? extends BearProject>> optional = bearMain.compileManager.findProject(projectName);
  if (!optional.isPresent()) {
    throw new IllegalArgumentException("project was not found: " + projectName
        + ", loaded classes: \n" + Joiner.on("\n").join(bearMain.compileManager.findProjects())
        + ", searched in: " + bearMain.compileManager.getSourceDirs() + ", ");
  }
  BearProject project = OpenBean.newInstance(optional.get().aClass).injectMain(bearMain);
  GroovyShell shell = new GroovyShell();
  shell.setVariable("project", project);
  shell.evaluate("project." + method);
}