List of usage examples for java.io.File.isDirectory()
public boolean isDirectory()
From source file:com.act.biointerpretation.sars.SarGenerationDriver.java
public static void main(String[] args) throws Exception { // Build command line parser. Options opts = new Options(); for (Option.Builder b : OPTION_BUILDERS) { opts.addOption(b.build());/* w ww.j a v a 2s . c o m*/ } CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { LOGGER.error("Argument parsing failed: %s", e.getMessage()); HELP_FORMATTER.printHelp(SarGenerationDriver.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } // Print help. if (cl.hasOption(OPTION_HELP)) { HELP_FORMATTER.printHelp(SarGenerationDriver.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); return; } // Create DB and DbAPI MongoDB mongoDB = new MongoDB(LOCAL_HOST, MONGO_PORT, cl.getOptionValue(OPTION_DB)); DbAPI dbApi = new DbAPI(mongoDB); // Handle output file File outputFile = new File(cl.getOptionValue(OPTION_OUTPUT_PATH)); if (outputFile.isDirectory() || outputFile.exists()) { LOGGER.error("Supplied output file is a directory or already exists."); HELP_FORMATTER.printHelp(SarGenerationDriver.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } outputFile.createNewFile(); // Check that there is exactly one reaction group input option if (cl.hasOption(OPTION_REACTION_LIST) && cl.hasOption(OPTION_REACTIONS_FILE)) { LOGGER.error("Cannot process both a reaction list and a reactions file as input."); HELP_FORMATTER.printHelp(SarGenerationDriver.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } if (!cl.hasOption(OPTION_REACTION_LIST) && !cl.hasOption(OPTION_REACTIONS_FILE)) { LOGGER.error("Must supply either a reaction list or a reactions file as input."); HELP_FORMATTER.printHelp(SarGenerationDriver.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } // Build input reaction group corpus. 
Iterable<ReactionGroup> groups = null; if (cl.hasOption(OPTION_REACTION_LIST)) { LOGGER.info("Using specific input reactions."); ReactionGroup group = new ReactionGroup("ONLY_GROUP", "NO_DB"); for (String idString : cl.getOptionValues(OPTION_REACTION_LIST)) { group.addReactionId(Long.parseLong(idString)); } groups = Arrays.asList(group); } if (cl.hasOption(OPTION_REACTIONS_FILE)) { LOGGER.info("Using reactions file."); File inputFile = new File(cl.getOptionValue(OPTION_REACTIONS_FILE)); try { groups = ReactionGroupCorpus.loadFromJsonFile(inputFile); LOGGER.info("Successfully parsed input as json file."); } catch (IOException e) { LOGGER.info("Input file not json file. Trying txt format."); try { groups = ReactionGroupCorpus.loadFromTextFile(inputFile); LOGGER.info("Successfully parsed input as text file."); } catch (IOException f) { LOGGER.error("Reactions input file not parseable. %s", f.getMessage()); throw f; } } } // Build all pieces of SAR generator ReactionProjector projector = new ReactionProjector(); ExpandedReactionSearcher generalizer = new ExpandedReactionSearcher(projector); McsCalculator reactionMcsCalculator = new McsCalculator(McsCalculator.REACTION_BUILDING_OPTIONS); McsCalculator sarMcsCalculator = new McsCalculator(McsCalculator.SAR_OPTIONS); FullReactionBuilder reactionBuilder = new FullReactionBuilder(reactionMcsCalculator, generalizer, projector); SarFactory substructureSarFactory = new OneSubstrateSubstructureSar.Factory(sarMcsCalculator); SarFactory carbonCountSarFactory = new OneSubstrateCarbonCountSar.Factory(); List<SarFactory> sarFactories = Arrays.asList(carbonCountSarFactory, substructureSarFactory); ErosCorpus roCorpus = new ErosCorpus(); roCorpus.loadValidationCorpus(); ReactionGroupCharacterizer reactionGroupCharacterizer = new OneSubstrateOneRoCharacterizer(dbApi, sarFactories, reactionBuilder, roCorpus); SarCorpusBuilder corpusBuilder = new SarCorpusBuilder(groups, reactionGroupCharacterizer); LOGGER.info("Parsed arguments and 
constructed SAR corpus builder. Building corpus."); SarCorpus sarCorpus = corpusBuilder.build(); LOGGER.info("Built sar corpus. Printing to file in json format."); sarCorpus.printToJsonFile(outputFile); LOGGER.info("Complete!"); }
From source file:com.tamingtext.classifier.mlt.TestMoreLikeThis.java
public static void main(String[] args) throws Exception { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option inputDirOpt = obuilder.withLongName("input").withRequired(true) .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription("The input directory").withShortName("i").create(); Option modelOpt = obuilder.withLongName("model").withRequired(true) .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create()) .withDescription("The directory containing the index model").withShortName("m").create(); Option categoryFieldOpt = obuilder.withLongName("categoryField").withRequired(true) .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create()) .withDescription("Name of the field containing category information").withShortName("catf") .create();//from w ww .j a va2 s .com Option contentFieldOpt = obuilder.withLongName("contentField").withRequired(true) .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create()) .withDescription("Name of the field containing content information").withShortName("contf") .create(); Option maxResultsOpt = obuilder.withLongName("maxResults").withRequired(false) .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()) .withDescription("Number of results to retrive, default: 10 ").withShortName("r").create(); Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(false) .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()) .withDescription("Size of the n-gram. 
Default Value: 1 ").withShortName("ng").create(); Option typeOpt = obuilder.withLongName("classifierType").withRequired(false) .withArgument(abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create()) .withDescription("Type of classifier: knn|tfidf. Default: bayes").withShortName("type").create(); Group group = gbuilder.withName("Options").withOption(gramSizeOpt).withOption(helpOpt) .withOption(inputDirOpt).withOption(modelOpt).withOption(typeOpt).withOption(contentFieldOpt) .withOption(categoryFieldOpt).withOption(maxResultsOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); parser.setHelpOption(helpOpt); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } String classifierType = (String) cmdLine.getValue(typeOpt); int gramSize = 1; if (cmdLine.hasOption(gramSizeOpt)) { gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt)); } int maxResults = 10; if (cmdLine.hasOption(maxResultsOpt)) { maxResults = Integer.parseInt((String) cmdLine.getValue(maxResultsOpt)); } String inputPath = (String) cmdLine.getValue(inputDirOpt); String modelPath = (String) cmdLine.getValue(modelOpt); String categoryField = (String) cmdLine.getValue(categoryFieldOpt); String contentField = (String) cmdLine.getValue(contentFieldOpt); MatchMode mode; if ("knn".equalsIgnoreCase(classifierType)) { mode = MatchMode.KNN; } else if ("tfidf".equalsIgnoreCase(classifierType)) { mode = MatchMode.TFIDF; } else { throw new IllegalArgumentException("Unkown classifierType: " + classifierType); } Directory directory = FSDirectory.open(new File(modelPath)); IndexReader indexReader = IndexReader.open(directory); Analyzer analyzer //<co id="mlt.analyzersetup"/> = new EnglishAnalyzer(Version.LUCENE_36); MoreLikeThisCategorizer categorizer = new MoreLikeThisCategorizer(indexReader, categoryField); categorizer.setAnalyzer(analyzer); categorizer.setMatchMode(mode); categorizer.setFieldNames(new 
String[] { contentField }); categorizer.setMaxResults(maxResults); categorizer.setNgramSize(gramSize); File f = new File(inputPath); if (!f.isDirectory()) { throw new IllegalArgumentException(f + " is not a directory or does not exit"); } File[] inputFiles = FileUtil.buildFileList(f); String line = null; //<start id="lucene.examples.mlt.test"/> final ClassifierResult UNKNOWN = new ClassifierResult("unknown", 1.0); ResultAnalyzer resultAnalyzer = //<co id="co.mlt.ra"/> new ResultAnalyzer(categorizer.getCategories(), UNKNOWN.getLabel()); for (File ff : inputFiles) { //<co id="co.mlt.read"/> BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(ff), "UTF-8")); while ((line = in.readLine()) != null) { String[] parts = line.split("\t"); if (parts.length != 2) { continue; } CategoryHits[] hits //<co id="co.mlt.cat"/> = categorizer.categorize(new StringReader(parts[1])); ClassifierResult result = hits.length > 0 ? hits[0] : UNKNOWN; resultAnalyzer.addInstance(parts[0], result); //<co id="co.mlt.an"/> } in.close(); } System.out.println(resultAnalyzer.toString());//<co id="co.mlt.print"/> /* <calloutlist> <callout arearefs="co.mlt.ra">Create <classname>ResultAnalyzer</classname></callout> <callout arearefs="co.mlt.read">Read Test data</callout> <callout arearefs="co.mlt.cat">Categorize</callout> <callout arearefs="co.mlt.an">Collect Results</callout> <callout arearefs="co.mlt.print">Display Results</callout> </calloutlist> */ //<end id="lucene.examples.mlt.test"/> } catch (OptionException e) { log.error("Error while parsing options", e); } }
From source file:edu.toronto.cs.xcurator.cli.CLIRunner.java
public static void main(String[] args) { Options options = setupOptions();// ww w . j a v a 2s . c o m CommandLineParser parser = new BasicParser(); try { CommandLine line = parser.parse(options, args); if (line.hasOption('t')) { fileType = line.getOptionValue('t'); } else { fileType = XML; } if (line.hasOption('o')) { tdbDirectory = line.getOptionValue('o'); File d = new File(tdbDirectory); if (!d.exists() || !d.isDirectory()) { throw new Exception("TDB directory does not exist, please create."); } } if (line.hasOption('h')) { domain = line.getOptionValue('h'); try { URL url = new URL(domain); } catch (MalformedURLException ex) { throw new Exception("The domain name is ill-formed"); } } else { printHelpAndExit(options); } if (line.hasOption('m')) { serializeMapping = true; mappingFilename = line.getOptionValue('m'); } if (line.hasOption('d')) { dirLocation = line.getOptionValue('d'); inputStreams = new ArrayList<>(); final List<String> files = Util.getFiles(dirLocation); for (String inputfile : files) { File f = new File(inputfile); if (f.isFile() && f.exists()) { System.out.println("Adding document to mapping discoverer: " + inputfile); inputStreams.add(new FileInputStream(f)); } // If it is a URL download link for the document from SEC else if (inputfile.startsWith("http") && inputfile.contains("://")) { // Download System.out.println("Adding remote document to mapping discoverer: " + inputfile); try { URL url = new URL(inputfile); InputStream remoteDocumentStream = url.openStream(); inputStreams.add(remoteDocumentStream); } catch (MalformedURLException ex) { throw new Exception("The document URL is ill-formed: " + inputfile); } catch (IOException ex) { throw new Exception("Error in downloading remote document: " + inputfile); } } else { throw new Exception("Cannot open XBRL document: " + f.getName()); } } } if (line.hasOption('f')) { fileLocation = line.getOptionValue('f'); inputStreams = new ArrayList<>(); File f = new File(fileLocation); if (f.isFile() && 
f.exists()) { System.out.println("Adding document to mapping discoverer: " + fileLocation); inputStreams.add(new FileInputStream(f)); } // If it is a URL download link for the document from SEC else if (fileLocation.startsWith("http") && fileLocation.contains("://")) { // Download System.out.println("Adding remote document to mapping discoverer: " + fileLocation); try { URL url = new URL(fileLocation); InputStream remoteDocumentStream = url.openStream(); inputStreams.add(remoteDocumentStream); } catch (MalformedURLException ex) { throw new Exception("The document URL is ill-formed: " + fileLocation); } catch (IOException ex) { throw new Exception("Error in downloading remote document: " + fileLocation); } } else { throw new Exception("Cannot open XBRL document: " + f.getName()); } } setupDocumentBuilder(); RdfFactory rdfFactory = new RdfFactory(new RunConfig(domain)); List<Document> documents = new ArrayList<>(); for (InputStream inputStream : inputStreams) { Document dataDocument = null; if (fileType.equals(JSON)) { String json = IOUtils.toString(inputStream); final String xml = Util.json2xml(json); final InputStream xmlInputStream = IOUtils.toInputStream(xml); dataDocument = createDocument(xmlInputStream); } else { dataDocument = createDocument(inputStream); } documents.add(dataDocument); } if (serializeMapping) { System.out.println("Mapping file will be saved to: " + new File(mappingFilename).getAbsolutePath()); rdfFactory.createRdfs(documents, tdbDirectory, mappingFilename); } else { rdfFactory.createRdfs(documents, tdbDirectory); } } catch (Exception ex) { ex.printStackTrace(); System.err.println("Unexpected exception: " + ex.getMessage()); System.exit(1); } }
From source file:cc.twittertools.corpus.demo.ReadStatuses.java
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input directory or file") .create(INPUT_OPTION));/* w w w . ja va 2 s . c o m*/ options.addOption(VERBOSE_OPTION, false, "print logging output every 10000 tweets"); options.addOption(DUMP_OPTION, false, "dump statuses"); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INPUT_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(ReadStatuses.class.getName(), options); System.exit(-1); } PrintStream out = new PrintStream(System.out, true, "UTF-8"); StatusStream stream; // Figure out if we're reading from HTML SequenceFiles or JSON. File file = new File(cmdline.getOptionValue(INPUT_OPTION)); if (!file.exists()) { System.err.println("Error: " + file + " does not exist!"); System.exit(-1); } if (file.isDirectory()) { stream = new JsonStatusCorpusReader(file); } else { stream = new JsonStatusBlockReader(file); } int cnt = 0; Status status; while ((status = stream.next()) != null) { if (cmdline.hasOption(DUMP_OPTION)) { String text = status.getText(); if (text != null) { text = text.replaceAll("\\s+", " "); text = text.replaceAll("\0", ""); } out.println(String.format("%d\t%s\t%s\t%s", status.getId(), status.getScreenname(), status.getCreatedAt(), text)); } cnt++; if (cnt % 10000 == 0 && cmdline.hasOption(VERBOSE_OPTION)) { LOG.info(cnt + " statuses read"); } } stream.close(); LOG.info(String.format("Total of %s statuses read.", cnt)); }
From source file:com.tamingtext.tagging.LuceneTagExtractor.java
/**
 * Command-line entry point that emits text for tags from a Lucene index directory.
 *
 * <p>Validates the {@code --dir} index directory and the optional {@code --max} value, creates
 * the output directory and calls {@code emitTextForTags(indexDir, outputDir)}. The output
 * directory is taken from {@code --output} when supplied; otherwise the previous hard-coded
 * default location is used.
 *
 * <p>NOTE(review): {@code --max} and {@code --field} are parsed but not passed on to
 * {@code emitTextForTags}; confirm whether that helper should consume them.
 *
 * @param args command-line arguments
 * @throws IOException if emitting the text fails
 * @throws IllegalArgumentException if {@code --dir} is not a directory or {@code --max} is negative
 */
public static void main(String[] args) throws IOException {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option inputOpt = obuilder.withLongName("dir").withRequired(true)
      .withArgument(abuilder.withName("dir").withMinimum(1).withMaximum(1).create())
      .withDescription("The Lucene directory").withShortName("d").create();

  Option outputOpt = obuilder.withLongName("output").withRequired(false)
      .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
      .withDescription("The output directory").withShortName("o").create();

  Option maxOpt = obuilder.withLongName("max").withRequired(false)
      .withArgument(abuilder.withName("max").withMinimum(1).withMaximum(1).create())
      .withDescription(
          "The maximum number of vectors to output. If not specified, then it will loop over all docs")
      .withShortName("m").create();

  Option fieldOpt = obuilder.withLongName("field").withRequired(true)
      .withArgument(abuilder.withName("field").withMinimum(1).withMaximum(1).create())
      .withDescription("The field in the index").withShortName("f").create();

  Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
      .create();

  // The help option has to be part of the group, otherwise the parser rejects -h/--help.
  Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(maxOpt)
      .withOption(fieldOpt).withOption(helpOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    // Registered as the help option so that asking for help works without the required options.
    parser.setHelpOption(helpOpt);
    CommandLine cmdLine = parser.parse(args);

    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return;
    }

    File file = new File(cmdLine.getValue(inputOpt).toString());
    if (!file.isDirectory()) {
      throw new IllegalArgumentException(file + " does not exist or is not a directory");
    }

    // Validated for early feedback only; see the NOTE(review) in the method documentation.
    long maxDocs = Long.MAX_VALUE;
    if (cmdLine.hasOption(maxOpt)) {
      maxDocs = Long.parseLong(cmdLine.getValue(maxOpt).toString());
    }
    if (maxDocs < 0) {
      throw new IllegalArgumentException("maxDocs must be >= 0");
    }

    // Honor --output; fall back to the legacy location so existing invocations keep working.
    File output = cmdLine.hasOption(outputOpt)
        ? new File(cmdLine.getValue(outputOpt).toString())
        : new File("/home/drew/taming-text/delicious/training");
    output.mkdirs();

    emitTextForTags(file, output);
  } catch (OptionException e) {
    log.error("Exception", e);
    CommandLineUtil.printHelp(group);
  }
}
From source file:Inmemantlr.java
public static void main(String[] args) { LOGGER.info("Inmemantlr tool"); HelpFormatter hformatter = new HelpFormatter(); Options options = new Options(); // Binary arguments options.addOption("h", "print this message"); Option grmr = Option.builder().longOpt("grmrfiles").hasArgs().desc("comma-separated list of ANTLR files") .required(true).argName("grmrfiles").type(String.class).valueSeparator(',').build(); Option infiles = Option.builder().longOpt("infiles").hasArgs() .desc("comma-separated list of files to parse").required(true).argName("infiles").type(String.class) .valueSeparator(',').build(); Option utilfiles = Option.builder().longOpt("utilfiles").hasArgs() .desc("comma-separated list of utility files to be added for " + "compilation").required(false) .argName("utilfiles").type(String.class).valueSeparator(',').build(); Option odir = Option.builder().longOpt("outdir") .desc("output directory in which the dot files will be " + "created").required(false).hasArg(true) .argName("outdir").type(String.class).build(); options.addOption(infiles);/*from ww w .j a va2 s.co m*/ options.addOption(grmr); options.addOption(utilfiles); options.addOption(odir); CommandLineParser parser = new DefaultParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); if (cmd.hasOption('h')) { hformatter.printHelp("java -jar inmemantlr.jar", options); System.exit(0); } } catch (ParseException e) { hformatter.printHelp("java -jar inmemantlr.jar", options); LOGGER.error(e.getMessage()); System.exit(-1); } // input files Set<File> ins = getFilesForOption(cmd, "infiles"); // grammar files Set<File> gs = getFilesForOption(cmd, "grmrfiles"); // utility files Set<File> uf = getFilesForOption(cmd, "utilfiles"); // output dir Set<File> od = getFilesForOption(cmd, "outdir"); if (od.size() > 1) { LOGGER.error("output directories must be less than or equal to 1"); System.exit(-1); } if (ins.size() <= 0) { LOGGER.error("no input files were specified"); System.exit(-1); } if 
(gs.size() <= 0) { LOGGER.error("no grammar files were specified"); System.exit(-1); } LOGGER.info("create generic parser"); GenericParser gp = null; try { gp = new GenericParser(gs.toArray(new File[gs.size()])); } catch (FileNotFoundException e) { LOGGER.error(e.getMessage()); System.exit(-1); } if (!uf.isEmpty()) { try { gp.addUtilityJavaFiles(uf.toArray(new String[uf.size()])); } catch (FileNotFoundException e) { LOGGER.error(e.getMessage()); System.exit(-1); } } LOGGER.info("create and add parse tree listener"); DefaultTreeListener dt = new DefaultTreeListener(); gp.setListener(dt); LOGGER.info("compile generic parser"); try { gp.compile(); } catch (CompilationException e) { LOGGER.error("cannot compile generic parser: {}", e.getMessage()); System.exit(-1); } String fpfx = ""; for (File of : od) { if (!of.exists() || !of.isDirectory()) { LOGGER.error("output directory does not exist or is not a " + "directory"); System.exit(-1); } fpfx = of.getAbsolutePath(); } Ast ast; for (File f : ins) { try { gp.parse(f); } catch (IllegalWorkflowException | FileNotFoundException e) { LOGGER.error(e.getMessage()); System.exit(-1); } ast = dt.getAst(); if (!fpfx.isEmpty()) { String of = fpfx + "/" + FilenameUtils.removeExtension(f.getName()) + ".dot"; LOGGER.info("write file {}", of); try { FileUtils.writeStringToFile(new File(of), ast.toDot(), "UTF-8"); } catch (IOException e) { LOGGER.error(e.getMessage()); System.exit(-1); } } else { LOGGER.info("Tree for {} \n {}", f.getName(), ast.toDot()); } } System.exit(0); }
From source file:act.installer.pubchem.PubchemSynonymFinder.java
public static void main(String[] args) throws Exception { org.apache.commons.cli.Options opts = new org.apache.commons.cli.Options(); for (Option.Builder b : OPTION_BUILDERS) { opts.addOption(b.build());//from w w w . ja v a 2s .c om } CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { System.err.format("Argument parsing failed: %s\n", e.getMessage()); HELP_FORMATTER.printHelp(PubchemSynonymFinder.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } if (cl.hasOption("help")) { HELP_FORMATTER.printHelp(PubchemSynonymFinder.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); return; } File rocksDBFile = new File(cl.getOptionValue(OPTION_INDEX_PATH)); if (!rocksDBFile.isDirectory()) { System.err.format("Index directory does not exist or is not a directory at '%s'", rocksDBFile.getAbsolutePath()); HELP_FORMATTER.printHelp(PubchemSynonymFinder.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } List<String> compoundIds = null; if (cl.hasOption(OPTION_PUBCHEM_COMPOUND_ID)) { compoundIds = Collections.singletonList(cl.getOptionValue(OPTION_PUBCHEM_COMPOUND_ID)); } else if (cl.hasOption(OPTION_IDS_FILE)) { File idsFile = new File(cl.getOptionValue(OPTION_IDS_FILE)); if (!idsFile.exists()) { System.err.format("Cannot find Pubchem CIDs file at %s", idsFile.getAbsolutePath()); HELP_FORMATTER.printHelp(PubchemSynonymFinder.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } compoundIds = getCIDsFromFile(idsFile); if (compoundIds.size() == 0) { System.err.format("Found zero Pubchem CIDs to process in file at '%s', exiting", idsFile.getAbsolutePath()); HELP_FORMATTER.printHelp(PubchemSynonymFinder.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } } else { System.err.format("Must specify one of '%s' or '%s'; index is too big to print all synonyms.", OPTION_PUBCHEM_COMPOUND_ID, 
OPTION_IDS_FILE); HELP_FORMATTER.printHelp(PubchemSynonymFinder.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } // Run a quick check to warn users of malformed ids. compoundIds.forEach(x -> { if (!PC_CID_PATTERN.matcher(x).matches()) { // Use matches() for complete matching. LOGGER.warn("Specified compound id does not match expected format: %s", x); } }); LOGGER.info("Opening DB and searching for %d Pubchem CIDs", compoundIds.size()); Pair<RocksDB, Map<PubchemTTLMerger.COLUMN_FAMILIES, ColumnFamilyHandle>> dbAndHandles = null; Map<String, PubchemSynonyms> results = new LinkedHashMap<>(compoundIds.size()); try { dbAndHandles = PubchemTTLMerger.openExistingRocksDB(rocksDBFile); RocksDB db = dbAndHandles.getLeft(); ColumnFamilyHandle cidToSynonymsCfh = dbAndHandles.getRight() .get(PubchemTTLMerger.COLUMN_FAMILIES.CID_TO_SYNONYMS); for (String cid : compoundIds) { PubchemSynonyms synonyms = null; byte[] val = db.get(cidToSynonymsCfh, cid.getBytes(UTF8)); if (val != null) { ObjectInputStream oi = new ObjectInputStream(new ByteArrayInputStream(val)); // We're relying on our use of a one-value-type per index model here so we can skip the instanceof check. synonyms = (PubchemSynonyms) oi.readObject(); } else { LOGGER.warn("No synonyms available for compound id '%s'", cid); } results.put(cid, synonyms); } } finally { if (dbAndHandles != null) { dbAndHandles.getLeft().close(); } } try (OutputStream outputStream = cl.hasOption(OPTION_OUTPUT) ? new FileOutputStream(cl.getOptionValue(OPTION_OUTPUT)) : System.out) { OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValue(outputStream, results); new OutputStreamWriter(outputStream).append('\n'); } LOGGER.info("Done searching for Pubchem synonyms"); }
From source file:net.myrrix.online.eval.ParameterOptimizer.java
/**
 * Command-line entry point of the parameter optimizer.
 *
 * <p>Expected arguments:
 * {@code dataDirectory numSteps evaluationPercentage property=min:max [property2=min2:max2 ...]}.
 * Each range is read as an integer range when both bounds parse as integers and as a
 * floating-point range otherwise; a single value ({@code property=value}) is used as both
 * bounds. The optimal values found are printed to stdout.
 *
 * @param args command-line arguments as described above
 * @throws Exception if the optimization run fails
 * @throws IllegalArgumentException if an argument is malformed or out of range
 */
public static void main(String[] args) throws Exception {
  if (args.length < 4) {
    System.err.println(
        "Usage: dataDirectory numSteps evaluationPercentage property=min:max [property2=min2:max2 ...]");
    return;
  }

  final File dataDir = new File(args[0]);
  Preconditions.checkArgument(dataDir.exists() && dataDir.isDirectory(), "Not a directory: %s", dataDir);
  // listFiles() returns null when the directory cannot be read; treat that like "no files".
  File[] dataFiles = dataDir.listFiles();
  Preconditions.checkArgument(dataFiles != null && dataFiles.length > 0, "No files in: %s", dataDir);

  int numSteps = Integer.parseInt(args[1]);
  Preconditions.checkArgument(numSteps >= 2, "# steps must be at least 2: %s", numSteps);

  final double evaluationPercentage = Double.parseDouble(args[2]);
  Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
      "evaluationPercentage must be in (0,1]: %s", evaluationPercentage);

  Map<String, ParameterRange> parameterRanges = Maps.newHashMapWithExpectedSize(args.length);
  for (int i = 3; i < args.length; i++) {
    String[] propValue = EQUALS.split(args[i]);
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException below.
    Preconditions.checkArgument(propValue.length >= 2, "Expected property=min:max but got: %s", args[i]);
    String systemProperty = propValue[0];
    String[] minMax = COLON.split(propValue[1]);
    ParameterRange range;
    try {
      int min = Integer.parseInt(minMax[0]);
      int max = Integer.parseInt(minMax.length == 1 ? minMax[0] : minMax[1]);
      range = new ParameterRange(min, max);
    } catch (NumberFormatException ignored) {
      // Not an integer range; interpret both bounds as floating-point values instead.
      double min = Double.parseDouble(minMax[0]);
      double max = Double.parseDouble(minMax.length == 1 ? minMax[0] : minMax[1]);
      range = new ParameterRange(min, max);
    }
    parameterRanges.put(systemProperty, range);
  }

  // Each evaluation runs the precision/recall evaluator and reports its mean average precision.
  Callable<Number> evaluator = new Callable<Number>() {
    @Override
    public Number call() throws IOException, TasteException, InterruptedException {
      MyrrixIRStatistics stats = (MyrrixIRStatistics) new PrecisionRecallEvaluator().evaluate(dataDir, 0.9,
          evaluationPercentage, null);
      return stats == null ? null : stats.getMeanAveragePrecision();
    }
  };

  ParameterOptimizer optimizer = new ParameterOptimizer(parameterRanges, numSteps, evaluator, false);
  Map<String, Number> optimalValues = optimizer.findGoodParameterValues();
  System.out.println(optimalValues);
}
From source file:Attr.java
public static void main(String args[]) { File path = new File(args[0]); // grab command-line argument String exists = getYesNo(path.exists()); String canRead = getYesNo(path.canRead()); String canWrite = getYesNo(path.canWrite()); String isFile = getYesNo(path.isFile()); String isHid = getYesNo(path.isHidden()); String isDir = getYesNo(path.isDirectory()); String isAbs = getYesNo(path.isAbsolute()); System.out.println("File attributes for '" + args[0] + "'"); System.out.println("Exists : " + exists); if (path.exists()) { System.out.println("Readable : " + canRead); System.out.println("Writable : " + canWrite); System.out.println("Is directory : " + isDir); System.out.println("Is file : " + isFile); System.out.println("Is hidden : " + isHid); System.out.println("Absolute path : " + isAbs); }/*w w w . jav a2 s .c o m*/ }
From source file:com.grantingersoll.intell.index.Indexer.java
/**
 * Command-line entry point that indexes wikipedia dump files into Solr.
 *
 * <p>{@code --wikiFile} names either a single dump file or a directory, in which case every
 * {@code .xml} file directly inside it is indexed. Indexing stops once {@code --numDocs}
 * documents have been indexed in total. {@code --solrURL} and {@code --batch} override the
 * default Solr URL and the default batch size of 100.
 *
 * @param args command-line arguments
 * @throws Exception if option parsing or indexing fails
 * @throws IOException if the dump directory cannot be listed
 */
public static void main(String[] args) throws Exception {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option wikipediaFileOpt = obuilder.withLongName("wikiFile").withRequired(true)
      .withArgument(abuilder.withName("wikiFile").withMinimum(1).withMaximum(1).create())
      .withDescription(
          "The path to the wikipedia dump file. Maybe a directory containing wikipedia dump files."
              + " If a directory is specified, only .xml files are used.")
      .withShortName("w").create();

  Option numDocsOpt = obuilder.withLongName("numDocs").withRequired(false)
      .withArgument(abuilder.withName("numDocs").withMinimum(1).withMaximum(1).create())
      .withDescription("The number of docs to index").withShortName("n").create();

  Option solrURLOpt = obuilder.withLongName("solrURL").withRequired(false)
      .withArgument(abuilder.withName("solrURL").withMinimum(1).withMaximum(1).create())
      .withDescription("The URL where Solr lives").withShortName("s").create();

  Option solrBatchOpt = obuilder.withLongName("batch").withRequired(false)
      .withArgument(abuilder.withName("batch").withMinimum(1).withMaximum(1).create())
      .withDescription("The number of docs to include in each indexing batch").withShortName("b")
      .create();

  Group group = gbuilder.withName("Options").withOption(wikipediaFileOpt).withOption(numDocsOpt)
      .withOption(solrURLOpt).withOption(solrBatchOpt).create();

  Parser parser = new Parser();
  parser.setGroup(group);
  CommandLine cmdLine = parser.parse(args);

  File file = new File(cmdLine.getValue(wikipediaFileOpt).toString());
  File[] dumpFiles;
  if (file.isDirectory()) {
    dumpFiles = file.listFiles(new FilenameFilter() {
      @Override
      public boolean accept(File dir, String name) {
        return name.endsWith(".xml");
      }
    });
    // listFiles() returns null when the directory cannot be read.
    if (dumpFiles == null) {
      throw new IOException("Unable to list files in directory: " + file);
    }
  } else {
    dumpFiles = new File[] { file };
  }

  int numDocs = Integer.MAX_VALUE;
  if (cmdLine.hasOption(numDocsOpt)) {
    numDocs = Integer.parseInt(cmdLine.getValue(numDocsOpt).toString());
  }

  String url = DEFAULT_SOLR_URL;
  if (cmdLine.hasOption(solrURLOpt)) {
    url = cmdLine.getValue(solrURLOpt).toString();
  }

  int batch = 100;
  if (cmdLine.hasOption(solrBatchOpt)) {
    batch = Integer.parseInt(cmdLine.getValue(solrBatchOpt).toString());
  }

  Indexer indexer = new Indexer(new CommonsHttpSolrServer(url));
  int total = 0;
  for (int i = 0; i < dumpFiles.length && total < numDocs; i++) {
    File dumpFile = dumpFiles[i];
    // Log the file being processed (not the input path) and the remaining document budget.
    log.info("Indexing: " + dumpFile + " Num docs to index: " + (numDocs - total));
    long start = System.currentTimeMillis();
    int totalFile = indexer.index(dumpFile, numDocs - total, batch);
    long finish = System.currentTimeMillis();
    if (log.isInfoEnabled()) {
      log.info("Indexing " + dumpFile + " took " + (finish - start) + " ms");
    }
    total += totalFile;
    log.info("Done Indexing: " + dumpFile + ". Indexed " + totalFile + " docs for that file and " + total
        + " overall.");
  }
  log.info("Indexed " + total + " docs overall.");
}