List of usage examples for java.nio.file.Path#toString()
String toString();
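Path#toString() returns the string representation of the path, joined with the platform's name separator; it does not touch the file system and does not resolve the path against the working directory. A minimal, self-contained sketch (file names are hypothetical) of the behaviors the examples below rely on:

import java.nio.file.Path;
import java.nio.file.Paths;

public class PathToStringDemo {
    public static void main(String[] args) {
        // A relative path: toString() reproduces exactly what was parsed.
        Path relative = Paths.get("data", "input.txt");
        System.out.println(relative.toString()); // data/input.txt on POSIX, data\input.txt on Windows

        // toString() does not resolve against the working directory;
        // use toAbsolutePath() when the full form is needed.
        System.out.println(relative.toAbsolutePath().toString());

        // Individual name elements are themselves Paths with their own toString().
        System.out.println(relative.getFileName().toString()); // input.txt
    }
}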
From source file: edu.jhu.hlt.concrete.ingesters.alnc.ALNCIngesterRunner.java
/**
 * @param args
 */
public static void main(String... args) {
    Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler());
    ALNCIngesterRunner run = new ALNCIngesterRunner();
    JCommander jc = new JCommander(run, args);
    jc.setProgramName(ALNCIngesterRunner.class.getSimpleName());
    if (run.delegate.help) {
        jc.usage();
    }

    try {
        Path outpath = Paths.get(run.delegate.outputPath);
        IngesterParameterDelegate.prepare(outpath);
        for (String pstr : run.delegate.paths) {
            LOGGER.debug("Running on file: {}", pstr);
            Path p = Paths.get(pstr);
            new ExistingNonDirectoryFile(p);
            Path outWithExt = outpath.resolve(p.getFileName() + ".tar.gz");
            if (Files.exists(outWithExt)) {
                if (!run.delegate.overwrite) {
                    LOGGER.info("File: {} exists and overwrite disabled. Not running.", outWithExt.toString());
                    continue;
                } else {
                    Files.delete(outWithExt);
                }
            }

            try (ALNCIngester ing = new ALNCIngester(p);
                    OutputStream os = Files.newOutputStream(outWithExt);
                    GzipCompressorOutputStream gout = new GzipCompressorOutputStream(os);
                    TarArchiver arch = new TarArchiver(gout)) {
                Iterator<Communication> iter = ing.iterator();
                while (iter.hasNext()) {
                    Communication c = iter.next();
                    LOGGER.debug("Got comm: {}", c.getId());
                    arch.addEntry(new ArchivableCommunication(c));
                }
            } catch (IngestException e) {
                LOGGER.error("Caught exception processing path: " + pstr, e);
            }
        }
    } catch (NotFileException | IOException e) {
        LOGGER.error("Caught exception processing.", e);
    }
}
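Note that outpath.resolve(p.getFileName() + ".tar.gz") never calls toString() explicitly: string concatenation invokes it on the Path returned by getFileName(). A minimal sketch of that naming pattern (the directory and file names are hypothetical):

import java.nio.file.Path;
import java.nio.file.Paths;

public class DerivedOutputName {
    public static void main(String[] args) {
        Path outDir = Paths.get("/tmp/out");        // hypothetical output directory
        Path input = Paths.get("/data/news.alnc");  // hypothetical input file

        // String concatenation calls Path.toString() on getFileName() implicitly,
        // so the resolved child is "news.alnc.tar.gz" under outDir.
        Path outWithExt = outDir.resolve(input.getFileName() + ".tar.gz");
        System.out.println(outWithExt); // /tmp/out/news.alnc.tar.gz
    }
}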
From source file: edu.jhu.hlt.concrete.gigaword.expt.ConvertGigawordDocuments.java
/**
 * @param args
 */
public static void main(String... args) {
    Thread.setDefaultUncaughtExceptionHandler(new UncaughtExceptionHandler() {
        @Override
        public void uncaughtException(Thread t, Throwable e) {
            logger.error("Thread {} caught unhandled exception.", t.getName());
            logger.error("Unhandled exception.", e);
        }
    });

    if (args.length != 2) {
        logger.info("Usage: {} {} {}", GigawordConcreteConverter.class.getName(), "path/to/expt/file",
                "path/to/out/folder");
        System.exit(1);
    }

    String exptPathStr = args[0];
    String outPathStr = args[1];

    // Verify path points to something.
    Path exptPath = Paths.get(exptPathStr);
    if (!Files.exists(exptPath)) {
        logger.error("File: {} does not exist. Re-run with the correct path to "
                + "the experiment 2 column file. See README.md.", exptPathStr);
        System.exit(1);
    }

    logger.info("Experiment map located at: {}", exptPathStr);

    // Create output dir if not yet created.
    Path outPath = Paths.get(outPathStr);
    if (!Files.exists(outPath)) {
        logger.info("Creating directory: {}", outPath.toString());
        try {
            Files.createDirectories(outPath);
        } catch (IOException e) {
            logger.error("Caught an IOException when creating output dir.", e);
            System.exit(1);
        }
    }

    logger.info("Output directory located at: {}", outPathStr);

    // Read in expt map. See README.md.
    Map<String, Set<String>> exptMap = null;
    try (Reader r = ExperimentUtils.createReader(exptPath);
            BufferedReader br = new BufferedReader(r)) {
        exptMap = ExperimentUtils.createFilenameToIdMap(br);
    } catch (IOException e) {
        logger.error("Caught an IOException when creating expt map.", e);
        System.exit(1);
    }

    // Start a timer.
    logger.info("Gigaword -> Concrete beginning.");
    StopWatch sw = new StopWatch();
    sw.start();

    // Iterate over expt map.
    exptMap.entrySet()
            // .parallelStream()
            .forEach(p -> {
                final String pathStr = p.getKey();
                final Set<String> ids = p.getValue();
                final Path lp = Paths.get(pathStr);
                logger.info("Converting path: {}", pathStr);

                // Get the file name and immediate folder it is under.
                int nElements = lp.getNameCount();
                Path fileName = lp.getName(nElements - 1);
                Path subFolder = lp.getName(nElements - 2);
                String newFnStr = fileName.toString().split("\\.")[0] + ".tar";

                // Mirror folders in output dir.
                Path localOutFolder = outPath.resolve(subFolder);
                Path localOutPath = localOutFolder.resolve(newFnStr);

                // Create output subfolders.
                if (!Files.exists(localOutFolder) && !Files.isDirectory(localOutFolder)) {
                    logger.info("Creating out file: {}", localOutFolder.toString());
                    try {
                        Files.createDirectories(localOutFolder);
                    } catch (IOException e) {
                        throw new RuntimeException("Caught an IOException when creating output dir.", e);
                    }
                }

                // Iterate over communications.
                Iterator<Communication> citer;
                try (OutputStream os = Files.newOutputStream(localOutPath);
                        BufferedOutputStream bos = new BufferedOutputStream(os);
                        Archiver archiver = new TarArchiver(bos);) {
                    citer = new ConcreteGigawordDocumentFactory().iterator(lp);
                    while (citer.hasNext()) {
                        Communication c = citer.next();
                        String cId = c.getId();

                        // Document ID must be in the set. Remove.
                        boolean wasInSet = ids.remove(cId);
                        if (!wasInSet) {
                            // Some IDs are duplicated in Gigaword.
                            // See ERRATA.
                            logger.debug(
                                    "ID: {} was parsed from path: {}, but was not in the experiment map. Attempting to remove dupe.",
                                    cId, pathStr);
                            // Attempt to create a duplicate id (append .duplicate to the id).
                            // Then, try to remove again.
                            String newId = RepairDuplicateIDs.repairDuplicate(cId);
                            boolean dupeRemoved = ids.remove(newId);
                            // There are not nested duplicates, so this should never fire.
                            if (!dupeRemoved) {
                                logger.info("Failed to remove dupe.");
                                return;
                            } else
                                // Modify the communication ID to the unique version.
                                c.setId(newId);
                        }

                        archiver.addEntry(new ArchivableCommunication(c));
                    }
                    logger.info("Finished path: {}", pathStr);
                } catch (ConcreteException ex) {
                    logger.error("Caught ConcreteException during Concrete mapping.", ex);
                    logger.error("Path: {}", pathStr);
                } catch (IOException e) {
                    logger.error("Error archiving communications.", e);
                    logger.error("Path: {}", localOutPath.toString());
                }
            });

    sw.stop();
    logger.info("Finished.");
    Minutes m = new Duration(sw.getTime()).toStandardMinutes();
    logger.info("Runtime: Approximately {} minutes.", m.getMinutes());
}
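The converter above derives its output layout from the input path by taking name elements with getName(int) and then switching to plain String processing via toString() to strip the extension. A self-contained sketch of that step (the corpus and output paths are hypothetical):

import java.nio.file.Path;
import java.nio.file.Paths;

public class MirrorOutputLayout {
    public static void main(String[] args) {
        Path lp = Paths.get("/corpora/gigaword/afp_eng/afp_eng_199405.gz"); // hypothetical input
        int n = lp.getNameCount();
        Path fileName = lp.getName(n - 1);  // afp_eng_199405.gz
        Path subFolder = lp.getName(n - 2); // afp_eng

        // toString() exposes the raw element so String APIs (split) can be used on it.
        String newFn = fileName.toString().split("\\.")[0] + ".tar";

        Path outPath = Paths.get("/tmp/out"); // hypothetical output root
        // /tmp/out/afp_eng/afp_eng_199405.tar
        System.out.println(outPath.resolve(subFolder).resolve(newFn));
    }
}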
From source file: fr.cnrs.sharp.Main.java
public static void main(String args[]) {
    Options options = new Options();
    Option versionOpt = new Option("v", "version", false, "print the version information and exit");
    Option helpOpt = new Option("h", "help", false, "print the help");
    Option inProvFileOpt = OptionBuilder.withArgName("input_PROV_file_1> ... <input_PROV_file_n")
            .withLongOpt("input_PROV_files").withDescription("The list of PROV input files, in RDF Turtle.")
            .hasArgs().create("i");
    Option inRawFileOpt = OptionBuilder.withArgName("input_raw_file_1> ... <input_raw_file_n")
            .withLongOpt("input_raw_files")
            .withDescription("The list of raw files to be fingerprinted and possibly interlinked with owl:sameAs.")
            .hasArgs().create("ri");
    Option summaryOpt = OptionBuilder.withArgName("summary").withLongOpt("summary")
            .withDescription("Materialization of wasInfluencedBy relations.").create("s");

    options.addOption(inProvFileOpt);
    options.addOption(inRawFileOpt);
    options.addOption(versionOpt);
    options.addOption(helpOpt);
    options.addOption(summaryOpt);

    String header = "SharpTB is a tool to maturate provenance based on PROV inferences";
    String footer = "\nPlease report any issue to alban.gaignard@univ-nantes.fr";

    try {
        CommandLineParser parser = new BasicParser();
        CommandLine cmd = parser.parse(options, args);

        if (cmd.hasOption("h")) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("SharpTB", header, options, footer, true);
            System.exit(0);
        }
        if (cmd.hasOption("v")) {
            logger.info("SharpTB version 0.1.0");
            System.exit(0);
        }

        if (cmd.hasOption("ri")) {
            String[] inFiles = cmd.getOptionValues("ri");
            Model model = ModelFactory.createDefaultModel();
            for (String inFile : inFiles) {
                Path p = Paths.get(inFile);
                if (!p.toFile().isFile()) {
                    logger.error("Cannot find file " + inFile);
                    System.exit(1);
                } else {
                    // 1. fingerprint
                    try {
                        model.add(Interlinking.fingerprint(p));
                    } catch (IOException e) {
                        logger.error("Cannot fingerprint file " + inFile);
                    }
                }
            }
            // 2. genSameAs
            Model sameAs = Interlinking.generateSameAs(model);
            sameAs.write(System.out, "TTL");
        }

        if (cmd.hasOption("i")) {
            String[] inFiles = cmd.getOptionValues("i");
            Model data = ModelFactory.createDefaultModel();
            for (String inFile : inFiles) {
                Path p = Paths.get(inFile);
                if (!p.toFile().isFile()) {
                    logger.error("Cannot find file " + inFile);
                    System.exit(1);
                } else {
                    RDFDataMgr.read(data, inFile, Lang.TTL);
                }
            }

            Model res = Harmonization.harmonizeProv(data);
            try {
                Path pathInfProv = Files.createTempFile("PROV-inf-tgd-egd-", ".ttl");
                res.write(new FileWriter(pathInfProv.toFile()), "TTL");
                System.out.println("Harmonized PROV written to file " + pathInfProv.toString());

                // If the summary option is activated, then save the subgraph and generate a visualization.
                if (cmd.hasOption("s")) {
                    String queryInfluence = "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> \n"
                            + "PREFIX prov: <http://www.w3.org/ns/prov#> \n"
                            + "CONSTRUCT { \n"
                            + "    ?x ?p ?y .\n"
                            + "    ?x rdfs:label ?lx .\n"
                            + "    ?y rdfs:label ?ly .\n"
                            + "} WHERE {\n"
                            + "    ?x ?p ?y .\n"
                            + "    FILTER (?p IN (prov:wasInfluencedBy)) .\n"
                            + "    ?x rdfs:label ?lx .\n"
                            + "    ?y rdfs:label ?ly .\n"
                            + "}";
                    Query query = QueryFactory.create(queryInfluence);
                    QueryExecution queryExec = QueryExecutionFactory.create(query, res);
                    Model summary = queryExec.execConstruct();
                    queryExec.close();
                    Util.writeHtmlViz(summary);
                }
            } catch (IOException ex) {
                logger.error("Impossible to write the harmonized provenance file.");
                System.exit(1);
            }
        } else {
            // logger.info("Please fill the -i input parameter.");
            // HelpFormatter formatter = new HelpFormatter();
            // formatter.printHelp("SharpTB", header, options, footer, true);
            // System.exit(0);
        }
    } catch (ParseException ex) {
        logger.error("Error while parsing command line arguments. Please check the following help:");
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("SharpToolBox", header, options, footer, true);
        System.exit(1);
    }
}
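Main.java reports the generated temp-file location back to the user via toString(). The JDK-only sketch below isolates that pattern; the prefix and suffix mirror the example, and the file is deleted at the end to keep the sketch side-effect free:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

public class TempFileMessage {
    public static void main(String[] args) throws IOException {
        // createTempFile() picks the actual name; toString() is how the
        // generated location gets reported back to the user.
        Path tmp = Files.createTempFile("PROV-inf-tgd-egd-", ".ttl");
        System.out.println("Harmonized PROV written to file " + tmp.toString());
        Files.deleteIfExists(tmp); // clean up the sketch's temp file
    }
}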
From source file: edu.jhu.hlt.concrete.ingesters.webposts.WebPostIngester.java
public static void main(String... args) {
    Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler());
    if (args.length < 2) {
        LOGGER.info("Usage: {} {} {} {}", WebPostIngester.class.getName(), "/path/to/output/folder",
                "/path/to/web/.xml/file", "<additional/xml/file/paths>");
        System.exit(1);
    }

    Path outPath = Paths.get(args[0]);
    Optional.ofNullable(outPath.getParent()).ifPresent(p -> {
        if (!Files.exists(p))
            try {
                Files.createDirectories(p);
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }
    });
    if (!Files.isDirectory(outPath)) {
        LOGGER.error("Output path must be a directory.");
        System.exit(1);
    }

    WebPostIngester ing = new WebPostIngester();
    for (int i = 1; i < args.length; i++) {
        Path lp = Paths.get(args[i]);
        LOGGER.info("On path: {}", lp.toString());
        try {
            Communication c = ing.fromCharacterBasedFile(lp);
            new WritableCommunication(c).writeToFile(outPath.resolve(c.getId() + ".comm"), true);
        } catch (IngestException | ConcreteException e) {
            LOGGER.error("Caught exception during ingest on file: " + args[i], e);
        }
    }
}
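SLF4J-style {} placeholders, string concatenation, and format specifiers all end up calling toString() on a Path argument, so the explicit call in LOGGER.info("On path: {}", lp.toString()) is optional. A JDK-only sketch (the path elements are hypothetical):

import java.nio.file.Path;
import java.nio.file.Paths;

public class PathInMessages {
    public static void main(String[] args) {
        Path lp = Paths.get("posts", "thread42.xml"); // hypothetical
        // Explicit toString(), as in the ingester above...
        System.out.println("On path: " + lp.toString());
        // ...is equivalent to the implicit conversion: formatters and
        // concatenation call toString() on the Path for you.
        System.out.printf("On path: %s%n", lp);
    }
}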
From source file: com.github.ansell.shp.SHPDump.java
public static void main(String... args) throws Exception {
    final OptionParser parser = new OptionParser();
    final OptionSpec<Void> help = parser.accepts("help").forHelp();
    final OptionSpec<File> input = parser.accepts("input").withRequiredArg().ofType(File.class).required()
            .describedAs("The input SHP file");
    final OptionSpec<File> output = parser.accepts("output").withRequiredArg().ofType(File.class).required()
            .describedAs("The output directory to use for debugging files");
    final OptionSpec<String> outputPrefix = parser.accepts("prefix").withRequiredArg().ofType(String.class)
            .defaultsTo("shp-debug").describedAs("The output prefix to use for debugging files");
    final OptionSpec<File> outputMappingTemplate = parser.accepts("output-mapping").withRequiredArg()
            .ofType(File.class).describedAs("The output mapping template file if it needs to be generated.");
    final OptionSpec<Integer> resolution = parser.accepts("resolution").withRequiredArg().ofType(Integer.class)
            .defaultsTo(2048).describedAs("The output image file resolution");
    final OptionSpec<String> format = parser.accepts("format").withRequiredArg().ofType(String.class)
            .defaultsTo("png").describedAs("The output image format");
    final OptionSpec<String> removeIfEmpty = parser.accepts("remove-if-empty").withRequiredArg()
            .ofType(String.class).describedAs(
                    "The name of an attribute to remove if its value is empty before outputting the resulting shapefile. Use multiple times to specify multiple fields to check");

    OptionSet options = null;
    try {
        options = parser.parse(args);
    } catch (final OptionException e) {
        System.out.println(e.getMessage());
        parser.printHelpOn(System.out);
        throw e;
    }

    if (options.has(help)) {
        parser.printHelpOn(System.out);
        return;
    }

    final Path inputPath = input.value(options).toPath();
    if (!Files.exists(inputPath)) {
        throw new FileNotFoundException("Could not find input SHP file: " + inputPath.toString());
    }

    final Path outputPath = output.value(options).toPath();
    if (!Files.exists(outputPath)) {
        throw new FileNotFoundException("Output directory does not exist: " + outputPath.toString());
    }

    final Path outputMappingPath = options.has(outputMappingTemplate)
            ? outputMappingTemplate.value(options).toPath()
            : null;
    if (options.has(outputMappingTemplate) && Files.exists(outputMappingPath)) {
        throw new FileNotFoundException(
                "Output mapping template file already exists: " + outputMappingPath.toString());
    }

    final Set<String> filterFields = ConcurrentHashMap.newKeySet();
    if (options.has(removeIfEmpty)) {
        for (String nextFilterField : removeIfEmpty.values(options)) {
            System.out.println("Will filter field if empty value found: " + nextFilterField);
            filterFields.add(nextFilterField);
        }
    }
    if (!filterFields.isEmpty()) {
        System.out.println("Full set of filter fields: " + filterFields);
    }

    final String prefix = outputPrefix.value(options);

    FileDataStore store = FileDataStoreFinder.getDataStore(inputPath.toFile());
    if (store == null) {
        throw new RuntimeException("Could not read the given input as an ESRI Shapefile: "
                + inputPath.toAbsolutePath().toString());
    }

    for (String typeName : new LinkedHashSet<>(Arrays.asList(store.getTypeNames()))) {
        System.out.println("");
        System.out.println("Type: " + typeName);
        SimpleFeatureSource featureSource = store.getFeatureSource(typeName);
        SimpleFeatureType schema = featureSource.getSchema();

        Name outputSchemaName = new NameImpl(schema.getName().getNamespaceURI(),
                schema.getName().getLocalPart().replace(" ", "").replace("%20", ""));
        System.out.println("Replacing name on schema: " + schema.getName() + " with " + outputSchemaName);
        SimpleFeatureType outputSchema = SHPUtils.changeSchemaName(schema, outputSchemaName);

        List<String> attributeList = new ArrayList<>();
        for (AttributeDescriptor attribute : schema.getAttributeDescriptors()) {
            System.out.println("Attribute: " + attribute.getName().toString());
            attributeList.add(attribute.getName().toString());
        }
        CsvSchema csvSchema = CSVUtil.buildSchema(attributeList);

        SimpleFeatureCollection collection = featureSource.getFeatures();
        int featureCount = 0;
        Path nextCSVFile = outputPath.resolve(prefix + ".csv");
        Path nextSummaryCSVFile = outputPath
                .resolve(prefix + "-" + outputSchema.getTypeName() + "-Summary.csv");
        List<SimpleFeature> outputFeatureList = new CopyOnWriteArrayList<>();

        try (SimpleFeatureIterator iterator = collection.features();
                Writer bufferedWriter = Files.newBufferedWriter(nextCSVFile, StandardCharsets.UTF_8,
                        StandardOpenOption.CREATE_NEW);
                SequenceWriter csv = CSVUtil.newCSVWriter(bufferedWriter, csvSchema);) {
            List<String> nextLine = new ArrayList<>();
            while (iterator.hasNext()) {
                SimpleFeature feature = iterator.next();
                featureCount++;
                if (featureCount <= 2) {
                    System.out.println("");
                    System.out.println(feature.getIdentifier());
                } else if (featureCount % 100 == 0) {
                    System.out.print(".");
                }
                boolean filterThisFeature = false;
                for (AttributeDescriptor attribute : schema.getAttributeDescriptors()) {
                    String featureString = Optional.ofNullable(feature.getAttribute(attribute.getName()))
                            .orElse("").toString();
                    nextLine.add(featureString);
                    if (filterFields.contains(attribute.getName().toString())
                            && featureString.trim().isEmpty()) {
                        filterThisFeature = true;
                    }
                    if (featureString.length() > 100) {
                        featureString = featureString.substring(0, 100) + "...";
                    }
                    if (featureCount <= 2) {
                        System.out.print(attribute.getName() + "=");
                        System.out.println(featureString);
                    }
                }
                if (!filterThisFeature) {
                    outputFeatureList.add(SHPUtils.changeSchemaName(feature, outputSchema));
                    csv.write(nextLine);
                }
                nextLine.clear();
            }
        }

        try (Reader csvReader = Files.newBufferedReader(nextCSVFile, StandardCharsets.UTF_8);
                Writer summaryOutput = Files.newBufferedWriter(nextSummaryCSVFile, StandardCharsets.UTF_8,
                        StandardOpenOption.CREATE_NEW);
                final Writer mappingWriter = options.has(outputMappingTemplate)
                        ? Files.newBufferedWriter(outputMappingPath)
                        : NullWriter.NULL_WRITER) {
            CSVSummariser.runSummarise(csvReader, summaryOutput, mappingWriter,
                    CSVSummariser.DEFAULT_SAMPLE_COUNT, false);
        }
        if (featureCount > 100) {
            System.out.println("");
        }
        System.out.println("");
        System.out.println("Feature count: " + featureCount);

        SimpleFeatureCollection outputCollection = new ListFeatureCollection(outputSchema, outputFeatureList);
        Path outputShapefilePath = outputPath.resolve(prefix + "-" + outputSchema.getTypeName() + "-dump");
        if (!Files.exists(outputShapefilePath)) {
            Files.createDirectory(outputShapefilePath);
        }
        SHPUtils.writeShapefile(outputCollection, outputShapefilePath);

        // Create ZIP file from the contents to keep the subfiles together
        Path outputShapefileZipPath = outputPath
                .resolve(prefix + "-" + outputSchema.getTypeName() + "-dump.zip");
        try (final OutputStream out = Files.newOutputStream(outputShapefileZipPath,
                StandardOpenOption.CREATE_NEW);
                final ZipOutputStream zip = new ZipOutputStream(out, StandardCharsets.UTF_8);) {
            Files.list(outputShapefilePath).forEachOrdered(Unchecked.consumer(e -> {
                zip.putNextEntry(new ZipEntry(e.getFileName().toString()));
                Files.copy(e, zip);
                zip.closeEntry();
            }));
        }

        try (final OutputStream outputStream = Files.newOutputStream(
                outputPath.resolve(prefix + "." + format.value(options)), StandardOpenOption.CREATE_NEW);) {
            MapContent map = new MapContent();
            map.setTitle(prefix + "-" + outputSchema.getTypeName());
            Style style = SLD.createSimpleStyle(featureSource.getSchema());
            Layer layer = new FeatureLayer(new CollectionFeatureSource(outputCollection), style);
            map.addLayer(layer);
            SHPUtils.renderImage(map, outputStream, resolution.value(options), format.value(options));
        }
    }
}
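SHPDump packs the generated shapefile's sibling files into a ZIP, naming each entry with e.getFileName().toString() so the archive contains bare file names rather than full paths. A standalone sketch of that step using only the JDK (directory and archive names are hypothetical):

import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

public class ZipDirectoryFlat {
    public static void main(String[] args) throws IOException {
        Path dir = Paths.get("shp-debug-dump");       // hypothetical directory to pack
        Path zipPath = Paths.get("shp-debug-dump.zip");
        try (OutputStream out = Files.newOutputStream(zipPath, StandardOpenOption.CREATE_NEW);
                ZipOutputStream zip = new ZipOutputStream(out);
                DirectoryStream<Path> entries = Files.newDirectoryStream(dir)) {
            for (Path e : entries) {
                // getFileName().toString() yields a bare entry name, keeping
                // the on-disk directory structure out of the archive.
                zip.putNextEntry(new ZipEntry(e.getFileName().toString()));
                Files.copy(e, zip);
                zip.closeEntry();
            }
        }
    }
}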
From source file: edu.jhu.hlt.concrete.ingesters.bolt.BoltForumPostIngester.java
public static void main(String... args) {
    Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler());
    if (args.length < 2) {
        LOGGER.info("Usage: {} {} {} {}", BoltForumPostIngester.class.getName(), "/path/to/output/folder",
                "/path/to/bolt/.xml/file", "<additional/xml/file/paths>");
        System.exit(1);
    }

    Path outPath = Paths.get(args[0]);
    Optional.ofNullable(outPath.getParent()).ifPresent(p -> {
        if (!Files.exists(p))
            try {
                Files.createDirectories(p);
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }
    });
    if (!Files.isDirectory(outPath)) {
        LOGGER.error("Output path must be a directory.");
        System.exit(1);
    }

    BoltForumPostIngester ing = new BoltForumPostIngester();
    for (int i = 1; i < args.length; i++) {
        Path lp = Paths.get(args[i]);
        LOGGER.info("On path: {}", lp.toString());
        try {
            Communication c = ing.fromCharacterBasedFile(lp);
            new WritableCommunication(c).writeToFile(outPath.resolve(c.getId() + ".comm"), true);
        } catch (IngestException | ConcreteException e) {
            LOGGER.error("Caught exception during ingest on file: " + args[i], e);
        }
    }
}
From source file: edu.harvard.hul.ois.drs.pdfaconvert.PdfaConvert.java
public static void main(String[] args) throws IOException {
    if (logger == null) {
        System.out.println("About to initialize Log4j");
        logger = LogManager.getLogger();
        System.out.println("Finished initializing Log4j");
    }
    logger.debug("Entering main()");

    // WIP: the following command line code was pulled from FITS
    Options options = new Options();
    Option inputFileOption = new Option(PARAM_I, true, "input file");
    options.addOption(inputFileOption);
    options.addOption(PARAM_V, false, "print version information");
    options.addOption(PARAM_H, false, "help information");
    options.addOption(PARAM_O, true, "output sub-directory");

    CommandLineParser parser = new DefaultParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args, true);
    } catch (ParseException e) {
        System.err.println(e.getMessage());
        System.exit(1);
    }

    // print version info
    if (cmd.hasOption(PARAM_V)) {
        if (StringUtils.isEmpty(applicationVersion)) {
            applicationVersion = "<not set>";
            System.exit(1);
        }
        System.out.println("Version: " + applicationVersion);
        System.exit(0);
    }

    // print help info
    if (cmd.hasOption(PARAM_H)) {
        displayHelp();
        System.exit(0);
    }

    // input parameter
    if (cmd.hasOption(PARAM_I)) {
        String input = cmd.getOptionValue(PARAM_I);
        boolean hasValue = cmd.hasOption(PARAM_I);
        logger.debug("Has option {} value: [{}]", PARAM_I, hasValue);
        String paramVal = cmd.getOptionValue(PARAM_I);
        logger.debug("value of option: [{}] ****", paramVal);

        File inputFile = new File(input);
        if (!inputFile.exists()) {
            logger.warn("{} does not exist or is not readable.", input);
            System.exit(1);
        }

        String subDir = cmd.getOptionValue(PARAM_O);
        PdfaConvert convert;
        if (!StringUtils.isEmpty(subDir)) {
            convert = new PdfaConvert(subDir);
        } else {
            convert = new PdfaConvert();
        }
        if (inputFile.isDirectory()) {
            if (inputFile.listFiles() == null || inputFile.listFiles().length < 1) {
                logger.warn("Input directory is empty, nothing to process.");
                System.exit(1);
            } else {
                logger.debug("Have directory: [{}] with file count: {}", inputFile.getAbsolutePath(),
                        inputFile.listFiles().length);
                DirectoryStream<Path> dirStream = null;
                dirStream = Files.newDirectoryStream(inputFile.toPath());
                for (Path filePath : dirStream) {
                    logger.debug("Have file name: {}", filePath.toString());
                    // Note: only handling files, not recursively going into sub-directories
                    if (filePath.toFile().isFile()) {
                        // Catch possible exception for each file so can handle other files in directory.
                        try {
                            convert.examine(filePath.toFile());
                        } catch (Exception e) {
                            logger.error("Problem processing file: {} -- Error message: {}",
                                    filePath.getFileName(), e.getMessage());
                        }
                    } else {
                        logger.warn("Not a file so not processing: {}", filePath.toString());
                        // could be a directory but not recursing
                    }
                }
                dirStream.close();
            }
        } else {
            logger.debug("About to process file: {}", inputFile.getPath());
            try {
                convert.examine(inputFile);
            } catch (Exception e) {
                logger.error("Problem processing file: {} -- Error message: {}", inputFile.getName(),
                        e.getMessage());
                logger.debug("Problem processing file: {} -- Error message: {}", inputFile.getName(),
                        e.getMessage(), e);
            }
        }
    } else {
        System.err.println("Missing required option: " + PARAM_I);
        displayHelp();
        System.exit(-1);
    }
    System.exit(0);
}
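The directory branch above distinguishes the full filePath.toString() used for debug output from filePath.getFileName() used when naming a single offending entry. A JDK-only sketch of the same traversal (the directory name is hypothetical):

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

public class ListFilesOnly {
    public static void main(String[] args) throws IOException {
        Path dir = Paths.get("input-pdfs"); // hypothetical directory
        try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
            for (Path filePath : stream) {
                if (Files.isRegularFile(filePath)) {
                    // Full path in debug output...
                    System.out.println("Have file name: " + filePath.toString());
                } else {
                    // ...but only the last element when naming the offender.
                    System.out.println("Not a file so not processing: " + filePath.getFileName());
                }
            }
        }
    }
}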
From source file: com.ignorelist.kassandra.steam.scraper.TaggerCli.java
/**
 * @param args the command line arguments
 * @throws java.io.IOException
 * @throws org.antlr.runtime.RecognitionException
 * @throws org.apache.commons.cli.ParseException
 */
public static void main(String[] args) throws IOException, RecognitionException, ParseException {
    Options options = buildOptions();
    CommandLineParser parser = new DefaultParser();
    CommandLine commandLine;
    try {
        commandLine = parser.parse(options, args);
    } catch (ParseException pe) {
        System.out.println(pe.getMessage());
        System.out.println();
        printHelp(options);
        System.exit(0);
        return;
    }
    if (commandLine.hasOption("h")) {
        printHelp(options);
        System.exit(0);
    }

    final PathResolver pathResolver = new PathResolver();

    Configuration configuration;
    Path configurationFile = pathResolver.findConfiguration();
    if (Files.isRegularFile(configurationFile)) {
        configuration = Configuration.fromPropertiesFile(configurationFile);
    } else {
        configuration = new Configuration();
    }
    configuration = toConfiguration(configuration, commandLine);
    //configuration.toProperties().store(System.err, null);
    if (!Files.isRegularFile(configurationFile)) {
        configuration.writeProperties(configurationFile);
        System.err.println(
                "no configuration file present, write based on CLI options: " + configurationFile.toString());
        configuration.toProperties().store(System.err, null);
    }

    Set<Path> sharedConfigPaths = configuration.getSharedConfigPaths();
    if (sharedConfigPaths.size() > 1 && !commandLine.hasOption("w")) {
        System.err.println("multiple sharedconfig.vdf available:\n" + Joiner.on("\n").join(sharedConfigPaths)
                + "\n, can not write to stdout. Need to specify -w or -f with a single sharedconfig.vdf");
        System.exit(1);
    }

    Tagger.Options taggerOptions = Tagger.Options.fromConfiguration(configuration);
    final String[] removeTagsValues = commandLine.getOptionValues("remove");
    if (null != removeTagsValues) {
        taggerOptions.setRemoveTags(Sets.newHashSet(removeTagsValues));
    }
    Set<TagType> tagTypes = configuration.getTagTypes();
    if (null == tagTypes) {
        System.err.println("no tag types!");
        System.exit(1);
    }

    final boolean printTags = commandLine.hasOption("p");

    final HtmlTagLoader htmlTagLoader = new HtmlTagLoader(pathResolver.findCachePath("html"),
            null == configuration.getCacheExpiryDays() ? 7 : configuration.getCacheExpiryDays());
    final BatchTagLoader tagLoader = new BatchTagLoader(htmlTagLoader, configuration.getDownloadThreads());
    if (true || commandLine.hasOption("v")) {
        tagLoader.registerEventListener(new CliEventLoggerLoaded());
    }
    Tagger tagger = new Tagger(tagLoader);

    if (printTags) {
        Set<String> availableTags = tagger.getAvailableTags(sharedConfigPaths, taggerOptions);
        Joiner.on("\n").appendTo(System.out, availableTags);
    } else {
        for (Path path : sharedConfigPaths) {
            VdfNode tagged = tagger.tag(path, taggerOptions);
            if (commandLine.hasOption("w")) {
                Path backup = path.getParent()
                        .resolve(path.getFileName().toString() + ".bak" + new Date().getTime());
                Files.copy(path, backup, StandardCopyOption.REPLACE_EXISTING);
                System.err.println("backed up " + path + " to " + backup);
                Files.copy(new ByteArrayInputStream(tagged.toPrettyString().getBytes(StandardCharsets.UTF_8)),
                        path, StandardCopyOption.REPLACE_EXISTING);
                try {
                    Files.setPosixFilePermissions(path, SHARED_CONFIG_POSIX_PERMS);
                } catch (Exception e) {
                    System.err.println(e);
                }
                System.err.println("wrote " + path);
            } else {
                System.out.println(tagged.toPrettyString());
                System.err.println("pipe to file and copy to: " + path.toString());
            }
        }
    }
}
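The -w branch derives a timestamped backup name by appending to path.getFileName().toString(); resolving the result against the parent then rebuilds a full path. toString() is required here because resolve() takes a child name as a String, and Path has no append-suffix operation. A minimal sketch of that naming scheme (the .vdf path is hypothetical):

import java.nio.file.Path;
import java.nio.file.Paths;

public class TimestampedBackup {
    public static void main(String[] args) {
        Path path = Paths.get("/steam/userdata/sharedconfig.vdf"); // hypothetical
        // getFileName().toString() gives a plain String we can suffix;
        // resolve() against the parent turns it back into a sibling Path.
        Path backup = path.getParent()
                .resolve(path.getFileName().toString() + ".bak" + System.currentTimeMillis());
        System.out.println("backing up " + path + " to " + backup);
        // Files.copy(path, backup, StandardCopyOption.REPLACE_EXISTING); // once path exists
    }
}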
From source file: tuit.java
@SuppressWarnings("ConstantConditions") public static void main(String[] args) { System.out.println(licence);/*from w w w .ja v a 2 s .c om*/ //Declare variables File inputFile; File outputFile; File tmpDir; File blastnExecutable; File properties; File blastOutputFile = null; // TUITPropertiesLoader tuitPropertiesLoader; TUITProperties tuitProperties; // String[] parameters = null; // Connection connection = null; MySQL_Connector mySQL_connector; // Map<Ranks, TUITCutoffSet> cutoffMap; // BLASTIdentifier blastIdentifier = null; // RamDb ramDb = null; CommandLineParser parser = new GnuParser(); Options options = new Options(); options.addOption(tuit.IN, "input<file>", true, "Input file (currently fasta-formatted only)"); options.addOption(tuit.OUT, "output<file>", true, "Output file (in " + tuit.TUIT_EXT + " format)"); options.addOption(tuit.P, "prop<file>", true, "Properties file (XML formatted)"); options.addOption(tuit.V, "verbose", false, "Enable verbose output"); options.addOption(tuit.B, "blast_output<file>", true, "Perform on a pre-BLASTed output"); options.addOption(tuit.DEPLOY, "deploy", false, "Deploy the taxonomic databases"); options.addOption(tuit.UPDATE, "update", false, "Update the taxonomic databases"); options.addOption(tuit.USE_DB, "usedb", false, "Use RDBMS instead of RAM-based taxonomy"); Option option = new Option(tuit.REDUCE, "reduce", true, "Pack identical (100% similar sequences) records in the given sample file"); option.setArgs(Option.UNLIMITED_VALUES); options.addOption(option); option = new Option(tuit.COMBINE, "combine", true, "Combine a set of given reduction files into an HMP Tree-compatible taxonomy"); option.setArgs(Option.UNLIMITED_VALUES); options.addOption(option); options.addOption(tuit.NORMALIZE, "normalize", false, "If used in combination with -combine ensures that the values are normalized by the root value"); HelpFormatter formatter = new HelpFormatter(); try { //Get TUIT directory final File tuitDir = new File( new File(tuit.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath()) .getParent()); final File ramDbFile = new File(tuitDir, tuit.RAM_DB); //Setup logger Log.getInstance().setLogName("tuit.log"); //Read command line final CommandLine commandLine = parser.parse(options, args, true); //Check if the REDUCE option is on if (commandLine.hasOption(tuit.REDUCE)) { final String[] fileList = commandLine.getOptionValues(tuit.REDUCE); for (String s : fileList) { final Path path = Paths.get(s); Log.getInstance().log(Level.INFO, "Processing " + path.toString() + "..."); final NucleotideFastaSequenceReductor nucleotideFastaSequenceReductor = NucleotideFastaSequenceReductor .fromPath(path); ReductorFileOperator.save(nucleotideFastaSequenceReductor, path.resolveSibling(path.getFileName().toString() + ".rdc")); } Log.getInstance().log(Level.FINE, "Task done, exiting..."); return; } //Check if COMBINE is on if (commandLine.hasOption(tuit.COMBINE)) { final boolean normalize = commandLine.hasOption(tuit.NORMALIZE); final String[] fileList = commandLine.getOptionValues(tuit.COMBINE); //TODO: implement a test for format here final List<TreeFormatter.TreeFormatterFormat.HMPTreesOutput> hmpTreesOutputs = new ArrayList<>(); final TreeFormatter treeFormatter = TreeFormatter .newInstance(new TreeFormatter.TuitLineTreeFormatterFormat()); for (String s : fileList) { final Path path = Paths.get(s); Log.getInstance().log(Level.INFO, "Merging " + path.toString() + "..."); treeFormatter.loadFromPath(path); final 
TreeFormatter.TreeFormatterFormat.HMPTreesOutput output = TreeFormatter.TreeFormatterFormat.HMPTreesOutput .newInstance(treeFormatter.toHMPTree(normalize), s.substring(0, s.indexOf("."))); hmpTreesOutputs.add(output); treeFormatter.erase(); } final Path destination; if (commandLine.hasOption(OUT)) { destination = Paths.get(commandLine.getOptionValue(tuit.OUT)); } else { destination = Paths.get("merge.tcf"); } CombinatorFileOperator.save(hmpTreesOutputs, treeFormatter, destination); Log.getInstance().log(Level.FINE, "Task done, exiting..."); return; } if (!commandLine.hasOption(tuit.P)) { throw new ParseException("No properties file option found, exiting."); } else { properties = new File(commandLine.getOptionValue(tuit.P)); } //Load properties tuitPropertiesLoader = TUITPropertiesLoader.newInstanceFromFile(properties); tuitProperties = tuitPropertiesLoader.getTuitProperties(); //Create tmp directory and blastn executable tmpDir = new File(tuitProperties.getTMPDir().getPath()); blastnExecutable = new File(tuitProperties.getBLASTNPath().getPath()); //Check for deploy if (commandLine.hasOption(tuit.DEPLOY)) { if (commandLine.hasOption(tuit.USE_DB)) { NCBITablesDeployer.fastDeployNCBIDatabasesFromNCBI(connection, tmpDir); } else { NCBITablesDeployer.fastDeployNCBIRamDatabaseFromNCBI(tmpDir, ramDbFile); } Log.getInstance().log(Level.FINE, "Task done, exiting..."); return; } //Check for update if (commandLine.hasOption(tuit.UPDATE)) { if (commandLine.hasOption(tuit.USE_DB)) { NCBITablesDeployer.updateDatabasesFromNCBI(connection, tmpDir); } else { //No need to specify a different way to update the database other than just deploy in case of the RAM database NCBITablesDeployer.fastDeployNCBIRamDatabaseFromNCBI(tmpDir, ramDbFile); } Log.getInstance().log(Level.FINE, "Task done, exiting..."); return; } //Connect to the database if (commandLine.hasOption(tuit.USE_DB)) { mySQL_connector = MySQL_Connector.newDefaultInstance( "jdbc:mysql://" + tuitProperties.getDBConnection().getUrl().trim() + "/", tuitProperties.getDBConnection().getLogin().trim(), tuitProperties.getDBConnection().getPassword().trim()); mySQL_connector.connectToDatabase(); connection = mySQL_connector.getConnection(); } else { //Probe for ram database if (ramDbFile.exists() && ramDbFile.canRead()) { Log.getInstance().log(Level.INFO, "Loading RAM taxonomic map..."); try { ramDb = RamDb.loadSelfFromFile(ramDbFile); } catch (IOException ie) { if (ie instanceof java.io.InvalidClassException) throw new IOException("The RAM-based taxonomic database needs to be updated."); } } else { Log.getInstance().log(Level.SEVERE, "The RAM database either has not been deployed, or is not accessible." + "Please use the --deploy option and check permissions on the TUIT directory. 
" + "If you were looking to use the RDBMS as a taxonomic reference, plese use the -usedb option."); return; } } if (commandLine.hasOption(tuit.B)) { blastOutputFile = new File(commandLine.getOptionValue(tuit.B)); if (!blastOutputFile.exists() || !blastOutputFile.canRead()) { throw new Exception("BLAST output file either does not exist, or is not readable."); } else if (blastOutputFile.isDirectory()) { throw new Exception("BLAST output file points to a directory."); } } //Check vital parameters if (!commandLine.hasOption(tuit.IN)) { throw new ParseException("No input file option found, exiting."); } else { inputFile = new File(commandLine.getOptionValue(tuit.IN)); Log.getInstance().setLogName(inputFile.getName().split("\\.")[0] + ".tuit.log"); } //Correct the output file option if needed if (!commandLine.hasOption(tuit.OUT)) { outputFile = new File((inputFile.getPath()).split("\\.")[0] + tuit.TUIT_EXT); } else { outputFile = new File(commandLine.getOptionValue(tuit.OUT)); } //Adjust the output level if (commandLine.hasOption(tuit.V)) { Log.getInstance().setLevel(Level.FINE); Log.getInstance().log(Level.INFO, "Using verbose output for the log"); } else { Log.getInstance().setLevel(Level.INFO); } //Try all files if (inputFile != null) { if (!inputFile.exists() || !inputFile.canRead()) { throw new Exception("Input file either does not exist, or is not readable."); } else if (inputFile.isDirectory()) { throw new Exception("Input file points to a directory."); } } if (!properties.exists() || !properties.canRead()) { throw new Exception("Properties file either does not exist, or is not readable."); } else if (properties.isDirectory()) { throw new Exception("Properties file points to a directory."); } //Create blast parameters final StringBuilder stringBuilder = new StringBuilder(); for (Database database : tuitProperties.getBLASTNParameters().getDatabase()) { stringBuilder.append(database.getUse()); stringBuilder.append(" ");//Gonna insert an extra space for the last database } String remote; String entrez_query; if (tuitProperties.getBLASTNParameters().getRemote().getDelegate().equals("yes")) { remote = "-remote"; entrez_query = "-entrez_query"; parameters = new String[] { "-db", stringBuilder.toString(), remote, entrez_query, tuitProperties.getBLASTNParameters().getEntrezQuery().getValue(), "-evalue", tuitProperties.getBLASTNParameters().getExpect().getValue() }; } else { if (!commandLine.hasOption(tuit.B)) { if (tuitProperties.getBLASTNParameters().getEntrezQuery().getValue().toUpperCase() .startsWith("NOT") || tuitProperties.getBLASTNParameters().getEntrezQuery().getValue().toUpperCase() .startsWith("ALL")) { parameters = new String[] { "-db", stringBuilder.toString(), "-evalue", tuitProperties.getBLASTNParameters().getExpect().getValue(), "-negative_gilist", TUITFileOperatorHelper.restrictToEntrez(tmpDir, tuitProperties.getBLASTNParameters().getEntrezQuery().getValue() .toUpperCase().replace("NOT", "OR")) .getAbsolutePath(), "-num_threads", tuitProperties.getBLASTNParameters().getNumThreads().getValue() }; } else if (tuitProperties.getBLASTNParameters().getEntrezQuery().getValue().toUpperCase() .equals("")) { parameters = new String[] { "-db", stringBuilder.toString(), "-evalue", tuitProperties.getBLASTNParameters().getExpect().getValue(), "-num_threads", tuitProperties.getBLASTNParameters().getNumThreads().getValue() }; } else { parameters = new String[] { "-db", stringBuilder.toString(), "-evalue", tuitProperties.getBLASTNParameters().getExpect().getValue(), /*"-gilist", 
TUITFileOperatorHelper.restrictToEntrez( tmpDir, tuitProperties.getBLASTNParameters().getEntrezQuery().getValue()).getAbsolutePath(),*/ //TODO remove comment!!!!! "-num_threads", tuitProperties.getBLASTNParameters().getNumThreads().getValue() }; } } } //Prepare a cutoff Map if (tuitProperties.getSpecificationParameters() != null && tuitProperties.getSpecificationParameters().size() > 0) { cutoffMap = new HashMap<Ranks, TUITCutoffSet>(tuitProperties.getSpecificationParameters().size()); for (SpecificationParameters specificationParameters : tuitProperties .getSpecificationParameters()) { cutoffMap.put(Ranks.valueOf(specificationParameters.getCutoffSet().getRank()), TUITCutoffSet.newDefaultInstance( Double.parseDouble( specificationParameters.getCutoffSet().getPIdentCutoff().getValue()), Double.parseDouble(specificationParameters.getCutoffSet() .getQueryCoverageCutoff().getValue()), Double.parseDouble( specificationParameters.getCutoffSet().getAlpha().getValue()))); } } else { cutoffMap = new HashMap<Ranks, TUITCutoffSet>(); } final TUITFileOperatorHelper.OutputFormat format; if (tuitProperties.getBLASTNParameters().getOutputFormat().getFormat().equals("rdp")) { format = TUITFileOperatorHelper.OutputFormat.RDP_FIXRANK; } else { format = TUITFileOperatorHelper.OutputFormat.TUIT; } try (TUITFileOperator<NucleotideFasta> nucleotideFastaTUITFileOperator = NucleotideFastaTUITFileOperator .newInstance(format, cutoffMap);) { nucleotideFastaTUITFileOperator.setInputFile(inputFile); nucleotideFastaTUITFileOperator.setOutputFile(outputFile); final String cleanupString = tuitProperties.getBLASTNParameters().getKeepBLASTOuts().getKeep(); final boolean cleanup; if (cleanupString.equals("no")) { Log.getInstance().log(Level.INFO, "Temporary BLAST files will be deleted."); cleanup = true; } else { Log.getInstance().log(Level.INFO, "Temporary BLAST files will be kept."); cleanup = false; } //Create blast identifier ExecutorService executorService = Executors.newSingleThreadExecutor(); if (commandLine.hasOption(tuit.USE_DB)) { if (blastOutputFile == null) { blastIdentifier = TUITBLASTIdentifierDB.newInstanceFromFileOperator(tmpDir, blastnExecutable, parameters, nucleotideFastaTUITFileOperator, connection, cutoffMap, Integer.parseInt( tuitProperties.getBLASTNParameters().getMaxFilesInBatch().getValue()), cleanup); } else { try { blastIdentifier = TUITBLASTIdentifierDB.newInstanceFromBLASTOutput( nucleotideFastaTUITFileOperator, connection, cutoffMap, blastOutputFile, Integer.parseInt( tuitProperties.getBLASTNParameters().getMaxFilesInBatch().getValue()), cleanup); } catch (JAXBException e) { Log.getInstance().log(Level.SEVERE, "Error reading " + blastOutputFile.getName() + ", please check input. The file must be XML formatted."); } catch (Exception e) { e.printStackTrace(); } } } else { if (blastOutputFile == null) { blastIdentifier = TUITBLASTIdentifierRAM.newInstanceFromFileOperator(tmpDir, blastnExecutable, parameters, nucleotideFastaTUITFileOperator, cutoffMap, Integer.parseInt( tuitProperties.getBLASTNParameters().getMaxFilesInBatch().getValue()), cleanup, ramDb); } else { try { blastIdentifier = TUITBLASTIdentifierRAM.newInstanceFromBLASTOutput( nucleotideFastaTUITFileOperator, cutoffMap, blastOutputFile, Integer.parseInt( tuitProperties.getBLASTNParameters().getMaxFilesInBatch().getValue()), cleanup, ramDb); } catch (JAXBException e) { Log.getInstance().log(Level.SEVERE, "Error reading " + blastOutputFile.getName() + ", please check input. 
The file must be XML formatted."); } catch (Exception e) { e.printStackTrace(); } } } Future<?> runnableFuture = executorService.submit(blastIdentifier); runnableFuture.get(); executorService.shutdown(); } } catch (ParseException pe) { Log.getInstance().log(Level.SEVERE, (pe.getMessage())); formatter.printHelp("tuit", options); } catch (SAXException saxe) { Log.getInstance().log(Level.SEVERE, saxe.getMessage()); } catch (FileNotFoundException fnfe) { Log.getInstance().log(Level.SEVERE, fnfe.getMessage()); } catch (TUITPropertyBadFormatException tpbfe) { Log.getInstance().log(Level.SEVERE, tpbfe.getMessage()); } catch (ClassCastException cce) { Log.getInstance().log(Level.SEVERE, cce.getMessage()); } catch (JAXBException jaxbee) { Log.getInstance().log(Level.SEVERE, "The properties file is not well formatted. Please ensure that the XML is consistent with the io.properties.dtd schema."); } catch (ClassNotFoundException cnfe) { //Probably won't happen unless the library deleted from the .jar Log.getInstance().log(Level.SEVERE, cnfe.getMessage()); //cnfe.printStackTrace(); } catch (SQLException sqle) { Log.getInstance().log(Level.SEVERE, "A database communication error occurred with the following message:\n" + sqle.getMessage()); //sqle.printStackTrace(); if (sqle.getMessage().contains("Access denied for user")) { Log.getInstance().log(Level.SEVERE, "Please use standard database login: " + NCBITablesDeployer.login + " and password: " + NCBITablesDeployer.password); } } catch (Exception e) { Log.getInstance().log(Level.SEVERE, e.getMessage()); e.printStackTrace(); } finally { if (connection != null) { try { connection.close(); } catch (SQLException sqle) { Log.getInstance().log(Level.SEVERE, "Problem closing the database connection: " + sqle); } } Log.getInstance().log(Level.FINE, "Task done, exiting..."); } }
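The -reduce branch above writes its output next to the input via path.resolveSibling(path.getFileName().toString() + ".rdc"). A minimal sketch of that idiom (the sample path is hypothetical):

import java.nio.file.Path;
import java.nio.file.Paths;

public class SiblingOutput {
    public static void main(String[] args) {
        Path path = Paths.get("samples/reads.fasta"); // hypothetical
        // resolveSibling() swaps the last path element; toString() lets us
        // append a new extension to the existing file name first.
        Path reduced = path.resolveSibling(path.getFileName().toString() + ".rdc");
        System.out.println(reduced); // samples/reads.fasta.rdc
    }
}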
From source file: edu.jhu.hlt.concrete.ingesters.simple.CompleteFileIngester.java
/**
 * See usage string.
 *
 * @param args
 */
public static void main(String[] args) {
    if (args.length != 3) {
        System.err.println("This program converts a character-based file to a .concrete file.");
        System.err.println("The text file must contain UTF-8 encoded characters.");
        System.err.println(
                "The .concrete file will share the same name as the input file, including the extension.");
        System.err.println("This program takes 3 arguments.");
        System.err.println("Argument 1: path/to/a/character/based/file");
        System.err.println("Argument 2: type of Communication to generate [e.g., tweet]");
        System.err.println("Argument 3: path/to/output/folder");
        System.err.println("Example usage: " + CompleteFileIngester.class.getName()
                + " /my/text/file story /my/output/folder");
        System.exit(1);
    }

    String inPathStr = args[0];
    Path inPath = Paths.get(inPathStr);
    try {
        ExistingNonDirectoryFile ef = new ExistingNonDirectoryFile(inPath);
        Optional<String> commType = Optional.ofNullable(args[1]);
        Optional<String> outPathStr = Optional.ofNullable(args[2]);

        Path ep = ef.getPath();
        String fn = ef.getName();
        Path outPath = Paths.get(outPathStr.get());
        Path outFile = outPath.resolve(fn + ".concrete");

        // Output directory exists, or it doesn't.
        // Try to create if it does not.
        if (!Files.exists(outPath)) {
            try {
                Files.createDirectories(outPath);
            } catch (IOException e) {
                logger.error("Caught exception when making output directories.", e);
            }
        // if it does, check to make sure it's a directory.
        } else {
            if (!Files.isDirectory(outPath)) {
                logger.error("Output path exists but is not a directory.");
                System.exit(1);
            } else {
                // check to make sure the output file won't be overwritten.
                if (Files.exists(outFile)) {
                    logger.warn("Output file {} exists; not overwriting.", outFile.toString());
                    System.exit(1);
                }
            }
        }

        try {
            UTF8FileIngester ing = new CompleteFileIngester(commType.get());
            Communication comm = ing.fromCharacterBasedFile(ep);
            new WritableCommunication(comm).writeToFile(outFile, false);
        } catch (IngestException e) {
            logger.error("Caught exception during ingest.", e);
            System.exit(1);
        } catch (ConcreteException e) {
            logger.error("Caught exception writing output.", e);
        }
    } catch (NoSuchFileException e) {
        logger.error("Path {} does not exist.", inPathStr);
        System.exit(1);
    } catch (NotFileException e) {
        logger.error("Path {} is a directory.", inPathStr);
        System.exit(1);
    }
}
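A condensed sketch of the overwrite guard used above, with hypothetical directory and file names; it shows that toString() only matters when the Path is interpolated into a message, while existence checks operate on the Path itself:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

public class SafeOutputFile {
    public static void main(String[] args) throws IOException {
        Path outDir = Paths.get("out");                  // hypothetical
        Path outFile = outDir.resolve("story.concrete"); // hypothetical
        if (!Files.exists(outDir)) {
            Files.createDirectories(outDir);
        } else if (!Files.isDirectory(outDir)) {
            System.err.println("Output path exists but is not a directory.");
            return;
        }
        if (Files.exists(outFile)) {
            // toString() is only needed for the message; the check uses the Path.
            System.err.printf("Output file %s exists; not overwriting.%n", outFile.toString());
            return;
        }
        System.out.println("Would write " + outFile);
    }
}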