List of usage examples for java.nio.file Path resolve
default Path resolve(String other)
From source file:edu.jhu.hlt.concrete.ingesters.simple.DoubleLineBreakFileIngester.java
/** * See usage string./*from w w w.j a va 2 s. c om*/ * * @param args */ public static void main(String[] args) { if (args.length != 4) { System.err.println("This program converts a character-based file to a .concrete file."); System.err.println("The text file must contain UTF-8 encoded characters."); System.err.println( "If the file contains any double-newlines, the file will be split into sections where those double-newlines occur."); System.err.println( "The .concrete file will share the same name as the input file, including the extension."); System.err.println("This program takes 4 arguments."); System.err.println("Argument 1: path/to/a/character/based/file"); System.err.println("Argument 2: type of Communication to generate [e.g., tweet]"); System.err.println("Argument 3: type of Sections to generate [e.g., passage]"); System.err.println("Argument 4: path/to/out/concrete/file"); System.err.println("Example usage: " + CompleteFileIngester.class.getName() + " /my/text/file story passage /my/output/folder"); System.exit(1); } String inPathStr = args[0]; Path inPath = Paths.get(inPathStr); try { ExistingNonDirectoryFile ef = new ExistingNonDirectoryFile(inPath); Optional<String> commType = Optional.ofNullable(args[1]); Optional<String> sectionType = Optional.ofNullable(args[2]); Optional<String> outPathStr = Optional.ofNullable(args[3]); Path ep = ef.getPath(); String fn = ef.getName(); Path outPath = Paths.get(outPathStr.get()); Path outFile = outPath.resolve(fn + ".concrete"); // Output directory exists, or it doesn't. // Try to create if it does not. if (!Files.exists(outPath)) { try { Files.createDirectories(outPath); } catch (IOException e) { logger.error("Caught exception when making output directories.", e); } // if it does, check to make sure it's a directory. } else { if (!Files.isDirectory(outPath)) { logger.error("Output path exists but is not a directory."); System.exit(1); } else { // check to make sure the output file won't be overwritten. if (Files.exists(outFile)) { logger.warn("Output file {} exists; not overwriting.", outFile.toString()); System.exit(1); } } } try { UTF8FileIngester ing = new DoubleLineBreakFileIngester(commType.get(), sectionType.get()); Communication comm = ing.fromCharacterBasedFile(ep); new WritableCommunication(comm).writeToFile(outFile, false); } catch (IngestException e) { logger.error("Caught exception during ingest.", e); System.exit(1); } catch (ConcreteException e) { logger.error("Caught exception writing output.", e); } } catch (NoSuchFileException e) { logger.error("Path {} does not exist.", inPathStr); System.exit(1); } catch (NotFileException e) { logger.error("Path {} is a directory.", inPathStr); System.exit(1); } }
From source file:cc.kave.commons.pointsto.evaluation.ProjectTrainValidateEvaluation.java
public static void main(String[] args) throws IOException { Locale.setDefault(Locale.US); Path baseDir = Paths.get("E:\\Coding\\MT"); Path usageStoresDir = baseDir.resolve("Usages"); Path resultFile = baseDir.resolve("EvaluationResults").resolve("TrainValidate.txt"); ProjectTrainValidateEvaluation evaluator = INJECTOR.getInstance(ProjectTrainValidateEvaluation.class); evaluator.run(usageStoresDir);/*from w ww.ja v a2 s .c o m*/ INJECTOR.getInstance(ResultExporter.class).export(resultFile, evaluator.getResults().entrySet().stream() .flatMap(e -> e.getValue().stream().map(er -> ImmutablePair.of(e.getKey(), er))).map(p -> { return new String[] { CoReNames.vm2srcQualifiedType(p.left), p.right.training, p.right.validation, String.format(Locale.US, "%.3f", p.right.score), Integer.toString(p.right.numTrainingUsages), Integer.toString(p.right.numValidationUsages) }; })); INJECTOR.getInstance(ExecutorService.class).shutdown(); }
From source file:hdfs.MiniHDFS.java
public static void main(String[] args) throws Exception { if (args.length != 1 && args.length != 3) { throw new IllegalArgumentException( "Expected: MiniHDFS <baseDirectory> [<kerberosPrincipal> <kerberosKeytab>], " + "got: " + Arrays.toString(args)); }/*from ww w. ja v a2 s.co m*/ boolean secure = args.length == 3; // configure Paths Path baseDir = Paths.get(args[0]); // hadoop-home/, so logs will not complain if (System.getenv("HADOOP_HOME") == null) { Path hadoopHome = baseDir.resolve("hadoop-home"); Files.createDirectories(hadoopHome); System.setProperty("hadoop.home.dir", hadoopHome.toAbsolutePath().toString()); } // hdfs-data/, where any data is going Path hdfsHome = baseDir.resolve("hdfs-data"); // configure cluster Configuration cfg = new Configuration(); cfg.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, hdfsHome.toAbsolutePath().toString()); // lower default permission: TODO: needed? cfg.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_PERMISSION_KEY, "766"); // optionally configure security if (secure) { String kerberosPrincipal = args[1]; String keytabFile = args[2]; cfg.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos"); cfg.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, "true"); cfg.set(DFSConfigKeys.DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, kerberosPrincipal); cfg.set(DFSConfigKeys.DFS_DATANODE_KERBEROS_PRINCIPAL_KEY, kerberosPrincipal); cfg.set(DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_PRINCIPAL_KEY, kerberosPrincipal); cfg.set(DFSConfigKeys.DFS_NAMENODE_KEYTAB_FILE_KEY, keytabFile); cfg.set(DFSConfigKeys.DFS_DATANODE_KEYTAB_FILE_KEY, keytabFile); cfg.set(DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY, "true"); cfg.set(DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY, "true"); cfg.set(DFSConfigKeys.IGNORE_SECURE_PORTS_FOR_TESTING_KEY, "true"); } UserGroupInformation.setConfiguration(cfg); // TODO: remove hardcoded port! MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(cfg); if (secure) { builder.nameNodePort(9998); } else { builder.nameNodePort(9999); } MiniDFSCluster dfs = builder.build(); // Configure contents of the filesystem org.apache.hadoop.fs.Path esUserPath = new org.apache.hadoop.fs.Path("/user/elasticsearch"); try (FileSystem fs = dfs.getFileSystem()) { // Set the elasticsearch user directory up fs.mkdirs(esUserPath); if (UserGroupInformation.isSecurityEnabled()) { List<AclEntry> acls = new ArrayList<>(); acls.add(new AclEntry.Builder().setType(AclEntryType.USER).setName("elasticsearch") .setPermission(FsAction.ALL).build()); fs.modifyAclEntries(esUserPath, acls); } // Install a pre-existing repository into HDFS String directoryName = "readonly-repository"; String archiveName = directoryName + ".tar.gz"; URL readOnlyRepositoryArchiveURL = MiniHDFS.class.getClassLoader().getResource(archiveName); if (readOnlyRepositoryArchiveURL != null) { Path tempDirectory = Files.createTempDirectory(MiniHDFS.class.getName()); File readOnlyRepositoryArchive = tempDirectory.resolve(archiveName).toFile(); FileUtils.copyURLToFile(readOnlyRepositoryArchiveURL, readOnlyRepositoryArchive); FileUtil.unTar(readOnlyRepositoryArchive, tempDirectory.toFile()); fs.copyFromLocalFile(true, true, new org.apache.hadoop.fs.Path( tempDirectory.resolve(directoryName).toAbsolutePath().toUri()), esUserPath.suffix("/existing/" + directoryName)); FileUtils.deleteDirectory(tempDirectory.toFile()); } } // write our PID file Path tmp = Files.createTempFile(baseDir, null, null); String pid = ManagementFactory.getRuntimeMXBean().getName().split("@")[0]; Files.write(tmp, pid.getBytes(StandardCharsets.UTF_8)); Files.move(tmp, baseDir.resolve(PID_FILE_NAME), StandardCopyOption.ATOMIC_MOVE); // write our port file tmp = Files.createTempFile(baseDir, null, null); Files.write(tmp, Integer.toString(dfs.getNameNodePort()).getBytes(StandardCharsets.UTF_8)); Files.move(tmp, baseDir.resolve(PORT_FILE_NAME), StandardCopyOption.ATOMIC_MOVE); }
From source file:edu.jhu.hlt.concrete.ingesters.alnc.ALNCIngesterRunner.java
/** * @param args/* w ww . j av a2 s . co m*/ */ public static void main(String... args) { Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler()); ALNCIngesterRunner run = new ALNCIngesterRunner(); JCommander jc = new JCommander(run, args); jc.setProgramName(ALNCIngesterRunner.class.getSimpleName()); if (run.delegate.help) { jc.usage(); } try { Path outpath = Paths.get(run.delegate.outputPath); IngesterParameterDelegate.prepare(outpath); for (String pstr : run.delegate.paths) { LOGGER.debug("Running on file: {}", pstr); Path p = Paths.get(pstr); new ExistingNonDirectoryFile(p); Path outWithExt = outpath.resolve(p.getFileName() + ".tar.gz"); if (Files.exists(outWithExt)) { if (!run.delegate.overwrite) { LOGGER.info("File: {} exists and overwrite disabled. Not running.", outWithExt.toString()); continue; } else { Files.delete(outWithExt); } } try (ALNCIngester ing = new ALNCIngester(p); OutputStream os = Files.newOutputStream(outWithExt); GzipCompressorOutputStream gout = new GzipCompressorOutputStream(os); TarArchiver arch = new TarArchiver(gout)) { Iterator<Communication> iter = ing.iterator(); while (iter.hasNext()) { Communication c = iter.next(); LOGGER.debug("Got comm: {}", c.getId()); arch.addEntry(new ArchivableCommunication(c)); } } catch (IngestException e) { LOGGER.error("Caught exception processing path: " + pstr, e); } } } catch (NotFileException | IOException e) { LOGGER.error("Caught exception processing.", e); } }
From source file:com.github.horrorho.inflatabledonkey.Main.java
/** * @param args the command line arguments * @throws IOException/* w ww . j av a 2 s . c o m*/ */ public static void main(String[] args) throws IOException { try { if (!PropertyLoader.instance().test(args)) { return; } } catch (IllegalArgumentException ex) { System.out.println("Argument error: " + ex.getMessage()); System.out.println("Try '" + Property.APP_NAME.value() + " --help' for more information."); System.exit(-1); } // SystemDefault HttpClient. // TODO concurrent CloseableHttpClient httpClient = HttpClients.custom().setUserAgent("CloudKit/479 (13A404)") .useSystemProperties().build(); // Auth // TODO rework when we have UncheckedIOException for Authenticator Auth auth = Property.AUTHENTICATION_TOKEN.value().map(Auth::new).orElse(null); if (auth == null) { auth = Authenticator.authenticate(httpClient, Property.AUTHENTICATION_APPLEID.value().get(), Property.AUTHENTICATION_PASSWORD.value().get()); } logger.debug("-- main() - auth: {}", auth); logger.info("-- main() - dsPrsID:mmeAuthToken: {}:{}", auth.dsPrsID(), auth.mmeAuthToken()); if (Property.ARGS_TOKEN.booleanValue().orElse(false)) { System.out.println("DsPrsID:mmeAuthToken " + auth.dsPrsID() + ":" + auth.mmeAuthToken()); return; } logger.info("-- main() - Apple ID: {}", Property.AUTHENTICATION_APPLEID.value()); logger.info("-- main() - password: {}", Property.AUTHENTICATION_PASSWORD.value()); logger.info("-- main() - token: {}", Property.AUTHENTICATION_TOKEN.value()); // Account Account account = Accounts.account(httpClient, auth); // Backup Backup backup = Backup.create(httpClient, account); // BackupAccount BackupAccount backupAccount = backup.backupAccount(httpClient); logger.debug("-- main() - backup account: {}", backupAccount); // Devices List<Device> devices = backup.devices(httpClient, backupAccount.devices()); logger.debug("-- main() - device count: {}", devices.size()); // Snapshots List<SnapshotID> snapshotIDs = devices.stream().map(Device::snapshots).flatMap(Collection::stream) .collect(Collectors.toList()); logger.info("-- main() - total snapshot count: {}", snapshotIDs.size()); Map<String, Snapshot> snapshots = backup.snapshot(httpClient, snapshotIDs).stream().collect( Collectors.toMap(s -> s.record().getRecordIdentifier().getValue().getName(), Function.identity())); boolean repeat = false; do { for (int i = 0; i < devices.size(); i++) { Device device = devices.get(i); List<SnapshotID> deviceSnapshotIDs = device.snapshots(); System.out.println(i + " " + device.info()); for (int j = 0; j < deviceSnapshotIDs.size(); j++) { SnapshotID sid = deviceSnapshotIDs.get(j); System.out.println("\t" + j + snapshots.get(sid.id()).info() + " " + sid.timestamp()); } } if (Property.PRINT_SNAPSHOTS.booleanValue().orElse(false)) { return; } // Selection Scanner input = new Scanner(System.in); int deviceIndex; int snapshotIndex = Property.SELECT_SNAPSHOT_INDEX.intValue().get(); if (devices.size() > 1) { System.out.printf("Select a device [0 - %d]: ", devices.size() - 1); deviceIndex = input.nextInt(); } else deviceIndex = Property.SELECT_DEVICE_INDEX.intValue().get(); if (deviceIndex >= devices.size() || deviceIndex < 0) { System.out.println("No such device: " + deviceIndex); System.exit(-1); } Device device = devices.get(deviceIndex); System.out.println("Selected device: " + deviceIndex + ", " + device.info()); if (device.snapshots().size() > 1) { System.out.printf("Select a snapshot [0 - %d]: ", device.snapshots().size() - 1); snapshotIndex = input.nextInt(); } else snapshotIndex = Property.SELECT_SNAPSHOT_INDEX.intValue().get(); if (snapshotIndex >= devices.get(deviceIndex).snapshots().size() || snapshotIndex < 0) { System.out.println("No such snapshot for selected device: " + snapshotIndex); System.exit(-1); } logger.info("-- main() - arg device index: {}", deviceIndex); logger.info("-- main() - arg snapshot index: {}", snapshotIndex); String selected = devices.get(deviceIndex).snapshots().get(snapshotIndex).id(); Snapshot snapshot = snapshots.get(selected); System.out.println("Selected snapshot: " + snapshotIndex + ", " + snapshot.info()); // Asset list. List<Assets> assetsList = backup.assetsList(httpClient, snapshot); logger.info("-- main() - assets count: {}", assetsList.size()); // Domains filter --domain option String chosenDomain = Property.FILTER_DOMAIN.value().orElse("").toLowerCase(Locale.US); logger.info("-- main() - arg domain substring filter: {}", Property.FILTER_DOMAIN.value()); // Output domains --domains option if (Property.PRINT_DOMAIN_LIST.booleanValue().orElse(false)) { System.out.println("Domains / file count:"); assetsList.stream().filter(a -> a.domain().isPresent()) .map(a -> a.domain().get() + " / " + a.files().size()).sorted() .forEach(System.out::println); System.out.print("Type a domain ('null' to exit): "); chosenDomain = input.next().toLowerCase(Locale.US); if (chosenDomain.equals("null")) return; // TODO check Assets without domain information. } String domainSubstring = chosenDomain; Predicate<Optional<String>> domainFilter = domain -> domain.map(d -> d.toLowerCase(Locale.US)) .map(d -> d.contains(domainSubstring)).orElse(false); List<String> files = Assets.files(assetsList, domainFilter); logger.info("-- main() - domain filtered file count: {}", files.size()); // Output folders. Path outputFolder = Paths.get(Property.OUTPUT_FOLDER.value().orElse("output")); Path assetOutputFolder = outputFolder.resolve("assets"); // TODO assets value injection Path chunkOutputFolder = outputFolder.resolve("chunks"); // TODO chunks value injection logger.info("-- main() - output folder chunks: {}", chunkOutputFolder); logger.info("-- main() - output folder assets: {}", assetOutputFolder); // Download tools. AuthorizeAssets authorizeAssets = AuthorizeAssets.backupd(); DiskChunkStore chunkStore = new DiskChunkStore(chunkOutputFolder); StandardChunkEngine chunkEngine = new StandardChunkEngine(chunkStore); AssetDownloader assetDownloader = new AssetDownloader(chunkEngine); KeyBagManager keyBagManager = backup.newKeyBagManager(); // Mystery Moo. Moo moo = new Moo(authorizeAssets, assetDownloader, keyBagManager); // Filename extension filter. String filenameExtension = Property.FILTER_EXTENSION.value().orElse("").toLowerCase(Locale.US); logger.info("-- main() - arg filename extension filter: {}", Property.FILTER_EXTENSION.value()); Predicate<Asset> assetFilter = asset -> asset.relativePath().map(d -> d.toLowerCase(Locale.US)) .map(d -> d.endsWith(filenameExtension)).orElse(false); // Batch process files in groups of 100. // TODO group files into batches based on file size. List<List<String>> batches = ListUtils.partition(files, 100); for (List<String> batch : batches) { List<Asset> assets = backup.assets(httpClient, batch).stream().filter(assetFilter::test) .collect(Collectors.toList()); logger.info("-- main() - filtered asset count: {}", assets.size()); moo.download(httpClient, assets, assetOutputFolder); } System.out.print("Download other snapshot (Y/N)? "); repeat = input.next().toLowerCase(Locale.US).charAt(0) == 'y'; } while (repeat == true); }
From source file:edu.jhu.hlt.concrete.ingesters.annotatednyt.AnnotatedNYTIngesterRunner.java
/** * @param args//from www . ja v a2 s. co m */ public static void main(String... args) { Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler()); AnnotatedNYTIngesterRunner run = new AnnotatedNYTIngesterRunner(); JCommander jc = new JCommander(run, args); jc.setProgramName(AnnotatedNYTIngesterRunner.class.getSimpleName()); if (run.delegate.help) { jc.usage(); } try { Path outpath = Paths.get(run.delegate.outputPath); IngesterParameterDelegate.prepare(outpath); NYTCorpusDocumentParser parser = new NYTCorpusDocumentParser(); for (String pstr : run.delegate.paths) { LOGGER.debug("Running on file: {}", pstr); Path p = Paths.get(pstr); new ExistingNonDirectoryFile(p); int nPaths = p.getNameCount(); Path year = p.getName(nPaths - 2); Path outWithExt = outpath.resolve(year.toString() + p.getFileName()); if (Files.exists(outWithExt)) { if (!run.delegate.overwrite) { LOGGER.info("File: {} exists and overwrite disabled. Not running.", outWithExt.toString()); continue; } else { Files.delete(outWithExt); } } try (InputStream is = Files.newInputStream(p); BufferedInputStream bin = new BufferedInputStream(is); TarGzArchiveEntryByteIterator iter = new TarGzArchiveEntryByteIterator(bin); OutputStream os = Files.newOutputStream(outWithExt); GzipCompressorOutputStream gout = new GzipCompressorOutputStream(os); TarArchiver arch = new TarArchiver(gout)) { Iterable<byte[]> able = () -> iter; StreamSupport.stream(able.spliterator(), false).map(ba -> parser.fromByteArray(ba, false)) .map(doc -> new AnnotatedNYTDocument(doc)) .map(and -> new CommunicationizableAnnotatedNYTDocument(and).toCommunication()) .forEach(comm -> { try { arch.addEntry(new ArchivableCommunication(comm)); } catch (IOException e) { LOGGER.error("Caught exception processing file: " + pstr, e); } }); } } } catch (NotFileException | IOException e) { LOGGER.error("Caught exception processing.", e); } }
From source file:edu.usc.goffish.gofs.tools.GoFSFormat.java
public static void main(String[] args) throws IOException { if (args.length < REQUIRED_ARGS) { PrintUsageAndQuit(null);/*from ww w . ja v a 2s. co m*/ } if (args.length == 1 && args[0].equals("-help")) { PrintUsageAndQuit(null); } Path executableDirectory; try { executableDirectory = Paths .get(GoFSFormat.class.getProtectionDomain().getCodeSource().getLocation().toURI()).getParent(); } catch (URISyntaxException e) { throw new RuntimeException("Unexpected error retrieving executable location", e); } Path configPath = executableDirectory.resolve(DEFAULT_CONFIG).normalize(); boolean copyBinaries = false; // parse optional arguments int i = 0; OptArgLoop: for (i = 0; i < args.length - REQUIRED_ARGS; i++) { switch (args[i]) { case "-config": i++; try { configPath = Paths.get(args[i]); } catch (InvalidPathException e) { PrintUsageAndQuit("Config file - " + e.getMessage()); } break; case "-copyBinaries": copyBinaries = true; break; default: break OptArgLoop; } } if (args.length - i < REQUIRED_ARGS) { PrintUsageAndQuit(null); } // finished parsing args if (i < args.length) { PrintUsageAndQuit("Unrecognized argument \"" + args[i] + "\""); } // parse config System.out.println("Parsing config..."); PropertiesConfiguration config = new PropertiesConfiguration(); config.setDelimiterParsingDisabled(true); try { config.load(Files.newInputStream(configPath)); } catch (ConfigurationException e) { throw new IOException(e); } // retrieve data nodes ArrayList<URI> dataNodes; { String[] dataNodesArray = config.getStringArray(GOFS_DATANODES_KEY); if (dataNodesArray.length == 0) { throw new ConversionException("Config must contain key " + GOFS_DATANODES_KEY); } dataNodes = new ArrayList<>(dataNodesArray.length); if (dataNodesArray.length == 0) { throw new ConversionException("Config key " + GOFS_DATANODES_KEY + " has invalid format - must define at least one data node"); } try { for (String node : dataNodesArray) { URI dataNodeURI = new URI(node); if (!"file".equalsIgnoreCase(dataNodeURI.getScheme())) { throw new ConversionException("config key " + GOFS_DATANODES_KEY + " value \"" + dataNodeURI + "\" has invalid format - data node urls must have 'file' scheme"); } else if (dataNodeURI.getPath() == null || dataNodeURI.getPath().isEmpty()) { throw new ConversionException("config key " + GOFS_DATANODES_KEY + " value \"" + dataNodeURI + "\" has invalid format - data node urls must have an absolute path specified"); } // ensure uri ends with a slash, so we know it is a directory if (!dataNodeURI.getPath().endsWith("/")) { dataNodeURI = dataNodeURI.resolve(dataNodeURI.getPath() + "/"); } dataNodes.add(dataNodeURI); } } catch (URISyntaxException e) { throw new ConversionException( "Config key " + GOFS_DATANODES_KEY + " has invalid format - " + e.getMessage()); } } // validate serializer type Class<? extends ISliceSerializer> serializerType; { String serializerTypeName = config.getString(GOFS_SERIALIZER_KEY); if (serializerTypeName == null) { throw new ConversionException("Config must contain key " + GOFS_SERIALIZER_KEY); } try { serializerType = SliceSerializerProvider.loadSliceSerializerType(serializerTypeName); } catch (ReflectiveOperationException e) { throw new ConversionException( "Config key " + GOFS_SERIALIZER_KEY + " has invalid format - " + e.getMessage()); } } // retrieve name node IInternalNameNode nameNode; try { nameNode = NameNodeProvider.loadNameNodeFromConfig(config, GOFS_NAMENODE_TYPE_KEY, GOFS_NAMENODE_LOCATION_KEY); } catch (ReflectiveOperationException e) { throw new RuntimeException("Unable to load name node", e); } System.out.println("Contacting name node..."); // validate name node if (!nameNode.isAvailable()) { throw new IOException("Name node at " + nameNode.getURI() + " is not available"); } System.out.println("Contacting data nodes..."); // validate data nodes for (URI dataNode : dataNodes) { // only attempt ssh if host exists if (dataNode.getHost() != null) { try { SSHHelper.SSH(dataNode, "true"); } catch (IOException e) { throw new IOException("Data node at " + dataNode + " is not available", e); } } } // create temporary directory Path workingDir = Files.createTempDirectory("gofs_format"); try { // create deploy directory Path deployDirectory = Files.createDirectory(workingDir.resolve(DATANODE_DIR_NAME)); // create empty slice directory Files.createDirectory(deployDirectory.resolve(DataNode.DATANODE_SLICE_DIR)); // copy binaries if (copyBinaries) { System.out.println("Copying binaries..."); FileUtils.copyDirectory(executableDirectory.toFile(), deployDirectory.resolve(executableDirectory.getFileName()).toFile()); } // write config file Path dataNodeConfigFile = deployDirectory.resolve(DataNode.DATANODE_CONFIG); try { // create config for every data node and scp deploy folder into place for (URI dataNodeParent : dataNodes) { URI dataNode = dataNodeParent.resolve(DATANODE_DIR_NAME); PropertiesConfiguration datanode_config = new PropertiesConfiguration(); datanode_config.setDelimiterParsingDisabled(true); datanode_config.setProperty(DataNode.DATANODE_INSTALLED_KEY, true); datanode_config.setProperty(DataNode.DATANODE_NAMENODE_TYPE_KEY, config.getString(GOFS_NAMENODE_TYPE_KEY)); datanode_config.setProperty(DataNode.DATANODE_NAMENODE_LOCATION_KEY, config.getString(GOFS_NAMENODE_LOCATION_KEY)); datanode_config.setProperty(DataNode.DATANODE_LOCALHOSTURI_KEY, dataNode.toString()); try { datanode_config.save(Files.newOutputStream(dataNodeConfigFile)); } catch (ConfigurationException e) { throw new IOException(e); } System.out.println("Formatting data node " + dataNode.toString() + "..."); // scp everything into place on the data node SCPHelper.SCP(deployDirectory, dataNodeParent); // update name node nameNode.addDataNode(dataNode); } // update name node nameNode.setSerializer(serializerType); } catch (Exception e) { System.out.println( "ERROR: data node formatting interrupted - name node and data nodes are in an inconsistent state and require clean up"); throw e; } System.out.println("GoFS format complete"); } finally { FileUtils.deleteQuietly(workingDir.toFile()); } }
From source file:edu.jhu.hlt.concrete.ingesters.webposts.WebPostIngester.java
public static void main(String... args) { Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler()); if (args.length < 2) { LOGGER.info("Usage: {} {} {} {}", WebPostIngester.class.getName(), "/path/to/output/folder", "/path/to/web/.xml/file", "<additional/xml/file/paths>"); System.exit(1);// w w w . j av a 2s. c o m } Path outPath = Paths.get(args[0]); Optional.ofNullable(outPath.getParent()).ifPresent(p -> { if (!Files.exists(p)) try { Files.createDirectories(p); } catch (IOException e) { throw new UncheckedIOException(e); } }); if (!Files.isDirectory(outPath)) { LOGGER.error("Output path must be a directory."); System.exit(1); } WebPostIngester ing = new WebPostIngester(); for (int i = 1; i < args.length; i++) { Path lp = Paths.get(args[i]); LOGGER.info("On path: {}", lp.toString()); try { Communication c = ing.fromCharacterBasedFile(lp); new WritableCommunication(c).writeToFile(outPath.resolve(c.getId() + ".comm"), true); } catch (IngestException | ConcreteException e) { LOGGER.error("Caught exception during ingest on file: " + args[i], e); } } }
From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltForumPostIngester.java
public static void main(String... args) { Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler()); if (args.length < 2) { LOGGER.info("Usage: {} {} {} {}", BoltForumPostIngester.class.getName(), "/path/to/output/folder", "/path/to/bolt/.xml/file", "<additional/xml/file/paths>"); System.exit(1);/* w w w .ja v a2 s . c om*/ } Path outPath = Paths.get(args[0]); Optional.ofNullable(outPath.getParent()).ifPresent(p -> { if (!Files.exists(p)) try { Files.createDirectories(p); } catch (IOException e) { throw new UncheckedIOException(e); } }); if (!Files.isDirectory(outPath)) { LOGGER.error("Output path must be a directory."); System.exit(1); } BoltForumPostIngester ing = new BoltForumPostIngester(); for (int i = 1; i < args.length; i++) { Path lp = Paths.get(args[i]); LOGGER.info("On path: {}", lp.toString()); try { Communication c = ing.fromCharacterBasedFile(lp); new WritableCommunication(c).writeToFile(outPath.resolve(c.getId() + ".comm"), true); } catch (IngestException | ConcreteException e) { LOGGER.error("Caught exception during ingest on file: " + args[i], e); } } }
From source file:edu.jhu.hlt.concrete.gigaword.expt.ConvertGigawordDocuments.java
/** * @param args/*from w ww .j av a2 s . c o m*/ */ public static void main(String... args) { Thread.setDefaultUncaughtExceptionHandler(new UncaughtExceptionHandler() { @Override public void uncaughtException(Thread t, Throwable e) { logger.error("Thread {} caught unhandled exception.", t.getName()); logger.error("Unhandled exception.", e); } }); if (args.length != 2) { logger.info("Usage: {} {} {}", GigawordConcreteConverter.class.getName(), "path/to/expt/file", "path/to/out/folder"); System.exit(1); } String exptPathStr = args[0]; String outPathStr = args[1]; // Verify path points to something. Path exptPath = Paths.get(exptPathStr); if (!Files.exists(exptPath)) { logger.error("File: {} does not exist. Re-run with the correct path to " + " the experiment 2 column file. See README.md."); System.exit(1); } logger.info("Experiment map located at: {}", exptPathStr); // Create output dir if not yet created. Path outPath = Paths.get(outPathStr); if (!Files.exists(outPath)) { logger.info("Creating directory: {}", outPath.toString()); try { Files.createDirectories(outPath); } catch (IOException e) { logger.error("Caught an IOException when creating output dir.", e); System.exit(1); } } logger.info("Output directory located at: {}", outPathStr); // Read in expt map. See README.md. Map<String, Set<String>> exptMap = null; try (Reader r = ExperimentUtils.createReader(exptPath); BufferedReader br = new BufferedReader(r)) { exptMap = ExperimentUtils.createFilenameToIdMap(br); } catch (IOException e) { logger.error("Caught an IOException when creating expt map.", e); System.exit(1); } // Start a timer. logger.info("Gigaword -> Concrete beginning."); StopWatch sw = new StopWatch(); sw.start(); // Iterate over expt map. exptMap.entrySet() // .parallelStream() .forEach(p -> { final String pathStr = p.getKey(); final Set<String> ids = p.getValue(); final Path lp = Paths.get(pathStr); logger.info("Converting path: {}", pathStr); // Get the file name and immediate folder it is under. int nElements = lp.getNameCount(); Path fileName = lp.getName(nElements - 1); Path subFolder = lp.getName(nElements - 2); String newFnStr = fileName.toString().split("\\.")[0] + ".tar"; // Mirror folders in output dir. Path localOutFolder = outPath.resolve(subFolder); Path localOutPath = localOutFolder.resolve(newFnStr); // Create output subfolders. if (!Files.exists(localOutFolder) && !Files.isDirectory(localOutFolder)) { logger.info("Creating out file: {}", localOutFolder.toString()); try { Files.createDirectories(localOutFolder); } catch (IOException e) { throw new RuntimeException("Caught an IOException when creating output dir.", e); } } // Iterate over communications. Iterator<Communication> citer; try (OutputStream os = Files.newOutputStream(localOutPath); BufferedOutputStream bos = new BufferedOutputStream(os); Archiver archiver = new TarArchiver(bos);) { citer = new ConcreteGigawordDocumentFactory().iterator(lp); while (citer.hasNext()) { Communication c = citer.next(); String cId = c.getId(); // Document ID must be in the set. Remove. boolean wasInSet = ids.remove(cId); if (!wasInSet) { // Some IDs are duplicated in Gigaword. // See ERRATA. logger.debug( "ID: {} was parsed from path: {}, but was not in the experiment map. Attempting to remove dupe.", cId, pathStr); // Attempt to create a duplicate id (append .duplicate to the id). // Then, try to remove again. String newId = RepairDuplicateIDs.repairDuplicate(cId); boolean dupeRemoved = ids.remove(newId); // There are not nested duplicates, so this should never fire. if (!dupeRemoved) { logger.info("Failed to remove dupe."); return; } else // Modify the communication ID to the unique version. c.setId(newId); } archiver.addEntry(new ArchivableCommunication(c)); } logger.info("Finished path: {}", pathStr); } catch (ConcreteException ex) { logger.error("Caught ConcreteException during Concrete mapping.", ex); logger.error("Path: {}", pathStr); } catch (IOException e) { logger.error("Error archiving communications.", e); logger.error("Path: {}", localOutPath.toString()); } }); sw.stop(); logger.info("Finished."); Minutes m = new Duration(sw.getTime()).toStandardMinutes(); logger.info("Runtime: Approximately {} minutes.", m.getMinutes()); }