List of usage examples for java.lang.String.trim()
public String trim()
From source file:backtype.storm.command.gray_upgrade.java
public static void main(String[] args) throws Exception { if (args == null || args.length < 1) { System.out.println("Invalid parameter"); usage();//from w w w . j a v a 2s . c o m return; } String topologyName = args[0]; String[] str2 = Arrays.copyOfRange(args, 1, args.length); CommandLineParser parser = new GnuParser(); Options r = buildGeneralOptions(new Options()); CommandLine commandLine = parser.parse(r, str2, true); int workerNum = 0; String component = null; List<String> workers = null; if (commandLine.hasOption("n")) { workerNum = Integer.valueOf(commandLine.getOptionValue("n")); } if (commandLine.hasOption("p")) { component = commandLine.getOptionValue("p"); } if (commandLine.hasOption("w")) { String w = commandLine.getOptionValue("w"); if (!StringUtils.isBlank(w)) { workers = Lists.newArrayList(); String[] parts = w.split(","); for (String part : parts) { if (part.split(":").length == 2) { workers.add(part.trim()); } } } } upgradeTopology(topologyName, component, workers, workerNum); }
From source file:org.musa.tcpclients.Main.java
/** * Load the Spring Integration Application Context * * @param args - command line arguments//from w w w .ja va 2 s .c o m */ public static void main(final String... args) { final Scanner scanner = new Scanner(System.in); //context.getB GenericXmlApplicationContext context = Main.setupContext(); WarpGateway gateway = (WarpGateway) context.getBean("gw"); System.out.println("running.\n\n"); System.out.println("Please enter numbers to spawn spacemarines :"); System.out.println("1: Gabriel Loken"); System.out.println("2: Nathaniel Garro"); System.out.println("3: Ezekyl Abaddon"); System.out.println("4: Sanguinius"); System.out.println("5: Lucius"); System.out.println("\t- Entering q will quit the application"); System.out.print("\n"); while (true) { final String input = scanner.nextLine(); if ("q".equals(input.trim())) { break; } else { SpaceMarine gabriel = new SpaceMarine("Gabriel Loken", "Luna Wolves", 500, SMRank.CaptainBrother, SMLoyalty.Loyalist, 100); SpaceMarine garro = new SpaceMarine("Nathaniel Garro", "Deathguard", 500, SMRank.CaptainBrother, SMLoyalty.Loyalist, 100); SpaceMarine ezekyl = new SpaceMarine("Ezekyl Abaddon", "Black Legion", 500, SMRank.CaptainBrother, SMLoyalty.Traitor, 100); SpaceMarine sanguinius = new SpaceMarine("Sanguinius", "Blood angels", 999, SMRank.Primarch, SMLoyalty.Loyalist, 600); SpaceMarine lucius = new SpaceMarine("Lucius", "Emperor's children", 500, SMRank.SwordMaster, SMLoyalty.Traitor, 700); SpaceMarine[] spaceMarines = { gabriel, garro, ezekyl, sanguinius, lucius }; int max_id = spaceMarines.length; int num = 0; //input try { num = Integer.parseInt(input); } catch (NumberFormatException e) { System.out.println("unable to parse value"); } if (num >= max_id) { System.out.println("no such spacemarine, using Loken"); num = 0; } System.out.println("teleporting " + spaceMarines[num].getName() + "...."); Message<SpaceMarine> m = MessageBuilder.withPayload(spaceMarines[num]).build(); //context. 
String reply = (String) gateway.send(m); System.out.println(reply); } } System.out.println("Exiting application...bye."); System.exit(0); }
From source file:at.tuwien.ifs.feature.evaluation.SimilarityRetrievalWriter.java
public static void main(String[] args) throws SOMToolboxException, IOException { // register and parse all options JSAPResult config = OptionFactory.parseResults(args, OPTIONS); File inputVectorFile = config.getFile("inputVectorFile"); String outputDirStr = AbstractOptionFactory.getFilePath(config, "outputDirectory"); File outputDirBase = new File(outputDirStr); outputDirBase.mkdirs();//from w w w .j a va 2 s. com String metricName = config.getString("metric"); DistanceMetric metric = AbstractMetric.instantiateNice(metricName); int neighbours = config.getInt("numberNeighbours"); int startIndex = config.getInt("startIndex"); int numberItems = config.getInt("numberItems", -1); try { SOMLibSparseInputData data = new SOMLibSparseInputData(inputVectorFile.getAbsolutePath()); int endIndex = data.numVectors(); if (numberItems != -1) { if (startIndex + numberItems > endIndex) { System.out.println("Specified number of items (" + numberItems + ") exceeds maximum (" + data.numVectors() + "), limiting to " + (endIndex - startIndex) + "."); } else { endIndex = startIndex + numberItems; } } StdErrProgressWriter progress = new StdErrProgressWriter(endIndex - startIndex, "processing vector "); // SortedSet<InputDistance> distances; for (int inputDatumIndex = startIndex; inputDatumIndex < endIndex; inputDatumIndex++) { InputDatum inputDatum = data.getInputDatum(inputDatumIndex); String inputLabel = inputDatum.getLabel(); if (inputDatumIndex == -1) { throw new IllegalArgumentException( "Input with label '" + inputLabel + "' not found in vector file '" + inputVectorFile + "'; possible labels are: " + StringUtils.toString(data.getLabels(), 15)); } File outputDir = new File(outputDirBase, inputLabel.charAt(2) + "/" + inputLabel.charAt(3) + "/" + inputLabel.charAt(4)); outputDir.mkdirs(); File outputFile = new File(outputDir, inputLabel + ".txt"); boolean fileExistsAndValid = false; if (outputFile.exists()) { // check if it the valid data String linesInvalid = ""; int validLineCount = 
0; ArrayList<String> lines = FileUtils.readLinesAsList(outputFile.getAbsolutePath()); for (String string : lines) { if (string.trim().length() == 0) { continue; } String[] parts = string.split("\t"); if (parts.length != 2) { linesInvalid += "Line '" + string + "' invalid - contains " + parts.length + " elements.\n"; } else if (!NumberUtils.isNumber(parts[1])) { linesInvalid = "Line '" + string + "' invalid - 2nd part is not a number.\n"; } else { validLineCount++; } } if (validLineCount != neighbours) { linesInvalid = "Not enough valid lines; expected " + neighbours + ", found " + validLineCount + ".\n"; } fileExistsAndValid = true; if (org.apache.commons.lang.StringUtils.isNotBlank(linesInvalid)) { System.out.println("File " + outputFile.getAbsolutePath() + " exists, but is not valid:\n" + linesInvalid); } } if (fileExistsAndValid) { Logger.getLogger("at.tuwien.ifs.feature.evaluation").finer( "File " + outputFile.getAbsolutePath() + " exists and is valid; not recomputing"); } else { PrintWriter p = new PrintWriter(outputFile); SmallestElementSet<InputDistance> distances = data.getNearestDistances(inputDatumIndex, neighbours, metric); for (InputDistance inputDistance : distances) { p.println(inputDistance.getInput().getLabel() + "\t" + inputDistance.getDistance()); } p.close(); } progress.progress(); } } catch (IllegalArgumentException e) { System.out.println(e.getMessage() + ". Aborting."); System.exit(-1); } }
From source file:ReplayTest.java
public static void main(String[] args) throws IOException { int cnt = 0;// ww w .j a va 2 s .c om String operateTime = ""; String operateType = ""; String uuid = ""; String programId = ""; List<String> lines = Files.readLines(new File("e:\\test\\sample4.txt"), Charsets.UTF_8); System.out.println(lines.size()); for (String value1 : lines) { String[] values = value1.split(SPLIT_T); //logArr16?operateDate=2014-04-25 17:59:59 621, operateType=STARTUP, deviceCode=010333501065233, versionId=, mac=10:48:b1:06:4d:23, platformId=00000032AmlogicMDZ-05-201302261821793, ipAddress=60.10.133.10 if (values.length != 16) { continue; } String logContent = values[15]; if (logContent == null || logContent.trim().length() <= 0) { System.out.println("logContent"); return; } String[] contentArr = logContent.split(COMMA_SIGN);//content if (contentArr == null || contentArr.length != 3) { System.out.println("logContentArr:" + contentArr.length); return; } StringBuffer stringBuffer = new StringBuffer(); //1.CNTVID?? stringBuffer.append(StringsUtils.getEncodeingStr(values[3])).append(SPLIT); //2.IP? if (null == values[7] || EMPTY.equals(values[7])) { stringBuffer.append(StringsUtils.getEncodeingStr(EMPTY)).append(SPLIT); } else { stringBuffer.append(StringsUtils.getEncodeingStr(values[7].trim())).append(SPLIT); } //3.OperateTtype ? 1: 2:? operateType = StringUtils.substringAfter(contentArr[0].trim(), EQUAL_SIGN); if (null == operateType || EMPTY.equals(operateType)) { stringBuffer.append(StringsUtils.getEncodeingStr(EMPTY)).append(SPLIT); } else if ("on".equals(operateType)) { stringBuffer.append(StringsUtils.getEncodeingStr("1")).append(SPLIT); } else if ("out".equals(operateType)) { stringBuffer.append(StringsUtils.getEncodeingStr("2")).append(SPLIT); } // 4.operateTime ? 
operateTime = DateUtil.convertDateToString("yyyyMMdd HHmmss", DateUtil.convertStringToDate("yyyy-MM-dd HH:mm:ss SSS", values[10].trim())); if (operateTime == null || EMPTY.equals(operateTime)) { stringBuffer.append(StringsUtils.getEncodeingStr(EMPTY)).append(SPLIT); } else { stringBuffer.append(StringsUtils.getEncodeingStr(operateTime)).append(SPLIT); } //5.url_addr ? stringBuffer.append(StringsUtils.getEncodeingStr(EMPTY)).append(SPLIT); //6.channel? uuid = StringUtils.substringAfter(contentArr[1].trim(), EQUAL_SIGN); if (uuid == null || EMPTY.equals(uuid)) { stringBuffer.append(StringsUtils.getEncodeingStr(EMPTY)).append(SPLIT); } else { stringBuffer.append(StringsUtils.getEncodeingStr(uuid)).append(SPLIT); } //7.programId id programId = StringUtils.substringAfter(contentArr[2].trim(), EQUAL_SIGN); if (!programId.matches("\\d+")) { //id???? return; } else { stringBuffer.append(StringsUtils.getEncodeingStr(programId)).append(SPLIT); } //8.EPGCode EPG?,?EPGCode? stringBuffer.append(StringsUtils.getEncodeingStr("06")).append(SPLIT); //9.DataSource??12 stringBuffer.append(DATA_SOURCE).append(SPLIT); //10.Fsource??????? stringBuffer.append(F_SOURCE).append(SPLIT); //11.resolution ?,? stringBuffer.append(StringsUtils.getEncodeingStr(EMPTY)); System.out.println(stringBuffer.toString()); cnt++; } System.out.println(":" + cnt); }
From source file:se.berazy.api.examples.App.java
/** * Operation examples./*from ww w . j a va 2 s . c om*/ * @param args */ public static void main(String[] args) { Scanner scanner = null; try { client = new BookkeepingClient(); System.out.println("Choose operation to invoke:\n"); System.out.println("1. Create invoice"); System.out.println("2. Credit invoice"); scanner = new Scanner(System.in); while (scanner.hasNextLine()) { String line = scanner.nextLine(); line = (line != null) ? line.trim().toLowerCase() : ""; if (line.equals("1")) { outPutResponse(createInvoice()); } else if (line.equals("2")) { outPutResponse(creditInvoice()); } else if (line.equals("q") || line.equals("quit") || line.equals("exit")) { System.exit(0); } else { System.out.println("\nPlease choose an operation from 1-7."); } } scanner.close(); } catch (Exception ex) { System.out.println(String.format( "\nAn exception occured, press CTRL+C to exit or enter 'q', 'quit' or 'exit'.\n\nException: %s %s", ex.getMessage(), ex.getStackTrace())); } finally { if (scanner != null) { scanner.close(); } } }
From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step3AddRawDocumentsFromClueWeb.java
public static void main(String[] args) throws IOException { // input dir - list of xml query containers // step2a-retrieved-results File inputDir = new File(args[0]); // warc.bz file containing all required documents according to ClueWeb IDs // ltr-50queries-100docs-clueweb-export.warc.gz File warc = new File(args[1]); // output dir File outputDir = new File(args[2]); if (!outputDir.exists()) { outputDir.mkdirs();// ww w . j a va 2s . c o m } // iterate over query containers for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) { QueryResultContainer queryResultContainer = QueryResultContainer .fromXML(FileUtils.readFileToString(f, "utf-8")); // iterate over warc for each query WARCFileReader reader = new WARCFileReader(new Configuration(), new Path(warc.getAbsolutePath())); try { while (true) { WARCRecord read = reader.read(); String trecId = read.getHeader().getField("WARC-TREC-ID"); // now iterate over retrieved results for the query and find matching IDs for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) { if (rankedResults.clueWebID.equals(trecId)) { // add the raw html content String fullHTTPResponse = new String(read.getContent(), "utf-8"); // TODO fix coding? 
String html = removeHTTPHeaders(fullHTTPResponse); rankedResults.originalHtml = sanitizeXmlChars(html.trim()); } } } } catch (EOFException e) { // end of file } // check if all results have filled html for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) { if (rankedResults.originalHtml == null) { System.err.println("Missing original html for\t" + rankedResults.clueWebID + ", setting relevance to false"); rankedResults.relevant = Boolean.FALSE.toString(); } } // and save the query to output dir File outputFile = new File(outputDir, queryResultContainer.qID + ".xml"); FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8"); System.out.println("Finished " + outputFile); } }
From source file:SequentialPersonalizedPageRank.java
@SuppressWarnings({ "static-access" }) public static void main(String[] args) throws IOException { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(//from w w w . jav a2 s . c o m OptionBuilder.withArgName("val").hasArg().withDescription("random jump factor").create(JUMP)); options.addOption(OptionBuilder.withArgName("node").hasArg() .withDescription("source node (i.e., destination of the random jump)").create(SOURCE)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(SOURCE)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(SequentialPersonalizedPageRank.class.getName(), options); ToolRunner.printGenericCommandUsage(System.out); System.exit(-1); } String infile = cmdline.getOptionValue(INPUT); final String source = cmdline.getOptionValue(SOURCE); float alpha = cmdline.hasOption(JUMP) ? 
Float.parseFloat(cmdline.getOptionValue(JUMP)) : 0.15f; int edgeCnt = 0; DirectedSparseGraph<String, Integer> graph = new DirectedSparseGraph<String, Integer>(); BufferedReader data = new BufferedReader(new InputStreamReader(new FileInputStream(infile))); String line; while ((line = data.readLine()) != null) { line.trim(); String[] arr = line.split("\\t"); for (int i = 1; i < arr.length; i++) { graph.addEdge(new Integer(edgeCnt++), arr[0], arr[i]); } } data.close(); if (!graph.containsVertex(source)) { System.err.println("Error: source node not found in the graph!"); System.exit(-1); } WeakComponentClusterer<String, Integer> clusterer = new WeakComponentClusterer<String, Integer>(); Set<Set<String>> components = clusterer.transform(graph); int numComponents = components.size(); System.out.println("Number of components: " + numComponents); System.out.println("Number of edges: " + graph.getEdgeCount()); System.out.println("Number of nodes: " + graph.getVertexCount()); System.out.println("Random jump factor: " + alpha); // Compute personalized PageRank. PageRankWithPriors<String, Integer> ranker = new PageRankWithPriors<String, Integer>(graph, new Transformer<String, Double>() { @Override public Double transform(String vertex) { return vertex.equals(source) ? 1.0 : 0; } }, alpha); ranker.evaluate(); // Use priority queue to sort vertices by PageRank values. PriorityQueue<Ranking<String>> q = new PriorityQueue<Ranking<String>>(); int i = 0; for (String pmid : graph.getVertices()) { q.add(new Ranking<String>(i++, ranker.getVertexScore(pmid), pmid)); } // Print PageRank values. System.out.println("\nPageRank of nodes, in descending order:"); Ranking<String> r = null; while ((r = q.poll()) != null) { System.out.println(r.rankScore + "\t" + r.getRanked()); } }
From source file:edu.umd.shrawanraina.SequentialPersonalizedPageRank.java
@SuppressWarnings({ "static-access" }) public static void main(String[] args) throws IOException { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(//from www . j a v a2 s . co m OptionBuilder.withArgName("val").hasArg().withDescription("random jump factor").create(JUMP)); options.addOption(OptionBuilder.withArgName("node").hasArg() .withDescription("source node (i.e., destination of the random jump)").create(SOURCE)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(SOURCE)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(SequentialPersonalizedPageRank.class.getName(), options); ToolRunner.printGenericCommandUsage(System.out); System.exit(-1); } String infile = cmdline.getOptionValue(INPUT); final String source = cmdline.getOptionValue(SOURCE); float alpha = cmdline.hasOption(JUMP) ? 
Float.parseFloat(cmdline.getOptionValue(JUMP)) : 0.15f; int edgeCnt = 0; DirectedSparseGraph<String, Integer> graph = new DirectedSparseGraph<String, Integer>(); BufferedReader data = new BufferedReader(new InputStreamReader(new FileInputStream(infile))); String line; while ((line = data.readLine()) != null) { line.trim(); String[] arr = line.split("\\t"); for (int i = 1; i < arr.length; i++) { graph.addEdge(new Integer(edgeCnt++), arr[0], arr[i]); } } data.close(); if (!graph.containsVertex(source)) { System.err.println("Error: source node not found in the graph!"); System.exit(-1); } WeakComponentClusterer<String, Integer> clusterer = new WeakComponentClusterer<String, Integer>(); Set<Set<String>> components = clusterer.transform(graph); int numComponents = components.size(); System.out.println("Number of components: " + numComponents); System.out.println("Number of edges: " + graph.getEdgeCount()); System.out.println("Number of nodes: " + graph.getVertexCount()); System.out.println("Random jump factor: " + alpha); // Compute personalized PageRank. PageRankWithPriors<String, Integer> ranker = new PageRankWithPriors<String, Integer>(graph, new Transformer<String, Double>() { public Double transform(String vertex) { return vertex.equals(source) ? 1.0 : 0; } }, alpha); ranker.evaluate(); // Use priority queue to sort vertices by PageRank values. PriorityQueue<Ranking<String>> q = new PriorityQueue<Ranking<String>>(); int i = 0; for (String pmid : graph.getVertices()) { q.add(new Ranking<String>(i++, ranker.getVertexScore(pmid), pmid)); } // Print PageRank values. System.out.println("\nPageRank of nodes, in descending order:"); Ranking<String> r = null; while ((r = q.poll()) != null) { System.out.println(r.rankScore + "\t" + r.getRanked()); } }
From source file:com.wittawat.wordseg.Main.java
public static void main(String[] args) throws Exception { Console con = System.console(); if (con == null) { System.out.println("The system must support console to run the program."); System.exit(1);//from w w w .ja va 2s.com } // Load model System.out.println("Loading model ..."); Classifier model = Data.getDefaultModel(); System.out.println("Finished loading model."); System.out.println(getAgreement()); boolean isUseDict = true; // Dummy statement to eliminate all lazy loading System.out.println("\n" + new NukeTokenizer3( "?????", model, isUseDict).tokenize() + "\n"); System.out.println(getHelp()); final String SET_DICT_PAT_STR = "\\s*set\\s+dict\\s+(true|false)\\s*"; final Pattern SET_DICT_PAT = Pattern.compile(SET_DICT_PAT_STR); while (true) { System.out.print(">> "); String line = con.readLine(); if (line != null && !line.trim().equals("")) { line = line.trim(); try { if (line.equals("h") || line.equals("help")) { System.out.println(getHelp()); } else if (line.equals("about")) { System.out.println(getAbout()); } else if (line.equals("agreement")) { System.out.println(getAgreement()); } else if (SET_DICT_PAT.matcher(line).find()) { Matcher m = SET_DICT_PAT.matcher(line); m.find(); String v = m.group(1); isUseDict = v.equals("true"); System.out.println("Dictionary will " + (isUseDict ? 
"" : "not ") + "be used."); } else if (line.matches("q|quit|exit")) { System.out.println("Bye"); System.exit(0); } else if (line.contains(":tokfile:")) { String[] splits = line.split(":tokfile:"); String in = splits[0]; String out = splits[1]; String content = FileUtils.readFileToString(new File(in)); long start = new Date().getTime(); NukeTokenizer tokenizer = new NukeTokenizer3(content, model, isUseDict); String tokenized = tokenizer.tokenize(); long end = new Date().getTime(); System.out.println("Time to tokenize: " + (end - start) + " ms."); FileUtils.writeStringToFile(new File(out), tokenized); } else if (line.contains(":tokfile")) { String[] splits = line.split(":tokfile"); String in = splits[0]; String content = FileUtils.readFileToString(new File(in)); long start = new Date().getTime(); NukeTokenizer tokenizer = new NukeTokenizer3(content, model, isUseDict); String tokenized = tokenizer.tokenize(); long end = new Date().getTime(); System.out.println(tokenized); System.out.println("Time to tokenize: " + (end - start) + " ms."); } else if (line.contains(":tok:")) { String[] splits = line.split(":tok:"); String inText = splits[0]; String out = splits[1]; long start = new Date().getTime(); NukeTokenizer tokenizer = new NukeTokenizer3(inText, model, isUseDict); String tokenized = tokenizer.tokenize(); long end = new Date().getTime(); System.out.println("Time to tokenize: " + (end - start) + " ms."); FileUtils.writeStringToFile(new File(out), tokenized); } else if (line.contains(":tok")) { String[] splits = line.split(":tok"); String inText = splits[0]; long start = new Date().getTime(); NukeTokenizer tokenizer = new NukeTokenizer3(inText, model, isUseDict); String tokenized = tokenizer.tokenize(); long end = new Date().getTime(); System.out.println(tokenized); System.out.println("Time to tokenize: " + (end - start) + " ms."); } else { System.out.println("Unknown command"); } } catch (Exception e) { System.out.println("Error. 
See the exception."); e.printStackTrace(); } } } }
From source file:de.tudarmstadt.ukp.argumentation.data.roomfordebate.DataFetcher.java
public static void main(String[] args) throws Exception { File crawledPagesFolder = new File(args[0]); if (!crawledPagesFolder.exists()) { crawledPagesFolder.mkdirs();//from w ww . j a v a2s . co m } File outputFolder = new File(args[1]); if (!outputFolder.exists()) { outputFolder.mkdirs(); } // read links from text file final String urlsResourceName = "roomfordebate-urls.txt"; InputStream urlsStream = DataFetcher.class.getClassLoader().getResourceAsStream(urlsResourceName); if (urlsStream == null) { throw new IOException("Cannot find resource " + urlsResourceName + " on the classpath"); } // read list of urls List<String> urls = new ArrayList<>(); LineIterator iterator = IOUtils.lineIterator(urlsStream, "utf-8"); while (iterator.hasNext()) { // ignore commented url (line starts with #) String line = iterator.nextLine(); if (!line.startsWith("#") && !line.trim().isEmpty()) { urls.add(line.trim()); } } // download all crawlPages(urls, crawledPagesFolder); List<File> files = new ArrayList<>(FileUtils.listFiles(crawledPagesFolder, null, false)); Collections.sort(files, new Comparator<File>() { @Override public int compare(File o1, File o2) { return o1.getName().compareTo(o2.getName()); } }); int idCounter = 0; for (File file : files) { NYTimesCommentsScraper commentsScraper = new NYTimesCommentsScraper(); NYTimesArticleExtractor extractor = new NYTimesArticleExtractor(); String html = FileUtils.readFileToString(file, "utf-8"); idCounter++; File outputFileArticle = new File(outputFolder, String.format("Cx%03d.txt", idCounter)); File outputFileComments = new File(outputFolder, String.format("Dx%03d.txt", idCounter)); try { List<Comment> comments = commentsScraper.extractComments(html); Article article = extractor.extractArticle(html); saveArticleToText(article, outputFileArticle); System.out.println("Saved to " + outputFileArticle); saveCommentsToText(comments, outputFileComments, article); System.out.println("Saved to " + outputFileComments); } catch (IOException ex) { 
System.err.println(file.getName() + "\n" + ex.getMessage()); } } }