List of usage examples for the java.io.OutputStreamWriter constructor
public OutputStreamWriter(OutputStream out, CharsetEncoder enc)
From source file:gobblin.compaction.CompactionRunner.java
/**
 * Runs every compaction job found in the compaction configuration directory and records
 * how long each job took.
 *
 * <p>The global configuration file named by the single argument is loaded into
 * {@code properties}. Every regular file in the directory named by
 * {@code COMPACTION_CONFIG_DIR} whose name does not start with a dot is loaded into
 * {@code jobProperties}, {@code compact()} is invoked, and one line
 * {@code <absolute path>: <seconds>} is written to the timing file (UTF-8).
 *
 * <p>The process exits with status 1 on wrong usage, when the directory property is
 * missing, or when the directory contains no files.
 *
 * @param args exactly one element: the path of the global configuration file
 * @throws ConfigurationException if a configuration file cannot be loaded
 * @throws IOException if the timing file cannot be opened
 * @throws SQLException declared by the original signature; presumably raised by
 *     {@code compact()} - TODO confirm
 */
public static void main(String[] args) throws ConfigurationException, IOException, SQLException {
    if (args.length != 1) {
        LOG.info("Proper usage: java -jar compaction.jar <global-config-file>\n" + "or\n"
                + "hadoop jar compaction.jar <global-config-file>\n" + "or\n"
                + "yarn jar compaction.jar <global-config-file>\n");
        System.exit(1);
    }

    Configuration globalConfig = new PropertiesConfiguration(args[0]);
    properties = ConfigurationConverter.getProperties(globalConfig);

    // Fail with a readable message instead of the NullPointerException that
    // new File((String) null) would raise.
    String compactionConfigDirPath = properties.getProperty(COMPACTION_CONFIG_DIR);
    if (compactionConfigDirPath == null) {
        System.err.println("Missing required property " + COMPACTION_CONFIG_DIR);
        System.exit(1);
    }

    File compactionConfigDir = new File(compactionConfigDirPath);
    File[] listOfFiles = compactionConfigDir.listFiles();
    if (listOfFiles == null || listOfFiles.length == 0) {
        System.err.println("No compaction configuration files found under " + compactionConfigDir);
        System.exit(1);
    }

    int numOfJobs = 0;
    for (File file : listOfFiles) {
        if (isJobConfigFile(file)) {
            numOfJobs++;
        }
    }
    LOG.info("Found " + numOfJobs + " compaction tasks.");

    // try-with-resources: the timing file is flushed and closed even when a job throws,
    // so the timings of the jobs that did finish are not lost.
    try (PrintWriter pw = new PrintWriter(new OutputStreamWriter(
            new FileOutputStream(properties.getProperty(TIMING_FILE, TIMING_FILE_DEFAULT)),
            Charset.forName("UTF-8")))) {
        for (File file : listOfFiles) {
            if (!isJobConfigFile(file)) {
                continue;
            }
            Configuration jobConfig = new PropertiesConfiguration(file.getAbsolutePath());
            jobProperties = ConfigurationConverter.getProperties(jobConfig);

            long startTime = System.nanoTime();
            compact();
            long elapsedNanos = System.nanoTime() - startTime;

            // Divide in floating point: TimeUnit#toSeconds truncates to whole seconds,
            // which made the %f output always end in .000000.
            double seconds = elapsedNanos / (double) TimeUnit.SECONDS.toNanos(1);
            pw.printf("%s: %f%n", file.getAbsolutePath(), seconds);
        }
    }
}

/** Returns true for regular files whose name does not start with a dot. */
private static boolean isJobConfigFile(File file) {
    return file.isFile() && !file.getName().startsWith(".");
}
From source file:cht.Parser.java
public static void main(String[] args) throws IOException { // TODO get from google drive boolean isUnicode = false; boolean isRemoveInputFileOnComplete = false; int rowNum;/* w w w . ja va 2 s . c o m*/ int colNum; Gson gson = new GsonBuilder().setPrettyPrinting().create(); Properties prop = new Properties(); try { prop.load(new FileInputStream("config.txt")); } catch (IOException ex) { ex.printStackTrace(); } String inputFilePath = prop.getProperty("inputFile"); String outputDirectory = prop.getProperty("outputDirectory"); System.out.println(outputDirectory); // optional String unicode = prop.getProperty("unicode"); String removeInputFileOnComplete = prop.getProperty("removeInputFileOnComplete"); inputFilePath = inputFilePath.trim(); outputDirectory = outputDirectory.trim(); if (unicode != null) { isUnicode = Boolean.parseBoolean(unicode.trim()); } if (removeInputFileOnComplete != null) { isRemoveInputFileOnComplete = Boolean.parseBoolean(removeInputFileOnComplete.trim()); } Writer out = null; FileInputStream in = null; final String newLine = System.getProperty("line.separator").toString(); final String separator = File.separator; try { in = new FileInputStream(inputFilePath); Workbook workbook = new XSSFWorkbook(in); Sheet sheet = workbook.getSheetAt(0); rowNum = sheet.getLastRowNum() + 1; colNum = sheet.getRow(0).getPhysicalNumberOfCells(); for (int j = 1; j < colNum; ++j) { String outputFilename = sheet.getRow(0).getCell(j).getStringCellValue(); // guess directory int slash = outputFilename.indexOf('/'); if (slash != -1) { // has directory outputFilename = outputFilename.substring(0, slash) + separator + outputFilename.substring(slash + 1); } String outputPath = FilenameUtils.concat(outputDirectory, outputFilename); System.out.println("--Writing " + outputPath); out = new OutputStreamWriter(new FileOutputStream(outputPath), "UTF-8"); TreeMap<String, Object> map = new TreeMap<String, Object>(); for (int i = 1; i < rowNum; i++) { try { String key = 
sheet.getRow(i).getCell(0).getStringCellValue(); //String value = ""; Cell tmp = sheet.getRow(i).getCell(j); if (tmp != null) { // not empty string! value = sheet.getRow(i).getCell(j).getStringCellValue(); } if (!key.equals("") && !key.startsWith("#") && !key.startsWith(".")) { value = isUnicode ? StringEscapeUtils.escapeJava(value) : value; int firstdot = key.indexOf("."); String keyName, keyAttribute; if (firstdot > 0) {// a.b.c.d keyName = key.substring(0, firstdot); // a keyAttribute = key.substring(firstdot + 1); // b.c.d TreeMap oldhash = null; Object old = null; if (map.get(keyName) != null) { old = map.get(keyName); if (old instanceof TreeMap == false) { System.out.println("different type of key:" + key); continue; } oldhash = (TreeMap) old; } else { oldhash = new TreeMap(); } int firstdot2 = keyAttribute.indexOf("."); String rootName, childName; if (firstdot2 > 0) {// c, d.f --> d, f rootName = keyAttribute.substring(0, firstdot2); childName = keyAttribute.substring(firstdot2 + 1); } else {// c, d -> d, null rootName = keyAttribute; childName = null; } TreeMap<String, Object> object = myPut(oldhash, rootName, childName); map.put(keyName, object); } else {// c, d -> d, null keyName = key; keyAttribute = null; // simple string mode map.put(key, value); } } } catch (Exception e) { // just ingore empty rows } } String json = gson.toJson(map); // output json out.write(json + newLine); out.close(); } in.close(); System.out.println("\n---Complete!---"); System.out.println("Read input file from " + inputFilePath); System.out.println(colNum - 1 + " output files ate generated at " + outputDirectory); System.out.println(rowNum + " records are generated for each output file."); System.out.println("output file is ecoded as unicode? " + (isUnicode ? 
"yes" : "no")); if (isRemoveInputFileOnComplete) { File input = new File(inputFilePath); input.deleteOnExit(); System.out.println("Deleted " + inputFilePath); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (in != null) { in.close(); } } }
From source file:com.jaspersoft.jasperserver.export.RemoveDuplicatedDisplayName.java
public static void main(String[] args) { Parameters params = null;// w w w.java2 s. c o m boolean success = false; try { GenericApplicationContext ctx = new GenericApplicationContext(); XmlBeanDefinitionReader configReader = new XmlBeanDefinitionReader(ctx); List resourceXML = getPaths(args[2]); if (args != null && args.length > 0) { for (int i = 0; i < resourceXML.size(); i++) { org.springframework.core.io.Resource resource = classPathResourceFactory .create((String) resourceXML.get(i)); configReader.loadBeanDefinitions(resource); } } ctx.refresh(); if (args.length > 3) { if ("UPDATE".equals(args[3])) { updateRepo = true; } } // write to file // try { CommandBean commandBean = (CommandBean) ctx.getBean("removeDuplicateDisplayName", CommandBean.class); Charset encoding = Charset.forName( ((RemoveDuplicatedDisplayName) commandBean).getEncodingProvider().getCharacterEncoding()); osw = new OutputStreamWriter(new FileOutputStream("remove_duplicated_display_name_report.txt"), encoding); commandBean.process(params); } finally { osw.close(); } success = true; } catch (Exception e) { e.printStackTrace(System.err); } System.exit(success ? 0 : -1); }
From source file:json_to_xml_1.java
public static void main(String args[]) { System.out.print("json_to_xml_1 workflow Copyright (C) 2016 Stephan Kreutzer\n" + "This program comes with ABSOLUTELY NO WARRANTY.\n" + "This is free software, and you are welcome to redistribute it\n" + "under certain conditions. See the GNU Affero General Public License 3\n" + "or any later version for details. Also, see the source code repository\n" + "https://github.com/publishing-systems/digital_publishing_workflow_tools/ and\n" + "the project website http://www.publishing-systems.org.\n\n"); json_to_xml_1 converter = json_to_xml_1.getInstance(); converter.getInfoMessages().clear(); try {//w w w .j ava2s.c om converter.execute(args); } catch (ProgramTerminationException ex) { converter.handleTermination(ex); } if (converter.resultInfoFile != null) { try { BufferedWriter writer = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(converter.resultInfoFile), "UTF-8")); writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); writer.write( "<!-- This file was created by json_to_xml_1, which is free software licensed under the GNU Affero General Public License 3 or any later version (see https://github.com/publishing-systems/digital_publishing_workflow_tools/ and http://www.publishing-systems.org). 
-->\n"); writer.write("<json-to-xml-1-result-information>\n"); if (converter.getInfoMessages().size() <= 0) { writer.write(" <success/>\n"); } else { writer.write(" <success>\n"); writer.write(" <info-messages>\n"); for (int i = 0, max = converter.getInfoMessages().size(); i < max; i++) { InfoMessage infoMessage = converter.getInfoMessages().get(i); writer.write(" <info-message number=\"" + i + "\">\n"); writer.write(" <timestamp>" + infoMessage.getTimestamp() + "</timestamp>\n"); String infoMessageText = infoMessage.getMessage(); String infoMessageId = infoMessage.getId(); String infoMessageBundle = infoMessage.getBundle(); Object[] infoMessageArguments = infoMessage.getArguments(); if (infoMessageBundle != null) { // Ampersand needs to be the first, otherwise it would double-encode // other entities. infoMessageBundle = infoMessageBundle.replaceAll("&", "&"); infoMessageBundle = infoMessageBundle.replaceAll("<", "<"); infoMessageBundle = infoMessageBundle.replaceAll(">", ">"); writer.write(" <id-bundle>" + infoMessageBundle + "</id-bundle>\n"); } if (infoMessageId != null) { // Ampersand needs to be the first, otherwise it would double-encode // other entities. infoMessageId = infoMessageId.replaceAll("&", "&"); infoMessageId = infoMessageId.replaceAll("<", "<"); infoMessageId = infoMessageId.replaceAll(">", ">"); writer.write(" <id>" + infoMessageId + "</id>\n"); } if (infoMessageText != null) { // Ampersand needs to be the first, otherwise it would double-encode // other entities. 
infoMessageText = infoMessageText.replaceAll("&", "&"); infoMessageText = infoMessageText.replaceAll("<", "<"); infoMessageText = infoMessageText.replaceAll(">", ">"); writer.write(" <message>" + infoMessageText + "</message>\n"); } if (infoMessageArguments != null) { writer.write(" <arguments>\n"); int argumentCount = infoMessageArguments.length; for (int j = 0; j < argumentCount; j++) { if (infoMessageArguments[j] == null) { writer.write(" <argument number=\"" + j + "\">\n"); writer.write(" <class></class>\n"); writer.write(" <value>null</value>\n"); writer.write(" </argument>\n"); continue; } String className = infoMessageArguments[j].getClass().getName(); // Ampersand needs to be the first, otherwise it would double-encode // other entities. className = className.replaceAll("&", "&"); className = className.replaceAll("<", "<"); className = className.replaceAll(">", ">"); String value = infoMessageArguments[j].toString(); // Ampersand needs to be the first, otherwise it would double-encode // other entities. value = value.replaceAll("&", "&"); value = value.replaceAll("<", "<"); value = value.replaceAll(">", ">"); writer.write(" <argument number=\"" + j + "\">\n"); writer.write(" <class>" + className + "</class>\n"); writer.write(" <value>" + value + "</value>\n"); writer.write(" </argument>\n"); } writer.write(" </arguments>\n"); } Exception exception = infoMessage.getException(); if (exception != null) { writer.write(" <exception>\n"); String className = exception.getClass().getName(); // Ampersand needs to be the first, otherwise it would double-encode // other entities. 
className = className.replaceAll("&", "&"); className = className.replaceAll("<", "<"); className = className.replaceAll(">", ">"); writer.write(" <class>" + className + "</class>\n"); StringWriter stringWriter = new StringWriter(); PrintWriter printWriter = new PrintWriter(stringWriter); exception.printStackTrace(printWriter); String stackTrace = stringWriter.toString(); // Ampersand needs to be the first, otherwise it would double-encode // other entities. stackTrace = stackTrace.replaceAll("&", "&"); stackTrace = stackTrace.replaceAll("<", "<"); stackTrace = stackTrace.replaceAll(">", ">"); writer.write(" <stack-trace>" + stackTrace + "</stack-trace>\n"); writer.write(" </exception>\n"); } writer.write(" </info-message>\n"); } writer.write(" </info-messages>\n"); writer.write(" </success>\n"); } writer.write("</json-to-xml-1-result-information>\n"); writer.flush(); writer.close(); } catch (FileNotFoundException ex) { ex.printStackTrace(); System.exit(-1); } catch (UnsupportedEncodingException ex) { ex.printStackTrace(); System.exit(-1); } catch (IOException ex) { ex.printStackTrace(); System.exit(-1); } } converter.getInfoMessages().clear(); converter.resultInfoFile = null; }
From source file:com.tamingtext.tagging.LuceneTagExtractor.java
/**
 * Command line entry point: parses the options and calls {@code emitTextForTags} for the
 * given Lucene index directory.
 *
 * <p>Options: {@code --dir/-d} (required), {@code --field/-f} (required),
 * {@code --output/-o}, {@code --max/-m}, {@code --help/-h}. Output goes to the directory
 * named by {@code --output}; without it the historical default
 * {@code /home/drew/taming-text/delicious/training} is used.
 *
 * <p>NOTE(review): {@code --field} and {@code --max} are validated but
 * {@code emitTextForTags(File, File)} does not receive them - confirm whether that is
 * intended.
 *
 * @param args command line arguments
 * @throws IOException if the output directory cannot be created
 * @throws IllegalArgumentException if the index directory is not a directory or
 *     {@code --max} is negative
 */
public static void main(String[] args) throws IOException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = obuilder.withLongName("dir").withRequired(true)
            .withArgument(abuilder.withName("dir").withMinimum(1).withMaximum(1).create())
            .withDescription("The Lucene directory").withShortName("d").create();
    Option outputOpt = obuilder.withLongName("output").withRequired(false)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The output directory").withShortName("o").create();
    Option maxOpt = obuilder.withLongName("max").withRequired(false)
            .withArgument(abuilder.withName("max").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "The maximum number of vectors to output. If not specified, then it will loop over all docs")
            .withShortName("m").create();
    Option fieldOpt = obuilder.withLongName("field").withRequired(true)
            .withArgument(abuilder.withName("field").withMinimum(1).withMaximum(1).create())
            .withDescription("The field in the index").withShortName("f").create();
    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    // helpOpt has to be part of the group, otherwise the parser never recognises --help and
    // the hasOption(helpOpt) check below can never be true.
    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(maxOpt)
            .withOption(fieldOpt).withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        File file = new File(cmdLine.getValue(inputOpt).toString());
        if (!file.isDirectory()) {
            throw new IllegalArgumentException(file + " does not exist or is not a directory");
        }

        long maxDocs = Long.MAX_VALUE;
        if (cmdLine.hasOption(maxOpt)) {
            maxDocs = Long.parseLong(cmdLine.getValue(maxOpt).toString());
        }
        if (maxDocs < 0) {
            throw new IllegalArgumentException("maxDocs must be >= 0");
        }

        String field = cmdLine.getValue(fieldOpt).toString();

        // Honour --output as the output directory. The previous code opened a writer on
        // that path (creating a stray file, or failing when it really was a directory),
        // never wrote to it, and always used the hard-coded directory instead.
        File output = cmdLine.hasOption(outputOpt)
                ? new File(cmdLine.getValue(outputOpt).toString())
                : new File("/home/drew/taming-text/delicious/training");
        output.mkdirs();
        if (!output.isDirectory()) {
            throw new IOException("Could not create output directory " + output);
        }

        emitTextForTags(file, output);
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}
From source file:com.tamingtext.tagging.LuceneCategoryExtractor.java
public static void main(String[] args) throws IOException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputOpt = obuilder.withLongName("dir").withRequired(true) .withArgument(abuilder.withName("dir").withMinimum(1).withMaximum(1).create()) .withDescription("The Lucene directory").withShortName("d").create(); Option outputOpt = obuilder.withLongName("output").withRequired(false) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("The output directory").withShortName("o").create(); Option maxOpt = obuilder.withLongName("max").withRequired(false) .withArgument(abuilder.withName("max").withMinimum(1).withMaximum(1).create()) .withDescription(//www .ja v a2 s. com "The maximum number of documents to analyze. If not specified, then it will loop over all docs") .withShortName("m").create(); Option fieldOpt = obuilder.withLongName("field").withRequired(true) .withArgument(abuilder.withName("field").withMinimum(1).withMaximum(1).create()) .withDescription("The field in the index").withShortName("f").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(maxOpt) .withOption(fieldOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } File inputDir = new File(cmdLine.getValue(inputOpt).toString()); if (!inputDir.isDirectory()) { throw new IllegalArgumentException(inputDir + " does not exist or is not a directory"); } long maxDocs = Long.MAX_VALUE; if (cmdLine.hasOption(maxOpt)) { maxDocs = Long.parseLong(cmdLine.getValue(maxOpt).toString()); } if (maxDocs < 0) { throw new IllegalArgumentException("maxDocs must be >= 
0"); } String field = cmdLine.getValue(fieldOpt).toString(); PrintWriter out = null; if (cmdLine.hasOption(outputOpt)) { out = new PrintWriter(new FileWriter(cmdLine.getValue(outputOpt).toString())); } else { out = new PrintWriter(new OutputStreamWriter(System.out, "UTF-8")); } dumpDocumentFields(inputDir, field, maxDocs, out); IOUtils.close(Collections.singleton(out)); } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } }
From source file:com.ibm.watson.catalyst.corpus.tfidf.CorpusTfidf.java
public static void main(String[] args) { PROPERTIES = BaseProperties.setInstance(args, "sample/test.properties"); String input = PROPERTIES.getProperty("input", "sample/test-check.json"); TermCorpusBuilder cb = new TermCorpusBuilder(); cb.setJson(input);/*from w w w .ja va 2 s.c o m*/ System.out.println("Building corpus."); TermCorpus c = cb.build(); System.out.println(c.size()); System.out.println("Generating terms."); c.genTerms(); System.out.println("Generating idfs."); c.genIdfs(); System.out.println(c.numTerms()); System.out.println("Terms generated."); ObjectNode tfidfs = getCorpusTfidfs(c); String output = PROPERTIES.getProperty("output", "sample/test-tfidf-output.json"); try (BufferedWriter bw = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(output), "UTF-8"))) { bw.write(tfidfs.toString()); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:com.webkruscht.wmt.DownloadFiles.java
/** * @param args/*from w w w . j a v a2 s. c om*/ * @throws Exception */ public static void main(String[] args) throws Exception { WebmasterTools wmt; String filename; Date date = new Date(); SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss"); String today = sdf.format(date); getProperties(); Options options = getOptions(args); try { wmt = new WebmasterTools(username, password); for (SitesEntry entry : wmt.getUserSites()) { // only process verified sites if (entry.getVerified()) { // get download paths for site JSONObject data = wmt.getDownloadList(entry); if (data != null) { for (String prop : props) { String path = (String) data.get("TOP_QUERIES"); path += "&prop=" + prop; URL url = new URL(entry.getTitle().getPlainText()); if (options.getStartdate() != null) { path += "&db=" + options.getStartdate(); path += "&de=" + options.getEnddate(); filename = String.format("%s-%s-%s-%s-%s.csv", url.getHost(), options.getStartdate(), options.getEnddate(), prop, "TopQueries"); } else { filename = String.format("%s-%s-%s-%s.csv", url.getHost(), today, prop, "TopQueries"); } OutputStreamWriter out = new OutputStreamWriter( new FileOutputStream(filePath + filename), "UTF-8"); wmt.downloadData(path, out); out.close(); } String path = (String) data.get("TOP_PAGES"); URL url = new URL(entry.getTitle().getPlainText()); filename = String.format("%s-%s-%s.csv", url.getHost(), today, "TopQueries"); OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(filePath + filename), "UTF-8"); wmt.downloadData(path, out); out.close(); } } } } catch (Exception e) { e.printStackTrace(); throw e; } }
From source file:com.joliciel.jochre.search.JochreSearch.java
/**
 * Command line entry point. Every argument has the form {@code name=value}.
 *
 * <p>{@code command} selects the action:
 * <ul>
 * <li>{@code buildIndex} - calls {@code updateDocument} on an index builder for
 * {@code indexDir}, passing {@code documentDir}</li>
 * <li>{@code updateIndex} - calls {@code updateIndex} with {@code documentDir} and the
 * optional {@code forceUpdate} flag</li>
 * <li>{@code search} - runs the query built from the remaining arguments against
 * {@code indexDir} and writes snippets for the hits to standard output (UTF-8)</li>
 * <li>{@code highlight} - same query, but writes highlights instead of snippets</li>
 * </ul>
 * {@code logConfigFile}, when present, names a properties file handed to
 * {@code PropertyConfigurator}.
 *
 * @param args {@code name=value} pairs
 * @throws RuntimeException for an unknown or missing command or a malformed argument;
 *     I/O failures are logged and rethrown wrapped in a {@code RuntimeException}
 */
public static void main(String[] args) {
    try {
        Map<String, String> argMap = new HashMap<String, String>();
        for (String arg : args) {
            int equalsPos = arg.indexOf('=');
            if (equalsPos < 0) {
                // Used to fail with an opaque StringIndexOutOfBoundsException.
                throw new IllegalArgumentException("Arguments must have the form name=value: " + arg);
            }
            String argName = arg.substring(0, equalsPos);
            String argValue = arg.substring(equalsPos + 1);
            argMap.put(argName, argValue);
        }

        String command = argMap.get("command");
        argMap.remove("command");

        String logConfigPath = argMap.get("logConfigFile");
        if (logConfigPath != null) {
            argMap.remove("logConfigFile");
            Properties props = new Properties();
            FileInputStream logConfigIn = new FileInputStream(logConfigPath);
            try {
                props.load(logConfigIn);
            } finally {
                // The stream used to be left open.
                logConfigIn.close();
            }
            PropertyConfigurator.configure(props);
        }

        LOG.debug("##### Arguments:");
        for (Entry<String, String> arg : argMap.entrySet()) {
            LOG.debug(arg.getKey() + ": " + arg.getValue());
        }

        SearchServiceLocator locator = SearchServiceLocator.getInstance();
        SearchService searchService = locator.getSearchService();

        // Constant-first equals: a missing command falls through to the
        // "Unknown command" error instead of a NullPointerException.
        if ("buildIndex".equals(command)) {
            String indexDirPath = argMap.get("indexDir");
            String documentDirPath = argMap.get("documentDir");
            File indexDir = new File(indexDirPath);
            indexDir.mkdirs();
            File documentDir = new File(documentDirPath);

            JochreIndexBuilder builder = searchService.getJochreIndexBuilder(indexDir);
            builder.updateDocument(documentDir);
        } else if ("updateIndex".equals(command)) {
            String indexDirPath = argMap.get("indexDir");
            String documentDirPath = argMap.get("documentDir");
            boolean forceUpdate = false;
            if (argMap.containsKey("forceUpdate")) {
                forceUpdate = argMap.get("forceUpdate").equals("true");
            }
            File indexDir = new File(indexDirPath);
            indexDir.mkdirs();
            File documentDir = new File(documentDirPath);

            JochreIndexBuilder builder = searchService.getJochreIndexBuilder(indexDir);
            builder.updateIndex(documentDir, forceUpdate);
        } else if ("search".equals(command) || "highlight".equals(command)) {
            // "highlight" was only tested inside the "search" branch, so the highlight
            // path below could never run; accept it as a command in its own right.
            HighlightServiceLocator highlightServiceLocator = HighlightServiceLocator.getInstance(locator);
            HighlightService highlightService = highlightServiceLocator.getHighlightService();

            String indexDirPath = argMap.get("indexDir");
            File indexDir = new File(indexDirPath);

            JochreQuery query = searchService.getJochreQuery(argMap);
            JochreIndexSearcher searcher = searchService.getJochreIndexSearcher(indexDir);
            TopDocs topDocs = searcher.search(query);

            Set<Integer> docIds = new LinkedHashSet<Integer>();
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                docIds.add(scoreDoc.doc);
            }
            Set<String> fields = new HashSet<String>();
            fields.add("text");

            Highlighter highlighter = highlightService.getHighlighter(query, searcher.getIndexSearcher());
            HighlightManager highlightManager = highlightService
                    .getHighlightManager(searcher.getIndexSearcher());
            highlightManager.setDecimalPlaces(query.getDecimalPlaces());
            highlightManager.setMinWeight(0.0);
            highlightManager.setIncludeText(true);
            highlightManager.setIncludeGraphics(true);

            Writer out = new PrintWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8));
            if ("highlight".equals(command)) {
                highlightManager.highlight(highlighter, docIds, fields, out);
            } else {
                highlightManager.findSnippets(highlighter, docIds, fields, out);
            }
            // The writer buffers; without a flush the tail of the output may never reach
            // System.out. It is flushed rather than closed so System.out stays usable.
            out.flush();
        } else {
            throw new RuntimeException("Unknown command: " + command);
        }
    } catch (RuntimeException e) {
        LogUtils.logError(LOG, e);
        throw e;
    } catch (IOException e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}
From source file:de.tudarmstadt.ukp.csniper.resbuild.EvaluationItemFixer.java
public static void main(String[] args) { connect(HOST, DATABASE, USER, PASSWORD); Map<Integer, String> items = new HashMap<Integer, String>(); Map<Integer, String> failed = new HashMap<Integer, String>(); // fetch coveredTexts of dubious items and clean it PreparedStatement select = null; try {/*w w w. j av a 2s . c o m*/ StringBuilder selectQuery = new StringBuilder(); selectQuery.append("SELECT * FROM EvaluationItem "); selectQuery.append("WHERE LOCATE(coveredText, ' ') > 0 "); selectQuery.append("OR LOCATE('" + LRB + "', coveredText) > 0 "); selectQuery.append("OR LOCATE('" + RRB + "', coveredText) > 0 "); selectQuery.append("OR LEFT(coveredText, 1) = ' ' "); selectQuery.append("OR RIGHT(coveredText, 1) = ' ' "); select = connection.prepareStatement(selectQuery.toString()); log.info("Running query [" + selectQuery.toString() + "]."); ResultSet rs = select.executeQuery(); while (rs.next()) { int id = rs.getInt("id"); String coveredText = rs.getString("coveredText"); try { // special handling of double whitespace: in this case, re-fetch the text if (coveredText.contains(" ")) { coveredText = retrieveCoveredText(rs.getString("collectionId"), rs.getString("documentId"), rs.getInt("beginOffset"), rs.getInt("endOffset")); } // replace bracket placeholders and trim the text coveredText = StringUtils.replace(coveredText, LRB, "("); coveredText = StringUtils.replace(coveredText, RRB, ")"); coveredText = coveredText.trim(); items.put(id, coveredText); } catch (IllegalArgumentException e) { failed.put(id, e.getMessage()); } } } catch (SQLException e) { log.error("Exception while selecting: " + e.getMessage()); } finally { closeQuietly(select); } // write logs BufferedWriter bwf = null; BufferedWriter bws = null; try { bwf = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(LOG_FAILED)), "UTF-8")); for (Entry<Integer, String> e : failed.entrySet()) { bwf.write(e.getKey() + " - " + e.getValue() + "\n"); } bws = new BufferedWriter( new 
OutputStreamWriter(new FileOutputStream(new File(LOG_SUCCESSFUL)), "UTF-8")); for (Entry<Integer, String> e : items.entrySet()) { bws.write(e.getKey() + " - " + e.getValue() + "\n"); } } catch (IOException e) { log.error("Got an IOException while writing the log files."); } finally { IOUtils.closeQuietly(bwf); IOUtils.closeQuietly(bws); } log.info("Texts for [" + items.size() + "] items need to be cleaned up."); // update the dubious items with the cleaned coveredText PreparedStatement update = null; try { String updateQuery = "UPDATE EvaluationItem SET coveredText = ? WHERE id = ?"; update = connection.prepareStatement(updateQuery); int i = 0; for (Entry<Integer, String> e : items.entrySet()) { int id = e.getKey(); String coveredText = e.getValue(); // update item in database update.setString(1, coveredText); update.setInt(2, id); update.executeUpdate(); log.debug("Updating " + id + " with [" + coveredText + "]"); // show percentage of updated items i++; int part = (int) Math.ceil((double) items.size() / 100); if (i % part == 0) { log.info(i / part + "% finished (" + i + "/" + items.size() + ")."); } } } catch (SQLException e) { log.error("Exception while updating: " + e.getMessage()); } finally { closeQuietly(update); } closeQuietly(connection); }