List of usage examples for org.apache.commons.lang StringUtils normalizeSpace
public static String normalizeSpace(String str)
From source file:de.tudarmstadt.ukp.csniper.resbuild.EvaluationItemFixer2.java
public static void main(String[] args) { connect(HOST, DATABASE, USER, PASSWORD); Map<Integer, String> items = new HashMap<Integer, String>(); Map<Integer, String> failed = new HashMap<Integer, String>(); // fetch coveredTexts of dubious items and clean it PreparedStatement select = null; PreparedStatement update = null; try {//w w w. ja v a2 s . c o m StringBuilder selectQuery = new StringBuilder(); selectQuery.append("SELECT * FROM cachedparse WHERE pennTree = 'ERROR' OR pennTree = ''"); select = connection.prepareStatement(selectQuery.toString()); log.info("Running query [" + selectQuery.toString() + "]."); ResultSet rs = select.executeQuery(); // CSVWriter writer; String text; JCas jcas = JCasFactory.createJCas(); String updateQuery = "UPDATE CachedParse SET pennTree = ? WHERE collectionId = ? AND documentId = ? AND beginOffset = ? AND endOffset = ?"; update = connection.prepareStatement(updateQuery); // File base = new File(""); AnalysisEngine sentences = createEngine(DummySentenceSplitter.class); AnalysisEngine tokenizer = createEngine(StanfordSegmenter.class, StanfordSegmenter.PARAM_CREATE_SENTENCES, false, StanfordSegmenter.PARAM_CREATE_TOKENS, true); AnalysisEngine parser = createEngine(StanfordParser.class, StanfordParser.PARAM_WRITE_CONSTITUENT, true, // StanfordParser.PARAM_CREATE_DEPENDENCY_TAGS, true, StanfordParser.PARAM_WRITE_PENN_TREE, true, StanfordParser.PARAM_LANGUAGE, "en", StanfordParser.PARAM_VARIANT, "factored"); while (rs.next()) { String collectionId = rs.getString("collectionId"); String documentId = rs.getString("documentId"); int beginOffset = rs.getInt("beginOffset"); int endOffset = rs.getInt("endOffset"); text = retrieveCoveredText(collectionId, documentId, beginOffset, endOffset); jcas.setDocumentText(text); jcas.setDocumentLanguage("en"); sentences.process(jcas); tokenizer.process(jcas); parser.process(jcas); // writer = new CSVWriter(new FileWriter(new File(base, documentId + ".csv")); System.out.println("Updating " + text); for 
(PennTree p : JCasUtil.select(jcas, PennTree.class)) { String tree = StringUtils.normalizeSpace(p.getPennTree()); update.setString(1, tree); update.setString(2, collectionId); update.setString(3, documentId); update.setInt(4, beginOffset); update.setInt(5, endOffset); update.executeUpdate(); System.out.println("with tree " + tree); break; } jcas.reset(); } } catch (SQLException e) { log.error("Exception while selecting: " + e.getMessage()); } catch (UIMAException e) { e.printStackTrace(); } finally { closeQuietly(select); closeQuietly(update); } // write logs // BufferedWriter bwf = null; // BufferedWriter bws = null; // try { // bwf = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File( // LOG_FAILED)), "UTF-8")); // for (Entry<Integer, String> e : failed.entrySet()) { // bwf.write(e.getKey() + " - " + e.getValue() + "\n"); // } // // bws = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File( // LOG_SUCCESSFUL)), "UTF-8")); // for (Entry<Integer, String> e : items.entrySet()) { // bws.write(e.getKey() + " - " + e.getValue() + "\n"); // } // } // catch (IOException e) { // log.error("Got an IOException while writing the log files."); // } // finally { // IOUtils.closeQuietly(bwf); // IOUtils.closeQuietly(bws); // } log.info("Texts for [" + items.size() + "] items need to be cleaned up."); // update the dubious items with the cleaned coveredText // PreparedStatement update = null; // try { // String updateQuery = "UPDATE EvaluationItem SET coveredText = ? 
WHERE id = ?"; // // update = connection.prepareStatement(updateQuery); // int i = 0; // for (Entry<Integer, String> e : items.entrySet()) { // int id = e.getKey(); // String coveredText = e.getValue(); // // // update item in database // update.setString(1, coveredText); // update.setInt(2, id); // update.executeUpdate(); // log.debug("Updating " + id + " with [" + coveredText + "]"); // // // show percentage of updated items // i++; // int part = (int) Math.ceil((double) items.size() / 100); // if (i % part == 0) { // log.info(i / part + "% finished (" + i + "/" + items.size() + ")."); // } // } // } // catch (SQLException e) { // log.error("Exception while updating: " + e.getMessage()); // } // finally { // closeQuietly(update); // } closeQuietly(connection); }
From source file:com.haulmont.cuba.web.gui.components.WebLinkButton.java
/**
 * Returns the superclass style name with every occurrence of
 * {@code BaseTheme.BUTTON_LINK} removed, passed through
 * {@code StringUtils.normalizeSpace}.
 */
@Override
public String getStyleName() {
    String inherited = super.getStyleName();
    String withoutLinkStyle = inherited.replace(BaseTheme.BUTTON_LINK, "");
    return StringUtils.normalizeSpace(withoutLinkStyle);
}
From source file:com.haulmont.cuba.web.gui.components.WebFlowBoxLayout.java
/**
 * Returns the superclass style name with every occurrence of
 * {@code FLOWLAYOUT_STYLENAME} removed, passed through
 * {@code StringUtils.normalizeSpace}.
 */
@Override
public String getStyleName() {
    String rawStyle = super.getStyleName();
    String stripped = rawStyle.replace(FLOWLAYOUT_STYLENAME, "");
    return StringUtils.normalizeSpace(stripped);
}
From source file:com.haulmont.cuba.web.gui.components.mainwindow.WebFoldersPane.java
/**
 * Returns the superclass style name with every occurrence of
 * {@code C_FOLDERS_PANE} removed, passed through
 * {@code StringUtils.normalizeSpace}.
 */
@Override
public String getStyleName() {
    String fullStyle = super.getStyleName();
    String userStyle = fullStyle.replace(C_FOLDERS_PANE, "");
    return StringUtils.normalizeSpace(userStyle);
}
From source file:com.haulmont.cuba.web.gui.components.WebButtonsPanel.java
/**
 * Returns the superclass style name with every occurrence of
 * {@code BUTTONS_PANNEL_STYLENAME} removed, passed through
 * {@code StringUtils.normalizeSpace}.
 */
@Override
public String getStyleName() {
    String combined = super.getStyleName();
    String withoutPanelStyle = combined.replace(BUTTONS_PANNEL_STYLENAME, "");
    return StringUtils.normalizeSpace(withoutPanelStyle);
}
From source file:com.haulmont.cuba.web.gui.components.mainwindow.WebLogoutButton.java
/**
 * Returns the superclass style name with every occurrence of
 * {@code LOGOUT_BUTTON_STYLENAME} removed, passed through
 * {@code StringUtils.normalizeSpace}.
 */
@Override
public String getStyleName() {
    String baseStyle = super.getStyleName();
    String remaining = baseStyle.replace(LOGOUT_BUTTON_STYLENAME, "");
    return StringUtils.normalizeSpace(remaining);
}
From source file:de.tudarmstadt.ukp.csniper.ml.NeAugmentationAnnotator.java
/**
 * Stores the given JCas in the {@code jcas} field and visits every {@code PennTree}
 * annotation in it. Each tree string is whitespace-normalized and parsed; trees for which
 * {@code PennTreeUtils.parsePennTree} returns {@code null} are skipped. When {@code augment}
 * reports a positive count, the tree is serialized again, written back to the annotation,
 * and both the original and the new string are logged at info level.
 *
 * @param aJCas the CAS whose {@code PennTree} annotations are processed
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    jcas = aJCas;
    for (PennTree annotation : JCasUtil.select(aJCas, PennTree.class)) {
        String original = StringUtils.normalizeSpace(annotation.getPennTree());
        PennTreeNode root = PennTreeUtils.parsePennTree(original);
        if (root == null) {
            continue;
        }

        int changes = augment(root, JCasUtil.selectCovered(NamedEntity.class, annotation));
        if (changes <= 0) {
            continue;
        }

        getLogger().info("ORIGINAL PENNTREE: [" + original + "]");
        String rewritten = PennTreeUtils.toPennTree(root);
        annotation.setPennTree(rewritten);
        getLogger().info("AUGMENTED PENNTREE: [" + rewritten + "]");
    }
}
From source file:de.tudarmstadt.ukp.csniper.ml.TKSVMlightFeatureExtractor.java
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException { // create a new instance for each PennTree Collection<PennTree> trees = JCasUtil.select(aJCas, PennTree.class); for (PennTree t : trees) { Instance<Boolean> instance = new Instance<Boolean>(); instance.add(new Feature("TK_tree", StringUtils.normalizeSpace(t.getPennTree()))); BooleanClassification bc = JCasUtil.selectSingle(aJCas, BooleanClassification.class); instance.setOutcome(bc.getExpectedLabel()); train(instance);// w ww . j a v a2 s . c o m } if (trees.size() == 0) { getLogger().warn("No PennTree found: " + aJCas.getDocumentText()); } if (trees.size() > 1) { getLogger().warn("Too many [" + trees.size() + "] PennTrees found: " + aJCas.getDocumentText()); } }
From source file:com.haulmont.cuba.web.gui.components.mainwindow.WebTimeZoneIndicator.java
/**
 * Returns the superclass style name with every occurrence of
 * {@code USER_TIMEZONE_LABEL_STYLENAME} removed, passed through
 * {@code StringUtils.normalizeSpace}.
 */
@Override
public String getStyleName() {
    String styleFromParent = super.getStyleName();
    String withoutLabelStyle = styleFromParent.replace(USER_TIMEZONE_LABEL_STYLENAME, "");
    return StringUtils.normalizeSpace(withoutLabelStyle);
}
From source file:com.haulmont.cuba.web.gui.components.WebListEditor.java
/**
 * Returns the superclass style name with every occurrence of
 * {@code LISTEDITOR_STYLENAME} removed, passed through
 * {@code StringUtils.normalizeSpace}.
 */
@Override
public String getStyleName() {
    String parentStyle = super.getStyleName();
    String withoutEditorStyle = parentStyle.replace(LISTEDITOR_STYLENAME, "");
    return StringUtils.normalizeSpace(withoutEditorStyle);
}