List of usage examples for org.apache.commons.lang StringUtils normalizeSpace
public static String normalizeSpace(String str)
From source file:de.tudarmstadt.ukp.csniper.resbuild.stuff.CasFlusher.java
@Override public void process(CAS aCas) throws AnalysisEngineProcessException { try {/*w w w .ja va 2 s . co m*/ int aBegin = 112715;//98877; int aEnd = 112734;//98993; OutputStream aOutputStream = System.out; Collection<? extends Annotation> annos; annos = JCasUtil.select(aCas.getJCas(), Sentence.class); Annotation a = new ArrayList<Annotation>(annos).get(92); aBegin = a.getBegin(); aEnd = a.getEnd(); if (aBegin > -1 && aEnd > -1) { annos = JCasUtil.selectCovered(aCas.getJCas(), Annotation.class, aBegin, aEnd); } else { annos = JCasUtil.select(aCas.getJCas(), Annotation.class); } for (Annotation anno : annos) { StringBuilder sb = new StringBuilder(); sb.append("[" + anno.getClass().getSimpleName() + "] "); sb.append("(" + anno.getBegin() + "," + anno.getEnd() + ") "); sb.append(anno.getCoveredText() + "\n"); try { IOUtils.write(sb, aOutputStream, "UTF-8"); } catch (IOException e) { e.printStackTrace(); } } for (PennTree pt : JCasUtil.selectCovered(aCas.getJCas(), PennTree.class, aBegin, aEnd)) { IOUtils.write(StringUtils.normalizeSpace(pt.getPennTree()), aOutputStream, "UTF-8"); } } catch (CASException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } }
From source file:com.bahmanm.karun.PackageCollection.java
/**
 * Reads package information from a package directory.
 *
 * <p>The directory must contain a file named {@code desc}. The file is read
 * line by line; a {@code %NAME%}, {@code %VERSION%} or {@code %DESC%} marker
 * line announces that the next non-marker line holds the package name, the
 * repository version or the description respectively. Every line is
 * whitespace-normalized before it is interpreted.
 *
 * <p>A read error in the middle of the file is logged and the package is
 * returned with whatever fields were set up to that point.
 *
 * @param pkgDir Package directory
 * @return Package populated from the {@code desc} file
 * @throws FileNotFoundException if the {@code desc} file cannot be opened
 * @throws IOException if the reader cannot be closed
 */
private Package readPackage(File pkgDir) throws FileNotFoundException, IOException {
    File f = new File(pkgDir.getAbsolutePath() + "/desc");
    // NOTE(review): the file is decoded with the platform default charset;
    // confirm the encoding of desc files before pinning one explicitly.
    BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
    Package pkg = new Package();
    try {
        // Each flag is true while the value line for its marker is still pending.
        boolean name = false;
        boolean desc = false;
        boolean version = false;
        String line;
        while ((line = br.readLine()) != null) {
            line = StringUtils.normalizeSpace(line);
            if ("%NAME%".equals(line)) {
                name = !name;
            } else if ("%VERSION%".equals(line)) {
                version = !version;
            } else if ("%DESC%".equals(line)) {
                desc = !desc;
            } else if (name) {
                pkg.setName(line);
                name = false;
            } else if (version) {
                pkg.setRepoVersion(line);
                version = false;
            } else if (desc) {
                pkg.setDescription(line);
                desc = false;
            }
        }
    } catch (IOException ex) {
        // Best effort: keep what has been parsed so far and report the failure.
        Logger.getLogger(PackageCollection.class.getName()).log(Level.SEVERE, null, ex);
    } finally {
        try {
            // Closing the reader also closes the wrapped file stream.
            br.close();
        } catch (IOException ioex) {
            // Keep the original exception as cause so the stack trace is not lost.
            throw new IOException("Error closing stream or reader: " + ioex.getMessage(), ioex);
        }
    }
    return pkg;
}
From source file:de.tudarmstadt.ukp.csniper.webapp.evaluation.MlPipeline.java
public void classify(File aModelDir, List<EvaluationResult> aToPredictList) throws IOException, UIMAException { TKSVMlightSequenceClassifierBuilder builder = new TKSVMlightSequenceClassifierBuilder(); TKSVMlightSequenceClassifier classifier = builder.loadClassifierFromTrainingDirectory(aModelDir); File cFile = File.createTempFile("tkclassify", ".txt"); BufferedWriter bw = null;/*from ww w . j a va2 s.c o m*/ try { bw = new BufferedWriter(new FileWriter(cFile)); // predict unclassified CAS cas = CasCreationUtils.createCas(createTypeSystemDescription(), null, null); ProgressMeter progress = new ProgressMeter(aToPredictList.size()); for (EvaluationResult result : aToPredictList) { cas.setDocumentText(result.getItem().getCoveredText()); cas.setDocumentLanguage(language); // dummy sentence split sent.process(cas); // tokenize tok.process(cas); // get parse from db, or parse now String pennTree = parse(result, cas); // write tree to file Feature tree = new Feature("TK_tree", StringUtils.normalizeSpace(pennTree)); TreeFeatureVector tfv = classifier.getFeaturesEncoder().encodeAll(Arrays.asList(tree)); try { bw.write("0"); bw.write(TKSVMlightDataWriter.createString(tfv)); bw.write(SystemUtils.LINE_SEPARATOR); } catch (IOException e) { throw new AnalysisEngineProcessException(e); } cas.reset(); progress.next(); LOG.info(progress); if (task != null) { task.increment(); task.checkCanceled(); } } } finally { IOUtils.closeQuietly(bw); } // classify all List<Double> predictions = classifier.tkSvmLightPredict2(cFile); if (predictions.size() != aToPredictList.size()) { // TODO throw different exception instead throw new IOException("there are [" + predictions.size() + "] predictions, but [" + aToPredictList.size() + "] were expected."); } for (int i = 0; i < aToPredictList.size(); i++) { Mark m = (predictions.get(i) > THRESHOLD) ? Mark.PRED_CORRECT : Mark.PRED_WRONG; aToPredictList.get(i).setResult(m.getTitle()); } }
From source file:com.haulmont.cuba.web.gui.WebWindow.java
/**
 * Returns the style name of the wrapped component with every occurrence of
 * the internal {@code C_WINDOW_LAYOUT} style removed and the remaining
 * whitespace normalized.
 */
@Override
public String getStyleName() {
    String rawStyle = component.getStyleName();
    String withoutLayoutStyle = rawStyle.replace(C_WINDOW_LAYOUT, "");
    return StringUtils.normalizeSpace(withoutLayoutStyle);
}
From source file:com.haulmont.cuba.web.gui.components.WebGroupBox.java
/**
 * Returns the inherited style name with every occurrence of the internal
 * {@code GROUPBOX_PANEL_STYLENAME} style removed and the remaining whitespace
 * normalized.
 */
@Override
public String getStyleName() {
    String inheritedStyle = super.getStyleName();
    String withoutPanelStyle = inheritedStyle.replace(GROUPBOX_PANEL_STYLENAME, "");
    return StringUtils.normalizeSpace(withoutPanelStyle);
}
From source file:de.tudarmstadt.ukp.csniper.webapp.evaluation.MlPipeline.java
public static File train(List<EvaluationResult> aTrainingList, EvaluationRepository aRepository) throws IOException, CleartkProcessingException { File modelDir = Files.createTempDir(); DefaultTKSVMlightDataWriterFactory dataWriterFactory = new DefaultTKSVMlightDataWriterFactory(); dataWriterFactory.setOutputDirectory(modelDir); DataWriter<Boolean> dataWriter = dataWriterFactory.createDataWriter(); for (EvaluationResult result : aTrainingList) { CachedParse cp = aRepository.getCachedParse(result.getItem()); if (cp == null || cp.getPennTree().isEmpty() || "ERROR".equals(cp.getPennTree())) { System.out.println("Unable to parse: [" + result.getItem().getCoveredText() + "] (cached)"); continue; }/*from w ww.j a v a 2 s . c om*/ Instance<Boolean> instance = new Instance<Boolean>(); instance.add(new Feature("TK_tree", StringUtils.normalizeSpace(cp.getPennTree()))); instance.setOutcome(Mark.fromString(result.getResult()) == Mark.CORRECT); dataWriter.write(instance); } dataWriter.finish(); // train model try { Train.main(modelDir.getPath(), "-t", "5", "-c", "1.0", "-C", "+"); } catch (Exception e) { throw new CleartkProcessingException(e); } return modelDir; }
From source file:de.fhg.iais.cortex.services.ingest.worker.IndexerWorker.java
private void addPreviewToIndexAndStore(IIngestContext context, IIndexerDocument document) throws JAXBException { if (context.getAipObject().isNewFormat()) { Preview preview = context.getAipObject() .getObjectForPathOrNull(NewSchemaPaths.PATH_CORTEX_ITEM_PREVIEW.path(), Preview.class); String subtitle = preview.getSubtitle(); if (!Strings.isNullOrEmpty(subtitle)) { document.addPreview(subtitle); }//from w ww .j av a 2 s .c o m String title = preview.getTitle(); document.storePreview(XmlProcessor.marshallToString(this.OBJECT_FACTORY.createPreview(preview)), title); } else { CortexItemPreview preview = context.getAipObject().getObjectForPathOrNull( OldSchemaPaths.PATH_CORTEX_ITEM_PREVIEW.path(), CortexItemPreview.class); String title = ""; for (PreviewDiv.Div div : preview.getDiv().getDiv()) { if (div.getClazz().equals("subtitle")) { for (Serializable content : div.getContent()) { if (content instanceof String) { document.addPreview(StringUtils.normalizeSpace((String) content)); } } } else if (div.getClazz().equals("title")) { for (Serializable content : div.getContent()) { if (content instanceof String) { title += ((String) content).trim(); } } } } document.storePreview( XmlProcessor.marshallToString(this.OBJECT_FACTORY.createCortexItemPreview(preview)), title); } }
From source file:de.fhg.iais.cortex.services.ingest.worker.IndexerWorker.java
/**
 * Adds the "view" part of the ingested object to the index document:
 * institution name, rights, item fields, institution address and, for
 * new-format objects, the display names of the institution's locations.
 *
 * <p>New-format and old-format objects are read through different schema
 * paths; free-text values are whitespace-normalized before they are indexed.
 *
 * @param context ingest context providing the AIP object
 * @param document index document that receives the view content
 * @throws JAXBException declared by the signature for JAXB failures
 */
private void addViewToIndex(IIngestContext context, IIndexerDocument document) throws JAXBException {
    AipObject aipObject = context.getAipObject();

    if (aipObject.isNewFormat()) {
        String institutionName = aipObject.getObjectForPathOrNull(
                NewSchemaPaths.PATH_CORTEX_ITEM_VIEW_ITEM_INSTITUTION_NAME.path(), String.class);
        if (!Strings.isNullOrEmpty(institutionName)) {
            document.addInstitutionName(institutionName);
        }

        String rights = aipObject
                .getObjectForPathOrNull(NewSchemaPaths.PATH_CORTEX_ITEM_VIEW_ITEM_RIGHTS.path(), String.class);
        if (!Strings.isNullOrEmpty(rights)) {
            document.addView(StringUtils.normalizeSpace(rights));
        }

        Iterator<NewItemPropertyField> displayFields = aipObject.getIteratorForPath(
                NewSchemaPaths.PATH_CORTEX_ITEM_VIEW_ITEM_FIELDS_DISPLAY.path(), NewItemPropertyField.class);
        addFieldsFromView(document, displayFields);

        Iterator<NewItemPropertyField> extendedFields = aipObject.getIteratorForPath(
                NewSchemaPaths.PATH_CORTEX_ITEM_VIEW_ITEM_FIELDS_EXTENDED_DISPLAY.path(),
                NewItemPropertyField.class);
        addFieldsFromView(document, extendedFields);

        // Walk aip -> view -> institution -> locations; any missing link ends the walk.
        Cortex aip = aipObject.getAip();
        View view = (aip == null) ? null : aip.getView();
        ViewInstitution institution = (view == null) ? null : view.getCortexInstitution();
        ViewInstitution.Locations locations = (institution == null) ? null : institution.getLocations();
        if (locations != null) {
            for (ViewInstitution.Locations.Location location : locations.getLocation()) {
                addInstitutionAddressToView(document, location.getAddress());

                String locationDisplayName = location.getLocationDisplayName();
                if (!Strings.isNullOrEmpty(locationDisplayName)) {
                    document.addLocationDisplayName(locationDisplayName);
                }
            }
        }
    } else {
        String institutionName = aipObject.getObjectForPathOrNull(
                OldSchemaPaths.PATH_CORTEX_ITEM_VIEW_ITEM_INSTITUTION_NAME.path(), String.class);
        if (!Strings.isNullOrEmpty(institutionName)) {
            document.addInstitutionName(institutionName);
        }

        HtmlSnippet rights = aipObject.getObjectForPathOrNull(
                OldSchemaPaths.PATH_CORTEX_ITEM_VIEW_ITEM_RIGHTS.path(), HtmlSnippet.class);
        if (rights != null) {
            for (Serializable part : rights.getContent()) {
                if (part instanceof String) {
                    document.addView(StringUtils.normalizeSpace((String) part));
                }
            }
        }

        for (Iterator<ItemPropertyField> fields = aipObject.getIteratorForPath(
                OldSchemaPaths.PATH_CORTEX_ITEM_VIEW_ITEM_FIELDS.path(), ItemPropertyField.class);
                fields.hasNext();) {
            ItemPropertyField field = fields.next();
            for (Serializable part : field.getValue().getContent()) {
                if (part instanceof String) {
                    // Plain text content: index it unless it normalizes to nothing.
                    String text = StringUtils.normalizeSpace((String) part);
                    if (!Strings.isNullOrEmpty(text)) {
                        document.addView(text);
                    }
                } else if (part instanceof JAXBElement) {
                    // Element content: only anchor elements contribute to the index.
                    Object value = ((JAXBElement<?>) part).getValue();
                    if (value instanceof ItemPropertyField.Value.A) {
                        String normalizedLink = normalizeLink((A) value);
                        if (!Strings.isNullOrEmpty(normalizedLink)) {
                            document.addView(normalizedLink);
                        }
                    }
                }
            }
        }

        Address institutionAddress = aipObject.getObjectForPathOrNull(
                OldSchemaPaths.PATH_CORTEX_ITEM_VIEW_INSTITUTION_ADDRESS.path(), Address.class);
        addInstitutionAddressToView(document, institutionAddress);
    }
}

/** Adds every component of the given address to the view index; does nothing for a null address. */
private void addInstitutionAddressToView(IIndexerDocument document, Address address) {
    if (address == null) {
        return;
    }
    document.addView(address.getStreet());
    document.addView(address.getHouseIdentifier());
    document.addView(address.getAddressSupplement());
    document.addView(address.getPostalCode());
    document.addView(address.getCity());
    document.addView(address.getCountry());
}
From source file:de.tudarmstadt.ukp.csniper.webapp.evaluation.MlPipeline.java
/** * Mind this method may return less results than parses were passed to it, e.g. because a * cached parse may be empty or "ERROR" in which case no result for it is generated! *//* www. ja v a 2 s. c o m*/ public static List<EvaluationResult> classifyPreParsed(File aModelDir, List<CachedParse> aParses, String aType, String aUser) throws IOException, UIMAException { TKSVMlightSequenceClassifierBuilder builder = new TKSVMlightSequenceClassifierBuilder(); TKSVMlightSequenceClassifier classifier = builder.loadClassifierFromTrainingDirectory(aModelDir); File cFile = File.createTempFile("tkclassify", ".txt"); List<EvaluationItem> items = new ArrayList<EvaluationItem>(); BufferedWriter bw = null; try { bw = new BufferedWriter(new FileWriter(cFile)); for (CachedParse parse : aParses) { if (parse.getPennTree().isEmpty() || "ERROR".equals(parse.getPennTree())) { continue; } String coveredText; try { coveredText = PennTreeUtils.toText(parse.getPennTree()); } catch (EmptyStackException e) { LOG.error("Invalid Penn Tree: [" + parse.getPennTree() + "]", e); continue; } // Prepare evaluation item to return EvaluationItem item = new EvaluationItem(); item.setType(aType); item.setBeginOffset(parse.getBeginOffset()); item.setEndOffset(parse.getEndOffset()); item.setDocumentId(parse.getDocumentId()); item.setCollectionId(parse.getCollectionId()); item.setCoveredText(coveredText); items.add(item); // write tree to file Feature tree = new Feature("TK_tree", StringUtils.normalizeSpace(parse.getPennTree())); TreeFeatureVector tfv = classifier.getFeaturesEncoder().encodeAll(Arrays.asList(tree)); bw.write("0"); bw.write(TKSVMlightDataWriter.createString(tfv)); bw.write(SystemUtils.LINE_SEPARATOR); } } catch (IOException e) { throw new AnalysisEngineProcessException(e); } finally { IOUtils.closeQuietly(bw); } // classify all List<Double> predictions = classifier.tkSvmLightPredict2(cFile); if (predictions.size() != items.size()) { // TODO throw different exception instead throw new 
IOException("there are [" + predictions.size() + "] predictions, but [" + items.size() + "] were expected."); } List<EvaluationResult> results = new ArrayList<EvaluationResult>(); for (EvaluationItem item : items) { results.add(new EvaluationResult(item, aUser, "")); } for (int i = 0; i < results.size(); i++) { Mark m = (predictions.get(i) > THRESHOLD) ? Mark.PRED_CORRECT : Mark.PRED_WRONG; results.get(i).setResult(m.getTitle()); } return results; }
From source file:de.fhg.iais.cortex.services.ingest.worker.IndexerWorker.java
/**
 * Turns a link element into the text that is added to the index.
 *
 * <ul>
 * <li>A link without an href (null or empty) yields its whitespace-normalized
 * value.</li>
 * <li>A link whose href matches {@code http://.*} yields the empty string.</li>
 * <li>Any other link yields its whitespace-normalized href.</li>
 * </ul>
 *
 * @param link the link element to normalize
 * @return the text to index; may be empty, and is null when the link has
 *         neither an href nor a value
 */
private String normalizeLink(A link) {
    String href = link.getHref();
    if (Strings.isNullOrEmpty(href)) {
        // No href: fall back to the link's value. The href must not be
        // dereferenced here, it may be null.
        return StringUtils.normalizeSpace(link.getValue());
    }
    if (href.matches("http://.*")) {
        return "";
    }
    return StringUtils.normalizeSpace(href);
}