Example usage for org.apache.commons.lang StringUtils normalizeSpace

List of usage examples for org.apache.commons.lang StringUtils normalizeSpace

Introduction

On this page you can find example usages of org.apache.commons.lang StringUtils normalizeSpace.

Prototype

public static String normalizeSpace(String str) 

Source Link

Document

<p> Similar to <a href="http://www.w3.org/TR/xpath/#function-normalize-space"> http://www.w3.org/TR/xpath/#function-normalize-space</a> </p> <p> The function returns the argument string with whitespace normalized by using <code> #trim(String) </code> to remove leading and trailing whitespace and then replacing sequences of whitespace characters by a single space. </p>

Usage

From source file:de.tudarmstadt.ukp.csniper.resbuild.stuff.CasFlusher.java

/**
 * Debug dump of a single sentence span.
 * <p>
 * Takes the sentence at index 92 of the CAS, prints every annotation covered by that
 * sentence's span to {@code System.out} (one line per annotation: type, offsets, covered
 * text) and finally prints the whitespace-normalized Penn tree of every {@code PennTree}
 * annotation covered by the same span. CAS and I/O failures are only printed to stderr.
 *
 * @param aCas the CAS to dump from
 */
@Override
public void process(CAS aCas) throws AnalysisEngineProcessException {
    try {
        OutputStream sink = System.out;

        // The span to dump is defined by one fixed sentence of the document.
        Collection<? extends Annotation> sentences = JCasUtil.select(aCas.getJCas(), Sentence.class);
        Annotation pivot = new ArrayList<Annotation>(sentences).get(92);
        int spanBegin = pivot.getBegin();
        int spanEnd = pivot.getEnd();

        // Without a valid span fall back to all annotations of the CAS.
        Collection<? extends Annotation> toDump;
        if (spanBegin <= -1 || spanEnd <= -1) {
            toDump = JCasUtil.select(aCas.getJCas(), Annotation.class);
        } else {
            toDump = JCasUtil.selectCovered(aCas.getJCas(), Annotation.class, spanBegin, spanEnd);
        }

        for (Annotation current : toDump) {
            StringBuilder line = new StringBuilder();
            line.append('[').append(current.getClass().getSimpleName()).append("] ");
            line.append('(').append(current.getBegin()).append(',').append(current.getEnd()).append(") ");
            line.append(current.getCoveredText()).append('\n');
            try {
                IOUtils.write(line, sink, "UTF-8");
            } catch (IOException writeFailure) {
                // A failed line must not stop the remaining annotations from being dumped.
                writeFailure.printStackTrace();
            }
        }

        for (PennTree tree : JCasUtil.selectCovered(aCas.getJCas(), PennTree.class, spanBegin, spanEnd)) {
            String flattened = StringUtils.normalizeSpace(tree.getPennTree());
            IOUtils.write(flattened, sink, "UTF-8");
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (CASException e) {
        e.printStackTrace();
    }
}

From source file:com.bahmanm.karun.PackageCollection.java

/**
 * Reads package information from a package directory.
 * <p>
 * Parses the {@code desc} file inside {@code pkgDir} line by line. Every line is
 * whitespace-normalized first. The marker lines {@code %NAME%}, {@code %VERSION%} and
 * {@code %DESC%} toggle a flag; the next non-marker line read while a flag is set is stored
 * as the package name, repository version or description respectively.
 * <p>
 * A read error is logged and the package filled so far is still returned.
 *
 * @param pkgDir Package directory
 * @return Package
 * @throws FileNotFoundException if the {@code desc} file cannot be opened
 * @throws IOException if closing the reader fails (the original failure is kept as cause)
 */
private Package readPackage(File pkgDir) throws FileNotFoundException, IOException {
    File descFile = new File(pkgDir.getAbsolutePath() + "/desc");
    BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(descFile)));
    Package pkg = new Package();
    try {
        boolean name = false;
        boolean desc = false;
        boolean version = false;
        String line;
        while ((line = br.readLine()) != null) {
            line = StringUtils.normalizeSpace(line);
            if (line.equals("%NAME%")) {
                name = !name;
            } else if (line.equals("%VERSION%")) {
                version = !version;
            } else if (line.equals("%DESC%")) {
                desc = !desc;
            } else if (name) {
                pkg.setName(line);
                name = false;
            } else if (version) {
                pkg.setRepoVersion(line);
                version = false;
            } else if (desc) {
                pkg.setDescription(line);
                desc = false;
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(PackageCollection.class.getName()).log(Level.SEVERE, null, ex);
    } finally {
        try {
            // Closing the outermost reader also closes the wrapped file stream.
            br.close();
        } catch (IOException ioex) {
            // Keep the original exception as cause so its stack trace is not lost.
            throw new IOException("Error closing stream or reader: " + ioex.getMessage(), ioex);
        }
    }
    return pkg;
}

From source file:de.tudarmstadt.ukp.csniper.webapp.evaluation.MlPipeline.java

/**
 * Predicts a mark for every given evaluation result using the classifier loaded from
 * {@code aModelDir}.
 * <p>
 * For each result the covered text is put into a CAS, sentence-split and tokenized, a Penn
 * tree is obtained via {@code parse(...)}, and the encoded, whitespace-normalized tree is
 * written as one line to a temporary file. The file is then classified in one call and each
 * result is set to {@code PRED_CORRECT} if its prediction is greater than {@code THRESHOLD},
 * otherwise to {@code PRED_WRONG}. The temporary file is removed before the method returns.
 *
 * @param aModelDir directory the classifier is loaded from
 * @param aToPredictList results to classify; the result of each element is overwritten
 * @throws IOException if the temporary file cannot be created or opened, or if the number
 *         of predictions differs from the number of results
 * @throws UIMAException if CAS creation or processing fails; I/O errors while writing a
 *         line are wrapped in an {@code AnalysisEngineProcessException}
 */
public void classify(File aModelDir, List<EvaluationResult> aToPredictList) throws IOException, UIMAException {
    TKSVMlightSequenceClassifierBuilder builder = new TKSVMlightSequenceClassifierBuilder();
    TKSVMlightSequenceClassifier classifier = builder.loadClassifierFromTrainingDirectory(aModelDir);
    File cFile = File.createTempFile("tkclassify", ".txt");

    try {
        BufferedWriter bw = null;
        try {
            bw = new BufferedWriter(new FileWriter(cFile));

            // predict unclassified
            CAS cas = CasCreationUtils.createCas(createTypeSystemDescription(), null, null);
            ProgressMeter progress = new ProgressMeter(aToPredictList.size());
            for (EvaluationResult result : aToPredictList) {
                cas.setDocumentText(result.getItem().getCoveredText());
                cas.setDocumentLanguage(language);

                // dummy sentence split
                sent.process(cas);

                // tokenize
                tok.process(cas);

                // get parse from db, or parse now
                String pennTree = parse(result, cas);

                // write tree to file
                Feature tree = new Feature("TK_tree", StringUtils.normalizeSpace(pennTree));
                TreeFeatureVector tfv = classifier.getFeaturesEncoder().encodeAll(Arrays.asList(tree));
                try {
                    bw.write("0");
                    bw.write(TKSVMlightDataWriter.createString(tfv));
                    bw.write(SystemUtils.LINE_SEPARATOR);
                } catch (IOException e) {
                    throw new AnalysisEngineProcessException(e);
                }
                cas.reset();
                progress.next();
                LOG.info(progress);
                if (task != null) {
                    task.increment();
                    task.checkCanceled();
                }
            }
        } finally {
            IOUtils.closeQuietly(bw);
        }

        // classify all
        List<Double> predictions = classifier.tkSvmLightPredict2(cFile);

        if (predictions.size() != aToPredictList.size()) {
            // TODO throw different exception instead
            throw new IOException("there are [" + predictions.size() + "] predictions, but ["
                    + aToPredictList.size() + "] were expected.");
        }

        // Predictions are positional: the i-th prediction belongs to the i-th result.
        for (int i = 0; i < aToPredictList.size(); i++) {
            Mark m = (predictions.get(i) > THRESHOLD) ? Mark.PRED_CORRECT : Mark.PRED_WRONG;
            aToPredictList.get(i).setResult(m.getTitle());
        }
    } finally {
        // The temp file is only used as input for the classifier call above; remove it so
        // repeated calls do not pile up files in the temp directory.
        // NOTE(review): assumes tkSvmLightPredict2 is done with the file when it returns - confirm.
        if (!cFile.delete()) {
            cFile.deleteOnExit();
        }
    }
}

From source file:com.haulmont.cuba.web.gui.WebWindow.java

/**
 * Returns the style name of the wrapped component with every occurrence of
 * {@code C_WINDOW_LAYOUT} removed and the remaining whitespace normalized.
 */
@Override
public String getStyleName() {
    String rawStyles = component.getStyleName();
    String withoutLayoutStyle = rawStyles.replace(C_WINDOW_LAYOUT, "");
    return StringUtils.normalizeSpace(withoutLayoutStyle);
}

From source file:com.haulmont.cuba.web.gui.components.WebGroupBox.java

/**
 * Returns the inherited style name with every occurrence of
 * {@code GROUPBOX_PANEL_STYLENAME} removed and the remaining whitespace normalized.
 */
@Override
public String getStyleName() {
    String inheritedStyles = super.getStyleName();
    String withoutPanelStyle = inheritedStyles.replace(GROUPBOX_PANEL_STYLENAME, "");
    return StringUtils.normalizeSpace(withoutPanelStyle);
}

From source file:de.tudarmstadt.ukp.csniper.webapp.evaluation.MlPipeline.java

/**
 * Trains a model from the given evaluation results and returns the directory it was
 * written to.
 * <p>
 * For every result the cached parse is looked up in the repository. Results without a
 * cached parse, or whose Penn tree is empty or {@code "ERROR"}, are reported on stdout and
 * skipped. All other results become one training instance: the whitespace-normalized Penn
 * tree as {@code TK_tree} feature, with outcome {@code true} iff the result is
 * {@code Mark.CORRECT}.
 *
 * @param aTrainingList results to train on
 * @param aRepository source of the cached parses
 * @return the newly created temporary model directory
 * @throws IOException declared by the original signature
 * @throws CleartkProcessingException if writing instances or training fails
 */
public static File train(List<EvaluationResult> aTrainingList, EvaluationRepository aRepository)
        throws IOException, CleartkProcessingException {
    File modelDir = Files.createTempDir();

    DefaultTKSVMlightDataWriterFactory writerFactory = new DefaultTKSVMlightDataWriterFactory();
    writerFactory.setOutputDirectory(modelDir);
    DataWriter<Boolean> writer = writerFactory.createDataWriter();

    for (EvaluationResult trainingResult : aTrainingList) {
        CachedParse cached = aRepository.getCachedParse(trainingResult.getItem());

        boolean usable = cached != null && !cached.getPennTree().isEmpty()
                && !"ERROR".equals(cached.getPennTree());
        if (!usable) {
            System.out.println("Unable to parse: [" + trainingResult.getItem().getCoveredText() + "] (cached)");
            continue;
        }

        String normalizedTree = StringUtils.normalizeSpace(cached.getPennTree());
        boolean markedCorrect = Mark.fromString(trainingResult.getResult()) == Mark.CORRECT;

        Instance<Boolean> trainingInstance = new Instance<Boolean>();
        trainingInstance.add(new Feature("TK_tree", normalizedTree));
        trainingInstance.setOutcome(markedCorrect);
        writer.write(trainingInstance);
    }

    writer.finish();

    // Run the trainer on the written data; any failure is reported as a processing error.
    try {
        Train.main(modelDir.getPath(), "-t", "5", "-c", "1.0", "-C", "+");
    } catch (Exception trainingFailure) {
        throw new CleartkProcessingException(trainingFailure);
    }

    return modelDir;
}

From source file:de.fhg.iais.cortex.services.ingest.worker.IndexerWorker.java

/**
 * Adds the preview subtitle to the index document and stores the marshalled preview
 * together with its title.
 * <p>
 * New format: the subtitle (if not empty) is added as preview text and the preview is
 * stored with its title. Old format: every string content of a div with class
 * {@code subtitle} is whitespace-normalized and added as preview text; the trimmed string
 * contents of all divs with class {@code title} are concatenated into the stored title.
 * Divs without a class are ignored.
 *
 * @param context ingest context providing the AIP object
 * @param document index document to add the preview to
 * @throws JAXBException if marshalling the preview fails
 */
private void addPreviewToIndexAndStore(IIngestContext context, IIndexerDocument document) throws JAXBException {
    // NOTE(review): getObjectForPathOrNull may presumably return null; both branches
    // dereference the preview without a check - confirm a preview is always present here.
    if (context.getAipObject().isNewFormat()) {
        Preview preview = context.getAipObject()
                .getObjectForPathOrNull(NewSchemaPaths.PATH_CORTEX_ITEM_PREVIEW.path(), Preview.class);

        String subtitle = preview.getSubtitle();
        if (!Strings.isNullOrEmpty(subtitle)) {
            document.addPreview(subtitle);
        }

        String title = preview.getTitle();
        document.storePreview(XmlProcessor.marshallToString(this.OBJECT_FACTORY.createPreview(preview)), title);
    } else {

        CortexItemPreview preview = context.getAipObject().getObjectForPathOrNull(
                OldSchemaPaths.PATH_CORTEX_ITEM_PREVIEW.path(), CortexItemPreview.class);

        StringBuilder title = new StringBuilder();
        for (PreviewDiv.Div div : preview.getDiv().getDiv()) {
            // Constant-first equals: a div without a class must not cause a NullPointerException.
            if ("subtitle".equals(div.getClazz())) {
                for (Serializable content : div.getContent()) {
                    if (content instanceof String) {
                        document.addPreview(StringUtils.normalizeSpace((String) content));
                    }
                }
            } else if ("title".equals(div.getClazz())) {
                for (Serializable content : div.getContent()) {
                    if (content instanceof String) {
                        title.append(((String) content).trim());
                    }
                }
            }
        }
        document.storePreview(
                XmlProcessor.marshallToString(this.OBJECT_FACTORY.createCortexItemPreview(preview)),
                title.toString());
    }
}

From source file:de.fhg.iais.cortex.services.ingest.worker.IndexerWorker.java

/**
 * Adds the "view" part of the AIP (institution name, rights, item fields and institution
 * address data) to the index document, dispatching on the AIP schema format.
 *
 * @param context ingest context providing the AIP object
 * @param document index document to add the view data to
 * @throws JAXBException declared by the original signature
 */
private void addViewToIndex(IIngestContext context, IIndexerDocument document) throws JAXBException {
    AipObject aipObject = context.getAipObject();
    if (aipObject.isNewFormat()) {
        indexNewFormatView(aipObject, document);
    } else {
        indexOldFormatView(aipObject, document);
    }
}

/** Indexes institution name, rights, display/extended fields and locations of a new-format AIP. */
private void indexNewFormatView(AipObject aipObject, IIndexerDocument document) throws JAXBException {
    String institutionName = aipObject.getObjectForPathOrNull(
            NewSchemaPaths.PATH_CORTEX_ITEM_VIEW_ITEM_INSTITUTION_NAME.path(), String.class);
    if (!Strings.isNullOrEmpty(institutionName)) {
        document.addInstitutionName(institutionName);
    }

    String rights = aipObject
            .getObjectForPathOrNull(NewSchemaPaths.PATH_CORTEX_ITEM_VIEW_ITEM_RIGHTS.path(), String.class);
    if (!Strings.isNullOrEmpty(rights)) {
        document.addView(StringUtils.normalizeSpace(rights));
    }

    Iterator<NewItemPropertyField> displayFields = aipObject.getIteratorForPath(
            NewSchemaPaths.PATH_CORTEX_ITEM_VIEW_ITEM_FIELDS_DISPLAY.path(), NewItemPropertyField.class);
    addFieldsFromView(document, displayFields);

    Iterator<NewItemPropertyField> extendedFields = aipObject.getIteratorForPath(
            NewSchemaPaths.PATH_CORTEX_ITEM_VIEW_ITEM_FIELDS_EXTENDED_DISPLAY.path(),
            NewItemPropertyField.class);
    addFieldsFromView(document, extendedFields);

    indexInstitutionLocations(aipObject.getAip(), document);
}

/** Indexes address and display name of every institution location; does nothing if any link of the chain is missing. */
private void indexInstitutionLocations(Cortex aip, IIndexerDocument document) throws JAXBException {
    if (aip == null) {
        return;
    }
    View view = aip.getView();
    if (view == null) {
        return;
    }
    ViewInstitution institution = view.getCortexInstitution();
    if (institution == null) {
        return;
    }
    ViewInstitution.Locations locations = institution.getLocations();
    if (locations == null) {
        return;
    }

    for (ViewInstitution.Locations.Location location : locations.getLocation()) {
        Address institutionAddress = location.getAddress();
        if (institutionAddress != null) {
            indexInstitutionAddress(document, institutionAddress);
        }
        String locationDisplayName = location.getLocationDisplayName();
        if (!Strings.isNullOrEmpty(locationDisplayName)) {
            document.addLocationDisplayName(locationDisplayName);
        }
    }
}

/** Indexes institution name, rights, item fields and institution address of an old-format AIP. */
private void indexOldFormatView(AipObject aipObject, IIndexerDocument document) throws JAXBException {
    String institutionName = aipObject.getObjectForPathOrNull(
            OldSchemaPaths.PATH_CORTEX_ITEM_VIEW_ITEM_INSTITUTION_NAME.path(), String.class);
    if (!Strings.isNullOrEmpty(institutionName)) {
        document.addInstitutionName(institutionName);
    }

    HtmlSnippet rights = aipObject.getObjectForPathOrNull(
            OldSchemaPaths.PATH_CORTEX_ITEM_VIEW_ITEM_RIGHTS.path(), HtmlSnippet.class);
    if (rights != null) {
        for (Serializable content : rights.getContent()) {
            if (content instanceof String) {
                document.addView(StringUtils.normalizeSpace((String) content));
            }
        }
    }

    Iterator<ItemPropertyField> fields = aipObject.getIteratorForPath(
            OldSchemaPaths.PATH_CORTEX_ITEM_VIEW_ITEM_FIELDS.path(), ItemPropertyField.class);

    while (fields.hasNext()) {
        ItemPropertyField field = fields.next();
        for (Serializable content : field.getValue().getContent()) {
            if (content instanceof JAXBElement) {
                // Of the element contents only links are indexed, in their normalized form.
                JAXBElement<?> element = (JAXBElement<?>) content;
                if (element.getValue() instanceof ItemPropertyField.Value.A) {
                    String normalizedLink = normalizeLink((A) element.getValue());
                    if (!Strings.isNullOrEmpty(normalizedLink)) {
                        document.addView(normalizedLink);
                    }
                }
            } else if (content instanceof String) {
                String contentAsString = StringUtils.normalizeSpace((String) content);
                if (!Strings.isNullOrEmpty(contentAsString)) {
                    document.addView(contentAsString);
                }
            }
        }
    }

    Address institutionAddress = aipObject.getObjectForPathOrNull(
            OldSchemaPaths.PATH_CORTEX_ITEM_VIEW_INSTITUTION_ADDRESS.path(), Address.class);
    if (institutionAddress != null) {
        indexInstitutionAddress(document, institutionAddress);
    }
}

/** Adds all parts of an institution address to the view of the index document. */
private void indexInstitutionAddress(IIndexerDocument document, Address institutionAddress)
        throws JAXBException {
    document.addView(institutionAddress.getStreet());
    document.addView(institutionAddress.getHouseIdentifier());
    document.addView(institutionAddress.getAddressSupplement());
    document.addView(institutionAddress.getPostalCode());
    document.addView(institutionAddress.getCity());
    document.addView(institutionAddress.getCountry());
}

From source file:de.tudarmstadt.ukp.csniper.webapp.evaluation.MlPipeline.java

/**
 * Classifies already parsed items with the classifier loaded from {@code aModelDir}.
 * <p>
 * Mind this method may return less results than parses were passed to it, e.g. because a
 * cached parse may be empty or "ERROR" in which case no result for it is generated! The
 * same holds for parses whose Penn tree cannot be converted back to text.
 * <p>
 * Each usable parse is written as one encoded line to a temporary file, the file is
 * classified in one call, and every returned result is set to {@code PRED_CORRECT} if its
 * prediction is greater than {@code THRESHOLD}, otherwise to {@code PRED_WRONG}. The
 * temporary file is removed before the method returns or throws.
 *
 * @param aModelDir directory the classifier is loaded from
 * @param aParses cached parses to classify
 * @param aType type set on every created evaluation item
 * @param aUser user the created results are attributed to
 * @return one result per usable parse, in input order
 * @throws IOException if the temporary file cannot be created or the number of predictions
 *         differs from the number of usable parses
 * @throws UIMAException an {@code AnalysisEngineProcessException} wrapping I/O errors while
 *         writing the classification file
 */
public static List<EvaluationResult> classifyPreParsed(File aModelDir, List<CachedParse> aParses, String aType,
        String aUser) throws IOException, UIMAException {
    TKSVMlightSequenceClassifierBuilder builder = new TKSVMlightSequenceClassifierBuilder();
    TKSVMlightSequenceClassifier classifier = builder.loadClassifierFromTrainingDirectory(aModelDir);
    File cFile = File.createTempFile("tkclassify", ".txt");

    try {
        List<EvaluationItem> items = new ArrayList<EvaluationItem>();
        BufferedWriter bw = null;
        try {
            bw = new BufferedWriter(new FileWriter(cFile));

            for (CachedParse parse : aParses) {
                if (parse.getPennTree().isEmpty() || "ERROR".equals(parse.getPennTree())) {
                    continue;
                }

                String coveredText;
                try {
                    coveredText = PennTreeUtils.toText(parse.getPennTree());
                } catch (EmptyStackException e) {
                    LOG.error("Invalid Penn Tree: [" + parse.getPennTree() + "]", e);
                    continue;
                }

                // Prepare evaluation item to return
                EvaluationItem item = new EvaluationItem();
                item.setType(aType);
                item.setBeginOffset(parse.getBeginOffset());
                item.setEndOffset(parse.getEndOffset());
                item.setDocumentId(parse.getDocumentId());
                item.setCollectionId(parse.getCollectionId());
                item.setCoveredText(coveredText);
                items.add(item);

                // write tree to file
                Feature tree = new Feature("TK_tree", StringUtils.normalizeSpace(parse.getPennTree()));
                TreeFeatureVector tfv = classifier.getFeaturesEncoder().encodeAll(Arrays.asList(tree));

                bw.write("0");
                bw.write(TKSVMlightDataWriter.createString(tfv));
                bw.write(SystemUtils.LINE_SEPARATOR);
            }
        } catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        } finally {
            IOUtils.closeQuietly(bw);
        }

        // classify all
        List<Double> predictions = classifier.tkSvmLightPredict2(cFile);

        if (predictions.size() != items.size()) {
            // TODO throw different exception instead
            throw new IOException("there are [" + predictions.size() + "] predictions, but [" + items.size()
                    + "] were expected.");
        }

        List<EvaluationResult> results = new ArrayList<EvaluationResult>();
        for (EvaluationItem item : items) {
            results.add(new EvaluationResult(item, aUser, ""));
        }

        // Predictions are positional: the i-th prediction belongs to the i-th item/result.
        for (int i = 0; i < results.size(); i++) {
            Mark m = (predictions.get(i) > THRESHOLD) ? Mark.PRED_CORRECT : Mark.PRED_WRONG;
            results.get(i).setResult(m.getTitle());
        }

        return results;
    } finally {
        // The temp file is only used as input for the classifier call above; remove it so
        // repeated calls do not pile up files in the temp directory.
        // NOTE(review): assumes tkSvmLightPredict2 is done with the file when it returns - confirm.
        if (!cFile.delete()) {
            cFile.deleteOnExit();
        }
    }
}

From source file:de.fhg.iais.cortex.services.ingest.worker.IndexerWorker.java

/**
 * Returns the whitespace-normalized text to index for a link.
 * <p>
 * If the link has a non-empty href, the href is used, except that hrefs matching
 * {@code http://.*} yield an empty string. If the href is null or empty, the link's value
 * is used instead.
 *
 * @param link the link element to normalize
 * @return the normalized href or value; empty for {@code http://} hrefs; {@code null} if
 *         there is no href and the value is {@code null}
 */
private String normalizeLink(A link) {
    String href = link.getHref();

    // A missing href used to hit the http check below and fail with a
    // NullPointerException; fall back to the link text instead.
    if (Strings.isNullOrEmpty(href)) {
        return StringUtils.normalizeSpace(link.getValue());
    }

    if (href.matches("http://.*")) {
        return "";
    }

    return StringUtils.normalizeSpace(href);
}