Example usage for org.apache.commons.lang3 StringUtils strip

List of usage examples for org.apache.commons.lang3 StringUtils strip

Introduction

On this page you can find example usages of org.apache.commons.lang3.StringUtils.strip.

Prototype

public static String strip(final String str) 

Source Link

Document

Strips whitespace from the start and end of a String.

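This is similar to #trim(String), but removes whitespace as defined by Character.isWhitespace(char), whereas trim removes all characters with code point <= 32.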

Usage

From source file:ubic.gemma.core.datastructure.matrix.ExpressionDataWriterUtils.java

/**
 * Builds a name for a factor value in which hyphens and whitespace are replaced by underscores.
 * <p>
 * The text comes from the first applicable source: the characteristics (each value stripped; when there is more
 * than one characteristic, every value is followed by " | "), otherwise the measurement value, otherwise the
 * factor value's own non-blank value. The assembled text is trimmed before the replacements are applied.
 *
 * @param factorValue FV
 * @return replaced string
 */
public static String constructFactorValueName(FactorValue factorValue) {

    StringBuilder name = new StringBuilder();

    int characteristicCount = factorValue.getCharacteristics().size();
    if (characteristicCount > 0) {
        boolean appendSeparator = characteristicCount > 1;
        for (Characteristic characteristic : factorValue.getCharacteristics()) {
            name.append(StringUtils.strip(characteristic.getValue()));
            if (appendSeparator) {
                name.append(" | ");
            }
        }
    } else if (factorValue.getMeasurement() != null) {
        name.append(factorValue.getMeasurement().getValue());
    } else if (StringUtils.isNotBlank(factorValue.getValue())) {
        name.append(StringUtils.strip(factorValue.getValue()));
    }

    // Trim first so that flanking whitespace is dropped rather than turned into underscores.
    return name.toString().trim().replaceAll("-", "_").replaceAll("\\s", "_");
}

From source file:ubic.gemma.core.expression.experiment.service.ExpressionExperimentSearchServiceImpl.java

/**
 * Runs one search per query term (each term stripped of flanking whitespace) and returns the experiments that
 * were found for every term, i.e. the intersection of the individual result sets.
 *
 * @param query the search terms
 * @return experiments matching all terms; empty if the query has no terms
 */
@Override
public Collection<ExpressionExperimentValueObject> searchExpressionExperiments(List<String> query) {

    Set<ExpressionExperimentValueObject> matches = new HashSet<>();
    boolean firstTerm = true;

    for (String term : query) {
        String stripped = StringUtils.strip(term);
        Set<ExpressionExperimentValueObject> hits = new HashSet<>(this.searchExpressionExperiments(stripped));

        if (firstTerm) {
            // The first term seeds the result; later terms can only narrow it.
            matches = hits;
            firstTerm = false;
        } else {
            matches = Sets.intersection(matches, hits);
        }
    }

    return matches;
}

From source file:ubic.gemma.core.genome.gene.service.GeneSearchServiceImpl.java

/**
 * Resolves each query string to a gene of the given taxon.
 * <p>
 * Official-symbol lookup is tried first for the whole query. Every remaining non-blank term is then searched
 * individually; the map is keyed on the stripped, lower-cased term. A term with no search results is mapped to
 * null; a term with several results is mapped only if one of them is in the taxon and has a matching official
 * symbol (ignoring case).
 *
 * @param query   the terms to look up
 * @param taxonId id of the taxon to search in
 * @return map of query term to gene value object
 * @throws IllegalArgumentException if no taxon with the given id can be loaded
 */
@Override
public Map<String, GeneValueObject> searchMultipleGenesGetMap(Collection<String> query, Long taxonId) {
    Taxon taxon = taxonService.load(taxonId);
    if (taxon == null) {
        throw new IllegalArgumentException("No such taxon with id=" + taxonId);
    }

    // The easy cases are handled in bulk; anything left over is searched for term by term below.
    Map<String, GeneValueObject> genesByQuery = geneService.findByOfficialSymbols(query, taxonId);

    for (String rawTerm : query) {
        String term = StringUtils.strip(rawTerm);
        if (StringUtils.isBlank(term)) {
            continue;
        }

        String key = term.toLowerCase();
        if (genesByQuery.containsKey(key)) {
            continue; // already resolved
        }

        if (genesByQuery.size() >= GeneSearchServiceImpl.MAX_GENES_PER_QUERY) {
            GeneSearchServiceImpl.log
                    .warn("Too many genes, stopping (limit=" + GeneSearchServiceImpl.MAX_GENES_PER_QUERY + ')');
            break;
        }

        // One search per gene is slow, so use the quick symbol search.
        SearchSettings settings = SearchSettingsImpl.geneSearch(term, taxon);
        List<SearchResult> hits = searchService.speedSearch(settings).get(Gene.class);

        if (hits == null || hits.isEmpty()) {
            // Record the miss explicitly with a null value.
            genesByQuery.put(key, null);
            continue;
        }

        if (hits.size() == 1) {
            Gene onlyHit = (Gene) hits.get(0).getResultObject();
            genesByQuery.put(key, new GeneValueObject(onlyHit));
            continue;
        }

        // Several hits usually means the term was an official symbol that also picked up relatives
        // (searching "grin" finds grin1, grin2, grin3, grin); keep the first exact symbol match in the taxon.
        for (SearchResult hit : hits) {
            Gene candidate = (Gene) hit.getResultObject();
            boolean sameTaxon = candidate.getTaxon().equals(taxon);
            if (sameTaxon && candidate.getOfficialSymbol().equalsIgnoreCase(term)) {
                genesByQuery.put(key, new GeneValueObject(candidate));
                break;
            }
        }
    }

    return genesByQuery;
}

From source file:ubic.gemma.core.loader.entrez.pubmed.PubMedXMLParser.java

/**
 * Fill in information about the book: Publisher, Editor(s), Publication year
 *
 * @param bibRef bib ref/* w  ww . java 2 s  . c o  m*/
 * @param record record
 */
private void processBookRecord(BibliographicReference bibRef, Node record) {

    NodeList recordNodes = record.getChildNodes();
    for (int p = 0; p < recordNodes.getLength(); p++) {
        Node item = recordNodes.item(p);
        if (!(item instanceof Element)) {
            continue;
        }

        String name = item.getNodeName();
        switch (name) {
        case "ArticleTitle":
            // this is the title of the chapter.
            bibRef.setTitle(StringUtils.strip(XMLUtils.getTextValue((Element) item)));
            break;
        case "Book":
            this.processBookInfo(bibRef, item);
            break;
        case "AuthorList":
            bibRef.setAuthorList(this.extractAuthorList(item.getChildNodes()));
            break;
        case "Abstract":
            bibRef.setAbstractText("");
            NodeList abstractTextSections = item.getChildNodes();
            for (int q = 0; q < abstractTextSections.getLength(); q++) {
                Node jitem = abstractTextSections.item(q);
                if (!(jitem instanceof Element)) {
                    continue;
                }
                if (jitem.getNodeName().equals("AbstractText")) {
                    bibRef.setAbstractText(
                            bibRef.getAbstractText() + (XMLUtils.getTextValue((Element) jitem)) + " ");
                }

                bibRef.setAbstractText(bibRef.getAbstractText().trim());
            }
            break;
        case "PMID":
            this.processAccession(bibRef, item);
            break;
        case "ContributionDate":
            /*
             * Unusual, but happens for books that are updated with new sections. We use this instead of the
             * publication date.
             */
            this.extractBookPublicationYear(bibRef, item);
            break;
        default:
            log.warn("Unrecognized node name " + name);
        }
    }

}

From source file:ubic.gemma.core.loader.expression.geo.GeoFamilyParser.java

/**
 * Extract a key and value pair from a line in the format #key = value.
 *
 * @param line line/*from w  ww  . j  a v a 2  s . c o  m*/
 * @return Map containing the String key and String value. Return null if it is misformatted.
 */
private Map<String, String> extractKeyValue(String line) {
    if (!line.startsWith("#"))
        throw new IllegalArgumentException("Wrong type of line");
    Map<String, String> result = new HashMap<>();
    String fixed = line.substring(line.indexOf('#') + 1);

    String[] tokens = fixed.split("=", 2);
    if (tokens.length != 2) {
        GeoFamilyParser.log.warn("Invalid key-value line, expected an '=' somewhere, got: '" + line + "'");
        return null;
    }
    String key = tokens[0];
    String value = tokens[1];
    key = StringUtils.strip(key);
    value = StringUtils.strip(value);
    result.put(key, value);
    return result;
}

From source file:ubic.gemma.core.loader.expression.geo.GeoFamilyParser.java

/**
 * Extract a value from a line in the format xxxx=value.
 *
 * @param line line/*from   w ww  .  j  a v  a2  s .  c o  m*/
 * @return String following the first occurrence of '=', or null if there is no '=' in the String.
 */
private String extractValue(String line) {
    int eqIndex = line.indexOf('=');
    if (eqIndex < 0) {
        return null; // that's okay, there are lines that just indicate the end of sections.
    }

    return StringUtils.strip(line.substring(eqIndex + 1));
}

From source file:ubic.gemma.core.loader.expression.geo.service.GeoBrowser.java

/**
 * Retrieves and parses tab delimited file from GEO. File contains pageSize GEO records starting from startPage.
 * <p>
 * The first line of the response is read as the column header; each following line becomes one record. Fields
 * are looked up by the column names "Accession", "Title", "Sample Count", "Contact", "Taxonomy", "Release Date"
 * and "Series Type".
 *
 * @param startPage start page
 * @param pageSize  page size
 * @return list of GeoRecords
 * @throws IOException              if there is a problem while manipulating the file
 * @throws ParseException           if there is a parsing problem
 * @throws IllegalArgumentException if startPage or pageSize is negative
 */
public List<GeoRecord> getRecentGeoRecords(int startPage, int pageSize) throws IOException, ParseException {

    if (startPage < 0 || pageSize < 0)
        throw new IllegalArgumentException("Values must be greater than zero ");

    List<GeoRecord> records = new ArrayList<>();
    URL url;
    try {
        url = new URL(GEO_BROWSE_URL + startPage + GEO_BROWSE_SUFFIX + pageSize);
    } catch (MalformedURLException e) {
        throw new RuntimeException("Invalid URL: " + GEO_BROWSE_URL + startPage + GEO_BROWSE_SUFFIX + pageSize,
                e);
    }

    URLConnection conn = url.openConnection();
    conn.connect();
    try (InputStream is = conn.getInputStream();
            BufferedReader br = new BufferedReader(new InputStreamReader(is))) {

        // We are getting a tab delimited file.

        // Read columns headers.
        String headerLine = br.readLine();
        String[] headers = StringUtil.csvSplit(headerLine);

        // Map column names to their indices (handy later).
        Map<String, Integer> columnNameToIndex = new HashMap<>();
        for (int i = 0; i < headers.length; i++) {
            columnNameToIndex.put(headers[i], i);
        }

        // Read the rest of the file.
        String line;
        while ((line = br.readLine()) != null) {
            String[] fields = StringUtil.csvSplit(line);

            GeoRecord geoRecord = new GeoRecord();
            geoRecord.setGeoAccession(fields[columnNameToIndex.get("Accession")]);
            geoRecord.setTitle(StringUtils.strip(
                    fields[columnNameToIndex.get("Title")].replaceAll(GeoBrowser.FLANKING_QUOTES_REGEX, "")));

            String sampleCountS = fields[columnNameToIndex.get("Sample Count")];
            if (StringUtils.isNotBlank(sampleCountS)) {
                try {
                    geoRecord.setNumSamples(Integer.parseInt(sampleCountS));
                } catch (NumberFormatException e) {
                    // Chain the cause so the original parse failure stays visible in the stack trace.
                    throw new RuntimeException("Could not parse sample count: " + sampleCountS, e);
                }
            } else {
                GeoBrowser.log.warn("No sample count for " + geoRecord.getGeoAccession());
            }
            geoRecord.setContactName(
                    fields[columnNameToIndex.get("Contact")].replaceAll(GeoBrowser.FLANKING_QUOTES_REGEX, ""));

            // The taxonomy field may list several organisms separated by ';'.
            String[] taxons = fields[columnNameToIndex.get("Taxonomy")]
                    .replaceAll(GeoBrowser.FLANKING_QUOTES_REGEX, "").split(";");
            geoRecord.getOrganisms().addAll(Arrays.asList(taxons));

            Date date = DateUtils.parseDate(fields[columnNameToIndex.get("Release Date")]
                    .replaceAll(GeoBrowser.FLANKING_QUOTES_REGEX, ""), DATE_FORMATS);
            geoRecord.setReleaseDate(date);

            geoRecord.setSeriesType(fields[columnNameToIndex.get("Series Type")]);

            records.add(geoRecord);
        }

    }

    if (records.isEmpty()) {
        GeoBrowser.log.warn("No records obtained");
    }
    return records;

}

From source file:ubic.gemma.core.loader.expression.simple.ExperimentalDesignImporterImpl.java

/**
 * This method reads the file line e.g. $Run time : Category=environmental_history Type=categorical and creates
 * experimental factors from it and adds them to the experimental design.
 * NOTE that this doesn't have the ability to add values to existing factors, which might be desirable.
 * <p>
 * If the EFO service is enabled but its ontology is not yet loaded, this method first polls for it, sleeping 10
 * seconds between checks, and gives up after a bounded number of attempts or when the thread is interrupted. In
 * either case processing continues without the ontology.
 *
 * @param experimentalDesign          Experimental design for this expression experiment
 * @param experimentalFactorFileLines List of strings representing lines from input file containing experimental
 *                                    factors
 * @param headerFields                Sample header line split on tab.
 * @param factorValueLines            Lines containing biomaterial names and their factor values
 */
private void addExperimentalFactorsToExperimentalDesign(ExperimentalDesign experimentalDesign,
        List<String> experimentalFactorFileLines, String[] headerFields, List<String> factorValueLines) {

    int maxWait = 0;

    if (efoService.isEnabled()) {
        while (!efoService.isOntologyLoaded()) {
            try {
                Thread.sleep(10000);
                if (maxWait++ > 10) {
                    ExperimentalDesignImporterImpl.log.error("EFO is not loaded and gave up waiting");
                    break;
                    // this is okay, we can get by using OntologyTermSimple.
                }
            } catch (InterruptedException e) {
                // Restore the interrupt flag for callers and stop waiting: with the flag set, every further
                // sleep would throw immediately and the loop would spin.
                Thread.currentThread().interrupt();
                ExperimentalDesignImporterImpl.log.warn("Interrupted while waiting for EFO to load");
                break;
            }
        }
    }

    Collection<OntologyTerm> terms = ontologyService.getCategoryTerms();
    if (experimentalDesign.getExperimentalFactors() == null) {
        experimentalDesign.setExperimentalFactors(new HashSet<ExperimentalFactor>());
    }

    Map<String, Set<String>> mapFactorSampleValues = this.getMapFactorSampleValues(headerFields,
            factorValueLines);

    for (String experimentalFactorFileLine : experimentalFactorFileLines) {

        // $Run time : Category=EnvironmentalHistory Type=categorical
        String[] experimentalFactorfields = experimentalFactorFileLine.split(":");

        // Factor name: the text before the ':' without the leading line indicator.
        String factorValue = (StringUtils.strip(experimentalFactorfields[0].replaceFirst(
                Pattern.quote(ExperimentalDesignImporterImpl.EXPERIMENTAL_FACTOR_DESCRIPTION_LINE_INDICATOR)
                        + "\\s*",
                ""))).trim();
        String categoryAndType = StringUtils.strip(experimentalFactorfields[1]);
        String[] categoryAndTypeFields = StringUtils.split(categoryAndType);

        // e.g. Category=EnvironmentalHistory
        String category = categoryAndTypeFields[0];
        // e.g. EnvironmentalHistory
        String categoryValue = StringUtils.split(category, "=")[1];

        ExperimentalFactor experimentalFactorFromFile = ExperimentalFactor.Factory.newInstance();
        experimentalFactorFromFile.setExperimentalDesign(experimentalDesign);
        Characteristic vc = this.termForCategoryLookup(categoryValue, terms);

        // e.g. Type=categorical
        String categoryTypeValue = categoryAndTypeFields[1];
        String factorType = StringUtils.split(categoryTypeValue, "=")[1];

        // vc.setCategory( categoryType );

        experimentalFactorFromFile.setCategory(vc);
        experimentalFactorFromFile.setName(factorValue);
        experimentalFactorFromFile.setDescription(factorValue);
        // Anything other than "categorical" (ignoring case) is treated as continuous.
        experimentalFactorFromFile.setType(
                factorType.equalsIgnoreCase("CATEGORICAL") ? FactorType.CATEGORICAL : FactorType.CONTINUOUS);

        this.addFactorValuesToExperimentalFactor(experimentalFactorFromFile, mapFactorSampleValues, factorType);

        if (!this.checkForDuplicateExperimentalFactorOnExperimentalDesign(experimentalDesign,
                experimentalFactorFromFile)) {
            experimentalDesign.getExperimentalFactors().add(experimentalFactorFromFile);
            ExperimentalDesignImporterImpl.log.info("Added " + experimentalFactorFromFile);
        }
    }

}

From source file:ubic.gemma.core.loader.expression.simple.ExperimentalDesignImporterImpl.java

/**
 * Add the factor values to the biomaterials
 * <p>
 * Each line is split on tabs. The first field identifies the biomaterial; if the second header field is
 * "EXTERNALID", the second field is passed along as its external id and factor columns start at the third field.
 * For every non-blank factor column, the factor whose name equals the stripped header is looked up, and the
 * factor value whose text matches the cell (ignoring case) is added to the biomaterial unless already present.
 *
 * @param  experimentBioMaterials Current expression experiment's biomaterials.
 * @param  experimentalDesign     experimental design
 * @param  factorValueLines       Lines from file containing factor values and biomaterial ids
 * @param  headerFields           header fields
 * @return                        the biomaterials for which at least one non-blank factor column was processed
 * @throws IllegalStateException    if a line names an unknown biomaterial, or a column matches no factor
 * @throws IllegalArgumentException if a biomaterial occurs on more than one line
 */
private Collection<BioMaterial> addFactorValuesToBioMaterialsInExpressionExperiment(
        Collection<BioMaterial> experimentBioMaterials, ExperimentalDesign experimentalDesign,
        List<String> factorValueLines, String[] headerFields) {
    ExperimentalDesignImporterImpl.log
            .debug("Adding factors values to biomaterials: " + experimentalDesign.getId());
    Collection<ExperimentalFactor> experimentalFactorsInExperiment = experimentalDesign
            .getExperimentalFactors();
    Collection<BioMaterial> biomaterialsWithFactorValuesInExperiment = new HashSet<>();

    Collection<BioMaterial> seenBioMaterials = new HashSet<>();

    Map<ExperimentalFactor, Collection<BioMaterial>> factorsAssociatedWithBioMaterials = new HashMap<>();

    for (String factorValueLine : factorValueLines) {
        String[] factorValueFields = StringUtils.splitPreserveAllTokens(factorValueLine, "\t");

        String externalId = null;
        boolean hasExternalId = headerFields[1].toUpperCase().equals("EXTERNALID");
        if (hasExternalId) {
            externalId = factorValueFields[1];
        }
        BioMaterial currentBioMaterial = this.getBioMaterialFromExpressionExperiment(experimentBioMaterials,
                factorValueFields[0], externalId);

        if (currentBioMaterial == null) {
            // this could just be due to extras.
            throw new IllegalStateException(
                    "No biomaterial for " + factorValueFields[0] + ", " + factorValueFields[1]);
        }

        if (seenBioMaterials.contains(currentBioMaterial)) {
            throw new IllegalArgumentException(
                    "A biomaterial occurred more than once in the file: " + currentBioMaterial);
        }

        seenBioMaterials.add(currentBioMaterial);

        // Factor columns follow the name column and, when present, the external id column.
        int start = 1;
        if (hasExternalId) {
            start = 2;
        }

        for (int i = start; i < factorValueFields.length; i++) {
            ExperimentalFactor currentExperimentalFactor = null;
            String currentExperimentalFactorName = StringUtils.strip(headerFields[i]);

            FactorValue currentFactorValue = null;
            String currentFVtext = StringUtils.strip(factorValueFields[i]);

            if (StringUtils.isBlank(currentFVtext)) {
                // Missing value. Note that catching 'NA' etc. is hard, because they could be valid strings.
                continue;
            }

            for (ExperimentalFactor experimentalFactor : experimentalFactorsInExperiment) {
                if (experimentalFactor.getName().equals(currentExperimentalFactorName)) {
                    currentExperimentalFactor = experimentalFactor;
                }
            }

            if (currentExperimentalFactor == null)
                throw new IllegalStateException("No factor matches column " + currentExperimentalFactorName);

            Collection<FactorValue> factorValuesInCurrentExperimentalFactor = currentExperimentalFactor
                    .getFactorValues();

            for (FactorValue factorValue : factorValuesInCurrentExperimentalFactor) {
                String fvv = factorValue.getValue();
                if (StringUtils.isBlank(fvv) && factorValue.getCharacteristics().size() == 1) {
                    // try characteristics; this would be a mess if there are more than one.
                    fvv = factorValue.getCharacteristics().iterator().next().getValue();
                }

                if (StringUtils.isBlank(fvv)) {
                    // We can't match to factor values that lack a value string. Checking here, after the
                    // characteristic fallback, also covers a null value with zero or several characteristics,
                    // which would otherwise hit a NullPointerException on trim() below.
                    continue;
                }

                if (fvv.trim().equalsIgnoreCase(currentFVtext)) {
                    currentFactorValue = factorValue;
                }
            }

            /*
             * If we can't find the factorvalue that matches this, we don't get a value for this biomaterial.
             */
            if (currentFactorValue == null) {
                ExperimentalDesignImporterImpl.log.error("No factor value for " + currentExperimentalFactor
                        + " matches the text value=" + currentFVtext);
            } else {
                if (!this.checkForDuplicateFactorOnBioMaterial(currentBioMaterial, currentFactorValue)) {
                    currentBioMaterial.getFactorValues().add(currentFactorValue);
                }
            }

            ExperimentalDesignImporterImpl.log.debug(
                    "Added factor value " + currentFactorValue + " to biomaterial " + currentBioMaterial);
            biomaterialsWithFactorValuesInExperiment.add(currentBioMaterial);

            if (!factorsAssociatedWithBioMaterials.containsKey(currentExperimentalFactor)) {
                factorsAssociatedWithBioMaterials.put(currentExperimentalFactor, new HashSet<BioMaterial>());
            }
            factorsAssociatedWithBioMaterials.get(currentExperimentalFactor).add(currentBioMaterial);

        }

    }

    /*
     * Check if every biomaterial got used. Worth a warning, at least.
     */
    for (ExperimentalFactor ef : factorsAssociatedWithBioMaterials.keySet()) {
        if (!factorsAssociatedWithBioMaterials.get(ef).containsAll(experimentBioMaterials)) {
            ExperimentalDesignImporterImpl.log.warn(
                    "File did not contain values for all factor - biomaterial combinations: Missing at least one for "
                            + ef + " [populated " + factorsAssociatedWithBioMaterials.get(ef).size() + "/"
                            + experimentBioMaterials.size() + " ]");
        }
    }

    return biomaterialsWithFactorValuesInExperiment;
}

From source file:ubic.gemma.core.loader.expression.simple.ExperimentalDesignImporterImpl.java

/**
 * Collects, for every factor column, the distinct values found in that column across all lines.
 * <p>
 * Lines are split on tabs. Column 0 is skipped; for each later column the stripped header text is the map key and
 * the stripped cell text is added to that key's value set.
 *
 * @param  headerFields     header fields
 * @param  factorValueLines factor value lines
 * @return                  map of experimental factor values keyed on experimental factor
 */
private Map<String, Set<String>> getMapFactorSampleValues(String[] headerFields,
        List<String> factorValueLines) {
    Map<String, Set<String>> valuesByFactor = new HashMap<>();

    for (String line : factorValueLines) {
        String[] cells = StringUtils.splitPreserveAllTokens(line, "\t");

        for (int column = 1; column < headerFields.length; column++) {
            String factorName = StringUtils.strip(headerFields[column]);
            String cellValue = StringUtils.strip(cells[column]);

            Set<String> values = valuesByFactor.get(factorName);
            if (values == null) {
                // First value seen for this factor: register a fresh set.
                values = new HashSet<>();
                valuesByFactor.put(factorName, values);
            }
            values.add(cellValue);
        }
    }

    return valuesByFactor;
}