Example usage for org.apache.commons.lang3 StringUtils strip

List of usage examples for org.apache.commons.lang3 StringUtils strip

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringUtils strip.

Prototype

public static String strip(final String str) 

Source Link

Document

Strips whitespace from the start and end of a String.

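This is similar to #trim(String), but removes whitespace as defined by Character.isWhitespace(char) instead of only the characters with code 32 or lower that trim removes.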

Usage

From source file:ubic.gemma.core.loader.expression.simple.ExperimentalDesignImporterImpl.java

/**
 * Checks the experimental factor description lines and then delegates to the sample header validation.
 * <p>
 * A description line is expected to look like
 * {@code #$Run time : Category=EnvironmentalHistory Type=categorical}. Each line must split on ':' into exactly
 * two fields, and the second field must consist of exactly two whitespace-separated tokens. The content of those
 * two tokens is not inspected here.
 *
 * @param  experimentalFactorLines the lines in the file corresponding to experimental factors
 * @param  sampleHeaderLine        the header line listing the experimental factors, in order
 * @throws IOException             if an experimental factor line is not correctly formatted, or if the sample
 *                                 header validation fails
 */
private void validateExperimentalFactorFileContent(List<String> experimentalFactorLines,
        String sampleHeaderLine) throws IOException {
    // Leading line indicator plus any whitespace following it; removed from the factor name.
    final Pattern indicatorPrefix = Pattern.compile(
            Pattern.quote(ExperimentalDesignImporterImpl.EXPERIMENTAL_FACTOR_DESCRIPTION_LINE_INDICATOR)
                    + "\\s*");
    final Set<String> factorNames = new HashSet<>();

    for (String factorLine : experimentalFactorLines) {
        final String[] parts = factorLine.split(":");
        if (parts.length != 2) {
            throw new IOException(
                    "EF description must have two fields with a single ':' in between (" + factorLine + ")");
        }

        // Left of the colon: the factor name, without the indicator and surrounding whitespace.
        factorNames.add(StringUtils.strip(indicatorPrefix.matcher(parts[0]).replaceFirst("")));

        // Right of the colon: two whitespace-separated tokens are required.
        final String[] details = StringUtils.split(StringUtils.strip(parts[1]));
        if (details.length != 2) {
            throw new IOException("EF details should have the format 'Category=CATEGORY Type=TYPE'");
        }
    }

    this.validateSampleHeaderFileContent(factorNames, experimentalFactorLines.size(), sampleHeaderLine);
}

From source file:ubic.gemma.core.loader.expression.simple.ExperimentalDesignImporterImpl.java

/**
 * Validates the tab-separated sample header line against the experimental factor names.
 * <p>
 * The header may not have more columns than the number of experimental factors plus
 * {@code NUMBER_OF_EXTRA_COLUMNS_ALLOWED}. The first column is not checked. Every later column, once stripped,
 * must either be the literal {@code ExternalID} or one of the given experimental factor names.
 *
 * @param  experimentalFactorValueNames experimental factor value names
 * @param  numberOfExperimentalFactors  number of EFs
 * @param  sampleHeaderLine             sample header line
 * @throws IOException                  if there are too many columns or a column heading matches no EF
 */
private void validateSampleHeaderFileContent(Set<String> experimentalFactorValueNames,
        Integer numberOfExperimentalFactors, String sampleHeaderLine) throws IOException {
    final String[] columns = StringUtils.splitPreserveAllTokens(sampleHeaderLine, "\t");

    // we might have the ids, and the external id.
    if (columns.length > numberOfExperimentalFactors
            + ExperimentalDesignImporterImpl.NUMBER_OF_EXTRA_COLUMNS_ALLOWED) {
        throw new IOException("Expected "
                + (numberOfExperimentalFactors + ExperimentalDesignImporterImpl.NUMBER_OF_EXTRA_COLUMNS_ALLOWED)
                + " columns based on EF descriptions (plus id column), got " + columns.length);
    }

    // Index 0 is skipped on purpose: only the columns after the first are matched against the EF names.
    for (int col = 1; col < columns.length; col++) {
        final String heading = StringUtils.strip(columns[col]);
        final boolean recognised = heading.equals("ExternalID")
                || experimentalFactorValueNames.contains(heading);
        if (!recognised) {
            throw new IOException("Expected to find an EF matching the column heading '" + heading + "'");
        }
    }
}

From source file:ubic.gemma.core.loader.genome.FastaParser.java

/**
 * Fills in a sequence's name (and, where available, description and GenBank entry) from an Affymetrix-style
 * FASTA header. The header is tokenized on ':' and ';'; the first token, minus a leading '&gt;', selects the
 * layout:
 *
 * <pre>
 *        Affymetrix targets or collapsed sequence     target:array:probeset;
 *        Affymetrix &quot;style&quot; file            target:probename
 *        Affymetrix probe                             probe:array:probeset:xcoord:ycoord; Interrogation_Position=XXXX; Antisense;
 *        Affymetrix consensus/exemplar                exemplar:array:probeset; gb|accession; gb:accession /DEF=Homo sapiens metalloprotease-like, disintegrin-like, cysteine-rich protein 2 delta (ADAM22) mRNA, alternative splice product, complete cds.  /FEA=mRNA /GEN=ADAM22 /PROD=metalloprotease-like,
 *        Affymetrix-like format                       array:probe or other string containing ':'.
 * </pre>
 *
 * Tokens are accessed by fixed index without a length check, so a header with fewer tokens than its layout
 * requires results in an {@link ArrayIndexOutOfBoundsException}.
 *
 * @param  bioSequence the sequence to populate
 * @param  header      the FASTA header line
 * @return boolean always true
 */
@SuppressWarnings("SameReturnValue") // Consistency with other similar methods
private boolean parseAffyHeader(BioSequence bioSequence, String header) {
    // affymetrix format
    final String[] tokens = StringUtils.split(header, ":;");
    final String recordType = StringUtils.removeStart(tokens[0], ">");

    if ("probe".equals(recordType)) {
        bioSequence.setName(tokens[1] + ":" + tokens[2] + ":" + tokens[3] + ":" + tokens[4]);
    } else if ("target".equals(recordType)) {
        // tokens[1] = array name or probe name
        // tokens[2] = probe name
        bioSequence.setName(tokens.length > 2 ? tokens[2] : tokens[1]);
    } else if ("exemplar".equals(recordType)) {
        bioSequence.setName(tokens[1] + ":" + tokens[2]);
        bioSequence.setDescription(tokens[3]);
    } else {
        // This is the case if the xxxx:xxxx format is used on non-affy
        bioSequence.setName(StringUtils.removeStart(header, ">"));
        return true;
    }

    // Look for the first token carrying a GenBank accession and use it for the database entry.
    for (String rawToken : tokens) {
        final String token = StringUtils.strip(rawToken);

        // NOTE(review): the header was already split on ':', so no token can contain one and the "gb:" test
        // looks unreachable here; only the "gb|" form appears to be recognized - confirm intent.
        if (!token.startsWith("gb|") && !token.startsWith("gb:")) {
            continue;
        }

        // fill in the sequence database entry
        final String genbankAcc = StringUtils.split(token, ":|")[1];
        final DatabaseEntry genbank = ExternalDatabaseUtils.getGenbankAccession(genbankAcc);
        bioSequence.setName(genbank.getAccession());
        bioSequence.setSequenceDatabaseEntry(genbank);
        if (RecordParser.log.isDebugEnabled()) {
            RecordParser.log.debug("Got genbank accession " + genbankAcc + " for " + bioSequence.getName());
        }
        break;
    }
    return true;
}

From source file:ubic.gemma.core.loader.genome.taxon.TaxonParser.java

/**
 * Parses one '|'-delimited line and updates the taxon registered under the NCBI id found in the first field,
 * creating that taxon (with genes flagged as not usable) the first time the id is seen.
 * <p>
 * The fourth field, once stripped, decides what the second field means: {@code scientific name} sets the
 * scientific name and {@code genbank common name} sets the common name. Both names are stripped of surrounding
 * whitespace, so padding around the '|' delimiter does not end up in the stored value. Lines with any other tag
 * leave the names untouched.
 * <p>
 * NOTE(review): presumably the input is the NCBI taxonomy {@code names.dmp} file, whose fields are padded with
 * tabs around '|' - confirm against the caller.
 *
 * @param  line the line to parse; must contain at least four '|'-delimited fields
 * @return the taxon for the NCBI id on this line (new or previously created)
 * @throws NumberFormatException          if the first field is not an integer
 * @throws ArrayIndexOutOfBoundsException if the line has fewer than four fields
 */
@Override
public Taxon parseOneLine(String line) {
    String[] fields = StringUtils.splitPreserveAllTokens(line, '|');

    int ncbiid = Integer.parseInt(StringUtils.strip(fields[0]));

    if (!results.containsKey(ncbiid)) {
        Taxon created = Taxon.Factory.newInstance();
        created.setNcbiId(ncbiid);
        created.setIsGenesUsable(false);
        results.put(ncbiid, created);
    }
    Taxon taxon = results.get(ncbiid);

    String tag = StringUtils.strip(fields[3]);
    if (tag.equals("scientific name")) {
        taxon.setScientificName(StringUtils.strip(fields[1]));
    } else if (tag.equals("genbank common name")) {
        // Strip here as well: previously the common name kept the delimiter padding while the
        // scientific name, read from the same column, did not.
        taxon.setCommonName(StringUtils.strip(fields[1]));
    }

    return taxon;
}

From source file:ubic.gemma.core.loader.pazar.PazarParser.java

/**
 * Parses one tab-delimited line into a {@link PazarRecord}.
 * <p>
 * Null or empty lines, the header line (starting with {@code TF_PAZAR_ID}) and lines with too few columns are
 * skipped by returning {@code null}. Columns 0-4, 6 and 7 are copied into the record; only column 0 is stripped
 * of surrounding whitespace.
 *
 * @param  line the line to parse, may be null
 * @return the parsed record, or {@code null} if the line was skipped
 */
@Override
public PazarRecord parseOneLine(String line) {
    if (line == null || line.isEmpty()) {
        return null;
    }

    if (line.startsWith("TF_PAZAR_ID")) {
        return null;
    }

    String[] fields = StringUtils.splitPreserveAllTokens(line, '\t');
    // The highest index read below is 7, so at least 8 fields are needed. The previous guard (< 2) let
    // lines with 2-7 fields through, which then failed with an ArrayIndexOutOfBoundsException.
    if (fields.length < 8) {
        return null;
    }

    PazarRecord r = new PazarRecord();

    r.setPazarTfId(StringUtils.strip(fields[0]));
    r.setTfAcc(fields[1]);
    r.setSpecies(fields[2]);
    r.setPazarTargetGeneId(fields[3]);
    r.setTargetGeneAcc(fields[4]);
    r.setProject(fields[6]);
    r.setPubMedId(fields[7]);
    // The column at index 8 (method, per the original commented-out setMethod call) is not read.

    return r;
}

From source file:ubic.gemma.core.search.GeneSetSearchImpl.java

/**
 * Looks up the GO term for the given id (stripped of surrounding whitespace) and converts it to a gene set.
 *
 * @param  goId  the GO term id
 * @param  taxon the taxon to restrict to; if null, the returned gene set has genes from different taxa
 * @return the gene set for the term, or null if no term was found for the id
 */
@Override
public GeneSet findByGoId(String goId, Taxon taxon) {
    final String strippedId = StringUtils.strip(goId);
    final OntologyTerm term = geneOntologyService.getTermForId(strippedId);

    // if taxon is null, this returns a geneset with genes from different taxons
    return term == null ? null : this.goTermToGeneSet(term, taxon);
}

From source file:ubic.gemma.core.search.GeneSetSearchImpl.java

/**
 * Finds GO terms matching the given name (stripped of surrounding whitespace) and collects gene sets for them.
 * <p>
 * With a null taxon, all gene sets produced for a term are added; otherwise a single gene set for that taxon is
 * added when one is produced. Processing stops as soon as the number of collected gene sets exceeds
 * {@code maxGoTermsProcessed} (when that limit is not null).
 *
 * @param  goTermName          the GO term name to search for
 * @param  taxon               the taxon to restrict to, or null for no restriction
 * @param  maxGoTermsProcessed limit checked against the number of collected gene sets; null for no limit
 * @param  maxGeneSetSize      passed through to the term-to-gene-set conversion
 * @return the collected gene sets, possibly empty
 */
@Override
public Collection<GeneSet> findByGoTermName(String goTermName, Taxon taxon, Integer maxGoTermsProcessed,
        Integer maxGeneSetSize) {
    final String query = StringUtils.strip(goTermName);
    final Collection<? extends OntologyResource> matchingTerms = this.geneOntologyService.findTerm(query);
    final Collection<GeneSet> geneSets = new HashSet<>();

    for (OntologyResource resource : matchingTerms) {
        assert resource instanceof OntologyTerm;

        if (taxon != null) {
            // converted will be null if its size is more than maxGeneSetSize
            final GeneSet converted = this.goTermToGeneSet(resource, taxon, maxGeneSetSize);
            if (converted != null) {
                geneSets.add(converted);
            }
        } else {
            // FIXME should we count each species as one go?
            geneSets.addAll(this.goTermToGeneSets((OntologyTerm) resource, maxGeneSetSize));
        }

        if (maxGoTermsProcessed != null && geneSets.size() > maxGoTermsProcessed) {
            break;
        }
    }

    return geneSets;
}

From source file:ubic.gemma.core.search.GeneSetSearchImpl.java

/**
 * Finds gene sets by name, after stripping surrounding whitespace from the name.
 *
 * @param  name the gene set name to look up
 * @return whatever the gene set service returns for the stripped name
 */
@Override
public Collection<GeneSet> findByName(String name) {
    final String strippedName = StringUtils.strip(name);
    return geneSetService.findByName(strippedName);
}

From source file:ubic.gemma.core.search.GeneSetSearchImpl.java

/**
 * Finds gene sets by name for a taxon, after stripping surrounding whitespace from the name.
 *
 * @param  name  the gene set name to look up
 * @param  taxon the taxon passed through to the gene set service
 * @return whatever the gene set service returns for the stripped name and taxon
 */
@Override
public Collection<GeneSet> findByName(String name, Taxon taxon) {
    final String strippedName = StringUtils.strip(name);
    return geneSetService.findByName(strippedName, taxon);
}

From source file:ubic.gemma.core.search.GeneSetSearchImpl.java

/**
 * Searches the ontology for phenotypes matching the query (stripped of surrounding whitespace) and builds one
 * transient gene set value object per phenotype URI that has at least one candidate gene.
 * <p>
 * Each gene set takes its name and description from the matching phenotype, its gene ids from the candidate
 * genes, and its taxon id and taxon name from the first candidate gene returned by the collection's iterator.
 * Elapsed times are logged at info level when they exceed fixed thresholds (200, 500 and 1000 ms).
 *
 * @param  phenotypeQuery the phenotype search text
 * @param  taxon          the taxon passed through to the candidate gene lookup
 * @return the gene sets built for the matching phenotypes; empty if no phenotype matched
 */
@Override
public Collection<GeneSetValueObject> findByPhenotypeName(String phenotypeQuery, Taxon taxon) {
    final StopWatch stopWatch = new StopWatch();
    stopWatch.start();

    final Collection<CharacteristicValueObject> matchedPhenotypes = phenotypeAssociationManagerService
            .searchOntologyForPhenotypes(StringUtils.strip(phenotypeQuery), null);
    final Collection<GeneSetValueObject> geneSets = new HashSet<>();

    if (matchedPhenotypes.isEmpty()) {
        return geneSets;
    }

    if (stopWatch.getTime() > 200) {
        GeneSetSearchImpl.log.info("Find phenotypes: " + stopWatch.getTime() + "ms");
    }

    GeneSetSearchImpl.log.debug(" Converting CharacteristicValueObjects collection(size:"
            + matchedPhenotypes.size() + ") into GeneSets for  phenotype query " + phenotypeQuery);

    // Index the phenotypes by value URI so they can be looked up per candidate gene group below.
    final Map<String, CharacteristicValueObject> phenotypesByUri = new HashMap<>();
    for (CharacteristicValueObject phenotype : matchedPhenotypes) {
        phenotypesByUri.put(phenotype.getValueUri(), phenotype);
    }

    final Map<String, Collection<? extends GeneValueObject>> genesByUri = phenotypeAssociationManagerService
            .findCandidateGenesForEach(phenotypesByUri.keySet(), taxon);

    if (stopWatch.getTime() > 500) {
        GeneSetSearchImpl.log.info("Find phenotype genes done at " + stopWatch.getTime() + "ms");
    }

    for (Map.Entry<String, Collection<? extends GeneValueObject>> entry : genesByUri.entrySet()) {
        final Collection<? extends GeneValueObject> candidateGenes = entry.getValue();
        if (candidateGenes.isEmpty()) {
            continue;
        }

        final Collection<Long> geneIds = EntityUtils.getIds(candidateGenes);
        final GeneSetValueObject geneSet = new GeneSetValueObject();

        final CharacteristicValueObject phenotype = phenotypesByUri.get(entry.getKey());
        geneSet.setName(this.uri2phenoID(phenotype));
        geneSet.setDescription(phenotype.getValue());
        geneSet.setGeneIds(geneIds);

        // The taxon of the whole set is taken from its first gene.
        final GeneValueObject firstGene = candidateGenes.iterator().next();
        geneSet.setTaxonId(firstGene.getTaxonId());
        geneSet.setTaxonName(firstGene.getTaxonCommonName());

        geneSets.add(geneSet);
    }

    if (stopWatch.getTime() > 1000) {
        GeneSetSearchImpl.log.info("Loaded " + matchedPhenotypes.size() + " phenotype gene sets for query "
                + phenotypeQuery + " in " + stopWatch.getTime() + "ms");
    }
    return geneSets;
}