Example usage for org.apache.commons.lang3 StringUtils splitPreserveAllTokens

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringUtils splitPreserveAllTokens.

Prototype

public static String[] splitPreserveAllTokens(final String str, final String separatorChars)

Source Link

Document

Splits the provided text into an array, separators specified, preserving all tokens, including empty tokens created by adjacent separators.

Usage

From source file:ubic.gemma.core.loader.pazar.PazarParser.java

/**
 * Parses one tab-separated line into a {@link PazarRecord}.
 * <p>
 * Empty tokens are preserved when splitting, so column positions stay fixed even when a cell is blank.
 *
 * @param line the raw line; may be {@code null}
 * @return the populated record, or {@code null} when the line is {@code null}, empty, starts with
 *         {@code "TF_PAZAR_ID"} (treated as the header row), or has fewer columns than this method reads
 */
@Override
public PazarRecord parseOneLine(String line) {
    if (line == null || line.isEmpty())
        return null;

    // Skip the header row.
    if (line.startsWith("TF_PAZAR_ID"))
        return null;

    String[] fields = StringUtils.splitPreserveAllTokens(line, '\t');

    // The highest index read below is 7, so at least 8 columns are required. Shorter rows are skipped
    // rather than allowed to fail with an ArrayIndexOutOfBoundsException.
    final int requiredFields = 8;
    if (fields.length < requiredFields)
        return null;

    PazarRecord r = new PazarRecord();

    r.setPazarTfId(StringUtils.strip(fields[0]));
    r.setTfAcc(fields[1]);
    r.setSpecies(fields[2]);
    r.setPazarTargetGeneId(fields[3]);
    r.setTargetGeneAcc(fields[4]);
    // Column 5 is not used by this parser.
    r.setProject(fields[6]);
    r.setPubMedId(fields[7]);

    return r;

}

From source file:ubic.gemma.core.loader.protein.biomart.BiomartEnsembleNcbiParser.java

/**
 * Parses a single BioMart line into a value object. Note that there is a many to many relationship between
 * Ensembl ids and Entrez gene ids.
 * <p>
 * A line is skipped (yielding {@code null}) when it begins with the first configured BioMart header field, when
 * it is empty, or when its number of delimited fields differs from the expected number of fields per row.
 *
 * @param line the line to parse
 * @return the value object built from the line's fields, or {@code null} if the line was skipped
 */
@Override
public Ensembl2NcbiValueObject parseOneLine(String line) {
    final int expectedColumns = this.getBioMartFieldsPerRow();

    // Header rows and empty lines carry no data.
    final boolean skippable = line.startsWith(this.bioMartHeaderFields[0]) || line.isEmpty();
    if (skippable) {
        return null;
    }

    final String[] columns = StringUtils.splitPreserveAllTokens(line, BiomartEnsembleNcbiParser.FIELD_DELIM);

    if (columns.length == expectedColumns) {
        try {
            return this.createBioMartEnsembleNcbi(columns);
        } catch (NumberFormatException nfe) {
            throw new FileFormatException(nfe);
        } catch (FileFormatException ffe) {
            throw new RuntimeException(ffe);
        }
    }

    // Rows with an unexpected number of fields are skipped silently; earlier behaviour was to throw.
    return null;
}

From source file:ubic.gemma.core.loader.protein.string.StringProteinProteinInteractionFileParser.java

/**
 * Parse a string file line into an array representing the components, on successful validation create a
 * StringProteinProteinInteraction value object.
 *
 * @param line The line to parse/*from  w ww .j a va 2s .c o m*/
 * @return StringProteinProteinInteraction the value object.
 */
@Override
public StringProteinProteinInteraction parseOneLine(String line) {

    // header line skip or empty line
    if (line.startsWith("protein") || line.isEmpty()) {
        return null;
    }

    String[] fields = StringUtils.splitPreserveAllTokens(line,
            StringProteinProteinInteractionFileParser.FIELD_DELIMITER);

    if (fields.length != StringProteinProteinInteractionFileParser.STRING_PROTEIN_PROTEIN_INTERACTION_FIELDS_PER_ROW) {
        log.info("check file format");
        throw new FileFormatException("Line + " + line + " is not in the right format: has " + fields.length
                + " fields, expected "
                + StringProteinProteinInteractionFileParser.STRING_PROTEIN_PROTEIN_INTERACTION_FIELDS_PER_ROW);
    }

    try {
        return this.createStringProteinProteinInteraction(fields);

    } catch (NumberFormatException | FileFormatException e) {
        throw new RuntimeException(e);
    }

}