List of usage examples for org.apache.commons.lang3 StringUtils splitPreserveAllTokens
public static String[] splitPreserveAllTokens(final String str, final String separatorChars)
Splits the provided text into an array, separators specified, preserving all tokens, including empty tokens created by adjacent separators.
From source file:ubic.gemma.core.loader.pazar.PazarParser.java
@Override public PazarRecord parseOneLine(String line) { if (line == null || line.isEmpty()) return null; if (line.startsWith("TF_PAZAR_ID")) return null; String[] fields = StringUtils.splitPreserveAllTokens(line, '\t'); if (fields.length < 2) return null; PazarRecord r = new PazarRecord(); r.setPazarTfId(StringUtils.strip(fields[0])); r.setTfAcc(fields[1]);/* w ww . j av a 2s . c o m*/ r.setSpecies(fields[2]); r.setPazarTargetGeneId(fields[3]); r.setTargetGeneAcc(fields[4]); r.setProject(fields[6]); r.setPubMedId(fields[7]); // r.setMethod(fields[8); return r; }
From source file:ubic.gemma.core.loader.protein.biomart.BiomartEnsembleNcbiParser.java
/** * Method to parse one biomart line, note that there is a many to many relationship between ensemble ids and entrez * gene ids.//from www .jav a 2s . c o m * * @return BioMartEnsembleNcbi Value object representing the line parsed */ @Override public Ensembl2NcbiValueObject parseOneLine(String line) { int bioMartFieldsPerRow = this.getBioMartFieldsPerRow(); // header line from the bioMart headers then ignore it if (line.startsWith(this.bioMartHeaderFields[0]) || line.isEmpty()) { return null; } // split the line into the attributes String[] fields = StringUtils.splitPreserveAllTokens(line, BiomartEnsembleNcbiParser.FIELD_DELIM); // validate that correct format if (fields.length != bioMartFieldsPerRow) { /* * I think we should just continue on. Previous behaviour was to throw an exception. */ return null; } // create the object try { return this.createBioMartEnsembleNcbi(fields); } catch (NumberFormatException e) { throw new FileFormatException(e); } catch (FileFormatException e) { throw new RuntimeException(e); } }
From source file:ubic.gemma.core.loader.protein.string.StringProteinProteinInteractionFileParser.java
/** * Parse a string file line into an array representing the components, on successful validation create a * StringProteinProteinInteraction value object. * * @param line The line to parse/*from w ww .j a va 2s .c o m*/ * @return StringProteinProteinInteraction the value object. */ @Override public StringProteinProteinInteraction parseOneLine(String line) { // header line skip or empty line if (line.startsWith("protein") || line.isEmpty()) { return null; } String[] fields = StringUtils.splitPreserveAllTokens(line, StringProteinProteinInteractionFileParser.FIELD_DELIMITER); if (fields.length != StringProteinProteinInteractionFileParser.STRING_PROTEIN_PROTEIN_INTERACTION_FIELDS_PER_ROW) { log.info("check file format"); throw new FileFormatException("Line + " + line + " is not in the right format: has " + fields.length + " fields, expected " + StringProteinProteinInteractionFileParser.STRING_PROTEIN_PROTEIN_INTERACTION_FIELDS_PER_ROW); } try { return this.createStringProteinProteinInteraction(fields); } catch (NumberFormatException | FileFormatException e) { throw new RuntimeException(e); } }