Example usage for org.apache.commons.lang3 StringUtils splitPreserveAllTokens

List of usage examples for org.apache.commons.lang3 StringUtils splitPreserveAllTokens

Introduction

On this page you can find example usages of org.apache.commons.lang3.StringUtils.splitPreserveAllTokens.

Prototype

public static String[] splitPreserveAllTokens(final String str, final String separatorChars) 

Source Link

Document

Splits the provided text into an array, separators specified, preserving all tokens, including empty tokens created by adjacent separators.

Usage

From source file:ubic.gemma.core.analysis.service.ArrayDesignAnnotationServiceImpl.java

/**
 * Parses a tab-delimited annotation file and adds a {@link Gene} for every gene symbol found on a row to the
 * collection stored in {@code results} under that row's probe id.
 * <p>
 * Rows that are blank, start with the comment character, have fewer than three columns, name a probe that is not
 * in {@code probeNameToId}, or have a different number of '|'-separated symbols and names are skipped. Column 0 is
 * the probe name, column 1 the gene symbols, column 2 the gene names; when present, column 4 is read as gene ids
 * and column 5 as NCBI gene ids. The ids are best-effort: a missing or non-numeric id leaves the gene without it.
 * <p>
 * The input stream is read to the end but is not closed by this method.
 *
 * @param  results       map of probe id to genes; it must already hold a collection for every probe id that can
 *                       be looked up through {@code probeNameToId}, otherwise a NullPointerException is thrown
 * @param  is            stream to read the annotation file from
 * @param  probeNameToId map of probe name to probe id
 * @return               the same {@code results} map that was passed in
 * @throws RuntimeException wrapping any IOException raised while reading
 */
private static Map<Long, Collection<Gene>> parseAnnotationFile(Map<Long, Collection<Gene>> results,
        InputStream is, Map<String, Long> probeNameToId) {
    try {

        BufferedReader br = new BufferedReader(new InputStreamReader(is));
        String line;

        while ((line = br.readLine()) != null) {
            if (StringUtils.isBlank(line)
                    || line.startsWith(ArrayDesignAnnotationServiceImpl.COMMENT_CHARACTER)) {
                continue;
            }
            String[] fields = StringUtils.splitPreserveAllTokens(line, '\t');

            if (fields.length < 3)
                continue; // means there are no gene annotations.

            String probeName = fields[0];

            if (!probeNameToId.containsKey(probeName))
                continue;
            Long probeId = probeNameToId.get(probeName);

            List<String> geneSymbols = Arrays.asList(StringUtils.splitPreserveAllTokens(fields[1], '|'));
            List<String> geneNames = Arrays.asList(StringUtils.splitPreserveAllTokens(fields[2], '|'));

            if (geneSymbols.size() != geneNames.size()) {
                ArrayDesignAnnotationServiceImpl.log.warn(
                        "Annotation file format error: Unequal number of gene symbols and names for probe="
                                + probeName + ", skipping row");
                continue;
            }

            List<String> gemmaGeneIds = null;
            List<String> ncbiIds = null;

            if (fields.length > 4) { // new style. fields[3] is the GO annotations.
                gemmaGeneIds = Arrays.asList(StringUtils.splitPreserveAllTokens(fields[4], '|'));
            }
            if (fields.length > 5) {
                ncbiIds = Arrays.asList(StringUtils.splitPreserveAllTokens(fields[5], '|'));
            }

            for (int i = 0; i < geneSymbols.size(); i++) {

                String symbol = geneSymbols.get(i);
                String name = geneNames.get(i);

                if (StringUtils.isBlank(symbol)) {
                    continue;
                }

                // Within one '|'-separated entry, symbols are further split on ',' and names on '$'.
                String[] symbolsB = StringUtils.split(symbol, ',');
                String[] namesB = StringUtils.split(name, '$');

                for (int j = 0; j < symbolsB.length; j++) {

                    String s = symbolsB[j];

                    Gene g = Gene.Factory.newInstance();
                    g.setOfficialSymbol(s);

                    // NOTE(review): the id lists are split on '|' like geneSymbols (indexed by i) but are looked
                    // up with j, the position inside the ','-split entry. Confirm against the file format.
                    try {
                        // The bounds checks keep this lookup best-effort: a short id list must not abort the
                        // whole parse with an IndexOutOfBoundsException.
                        if (gemmaGeneIds != null && j < gemmaGeneIds.size()) {
                            g.setId(Long.parseLong(gemmaGeneIds.get(j)));
                        }

                        if (ncbiIds != null && j < ncbiIds.size()) {
                            g.setNcbiGeneId(Integer.parseInt(ncbiIds.get(j)));
                        }
                    } catch (NumberFormatException ignored) {
                        // oh well, couldn't populate extra info.
                    }

                    if (namesB.length >= j + 1) {
                        String n = namesB[j];
                        g.setName(n);
                    }

                    results.get(probeId).add(g);
                }
            }
        }

        return results;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:ubic.gemma.core.loader.association.NCBIGene2GOAssociationParser.java

/**
 * Converts one tab-delimited line into a {@link Gene2GOAssociation} and puts it on the queue.
 * Note that "-" means a missing value, which in practice only occurs in the "qualifier" and "pubmed" columns.
 *
 * @param  line line to parse
 * @return      the association that was queued, or null when the line is a comment, has fewer than 8 columns,
 *              refers to a taxon that is not in taxaNcbiIds, or carries one of the ignored evidence codes
 * @throws RuntimeException      if the taxon id is not an integer, or if the thread is interrupted while waiting
 *                               to put the association on the queue (the interrupt flag is restored first)
 * @throws NumberFormatException if the gene id is not an integer
 */
@SuppressWarnings({ "unused", "WeakerAccess" }) // Possible external use
public Gene2GOAssociation mapFromGene2GO(String line) {

    // Check for comments first so comment lines are never split.
    if (line.startsWith(NCBIGene2GOAssociationParser.COMMENT_INDICATOR))
        return null;

    String[] values = StringUtils.splitPreserveAllTokens(line, "\t");

    if (values.length < 8)
        return null;

    Integer taxonId;
    try {
        taxonId = Integer.parseInt(values[TAX_ID]);
    } catch (NumberFormatException e) {
        throw new RuntimeException(e);
    }

    if (!taxaNcbiIds.containsKey(taxonId)) {
        return null;
    }

    Gene gene = Gene.Factory.newInstance();
    gene.setNcbiGeneId(Integer.parseInt(values[GENE_ID]));

    gene.setTaxon(taxaNcbiIds.get(taxonId));
    Characteristic oe = Characteristic.Factory.newInstance();
    // The GO id is used with ':' replaced by '_' both as the value and as the tail of the URI.
    String value = values[GO_ID].replace(":", "_");
    oe.setValueUri(GeneOntologyService.BASE_GO_URI + value);
    oe.setValue(value);

    // g2GOAss.setSource( ncbiGeneDb );

    GOEvidenceCode evcode = null;
    String evidenceCode = values[EVIDENCE_CODE];

    if (!(StringUtils.isBlank(evidenceCode) || evidenceCode.equals("-"))) {

        if (NCBIGene2GOAssociationParser.ignoredEvidenceCodes.contains(evidenceCode)) {
            return null;
        }

        evcode = GOEvidenceCode.fromString(evidenceCode);

    }
    Gene2GOAssociation g2GOAss = Gene2GOAssociation.Factory.newInstance(gene, oe, evcode);

    try {
        queue.put(g2GOAss);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can still see the interruption.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    }

    return g2GOAss;
}

From source file:ubic.gemma.core.loader.expression.arrayDesign.ArrayDesignParser.java

/**
 * Builds an {@link ArrayDesign} from one tab-delimited line.
 * <p>
 * Column 0 is used as the name, column 1 as the design provider's name, column 4 as the advertised number of
 * design elements and column 5 as the description.
 *
 * @param  line tab-delimited line; it must have at least six columns
 * @return      the populated array design
 * @throws NumberFormatException if column 4 is not an integer
 */
@Override
public ArrayDesign parseOneLine(String line) {
    final String[] columns = StringUtils.splitPreserveAllTokens(line, '\t');

    final ArrayDesign design = ArrayDesign.Factory.newInstance();
    design.setName(columns[0]);
    design.setDescription(columns[5]);

    final Taxon primaryTaxon = Taxon.Factory.newInstance();
    // NOTE(review): column 4 is used here as the taxon common name and again below as the integer number of
    // design elements. One of the two indices is probably wrong; confirm against the file format.
    primaryTaxon.setCommonName(columns[4].toLowerCase());
    primaryTaxon.setIsGenesUsable(true); // assumption
    design.setPrimaryTaxon(primaryTaxon);

    final Contact provider = Contact.Factory.newInstance();
    provider.setName(columns[1]);
    design.setDesignProvider(provider);

    final int advertisedElements = Integer.parseInt(columns[4]);
    design.setAdvertisedNumberOfDesignElements(advertisedElements);
    return design;
}

From source file:ubic.gemma.core.loader.expression.arrayDesign.ArrayDesignProbeMapperServiceImpl.java

/**
 * Replaces the gene product associations of a platform with associations read from a tab-delimited file.
 * <p>
 * Every non-blank line that does not start with '#' must have exactly three columns: probe id, sequence name and
 * gene. The gene column may hold several entries separated by '|'; each entry is looked up by NCBI id when
 * {@code ncbiIds} is true and by official symbol within {@code taxon} otherwise. For every gene product of every
 * gene found, an AnnotationAssociation to the probe's sequence is created.
 * <p>
 * Existing associations of the platform are deleted before the file is read.
 *
 * @param  arrayDesign platform to process; GENELIST, SEQUENCING and OTHER technology types are rejected
 * @param  taxon       taxon used for symbol lookups and for newly created sequences; may be null only when
 *                     {@code ncbiIds} is true
 * @param  source      file to read
 * @param  sourceDB    set as the source of every association created
 * @param  ncbiIds     whether the gene column holds NCBI gene ids rather than official symbols
 * @throws IOException              if the file cannot be read or a line does not have exactly three columns
 * @throws IllegalArgumentException if no taxon is given while {@code ncbiIds} is false, or the platform's
 *                                  technology type is not supported
 */
@Override
public void processArrayDesign(ArrayDesign arrayDesign, Taxon taxon, File source, ExternalDatabase sourceDB,
        boolean ncbiIds) throws IOException {

    if (taxon == null && !ncbiIds) {
        throw new IllegalArgumentException("You must provide a taxon unless passing ncbiIds = true");
    }

    if (arrayDesign.getTechnologyType().equals(TechnologyType.GENELIST)
            || arrayDesign.getTechnologyType().equals(TechnologyType.SEQUENCING)
            || arrayDesign.getTechnologyType().equals(TechnologyType.OTHER)) {
        throw new IllegalArgumentException(
                "Do not use this service to process platforms that do not use an probe-based technology.");
    }

    try (BufferedReader b = new BufferedReader(new FileReader(source))) {
        String line;
        // Counts skipped rows and also skipped genes (genes without products), see the loop at the bottom.
        int numSkipped = 0;

        // The old associations are removed up front, before any line of the file has been checked.
        ArrayDesignProbeMapperServiceImpl.log.info("Removing any old associations");
        arrayDesignService.deleteGeneProductAssociations(arrayDesign);

        while ((line = b.readLine()) != null) {

            if (StringUtils.isBlank(line)) {
                continue;
            }
            if (line.startsWith("#")) {
                continue;
            }

            String[] fields = StringUtils.splitPreserveAllTokens(line, '\t');
            if (fields.length != 3) {
                throw new IOException("Illegal format, expected three columns, got " + fields.length);
            }

            String probeId = fields[0];
            String seqName = fields[1];

            /*
             * FIXME. We have to allow NCBI gene ids here.
             */
            String geneSymbol = fields[2];

            if (StringUtils.isBlank(geneSymbol)) {
                numSkipped++;
                continue;
            }

            CompositeSequence c = compositeSequenceService.findByName(arrayDesign, probeId);

            if (c == null) {
                if (ArrayDesignProbeMapperServiceImpl.log.isDebugEnabled())
                    ArrayDesignProbeMapperServiceImpl.log
                            .debug("No probe found for '" + probeId + "' on " + arrayDesign + ", skipping");
                numSkipped++;
                continue;
            }

            // a probe can have more than one gene associated with it if so they are piped |
            Collection<Gene> geneListProbe = new HashSet<>();

            // indicate multiple genes
            Gene geneDetails;

            // StringTokenizer drops empty tokens, so consecutive '|' characters do not produce blank entries.
            StringTokenizer st = new StringTokenizer(geneSymbol, "|");
            while (st.hasMoreTokens()) {
                String geneToken = st.nextToken().trim();
                if (ncbiIds) {
                    // A non-numeric token throws NumberFormatException here; it is not caught in this method.
                    geneDetails = geneService.findByNCBIId(Integer.parseInt(geneToken));
                } else {
                    geneDetails = geneService.findByOfficialSymbol(geneToken, taxon);
                }
                if (geneDetails != null) {
                    geneListProbe.add(geneDetails);
                }
            }

            if (geneListProbe.size() == 0) {
                ArrayDesignProbeMapperServiceImpl.log
                        .warn("No gene(s) found for '" + geneSymbol + "' in " + taxon + ", skipping");
                numSkipped++;
                continue;
            } else if (geneListProbe.size() > 1) {
                // this is a common situation, when the geneSymbol actually has |-separated genes, so no need to
                // make a
                // lot of fuss.
                ArrayDesignProbeMapperServiceImpl.log
                        .debug("More than one gene found for '" + geneSymbol + "' in " + taxon);
            }

            BioSequence bs = c.getBiologicalCharacteristic();

            if (bs != null) {
                // The probe already has a sequence: a name given in the file must agree with it.
                if (StringUtils.isNotBlank(seqName)) {
                    bs = bioSequenceService.thaw(bs);
                    if (!bs.getName().equals(seqName)) {
                        ArrayDesignProbeMapperServiceImpl.log.warn("Sequence name '" + seqName + "' given for "
                                + probeId + " does not match existing entry " + bs.getName() + ", skipping");
                        numSkipped++;
                        continue;
                    }

                }
                // otherwise we assume everything is okay.
            } else {
                // create one based on the text provided.
                if (StringUtils.isBlank(seqName)) {
                    ArrayDesignProbeMapperServiceImpl.log.warn(
                            "You must provide sequence names for probes which are not already mapped. probeName="
                                    + probeId + " had no sequence associated and no name provided; skipping");
                    numSkipped++;
                    continue;
                }

                bs = BioSequence.Factory.newInstance();
                bs.setName(seqName);
                // NOTE(review): taxon can be null here when ncbiIds is true (the argument check at the top
                // allows that combination) - confirm a sequence without a taxon is acceptable.
                bs.setTaxon(taxon);
                bs.setDescription("Imported from annotation file");

                // Placeholder.
                bs.setType(SequenceType.OTHER);

                bs = bioSequenceService.create(bs);

                c.setBiologicalCharacteristic(bs);

                compositeSequenceService.update(c);
            }

            assert bs != null;
            assert bs.getId() != null;
            // Link the sequence to every product of every gene found for this probe.
            for (Gene gene : geneListProbe) {
                gene = geneService.thaw(gene);
                if (gene.getProducts().size() == 0) {
                    ArrayDesignProbeMapperServiceImpl.log.warn("There are no gene products for " + gene
                            + ", it cannot be mapped to probes. Skipping");
                    numSkipped++;
                    continue;
                }
                for (GeneProduct gp : gene.getProducts()) {
                    AnnotationAssociation association = AnnotationAssociation.Factory.newInstance();
                    association.setBioSequence(bs);
                    association.setGeneProduct(gp);
                    association.setSource(sourceDB);
                    annotationAssociationService.create(association);
                }

            }

        }

        // Only reached when the whole file was read without an exception.
        arrayDesignReportService.generateArrayDesignReport(arrayDesign.getId());

        this.deleteOldFiles(arrayDesign);

        ArrayDesignProbeMapperServiceImpl.log.info(
                "Completed association processing for " + arrayDesign + ", " + numSkipped + " were skipped");
    }
}

From source file:ubic.gemma.core.loader.expression.arrayDesign.CompositeSequenceParser.java

/**
 * Builds a {@link CompositeSequence} from one tab-delimited line of the form
 * probe id, GenBank accession, description.
 * <p>
 * The probe gets a new {@link BioSequence} that is named after the accession, carries the description with
 * " (From platform source)" appended and refers to the database entry obtained for the accession.
 *
 * @param  line tab-delimited line
 * @return      the probe, or null if the line does not have exactly three columns
 */
@Override
public CompositeSequence parseOneLine(String line) {
    final String[] columns = StringUtils.splitPreserveAllTokens(line, '\t');
    if (columns.length != 3) {
        return null;
    }

    final String probeName = columns[0];
    final String accession = columns[1];
    final String text = columns[2];

    final CompositeSequence probe = CompositeSequence.Factory.newInstance();
    probe.setName(probeName);
    probe.setDescription(text);

    final DatabaseEntry accessionEntry = ExternalDatabaseUtils.getGenbankAccession(accession);

    // Name and description of the sequence are placeholders; this will be changed later, typically.
    final BioSequence sequence = BioSequence.Factory.newInstance();
    sequence.setName(accession);
    sequence.setDescription(text + " (From platform source)");
    sequence.setSequenceDatabaseEntry(accessionEntry);

    probe.setBiologicalCharacteristic(sequence);
    return probe;
}

From source file:ubic.gemma.core.loader.expression.geo.GeoFamilyParser.java

/**
 * If a line does not have the same number of fields as the column headings, it is skipped.
 *
 * @param line line/*  w  w w .  j a va 2s . c  o  m*/
 */
private void parsePlatformLine(String line) {

    if (!haveReadPlatformHeader) {
        haveReadPlatformHeader = true;
        return;
    }
    GeoPlatform currentPlatform = results.getPlatformMap().get(currentPlatformAccession);
    assert currentPlatform != null;

    /*
     * Skip platform information when it is not going to be usable, unless we are ONLY parsing a platform.
     */
    // Actually this isn't as important, since we filter out bad elements.
    // if ( !processPlatformsOnly && !currentPlatform.useDataFromGeo() ) {
    // return;
    // }

    String[] tokens = StringUtils.splitPreserveAllTokens(line, GeoFamilyParser.FIELD_DELIM);

    List<String> columnNames = currentPlatform.getColumnNames();
    int numColumns = columnNames.size();

    if (numColumns != tokens.length && numWarnings < GeoFamilyParser.MAX_WARNINGS) {
        GeoFamilyParser.log.warn("Wrong number of tokens in line (" + tokens.length + ", expected " + numColumns
                + "), line was '" + line + "'; Possible corrupt file or invalid format?");
        numWarnings++;
        if (numWarnings == GeoFamilyParser.MAX_WARNINGS) {
            GeoFamilyParser.log.warn("Further warnings suppressed");
        }

        return;
    }

    for (int i = 0; i < tokens.length; i++) {
        String token = tokens[i];
        String columnName = columnNames.get(i);
        currentPlatform.addToColumnData(columnName, token);
    }
    platformLines++;
}

From source file:ubic.gemma.core.loader.expression.geo.GeoFamilyParser.java

/**
 * The data for one sample is all the values for each quantitation type.
 * Important implementation note: In the sample table sections of GSEXXX_family files, the first column is always
 * ID_REF, according to the kind folks at NCBI. If this changes, this code will BREAK.
 * Similarly, the column names between the different samples are not necessarily the same, but we trust that they
 * all refer to the same quantitation types in the same order, for a given platform. That is, the nth column for
 * this sample 'means' the same thing as the nth column for another sample in this series (on the same platform). If
 * that isn't true, this will be BROKEN. However, we do try to sort it out if we can.
 * <p>
 * Blank lines are ignored. The first non-blank line seen is taken to be the header. Lines with only one token
 * are ignored; an error is logged for them until the maximum number of warnings has been reached.
 *
 * @param line line
 */
private void parseSampleDataLine(String line) {

    if (StringUtils.isBlank(line))
        return;

    if (!haveReadSampleDataHeader) {
        haveReadSampleDataHeader = true;
        previousNumTokens = null;
        this.initializeQuantitationTypes();
        return;
    }

    GeoSample sample = results.getSampleMap().get(currentSampleAccession);

    /*
     * skip this step if it's not a supported platform type (RNA-seq, exon arrays: we put the data in later)
     */
    if (!sample.hasUsableData()) {
        return;
    }

    String[] tokens = StringUtils.splitPreserveAllTokens(line, GeoFamilyParser.FIELD_DELIM);

    assert tokens != null;

    /*
     * This can happen in some files that are mildly corrupted. -- we have to ignore it.
     */
    if (tokens.length <= 1) {
        // The warning limit only controls logging. The line is ignored regardless, so it is neither counted as
        // a data line nor used as the reference token count once warnings are suppressed.
        if (numWarnings < GeoFamilyParser.MAX_WARNINGS) {
            GeoFamilyParser.log.error("Parse error, sample data line has too few elements (" + tokens.length
                    + "), line was '" + line + "'");
            numWarnings++;
            if (numWarnings == GeoFamilyParser.MAX_WARNINGS) {
                GeoFamilyParser.log.warn("Further warnings suppressed");
            }
        }
        return;
    }

    // The first token is the ID_REF, so the number of quantitation types is one less than the token count.
    if (previousNumTokens != null && tokens.length != previousNumTokens) {
        GeoFamilyParser.log.warn("Last line had " + (previousNumTokens - 1)
                + " quantitation types, this one has " + (tokens.length - 1));
    }

    previousNumTokens = tokens.length;

    if (results.getSeriesMap().get(currentSeriesAccession) == null) {
        return; // this happens if we are parsing a GPL file.
    }

    GeoPlatform platformForSample = sample.getPlatforms().iterator().next(); // slow

    GeoValues values = results.getSeriesMap().get(currentSeriesAccession).getValues();

    String designElement = tokens[0]; // ID_REF. For bug 1709, adding toLower() will fix this.
    Map<Integer, Integer> map = quantitationTypeTargetColumn.get(platformForSample);

    for (int i = 1; i < tokens.length; i++) {
        String value = tokens[i];
        int qtIndex = i - 1;

        /*
         * This map tells us which column this quantitation type is SUPPOSED to go in.
         */

        if (map.containsKey(qtIndex))
            qtIndex = map.get(qtIndex);
        if (!this.isWantedQuantitationType(qtIndex)) {
            continue;
        }

        if (GeoFamilyParser.log.isTraceEnabled()) {
            GeoFamilyParser.log
                    .trace("Adding: " + value + " to  quantitationType " + (qtIndex) + " for " + designElement);
        }
        values.addValue(sample, qtIndex, designElement, value);
        processedDesignElements.add(designElement);
    }

    sampleDataLines++;
}

From source file:ubic.gemma.core.loader.expression.simple.ExperimentalDesignImporterImpl.java

/**
 * Reads an experimental design file and adds the factors and factor values it describes to the experiment.
 * <p>
 * Lines starting with the experimental factor description indicator are collected as factor descriptions.
 * Other lines starting with '#' and blank lines are ignored. The first remaining line is the sample header and
 * all later ones are factor value lines. The collected lines are validated, the factors are added to the
 * experimental design, the design is updated, and finally every biomaterial that received factor values is
 * updated.
 *
 * @param  experiment  experiment whose experimental design is extended
 * @param  is          stream to read the design file from; it is not closed by this method
 * @throws IOException if reading from the stream fails
 */
@Override
@Transactional
public void importDesign(ExpressionExperiment experiment, InputStream is) throws IOException {
    this.efoService = this.ontologyService.getExperimentalFactorOntologyService();

    ExperimentalDesignImporterImpl.log.debug("Parsing input file");

    BufferedReader reader = new BufferedReader(new InputStreamReader(is));

    //    experiment = expressionExperimentService.thawBioAssays( experiment );
    ExperimentalDesign design = experiment.getExperimentalDesign();

    boolean designHasFactors = !design.getExperimentalFactors().isEmpty();
    if (designHasFactors) {
        ExperimentalDesignImporterImpl.log
                .warn("Experimental design already has factors, import will add new ones");
    }

    design.setDescription("Parsed from file.");

    // Sort the lines of the file into factor descriptions, the sample header and the factor values.
    List<String> factorDescriptionLines = new ArrayList<>();
    List<String> valueLines = new ArrayList<>();
    String headerLine = "";
    boolean headerSeen = false;

    for (String current = reader.readLine(); current != null; current = reader.readLine()) {
        if (current.startsWith(ExperimentalDesignImporterImpl.EXPERIMENTAL_FACTOR_DESCRIPTION_LINE_INDICATOR)) {
            factorDescriptionLines.add(current);
            continue;
        }
        if (current.startsWith("#") || StringUtils.isBlank(current)) {
            continue;
        }
        if (headerSeen) {
            valueLines.add(current);
        } else {
            headerLine = current;
            headerSeen = true;
        }
    }

    String[] headerFields = StringUtils.splitPreserveAllTokens(headerLine, "\t");

    Collection<BioMaterial> knownBioMaterials = this.bioMaterialService.findByExperiment(experiment);

    this.validateFileComponents(factorDescriptionLines, headerLine, valueLines);
    this.validateExperimentalFactorFileContent(factorDescriptionLines, headerLine);
    this.validateFactorFileContent(factorDescriptionLines.size(), valueLines);
    this.validateBioMaterialFileContent(knownBioMaterials, valueLines);

    // build up the composite: create experimental factor then add the experimental value
    this.addExperimentalFactorsToExperimentalDesign(design, factorDescriptionLines, headerFields, valueLines);

    assert !design.getExperimentalFactors().isEmpty();
    assert !experiment.getExperimentalDesign().getExperimentalFactors().isEmpty();

    experimentalDesignService.update(design);

    Collection<BioMaterial> updatedBioMaterials = this.addFactorValuesToBioMaterialsInExpressionExperiment(
            knownBioMaterials, design, valueLines, headerFields);

    for (BioMaterial updated : updatedBioMaterials) {
        this.bioMaterialService.update(updated);
    }

}

From source file:ubic.gemma.core.loader.expression.simple.ExperimentalDesignImporterImpl.java

/**
 * Add the factor values to the biomaterials.
 * <p>
 * Each factor value line is split on tabs. The first field, plus the second one when the second header field is
 * "EXTERNALID" (in any case), identifies the biomaterial. Every remaining non-blank field is matched,
 * ignoring case, against the values of the experimental factor whose name equals the header of that column. A
 * factor value without a value string is matched through its single characteristic, if it has exactly one;
 * factor values for which no value string can be found are never matched.
 *
 * @param  experimentBioMaterials Current expression experiment's biomaterials.
 * @param  experimentalDesign     experimental design
 * @param  factorValueLines       Lines from file containing factor values and biomaterial ids
 * @param  headerFields           header fields
 * @return                        Collection of biomaterials associated with this experiment, this is returned as
 *                                the biomaterial is in a
 *                                bioassay (first one retrieved)
 * @throws IllegalStateException    if a line names a biomaterial that cannot be found, or a column header does
 *                                  not match any experimental factor
 * @throws IllegalArgumentException if a biomaterial occurs more than once in the lines
 */
private Collection<BioMaterial> addFactorValuesToBioMaterialsInExpressionExperiment(
        Collection<BioMaterial> experimentBioMaterials, ExperimentalDesign experimentalDesign,
        List<String> factorValueLines, String[] headerFields) {
    ExperimentalDesignImporterImpl.log
            .debug("Adding factors values to biomaterials: " + experimentalDesign.getId());
    Collection<ExperimentalFactor> experimentalFactorsInExperiment = experimentalDesign
            .getExperimentalFactors();
    Collection<BioMaterial> biomaterialsWithFactorValuesInExperiment = new HashSet<>();

    Collection<BioMaterial> seenBioMaterials = new HashSet<>();

    Map<ExperimentalFactor, Collection<BioMaterial>> factorsAssociatedWithBioMaterials = new HashMap<>();

    for (String factorValueLine : factorValueLines) {
        String[] factorValueFields = StringUtils.splitPreserveAllTokens(factorValueLine, "\t");

        String externalId = null;
        // equalsIgnoreCase does not depend on the default locale, unlike toUpperCase().equals(...).
        boolean hasExternalId = headerFields[1].equalsIgnoreCase("EXTERNALID");
        if (hasExternalId) {
            externalId = factorValueFields[1];
        }
        BioMaterial currentBioMaterial = this.getBioMaterialFromExpressionExperiment(experimentBioMaterials,
                factorValueFields[0], externalId);

        if (currentBioMaterial == null) {
            // this could just be due to extras.
            // Guard the second field so a one-column line reports the missing biomaterial instead of failing
            // with an ArrayIndexOutOfBoundsException while building the message.
            String secondField = factorValueFields.length > 1 ? factorValueFields[1] : "";
            throw new IllegalStateException("No biomaterial for " + factorValueFields[0] + ", " + secondField);
        }

        if (seenBioMaterials.contains(currentBioMaterial)) {
            throw new IllegalArgumentException(
                    "A biomaterial occurred more than once in the file: " + currentBioMaterial);
        }

        seenBioMaterials.add(currentBioMaterial);

        // Factor columns start after the biomaterial name and, if present, the external id.
        int start = 1;
        if (hasExternalId) {
            start = 2;
        }

        for (int i = start; i < factorValueFields.length; i++) {
            ExperimentalFactor currentExperimentalFactor = null;
            String currentExperimentalFactorName = StringUtils.strip(headerFields[i]);

            FactorValue currentFactorValue = null;
            String currentFVtext = StringUtils.strip(factorValueFields[i]);

            if (StringUtils.isBlank(currentFVtext)) {
                // Missing value. Note that catching 'NA' etc. is hard, because they could be valid strings.
                continue;
            }

            for (ExperimentalFactor experimentalFactor : experimentalFactorsInExperiment) {
                if (experimentalFactor.getName().equals(currentExperimentalFactorName)) {
                    currentExperimentalFactor = experimentalFactor;
                }
            }

            if (currentExperimentalFactor == null)
                throw new IllegalStateException("No factor matches column " + currentExperimentalFactorName);

            Collection<FactorValue> factorValuesInCurrentExperimentalFactor = currentExperimentalFactor
                    .getFactorValues();

            for (FactorValue factorValue : factorValuesInCurrentExperimentalFactor) {
                String fvv = factorValue.getValue();
                if (StringUtils.isBlank(fvv) && factorValue.getCharacteristics().size() == 1) {
                    // try characteristics; this would be a mess if there are more than one.
                    fvv = factorValue.getCharacteristics().iterator().next().getValue();
                }

                if (StringUtils.isBlank(fvv)) {
                    // we can't match to factor values that lack a value string. This also covers a null value
                    // with zero or several characteristics, which used to fail on fvv.trim() below.
                    continue;
                }

                if (fvv.trim().equalsIgnoreCase(currentFVtext)) {
                    currentFactorValue = factorValue;
                }
            }

            /*
             * If we can't find the factorvalue that matches this, we don't get a value for this biomaterial.
             */
            if (currentFactorValue == null) {
                ExperimentalDesignImporterImpl.log.error("No factor value for " + currentExperimentalFactor
                        + " matches the text value=" + currentFVtext);
            } else {
                if (!this.checkForDuplicateFactorOnBioMaterial(currentBioMaterial, currentFactorValue)) {
                    currentBioMaterial.getFactorValues().add(currentFactorValue);
                }
            }

            ExperimentalDesignImporterImpl.log.debug(
                    "Added factor value " + currentFactorValue + " to biomaterial " + currentBioMaterial);
            biomaterialsWithFactorValuesInExperiment.add(currentBioMaterial);

            if (!factorsAssociatedWithBioMaterials.containsKey(currentExperimentalFactor)) {
                factorsAssociatedWithBioMaterials.put(currentExperimentalFactor, new HashSet<BioMaterial>());
            }
            factorsAssociatedWithBioMaterials.get(currentExperimentalFactor).add(currentBioMaterial);

        }

    }

    /*
     * Check if every biomaterial got used. Worth a warning, at least.
     */
    for (ExperimentalFactor ef : factorsAssociatedWithBioMaterials.keySet()) {
        if (!factorsAssociatedWithBioMaterials.get(ef).containsAll(experimentBioMaterials)) {
            ExperimentalDesignImporterImpl.log.warn(
                    "File did not contain values for all factor - biomaterial combinations: Missing at least one for "
                            + ef + " [populated " + factorsAssociatedWithBioMaterials.get(ef).size() + "/"
                            + experimentBioMaterials.size() + " ]");
        }
    }

    return biomaterialsWithFactorValuesInExperiment;
}

From source file:ubic.gemma.core.loader.expression.simple.ExperimentalDesignImporterImpl.java

/**
 * Collects, for every header column except the first, the distinct values found in that column across all factor
 * value lines. Header names and values are stripped of surrounding whitespace.
 *
 * @param  headerFields     header fields
 * @param  factorValueLines factor value lines, tab-delimited; each needs at least as many fields as the header
 * @return                  map of stripped header name to the set of stripped values seen in that column
 */
private Map<String, Set<String>> getMapFactorSampleValues(String[] headerFields,
        List<String> factorValueLines) {
    final Map<String, Set<String>> valuesByFactor = new HashMap<>();

    for (final String row : factorValueLines) {
        final String[] cells = StringUtils.splitPreserveAllTokens(row, "\t");

        // Column 0 is not a factor column, so start at 1.
        int column = 1;
        while (column < headerFields.length) {
            final String factorName = StringUtils.strip(headerFields[column]);
            final String cellValue = StringUtils.strip(cells[column]);

            Set<String> seenValues = valuesByFactor.get(factorName);
            if (seenValues == null) {
                seenValues = new HashSet<>();
                valuesByFactor.put(factorName, seenValues);
            }
            seenValues.add(cellValue);

            column++;
        }
    }

    return valuesByFactor;
}