Java tutorial
/* * The Gemma project * * Copyright (c) 2013 University of British Columbia * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package ubic.gemma.core.analysis.preprocess; import cern.colt.list.DoubleArrayList; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.time.StopWatch; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import ubic.basecode.io.ByteArrayConverter; import ubic.basecode.math.DescriptiveWithMissing; import ubic.basecode.math.Rank; import ubic.gemma.core.datastructure.matrix.*; import ubic.gemma.model.common.auditAndSecurity.eventType.AuditEventType; import ubic.gemma.model.common.auditAndSecurity.eventType.ProcessedVectorComputationEvent; import ubic.gemma.model.common.quantitationtype.QuantitationType; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.arrayDesign.TechnologyType; import ubic.gemma.model.expression.bioAssay.BioAssay; import ubic.gemma.model.expression.bioAssayData.BioAssayDimension; import ubic.gemma.model.expression.bioAssayData.DesignElementDataVector; import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector; import ubic.gemma.model.expression.biomaterial.BioMaterial; import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService; import ubic.gemma.persistence.service.common.quantitationtype.QuantitationTypeService; import ubic.gemma.persistence.service.expression.bioAssayData.BioAssayDimensionService; import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorDao; import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService; import ubic.gemma.persistence.service.expression.bioAssayData.RawExpressionDataVectorService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentDao; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import java.util.*; /** * Transactional methods. * * @author Paul * @see ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService */ @Service public class ProcessedExpressionDataVectorCreateHelperServiceImpl implements ProcessedExpressionDataVectorCreateHelperService { private static final Log log = LogFactory.getLog(ProcessedExpressionDataVectorCreateHelperServiceImpl.class); @Autowired private ExpressionExperimentService eeService; @Autowired private AuditTrailService auditTrailService; @Autowired private BioAssayDimensionService bioAssayDimensionService; @Autowired private ExpressionExperimentDao expressionExperimentDao; @Autowired private RawExpressionDataVectorService rawExpressionDataVectorService; @Autowired private QuantitationTypeService quantitationTypeService; @Autowired private ProcessedExpressionDataVectorService processedExpressionDataVectorService; @SuppressWarnings("unchecked") @Override @Transactional public ExpressionExperiment createProcessedDataVectors(ExpressionExperiment ee, Collection<ProcessedExpressionDataVector> vecs) { // assumption: all the same QT. Further assumption: bioassaydimension already persistent. QuantitationType qt = vecs.iterator().next().getQuantitationType(); qt = quantitationTypeService.create(qt); for (ProcessedExpressionDataVector v : vecs) { v.setQuantitationType(qt); } ee.getProcessedExpressionDataVectors().clear(); ee.getProcessedExpressionDataVectors().addAll(vecs); expressionExperimentDao.update(ee); assert ee.getNumberOfDataVectors() != null; this.audit(ee); return ee; } @Override @Transactional public ExpressionExperiment createProcessedExpressionData(ExpressionExperiment ee) { ee = processedExpressionDataVectorService.createProcessedDataVectors(ee); assert ee.getNumberOfDataVectors() != null; this.audit(ee); return ee; } @Override @Transactional public void reorderByDesign(Long eeId) { ExpressionExperiment ee = expressionExperimentDao.load(eeId); if (ee.getExperimentalDesign().getExperimentalFactors().size() == 0) { ProcessedExpressionDataVectorCreateHelperServiceImpl.log .info(ee.getShortName() + " does not have a populated experimental design, skipping"); return; } Collection<ProcessedExpressionDataVector> processedDataVectors = ee.getProcessedExpressionDataVectors(); if (processedDataVectors.size() == 0) { ProcessedExpressionDataVectorCreateHelperServiceImpl.log .info(ee.getShortName() + " does not have processed data"); return; } Collection<BioAssayDimension> dims = this.eeService.getBioAssayDimensions(ee); if (dims.size() > 1) { this.checkAllBioAssayDimensionsMatch(dims); } BioAssayDimension bioassaydim = dims.iterator().next(); List<BioMaterial> start = new ArrayList<>(); for (BioAssay ba : bioassaydim.getBioAssays()) { start.add(ba.getSampleUsed()); } /* * Get the ordering we want. */ List<BioMaterial> orderByExperimentalDesign = ExpressionDataMatrixColumnSort .orderByExperimentalDesign(start, ee.getExperimentalDesign().getExperimentalFactors()); /* * Map of biomaterials to the new order index. */ final Map<BioMaterial, Integer> ordering = new HashMap<>(); int i = 0; for (BioMaterial bioMaterial : orderByExperimentalDesign) { ordering.put(bioMaterial, i); i++; } /* * Map of the original order to new order of bioassays. */ Map<Integer, Integer> indexes = new HashMap<>(); Map<BioAssayDimension, BioAssayDimension> old2new = new HashMap<>(); for (BioAssayDimension bioAssayDimension : dims) { Collection<BioAssay> bioAssays = bioAssayDimension.getBioAssays(); assert bioAssays != null; /* * Initialize the new bioassay list. */ List<BioAssay> resorted = new ArrayList<>(bioAssays.size()); for (int m = 0; m < bioAssays.size(); m++) { resorted.add(null); } for (int oldIndex = 0; oldIndex < bioAssays.size(); oldIndex++) { BioAssay bioAssay = ((List<BioAssay>) bioAssays).get(oldIndex); BioMaterial sam1 = bioAssay.getSampleUsed(); if (ordering.containsKey(sam1)) { Integer newIndex = ordering.get(sam1); resorted.set(newIndex, bioAssay); /* * Should be the same for all dimensions.... */ assert !indexes.containsKey(oldIndex) || indexes.get(oldIndex).equals(newIndex); indexes.put(oldIndex, newIndex); } else { throw new IllegalStateException(); } } BioAssayDimension newBioAssayDimension = BioAssayDimension.Factory.newInstance(); newBioAssayDimension.setBioAssays(resorted); newBioAssayDimension.setName("Processed data of ee " + ee.getShortName() + " ordered by design"); newBioAssayDimension.setDescription("Data was reordered based on the experimental design."); newBioAssayDimension = bioAssayDimensionService.create(newBioAssayDimension); old2new.put(bioAssayDimension, newBioAssayDimension); } ByteArrayConverter converter = new ByteArrayConverter(); for (ProcessedExpressionDataVector v : processedDataVectors) { BioAssayDimension revisedBioAssayDimension = old2new.get(v.getBioAssayDimension()); assert revisedBioAssayDimension != null; double[] data = converter.byteArrayToDoubles(v.getData()); /* * Put the data in the order of the bioAssayDimension. */ Double[] resortedData = new Double[data.length]; for (int k = 0; k < data.length; k++) { resortedData[k] = data[indexes.get(k)]; } v.setData(converter.toBytes(resortedData)); v.setBioAssayDimension(revisedBioAssayDimension); } ProcessedExpressionDataVectorCreateHelperServiceImpl.log .info("Updating bioassay ordering of " + processedDataVectors.size() + " vectors"); this.auditTrailService.addUpdateEvent(ee, "Reordered the data vectors by experimental design"); } /** * @return expression ranks based on computed intensities */ @Override public ExpressionExperiment updateRanks(ExpressionExperiment ee) { ee = expressionExperimentDao.thaw(ee); Collection<ProcessedExpressionDataVector> processedVectors = ee.getProcessedExpressionDataVectors(); processedExpressionDataVectorService.thaw(processedVectors); StopWatch timer = new StopWatch(); timer.start(); ExpressionDataDoubleMatrix intensities = this.loadIntensities(ee, processedVectors); if (intensities == null) { return ee; } ProcessedExpressionDataVectorCreateHelperServiceImpl.log .info("Load intensities: " + timer.getTime() + "ms"); Collection<ProcessedExpressionDataVector> updatedVectors = this.computeRanks(processedVectors, intensities); if (updatedVectors == null) { ProcessedExpressionDataVectorCreateHelperServiceImpl.log .info("Could not get preferred data vectors, not updating ranks data"); return ee; } ProcessedExpressionDataVectorCreateHelperServiceImpl.log .info("Updating ranks data for " + updatedVectors.size() + " vectors ..."); this.processedExpressionDataVectorService.update(updatedVectors); ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Done"); return ee; } /** * Computes expression intensities depending on which ArrayDesign TechnologyType is used. * * @return ExpressionDataDoubleMatrix */ private ExpressionDataDoubleMatrix loadIntensities(ExpressionExperiment ee, Collection<ProcessedExpressionDataVector> processedVectors) { Collection<ArrayDesign> arrayDesignsUsed = this.eeService.getArrayDesignsUsed(ee); assert !arrayDesignsUsed.isEmpty(); ArrayDesign arrayDesign = arrayDesignsUsed.iterator().next(); assert arrayDesign != null && arrayDesign.getTechnologyType() != null; ExpressionDataDoubleMatrix intensities; if (arrayDesign.getTechnologyType().equals(TechnologyType.TWOCOLOR) || arrayDesign.getTechnologyType().equals(TechnologyType.DUALMODE)) { ProcessedExpressionDataVectorCreateHelperServiceImpl.log .info("Computing intensities for two-color data from underlying data"); /* * Get vectors needed to compute intensities. */ Collection<QuantitationType> quantitationTypes = eeService.getQuantitationTypes(ee); Collection<QuantitationType> usefulQuantitationTypes = ExpressionDataMatrixBuilder .getUsefulQuantitationTypes(quantitationTypes); if (usefulQuantitationTypes.isEmpty()) { throw new IllegalStateException("No useful quantitation types for " + ee.getShortName()); } Collection<? extends DesignElementDataVector> vectors = rawExpressionDataVectorService .find(usefulQuantitationTypes); if (vectors.isEmpty()) { vectors = processedExpressionDataVectorService.find(usefulQuantitationTypes); } if (vectors.isEmpty()) { throw new IllegalStateException( "No vectors for useful quantitation types for " + ee.getShortName()); } ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Vectors loaded ..."); Collection<DesignElementDataVector> vs = new HashSet<>(vectors); rawExpressionDataVectorService.thawRawAndProcessed(vs); ExpressionDataMatrixBuilder builder = new ExpressionDataMatrixBuilder(processedVectors, vectors); intensities = builder.getIntensity(); ExpressionDataBooleanMatrix missingValues = builder.getMissingValueData(); if (missingValues == null) { ProcessedExpressionDataVectorCreateHelperServiceImpl.log .warn("Could not locate missing value matrix for " + ee + ", rank computation skipped (needed for two-color data)"); return intensities; } if (intensities == null) { ProcessedExpressionDataVectorCreateHelperServiceImpl.log .warn("Could not locate intensity matrix for " + ee + ", rank computation skipped (needed for two-color data)"); return null; } ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Masking ..."); this.maskMissingValues(intensities, missingValues); } else { ProcessedExpressionDataVectorCreateHelperServiceImpl.log .info("Computing intensities directly from processed data"); intensities = new ExpressionDataDoubleMatrix(processedVectors); } return intensities; } private void audit(ExpressionExperiment ee) { AuditEventType eventType = ProcessedVectorComputationEvent.Factory.newInstance(); auditTrailService.addUpdateEvent(ee, eventType, ""); } /** * Make sure we have only one ordering!!! If the sample matching is botched, there will be problems. */ private void checkAllBioAssayDimensionsMatch(Collection<BioAssayDimension> dims) { ProcessedExpressionDataVectorCreateHelperServiceImpl.log .info("Data set has more than one bioassaydimension for its processed data vectors"); List<BioMaterial> ordering = new ArrayList<>(); int i = 0; for (BioAssayDimension dim : dims) { int j = 0; for (BioAssay ba : dim.getBioAssays()) { BioMaterial sample = ba.getSampleUsed(); if (i == 0) { ordering.add(sample); } else { if (!ordering.get(j).equals(sample)) { throw new IllegalStateException( "Two dimensions didn't have the same BioMaterial ordering for the same data set."); } j++; } } i++; } } private Collection<ProcessedExpressionDataVector> computeRanks( Collection<ProcessedExpressionDataVector> processedDataVectors, ExpressionDataDoubleMatrix intensities) { DoubleArrayList ranksByMean = this.getRanks(intensities, ProcessedExpressionDataVectorDao.RankMethod.mean); assert ranksByMean != null; DoubleArrayList ranksByMax = this.getRanks(intensities, ProcessedExpressionDataVectorDao.RankMethod.max); assert ranksByMax != null; for (ProcessedExpressionDataVector vector : processedDataVectors) { CompositeSequence de = vector.getDesignElement(); if (intensities.getRow(de) == null) { ProcessedExpressionDataVectorCreateHelperServiceImpl.log .warn("No intensity value for " + de + ", rank for vector will be null"); vector.setRankByMean(null); vector.setRankByMax(null); continue; } Integer i = intensities.getRowIndex(de); double rankByMean = ranksByMean.get(i) / ranksByMean.size(); double rankByMax = ranksByMax.get(i) / ranksByMax.size(); vector.setRankByMean(rankByMean); vector.setRankByMax(rankByMax); } return processedDataVectors; } private DoubleArrayList getRanks(ExpressionDataDoubleMatrix intensities, ProcessedExpressionDataVectorDao.RankMethod method) { ProcessedExpressionDataVectorCreateHelperServiceImpl.log.debug("Getting ranks"); assert intensities != null; DoubleArrayList result = new DoubleArrayList(intensities.rows()); for (ExpressionDataMatrixRowElement de : intensities.getRowElements()) { double[] rowObj = ArrayUtils.toPrimitive(intensities.getRow(de.getDesignElement())); double valueForRank = Double.MIN_VALUE; if (rowObj != null) { DoubleArrayList row = new DoubleArrayList(rowObj); switch (method) { case max: valueForRank = DescriptiveWithMissing.max(row); break; case mean: valueForRank = DescriptiveWithMissing.mean(row); break; default: throw new UnsupportedOperationException(); } } result.add(valueForRank); } return Rank.rankTransform(result); } /** * Masking is done even if the array design is not two-color, so the decision whether to mask or not must be done * elsewhere. * * @param inMatrix The matrix to be masked */ private void maskMissingValues(ExpressionDataDoubleMatrix inMatrix, ExpressionDataBooleanMatrix missingValues) { if (missingValues != null) ExpressionDataDoubleMatrixUtil.maskMatrix(inMatrix, missingValues); } }