// Java tutorial
/* * Copyright (C) 2009-2010 Institute for Computational Biomedicine, * Weill Medical College of Cornell University * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package edu.cornell.med.icb.goby.modes; import com.martiansoftware.jsap.JSAPException; import com.martiansoftware.jsap.JSAPResult; import edu.cornell.med.icb.goby.algorithmic.algorithm.ATGCCorrectionWeight; import edu.cornell.med.icb.goby.algorithmic.algorithm.ATProportionWeight; import edu.cornell.med.icb.goby.algorithmic.algorithm.BaseProportionWeight; import edu.cornell.med.icb.goby.algorithmic.algorithm.GCProportionWeight; import edu.cornell.med.icb.goby.algorithmic.algorithm.HeptamerWeight; import edu.cornell.med.icb.goby.algorithmic.algorithm.WeightCalculator; import edu.cornell.med.icb.goby.algorithmic.data.HeptamerInfo; import edu.cornell.med.icb.goby.algorithmic.data.WeightsInfo; import edu.cornell.med.icb.goby.reads.Reads; import edu.cornell.med.icb.goby.reads.ReadsReader; import it.unimi.dsi.lang.MutableString; import it.unimi.dsi.logging.ProgressLogger; import org.apache.commons.io.FilenameUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import java.io.FileInputStream; import java.io.IOException; import java.util.List; import java.util.LinkedList; /** * Create the read to weight map. 
This class scans a compact reads file to determine which heptamer * occurs at the beginning of each read. When a heptamer is found that exists in the heptamer to * weight data structure provided as input * (see {@link edu.cornell.med.icb.goby.modes.HeptamerWeightsMode} mode to generate this data * structure), the read is associated to the heptamer weight in a map. This map is written as * Java serialized file for use with modes that estimate gene/transcript/exon/other counts. * * @author Fabien Campagne * Date: May 17 2009 * Time: 11:15 AM */ public class ReadsToWeightsMode extends AbstractGobyMode { /** * The mode name. */ private static final String MODE_NAME = "reads-to-weights"; /** * The mode description help text. */ private static final String MODE_DESCRIPTION = "Create a data structure that maps reads to a weights."; /** * Used to log debug and informational messages. */ private static final Log LOG = LogFactory.getLog(ReadsToWeightsMode.class); private List<String> inputFilenames; private String mapFilename; private String heptamerInfoFilename; private String estimationMethod; boolean colorSpace; @Override public String getModeName() { return MODE_NAME; } @Override public String getModeDescription() { return MODE_DESCRIPTION; } /** * Configure. 
* * @param args command line arguments * @return this object for chaining * @throws java.io.IOException error parsing * @throws com.martiansoftware.jsap.JSAPException * error parsing */ @Override public AbstractCommandLineMode configure(final String[] args) throws IOException, JSAPException { final JSAPResult jsapResult = parseJsapArguments(args); for (String inputFilename : jsapResult.getStringArray("input")) { addInputFilename(inputFilename); } heptamerInfoFilename = jsapResult.getString("heptamer-info"); mapFilename = jsapResult.getString("map"); estimationMethod = jsapResult.getString("method"); colorSpace = jsapResult.getBoolean("color-space"); return this; } enum WeightCalculationMethod { HEPTAMERS, G, C, A, T, GC, AT, ATGC } public synchronized void addInputFilename(final String inputFilename) { if (inputFilenames == null) { inputFilenames = new LinkedList<String>(); } inputFilenames.add(inputFilename); } public List<String> getInputFilenames() { return inputFilenames; } public boolean getColorSpace() { return colorSpace; } public void setColorSpace(final boolean colorSpace) { this.colorSpace = colorSpace; } public String getMapFilename() { return mapFilename; } public void setMapFilename(final String mapFilename) { this.mapFilename = mapFilename; } public String getHeptamerInfoFilename() { return heptamerInfoFilename; } public void setHeptamerInfoFilename(final String heptamerInfoFilename) { this.heptamerInfoFilename = heptamerInfoFilename; } public String getEstimationMethod() { return estimationMethod; } public void setEstimationMethod(final String estimationMethod) { this.estimationMethod = estimationMethod; } @Override public void execute() throws IOException { HeptamerInfo heptamers = null; try { if (heptamerInfoFilename != null) { heptamers = HeptamerInfo.load(heptamerInfoFilename); } } catch (ClassNotFoundException e) { System.err.println("Cannot load heptamer information from file " + heptamerInfoFilename); System.exit(1); } final ProgressLogger 
progress = new ProgressLogger(); progress.start(); progress.displayFreeMemory = true; WeightCalculator calculator = null; WeightCalculationMethod method = null; try { method = WeightCalculationMethod.valueOf(estimationMethod.toUpperCase()); } catch (IllegalArgumentException e) { System.err.println("The estimation method entered is not valid. Valid methods include " + "heptamers, GC, AT, A, T, C, G "); System.exit(1); } switch (method) { case HEPTAMERS: if (heptamers == null) { System.err.println("Heptamer info must be provided to estimate heptamer weights."); System.exit(0); } calculator = new HeptamerWeight(heptamers); break; case G: { final BaseProportionWeight calc = new BaseProportionWeight(colorSpace); calc.setBase('G'); calculator = calc; break; } case C: { final BaseProportionWeight calc = new BaseProportionWeight(colorSpace); calc.setBase('C'); calculator = calc; break; } case A: { final BaseProportionWeight calc = new BaseProportionWeight(colorSpace); calc.setBase('A'); calculator = calc; break; } case T: { final BaseProportionWeight calc = new BaseProportionWeight(colorSpace); calc.setBase('T'); calculator = calc; break; } case GC: calculator = new GCProportionWeight(colorSpace); break; case AT: calculator = new ATProportionWeight(colorSpace); break; case ATGC: calculator = new ATGCCorrectionWeight(colorSpace); break; } for (final String inputFilename : inputFilenames) { // for each reads file: LOG.info("Now scanning " + inputFilename); if (inputFilenames.size() >= 1) { // if we process one or more reads file, build the map filename dynamically for each input file. mapFilename = FilenameUtils.removeExtension(inputFilename) + "." 
+ calculator.id() + "-weights"; } final ReadsReader reader = new ReadsReader(new FileInputStream(inputFilename)); try { final WeightsInfo weights = new WeightsInfo(); final MutableString sequence = new MutableString(); int numberOfReads = 0; for (final Reads.ReadEntry readEntry : reader) { ReadsReader.decodeSequence(readEntry, sequence); final int readIndex = readEntry.getReadIndex(); final float weight = calculator.weight(sequence); weights.setWeight(readIndex, weight); progress.lightUpdate(); numberOfReads++; } weights.size(numberOfReads); progress.stop(); weights.save(mapFilename); } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { // NOPMD // silently ignore } } } } } public static void main(final String[] args) throws IOException, JSAPException { new ReadsToWeightsMode().configure(args).execute(); } }