Java tutorial
/**
 * @UNCC Fodor Lab
 * @author Anthony Fodor
 * @email anthony.fodor@gmail.com
 * @date Feb 9, 2017
 * @disclaimer This code is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version,
 * provided that any use properly credits the author.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details at http://www.gnu.org
 */
package bioLockJ.module.parser.r16s;

import java.io.BufferedReader;
import java.io.File;
import java.util.StringTokenizer;
import org.apache.commons.io.filefilter.IOFileFilter;
import org.apache.commons.io.filefilter.NameFileFilter;
import org.apache.commons.io.filefilter.TrueFileFilter;
import bioLockJ.AppController;
import bioLockJ.Config;
import bioLockJ.Constants;
import bioLockJ.Log;
import bioLockJ.module.parser.ParserModule;
import bioLockJ.node.r16s.QiimeNode;
import bioLockJ.util.MetadataUtil;
import bioLockJ.util.QiimeMappingUtil;

/**
 * To see the file format: > head otu_table_L2.txt
 *
 * # Constructed from biom file
 * #OTU ID                          3A.1    6A.1    120A.1    7A.1
 * k__Bacteria;p__Actinobacteria    419.0   26.0    90.0      70.0
 */
public class QiimeParser extends ParserModule
{
	/**
	 * QIIME doesn't support the Config.getBoolean( Config.INPUT_DEMULTIPLEX ) option.
	 */
	@Override
	public void checkDependencies() throws Exception
	{
		super.checkDependencies();
		if( Config.getBoolean( Config.INPUT_DEMULTIPLEX ) )
		{
			throw new Exception( "BioLockJ does not support: " + Config.INPUT_DEMULTIPLEX + "=TRUE for QIIME" );
		}
	}

	/**
	 * Merge metadata if executing a re-run (if needed).
	 * Convert the QIIME mapping into R-friendly metadata keyed by SAMPLE_ID.
	 * Then proceed as usual.
	 */
	@Override
	public void executeProjectFile() throws Exception
	{
		QiimeMappingUtil.buildMapping( getTempDir(), getInputFiles().get( 0 ) );
		super.executeProjectFile();
	}

	/**
	 * Create OTU nodes based on classifier output. One file holds the info for all
	 * sampleIDs, which are indexed within orderedSampleIDs.
	 */
	@Override
	protected void createOtuNodes() throws Exception
	{
		final File file = getInputFiles().get( 0 );
		Log.out.info( "PARSE FILE = " + file.getName() );
		final BufferedReader reader = AppController.getFileReader( file );
		try
		{
			for( String line = reader.readLine(); line != null; line = reader.readLine() )
			{
				// skip the "# Constructed from biom file" and "#OTU ID ..." header lines
				if( !line.startsWith( "#" ) )
				{
					final StringTokenizer st = new StringTokenizer( line, Constants.TAB_DELIM );
					int index = 0;
					final String taxa = st.nextToken();
					while( st.hasMoreTokens() )
					{
						// QIIME writes counts as doubles (e.g. 419.0), so truncate to int
						final int count = Double.valueOf( st.nextToken() ).intValue();
						final String id = QiimeMappingUtil.getSampleIds().get( index++ );
						if( count > 0 )
						{
							final QiimeNode node = new QiimeNode( taxa, count );
							addOtuNode( id, node );
						}
					}
				}
			}
		}
		catch( final Exception ex )
		{
			throw new Exception( "Error occurred parsing file: " + file.getName(), ex );
		}
		finally
		{
			reader.close();
		}
	}

	/**
	 * Get the output for the line, merged with its metadata.
	 */
	@Override
	protected String getMergedLine( final String line ) throws Exception
	{
		final StringBuffer sb = new StringBuffer();
		final String sampleId = new StringTokenizer( line, Constants.TAB_DELIM ).nextToken();
		if( MetadataUtil.getMetaFileFirstColValues().contains( sampleId ) )
		{
			sb.append( rFormat( line ) );
			for( final String attribute: MetadataUtil.getAttributes( sampleId ) )
			{
				sb.append( Constants.TAB_DELIM ).append( rFormat( attribute ) );
			}
		}
		else
		{
			Log.out.warn( "Missing record for: " + sampleId + " in metadata: "
					+ MetadataUtil.getMetadata().getAbsolutePath() );
			return null;
		}

		if( mergeLineCount++ < 2 )
		{
			Log.out.info( "Example: Merge Metadata Line [" + sampleId + "] = " + sb.toString() );
		}

		return sb.toString();
	}

	/**
	 * Init input files to find the most specific taxa file; it contains all the info
	 * for the taxa levels above it.
	 */
	@Override
	protected void initInputFiles( final File dir ) throws Exception
	{
		final String searchTerm = getLowestTaxaLevelFileName();
		Log.out.info( "Recursively search for most specific taxa file " + searchTerm + " in: " + getDirName( dir ) );
		final IOFileFilter ff = new NameFileFilter( searchTerm );
		setModuleInput( dir, ff, TrueFileFilter.INSTANCE );
	}

	/**
	 * Find the lowest configured taxonomy level and build the matching file name.
	 * @return OTU table file name for the lowest taxa level (e.g. otu_table_L6.txt for genus)
	 * @throws Exception if Config.REPORT_TAXONOMY_LEVELS is not configured
	 */
	private String getLowestTaxaLevelFileName() throws Exception
	{
		String level = "";
		if( Config.requireSet( Config.REPORT_TAXONOMY_LEVELS ).contains( Constants.SPECIES ) )
		{
			level = "7";
		}
		else if( Config.requireSet( Config.REPORT_TAXONOMY_LEVELS ).contains( Constants.GENUS ) )
		{
			level = "6";
		}
		else if( Config.requireSet( Config.REPORT_TAXONOMY_LEVELS ).contains( Constants.FAMILY ) )
		{
			level = "5";
		}
		else if( Config.requireSet( Config.REPORT_TAXONOMY_LEVELS ).contains( Constants.ORDER ) )
		{
			level = "4";
		}
		else if( Config.requireSet( Config.REPORT_TAXONOMY_LEVELS ).contains( Constants.CLASS ) )
		{
			level = "3";
		}
		else if( Config.requireSet( Config.REPORT_TAXONOMY_LEVELS ).contains( Constants.PHYLUM ) )
		{
			level = "2";
		}
		else if( Config.requireSet( Config.REPORT_TAXONOMY_LEVELS ).contains( Constants.DOMAIN ) )
		{
			level = "1";
		}

		return OTU_TABLE_PREFIX + level + ".txt";
	}

	private int mergeLineCount = 0;
	private static final String OTU_TABLE_PREFIX = "otu_table_L";
}
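For readers new to the parsing logic above, here is a minimal standalone sketch (not part of BioLockJ; the class name OtuLineParseDemo, the hard-coded sample IDs, and the counts are illustrative values taken from the example header in the class comment) showing how createOtuNodes() tokenizes a single otu_table_L2.txt data line: the first tab-delimited token is the taxa string, and each subsequent count is matched to the sample ID at the same column position.

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

/**
 * Standalone illustration only: mirrors the tokenizing done in
 * QiimeParser.createOtuNodes(), without the BioLockJ dependencies.
 */
public class OtuLineParseDemo
{
	public static void main( final String[] args )
	{
		// Column order taken from the example "#OTU ID" header line
		final List<String> sampleIds = Arrays.asList( "3A.1", "6A.1", "120A.1", "7A.1" );
		final String line = "k__Bacteria;p__Actinobacteria\t419.0\t26.0\t90.0\t70.0";

		final StringTokenizer st = new StringTokenizer( line, "\t" );
		final String taxa = st.nextToken();
		final Map<String, Integer> countsPerSample = new LinkedHashMap<>();
		int index = 0;
		while( st.hasMoreTokens() )
		{
			// counts are written as doubles (e.g. 419.0), so truncate to int
			final int count = Double.valueOf( st.nextToken() ).intValue();
			countsPerSample.put( sampleIds.get( index++ ), count );
		}

		System.out.println( taxa + " --> " + countsPerSample );
		// Prints: k__Bacteria;p__Actinobacteria --> {3A.1=419, 6A.1=26, 120A.1=90, 7A.1=70}
	}
}

Each non-zero entry in countsPerSample corresponds to one addOtuNode( id, node ) call in the real parser, where the node wraps the taxa string and the count for that sample.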