com.act.lcms.db.io.parser.PlateCompositionParser.java Source code

Java tutorial

Introduction

Here is the source code for com.act.lcms.db.io.parser.PlateCompositionParser.java

Source

/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package com.act.lcms.db.io.parser;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Parses a tab-separated plate composition file into plate-level properties and per-well composition tables.
 *
 * <p>{@link #processFile(File)} classifies each line of the input as one of:
 * <ul>
 *   <li>{@code >>tableName<TAB>col1<TAB>col2...} -- starts a composition table; the remaining fields are the
 *       column labels for that table.</li>
 *   <li>{@code >name<TAB>value} -- a plate attribute, stored in the plate properties map.</li>
 *   <li>a blank (whitespace-only) line -- terminates the current composition table, if any.</li>
 *   <li>anything else -- a row of the current composition table whose first field is the row label.  Such lines
 *       are silently ignored when no composition table is open.</li>
 * </ul>
 *
 * <p>Parsed results are stored in instance fields that are never cleared, so results accumulate if
 * {@link #processFile(File)} is called more than once on the same instance.
 */
public class PlateCompositionParser {
    // TODO: factor out the well composition tables into a common parser that can be refined by type after some practice.

    /** Plate attribute name (leading '>' removed) to attribute value. */
    private final Map<String, String> plateProperties = new HashMap<>();
    /** Table name (leading '>>' removed) to a map of (row label, column label) to cell text. */
    private final Map<String, Map<Pair<String, String>, String>> compositionTables = new HashMap<>();
    /** (row label, column label) to zero-based (row index within its table, column index). */
    private final Map<Pair<String, String>, Pair<Integer, Integer>> coordinatesToIndices = new HashMap<>();

    /**
     * Reads and parses a plate composition file, adding its contents to this parser's maps.
     *
     * @param inFile the tab-separated plate composition file to read; decoded as UTF-8
     * @throws IOException if the file cannot be opened or read
     * @throws RuntimeException if a composition table header has fewer than two fields, if a plate attribute line
     *     does not split into a name and a value, or if a table row's field count differs from its header's
     */
    public void processFile(File inFile) throws IOException {
        // Decode explicitly as UTF-8: FileReader would use the platform default charset, which makes the parsed
        // text depend on the machine and JVM version the parser happens to run on.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(inFile), StandardCharsets.UTF_8))) {
            // State of the composition table currently being read; the columns are null when no table is open.
            String compositionTableName = null;
            List<String> compositionTableColumns = null;
            int rowIndexInCompositionTable = 0;

            String line;
            while ((line = br.readLine()) != null) {
                // The ">>" test must precede the ">" test, since every table header also starts with ">".
                if (line.startsWith(">>")) {
                    compositionTableColumns = parseCompositionTableHeader(line);
                    compositionTableName = compositionTableColumns.get(0).replaceFirst("^>>", "");
                    rowIndexInCompositionTable = 0;

                } else if (line.startsWith(">")) {
                    parsePlateAttribute(line);

                } else if (line.trim().isEmpty()) {
                    // Assume a blank line terminates a composition table.
                    compositionTableName = null;
                    compositionTableColumns = null;
                    rowIndexInCompositionTable = 0;

                } else if (compositionTableColumns != null) {
                    parseCompositionTableRow(
                            line, compositionTableName, compositionTableColumns, rowIndexInCompositionTable);
                    rowIndexInCompositionTable++;
                }
            }
        }
    }

    /**
     * Splits a composition table header line (one starting with {@code >>}) into its tab-separated fields.
     *
     * @param line the raw header line
     * @return the header fields; element 0 is the table name still carrying its {@code >>} prefix, and the
     *     remaining elements are the column labels (possibly empty strings)
     * @throws RuntimeException if the line has fewer than two fields
     */
    private static List<String> parseCompositionTableHeader(String line) {
        // TODO: add max table width based on plate type.
        // This split preserves blanks and doesn't merge consecutive delimiters, so header and row widths can be
        // compared exactly later on.
        String[] fields = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, "\t");
        if (fields.length < 2) {
            throw new RuntimeException(
                    String.format("Found malformed composition table header: %s", line));
        }
        return Arrays.asList(fields);
    }

    /**
     * Parses a plate attribute line (one starting with a single {@code >}) and records it in the plate properties.
     *
     * @param line the raw attribute line
     * @throws RuntimeException if the line does not split into exactly a name and a value
     */
    private void parsePlateAttribute(String line) {
        // Split into at most two pieces so that any further tabs stay inside the value.
        String[] fields = StringUtils.split(line, "\t", 2);
        if (fields.length != 2) {
            System.err.format("Too few fields: %s\n", StringUtils.join(fields, ", "));
            System.err.flush();
            throw new RuntimeException(String.format("Found malformed plate attribute line: %s", line));
        }
        plateProperties.put(fields[0].replaceFirst("^>", ""), fields[1]);
    }

    /**
     * Parses one row of a composition table, storing each cell and its position.
     *
     * @param line the raw row line; its first field is the row label
     * @param tableName the name of the table this row belongs to
     * @param columns the header fields of the table, as returned by the header parser (element 0 is the name)
     * @param rowIndex the zero-based index of this row within its table
     * @throws RuntimeException if the row's field count differs from the header's
     */
    private void parseCompositionTableRow(String line, String tableName, List<String> columns, int rowIndex) {
        // This split method with a very long name preserves blanks and doesn't merge consecutive delimiters.
        String[] fields = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, "\t");
        // The split ^^ preserves blanks, so we can exactly compare the lengths.
        if (fields.length != columns.size()) {
            throw new RuntimeException(String.format(
                    "Found %d fields where %d were expected in composition table line:\n  '%s'\n",
                    fields.length, columns.size(), line));
        }

        // Field 0 is the row label, so the cells start at index 1.
        for (int i = 1; i < fields.length; i++) {
            String columnLabel = columns.get(i);
            // Skip columns whose header label is empty.  Note this tests the header, not the cell: an empty cell
            // under a labeled column is still stored (as an empty string).
            if (columnLabel == null || columnLabel.isEmpty()) {
                continue;
            }
            Pair<String, String> coordinates = Pair.of(fields[0], columnLabel);
            // Note: assumes every row appears in each composition table (even empty ones).
            coordinatesToIndices.put(coordinates, Pair.of(rowIndex, i - 1));

            // Tables are created lazily, so a table with no labeled cells never appears in the result.
            Map<Pair<String, String>, String> thisTable = compositionTables.get(tableName);
            if (thisTable == null) {
                thisTable = new HashMap<>();
                compositionTables.put(tableName, thisTable);
            }
            // TODO: add paranoid check for repeated keys?  Shouldn't be possible unless tables are repeated.
            thisTable.put(coordinates, fields[i]);
        }
    }

    /**
     * Returns the plate attributes parsed so far.
     *
     * @return the internal (live, mutable) map of attribute name to attribute value
     */
    public Map<String, String> getPlateProperties() {
        return plateProperties;
    }

    /**
     * Returns the composition tables parsed so far.
     *
     * @return the internal (live, mutable) map of table name to a map of (row label, column label) to cell text
     */
    public Map<String, Map<Pair<String, String>, String>> getCompositionTables() {
        return compositionTables;
    }

    /**
     * Returns the positions of the cells parsed so far.
     *
     * @return the internal (live, mutable) map of (row label, column label) to zero-based (row index, column index)
     */
    public Map<Pair<String, String>, Pair<Integer, Integer>> getCoordinatesToIndices() {
        return coordinatesToIndices;
    }
}