com.act.biointerpretation.Utils.OrgMinimalPrefixGenerator.java Source code

Java tutorial

Introduction

Here is the source code for com.act.biointerpretation.Utils.OrgMinimalPrefixGenerator.java

Source

/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package com.act.biointerpretation.Utils;

import act.shared.Organism;
import org.apache.commons.collections4.trie.PatriciaTrie;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;

/**
 * Computes, for a collection of organism names, the "minimal prefix" of each name: the shortest organism name in the
 * same collection that is a prefix of it. A name that has no shorter organism name as a prefix maps to itself.
 */
public class OrgMinimalPrefixGenerator {

    /** Organism name -> minimal prefix. Filled in by the constructor and handed out by the getter. */
    Map<String, String> orgNameToMinimalPrefix;

    /**
     * Builds the name -> minimal prefix mapping from the supplied organisms.
     *
     * <p>All organism names are loaded into a prefix tree (PatriciaTrie), which orders its keys lexicographically and
     * offers a {@code prefixMap} call returning every stored key that starts with a given string. In a lexicographic
     * ordering a prefix always comes before any longer string that starts with it, so repeatedly taking the first
     * remaining key and assigning it to every remaining key it prefixes yields the shortest possible prefix for each
     * name.
     *
     * <p>Organisms whose name is {@code null} are ignored, since the trie does not accept null keys and such a name
     * has no meaningful prefix.
     *
     * @param orgIterator iterator over the organisms whose names should be processed; it is fully consumed
     */
    public OrgMinimalPrefixGenerator(Iterator<Organism> orgIterator) {
        // The trie is a map, so each name is stored with a throwaway value; only the keys matter here.
        PatriciaTrie<Long> orgPrefixTrie = new PatriciaTrie<>();

        while (orgIterator.hasNext()) {
            String orgName = orgIterator.next().getName();
            if (orgName == null) {
                // A null key would make the trie throw and abort the whole computation.
                continue;
            }
            orgPrefixTrie.put(orgName, 1L);
        }

        orgNameToMinimalPrefix = new HashMap<>();

        while (!orgPrefixTrie.isEmpty()) {
            // The first remaining key cannot have a shorter remaining prefix, so it is its own minimal prefix.
            String minimalPrefix = orgPrefixTrie.firstKey();
            orgNameToMinimalPrefix.put(minimalPrefix, minimalPrefix);
            orgPrefixTrie.remove(minimalPrefix);

            // prefixMap returns a view of the trie, so the matching names are copied out first and
            // only removed from the trie once iteration over the view has finished.
            SortedMap<String, Long> namesWithPrefix = orgPrefixTrie.prefixMap(minimalPrefix);
            List<String> namesToRemove = new ArrayList<>();

            for (String orgWithPrefix : namesWithPrefix.keySet()) {
                orgNameToMinimalPrefix.put(orgWithPrefix, minimalPrefix);
                namesToRemove.add(orgWithPrefix);
            }

            for (String nameToRemove : namesToRemove) {
                orgPrefixTrie.remove(nameToRemove);
            }
        }
    }

    /**
     * Returns the mapping computed by the constructor.
     *
     * @return the map from organism name to its minimal prefix; this is the internal map itself, not a copy
     */
    public Map<String, String> getMinimalPrefixMapping() {
        return orgNameToMinimalPrefix;
    }

}