Fasta.java Source code

Java tutorial

Introduction

Here is the source code for Fasta.java

Source

import org.apache.commons.io.FileUtils;
import org.biojava.nbio.core.sequence.DNASequence;
import org.biojava.nbio.core.sequence.io.FastaReaderHelper;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

/**
 * Copyright (C) 2016  Thomas Gregory
    
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
    
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
    
 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
    
 * Created by Charles Gregory on 11/6/2015.
 * Utility methods for working with FASTA files. This class can download FASTA files from phagesdb.org
 * by phage name (as given in the phage list), parse the FASTA sequence, and split it into fixed-length pieces.
 */
public class Fasta {
    /** Base URL under which phagesdb.org serves FASTA files. */
    private static final String FASTA_BASE_URL = "http://phagesdb.org/media/fastas/";

    /**
     * Phage names whose spelling in the phage list differs from the file name
     * used on phagesdb.org, mapped to the base name of the remote file.
     */
    private static final Map<String, String> REMOTE_NAME_OVERRIDES;

    static {
        Map<String, String> overrides = new HashMap<>();
        overrides.put("BrownCNA", "Browncna");
        overrides.put("GUmbie", "Gumbie");
        overrides.put("Numberten", "NumberTen");
        overrides.put("Seabiscuit", "SeaBiscuit");
        overrides.put("Caliburn", "Excalibur");
        overrides.put("Godpower", "GodPower");
        overrides.put("Romney", "Romney2012");
        REMOTE_NAME_OVERRIDES = Collections.unmodifiableMap(overrides);
    }

    /**
     * Reads the first record of a FASTA file.
     *
     * @param path path of the FASTA file to read
     * @return a two-element array: index 0 holds the upper-cased sequence of the
     *         first record and index 1 its upper-cased reverse complement. If the
     *         file cannot be read or contains no record, the failure is reported
     *         on {@code System.err} and both elements are {@code null}.
     */
    public static String[] parse(String path) {
        String[] strands = new String[2];
        try {
            Map<String, DNASequence> records = FastaReaderHelper.readFastaDNASequence(new File(path));
            if (records.isEmpty()) {
                System.err.println("No FASTA records found in " + path);
                return strands;
            }
            // Only the first record is used; later records in the file are ignored.
            DNASequence first = records.values().iterator().next();
            // Locale.ROOT keeps the case conversion independent of the default locale.
            String forward = first.getSequenceAsString().toUpperCase(Locale.ROOT);
            String reverse = first.getReverseComplement().getSequenceAsString().toUpperCase(Locale.ROOT);
            // Assign only after both strands were computed so the result is never half-filled.
            strands[0] = forward;
            strands[1] = reverse;
        } catch (Exception e) {
            // Best effort: callers receive {null, null} rather than an exception.
            System.err.println("Could not read FASTA file " + path + ": " + e);
        }
        return strands;
    }

    /**
     * Splits the sequence into the set of every subsequence of the given size
     * that occurs in it, including the reverse strand.
     *
     * @param seq    strands as returned by {@link #parse(String)}; only the first
     *               two elements are used, and {@code null} or missing strands
     *               contribute nothing
     * @param length size of each subsequence; values below 1 give an empty set
     * @return a mutable set of the distinct subsequences of both strands
     */
    public static Set<CharSequence> splitFasta(String[] seq, int length) {
        return new HashSet<>(windows(seq, length));
    }

    /**
     * Downloads the FASTA file of a phage from phagesdb.org into the
     * {@code Fastas} directory below the current working directory, unless a
     * file of that name already exists there. Known spelling differences between
     * the phage list and the remote file names are corrected for the URL only;
     * the local file always uses {@code name}.
     *
     * @param name phage name as it appears in the phage list
     * @return path of the local FASTA file; the file does not exist if the
     *         download failed (the failure is reported on {@code System.err})
     */
    public static String Download(String name) {
        Objects.requireNonNull(name, "name");
        String remoteName = REMOTE_NAME_OVERRIDES.getOrDefault(name, name);
        String url = FASTA_BASE_URL + remoteName + ".fasta";
        String base = new File("").getAbsolutePath();
        File file = new File(base + "/Fastas/" + name + ".fasta");
        if (!file.exists()) {
            try {
                FileUtils.copyURLToFile(new URL(url), file);
            } catch (IOException e) {
                System.err.println("Could not download " + url + " to " + file);
                e.printStackTrace();
                // A partially written file would pass the exists() check on the next
                // call and be treated as a valid cached download, so remove it.
                if (file.exists() && !file.delete()) {
                    System.err.println("Could not remove incomplete download " + file);
                }
            }
        }
        return file.toString();
    }

    /**
     * Processes a phage by name: downloads its FASTA file, parses it and splits
     * it into the set of distinct subsequences.
     *
     * @param name phage name as it appears in the phage list
     * @param bps  size of each subsequence in base pairs
     * @return the distinct subsequences of both strands
     */
    public static Set<CharSequence> process(String name, int bps) {
        String path = Download(name);
        String[] seq = parse(path);
        return splitFasta(seq, bps);
    }

    /**
     * List variant of {@link #splitFasta(String[], int)}: duplicates are kept and
     * the forward strand's subsequences come before the reverse strand's.
     */
    private static List<CharSequence> splitFasta2(String[] seq, int length) {
        return windows(seq, length);
    }

    /**
     * Processes a phage by name like {@link #process(String, int)} but returns
     * the subsequences as a list, keeping duplicates.
     *
     * @param name phage name as it appears in the phage list
     * @param bps  size of each subsequence in base pairs
     * @return every subsequence of both strands, forward strand first
     */
    public static List<CharSequence> processPrimers(String name, int bps) {
        String path = Download(name);
        String[] seq = parse(path);
        return splitFasta2(seq, bps);
    }

    /**
     * Collects every window of {@code length} characters from the first two
     * strands of {@code seq}. Within a strand the windows are grouped by start
     * offset modulo {@code length} (offsets 0, length, 2*length, ... first, then
     * 1, length+1, ... and so on).
     */
    private static List<CharSequence> windows(String[] seq, int length) {
        List<CharSequence> result = new ArrayList<>();
        if (seq == null || length <= 0) {
            return result;
        }
        for (int s = 0; s < seq.length && s < 2; s++) {
            String strand = seq[s];
            if (strand == null) {
                continue;
            }
            // Last valid start index, inclusive, so the final window is not dropped.
            int lastStart = strand.length() - length;
            for (int phase = 0; phase < length && phase <= lastStart; phase++) {
                for (int i = phase; i <= lastStart; i += length) {
                    result.add(strand.substring(i, i + length));
                }
            }
        }
        return result;
    }
}