org.opencb.cellbase.mongodb.db.GenomeSequenceMongoDBAdaptor.java Source code

Java tutorial

Introduction

Here is the source code for org.opencb.cellbase.mongodb.db.GenomeSequenceMongoDBAdaptor.java

Source

/*
 * Copyright 2015 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.cellbase.mongodb.db;

import com.mongodb.*;
import org.opencb.biodata.models.feature.Region;
import org.opencb.cellbase.core.common.GenomeSequenceFeature;
import org.opencb.cellbase.core.lib.api.core.GenomeSequenceDBAdaptor;
import org.opencb.cellbase.mongodb.MongoDBCollectionConfiguration;
import org.opencb.datastore.core.QueryOptions;
import org.opencb.datastore.core.QueryResult;
import org.opencb.datastore.mongodb.MongoDataStore;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class GenomeSequenceMongoDBAdaptor extends MongoDBAdaptor implements GenomeSequenceDBAdaptor {

    // Chunk size used by getChunk/getOffset and to build the "<n>k" chunk-id suffix;
    // initialised from MongoDBCollectionConfiguration.GENOME_SEQUENCE_CHUNK_SIZE.
    private int chunkSize = MongoDBCollectionConfiguration.GENOME_SEQUENCE_CHUNK_SIZE;

    /**
     * Creates an adaptor on top of the given database handle by delegating to the superclass.
     *
     * NOTE(review): unlike the (DB, species, version) constructor, this one does not assign
     * {@code mongoDBCollection} itself - confirm the superclass provides whatever the queries need.
     *
     * @param db database handle passed straight to the superclass constructor
     */
    public GenomeSequenceMongoDBAdaptor(DB db) {
        super(db);
    }

    /**
     * Creates an adaptor for a species/version and binds it to the "genome_sequence" collection.
     *
     * @param db      database handle; also used to look up the "genome_sequence" collection
     * @param species species identifier forwarded to the superclass
     * @param version version identifier forwarded to the superclass
     */
    public GenomeSequenceMongoDBAdaptor(DB db, String species, String version) {
        super(db, species, version);
        // Collection handle obtained from the legacy DB API.
        mongoDBCollection = db.getCollection("genome_sequence");
    }

    /**
     * Creates an adaptor backed by a {@link MongoDataStore} and binds it to the
     * "genome_sequence" collection of that data store.
     *
     * @param species        species identifier forwarded to the superclass
     * @param assembly       assembly identifier forwarded to the superclass
     * @param mongoDataStore data store from which the "genome_sequence" collection is obtained
     */
    public GenomeSequenceMongoDBAdaptor(String species, String assembly, MongoDataStore mongoDataStore) {
        super(species, assembly, mongoDataStore);
        // Collection handle obtained from the data-store API (stored separately from mongoDBCollection).
        mongoDBCollection2 = mongoDataStore.getCollection("genome_sequence");

        // Emitted at INFO level every time an adaptor is constructed.
        logger.info("GenomeSequenceMongoDBAdaptor: in 'constructor'");
    }

    /**
     * Computes the number of the chunk a position falls into.
     *
     * @param position position to locate
     * @return {@code position} divided by the configured chunk size (integer division)
     */
    private int getChunk(int position) {
        final int size = chunkSize;
        return position / size;
    }

    /**
     * Computes where a position sits relative to the start of its chunk.
     *
     * @param position position to locate
     * @return the remainder of {@code position} divided by the configured chunk size
     */
    private int getOffset(int position) {
        final int size = chunkSize;
        return position % size;
    }

    /**
     * Returns the complement of a nucleotide sequence without reversing it.
     *
     * <p>Upper-case {@code A}/{@code T} and {@code C}/{@code G} are swapped with each other.
     * Every other character (for example {@code N}, lower-case letters or digits) is copied
     * through unchanged. The mapping is applied in a single pass over the characters, so no
     * placeholder characters are needed and characters already present in the input can never
     * be mistaken for an intermediate value.
     *
     * @param sequence sequence to complement; must not be {@code null}
     * @return a new string of the same length holding the complemented bases
     * @throws NullPointerException if {@code sequence} is {@code null}
     */
    public static String getComplementarySequence(String sequence) {
        char[] bases = sequence.toCharArray();
        for (int i = 0; i < bases.length; i++) {
            switch (bases[i]) {
                case 'A':
                    bases[i] = 'T';
                    break;
                case 'T':
                    bases[i] = 'A';
                    break;
                case 'C':
                    bases[i] = 'G';
                    break;
                case 'G':
                    bases[i] = 'C';
                    break;
                default:
                    // Anything that is not an upper-case A/C/G/T is left as it is.
                    break;
            }
        }
        return new String(bases);
    }

    /**
     * Fetches the sequence for a single region by wrapping it in a one-element list and
     * delegating to {@link #getAllByRegionList(List, QueryOptions)}.
     *
     * @param chromosome chromosome of the region
     * @param start      start position of the region
     * @param end        end position of the region
     * @param options    query options forwarded unchanged
     * @return the first (and only) result produced for the region
     */
    @Override
    public QueryResult getByRegion(String chromosome, int start, int end, QueryOptions options) {
        List<Region> singleRegion = Arrays.asList(new Region(chromosome, start, end));
        List<QueryResult> results = getAllByRegionList(singleRegion, options);
        return results.get(0);
    }

    /**
     * Fetches the genome sequence covered by each of the given regions.
     *
     * <p>For every region one query is built that selects, through the {@code _chunkIds} field,
     * all chunks between the chunk holding the region start and the chunk holding the region end.
     * The {@code sequence} fields of the returned chunks are concatenated and the part that
     * corresponds to the region is cut out. Each query result is then replaced by a single
     * {@link GenomeSequenceFeature}.
     *
     * <p>Side effect: a region whose start or end is below 1 is modified in place so that the
     * offending coordinate becomes 1.
     *
     * <p>When no chunk is found for a region, or the concatenated chunks are shorter than the
     * region requires, or the region end lies before its start, the feature carries an empty
     * sequence. In the "no chunk found" case its sequence type and assembly are {@code null}.
     *
     * <p>NOTE(review): chunks are concatenated in the order the query returns them and no sort
     * is requested here - confirm that executeQueryList2 yields them in ascending chunk order.
     *
     * @param regions regions to resolve; elements may be modified as described above
     * @param options query options forwarded to the query execution
     * @return one query result per region, in the same order as {@code regions}
     */
    @Override
    public List<QueryResult> getAllByRegionList(List<Region> regions, QueryOptions options) {
        // Chunk ids have the form "<chromosome>_<chunkNumber>_<chunkSize / 1000>k".
        String chunkIdSuffix = this.chunkSize / 1000 + "k";

        List<DBObject> queries = new ArrayList<>(regions.size());
        List<String> ids = new ArrayList<>(regions.size());
        for (Region region : regions) {
            // positions below 1 are not allowed
            if (region.getStart() < 1) {
                region.setStart(1);
            }
            if (region.getEnd() < 1) {
                region.setEnd(1);
            }

            int regionChunkStart = getChunk(region.getStart());
            int regionChunkEnd = getChunk(region.getEnd());
            List<String> chunkIds = new ArrayList<>();
            for (int chunkId = regionChunkStart; chunkId <= regionChunkEnd; chunkId++) {
                chunkIds.add(region.getChromosome() + "_" + chunkId + "_" + chunkIdSuffix);
            }

            DBObject query = QueryBuilder.start("_chunkIds").in(chunkIds).get();
            queries.add(query);
            // The id is taken after clamping so it reflects the coordinates actually queried.
            ids.add(region.toString());

            logger.info(query.toString());
        }

        List<QueryResult> queryResults = executeQueryList2(ids, queries, options);
        for (int i = 0; i < regions.size(); i++) {
            Region region = regions.get(i);
            QueryResult queryResult = queryResults.get(i);

            List<?> chunks = queryResult.getResult();
            StringBuilder sb = new StringBuilder();
            String sequenceType = null;
            String assembly = null;
            // A query may match nothing (unknown chromosome, region past the end, inverted region);
            // in that case there is no first chunk to read the metadata from.
            if (chunks != null && !chunks.isEmpty()) {
                BasicDBObject firstChunk = (BasicDBObject) chunks.get(0);
                sequenceType = firstChunk.getString("sequenceType");
                assembly = firstChunk.getString("assembly");
                for (Object chunk : chunks) {
                    sb.append(((BasicDBObject) chunk).get("sequence"));
                }
            }

            String subStr = extractRegionSequence(sb.toString(), region);
            GenomeSequenceFeature genomeSequenceFeature = new GenomeSequenceFeature(region.getChromosome(),
                    region.getStart(), region.getEnd(), 1, sequenceType, assembly, subStr);

            queryResult.setResult(Arrays.asList(genomeSequenceFeature));
        }

        return queryResults;
    }

    /**
     * Cuts the bases belonging to {@code region} out of the concatenated chunk sequences.
     *
     * @param chunkSequence concatenation of the chunk sequences, starting with the chunk that
     *                      contains the region start
     * @param region        region whose bases are wanted
     * @return the region's bases, or an empty string when {@code chunkSequence} is empty or too
     *         short, or when the region end lies before its start
     */
    private String extractRegionSequence(String chunkSequence, Region region) {
        if (chunkSequence.isEmpty() || region.getEnd() < region.getStart()) {
            return "";
        }

        int startStr = getOffset(region.getStart());
        int endStr = startStr + (region.getEnd() - region.getStart()) + 1;
        if (getChunk(region.getStart()) == 0) {
            // Offsets inside chunk 0 are shifted down by one; presumably that chunk starts at
            // position 1 and therefore has no character for position 0.
            startStr--;
            endStr--;
        }

        if (chunkSequence.length() < endStr) {
            return "";
        }
        return chunkSequence.substring(startStr, endStr);
    }

    /**
     * Returns the reverse complement of a nucleotide sequence: the characters are reversed and
     * the result is passed through {@link #getComplementarySequence(String)}.
     *
     * @param sequence sequence to reverse-complement; may be {@code null}
     * @return the reverse complement, or {@code null} when {@code sequence} is {@code null}
     */
    @Override
    public String getRevComp(String sequence) {
        if (sequence == null) {
            return null;
        }
        String reversed = new StringBuilder(sequence).reverse().toString();
        return getComplementarySequence(reversed);
    }

    //    private List<GenomeSequenceChunk> executeQuery(DBObject query) {
    //        List<GenomeSequenceChunk> result = null;
    //        DBCursor cursor = mongoDBCollection.find(query);
    //        try {
    //            if (cursor != null) {
    //                result = new ArrayList<GenomeSequenceChunk>(cursor.size());
    ////            Gson jsonObjectMapper = new Gson();
    //                GenomeSequenceChunk chunk = null;
    //                while (cursor.hasNext()) {
    ////               chunk = (GenomeSequenceChunk) jsonObjectMapper.fromJson(cursor.next().toString(), GenomeSequenceChunk.class);
    //                    result.add(chunk);
    //                }
    //            }
    //        } finally {
    //            cursor.close();
    //        }
    //        return result;
    //    }

    //   @Override
    //   public GenomeSequenceFeature getByRegion(String chromosome, int start, int end) {
    //      // positions below 1 are not allowed
    //      if (start < 1) {
    //         start = 1;
    //      }
    //        if (end < 1) {
    //            end = 1;
    //        }
    //      QueryBuilder builder = QueryBuilder.start("chromosome").is(chromosome.trim()).and("chunkId")
    //            .greaterThanEquals(getChunkId(start)).lessThanEquals(getChunkId(end));
    //
    //      System.out.println(builder.get().toString());
    //      List<GenomeSequenceChunk> chunkList = executeQuery(builder.get());
    //      StringBuilder sb = new StringBuilder();
    //      for (GenomeSequenceChunk chunk : chunkList) {
    //         sb.append(chunk.getSequence());
    //      }
    //
    //      int startStr = getOffset(start);
    //      int endStr = getOffset(start) + (end - start) + 1;
    //      String subStr = "";
    //      if (getChunkId(end) > 0) {
    //         if (sb.toString().length() > 0 && sb.toString().length() >= endStr) {
    //            subStr = sb.toString().substring(startStr, endStr);
    //         }
    //      } else {
    //         if (sb.toString().length() > 0 && sb.toString().length() + 1 >= endStr) {
    //            subStr = sb.toString().substring(startStr-1, endStr - 1);
    //         }
    //      }
    //
    //      return new GenomeSequenceFeature(chromosome, start, end, subStr);
    //   }
    //
    //   @Override
    //   public GenomeSequenceFeature getByRegion(String chromosome, int start, int end, int strand) {
    //      GenomeSequenceFeature genomeSequence = this.getByRegion(chromosome, start, end);
    //
    //      if (strand == -1) {
    //         genomeSequence.setSequence(getRevComp(genomeSequence.getSequence()));
    //      }
    //
    //      return genomeSequence;
    //   }
    //
    //   @Override
    //   public List<GenomeSequenceFeature> getByRegionList(List<Region> regions) {
    //      List<GenomeSequenceFeature> result = new ArrayList<GenomeSequenceFeature>(regions.size());
    //      for (Region region : regions) {
    //         result.add(getByRegion(region.getSequenceName(), region.getStart(), region.getEnd(), 1));
    //      }
    //      return result;
    //   }
    //
    //   @Override
    //   public List<GenomeSequenceFeature> getByRegionList(List<Region> regions, int strand) {
    //      List<GenomeSequenceFeature> result = new ArrayList<GenomeSequenceFeature>(regions.size());
    //      for (Region region : regions) {
    //         result.add(getByRegion(region.getSequenceName(), region.getStart(), region.getEnd(), strand));
    //      }
    //      return result;
    //   }
    //
    //   @Override
    //   public String getRevComp(String sequence) {
    //      String sequenceRef = new String();
    //      sequenceRef = new StringBuffer(sequence).reverse().toString();
    //      return getComplementarySequence(sequenceRef);
    //   }

}