edu.scripps.fl.pubchem.app.RelationDownloader.java Source code

Java tutorial

Introduction

Here is the source code for edu.scripps.fl.pubchem.app.RelationDownloader.java

Source

/*
 * Copyright 2010 The Scripps Research Institute
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.scripps.fl.pubchem.app;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.VFS;
import org.hibernate.Query;
import org.hibernate.Session;
import org.hibernate.Transaction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.scripps.fl.pubchem.PubChemDB;
import edu.scripps.fl.pubchem.db.Relation;
import edu.scripps.fl.pubchem.util.CommandLineHandler;

/**
 * Reads the PubChem BioAssay neighbor files found under {@link #assayNeighborURL}
 * and stores every {@code AID -> AID} pair as a {@link Relation}.
 *
 * <p>Each file in the folder is treated as one relation type: the file's base
 * name is used as the relation name. Every line must have the form
 * {@code AID<from> AID<to>}; any other line aborts processing.
 *
 * <p>Relations are buffered per "from" AID and written when the "from" AID
 * changes, so the code assumes that all lines for a given "from" AID are
 * contiguous within a file.
 */
public class RelationDownloader {

    private static final Logger log = LoggerFactory.getLogger(RelationDownloader.class);

    private static final String assayNeighborURL = "ftp://ftp.ncbi.nlm.nih.gov/pubchem/Bioassay/AssayNeighbors";

    /** One line of a neighbor file: {@code AID<digits>}, whitespace, {@code AID<digits>}. */
    private static final Pattern NEIGHBOR_LINE = Pattern.compile("^AID(\\d+)\\s+AID(\\d+)$");

    /**
     * Command-line entry point: hands {@code args} to {@link CommandLineHandler}
     * and then runs {@link #call()}.
     *
     * @param args command-line arguments, interpreted by {@link CommandLineHandler}
     * @throws Exception if argument handling or the download fails
     */
    public static void main(String[] args) throws Exception {
        new CommandLineHandler().handle(args);
        new RelationDownloader().call();
    }

    /**
     * Replaces the stored relations of one "from" id and relation name with the
     * given ones, inside a single transaction.
     *
     * <p>Existing rows matching {@code fromId} and {@code name} are deleted, then
     * every element of {@code relations} is saved. On success the collection is
     * emptied so the caller can reuse it as a buffer. On failure the transaction
     * is rolled back and the original exception is rethrown.
     *
     * @param fromId    id whose existing relations are deleted
     * @param name      relation name whose existing relations are deleted
     * @param relations relations to save; cleared after a successful commit
     */
    protected void update(long fromId, String name, Collection<Relation> relations) {
        Session session = PubChemDB.getSession();
        Transaction trx = session.beginTransaction();
        try {
            Query query = session.createQuery("delete from Relation where fromId = ? and relationName = ?");
            query.setLong(0, fromId);
            query.setString(1, name);
            query.executeUpdate();

            for (Relation relation : relations) {
                session.save(relation);
            }

            session.flush();
            trx.commit();
        } catch (RuntimeException e) {
            // Do not leave a half-applied delete/insert behind; keep the original failure as the one reported.
            try {
                trx.rollback();
            } catch (RuntimeException rollbackError) {
                log.warn("Rollback failed after error while updating relations", rollbackError);
            }
            throw e;
        }
        relations.clear();
    }

    /**
     * Processes every file in the neighbor folder and stores its relations.
     *
     * @throws UnsupportedOperationException if a line does not match the expected
     *                                       {@code AID<from> AID<to>} format
     * @throws Exception                     if the remote folder or a file cannot be read,
     *                                       or a database update fails
     */
    public void call() throws Exception {
        FileObject folder = VFS.getManager().resolveFile(assayNeighborURL);
        for (FileObject rFile : folder.getChildren()) {
            String name = rFile.getName().getBaseName();
            log.info("Processing file: {}", name);
            // Explicit charset so parsing does not depend on the platform default.
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(rFile.getContent().getInputStream(), "UTF-8"));
            try {
                String line = null;
                long lastFrom = 0;
                // Buffer of relations that all share the "from" id held in lastFrom.
                List<Relation> relations = new ArrayList<Relation>(100);
                while (null != (line = reader.readLine())) {
                    Matcher matcher = NEIGHBOR_LINE.matcher(line);
                    if (!matcher.matches())
                        throw new java.lang.UnsupportedOperationException("Cannot determine AIDs from line: " + line);
                    long from = Long.parseLong(matcher.group(1));
                    long to = Long.parseLong(matcher.group(2));
                    // The buffer is empty only before the first line of the file.
                    if (!relations.isEmpty() && from != lastFrom) {
                        // The buffered relations belong to the PREVIOUS id, so that is the
                        // id whose old rows must be replaced.
                        update(lastFrom, name, relations);
                        PubChemDB.getSession().clear();
                    }
                    lastFrom = from;

                    Relation relation = new Relation();
                    relation.setFromDb("pcassay");
                    relation.setToDb("pcassay");
                    relation.setFromId(from);
                    relation.setToId(to);
                    relation.setRelationName(name);
                    relations.add(relation);
                }
                // Write the group for the last id; nothing to do for an empty file.
                if (!relations.isEmpty()) {
                    update(lastFrom, name, relations);
                }
            } finally {
                reader.close();
            }
        }
    }
}