edu.umkc.sce.App.java Source code

Java tutorial

Introduction

Here is the source code for edu.umkc.sce.App.java

Source

/*
 * Copyright 2014 Ryan Linneman
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.umkc.sce;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.NamespaceDescriptor.Builder;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.jena.riot.RDFLanguages;

import com.google.common.base.Stopwatch;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.ResultSetFormatter;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;

/**
 * Hadoop/Hbase/Jena Hello world!
 * 
 */
public class App extends Configured implements Tool {
    private static final String MY_NAMESPACE = "foo";

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        int result;
        try {
            result = ToolRunner.run(conf, new App(), args);
        } catch (Exception e) {
            e.printStackTrace();
            result = -1;
        }
        System.exit(result);
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        GenericOptionsParser parser = new GenericOptionsParser(conf, args);
        args = parser.getRemainingArgs();
        if (args.length != 1) {
            GenericOptionsParser.printGenericCommandUsage(System.out);

            truncate(getAdmin().listTableNamesByNamespace(MY_NAMESPACE));
            System.exit(2);
        }

        String importFile = args[0];
        FileSystem fs = null;
        BufferedReader br = null;
        Model m = null;

        try {
            fs = FileSystem.get(conf);
            Path path = new Path(importFile);
            br = new BufferedReader(new InputStreamReader(fs.open(path)));
            m = createModel();

            Stopwatch sw = new Stopwatch();
            sw.start();
            m.read(br, null, RDFLanguages.strLangNTriples);
            sw.stop();
            System.out.printf("Loading '%s' took %d.\n", importFile, sw.elapsedTime(TimeUnit.MILLISECONDS));
            sw.reset();
            sw.start();
            runTestQuery(m);
            sw.stop();
            System.out.printf("Query '%s' took %d.\n", query, sw.elapsedTime(TimeUnit.MILLISECONDS));
            sw.reset();
            sw.start();

            createStore(m);
            sw.stop();
            System.out.printf("loadHbase took %d.\n", sw.elapsedTime(TimeUnit.MILLISECONDS));
        } finally {
            if (m != null)
                m.close();
            if (br != null)
                br.close();
            if (fs != null)
                fs.close();
        }

        return 0;
    }

    private Model createModel() {
        Model model = ModelFactory.createDefaultModel();
        // Configuration conf = HBaseConfiguration.create(getConf());
        // HBaseAdmin admin = null;
        // Model model;
        // conf.setQuietMode(true);
        //
        // try {
        // admin = new HBaseAdmin(conf);
        // } catch (MasterNotRunningException | ZooKeeperConnectionException e)
        // {
        // // TODO Auto-generated catch block
        // e.printStackTrace();
        // } catch (IOException e) {
        // e.printStackTrace();
        // }
        // if (admin == null)
        // return null;
        //
        // try {
        // com.hp.hpl.jena.sdb.Store store = SDBFactory
        // .connectStore("sdb.ttl");
        //
        // store.getTableFormatter().create();
        //
        // model = SDBFactory.connectDefaultModel(store);
        // } finally {
        // try {
        // admin.close();
        // } catch (IOException e) {
        // }
        // }
        return model;
    }

    private final String query = "select ?x ?z ?a " + "where " + "{ "
            + " ?x <http://purl.uniprot.org/core/reviewed> ?y . "
            + " ?x <http://purl.uniprot.org/core/created> ?b . "
            + " ?x <http://purl.uniprot.org/core/mnemonic> \"003L_IIV3\" . "
            + " ?x <http://purl.uniprot.org/core/citation> ?z . "
            + " ?z <http://purl.uniprot.org/core/author> ?a . " + "}";

    private void runTestQuery(Model model) {
        QueryExecution qe = QueryExecutionFactory.create(query, model);
        try {
            ResultSet results = qe.execSelect();
            ResultSetFormatter.out(System.out, results);
        } finally {
            qe.close();
        }
    }

    private void createStore(Model model)
            throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
        HBaseAdmin admin = getAdmin();
        // com.hp.hpl.jena.sdb.Store store = null;

        try {
            Builder ndesc = NamespaceDescriptor.create(MY_NAMESPACE);
            try {
                admin.createNamespace(ndesc.build());
            } catch (IOException ex) {
            }

            //         truncate(admin.listTableNamesByNamespace(MY_NAMESPACE));
            // store.getTableFormatter().create();
            //
            // TableName tableName = TableName.valueOf("foo", "bar");
            //
            // if (admin.tableExists(tableName)) {
            // admin.disableTable(tableName);
            // admin.deleteTable(tableName);
            // admin.deleteNamespace(tableName.getNamespaceAsString());
            // } else {
            loadData(model);
            // }

        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } finally {
            admin.close();
        }
    }

    /**
     * @param listTableNamesByNamespace
     * @throws IOException
     * @throws ZooKeeperConnectionException
     * @throws MasterNotRunningException
     */
    private void truncate(final TableName[] tableNames)
            throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
        HBaseAdmin admin = getAdmin();
        for (TableName tableName : tableNames) {
            System.out.printf("Dropping %s\n", tableName.getNameAsString());
            if (admin.isTableEnabled(tableName)) {
                admin.disableTable(tableName);
            }
            admin.deleteTable(tableName);
        }
    }

    private void loadData(Model model) throws IOException {
        HashMap<TableName, HTable> tables = new HashMap<TableName, HTable>();
        StmtIterator statements = model.listStatements();
        while (statements.hasNext()) {
            Statement stmt = statements.nextStatement();
            Put put = createPut(stmt);
            TableName tableName = getTableName(stmt.getPredicate());
            HTable table = null;
            if (tables.containsKey(tableName)) {
                table = tables.get(tableName);
            } else {
                table = createTable(tableName);
                tables.put(tableName, table);
            }
            table.put(put);
        }

        for (HTable table : tables.values()) {
            table.flushCommits();
        }

    }

    private HTable createTable(TableName name)
            throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
        HBaseAdmin admin = getAdmin();
        HTable table = null;
        if (!admin.tableExists(name)) {
            System.out.printf("Creating table %s\n", name.getNameAsString());
            HTableDescriptor desc = new HTableDescriptor(name);
            HColumnDescriptor cf = new HColumnDescriptor("rdf".getBytes());
            desc.addFamily(cf);
            admin.createTable(desc);
        }

        table = new HTable(getHBaseConf(), name);
        table.setAutoFlush(false, true);
        return table;
    }

    /**
     * @return
     * @throws IOException
     * @throws ZooKeeperConnectionException
     * @throws MasterNotRunningException
     */
    private HBaseAdmin getAdmin() throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
        if (_admin == null) {
            _admin = new HBaseAdmin(getHBaseConf());
        }
        return _admin;
    }

    private HBaseAdmin _admin;
    private Configuration _hbaseConf;

    private Configuration getHBaseConf() {
        if (_hbaseConf == null) {
            Configuration conf = getConf();
            conf.setQuietMode(true);
            conf = HBaseConfiguration.create(conf);
            conf.setQuietMode(true);

            _hbaseConf = conf;
        }
        return _hbaseConf;
    }

    /**
     * @param predicate
     * @return
     */
    private TableName getTableName(Property predicate) {
        TableName name = null;

        name = TableName.valueOf(MY_NAMESPACE, predicate.getLocalName());

        return name;
    }

    private Put createPut(Statement s) {
        Property p = s.getPredicate();
        String predicateName = p.getLocalName();
        Put put = new Put(predicateName.getBytes());
        RDFNode o = s.getObject();

        put.add("rdf".getBytes(), "o".getBytes(),
                o.isLiteral() ? o.asLiteral().getString().getBytes() : o.asResource().getLocalName().getBytes());

        Resource sub = s.getSubject();

        put.add("rdf".getBytes(), "s".getBytes(), sub.getLocalName().getBytes());

        return put;
    }
}