gridool.mapred.db.task.DBMapShuffleTaskBase.java Source code

Introduction

Here is the source code for gridool.mapred.db.task.DBMapShuffleTaskBase.java, the abstract base class for Gridool map/shuffle tasks that read their map input from a database.

Source

/*
 * @(#)$Id$
 *
 * Copyright 2006-2008 Makoto YUI
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 * Contributors:
 *     Makoto YUI - initial implementation
 */
package gridool.mapred.db.task;

import gridool.GridException;
import gridool.GridJob;
import gridool.GridKernel;
import gridool.annotation.GridKernelResource;
import gridool.construct.GridTaskAdapter;
import gridool.db.record.DBRecord;
import gridool.mapred.db.DBMapReduceJobConf;
import gridool.util.collections.ArrayQueue;
import gridool.util.collections.BoundedArrayQueue;
import gridool.util.concurrent.DirectExecutorService;
import gridool.util.concurrent.ExecutorFactory;
import gridool.util.concurrent.ExecutorUtils;

import java.io.Serializable;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.concurrent.ExecutorService;

import javax.annotation.Nonnull;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * 
 * <DIV lang="en">Base class for database map/shuffle tasks: streams input records from a JDBC query, maps each record, and buffers the mapped output for shuffling in bounded batches.</DIV>
 * <DIV lang="ja"></DIV>
 * 
 * @author Makoto YUI (yuin405@gmail.com)
 */
public abstract class DBMapShuffleTaskBase<IN_TYPE extends DBRecord, OUT_TYPE> extends GridTaskAdapter {
    private static final long serialVersionUID = -4028443291695700765L;
    protected static final Log LOG = LogFactory.getLog(DBMapShuffleTaskBase.class);

    @Nonnull
    protected final DBMapReduceJobConf jobConf;

    // ------------------------
    // injected resources

    @GridKernelResource
    protected transient GridKernel kernel;

    // ------------------------
    // working resources

    protected transient int shuffleUnits = 1024;
    protected transient int shuffleThreads = Runtime.getRuntime().availableProcessors();
    protected transient ExecutorService shuffleExecPool;
    protected transient BoundedArrayQueue<OUT_TYPE> shuffleSink;

    @SuppressWarnings("unchecked")
    public DBMapShuffleTaskBase(GridJob job, @Nonnull DBMapReduceJobConf jobConf) {
        super(job, true);
        assert (jobConf != null);
        this.jobConf = jobConf;
    }

    @Override
    public boolean injectResources() {
        return true;
    }

    /**
     * Override to change the number of shuffle units. 1024 by default.
     */
    protected int shuffleUnits() {
        return shuffleUnits;
    }

    public void setShuffleUnits(int shuffleUnits) {
        this.shuffleUnits = shuffleUnits;
    }

    /**
     * Override to change the number of shuffle threads.
     * Note that shuffling causes bursty network traffic.
     * 
     * @return the number of shuffle threads, {@link Runtime#availableProcessors()} by default.
     */
    protected int shuffleThreads() {
        return shuffleThreads;
    }

    public void setShuffleThreads(int shuffleThreads) {
        this.shuffleThreads = shuffleThreads;
    }

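    /**
     * Runs the map/shuffle phase: opens a read-only JDBC connection, streams the rows of
     * {@link DBMapReduceJobConf#getInputQuery()}, maps each row via {@link #process}, and
     * finally flushes any remaining buffered output in {@link #postShuffle()}.
     */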
    protected final Serializable execute() throws GridException {
        int numShuffleThreads = shuffleThreads();
        this.shuffleExecPool = (numShuffleThreads <= 0) ? new DirectExecutorService()
                : ExecutorFactory.newFixedThreadPool(numShuffleThreads, "Gridool#Shuffle", true);
        this.shuffleSink = new BoundedArrayQueue<OUT_TYPE>(shuffleUnits());

        // execute a query
        final Connection conn;
        try {
            conn = jobConf.getConnection(false);
            configureConnection(conn);
        } catch (ClassNotFoundException e) {
            LOG.error(e);
            throw new GridException(e);
        } catch (SQLException e) {
            LOG.error(e);
            throw new GridException(e);
        }
        assert (conn != null);
        final String query = jobConf.getInputQuery();
        final Statement statement;
        final ResultSet results;
        try {
            statement = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
            results = statement.executeQuery(query);
        } catch (SQLException e) {
            try {
                conn.close();
            } catch (SQLException sqle) { // suppressed: the original exception is rethrown as GridException below
                LOG.debug(sqle.getMessage());
            }
            LOG.error(e);
            throw new GridException(e);
        }

        try {
            preprocess(conn, results);
        } catch (SQLException e) {
            LOG.error(e);
            throw new GridException(e);
        }

        // Iterate over the input records: process() is called for each record,
        // which in turn calls shuffle() for its mapped output
        try {
            while (results.next()) {
                IN_TYPE record = prepareInputRecord();
                readFields(record, results);
                if (!process(record)) {
                    break;
                }
            }
        } catch (SQLException e) {
            LOG.error(e);
            throw new GridException(e);
        } finally {
            try {
                statement.close();
            } catch (SQLException e) {
                LOG.debug("failed closing a statement", e);
            }
            try {
                conn.close();
            } catch (SQLException e) {
                LOG.debug("failed closing a connection", e);
            }
        }
        postShuffle();
        return null;
    }

    private static void configureConnection(final Connection conn) {
        try {
            conn.setReadOnly(true); // should *not* call setReadOnly in a transaction (for MonetDB)
            conn.setAutoCommit(false);
        } catch (SQLException e) {
            LOG.warn("failed to configure a connection", e);
        }
    }

    protected void preprocess(Connection conn, ResultSet results) throws SQLException {
    }

    protected IN_TYPE prepareInputRecord() {
        return jobConf.<IN_TYPE>createMapInputRecord();
    }

    protected void readFields(IN_TYPE record, ResultSet results) throws SQLException {
        record.readFields(results);
    }

    /**
     * Processes a single input record. This is the map function.
     * 
     * @return true to continue mapping the remaining records, false to stop.
     */
    protected abstract boolean process(@Nonnull IN_TYPE record);

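    /**
     * Buffers a mapped record in the shuffle sink. When the sink is full, the batch is handed
     * to {@link #invokeShuffle} and a fresh sink is allocated for subsequent records.
     */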
    protected final void shuffle(@Nonnull final OUT_TYPE record) {
        assert (shuffleSink != null);
        if (!shuffleSink.offer(record)) {
            invokeShuffle(shuffleExecPool, shuffleSink);
            this.shuffleSink = new BoundedArrayQueue<OUT_TYPE>(shuffleUnits());
            shuffleSink.offer(record);
        }
    }

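    /**
     * Emits a batch of buffered intermediate records. Implementations are expected to run the
     * (typically network-bound) shuffle on the given executor pool.
     */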
    protected abstract void invokeShuffle(@Nonnull final ExecutorService shuffleExecPool,
            @Nonnull final ArrayQueue<OUT_TYPE> queue);

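    /**
     * Flushes any records remaining in the shuffle sink and shuts down the shuffle pool.
     */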
    protected void postShuffle() {
        if (!shuffleSink.isEmpty()) {
            invokeShuffle(shuffleExecPool, shuffleSink);
        }
        ExecutorUtils.shutdownAndAwaitTermination(shuffleExecPool);
    }
}
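
Example

A concrete task only has to implement process() and invokeShuffle(). The following is a minimal, hypothetical sketch of such a subclass, assuming the Gridool types shown in the listing above; the class name ExampleMapShuffleTask and the mapping logic are illustrative and not part of Gridool.

// Minimal sketch of a concrete subclass (hypothetical; imports as in the listing above).
public final class ExampleMapShuffleTask extends DBMapShuffleTaskBase<DBRecord, String> {
    private static final long serialVersionUID = 1L;

    @SuppressWarnings("unchecked")
    public ExampleMapShuffleTask(GridJob job, DBMapReduceJobConf jobConf) {
        super(job, jobConf);
    }

    @Override
    protected boolean process(DBRecord record) {
        // map step: derive an intermediate value from the record and buffer it for shuffling;
        // a real mapper would extract individual fields from the record here
        shuffle(String.valueOf(record));
        return true; // keep iterating over the input ResultSet
    }

    @Override
    protected void invokeShuffle(ExecutorService shuffleExecPool, final ArrayQueue<String> queue) {
        // run the (possibly network-bound) shuffle batch asynchronously on the pool
        shuffleExecPool.execute(new Runnable() {
            public void run() {
                // forward the buffered records in 'queue' to the reducer node,
                // e.g. via the injected GridKernel; omitted in this sketch
            }
        });
    }
}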