com.avira.couchdoop.imp.CouchbaseViewInputFormat.java Source code

Java tutorial

Introduction

Here is the source code for com.avira.couchdoop.imp.CouchbaseViewInputFormat.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.avira.couchdoop.imp;

import com.avira.couchdoop.ArgsException;
import com.avira.couchdoop.CouchbaseArgs;
import com.couchbase.client.CouchbaseClient;
import com.couchbase.client.protocol.views.*;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;
import java.util.*;

/**
 * This input format reads documents from a Couchbase view queried by a list of view keys.
 * <p/>
 * Instances emit document IDs as key and the corresponding Couchbase {@link com.couchbase.client.protocol.views.ViewRow}
 * as value.
 * <p/>
 * The view keys passed as input are distributed evenly across a configurable number of Mapper tasks.
 */
public class CouchbaseViewInputFormat extends InputFormat<Text, ViewRow> {

    public static class CouchbaseViewInputSplit extends InputSplit implements Writable {

        private List<String> viewKeys = new ArrayList<>();

        /**
         * Default constructor (necessary because this is a Writable)
         */
        public CouchbaseViewInputSplit() {
        }

        @Override
        public long getLength() throws IOException, InterruptedException {
            // The split size is calculated only roughly from the number of keys
            // out of performance considerations
            return viewKeys.size();
        }

        @Override
        public String[] getLocations() throws IOException, InterruptedException {
            // It is assumed that the Couchbase nodes cannot be local.
            return new String[] {};
        }

        @Override
        public void write(DataOutput out) throws IOException {
            Text[] viewKeysTexts = new Text[viewKeys.size()];

            int i = 0;
            for (String viewKey : viewKeys) {
                viewKeysTexts[i++] = new Text(viewKey);
            }

            ArrayWritable viewKeysWritable = new ArrayWritable(Text.class, viewKeysTexts);
            viewKeysWritable.write(out);
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            ArrayWritable viewKeysWritable = new ArrayWritable(Text.class);
            viewKeysWritable.readFields(in);

            Collections.addAll(viewKeys, viewKeysWritable.toStrings());
        }

        public void addKey(String key) {
            viewKeys.add(key);
        }

        public List<String> getKeys() {
            return viewKeys;
        }
    }

    public static class CouchbaseViewRecordReader extends RecordReader<Text, ViewRow> {

        private List<URI> couchbaseUrls;
        private String couchbaseBucket;
        private String couchbasePassword;
        private String couchbaseDesignDocName;
        private String couchbaseViewName;
        private int couchbaseDocsPerPage;

        private Queue<String> keyQueue = new LinkedList<>();
        private int totalNumKeys;

        private CouchbaseClient couchbaseClient;
        private View view;
        private Paginator paginator;
        private Iterator<ViewRow> rowIterator;

        private Text key = new Text();
        private ViewRow value;

        private boolean finished = false;

        private static final Logger LOGGER = LoggerFactory.getLogger(CouchbaseViewRecordReader.class);

        @Override
        public void initialize(InputSplit inputSplit, TaskAttemptContext context)
                throws IOException, InterruptedException {
            initCouchbaseArgs(context);

            //Add all keys to a queue
            CouchbaseViewInputSplit couchbaseViewInputSplit = (CouchbaseViewInputSplit) inputSplit;
            keyQueue.addAll(couchbaseViewInputSplit.getKeys());
            totalNumKeys = keyQueue.size();

            if (0 == keyQueue.size()) {
                //No keys
                return;
            }

            initCouchbaseClient();
            initCouchbaseView();
            initNextRowIterator();
        }

        private boolean initNextRowIterator() {
            if ((paginator != null) && paginator.hasNext()) {
                //We have a next page, get the row iterator from there
                rowIterator = paginator.next().iterator();
                return true;
            }

            String nextKey = keyQueue.poll();
            if (nextKey == null) {
                //No more keys, no more row iterators, return false
                return false;
            }

            Query query = getQueryForKey(nextKey);
            paginator = couchbaseClient.paginatedQuery(view, query, couchbaseDocsPerPage);

            //Loaded a new Paginator, start over
            return initNextRowIterator();
        }

        private Query getQueryForKey(String key) {
            Query query = new Query();
            query.setKey(key);
            query.setIncludeDocs(true);
            return query;
        }

        private void initCouchbaseView() throws IOException {
            // Prepare for querying the Couchbase view.
            LOGGER.info("Querying Couchbase for view {}...", couchbaseViewName);
            view = couchbaseClient.getView(couchbaseDesignDocName, couchbaseViewName);
        }

        private void initCouchbaseArgs(TaskAttemptContext context) {
            Configuration conf = context.getConfiguration();
            ImportViewArgs importViewArgs;
            try {
                importViewArgs = new ImportViewArgs(conf);
            } catch (ArgsException e) {
                throw new IllegalArgumentException(e);
            }

            // Check args presence.
            if (importViewArgs.getUrls() == null || importViewArgs.getBucket() == null
                    || importViewArgs.getPassword() == null) {
                throw new IllegalArgumentException("Not all Couchbase arguments are present: " + importViewArgs);
            }

            couchbaseUrls = importViewArgs.getUrls();
            couchbaseBucket = importViewArgs.getBucket();
            couchbasePassword = importViewArgs.getPassword();
            couchbaseDesignDocName = importViewArgs.getDesignDocumentName();
            couchbaseViewName = importViewArgs.getViewName();
            couchbaseDocsPerPage = importViewArgs.getDocumentsPerPage();
        }

        private void initCouchbaseClient() throws IOException {
            //Skip if couchbaseClient is already set
            if (couchbaseClient != null) {
                return;
            }

            LOGGER.info("Connecting to Couchbase...");
            try {
                couchbaseClient = new CouchbaseClient(couchbaseUrls, couchbaseBucket, couchbasePassword);
                LOGGER.info("Connected to Couchbase.");
            } catch (IOException e) {
                LOGGER.error(ExceptionUtils.getStackTrace(e));
                throw e;
            }
        }

        private void disconnectFromCouchbase() {
            //Skip if couchbaseClient is not set
            if (couchbaseClient == null) {
                return;
            }
            LOGGER.info("Disconnecting from Couchbase...");
            couchbaseClient.shutdown();
            couchbaseClient = null;
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            //If we can get a new row, we're fine
            if (nextRow()) {
                return true;
            }

            //Try to get a new rowIterator and make another attempt
            while (initNextRowIterator()) {
                if (nextRow()) {
                    return true;
                }
            }

            return false;
        }

        private boolean nextRow() {
            if (rowIterator == null || !rowIterator.hasNext()) {
                return false;
            }
            value = rowIterator.next();
            key.set(value.getId());
            return true;
        }

        @Override
        public Text getCurrentKey() throws IOException, InterruptedException {
            return key;
        }

        @Override
        public ViewRow getCurrentValue() throws IOException, InterruptedException {
            return value;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return (float) (totalNumKeys - keyQueue.size()) / totalNumKeys;
        }

        @Override
        public void close() throws IOException {
            disconnectFromCouchbase();
        }
    }

    private static final Logger LOGGER = LoggerFactory.getLogger(CouchbaseViewInputFormat.class);

    @Override
    public List<InputSplit> getSplits(JobContext jobContext) throws IOException, InterruptedException {
        List<InputSplit> inputSplits = new ArrayList<InputSplit>();

        ImportViewArgs importViewArgs;
        try {
            importViewArgs = new ImportViewArgs(jobContext.getConfiguration());
        } catch (ArgsException e) {
            throw new RuntimeException("ImportViewArgs can't load settings from Hadoop Configuration");
        }
        String[] viewKeys = importViewArgs.getViewKeys();
        int viewKeysPerMapTask = (int) Math.ceil((double) viewKeys.length / importViewArgs.getNumMappers());

        LOGGER.info("Number of keys per map task is {} ({} / {})", viewKeysPerMapTask, viewKeys.length,
                importViewArgs.getNumMappers());

        CouchbaseViewInputSplit inputSplit = new CouchbaseViewInputSplit();
        int keysInCurrentSplit = 0;

        for (String viewKey : viewKeys) {
            //If we have enough keys, add the split and create a new one
            if (keysInCurrentSplit == viewKeysPerMapTask) {
                inputSplits.add(inputSplit);
                inputSplit = new CouchbaseViewInputSplit();
                keysInCurrentSplit = 0;
            }
            inputSplit.addKey(viewKey);
            keysInCurrentSplit++;
        }

        //Also add last inputSplit
        inputSplits.add(inputSplit);

        return inputSplits;
    }

    @Override
    public RecordReader<Text, ViewRow> createRecordReader(InputSplit inputSplit,
            TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        return new CouchbaseViewRecordReader();
    }

    public static void initJob(Job job, String urls, String bucket, String password, String designDocumentName,
            String viewName, String viewKeys) {
        job.setInputFormatClass(CouchbaseViewInputFormat.class);

        Configuration conf = job.getConfiguration();
        conf.set(CouchbaseArgs.ARG_COUCHBASE_URLS.getPropertyName(), urls);
        conf.set(CouchbaseArgs.ARG_COUCHBASE_BUCKET.getPropertyName(), bucket);
        conf.set(CouchbaseArgs.ARG_COUCHBASE_PASSWORD.getPropertyName(), password);
        conf.set(ImportViewArgs.ARG_DESIGNDOC_NAME.getPropertyName(), designDocumentName);
        conf.set(ImportViewArgs.ARG_VIEW_NAME.getPropertyName(), viewName);
        conf.set(ImportViewArgs.ARG_VIEW_KEYS.getPropertyName(), viewKeys);
    }

}