co.cask.cdap.internal.app.runtime.batch.dataset.DatasetInputFormatProvider.java Source code

Java tutorial

Introduction

Here is the source code for co.cask.cdap.internal.app.runtime.batch.dataset.DatasetInputFormatProvider.java

Source

/*
 * Copyright © 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.internal.app.runtime.batch.dataset;

import co.cask.cdap.api.data.batch.BatchReadable;
import co.cask.cdap.api.data.batch.InputFormatProvider;
import co.cask.cdap.api.data.batch.Split;
import co.cask.cdap.api.dataset.Dataset;
import co.cask.cdap.common.conf.ConfigurationUtil;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;

/**
 * An {@link InputFormatProvider} that supplies the {@link InputFormat} used to read from a {@link Dataset}.
 *
 * <p>If the dataset itself implements {@link InputFormatProvider}, both the input format class name and
 * the input format configuration are delegated to the dataset. Otherwise the class name of the given
 * {@link AbstractBatchReadableInputFormat} is used, and the configuration is built from the dataset name,
 * the dataset arguments and the splits to read.
 */
public class DatasetInputFormatProvider implements InputFormatProvider {

    private final String datasetName;
    private final Map<String, String> datasetArgs;
    private final Dataset dataset;
    // null means "ask the dataset for its splits at the time the configuration is built"
    private final List<Split> splits;
    private final Class<? extends AbstractBatchReadableInputFormat> batchReadableInputFormat;

    /**
     * Creates a provider for the given dataset.
     *
     * @param datasetName name of the dataset, written into the generated configuration
     * @param datasetArgs arguments of the dataset; an immutable copy is kept
     * @param dataset the dataset to read from
     * @param splits the splits to read, or {@code null} to obtain them from the dataset (which then must
     *               be a {@link BatchReadable}) when the configuration is created; an immutable copy is kept
     * @param batchReadableInputFormat input format class to use when the dataset is not itself an
     *                                 {@link InputFormatProvider}
     */
    public DatasetInputFormatProvider(String datasetName, Map<String, String> datasetArgs, Dataset dataset,
            @Nullable List<Split> splits,
            Class<? extends AbstractBatchReadableInputFormat> batchReadableInputFormat) {
        this.datasetName = datasetName;
        this.datasetArgs = ImmutableMap.copyOf(datasetArgs);
        this.dataset = dataset;
        this.splits = splits == null ? null : ImmutableList.copyOf(splits);
        this.batchReadableInputFormat = batchReadableInputFormat;
    }

    /**
     * Returns the input format class name: the one reported by the dataset if it is an
     * {@link InputFormatProvider}, otherwise the name of the batch-readable input format class
     * given at construction.
     */
    @Override
    public String getInputFormatClassName() {
        if (dataset instanceof InputFormatProvider) {
            return ((InputFormatProvider) dataset).getInputFormatClassName();
        }
        return batchReadableInputFormat.getName();
    }

    /**
     * Returns the input format configuration: the one reported by the dataset if it is an
     * {@link InputFormatProvider}, otherwise a configuration describing the dataset and its splits.
     *
     * @throws IllegalArgumentException if the dataset is not an {@link InputFormatProvider} and either
     *         no splits can be determined for it or the splits cannot be written to the configuration
     */
    @Override
    public Map<String, String> getInputFormatConfiguration() {
        if (dataset instanceof InputFormatProvider) {
            return ((InputFormatProvider) dataset).getInputFormatConfiguration();
        }
        return createBatchReadableConfiguration();
    }

    /**
     * Builds the configuration map for reading the dataset through the batch-readable input format.
     * Uses the splits given at construction, or asks the dataset for them if none were given.
     */
    private Map<String, String> createBatchReadableConfiguration() {
        List<Split> effectiveSplits = this.splits;
        if (effectiveSplits == null) {
            // Only this path needs the dataset to be BatchReadable. Check explicitly so that a wrong
            // (or missing) dataset is reported with context instead of a bare ClassCastException or
            // NullPointerException. Providers created with explicit splits are not affected.
            if (!(dataset instanceof BatchReadable)) {
                String datasetType = dataset == null ? "null" : dataset.getClass().getName();
                throw new IllegalArgumentException(
                        "Dataset '" + datasetName + "' of type " + datasetType
                                + " is neither an InputFormatProvider nor a BatchReadable,"
                                + " and no splits were provided");
            }
            effectiveSplits = ((BatchReadable<?, ?>) dataset).getSplits();
        }

        // Clear the freshly created configuration before writing to it, so the resulting map is built
        // from the entries set below rather than from whatever the Configuration starts out with.
        Configuration hConf = new Configuration();
        hConf.clear();

        try {
            AbstractBatchReadableInputFormat.setDatasetSplits(hConf, datasetName, datasetArgs, effectiveSplits);
            return ConfigurationUtil.toMap(hConf);
        } catch (IOException e) {
            throw new IllegalArgumentException(
                    "Failed to create input format configuration for dataset '" + datasetName + "'", e);
        }
    }
}