Java tutorial: CopybookInputFormat, a Hadoop InputFormat for COBOL copybook data
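The class below is the Hadoop InputFormat behind the CopybookReader plugin. It does three things: it stashes the COBOL copybook text and the HDFS path of the binary data file in the job configuration, it hands each input split to a CopybookRecordReader, and it only permits file splitting when the input is uncompressed or compressed with a splittable codec.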
/*
 * Copyright 2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.hydrator.plugin.batch;

import net.sf.JRecord.Common.AbstractFieldValue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import java.io.IOException;
import java.util.LinkedHashMap;

/**
 * InputFormat class for the CopybookReader plugin.
 */
public class CopybookInputFormat extends FileInputFormat<LongWritable, LinkedHashMap<String, AbstractFieldValue>> {

  public static final String COPYBOOK_INPUTFORMAT_CBL_CONTENTS = "copybook.inputformat.cbl.contents";
  public static final String COPYBOOK_INPUTFORMAT_DATA_HDFS_PATH = "copybook.inputformat.data.hdfs.path";

  /**
   * Stores the raw COBOL copybook contents in the job configuration so the
   * record reader can parse the binary data file against it.
   */
  public static void setCopybookInputformatCblContents(Job job, String copybookContents) {
    job.getConfiguration().set(COPYBOOK_INPUTFORMAT_CBL_CONTENTS, copybookContents);
  }

  /**
   * Stores the HDFS path of the binary data file in the job configuration.
   */
  public static void setBinaryFilePath(Job job, String binaryFile) {
    job.getConfiguration().set(COPYBOOK_INPUTFORMAT_DATA_HDFS_PATH, binaryFile);
  }

  @Override
  public RecordReader<LongWritable, LinkedHashMap<String, AbstractFieldValue>> createRecordReader(
      InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    return new CopybookRecordReader();
  }

  @Override
  protected boolean isSplitable(JobContext context, Path file) {
    // Allow splitting only if the data file is uncompressed or uses a splittable codec.
    Configuration conf = context.getConfiguration();
    Path path = new Path(conf.get(COPYBOOK_INPUTFORMAT_DATA_HDFS_PATH));
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    return (codec == null) || (codec instanceof SplittableCompressionCodec);
  }
}
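For context, here is a minimal sketch of how a driver could wire this InputFormat into a MapReduce job. The copybook string, data path, and job name are hypothetical placeholders, and the driver is assumed to live in the same package as CopybookInputFormat; in practice the Hydrator plugin would perform this configuration itself.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class CopybookJobDriver {

  public static void main(String[] args) throws Exception {
    // Hypothetical inputs, for illustration only.
    String copybookContents = "..."; // raw COBOL copybook text would go here
    String dataPath = "hdfs:///data/mainframe/records.bin"; // hypothetical path

    Job job = Job.getInstance(new Configuration(), "copybook-read");
    job.setJarByClass(CopybookJobDriver.class);
    job.setInputFormatClass(CopybookInputFormat.class);

    // Hand the copybook text and the data location to the InputFormat
    // through the job configuration, using the setters defined above.
    CopybookInputFormat.setCopybookInputformatCblContents(job, copybookContents);
    CopybookInputFormat.setBinaryFilePath(job, dataPath);
    FileInputFormat.addInputPath(job, new Path(dataPath));

    // A mapper consuming (LongWritable, LinkedHashMap<String, AbstractFieldValue>)
    // pairs and an output format would be configured here; omitted for brevity.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Note that setBinaryFilePath should point at the same file as the FileInputFormat input path: isSplitable ignores its Path argument and instead reads the path back from the COPYBOOK_INPUTFORMAT_DATA_HDFS_PATH configuration key when it checks the compression codec.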