// Java tutorial
package com.bonc.mr_roamRecognition_hjpt.comm; /** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat; import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader; import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReaderWrapper; import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import com.bonc.mr_roamRecognition_hjpt.util.ProvUtil; /** * Input format that is a <code>CombineFileInputFormat</code>-equivalent for * <code>TextInputFormat</code>. 
* * @see CombineFileInputFormat */ @InterfaceAudience.Public @InterfaceStability.Stable public class PathCombineTextInputFormat extends CombineFileInputFormat<Text, Text> { private static final Log LOG = LogFactory.getLog(PathCombineTextInputFormat.class); public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException { return new CombineFileRecordReader<Text, Text>((CombineFileSplit) split, context, TextRecordReaderWrapper.class); } /** * A record reader that may be passed to * <code>CombineFileRecordReader</code> so that it can be used in a * <code>CombineFileInputFormat</code>-equivalent for * <code>TextInputFormat</code>. * * @see CombineFileRecordReader * @see CombineFileInputFormat * @see TextInputFormat */ private static class TextRecordReaderWrapper extends CombineFileRecordReaderWrapper<Text, Text> { // this constructor signature is required by CombineFileRecordReader public TextRecordReaderWrapper(CombineFileSplit split, TaskAttemptContext context, Integer idx) throws IOException, InterruptedException { super(new GetFileTextInputFormat(), split, context, idx); } } @Override public List<InputSplit> getSplits(JobContext job) throws IOException { List<PathFilter> filters = getPoll(); for (int i = 0; i < filters.size(); i++) { PathFilter pathFilter = filters.get(i); createPool(pathFilter); } return super.getSplits(job); } public synchronized static List<PathFilter> getPoll() { List<PathFilter> pools = new ArrayList<PathFilter>(); Map<String, String> map = ProvUtil.getCode(); for (Map.Entry<String, String> entry : map.entrySet()) { final String prov_id = entry.getValue(); pools.add(new PathFilter() { String provId = prov_id; @Override public boolean accept(Path path) { String fileName = path.getParent().toString(); boolean need = fileName.endsWith(prov_id); return need; } }); } return pools; } }