de.tudarmstadt.lt.n2n.hadoop.FlipJoBims.java Source code

Java tutorial

Introduction

Here is the source code for de.tudarmstadt.lt.n2n.hadoop.FlipJoBims.java

Source

/*
 *   Copyright 2013
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 */
package de.tudarmstadt.lt.n2n.hadoop;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Map-only Hadoop job that swaps the first two tab-separated columns of every input line.
 *
 * <p>Input is read with {@link KeyValueTextInputFormat}, so each record arrives as
 * (text before the first tab, rest of the line). The mapper emits the first column of
 * the rest as the new key and the old key followed by the remaining columns as the new
 * value, i.e. {@code a<TAB>b<TAB>c} becomes {@code b<TAB>a<TAB>c}.
 *
 * <p>Usage: {@code FlipJoBims [generic hadoop options] <input path> <output path>}
 *
 * @author Steffen Remus
 */
public class FlipJoBims {

    /** Value passed to the per-task memory setters of {@link JobConf} (about 1 GB). */
    private static final long TASK_MEMORY = 1000L;

    /**
     * Configures and runs the job, blocking until it has finished.
     *
     * @param args generic Hadoop options followed by the input path and the output path
     * @throws Exception if the job cannot be submitted or fails while running
     */
    public static void main(String[] args) throws Exception {

        JobConf conf = new JobConf(FlipJoBims.class);

        /* begin necessary for UKP cluster */
        conf.setMemoryForMapTask(TASK_MEMORY); // 1 GB /* necessary for UKP cdh3 */
        conf.setMemoryForReduceTask(TASK_MEMORY); // 1 GB /* necessary for UKP cdh3 */
        FileOutputFormat.setCompressOutput(conf, true); // compress output
        FileOutputFormat.setOutputCompressorClass(conf,
                org.apache.hadoop.io.compress.BZip2Codec.class); /* use the bzip2 codec for compression */
        conf.setCompressMapOutput(true); // compress mapper output
        /* end necessary for UKP cluster */

        conf.setJobName(FlipJoBims.class.getSimpleName());
        // Strip the generic options (-D, -conf, ...) so only the two paths remain.
        args = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (args.length < 2) {
            // Fail with a readable message instead of an ArrayIndexOutOfBoundsException below.
            System.err.println("Usage: " + FlipJoBims.class.getSimpleName()
                    + " [generic options] <input path> <output path>");
            System.exit(2);
        }

        conf.setInputFormat(KeyValueTextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        conf.setMapperClass(FlipJoBims.Map.class);
        // Map-only job: the mapper output is written directly, no reduce phase.
        conf.setNumReduceTasks(0);

        conf.setMapOutputKeyClass(Text.class);
        conf.setOutputKeyClass(Text.class);

        conf.setMapOutputValueClass(Text.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);

    }

    /**
     * Mapper that exchanges the incoming key with the first tab-separated column of the
     * incoming value.
     */
    public static class Map extends MapReduceBase implements Mapper<Text, Text, Text, Text> {
        /** Reused output key, avoids allocating a new {@link Text} per record. */
        private final Text _key = new Text();
        /** Reused output value, so the framework-owned input value is left untouched. */
        private final Text _value = new Text();

        /**
         * Emits the record with its first two columns swapped.
         *
         * <p>If {@code value} contains no tab (the line had fewer than three columns), the
         * whole value becomes the new key and the old key becomes the new value.
         *
         * @param key       first column of the input line
         * @param value     remainder of the input line after the first tab
         * @param collector receives the flipped record
         * @param reporter  unused
         * @throws IOException if the collector fails to accept the record
         */
        @Override
        public void map(Text key, Text value, OutputCollector<Text, Text> collector, Reporter reporter)
                throws IOException {
            String rest = value.toString();
            String oldKey = key.toString();
            int tabIndex = rest.indexOf('\t');
            if (tabIndex < 0) {
                // No further columns: a plain two-column flip. Without this guard
                // substring(0, -1) would throw and kill the whole task.
                _key.set(rest);
                _value.set(oldKey);
            } else {
                _key.set(rest.substring(0, tabIndex));
                // substring(tabIndex) keeps the leading tab, so the columns stay separated.
                _value.set(oldKey + rest.substring(tabIndex));
            }
            collector.collect(_key, _value);
        }
    }

}