com.jbw.taroutputformat.TarOutputFormat.java Source code

Java tutorial

Introduction

Here is the source code for com.jbw.taroutputformat.TarOutputFormat.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.jbw.taroutputformat;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * A {@link FileOutputFormat} that stores the records of a task as entries of a
 * single tar archive.
 *
 * <p>Each record becomes one archive entry: the entry name is
 * {@code key.toString()} and the entry content is {@code value.toString()}
 * encoded as UTF-8. The archive is written to the task's default work file
 * with a {@code .tar} extension.
 *
 * @param <K> key type; its {@code toString()} supplies the entry name
 * @param <V> value type; its {@code toString()} supplies the entry content
 * @author alvin
 */
public class TarOutputFormat<K, V> extends FileOutputFormat<K, V> {

    /**
     * Record writer that appends one tar entry per key/value pair to the
     * wrapped output stream.
     */
    private static class TarOutputWriter<K, V> extends RecordWriter<K, V> {

        private final TarArchiveOutputStream output;

        /**
         * Wraps the given stream in a tar archive stream.
         *
         * @param os destination stream; it is closed when this writer is closed
         */
        public TarOutputWriter(OutputStream os) {
            this.output = new TarArchiveOutputStream(os);
            // The library default rejects entry names longer than 100 bytes with a
            // runtime exception; GNU long-name entries let arbitrary keys through.
            this.output.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);
        }

        /**
         * Writes one record as a tar entry. Records with a {@code null} key or
         * a {@code null} value are silently skipped.
         *
         * @param k record key, used as the entry name
         * @param v record value, used as the entry content
         * @throws IOException if the archive stream cannot be written
         */
        @Override
        public synchronized void write(K k, V v) throws IOException, InterruptedException {
            if (k == null || v == null) {
                return;
            }
            // Encode explicitly so the archive content does not depend on the
            // JVM's platform default charset.
            byte[] payload = v.toString().getBytes(StandardCharsets.UTF_8);
            TarArchiveEntry entry = new TarArchiveEntry(k.toString());
            // The entry size must be declared before the entry header is written.
            entry.setSize(payload.length);
            output.putArchiveEntry(entry);
            output.write(payload);
            output.closeArchiveEntry();
        }

        /**
         * Finishes the archive and closes the underlying stream.
         *
         * @param tac the task attempt context (unused)
         * @throws IOException if finishing the archive or closing the stream fails
         */
        @Override
        public synchronized void close(TaskAttemptContext tac) throws IOException, InterruptedException {
            try {
                output.finish();
            } finally {
                // Always release the underlying file stream, even if finishing
                // the archive failed; otherwise the output file is never closed.
                output.close();
            }
        }

    }

    /**
     * Creates the writer for this task attempt. The archive is created at the
     * task's default work file with a {@code .tar} extension, without
     * overwriting an existing file.
     *
     * @param tac the task attempt context supplying configuration and work path
     * @return a writer that appends every record to the tar archive
     * @throws IOException if the output file cannot be created
     */
    @Override
    public RecordWriter<K, V> getRecordWriter(TaskAttemptContext tac) throws IOException, InterruptedException {
        Configuration conf = tac.getConfiguration();
        String extension = ".tar";
        Path file = getDefaultWorkFile(tac, extension);
        FileSystem fs = file.getFileSystem(conf);
        // overwrite = false: fail rather than clobber an existing file.
        OutputStream fileOut = fs.create(file, false);
        return new TarOutputWriter<>(fileOut);
    }
}