com.jbw.tar.sf.TarOutputFormat.java Source code

Java tutorial

Introduction

Here is the source code for com.jbw.tar.sf.TarOutputFormat.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.jbw.tar.sf;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Hadoop {@link FileOutputFormat} that packs the records of a task into a single tar archive.
 *
 * <p>Every record whose key and value are both non-null becomes one archive entry: the key's
 * {@code toString()} is used as the entry name and the value's {@code toString()}, encoded as
 * UTF-8, is used as the entry content. The archive is written to the task's default work file
 * with a {@code .tar} extension.
 *
 * @author alvin
 * @param <K> key type; its string form names the tar entry
 * @param <V> value type; its string form is the entry content
 */
public class TarOutputFormat<K, V> extends FileOutputFormat<K, V> {

    /** Extension appended to the task's default work file name. */
    private static final String EXTENSION = ".tar";

    /**
     * Record writer that appends one tar entry per record to the wrapped stream.
     *
     * @param <K> key type; its string form names the tar entry
     * @param <V> value type; its string form is the entry content
     */
    private static class TarOutputWriter<K, V> extends RecordWriter<K, V> {

        private final TarArchiveOutputStream output;

        /**
         * Wraps the given stream in a tar archive stream.
         *
         * @param os destination stream; it is closed when this writer is closed
         */
        public TarOutputWriter(OutputStream os) {
            this.output = new TarArchiveOutputStream(os);
            // The default long-file mode rejects entry names longer than 100 bytes; keys are
            // arbitrary strings (often paths), so allow long names via the GNU extension.
            this.output.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);
        }

        /**
         * Writes one record as a tar entry. Records with a null key or a null value are
         * skipped silently.
         *
         * @param key   record key; {@code key.toString()} becomes the entry name
         * @param value record value; {@code value.toString()} encoded as UTF-8 becomes the
         *              entry content
         * @throws IOException if the entry cannot be written to the archive
         */
        @Override
        public synchronized void write(K key, V value) throws IOException {
            if (key == null || value == null) {
                return;
            }

            // Encode explicitly: the platform default charset can differ between cluster
            // nodes, which would make the archive contents depend on where the task ran.
            byte[] payload = value.toString().getBytes(StandardCharsets.UTF_8);

            TarArchiveEntry entry = new TarArchiveEntry(key.toString());
            // The tar header must carry the exact content length before the data is written.
            entry.setSize(payload.length);

            output.putArchiveEntry(entry);
            output.write(payload, 0, payload.length);
            output.closeArchiveEntry();
        }

        /**
         * Finishes the archive and closes the underlying stream.
         *
         * @param context task attempt context (unused)
         * @throws IOException if finishing the archive or closing the stream fails
         */
        @Override
        public synchronized void close(TaskAttemptContext context) throws IOException {
            // close() writes the end-of-archive records if finish() has not run yet and then
            // closes the wrapped stream; calling only finish() would leave that stream open.
            output.close();
        }
    }

    /**
     * Creates the tar record writer for the given task attempt.
     *
     * @param context task attempt context supplying the configuration and work file location
     * @return a writer that stores each record as an entry of the task's {@code .tar} file
     * @throws IOException          if the output file cannot be created (it is created without
     *                              overwrite, so an already existing file is an error)
     * @throws InterruptedException declared by the overridden method; not thrown by the code
     *                              in this method
     */
    @Override
    public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();

        Path file = getDefaultWorkFile(context, EXTENSION);
        FileSystem fs = file.getFileSystem(conf);
        OutputStream fileOut = fs.create(file, false);

        return new TarOutputWriter<>(fileOut);
    }
}