// Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package com.jbw.tar.sf; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.OutputStream; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; /** * * @author alvin * @param <K> * @param <V> */ public class TarOutputFormat<K, V> extends FileOutputFormat<K, V> { private static class TarOutputWriter<K, V> extends RecordWriter<K, V> { private final TarArchiveOutputStream output; public TarOutputWriter(OutputStream os) { this.output = new TarArchiveOutputStream(os); } @Override public synchronized void write(K key, V value) throws IOException { if (key == null || value == null) { return; } TarArchiveEntry mtd = new TarArchiveEntry(key.toString()); byte[] b = value.toString().getBytes(); mtd.setSize(b.length); output.putArchiveEntry(mtd); IOUtils.copyBytes(new ByteArrayInputStream(b), output, 4096, false); output.closeArchiveEntry(); } @Override public synchronized void close(TaskAttemptContext context) throws IOException { if (output != null) { output.flush(); output.finish(); } } } @Override public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); String extension = ".tar"; Path file = getDefaultWorkFile(context, extension); FileSystem fs = file.getFileSystem(conf); OutputStream fileOut = fs.create(file, false); //tar? 
return new TarOutputWriter<>(fileOut); } }