Java tutorial
/* (c) 2014 LinkedIn Corp. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied.
 */

package com.linkedin.cubert.io.text;

import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTextOutputFormat;
import org.apache.pig.data.Tuple;
import org.codehaus.jackson.JsonNode;

import com.linkedin.cubert.block.BlockSchema;
import com.linkedin.cubert.io.TeeWriter;
import com.linkedin.cubert.operator.PhaseContext;
import com.linkedin.cubert.utils.JsonUtils;

/**
 * Writes TEE data in TEXT format.
 * <p>
 * Tuples are written through Pig's line record writer, with fields separated by a
 * single-byte delimiter (TAB unless overridden by the "separator" entry of the
 * operator's "params"). When output compression is enabled for the current task, the
 * file is compressed with the configured codec (Gzip when none is configured) and the
 * codec's default extension is appended to the file name.
 *
 * @author Maneesh Varshney
 *
 */
public class TextTeeWriter implements TeeWriter
{
    /** Separator used when the operator JSON does not specify one. */
    private static final String DEFAULT_SEPARATOR = "\t";

    /**
     * The whole purpose of this delegate is to have access to the PigLineRecordWriter
     * class, which is a nested protected class.
     *
     * @author Maneesh Varshney
     *
     */
    private static final class Delegate extends PigTextOutputFormat
    {
        private final PigLineRecordWriter writer;

        /**
         * @param out
         *            stream the tuples are written to
         * @param delimiter
         *            single byte placed between the fields of a tuple
         */
        public Delegate(DataOutputStream out, byte delimiter)
        {
            super(delimiter);
            writer = new PigLineRecordWriter(out, delimiter);
        }

        /**
         * Writes one tuple; the record writer is always given a null key.
         */
        public void write(Tuple tuple) throws IOException
        {
            writer.write(null, tuple);
        }
    }

    private Delegate delegate;
    private DataOutputStream out;

    /**
     * Creates the output file {@code root/filename[extension]} and prepares the writer.
     *
     * @param conf
     *            Hadoop configuration used to resolve the file system and to
     *            instantiate the compression codec
     * @param json
     *            operator JSON; an optional "params" object may carry a "separator"
     *            entry (Java escape sequences such as "\\t" are unescaped)
     * @param schema
     *            schema of the data (not used by this writer)
     * @param root
     *            directory in which the file is created
     * @param filename
     *            name of the file, without the compression extension
     * @throws IOException
     *             if the file or the compression stream cannot be created
     * @throws IllegalArgumentException
     *             if the separator does not encode to exactly one byte in UTF-8
     */
    @Override
    public void open(Configuration conf,
                     JsonNode json,
                     BlockSchema schema,
                     Path root,
                     String filename) throws IOException
    {
        String separator = DEFAULT_SEPARATOR;
        if (json.has("params") && !json.get("params").isNull()
                && json.get("params").has("separator"))
        {
            separator = JsonUtils.getText(json.get("params"), "separator");
        }
        separator = StringEscapeUtils.unescapeJava(separator);

        // The record writer takes a single delimiter byte, so both an empty separator
        // and a multi-byte one must be rejected before bytes[0] is read.
        byte[] bytes = separator.getBytes("UTF-8");
        if (bytes.length != 1)
        {
            throw new IllegalArgumentException(String.format("Invalid separator in text output format %s",
                                                             separator));
        }

        final TaskAttemptContext context =
                PhaseContext.isMapper() ? PhaseContext.getMapContext()
                        : PhaseContext.getRedContext();

        String extension = "";
        CompressionCodec codec = null;
        if (FileOutputFormat.getCompressOutput(context))
        {
            Class<? extends CompressionCodec> codecClass =
                    FileOutputFormat.getOutputCompressorClass(context, GzipCodec.class);
            codec = ReflectionUtils.newInstance(codecClass, conf);
            extension = codec.getDefaultExtension();
        }

        // Resolve the file system from the path itself so that a root qualified for a
        // non-default file system works; unqualified paths still resolve to the default.
        final Path file = new Path(root, filename + extension);
        final FileSystem fs = file.getFileSystem(conf);
        final DataOutputStream raw = fs.create(file);

        boolean opened = false;
        try
        {
            DataOutputStream stream = raw;
            if (codec != null)
            {
                stream = new DataOutputStream(codec.createOutputStream(raw));
            }
            Delegate writer = new Delegate(stream, bytes[0]);

            // Publish the fields only once everything has been set up successfully.
            out = stream;
            delegate = writer;
            opened = true;
        }
        finally
        {
            if (!opened)
            {
                // Do not leak the freshly created file stream when wrapping it fails.
                try
                {
                    raw.close();
                }
                catch (IOException ignored)
                {
                    // The failure that brought us here is the one worth reporting.
                }
            }
        }
    }

    /**
     * Writes one tuple as a line of delimited text.
     */
    @Override
    public void write(Tuple tuple) throws IOException
    {
        delegate.write(tuple);
    }

    /**
     * Closes the underlying stream; does nothing if the writer was never opened.
     */
    @Override
    public void close() throws IOException
    {
        if (out != null)
        {
            out.close();
        }
    }

    /**
     * Flushes the underlying stream; does nothing if the writer was never opened.
     */
    @Override
    public void flush() throws IOException
    {
        if (out != null)
        {
            out.flush();
        }
    }
}