org.apache.lens.lib.query.LensFileOutputFormat.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lens.lib.query.LensFileOutputFormat.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.lens.lib.query;

import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.lens.server.api.LensConfConstants;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * File output format which would write Text values in the charset enconding passed.
 */
public class LensFileOutputFormat extends FileOutputFormat<NullWritable, Text> {

    /**
     * The Constant UTF8.
     */
    public static final String UTF8 = "UTF-8";

    /**
     * The Constant UTF16LE.
     */
    public static final String UTF16LE = "UTF-16LE";

    /**
     * The Constant NEWLINE.
     */
    public static final String NEWLINE = "\n";

    /**
     * Regex pattern for valid file path name characters
     */
    public static final Pattern VALID_PATTERN = Pattern.compile("[^a-zA-Z0-9_\\-\\.]");

    /**
     * The Class LensRowWriter.
     */
    public static class LensRowWriter implements RecordWriter<NullWritable, Text> {

        /**
         * The out.
         */
        protected OutputStreamWriter out;

        /**
         * The tmp path.
         */
        private Path tmpPath;

        /**
         * The extn.
         */
        private String extn;

        /**
         * Instantiates a new lens row writer.
         *
         * @param out      the out
         * @param encoding the encoding
         * @param tmpPath  the tmp path
         * @param extn     the extn
         */
        public LensRowWriter(DataOutputStream out, String encoding, Path tmpPath, String extn) {
            this.tmpPath = tmpPath;
            this.extn = extn;
            try {
                this.out = new OutputStreamWriter(out, encoding);
            } catch (UnsupportedEncodingException uee) {
                throw new IllegalArgumentException("can't find " + encoding + " encoding");
            }
        }

        /**
         * Instantiates a new lens row writer.
         *
         * @param out the out
         */
        public LensRowWriter(DataOutputStream out) {
            this(out, UTF8, null, null);
        }

        /*
         * (non-Javadoc)
         *
         * @see org.apache.hadoop.mapred.RecordWriter#write(java.lang.Object, java.lang.Object)
         */
        public synchronized void write(NullWritable key, Text value) throws IOException {
            boolean nullValue = value == null;
            if (!nullValue) {
                out.write(value.toString());
            }
            out.write(NEWLINE);
        }

        /*
         * (non-Javadoc)
         *
         * @see org.apache.hadoop.mapred.RecordWriter#close(org.apache.hadoop.mapred.Reporter)
         */
        public synchronized void close(Reporter reporter) throws IOException {
            if (out != null) {
                out.close();
            }
        }

        public Path getTmpPath() {
            return tmpPath;
        }

        public String getExtn() {
            return extn;
        }

        public String getEncoding() {
            return out.getEncoding();
        }
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.hadoop.mapred.FileOutputFormat#getRecordWriter(org.apache.hadoop.fs.FileSystem,
     * org.apache.hadoop.mapred.JobConf, java.lang.String, org.apache.hadoop.util.Progressable)
     */
    public RecordWriter<NullWritable, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
            Progressable progress) throws IOException {
        return createRecordWriter(job, FileOutputFormat.getTaskOutputPath(job, name), progress,
                getCompressOutput(job), getOuptutFileExtn(job), getResultEncoding(job));
    }

    /**
     * Creates the record writer.
     *
     * @param conf         the conf
     * @param tmpWorkPath  the tmp work path
     * @param progress     the progress
     * @param isCompressed the is compressed
     * @param extn         the extn
     * @param encoding     the encoding
     * @return the lens row writer
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public static LensRowWriter createRecordWriter(Configuration conf, Path tmpWorkPath, Progressable progress,
            boolean isCompressed, String extn, String encoding) throws IOException {
        Path file;
        if (extn != null) {
            file = new Path(tmpWorkPath + extn);
        } else {
            file = tmpWorkPath;
        }
        if (!isCompressed) {
            FileSystem fs = file.getFileSystem(conf);
            FSDataOutputStream fileOut = fs.create(file, progress);
            return new LensRowWriter(fileOut, encoding, file, extn);
        } else {
            Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(conf);
            // create the named codec
            CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);
            // build the filename including the extension
            String codecExtn = codec.getDefaultExtension();
            file = new Path(file + codecExtn);
            FileSystem fs = file.getFileSystem(conf);
            FSDataOutputStream fileOut = fs.create(file, progress);
            return new LensRowWriter(new DataOutputStream(codec.createOutputStream(fileOut)), encoding, file,
                    extn + codecExtn);
        }
    }

    /**
     * Gets the result encoding.
     *
     * @param conf the conf
     * @return the result encoding
     */
    public String getResultEncoding(Configuration conf) {
        return conf.get(LensConfConstants.QUERY_OUTPUT_CHARSET_ENCODING,
                LensConfConstants.DEFAULT_OUTPUT_CHARSET_ENCODING);
    }

    /**
     * Gets the ouptut file extn.
     *
     * @param conf the conf
     * @return the ouptut file extn
     */
    public String getOuptutFileExtn(Configuration conf) {
        return conf.get(LensConfConstants.QUERY_OUTPUT_FILE_EXTN, LensConfConstants.DEFAULT_OUTPUT_FILE_EXTN);
    }

    /**
     * Gets the output compressor class.
     *
     * @param conf the conf
     * @return the output compressor class
     */
    public static Class<? extends CompressionCodec> getOutputCompressorClass(Configuration conf) {
        Class<? extends CompressionCodec> codecClass;

        String name = conf.get(LensConfConstants.QUERY_OUTPUT_COMPRESSION_CODEC,
                LensConfConstants.DEFAULT_OUTPUT_COMPRESSION_CODEC);
        try {
            codecClass = conf.getClassByName(name).asSubclass(CompressionCodec.class);
        } catch (ClassNotFoundException e) {
            throw new IllegalArgumentException("Compression codec " + name + " was not found.", e);
        }
        return codecClass;
    }

    /**
     * Gets the compress output.
     *
     * @param conf the conf
     * @return the compress output
     */
    public static boolean getCompressOutput(Configuration conf) {
        return conf.getBoolean(LensConfConstants.QUERY_OUTPUT_ENABLE_COMPRESSION,
                LensConfConstants.DEFAULT_OUTPUT_ENABLE_COMPRESSION);
    }

    /**
     * Generates a valid output file name from the given name
     * @param name name to be converted
     * @return Valid output file name
     */
    public static String getValidOutputFileName(String name) {
        Matcher matcher = VALID_PATTERN.matcher(name);
        String validName = matcher.replaceAll("_");
        if (validName.length() > 50) {
            validName = validName.substring(0, 50);
        }
        return validName;
    }
}