org.pentaho.hadoop.shim.common.format.avro.PentahoAvroOutputFormat.java Source code

Java tutorial

Introduction

Here is the source code for org.pentaho.hadoop.shim.common.format.avro.PentahoAvroOutputFormat.java

Source

/*******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2017 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/
package org.pentaho.hadoop.shim.common.format.avro;

import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumWriter;
import org.apache.commons.lang.StringUtils;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.hadoop.shim.api.format.IPentahoAvroOutputFormat;
import org.pentaho.hadoop.shim.api.format.SchemaDescription;

import java.io.OutputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.zip.Deflater;

/**
 * Avro implementation of {@link IPentahoAvroOutputFormat}.
 *
 * <p>Callers configure the instance through the setters (schema description, output file,
 * namespace, record name, optional doc value, schema file name and compression) and then call
 * {@link #createRecordWriter()} to obtain a writer backed by an Avro {@link DataFileWriter}.
 *
 * @author tkafalas
 */
public class PentahoAvroOutputFormat implements IPentahoAvroOutputFormat {
    /** Pattern used to timestamp validation messages; {@code MM} is month, {@code mm} is minute. */
    private static final String ERROR_DATE_PATTERN = "yyyy/MM/dd HH:mm:ss";

    private Schema schema;
    private String outputFilename;
    private SchemaDescription schemaDescription;
    // Default to "no compression" so createRecordWriter() works even when setCompression() was never called.
    private CodecFactory codecFactory = CodecFactory.nullCodec();

    private String nameSpace;
    private String recordName;
    private String docValue;
    private String schemaFilename;

    /**
     * Validates the configuration, builds the Avro schema from the schema description, writes the
     * schema to the configured schema file and opens the Avro data file for writing.
     *
     * @return a record writer wrapping the opened Avro data file writer
     * @throws Exception if validation fails, or if the schema or the output file cannot be created
     */
    @Override
    public IPentahoRecordWriter createRecordWriter() throws Exception {
        validate();
        AvroSchemaConverter converter = new AvroSchemaConverter(schemaDescription, nameSpace, recordName, docValue);
        schema = converter.getAvroSchema();
        converter.writeAvroSchemaToFile(schemaFilename);
        DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
        DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
        dataFileWriter.setCodec(codecFactory);

        OutputStream outputStream = KettleVFS.getOutputStream(outputFilename, false);
        boolean created = false;
        try {
            dataFileWriter.create(schema, outputStream);
            created = true;
        } finally {
            if (!created) {
                // The writer never took ownership of the stream, so close it here to avoid a leak.
                try {
                    outputStream.close();
                } catch (Exception ignored) {
                    // Intentionally ignored: the failure from create() is the one worth reporting.
                }
            }
        }
        return new PentahoAvroRecordWriter(dataFileWriter, schema, schemaDescription);
    }

    /**
     * Checks that the output file name, schema namespace and schema record name have been set.
     * All missing settings are collected and reported together in one exception.
     *
     * @throws Exception if at least one required setting is empty; the message contains one
     *                   timestamped line per missing setting
     */
    private void validate() throws Exception {
        // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
        String date = new SimpleDateFormat(ERROR_DATE_PATTERN).format(new Date());

        StringBuilder errors = new StringBuilder();
        if (StringUtils.isEmpty(outputFilename)) {
            appendError(errors, date,
                    " - Unable to run [TRANS_NAME]. Please set the Avro Output Folder/File name for [STEP_NAME].");
        }
        if (StringUtils.isEmpty(nameSpace)) {
            appendError(errors, date,
                    " - Unable to run [TRANS_NAME]. Please set the Avro Schema Namespace for [STEP_NAME].");
        }
        if (StringUtils.isEmpty(recordName)) {
            appendError(errors, date,
                    " - Unable to run [TRANS_NAME]. Please set the Avro Schema Record name for [STEP_NAME].");
        }
        if (errors.length() > 0) {
            throw new Exception(errors.toString());
        }
    }

    /** Appends one validation message on its own line, prefixed with the given timestamp. */
    private static void appendError(StringBuilder errors, String date, String message) {
        errors.append("\n").append(date).append(message);
    }

    /**
     * Sets the description of the fields used to build the Avro schema.
     *
     * @param schemaDescription the schema description handed to the schema converter
     */
    @Override
    public void setSchemaDescription(SchemaDescription schemaDescription) throws Exception {
        this.schemaDescription = schemaDescription;
    }

    /**
     * Sets the name of the Avro data file to write.
     *
     * @param file the output file name; must be non-empty by the time the writer is created
     */
    @Override
    public void setOutputFile(String file) throws Exception {
        this.outputFilename = file;
    }

    /**
     * Selects the codec used for the Avro data file: {@code SNAPPY} maps to the snappy codec,
     * {@code DEFLATE} to the deflate codec at {@link Deflater#DEFAULT_COMPRESSION}, and any other
     * value (including {@code null}) to the null codec.
     *
     * @param compression the requested compression, may be {@code null}
     */
    @Override
    public void setCompression(COMPRESSION compression) {
        if (compression == null) {
            // Switching on null would throw a NullPointerException; treat it like the default branch.
            codecFactory = CodecFactory.nullCodec();
            return;
        }
        switch (compression) {
        case SNAPPY:
            codecFactory = CodecFactory.snappyCodec();
            break;
        case DEFLATE:
            codecFactory = CodecFactory.deflateCodec(Deflater.DEFAULT_COMPRESSION);
            break;
        default:
            codecFactory = CodecFactory.nullCodec();
            break;
        }
    }

    /**
     * Sets the namespace passed to the schema converter.
     *
     * @param namespace the Avro schema namespace; must be non-empty by the time the writer is created
     */
    @Override
    public void setNameSpace(String namespace) {
        this.nameSpace = namespace;
    }

    /**
     * Sets the record name passed to the schema converter.
     *
     * @param recordName the Avro schema record name; must be non-empty by the time the writer is created
     */
    @Override
    public void setRecordName(String recordName) {
        this.recordName = recordName;
    }

    /**
     * Sets the doc value passed to the schema converter.
     *
     * @param docValue the schema documentation value; not checked by validation
     */
    @Override
    public void setDocValue(String docValue) {
        this.docValue = docValue;
    }

    /**
     * Sets the file name the generated Avro schema is written to.
     *
     * @param schemaFilename the schema file name; not checked by validation
     */
    @Override
    public void setSchemaFilename(String schemaFilename) {
        this.schemaFilename = schemaFilename;
    }

}