org.apache.falcon.metadata.LineageRecorder.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.falcon.metadata.LineageRecorder.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.falcon.metadata;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.falcon.FalconException;
import org.apache.falcon.hadoop.HadoopClientFactory;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.json.simple.JSONValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;

/**
 * Utility called in the post process of oozie workflow to record lineage information.
 */
public class LineageRecorder extends Configured implements Tool {

    private static final Logger LOG = LoggerFactory.getLogger(LineageRecorder.class);

    public static void main(String[] args) throws Exception {
        ToolRunner.run(new LineageRecorder(), args);
    }

    @Override
    public int run(String[] arguments) throws Exception {
        CommandLine command = getCommand(arguments);

        LOG.info("Parsing lineage metadata from: {}", command);
        Map<String, String> lineageMetadata = getLineageMetadata(command);
        LOG.info("Lineage Metadata: {}", lineageMetadata);

        String lineageFile = getFilePath(command.getOptionValue(LineageArgs.LOG_DIR.getOptionName()),
                command.getOptionValue(LineageArgs.ENTITY_NAME.getOptionName()));

        LOG.info("Persisting lineage metadata to: {}", lineageFile);
        persistLineageMetadata(lineageMetadata, lineageFile);

        return 0;
    }

    protected static CommandLine getCommand(String[] arguments) throws ParseException {

        Options options = new Options();

        for (LineageArgs arg : LineageArgs.values()) {
            addOption(options, arg);
        }

        return new GnuParser().parse(options, arguments);
    }

    private static void addOption(Options options, LineageArgs arg) {
        addOption(options, arg, true);
    }

    private static void addOption(Options options, LineageArgs arg, boolean isRequired) {
        Option option = arg.getOption();
        option.setRequired(isRequired);
        options.addOption(option);
    }

    protected Map<String, String> getLineageMetadata(CommandLine command) {
        Map<String, String> lineageMetadata = new HashMap<String, String>();

        for (LineageArgs arg : LineageArgs.values()) {
            lineageMetadata.put(arg.getOptionName(), arg.getOptionValue(command));
        }

        return lineageMetadata;
    }

    public static String getFilePath(String logDir, String entityName) {
        return logDir + entityName + "-lineage.json";
    }

    /**
     * this method is invoked from with in the workflow.
     *
     * @param lineageMetadata metadata to persist
     * @param lineageFile file to serialize the metadata
     * @throws IOException
     * @throws FalconException
     */
    protected void persistLineageMetadata(Map<String, String> lineageMetadata, String lineageFile)
            throws IOException, FalconException {
        OutputStream out = null;
        Path file = new Path(lineageFile);
        try {
            FileSystem fs = HadoopClientFactory.get().createFileSystem(file.toUri(), getConf());
            out = fs.create(file);

            // making sure falcon can read this file
            FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);
            fs.setPermission(file, permission);

            out.write(JSONValue.toJSONString(lineageMetadata).getBytes());
        } finally {
            if (out != null) {
                try {
                    out.close();
                } catch (IOException ignore) {
                    // ignore
                }
            }
        }
    }

    public static Map<String, String> parseLineageMetadata(String lineageFile) throws FalconException {
        try {
            Path lineageDataPath = new Path(lineageFile); // file has 777 permissions
            FileSystem fs = HadoopClientFactory.get().createFileSystem(lineageDataPath.toUri());
            if (fs.exists(lineageDataPath)) {
                BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(lineageDataPath)));
                return (Map<String, String>) JSONValue.parse(in);
            }
            return null;
        } catch (IOException e) {
            throw new FalconException("Error opening lineage file", e);
        }
    }
}