com.cloudera.recordbreaker.schemadict.SchemaDictionary.java Source code

Java tutorial

Introduction

Here is the source code for com.cloudera.recordbreaker.schemadict.SchemaDictionary.java

Source

/*
 * Copyright (c) 2011, Cloudera, Inc. All Rights Reserved.
 *
 * Cloudera, Inc. licenses this file to you under the Apache License,
 * Version 2.0 (the "License"). You may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for
 * the specific language governing permissions and limitations under the
 * License.
 */
package com.cloudera.recordbreaker.schemadict;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;

import java.util.List;
import java.util.Random;
import java.util.ArrayList;

import org.apache.avro.Schema;

/******************************************
 * A SchemaDictionary holds a number of serialized SchemaDictionaryEntry objects, each of
 * which holds some schema info, a SchemaStatisticalSummary, and a user comment.
 * 
 * A SchemaDictionary is meant to be the "clean schema reference" that helps users give
 * a name to novel schemas.
 *
 * Each entry is stored in the dictionary directory under a "file root"; entries are
 * discovered at load time by looking for files whose names end with
 * SchemaDictionaryEntry.SUMMARY_ENDING.
 *
 * @author mjc
 ******************************************/
public class SchemaDictionary {
    /** Canonical form of the directory that backs this dictionary. */
    File dir;
    /** Source of random file roots for newly added entries. */
    Random r = new Random();
    /** Entries loaded from, or added to, the dictionary directory. */
    List<SchemaDictionaryEntry> dictElts = new ArrayList<SchemaDictionaryEntry>();

    /**
     * Load the schema dictionary from the given directory, creating the directory
     * (and any missing parents) if it does not exist yet.
     *
     * @param dir directory that holds the dictionary's entry files
     * @throws IOException if the directory cannot be created or listed, or if
     *                     loading one of the entries fails
     */
    public SchemaDictionary(File dir) throws IOException {
        this.dir = dir.getCanonicalFile();
        if (!this.dir.exists() && !this.dir.mkdirs()) {
            throw new IOException("Could not create: " + this.dir);
        }

        File[] dictFiles = this.dir.listFiles(new FilenameFilter() {
            public boolean accept(File parent, String name) {
                return name.endsWith(SchemaDictionaryEntry.SUMMARY_ENDING);
            }
        });
        // listFiles() yields null (not an empty array) when the path is not a
        // directory or an I/O error occurs; report that instead of hitting an NPE.
        if (dictFiles == null) {
            throw new IOException("Could not list schema dictionary directory: " + this.dir);
        }

        final int endingLength = SchemaDictionaryEntry.SUMMARY_ENDING.length();
        for (File dictFile : dictFiles) {
            String name = dictFile.getName();
            // The file root is the file name with the summary suffix stripped off.
            String fileRoot = name.substring(0, name.length() - endingLength);
            SchemaDictionaryEntry sde = new SchemaDictionaryEntry();
            sde.loadDictionaryEntry(this.dir, fileRoot);
            dictElts.add(sde);
        }
    }

    /**
     * Store a novel dictionary element (which is constructed with the original datafile and a user's comment).
     * The entry is written to the dictionary directory under a freshly chosen random
     * file root and, once saved, added to the in-memory contents.
     *
     * @param avroFile the data file the new entry is built from
     * @param infoText the user's comment to attach to the new entry
     * @throws IOException if the entry cannot be built or saved; in that case the
     *                     in-memory contents are left unchanged
     */
    public synchronized void addDictionaryElt(File avroFile, String infoText) throws IOException {
        SchemaDictionaryEntry entry = new SchemaDictionaryEntry(avroFile, infoText);

        // nextInt(bound) is always non-negative, unlike Math.abs(nextInt()), which
        // stays negative for Integer.MIN_VALUE. Retry while the root appears taken so
        // an existing entry is not overwritten. The check assumes an entry's summary
        // file is named fileRoot + SUMMARY_ENDING, which is how the constructor
        // discovers entries.
        String fileRoot;
        do {
            fileRoot = "" + r.nextInt(Integer.MAX_VALUE);
        } while (new File(dir, fileRoot + SchemaDictionaryEntry.SUMMARY_ENDING).exists());

        // Save first: if the write fails, the in-memory list must not claim the entry.
        entry.saveDictionaryEntry(dir, fileRoot);
        dictElts.add(entry);
    }

    /**
     * Iterate through objects already in the directory.
     *
     * @return the dictionary's entry list; this is the internal list itself, not a copy
     */
    public List<SchemaDictionaryEntry> contents() {
        return dictElts;
    }

    /**
     * Print the command-line help followed by the reminder about the required
     * schema dictionary path argument.
     */
    private static void printUsage(Options options) {
        HelpFormatter fmt = new HelpFormatter();
        fmt.printHelp("SchemaDictionary", options, true);
        System.err.println("Required input: <schemadictionary>");
    }

    //////////////////////////////////////////
    // main()
    //////////////////////////////////////////
    /**
     * Command-line entry point. Takes the schema dictionary directory as its single
     * positional argument, plus:
     *   -?         print help and exit
     *   -d         dump the contents of the dictionary
     *   -a file    datafile for a new dictionary element (requires -m)
     *   -m text    comment for a new dictionary element (requires -a)
     *
     * Exits with status 0 after printing help on request, and with a non-zero status
     * when the arguments cannot be parsed or are incomplete.
     *
     * @param argv command-line arguments
     * @throws IOException if the dictionary cannot be loaded or the new element cannot be stored
     */
    public static void main(String[] argv) throws IOException {
        Options options = new Options();
        options.addOption("?", false, "Help for command-line");
        options.addOption("d", false, "Dump contents of schema dictionary");
        options.addOption("a", true, "Add datafile to new schema dictionary element");
        options.addOption("m", true, "Add comment message as part of new schema dictionary element");

        CommandLine cmd = null;
        try {
            CommandLineParser parser = new PosixParser();
            cmd = parser.parse(options, argv);
        } catch (ParseException e) {
            printUsage(options);
            System.exit(-1);
            return;
        }

        if (cmd.hasOption("?")) {
            printUsage(options);
            System.exit(0);
        }

        boolean shouldDump = cmd.hasOption("d");
        boolean hasDataFile = cmd.hasOption("a");
        boolean hasMessage = cmd.hasOption("m");

        // Adding an element needs both the datafile and the comment; one without the
        // other is a usage error, so exit with a failure status.
        if (hasDataFile != hasMessage) {
            System.err.println("Must indicate -a AND -m to add new schema dictionary item");
            printUsage(options);
            System.exit(-1);
        }
        boolean shouldAdd = hasDataFile && hasMessage;

        String[] argArray = cmd.getArgs();
        if (argArray.length == 0) {
            System.err.println("No schema dictionary path provided.");
            printUsage(options);
            System.exit(-1);
        }

        File dictionaryDir = new File(argArray[0]).getCanonicalFile();
        SchemaDictionary dict = new SchemaDictionary(dictionaryDir);

        if (shouldAdd) {
            File avroDataFile = new File(cmd.getOptionValue("a")).getCanonicalFile();
            String dictMessage = cmd.getOptionValue("m");
            dict.addDictionaryElt(avroDataFile, dictMessage);
        }

        if (shouldDump) {
            List<SchemaDictionaryEntry> entries = dict.contents();
            int counter = 1;
            for (SchemaDictionaryEntry entry : entries) {
                System.err.println("" + counter + ".  " + entry.getInfo());
                System.err.println(entry.getSchema());
                System.err.println();
                counter++;
            }
            System.err.println(
                    "Dictionary at " + dictionaryDir.getCanonicalPath() + " has " + entries.size() + " item(s).");
        }
    }
}