Java tutorial
/* * Copyright (c) 2011, Cloudera, Inc. All Rights Reserved. * * Cloudera, Inc. licenses this file to you under the Apache License, * Version 2.0 (the "License"). You may not use this file except in * compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. See the License for * the specific language governing permissions and limitations under the * License. */ package com.cloudera.recordbreaker.schemadict; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.cli.PosixParser; import java.io.File; import java.io.FilenameFilter; import java.io.IOException; import java.util.List; import java.util.Random; import java.util.ArrayList; import org.apache.avro.Schema; /****************************************** * A SchemaDictionary holds a number of serialized SchemaDictionaryElt objects, each of * which holds some schema info, a SchemaStatisticalSummary, and a user comment. * * A SchemaDictionary is meant to be the "clean schema reference" that helps users give * a name to novel schemas. * * @author mjc ******************************************/ public class SchemaDictionary { File dir; Random r = new Random(); List<SchemaDictionaryEntry> dictElts = new ArrayList<SchemaDictionaryEntry>(); /** * Load the schema dictionary from the given directory. */ public SchemaDictionary(File dir) throws IOException { this.dir = dir.getCanonicalFile(); if (!dir.exists()) { if (!dir.mkdirs()) { throw new IOException("Could not create: " + dir); } } File dictFiles[] = dir.listFiles(new FilenameFilter() { public boolean accept(File dir, String name) { return name.endsWith(SchemaDictionaryEntry.SUMMARY_ENDING); } }); for (int i = 0; i < dictFiles.length; i++) { String name = dictFiles[i].getName(); String fileRoot = name.substring(0, name.length() - SchemaDictionaryEntry.SUMMARY_ENDING.length()); SchemaDictionaryEntry sde = new SchemaDictionaryEntry(); sde.loadDictionaryEntry(dir, fileRoot); dictElts.add(sde); } } /** * Store a novel dictionary element (which is constructed with the original datafile and a user's comment). */ public synchronized void addDictionaryElt(File avroFile, String infoText) throws IOException { SchemaDictionaryEntry entry = new SchemaDictionaryEntry(avroFile, infoText); dictElts.add(entry); String fileRoot = "" + Math.abs(r.nextInt()); entry.saveDictionaryEntry(dir, fileRoot); } /** * Iterate through objects already in the directory. */ public List<SchemaDictionaryEntry> contents() { return dictElts; } ////////////////////////////////////////// // main() ////////////////////////////////////////// public static void main(String argv[]) throws IOException { boolean shouldDump = false; boolean shouldAdd = false; File avroDataFile = null; String dictMessage = null; CommandLine cmd = null; Options options = new Options(); options.addOption("?", false, "Help for command-line"); options.addOption("d", false, "Dump contents of schema dictionary"); options.addOption("a", true, "Add datafile to new schema dictionary element"); options.addOption("m", true, "Add comment message as part of new schema dictionary element"); try { CommandLineParser parser = new PosixParser(); cmd = parser.parse(options, argv); } catch (ParseException e) { HelpFormatter fmt = new HelpFormatter(); fmt.printHelp("SchemaDictionary", options, true); System.err.println("Required input: <schemadictionary>"); System.exit(-1); } if (cmd.hasOption("?")) { HelpFormatter fmt = new HelpFormatter(); fmt.printHelp("SchemaDictionary", options, true); System.err.println("Required input: <schemadictionary>"); System.exit(0); } if (cmd.hasOption("d")) { shouldDump = true; } if (cmd.hasOption("a")) { avroDataFile = new File(cmd.getOptionValue("a")).getCanonicalFile(); } if (cmd.hasOption("m")) { dictMessage = cmd.getOptionValue("m"); if (cmd.hasOption("a")) { shouldAdd = true; } } if ((!shouldAdd) && (cmd.hasOption("a") || cmd.hasOption("m"))) { System.err.println("Must indicate -a AND -m to add new schema dictionary item"); HelpFormatter fmt = new HelpFormatter(); fmt.printHelp("SchemaDictionary", options, true); System.err.println("Required input: <schemadictionary>"); System.exit(0); } String[] argArray = cmd.getArgs(); if (argArray.length == 0) { System.err.println("No schema dictionary path provided."); HelpFormatter fmt = new HelpFormatter(); fmt.printHelp("SchemaDictionary", options, true); System.err.println("Required input: <schemadictionary>"); System.exit(0); } File dictionaryDir = new File(argArray[0]).getCanonicalFile(); SchemaDictionary dict = new SchemaDictionary(dictionaryDir); if (shouldAdd) { dict.addDictionaryElt(avroDataFile, dictMessage); } if (shouldDump) { int counter = 1; for (SchemaDictionaryEntry entry : dict.contents()) { System.err.println("" + counter + ". " + entry.getInfo()); System.err.println(entry.getSchema()); System.err.println(); counter++; } int numItems = counter - 1; System.err.println( "Dictionary at " + dictionaryDir.getCanonicalPath() + " has " + numItems + " item(s)."); } } }