org.lilyproject.tools.import_.cli.JsonImportTool.java Source code

Java tutorial

Introduction

Here is the source code for org.lilyproject.tools.import_.cli.JsonImportTool.java

Source

/*
 * Copyright 2010 Outerthought bvba
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.lilyproject.tools.import_.cli;

import java.io.FileInputStream;
import java.io.InputStream;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import com.google.common.base.Splitter;
import com.ngdata.lily.security.hbase.client.AuthorizationContext;
import org.lilyproject.repository.spi.AuthorizationContextHolder;
import org.lilyproject.tools.import_.json.IgnoreAndDeleteEmptyFieldsRecordReader;
import org.lilyproject.tools.import_.json.IgnoreEmptyFieldsRecordReader;
import org.lilyproject.tools.import_.json.RecordReader;
import org.lilyproject.util.hbase.RepoAndTableUtil;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.lilyproject.cli.BaseZkCliTool;
import org.lilyproject.cli.OptionUtil;
import org.lilyproject.client.LilyClient;
import org.lilyproject.repository.api.LRepository;
import org.lilyproject.repository.api.LTable;
import org.lilyproject.util.Version;
import org.lilyproject.util.hbase.LilyHBaseSchema.Table;
import org.lilyproject.util.io.Closer;

public class JsonImportTool extends BaseZkCliTool {
    private Option schemaOnlyOption;
    private Option workersOption;
    private Option quietOption;
    private Option tableOption;
    private Option repositoryOption;
    private Option fileFormatOption;
    private Option ignoreEmptyFieldsOption;
    private Option ignoreAndDeleteEmptyFieldsOption;
    private Option maxErrorsOption;
    private Option rolesOption;
    private LilyClient lilyClient;

    @Override
    protected String getCmdName() {
        return "lily-import";
    }

    @Override
    protected String getVersion() {
        return Version.readVersion("org.lilyproject", "lily-import");
    }

    public static void main(String[] args) throws Exception {
        new JsonImportTool().start(args);
    }

    @Override
    @SuppressWarnings("static-access")
    public List<Option> getOptions() {
        List<Option> options = super.getOptions();

        workersOption = OptionBuilder.withArgName("count").hasArg().withDescription("Number of workers (threads)")
                .withLongOpt("workers").create("w");
        options.add(workersOption);

        schemaOnlyOption = OptionBuilder
                .withDescription("Only import the field types and record types, not the records.")
                .withLongOpt("schema-only").create("s");
        options.add(schemaOnlyOption);

        quietOption = OptionBuilder
                .withDescription("Instead of printing out all record ids, only print a dot every 1000 records")
                .withLongOpt("quiet").create("q");
        options.add(quietOption);

        tableOption = OptionBuilder.withArgName("table").hasArg()
                .withDescription("Repository table to import to, defaults to record table").withLongOpt("table")
                .create();
        options.add(tableOption);

        repositoryOption = OptionBuilder.withArgName("repository").hasArg()
                .withDescription("Repository name, if not specified default repository is used")
                .withLongOpt("repository").create();
        options.add(repositoryOption);

        fileFormatOption = OptionBuilder.withArgName("format").hasArg()
                .withDescription("Input file format (see explanation at bottom)").withLongOpt("format").create();
        options.add(fileFormatOption);

        ignoreEmptyFieldsOption = OptionBuilder.withDescription(
                "Ignores fields defined as empty strings, ignores zero-length lists, ignores nested"
                        + " records containing no fields. When in root record, adds them as fields-to-delete.")
                .withLongOpt("ignore-empty-fields").create();
        options.add(ignoreEmptyFieldsOption);

        ignoreAndDeleteEmptyFieldsOption = OptionBuilder
                .withDescription(
                        "Does everything ignore-empty-fields does, and adds empty fields in the root record"
                                + "to the list of fields-to-delete (only makes sense for updates).")
                .withLongOpt("ignore-and-delete-empty-fields").create();
        options.add(ignoreAndDeleteEmptyFieldsOption);

        maxErrorsOption = OptionBuilder.withArgName("count").hasArg()
                .withDescription("Give up the import after this amount of errors (only for records, not schema)")
                .withLongOpt("max-errors").create();
        options.add(maxErrorsOption);

        rolesOption = OptionBuilder.withArgName("roles").hasArg()
                .withDescription("Comma-separated list of active user roles (excluding tenant part). Only has "
                        + "effect when the NGDATA hbase-authz coprocessor is installed.")
                .withLongOpt("roles").create();
        options.add(rolesOption);

        return options;
    }

    @Override
    public int run(CommandLine cmd) throws Exception {
        int result = super.run(cmd);
        if (result != 0) {
            return result;
        }

        int workers = OptionUtil.getIntOption(cmd, workersOption, 1);

        String tableName = OptionUtil.getStringOption(cmd, tableOption, Table.RECORD.name);
        String repositoryName = OptionUtil.getStringOption(cmd, repositoryOption,
                RepoAndTableUtil.DEFAULT_REPOSITORY);
        ImportFileFormat fileFormat = OptionUtil.getEnum(cmd, fileFormatOption, ImportFileFormat.JSON,
                ImportFileFormat.class);

        if (cmd.getArgList().size() < 1) {
            System.out.println("No import file specified!");
            return 1;
        }

        boolean schemaOnly = cmd.hasOption(schemaOnlyOption.getOpt());
        boolean ignoreEmptyFields = cmd.hasOption(ignoreEmptyFieldsOption.getLongOpt());
        boolean ignoreAndDeleteEmptyFields = cmd.hasOption(ignoreAndDeleteEmptyFieldsOption.getLongOpt());
        long maxErrors = OptionUtil.getLongOption(cmd, maxErrorsOption, 1L);

        if (cmd.hasOption(rolesOption.getLongOpt())) {
            Set<String> roles = new HashSet<String>();
            Splitter splitter = Splitter.on(",").trimResults().omitEmptyStrings();
            for (String role : splitter.split(cmd.getOptionValue(rolesOption.getLongOpt()))) {
                roles.add(role);
            }
            AuthorizationContextHolder
                    .setCurrentContext(new AuthorizationContext("lily-import", repositoryName, roles));
        }

        lilyClient = new LilyClient(zkConnectionString, zkSessionTimeout);

        for (String arg : (List<String>) cmd.getArgList()) {
            System.out.println("----------------------------------------------------------------------");
            System.out.println("Importing " + arg + " to " + tableName + " table of repository " + repositoryName);
            InputStream is = new FileInputStream(arg);
            try {
                LRepository repository = lilyClient.getRepository(repositoryName);
                LTable table = repository.getTable(tableName);
                ImportListener importListener;
                if (cmd.hasOption(quietOption.getOpt())) {
                    importListener = new DefaultImportListener(System.out, EntityType.RECORD);
                } else {
                    importListener = new DefaultImportListener();
                }

                JsonImport.ImportSettings settings = new JsonImport.ImportSettings();
                settings.importListener = importListener;
                settings.threadCount = workers;
                settings.maximumRecordErrors = maxErrors;
                if (ignoreAndDeleteEmptyFields) {
                    settings.recordReader = IgnoreAndDeleteEmptyFieldsRecordReader.INSTANCE;
                } else if (ignoreEmptyFields) {
                    settings.recordReader = IgnoreEmptyFieldsRecordReader.INSTANCE;
                } else {
                    settings.recordReader = RecordReader.INSTANCE;
                }

                switch (fileFormat) {
                case JSON:
                    if (schemaOnly) {
                        JsonImport.loadSchema(repository, is, settings);
                    } else {
                        JsonImport.load(table, repository, is, settings);
                    }
                    break;
                case JSON_LINES:
                    JsonImport.loadJsonLines(table, repository, is, settings);
                    break;
                default:
                    throw new RuntimeException("Unexpected import file format: " + fileFormat);
                }
            } finally {
                Closer.close(is);
            }
        }

        System.out.println("Import done");

        return 0;
    }

    @Override
    protected void cleanup() {
        Closer.close(lilyClient);
        super.cleanup();
    }

    public enum ImportFileFormat {
        JSON, JSON_LINES
    }
}