com.linkedin.pinot.tools.admin.command.CreateSegmentCommand.java Source code

Java tutorial

Introduction

Here is the source code for com.linkedin.pinot.tools.admin.command.CreateSegmentCommand.java

Source

/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.tools.admin.command;

import com.linkedin.pinot.core.data.readers.FileFormat;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
import com.linkedin.pinot.tools.Command;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.FileUtils;
import org.codehaus.jackson.map.ObjectMapper;
import org.kohsuke.args4j.Option;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Admin command that creates Pinot segments from input data files.
 *
 * <p>Options may come from the command line, from an optional segment generator config file
 * ({@code -generatorConfigFile}), or both. When both supply a value, the command line value is
 * used and a warning is logged. One segment is built per matching input file in the data
 * directory; segments are named {@code <segmentName>_<index>}.
 */
public class CreateSegmentCommand extends AbstractBaseAdminCommand implements Command {
    private static final Logger LOGGER = LoggerFactory.getLogger(CreateSegmentCommand.class);

    @Option(name = "-generatorConfigFile", required = false, metaVar = "<string>", usage = "Config file for segment generator.")
    private String _generatorConfigFile;

    @Option(name = "-dataDir", required = false, metaVar = "<string>", usage = "Directory containing the data.")
    private String _dataDir;

    @Option(name = "-format", required = false, metaVar = "<AVRO/CSV/JSON>", usage = "Input data format.")
    private FileFormat _format;

    @Option(name = "-outDir", required = false, metaVar = "<string>", usage = "Name of output directory.")
    private String _outDir;

    @Option(name = "-overwrite", required = false, usage = "Overwrite existing output directory.")
    private boolean _overwrite = false;

    @Option(name = "-tableName", required = false, metaVar = "<string>", usage = "Name of the table.")
    private String _tableName;

    @Option(name = "-segmentName", required = false, metaVar = "<string>", usage = "Name of the segment.")
    private String _segmentName;

    @Option(name = "-schemaFile", required = false, metaVar = "<string>", usage = "File containing schema for data.")
    private String _schemaFile;

    @Option(name = "-readerConfigFile", required = false, metaVar = "<string>", usage = "Config file for record reader.")
    private String _readerConfigFile;

    // NOTE(review): unlike its siblings this field is package-private; visibility is left
    // untouched in case same-package code reads it directly -- confirm before tightening.
    @Option(name = "-enableStarTreeIndex", required = false, usage = "Enable Star Tree Index.")
    boolean _enableStarTreeIndex = false;

    @Option(name = "-starTreeIndexSpecFile", required = false, metaVar = "<string>", usage = "Config file for star tree index.")
    private String _starTreeIndexSpecFile;

    @Option(name = "-numThreads", required = false, metaVar = "<int>", usage = "Parallelism while generating segments, default is 1.")
    private int _numThreads = 1;

    @Option(name = "-help", required = false, help = true, aliases = { "-h", "--h",
            "--help" }, usage = "Print this message.")
    private boolean _help = false;

    /** Sets the path of the segment generator config file; returns this command for chaining. */
    public CreateSegmentCommand setGeneratorConfigFile(String generatorConfigFile) {
        _generatorConfigFile = generatorConfigFile;
        return this;
    }

    /** Sets the directory containing the input data; returns this command for chaining. */
    public CreateSegmentCommand setDataDir(String dataDir) {
        _dataDir = dataDir;
        return this;
    }

    /** Sets the input data format; returns this command for chaining. */
    public CreateSegmentCommand setFormat(FileFormat format) {
        _format = format;
        return this;
    }

    /** Sets the output directory; returns this command for chaining. */
    public CreateSegmentCommand setOutDir(String outDir) {
        _outDir = outDir;
        return this;
    }

    /** Sets whether an existing output directory may be deleted; returns this command for chaining. */
    public CreateSegmentCommand setOverwrite(boolean overwrite) {
        _overwrite = overwrite;
        return this;
    }

    /** Sets the table name; returns this command for chaining. */
    public CreateSegmentCommand setTableName(String tableName) {
        _tableName = tableName;
        return this;
    }

    /** Sets the segment name prefix; returns this command for chaining. */
    public CreateSegmentCommand setSegmentName(String segmentName) {
        _segmentName = segmentName;
        return this;
    }

    /** Sets the schema file path; returns this command for chaining. */
    public CreateSegmentCommand setSchemaFile(String schemaFile) {
        _schemaFile = schemaFile;
        return this;
    }

    /** Sets the record reader config file path; returns this command for chaining. */
    public CreateSegmentCommand setReaderConfigFile(String readerConfigFile) {
        _readerConfigFile = readerConfigFile;
        return this;
    }

    /** Sets whether the star tree index is enabled; returns this command for chaining. */
    public CreateSegmentCommand setEnableStarTreeIndex(boolean enableStarTreeIndex) {
        _enableStarTreeIndex = enableStarTreeIndex;
        return this;
    }

    /** Sets the star tree index spec file path; returns this command for chaining. */
    public CreateSegmentCommand setStarTreeIndexSpecFile(String starTreeIndexSpecFile) {
        _starTreeIndexSpecFile = starTreeIndexSpecFile;
        return this;
    }

    /** Sets the number of threads used to build segments; returns this command for chaining. */
    public CreateSegmentCommand setNumThreads(int numThreads) {
        _numThreads = numThreads;
        return this;
    }

    /** Renders the command with all of its current option values, in command line form. */
    @Override
    public String toString() {
        return ("CreateSegment  -generatorConfigFile " + _generatorConfigFile + " -dataDir " + _dataDir
                + " -format " + _format + " -outDir " + _outDir + " -overwrite " + _overwrite + " -tableName "
                + _tableName + " -segmentName " + _segmentName + " -schemaFile " + _schemaFile
                + " -readerConfigFile " + _readerConfigFile + " -enableStarTreeIndex " + _enableStarTreeIndex
                + " -starTreeIndexSpecFile " + _starTreeIndexSpecFile + " -numThreads " + _numThreads);
    }

    @Override
    public final String getName() {
        return "CreateSegment";
    }

    @Override
    public String description() {
        return "Create pinot segments from provided avro/csv/json input data.";
    }

    @Override
    public boolean getHelp() {
        return _help;
    }

    /**
     * Builds one segment per matching input file found in the data directory.
     *
     * <p>Steps: validate the thread count, load the optional generator config, reconcile it with
     * the command line options, list the input files, prepare the output directory, and build
     * the segments on a fixed-size thread pool.
     *
     * @return {@code true} if all segments were built, {@code false} if the builds did not finish
     *         within one hour
     * @throws IllegalArgumentException if the configured number of threads is less than 1
     * @throws IOException if the output directory exists and overwrite is not enabled
     * @throws RuntimeException if a mandatory option is missing, no input file is found, or at
     *         least one segment build failed
     * @throws Exception on any other failure (config parsing, interruption, ...)
     */
    @Override
    public boolean execute() throws Exception {
        LOGGER.info("Executing command: {}", toString());

        // Fail before touching the file system: the thread pool would reject this value anyway,
        // but only after the output directory had already been deleted.
        if (_numThreads < 1) {
            throw new IllegalArgumentException("numThreads must be at least 1, got: " + _numThreads);
        }

        final SegmentGeneratorConfig segmentGeneratorConfig = loadGeneratorConfig();
        resolveMandatoryOptions(segmentGeneratorConfig);

        final File[] files = listInputFiles();
        prepareOutputDirectory();
        applyOptions(segmentGeneratorConfig);

        return buildSegments(segmentGeneratorConfig, files);
    }

    /** Reads the generator config file if one was given, otherwise returns an empty config. */
    private SegmentGeneratorConfig loadGeneratorConfig() throws IOException {
        if (_generatorConfigFile != null) {
            return new ObjectMapper().readValue(new File(_generatorConfigFile), SegmentGeneratorConfig.class);
        }
        return new SegmentGeneratorConfig();
    }

    /** Fills the mandatory options from the generator config where the command line left them unset. */
    private void resolveMandatoryOptions(SegmentGeneratorConfig segmentGeneratorConfig) {
        _dataDir = resolveRequiredOption("dataDir", _dataDir, segmentGeneratorConfig.getDataDir());

        FileFormat configFormat = segmentGeneratorConfig.getFormat();
        if (_format == null) {
            if (configFormat == null) {
                throw new RuntimeException("Format cannot be null in config file.");
            }
            _format = configFormat;
        } else if (configFormat != _format && configFormat != FileFormat.AVRO) {
            // NOTE(review): AVRO is exempt from the conflict warning, presumably because it is the
            // config's default value rather than an explicit user choice -- confirm.
            LOGGER.warn("Find format conflict in command line and config file, use config in command line: {}",
                    _format);
        }

        _outDir = resolveRequiredOption("outDir", _outDir, segmentGeneratorConfig.getOutDir());

        // Overwrite is enabled if either source asks for it.
        if (segmentGeneratorConfig.isOverwrite()) {
            _overwrite = true;
        }

        _tableName = resolveRequiredOption("tableName", _tableName, segmentGeneratorConfig.getTableName());
        _segmentName = resolveRequiredOption("segmentName", _segmentName, segmentGeneratorConfig.getSegmentName());
    }

    /**
     * Picks the effective value of a mandatory option: the command line value if present
     * (warning when the config file disagrees), otherwise the config file value.
     *
     * @throws RuntimeException if neither source provides a value
     */
    private static String resolveRequiredOption(String optionName, String commandLineValue, String configValue) {
        if (commandLineValue == null) {
            if (configValue == null) {
                throw new RuntimeException("Must specify " + optionName + ".");
            }
            return configValue;
        }
        warnIfConflicting(optionName, commandLineValue, configValue);
        return commandLineValue;
    }

    /** Logs a warning when the config file holds a value that differs from the command line value. */
    private static void warnIfConflicting(String optionName, String commandLineValue, String configValue) {
        if (configValue != null && !configValue.equals(commandLineValue)) {
            LOGGER.warn("Find {} conflict in command line and config file, use config in command line: {}",
                    optionName, commandLineValue);
        }
    }

    /**
     * Lists the regular files in the data directory whose name ends with the format name
     * (case-insensitive), sorted so that segment numbering is reproducible across runs.
     *
     * @throws RuntimeException if the data directory is missing or holds no matching file
     */
    private File[] listInputFiles() {
        File dataDir = new File(_dataDir);
        if (!dataDir.isDirectory()) {
            throw new RuntimeException("Data directory " + _dataDir + " not found.");
        }

        // Locale.ROOT keeps the case folding independent of the JVM default locale.
        final String suffix = _format.toString().toLowerCase(Locale.ROOT);
        File[] files = dataDir.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                // Skip sub-directories that merely happen to end with the format name.
                return name.toLowerCase(Locale.ROOT).endsWith(suffix) && new File(dir, name).isFile();
            }
        });

        if ((files == null) || (files.length == 0)) {
            throw new RuntimeException("Data directory " + _dataDir + " does not contain "
                    + _format.toString().toUpperCase(Locale.ROOT) + " files.");
        }

        // File.listFiles gives no ordering guarantee; sort so file -> segment index is stable.
        Arrays.sort(files);
        return files;
    }

    /** Makes sure the output directory does not exist, deleting it when overwrite is enabled. */
    private void prepareOutputDirectory() throws IOException {
        File outDir = new File(_outDir);
        if (!outDir.exists()) {
            return;
        }
        if (!_overwrite) {
            throw new IOException("Output directory " + _outDir + " already exists.");
        }
        FileUtils.deleteDirectory(outDir);
    }

    /** Copies the effective option values into the generator config shared by all segment builds. */
    private void applyOptions(SegmentGeneratorConfig segmentGeneratorConfig) {
        segmentGeneratorConfig.setDataDir(_dataDir);
        segmentGeneratorConfig.setFormat(_format);
        segmentGeneratorConfig.setOutDir(_outDir);
        segmentGeneratorConfig.setOverwrite(_overwrite);
        segmentGeneratorConfig.setTableName(_tableName);
        segmentGeneratorConfig.setSegmentName(_segmentName);

        // Optional settings only override the config file when given on the command line.
        if (_schemaFile != null) {
            warnIfConflicting("schemaFile", _schemaFile, segmentGeneratorConfig.getSchemaFile());
            segmentGeneratorConfig.setSchemaFile(_schemaFile);
        }
        if (_readerConfigFile != null) {
            warnIfConflicting("readerConfigFile", _readerConfigFile, segmentGeneratorConfig.getReaderConfigFile());
            segmentGeneratorConfig.setReaderConfigFile(_readerConfigFile);
        }
        if (_enableStarTreeIndex) {
            segmentGeneratorConfig.setEnableStarTreeIndex(true);
        }
        if (_starTreeIndexSpecFile != null) {
            warnIfConflicting("starTreeIndexSpecFile", _starTreeIndexSpecFile,
                    segmentGeneratorConfig.getStarTreeIndexSpecFile());
            segmentGeneratorConfig.setStarTreeIndexSpecFile(_starTreeIndexSpecFile);
        }
    }

    /**
     * Builds one segment per input file on a fixed-size thread pool and waits up to one hour.
     *
     * @return {@code true} if every build succeeded, {@code false} on timeout
     * @throws RuntimeException if at least one build failed; the cause is the first failure
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    private boolean buildSegments(final SegmentGeneratorConfig segmentGeneratorConfig, File[] files)
            throws InterruptedException {
        ExecutorService executor = Executors.newFixedThreadPool(_numThreads);
        List<Future<?>> futures = new ArrayList<Future<?>>(files.length);
        try {
            for (int i = 0; i < files.length; i++) {
                final File file = files[i];
                final int segCnt = i;

                // submit (not execute) so that a failure is captured in the Future instead of
                // dying silently on the worker thread.
                futures.add(executor.submit(new Runnable() {
                    @Override
                    public void run() {
                        try {
                            // Each task works on its own copy of the shared config.
                            SegmentGeneratorConfig config = new SegmentGeneratorConfig(segmentGeneratorConfig);
                            config.setInputFilePath(file.getAbsolutePath());
                            config.setSegmentName(_segmentName + "_" + segCnt);
                            config.loadConfigFiles();

                            final SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
                            driver.init(config);
                            driver.build();
                        } catch (Exception e) {
                            throw new RuntimeException(e);
                        }
                    }
                }));
            }
        } finally {
            // Always stop accepting work so the pool threads can exit once the queue drains.
            executor.shutdown();
        }

        if (!executor.awaitTermination(1, TimeUnit.HOURS)) {
            LOGGER.error("Timed out waiting for segment creation to finish.");
            return false;
        }

        // All tasks are done at this point, so get() does not block; it only surfaces failures.
        int failureCount = 0;
        Throwable firstFailure = null;
        for (int i = 0; i < files.length; i++) {
            try {
                futures.get(i).get();
            } catch (ExecutionException e) {
                failureCount++;
                if (firstFailure == null) {
                    firstFailure = e.getCause();
                }
                LOGGER.error("Failed to create segment from input file: {}", files[i], e.getCause());
            }
        }
        if (failureCount > 0) {
            throw new RuntimeException(
                    "Failed to create " + failureCount + " of " + files.length + " segments.", firstFailure);
        }
        return true;
    }
}