com.linkedin.pinot.tools.segment.converter.PinotSegmentConvertCommand.java Source code

Java tutorial

Introduction

Here is the source code for com.linkedin.pinot.tools.segment.converter.PinotSegmentConvertCommand.java

Source

/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.tools.segment.converter;

import com.linkedin.pinot.common.utils.TarGzCompressionUtils;
import com.linkedin.pinot.core.data.readers.FileFormat;
import com.linkedin.pinot.tools.AbstractBaseCommand;
import com.linkedin.pinot.tools.Command;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.kohsuke.args4j.Option;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The <code>PinotSegmentConvertCommand</code> class provides tools to convert Pinot segments into another format.
 * <p>Currently supports converting Pinot segments to:
 * <ul>
 *   <li>AVRO format</li>
 *   <li>CSV format</li>
 *   <li>JSON format</li>
 * </ul>
 * <p>Every sub-directory of the data directory is treated as an uncompressed segment, and every
 * <code>.tar.gz</code>/<code>.tgz</code> file is extracted into a temporary directory under the output directory
 * before being converted. Any other file in the data directory is ignored. One output file is written per segment,
 * named after the segment with the extension of the chosen format.
 */
@SuppressWarnings("FieldCanBeLocal")
public class PinotSegmentConvertCommand extends AbstractBaseCommand implements Command {
    private static final Logger LOGGER = LoggerFactory.getLogger(PinotSegmentConvertCommand.class);

    /** Name of the scratch directory (created under the output directory) used to extract compressed segments. */
    private static final String TEMP_DIR_NAME = "temp";

    @Option(name = "-dataDir", required = true, metaVar = "<String>", usage = "Path to data directory containing Pinot segments.")
    private String _dataDir;

    @Option(name = "-outputDir", required = true, metaVar = "<String>", usage = "Path to output directory.")
    private String _outputDir;

    @Option(name = "-outputFormat", required = true, metaVar = "<String>", usage = "Format to convert to (AVRO/CSV/JSON).")
    private String _outputFormat;

    @Option(name = "-csvDelimiter", required = false, metaVar = "<char>", usage = "CSV delimiter (default ',').")
    private char _csvDelimiter = ',';

    @Option(name = "-csvListDelimiter", required = false, metaVar = "<char>", usage = "CSV List delimiter for multi-value columns (default ';').")
    private char _csvListDelimiter = ';';

    @Option(name = "-csvWithHeader", required = false, metaVar = "<boolean>", usage = "Print CSV Header (default false).")
    private boolean _csvWithHeader;

    @Option(name = "-overwrite", required = false, metaVar = "<boolean>", usage = "Overwrite the existing file (default false).")
    private boolean _overwrite;

    @Option(name = "-help", required = false, help = true, aliases = { "-h", "--h",
            "--help" }, usage = "Print this message.")
    private boolean _help;

    /**
     * Converts every segment found in the data directory into the requested output format.
     *
     * @return <code>true</code> once all segments have been converted
     * @throws IllegalArgumentException if the output format is not a known {@link FileFormat}
     * @throws RuntimeException if the output format is not AVRO/CSV/JSON, the output directory cannot be prepared,
     *         the data directory contains no files, or two segments share the same name
     * @throws Exception if extracting or converting a segment fails
     */
    @Override
    public boolean execute() throws Exception {
        // Resolve the format first so that a bad -outputFormat fails before the output directory is touched.
        FileFormat outputFormat = parseOutputFormat();

        File outputDir = prepareOutputDir();
        File tempDir = new File(outputDir, TEMP_DIR_NAME);
        try {
            Map<String, String> segmentPaths = collectSegments(tempDir);
            for (Map.Entry<String, String> entry : segmentPaths.entrySet()) {
                convertSegment(outputFormat, entry.getKey(), entry.getValue(), outputDir);
            }
            return true;
        } finally {
            // Extracted archives are only needed during the conversion; never leave them in the output.
            FileUtils.deleteQuietly(tempDir);
        }
    }

    /**
     * Parses the <code>-outputFormat</code> option (case-insensitively) and checks that it is a supported target.
     */
    private FileFormat parseOutputFormat() {
        FileFormat format = FileFormat.valueOf(_outputFormat.toUpperCase());
        switch (format) {
        case AVRO:
        case CSV:
        case JSON:
            return format;
        default:
            throw new RuntimeException("Unsupported conversion to file format: " + _outputFormat);
        }
    }

    /**
     * Makes sure the output directory exists and is empty, deleting a pre-existing one only when
     * <code>-overwrite</code> is set.
     */
    private File prepareOutputDir() {
        File outputDir = new File(_outputDir);
        if (outputDir.exists()) {
            if (!_overwrite) {
                throw new RuntimeException("Output directory: " + outputDir.getAbsolutePath()
                        + " already exists and overwrite flag is not set.");
            }
            if (!FileUtils.deleteQuietly(outputDir)) {
                throw new RuntimeException("Output directory: " + outputDir.getAbsolutePath()
                        + " already exists and cannot be deleted.");
            }
        }
        if (!outputDir.mkdirs()) {
            throw new RuntimeException("Output directory: " + outputDir.getAbsolutePath() + " cannot be created.");
        }
        return outputDir;
    }

    /**
     * Builds the map from segment name to segment directory path, extracting compressed segments into
     * <code>tempDir</code>.
     */
    private Map<String, String> collectSegments(File tempDir) throws Exception {
        File[] files = new File(_dataDir).listFiles();
        if (files == null || files.length == 0) {
            throw new RuntimeException("Data directory does not contain any files.");
        }

        Map<String, String> segmentPaths = new HashMap<>();
        for (File file : files) {
            String fileName = file.getName();
            if (file.isDirectory()) {
                // Uncompressed segment: the directory name is the segment name.
                if (segmentPaths.containsKey(fileName)) {
                    throw new RuntimeException("Multiple segments with the same segment name: " + fileName);
                }
                segmentPaths.put(fileName, file.getAbsolutePath());
            } else {
                String lowerCaseName = fileName.toLowerCase();
                if (lowerCaseName.endsWith(".tar.gz") || lowerCaseName.endsWith(".tgz")) {
                    // Compressed segment: the segment name is the name of the first extracted entry.
                    File segment = TarGzCompressionUtils.unTar(file, new File(tempDir, fileName)).get(0);
                    String segmentName = segment.getName();
                    if (segmentPaths.containsKey(segmentName)) {
                        // Report the colliding segment name as well as the archive it came from.
                        throw new RuntimeException("Multiple segments with the same segment name: " + segmentName
                                + " (from file: " + fileName + ")");
                    }
                    segmentPaths.put(segmentName, segment.getAbsolutePath());
                }
                // Any other regular file is not a segment and is ignored.
            }
        }
        return segmentPaths;
    }

    /**
     * Converts a single segment into <code>outputDir</code>, appending the extension of the output format to the
     * segment name.
     */
    private void convertSegment(FileFormat outputFormat, String segmentName, String inputPath, File outputDir)
            throws Exception {
        String outputPath = new File(outputDir, segmentName).getAbsolutePath();
        switch (outputFormat) {
        case AVRO:
            outputPath += ".avro";
            new PinotSegmentToAvroConverter(inputPath, outputPath).convert();
            break;
        case CSV:
            outputPath += ".csv";
            // The list delimiter (for multi-value columns) is a separate option from the field delimiter.
            new PinotSegmentToCsvConverter(inputPath, outputPath, _csvDelimiter, _csvListDelimiter,
                    _csvWithHeader).convert();
            break;
        case JSON:
            outputPath += ".json";
            new PinotSegmentToJsonConverter(inputPath, outputPath).convert();
            break;
        default:
            throw new RuntimeException("Unsupported conversion to file format: " + _outputFormat);
        }
        LOGGER.info("Finish converting segment: {} into file: {}", segmentName, outputPath);
    }

    /**
     * Returns a one-line description of this command.
     */
    @Override
    public String description() {
        return "Convert Pinot segments to another format such as AVRO/CSV/JSON.";
    }

    /**
     * Returns whether the <code>-help</code> option was given.
     */
    @Override
    public boolean getHelp() {
        return _help;
    }
}