org.apache.drill.exec.store.easy.text.TextFormatPlugin.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.drill.exec.store.easy.text.TextFormatPlugin.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.store.easy.text;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.FormatPluginConfig;
import org.apache.drill.common.logical.StoragePluginConfig;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.physical.base.AbstractGroupScan;
import org.apache.drill.exec.physical.base.ScanStats;
import org.apache.drill.exec.physical.base.ScanStats.GroupScanProperty;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.proto.ExecProtos.FragmentHandle;
import org.apache.drill.exec.proto.UserBitShared.CoreOperatorType;
import org.apache.drill.exec.server.DrillbitContext;
import org.apache.drill.exec.store.RecordReader;
import org.apache.drill.exec.store.RecordWriter;
import org.apache.drill.exec.store.dfs.DrillFileSystem;
import org.apache.drill.exec.store.dfs.FileSelection;
import org.apache.drill.exec.store.dfs.FileSystemConfig;
import org.apache.drill.exec.store.dfs.easy.EasyFormatPlugin;
import org.apache.drill.exec.store.dfs.easy.EasyGroupScan;
import org.apache.drill.exec.store.dfs.easy.EasyWriter;
import org.apache.drill.exec.store.dfs.easy.FileWork;
import org.apache.drill.exec.store.easy.text.compliant.CompliantTextRecordReader;
import org.apache.drill.exec.store.easy.text.compliant.TextParsingSettings;
import org.apache.drill.exec.store.schedule.CompleteFileWork;
import org.apache.drill.exec.store.text.DrillTextRecordReader;
import org.apache.drill.exec.store.text.DrillTextRecordWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;

public class TextFormatPlugin extends EasyFormatPlugin<TextFormatPlugin.TextFormatConfig> {
    private final static String DEFAULT_NAME = "text";

    public TextFormatPlugin(String name, DrillbitContext context, Configuration fsConf,
            StoragePluginConfig storageConfig) {
        super(name, context, fsConf, storageConfig, new TextFormatConfig(), true, false, true, true,
                Collections.<String>emptyList(), DEFAULT_NAME);
    }

    public TextFormatPlugin(String name, DrillbitContext context, Configuration fsConf, StoragePluginConfig config,
            TextFormatConfig formatPluginConfig) {
        super(name, context, fsConf, config, formatPluginConfig, true, false, true, true,
                formatPluginConfig.getExtensions(), DEFAULT_NAME);
    }

    @Override
    public RecordReader getRecordReader(FragmentContext context, DrillFileSystem dfs, FileWork fileWork,
            List<SchemaPath> columns, String userName) throws ExecutionSetupException {
        Path path = dfs.makeQualified(new Path(fileWork.getPath()));
        FileSplit split = new FileSplit(path, fileWork.getStart(), fileWork.getLength(), new String[] { "" });

        if (context.getOptions().getOption(ExecConstants.ENABLE_NEW_TEXT_READER_KEY).bool_val == true) {
            TextParsingSettings settings = new TextParsingSettings();
            settings.set((TextFormatConfig) formatConfig);
            return new CompliantTextRecordReader(split, dfs, context, settings, columns);
        } else {
            char delim = ((TextFormatConfig) formatConfig).getFieldDelimiter();
            return new DrillTextRecordReader(split, dfs.getConf(), context, delim, columns);
        }
    }

    @Override
    public AbstractGroupScan getGroupScan(String userName, FileSelection selection, List<SchemaPath> columns)
            throws IOException {
        return new EasyGroupScan(userName, selection, this, columns, selection.selectionRoot);
    }

    @Override
    protected ScanStats getScanStats(final PlannerSettings settings, final EasyGroupScan scan) {
        long data = 0;
        for (final CompleteFileWork work : scan.getWorkIterable()) {
            data += work.getTotalBytes();
        }
        final double estimatedRowSize = settings.getOptions().getOption(ExecConstants.TEXT_ESTIMATED_ROW_SIZE);
        final double estRowCount = data / estimatedRowSize;
        return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, (long) estRowCount, 1, data);
    }

    @Override
    public RecordWriter getRecordWriter(final FragmentContext context, final EasyWriter writer) throws IOException {
        final Map<String, String> options = Maps.newHashMap();

        options.put("location", writer.getLocation());

        FragmentHandle handle = context.getHandle();
        String fragmentId = String.format("%d_%d", handle.getMajorFragmentId(), handle.getMinorFragmentId());
        options.put("prefix", fragmentId);

        options.put("separator", ((TextFormatConfig) getConfig()).getFieldDelimiterAsString());
        options.put(FileSystem.FS_DEFAULT_NAME_KEY, ((FileSystemConfig) writer.getStorageConfig()).connection);

        options.put("extension", ((TextFormatConfig) getConfig()).getExtensions().get(0));

        RecordWriter recordWriter = new DrillTextRecordWriter(context.getAllocator());
        recordWriter.init(options);

        return recordWriter;
    }

    @JsonTypeName("text")
    @JsonInclude(Include.NON_DEFAULT)
    public static class TextFormatConfig implements FormatPluginConfig {

        public List<String> extensions = ImmutableList.of();
        public String lineDelimiter = "\n";
        public char fieldDelimiter = '\n';
        public char quote = '"';
        public char escape = '"';
        public char comment = '#';
        public boolean skipFirstLine = false;
        public boolean extractHeader = false;

        public List<String> getExtensions() {
            return extensions;
        }

        public char getQuote() {
            return quote;
        }

        public char getEscape() {
            return escape;
        }

        public char getComment() {
            return comment;
        }

        public String getLineDelimiter() {
            return lineDelimiter;
        }

        public char getFieldDelimiter() {
            return fieldDelimiter;
        }

        @JsonIgnore
        public boolean isHeaderExtractionEnabled() {
            return extractHeader;
        }

        @JsonIgnore
        public String getFieldDelimiterAsString() {
            return new String(new char[] { fieldDelimiter });
        }

        @Deprecated
        @JsonProperty("delimiter")
        public void setFieldDelimiter(char delimiter) {
            this.fieldDelimiter = delimiter;
        }

        public boolean isSkipFirstLine() {
            return skipFirstLine;
        }

        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + comment;
            result = prime * result + escape;
            result = prime * result + ((extensions == null) ? 0 : extensions.hashCode());
            result = prime * result + fieldDelimiter;
            result = prime * result + ((lineDelimiter == null) ? 0 : lineDelimiter.hashCode());
            result = prime * result + quote;
            result = prime * result + (skipFirstLine ? 1231 : 1237);
            result = prime * result + (extractHeader ? 1231 : 1237);
            return result;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null) {
                return false;
            }
            if (getClass() != obj.getClass()) {
                return false;
            }
            TextFormatConfig other = (TextFormatConfig) obj;
            if (comment != other.comment) {
                return false;
            }
            if (escape != other.escape) {
                return false;
            }
            if (extensions == null) {
                if (other.extensions != null) {
                    return false;
                }
            } else if (!extensions.equals(other.extensions)) {
                return false;
            }
            if (fieldDelimiter != other.fieldDelimiter) {
                return false;
            }
            if (lineDelimiter == null) {
                if (other.lineDelimiter != null) {
                    return false;
                }
            } else if (!lineDelimiter.equals(other.lineDelimiter)) {
                return false;
            }
            if (quote != other.quote) {
                return false;
            }
            if (skipFirstLine != other.skipFirstLine) {
                return false;
            }
            if (extractHeader != other.extractHeader) {
                return false;
            }
            return true;
        }

    }

    @Override
    public int getReaderOperatorType() {
        return CoreOperatorType.TEXT_SUB_SCAN_VALUE;
    }

    @Override
    public int getWriterOperatorType() {
        return CoreOperatorType.TEXT_WRITER_VALUE;
    }

    @Override
    public boolean supportsPushDown() {
        return true;
    }

}