// Source: com.splicemachine.derby.stream.control.ControlDataSetProcessor

/*
 * Copyright 2012 - 2016 Splice Machine, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */

package com.splicemachine.derby.stream.control;

import javax.annotation.Nonnull;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Collections;
import java.util.Iterator;
import java.util.Scanner;
import java.util.zip.GZIPInputStream;

import com.splicemachine.EngineDriver;
import com.splicemachine.db.iapi.store.access.Qualifier;
import com.splicemachine.db.iapi.types.DataValueDescriptor;
import com.splicemachine.derby.stream.iapi.*;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import org.apache.commons.collections.iterators.SingletonIterator;
import org.apache.log4j.Logger;
import org.spark_project.guava.base.Charsets;
import scala.Tuple2;
import com.splicemachine.access.api.DistributedFileSystem;
import com.splicemachine.db.iapi.error.StandardException;
import com.splicemachine.db.iapi.sql.Activation;
import com.splicemachine.db.iapi.sql.execute.ExecRow;
import com.splicemachine.derby.iapi.sql.execute.SpliceOperation;
import com.splicemachine.derby.impl.sql.execute.operations.LocatedRow;
import com.splicemachine.derby.impl.sql.execute.operations.scanner.TableScannerBuilder;
import com.splicemachine.derby.stream.function.Partitioner;
import com.splicemachine.derby.stream.iterator.TableScannerIterator;
import com.splicemachine.pipeline.Exceptions;
import com.splicemachine.si.api.data.TxnOperationFactory;
import com.splicemachine.si.api.server.Transactor;
import com.splicemachine.si.api.txn.TxnSupplier;
import com.splicemachine.si.impl.TxnRegion;
import com.splicemachine.si.impl.driver.SIDriver;
import com.splicemachine.si.impl.readresolve.NoOpReadResolver;
import com.splicemachine.si.impl.rollforward.NoopRollForward;
import com.splicemachine.storage.Partition;

/**
 * Local control side DataSetProcessor.
 *
 * @author jleach
 */
public class ControlDataSetProcessor implements DataSetProcessor {
    private long badRecordThreshold = -1;
    private boolean permissive;
    private String statusDirectory;
    private String importFileName;

    private static final Logger LOG = Logger.getLogger(ControlDataSetProcessor.class);

    protected final TxnSupplier txnSupplier;
    protected final Transactor transactory;
    protected final TxnOperationFactory txnOperationFactory;

    public ControlDataSetProcessor(TxnSupplier txnSupplier, Transactor transactory,
            TxnOperationFactory txnOperationFactory) {
        this.txnSupplier = txnSupplier;
        this.transactory = transactory;
        this.txnOperationFactory = txnOperationFactory;
    }

    @Override
    public Type getType() {
        return Type.LOCAL;
    }

    public static final Partitioner NOOP_PARTITIONER = new Partitioner() {
        @Override
        public void initialize() {
        }

        @Override
        public int numPartitions() {
            return 0;
        }

        @Override
        public int getPartition(Object o) {
            return 0;
        }
    };

    @Override
    @SuppressFBWarnings(value = "SE_NO_SUITABLE_CONSTRUCTOR_FOR_EXTERNALIZATION", justification = "Serialization"
            + "of this is a mistake for control-side operations")
    public <Op extends SpliceOperation, V> ScanSetBuilder<V> newScanSet(final Op spliceOperation,
            final String tableName) throws StandardException {
        return new TableScannerBuilder<V>() {
            @Override
            public DataSet<V> buildDataSet() throws StandardException {
                Partition p;
                try {
                    p = SIDriver.driver().getTableFactory().getTable(tableName);
                    TxnRegion localRegion = new TxnRegion(p, NoopRollForward.INSTANCE, NoOpReadResolver.INSTANCE,
                            txnSupplier, transactory, txnOperationFactory);

                    this.region(localRegion).scanner(p.openScanner(getScan(), metricFactory)); //set the scanner
                    TableScannerIterator tableScannerIterator = new TableScannerIterator(this, spliceOperation);
                    if (spliceOperation != null) {
                        spliceOperation.registerCloseable(tableScannerIterator);
                        spliceOperation.registerCloseable(p);
                    }
                    return new ControlDataSet(tableScannerIterator);
                } catch (IOException e) {
                    throw Exceptions.parseException(e);
                }
            }
        };
    }

    @Override
    public <V> DataSet<V> getEmpty() {
        return new ControlDataSet<>(Collections.<V>emptyList().iterator());
    }

    @Override
    public <V> DataSet<V> getEmpty(String name) {
        return getEmpty();
    }

    @Override
    public <V> DataSet<V> singleRowDataSet(V value) {
        return new ControlDataSet<>(new SingletonIterator(value));
    }

    @Override
    public <V> DataSet<V> singleRowDataSet(V value, Object caller) {
        return singleRowDataSet(value);
    }

    @Override
    public <K, V> PairDataSet<K, V> singleRowPairDataSet(K key, V value) {
        return new ControlPairDataSet<>(new SingletonIterator(new Tuple2<>(key, value)));
    }

    @Override
    public <Op extends SpliceOperation> OperationContext<Op> createOperationContext(Op spliceOperation) {
        OperationContext<Op> operationContext = new ControlOperationContext<>(spliceOperation);
        spliceOperation.setOperationContext(operationContext);
        if (permissive) {
            operationContext.setPermissive(statusDirectory, importFileName, badRecordThreshold);
        }
        return operationContext;
    }

    @Override
    public <Op extends SpliceOperation> OperationContext<Op> createOperationContext(Activation activation) {
        return new ControlOperationContext<>(null);
    }

    @Override
    public void setJobGroup(String jobName, String jobDescription) {
    }

    @Override
    public PairDataSet<String, InputStream> readWholeTextFile(String s, SpliceOperation op) {
        try {
            InputStream is = getFileStream(s);
            return singleRowPairDataSet(s, is);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    @Override
    public PairDataSet<String, InputStream> readWholeTextFile(String s) {
        return readWholeTextFile(s, null);
    }

    @Override
    public DataSet<String> readTextFile(final String s) {
        try {
            InputStream is = getFileStream(s);
            return new ControlDataSet<>(new TextFileIterator(is));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    @Override
    public DataSet<String> readTextFile(String s, SpliceOperation op) {
        return readTextFile(s);
    }

    @Override
    public <K, V> PairDataSet<K, V> getEmptyPair() {
        return new ControlPairDataSet<>(Collections.<Tuple2<K, V>>emptyList().iterator());
    }

    @Override
    public <V> DataSet<V> createDataSet(Iterator<V> value) {
        return new ControlDataSet<>(value);
    }

    @Override
    public <V> DataSet<V> createDataSet(Iterator<V> value, String name) {
        return new ControlDataSet<>(value);
    }

    @Override
    public void setSchedulerPool(String pool) {
        // no op
    }

    private static class TextFileIterator implements Iterator<String> {

        Scanner scanner;

        public TextFileIterator(InputStream inputStream) {
            //-sf- adding UTF-8 charset here to avoid findbugs warning. If we stop using UTF-8, we might be in trouble
            this.scanner = new Scanner(inputStream, Charsets.UTF_8.name());
        }

        @Override
        public void remove() {
        }

        @Override
        public String next() {
            return scanner.nextLine();
        }

        @Override
        public boolean hasNext() {
            return scanner.hasNextLine();
        }

    }

    @Override
    public void setPermissive(String statusDirectory, String importFileName, long badRecordThreshold) {
        this.permissive = true;
        this.statusDirectory = statusDirectory;
        this.importFileName = importFileName;
        this.badRecordThreshold = badRecordThreshold;
    }

    @Override
    public void clearBroadcastedOperation() {
        // do nothing
    }

    @Override
    public void stopJobGroup(String jobName) {
        // do nothing
    }

    @Override
    public Partitioner getPartitioner(DataSet<LocatedRow> dataSet, ExecRow template, int[] keyDecodingMap,
            boolean[] keyOrder, int[] rightHashKeys) {
        return NOOP_PARTITIONER;
    }

    /* ****************************************************************************************************************/
    /*private helper methods*/
    private InputStream newInputStream(DistributedFileSystem dfs, @Nonnull Path p, OpenOption... options)
            throws IOException {
        InputStream value = dfs.newInputStream(p, options);
        String s = p.getFileName().toString();
        assert s != null;
        if (s.endsWith("gz")) {
            //need to open up a decompressing inputStream
            value = new GZIPInputStream(value);
        }
        return value;
    }

    private InputStream getFileStream(String s) throws IOException {
        DistributedFileSystem dfs = SIDriver.driver().fileSystem();
        InputStream value;
        if (dfs.getInfo(s).isDirectory()) {
            //we need to open a Stream against each file in the directory
            InputStream inputStream = null;
            boolean sequenced = false;
            try (DirectoryStream<Path> stream = Files.newDirectoryStream(dfs.getPath(s))) {
                for (Path p : stream) {
                    if (inputStream == null) {
                        inputStream = newInputStream(dfs, p, StandardOpenOption.READ);
                    } else {
                        inputStream = new SequenceInputStream(inputStream,
                                newInputStream(dfs, p, StandardOpenOption.READ));
                    }
                }
            }
            value = inputStream;
        } else {
            value = newInputStream(dfs, dfs.getPath(s), StandardOpenOption.READ);
        }
        return value;
    }

    @Override
    public <V> DataSet<V> readParquetFile(int[] baseColumnMap, String location, OperationContext context,
            Qualifier[][] qualifiers, DataValueDescriptor probeValue, ExecRow execRow) throws StandardException {
        DistributedDataSetProcessor proc = EngineDriver.driver().processorFactory().distributedProcessor();
        return new ControlDataSet(
                proc.readParquetFile(baseColumnMap, location, context, qualifiers, probeValue, execRow)
                        .toLocalIterator());
    }

    @Override
    public <V> DataSet<V> readORCFile(int[] baseColumnMap, String location, OperationContext context,
            Qualifier[][] qualifiers, DataValueDescriptor probeValue, ExecRow execRow) throws StandardException {
        DistributedDataSetProcessor proc = EngineDriver.driver().processorFactory().distributedProcessor();
        return new ControlDataSet(proc
                .readORCFile(baseColumnMap, location, context, qualifiers, probeValue, execRow).toLocalIterator());
    }

    @Override
    public <V> DataSet<LocatedRow> readTextFile(SpliceOperation op, String location, String characterDelimiter,
            String columnDelimiter, int[] baseColumnMap, OperationContext context, ExecRow execRow)
            throws StandardException {
        DistributedDataSetProcessor proc = EngineDriver.driver().processorFactory().distributedProcessor();
        return new ControlDataSet(proc
                .readTextFile(op, location, characterDelimiter, columnDelimiter, baseColumnMap, context, execRow)
                .toLocalIterator());
    }

    @Override
    public <V> DataSet<V> readPinnedTable(long conglomerateId, int[] baseColumnMap, String location,
            OperationContext context, Qualifier[][] qualifiers, DataValueDescriptor probeValue, ExecRow execRow)
            throws StandardException {
        DistributedDataSetProcessor proc = EngineDriver.driver().processorFactory().distributedProcessor();
        return new ControlDataSet(proc
                .readPinnedTable(conglomerateId, baseColumnMap, location, context, qualifiers, probeValue, execRow)
                .toLocalIterator());
    }

    @Override
    public void dropPinnedTable(long conglomerateId) throws StandardException {
        DistributedDataSetProcessor proc = EngineDriver.driver().processorFactory().distributedProcessor();
        proc.dropPinnedTable(conglomerateId);
    }

    @Override
    public void createEmptyExternalFile(ExecRow execRow, int[] baseColumnMap, int[] partitionBy, String storageAs,
            String location) throws StandardException {
        DistributedDataSetProcessor proc = EngineDriver.driver().processorFactory().distributedProcessor();
        proc.createEmptyExternalFile(execRow, baseColumnMap, partitionBy, storageAs, location);
    }

    @Override
    public void refreshTable(String location) {
        DistributedDataSetProcessor proc = EngineDriver.driver().processorFactory().distributedProcessor();
        proc.refreshTable(location);
    }
}