org.apache.accumulo.tserver.tablet.Compactor.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.accumulo.tserver.tablet.Compactor.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.accumulo.tserver.tablet;

import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.conf.AccumuloConfiguration;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.data.impl.KeyExtent;
import org.apache.accumulo.core.file.FileOperations;
import org.apache.accumulo.core.file.FileSKVIterator;
import org.apache.accumulo.core.file.FileSKVWriter;
import org.apache.accumulo.core.iterators.IteratorUtil;
import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator;
import org.apache.accumulo.core.iterators.system.DeletingIterator;
import org.apache.accumulo.core.iterators.system.MultiIterator;
import org.apache.accumulo.core.iterators.system.TimeSettingIterator;
import org.apache.accumulo.core.metadata.schema.DataFileValue;
import org.apache.accumulo.core.trace.Span;
import org.apache.accumulo.core.trace.Trace;
import org.apache.accumulo.core.util.LocalityGroupUtil;
import org.apache.accumulo.core.util.LocalityGroupUtil.LocalityGroupConfigurationError;
import org.apache.accumulo.core.util.ratelimit.RateLimiter;
import org.apache.accumulo.server.AccumuloServerContext;
import org.apache.accumulo.server.fs.FileRef;
import org.apache.accumulo.server.fs.VolumeManager;
import org.apache.accumulo.server.problems.ProblemReport;
import org.apache.accumulo.server.problems.ProblemReportingIterator;
import org.apache.accumulo.server.problems.ProblemReports;
import org.apache.accumulo.server.problems.ProblemType;
import org.apache.accumulo.tserver.InMemoryMap;
import org.apache.accumulo.tserver.MinorCompactionReason;
import org.apache.accumulo.tserver.TabletIteratorEnvironment;
import org.apache.accumulo.tserver.compaction.MajorCompactionReason;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Merges a set of data files (and optionally an in-memory map) belonging to one tablet extent into
 * a single output file.
 *
 * <p>
 * The work is performed by {@link #call()}, which writes one section per configured locality group
 * followed by the default locality group. While {@link #call()} is executing, the instance is
 * registered in {@link #runningCompactions} so that {@link #getRunningCompactions()} can report on
 * it.
 */
public class Compactor implements Callable<CompactionStats> {
    private static final Logger log = LoggerFactory.getLogger(Compactor.class);
    private static final AtomicLong nextCompactorID = new AtomicLong(0);

    /** Number of appended entries batched together before {@code entriesWritten} is updated. */
    private static final int STATS_UPDATE_INTERVAL = 1024;

    /**
     * Thrown by {@link #call()} when the {@link CompactionEnv} disabled the compaction while input
     * entries were still left to write.
     */
    public static class CompactionCanceledException extends Exception {
        private static final long serialVersionUID = 1L;
    }

    /** Callbacks through which a running compaction queries its environment. */
    public interface CompactionEnv {

        /** @return {@code false} to make the compaction stop writing and cancel itself */
        boolean isCompactionEnabled();

        /** @return the scope ({@code majc} or {@code minc}) used to load and configure iterators */
        IteratorScope getIteratorScope();

        /** @return the rate limiter handed to the readers of the input files */
        RateLimiter getReadLimiter();

        /** @return the rate limiter handed to the writer of the output file */
        RateLimiter getWriteLimiter();
    }

    private final Map<FileRef, DataFileValue> filesToCompact;
    private final InMemoryMap imm;
    private final FileRef outputFile;
    private final boolean propogateDeletes;
    private final AccumuloConfiguration acuTableConf;
    private final CompactionEnv env;
    private final VolumeManager fs;
    protected final KeyExtent extent;
    private final List<IteratorSetting> iterators;

    // things to report
    private String currentLocalityGroup = "";
    private final long startTime;

    // index into MajorCompactionReason.values() or MinorCompactionReason.values(), depending on
    // which accessor the caller uses
    private final int reason;

    private final AtomicLong entriesRead = new AtomicLong(0);
    private final AtomicLong entriesWritten = new AtomicLong(0);
    private final DateFormat dateFormatter = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss.SSS");

    // a unique id to identify a compactor
    private final long compactorID = nextCompactorID.getAndIncrement();
    // the thread currently executing call(); reset to null once call() has finished
    protected volatile Thread thread;
    private final AccumuloServerContext context;

    /** @return the process-wide unique id assigned to this compactor at construction */
    public long getCompactorID() {
        return compactorID;
    }

    private synchronized void setLocalityGroup(String name) {
        this.currentLocalityGroup = name;
    }

    /**
     * @return the name of the locality group currently being written; the empty string before the
     *         first group starts and while the default locality group is written
     */
    public synchronized String getCurrentLocalityGroup() {
        return currentLocalityGroup;
    }

    private void clearStats() {
        entriesRead.set(0);
        entriesWritten.set(0);
    }

    /** Compactors whose {@link #call()} is currently executing. */
    protected static final Set<Compactor> runningCompactions = Collections
            .synchronizedSet(new HashSet<Compactor>());

    /**
     * Takes a snapshot of all compactions that are currently running.
     *
     * @return a new list holding one {@link CompactionInfo} per registered compactor
     */
    public static List<CompactionInfo> getRunningCompactions() {
        List<CompactionInfo> compactions = new ArrayList<>();

        // iterating a synchronizedSet requires holding its lock manually
        synchronized (runningCompactions) {
            for (Compactor compactor : runningCompactions) {
                compactions.add(new CompactionInfo(compactor));
            }
        }

        return compactions;
    }

    /**
     * Creates a compactor; no I/O happens until {@link #call()} is invoked.
     *
     * @param context
     *            server context used for problem reporting
     * @param tablet
     *            tablet supplying the extent and the file system
     * @param files
     *            input files to merge, mapped to their metadata
     * @param imm
     *            in-memory map to include as an additional input, or {@code null} for none
     * @param outputFile
     *            file the merged data is written to
     * @param propogateDeletes
     *            passed on to the {@link DeletingIterator}
     * @param env
     *            environment callbacks (cancellation, iterator scope, rate limiters)
     * @param iterators
     *            additional iterator settings passed to the iterator loader
     * @param reason
     *            index of the compaction reason, see {@link #getMajorCompactionReason()} and
     *            {@link #getMinCReason()}
     * @param tableConfiguation
     *            table configuration used for readers, the writer, locality groups and iterators
     */
    public Compactor(AccumuloServerContext context, Tablet tablet, Map<FileRef, DataFileValue> files,
            InMemoryMap imm, FileRef outputFile, boolean propogateDeletes, CompactionEnv env,
            List<IteratorSetting> iterators, int reason, AccumuloConfiguration tableConfiguation) {
        this.context = context;
        this.extent = tablet.getExtent();
        this.fs = tablet.getTabletServer().getFileSystem();
        this.acuTableConf = tableConfiguation;
        this.filesToCompact = files;
        this.imm = imm;
        this.outputFile = outputFile;
        this.propogateDeletes = propogateDeletes;
        this.env = env;
        this.iterators = iterators;
        this.reason = reason;

        startTime = System.currentTimeMillis();
    }

    /** @return the volume manager obtained from the tablet's tablet server */
    public VolumeManager getFileSystem() {
        return fs;
    }

    KeyExtent getExtent() {
        return extent;
    }

    String getOutputFile() {
        return outputFile.toString();
    }

    /** Interprets the reason passed to the constructor as an index into {@link MajorCompactionReason}. */
    MajorCompactionReason getMajorCompactionReason() {
        return MajorCompactionReason.values()[reason];
    }

    /**
     * Runs the compaction on the calling thread.
     *
     * <p>
     * The calling thread is renamed for the duration of the call and restored afterwards. If the
     * compaction does not finish successfully, the output writer is closed and the output file is
     * deleted (failures of that cleanup are only logged).
     *
     * @return statistics (entries read, entries written, output file size) of the compaction
     * @throws IOException
     *             if reading, writing or closing a file fails, or the locality group configuration
     *             is invalid
     * @throws CompactionCanceledException
     *             if the environment disabled the compaction before all input was written
     */
    @Override
    public CompactionStats call() throws IOException, CompactionCanceledException {

        FileSKVWriter mfw = null;

        CompactionStats majCStats = new CompactionStats();

        // only the invocation that actually registered this instance may unregister it later
        boolean remove = runningCompactions.add(this);

        clearStats();

        final Path outputFilePath = outputFile.path();
        final String outputFilePathName = outputFilePath.toString();
        String oldThreadName = Thread.currentThread().getName();
        String newThreadName = "MajC compacting " + extent.toString() + " started "
                + dateFormatter.format(new Date()) + " file: " + outputFile;
        Thread.currentThread().setName(newThreadName);
        thread = Thread.currentThread();
        try {
            FileOperations fileFactory = FileOperations.getInstance();
            FileSystem ns = this.fs.getVolumeByPath(outputFilePath).getFileSystem();
            mfw = fileFactory.newWriterBuilder().forFile(outputFilePathName, ns, ns.getConf())
                    .withTableConfiguration(acuTableConf).withRateLimiter(env.getWriteLimiter()).build();

            Map<String, Set<ByteSequence>> lGroups;
            try {
                lGroups = LocalityGroupUtil.getLocalityGroups(acuTableConf);
            } catch (LocalityGroupConfigurationError e) {
                throw new IOException(e);
            }

            long t1 = System.currentTimeMillis();

            // families that went into a named group; they are excluded from the default group below
            HashSet<ByteSequence> allColumnFamilies = new HashSet<>();

            if (mfw.supportsLocalityGroups()) {
                for (Entry<String, Set<ByteSequence>> entry : lGroups.entrySet()) {
                    setLocalityGroup(entry.getKey());
                    compactLocalityGroup(entry.getKey(), entry.getValue(), true, mfw, majCStats);
                    allColumnFamilies.addAll(entry.getValue());
                }
            }

            setLocalityGroup("");
            compactLocalityGroup(null, allColumnFamilies, false, mfw, majCStats);

            long t2 = System.currentTimeMillis();

            FileSKVWriter mfwTmp = mfw;
            mfw = null; // set this to null so we do not try to close it again in finally if the close fails
            try {
                mfwTmp.close(); // if the close fails it will cause the compaction to fail
            } catch (IOException ex) {
                if (!fs.deleteRecursively(outputFile.path()) && fs.exists(outputFile.path())) {
                    log.error("Unable to delete {}", outputFile);
                }
                throw ex;
            }

            // building the summary line is not free, so skip it entirely unless it will be logged
            if (log.isDebugEnabled()) {
                double elapsedSecs = (t2 - t1) / 1000.0;
                log.debug(String.format(
                        "Compaction %s %,d read | %,d written | %,6d entries/sec | %,6.3f secs | %,12d bytes | %9.3f byte/sec",
                        extent, majCStats.getEntriesRead(), majCStats.getEntriesWritten(),
                        (int) (majCStats.getEntriesRead() / elapsedSecs), elapsedSecs, mfwTmp.getLength(),
                        mfwTmp.getLength() / elapsedSecs));
            }

            majCStats.setFileSize(mfwTmp.getLength());
            return majCStats;
        } catch (IOException e) {
            log.error("{}", e.getMessage(), e);
            throw e;
        } catch (RuntimeException e) {
            log.error("{}", e.getMessage(), e);
            throw e;
        } finally {
            Thread.currentThread().setName(oldThreadName);
            if (remove) {
                thread = null;
                runningCompactions.remove(this);
            }

            try {
                if (mfw != null) {
                    // compaction must not have finished successfully, so close its output file
                    try {
                        mfw.close();
                    } finally {
                        if (!fs.deleteRecursively(outputFile.path()) && fs.exists(outputFile.path())) {
                            log.error("Unable to delete {}", outputFile);
                        }
                    }
                }
            } catch (IOException e) {
                log.warn("{}", e.getMessage(), e);
            } catch (RuntimeException exception) {
                log.warn("{}", exception.getMessage(), exception);
            }
        }
    }

    /**
     * Opens a reader for every input file and wraps each one for problem reporting (and time
     * setting, when the file's metadata carries a time).
     *
     * <p>
     * If any file fails to open, a problem report is filed, all readers opened so far are closed,
     * {@code readers} is cleared and the failure is rethrown as an {@link IOException}.
     *
     * @param lgName
     *            locality group name; not used by this method
     * @param readers
     *            receives every opened reader so that the caller can close them
     * @return one iterator per input file
     * @throws IOException
     *             if any input file could not be opened
     */
    private List<SortedKeyValueIterator<Key, Value>> openMapDataFiles(String lgName,
            ArrayList<FileSKVIterator> readers) throws IOException {

        List<SortedKeyValueIterator<Key, Value>> iters = new ArrayList<>(filesToCompact.size());

        for (FileRef mapFile : filesToCompact.keySet()) {
            try {

                FileOperations fileFactory = FileOperations.getInstance();
                FileSystem fs = this.fs.getVolumeByPath(mapFile.path()).getFileSystem();
                FileSKVIterator reader;

                reader = fileFactory.newReaderBuilder().forFile(mapFile.path().toString(), fs, fs.getConf())
                        .withTableConfiguration(acuTableConf).withRateLimiter(env.getReadLimiter()).build();

                readers.add(reader);

                SortedKeyValueIterator<Key, Value> iter = new ProblemReportingIterator(context, extent.getTableId(),
                        mapFile.path().toString(), false, reader);

                if (filesToCompact.get(mapFile).isTimeSet()) {
                    iter = new TimeSettingIterator(iter, filesToCompact.get(mapFile).getTime());
                }

                iters.add(iter);

            } catch (Throwable e) {

                ProblemReports.getInstance(context).report(new ProblemReport(extent.getTableId(),
                        ProblemType.FILE_READ, mapFile.path().toString(), e));

                log.warn("Some problem opening map file {} {}", mapFile, e.getMessage(), e);
                // failed to open some map file... close the ones that were opened
                for (FileSKVIterator reader : readers) {
                    try {
                        reader.close();
                    } catch (Throwable e2) {
                        log.warn("Failed to close map file", e2);
                    }
                }

                readers.clear();

                if (e instanceof IOException)
                    throw (IOException) e;
                throw new IOException("Failed to open map data files", e);
            }
        }

        return iters;
    }

    /**
     * Writes one locality group section of the output file.
     *
     * @param lgName
     *            name of the locality group to start; only used when {@code inclusive} is true
     * @param columnFamilies
     *            column families passed to the iterator seek
     * @param inclusive
     *            {@code true} to start a named locality group containing {@code columnFamilies};
     *            {@code false} to start the default locality group. The flag is also passed to the
     *            iterator seek.
     * @param mfw
     *            writer of the output file
     * @param majCStats
     *            accumulator that receives this group's read/written counts, even on failure
     * @throws IOException
     *             if reading or writing fails
     * @throws CompactionCanceledException
     *             if the environment disabled the compaction while entries were still pending
     */
    private void compactLocalityGroup(String lgName, Set<ByteSequence> columnFamilies, boolean inclusive,
            FileSKVWriter mfw, CompactionStats majCStats) throws IOException, CompactionCanceledException {
        ArrayList<FileSKVIterator> readers = new ArrayList<>(filesToCompact.size());
        Span span = Trace.start("compact");
        try {
            long entriesCompacted = 0;
            List<SortedKeyValueIterator<Key, Value>> iters = openMapDataFiles(lgName, readers);

            if (imm != null) {
                iters.add(imm.compactionIterator());
            }

            CountingIterator citr = new CountingIterator(new MultiIterator(iters, extent.toDataRange()),
                    entriesRead);
            DeletingIterator delIter = new DeletingIterator(citr, propogateDeletes);
            ColumnFamilySkippingIterator cfsi = new ColumnFamilySkippingIterator(delIter);

            TabletIteratorEnvironment iterEnv;
            if (env.getIteratorScope() == IteratorScope.majc)
                // NOTE(review): second argument is presumably the "full major compaction" flag -
                // confirm against TabletIteratorEnvironment
                iterEnv = new TabletIteratorEnvironment(IteratorScope.majc, !propogateDeletes, acuTableConf);
            else if (env.getIteratorScope() == IteratorScope.minc)
                iterEnv = new TabletIteratorEnvironment(IteratorScope.minc, acuTableConf);
            else
                throw new IllegalArgumentException();

            SortedKeyValueIterator<Key, Value> itr = iterEnv.getTopLevelIterator(IteratorUtil
                    .loadIterators(env.getIteratorScope(), cfsi, extent, acuTableConf, iterators, iterEnv));

            itr.seek(extent.toDataRange(), columnFamilies, inclusive);

            if (!inclusive) {
                mfw.startDefaultLocalityGroup();
            } else {
                mfw.startNewLocalityGroup(lgName, columnFamilies);
            }

            Span write = Trace.start("write");
            try {
                while (itr.hasTop() && env.isCompactionEnabled()) {
                    mfw.append(itr.getTopKey(), itr.getTopValue());
                    itr.next();
                    entriesCompacted++;

                    if (entriesCompacted % STATS_UPDATE_INTERVAL == 0) {
                        // Update the shared counter in batches; touching it per entry would be
                        // needlessly expensive
                        entriesWritten.addAndGet(STATS_UPDATE_INTERVAL);
                    }
                }

                // The loop can only exit with entries remaining when the environment disabled the
                // compaction. Do not re-read the enabled flag here: if it flipped back in the
                // meantime, a partially written file would be treated as complete.
                if (itr.hasTop()) {
                    // cancel major compaction operation
                    try {
                        try {
                            mfw.close();
                        } catch (IOException e) {
                            log.error("{}", e.getMessage(), e);
                        }
                        fs.deleteRecursively(outputFile.path());
                    } catch (Exception e) {
                        log.warn("Failed to delete Canceled compaction output file {}", outputFile, e);
                    }
                    throw new CompactionCanceledException();
                }

            } finally {
                // report the entries written since the last full batch so the counter is exact
                entriesWritten.addAndGet(entriesCompacted % STATS_UPDATE_INTERVAL);
                CompactionStats lgMajcStats = new CompactionStats(citr.getCount(), entriesCompacted);
                majCStats.add(lgMajcStats);
                write.stop();
            }

        } finally {
            // close sequence files opened
            for (FileSKVIterator reader : readers) {
                try {
                    reader.close();
                } catch (Throwable e) {
                    log.warn("Failed to close map file", e);
                }
            }
            span.stop();
        }
    }

    /** @return a view of the input files of this compaction */
    Collection<FileRef> getFilesToCompact() {
        return filesToCompact.keySet();
    }

    /** @return whether an in-memory map is one of the inputs */
    boolean hasIMM() {
        return imm != null;
    }

    boolean willPropogateDeletes() {
        return propogateDeletes;
    }

    /** @return the value of the read counter shared with the counting iterator */
    long getEntriesRead() {
        return entriesRead.get();
    }

    /** @return entries written so far; updated in batches while a locality group is being written */
    long getEntriesWritten() {
        return entriesWritten.get();
    }

    /** @return the time in epoch milliseconds at which this compactor was constructed */
    long getStartTime() {
        return startTime;
    }

    Iterable<IteratorSetting> getIterators() {
        return this.iterators;
    }

    /** Interprets the reason passed to the constructor as an index into {@link MinorCompactionReason}. */
    MinorCompactionReason getMinCReason() {
        return MinorCompactionReason.values()[reason];
    }

}