org.apache.crunch.kafka.offset.hdfs.HDFSOffsetReader.java Source code

Here is the source code for org.apache.crunch.kafka.offset.hdfs.HDFSOffsetReader.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.crunch.kafka.offset.hdfs;

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.crunch.kafka.offset.AbstractOffsetReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kafka.common.TopicPartition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

/**
 * Reader implementation that reads offset information from HDFS.
 */
public class HDFSOffsetReader extends AbstractOffsetReader {

    private static final Logger LOG = LoggerFactory.getLogger(HDFSOffsetReader.class);

    private final Configuration config;
    private final Path baseOffsetStoragePath;
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /**
     * Creates a reader instance for interacting with the storage specified by the {@code config} and with
     * the base storage path of {@code baseOffsetStoragePath}.
     *
     * @param config                the config for interacting with the underlying data store.
     * @param baseOffsetStoragePath the base storage path for offset information.  If the path does not exist it will
     *                              be created.
     * @throws IllegalArgumentException if either argument is {@code null}.
     */
    public HDFSOffsetReader(Configuration config, Path baseOffsetStoragePath) {
        if (config == null) {
            throw new IllegalArgumentException("The 'config' cannot be 'null'.");
        }
        if (baseOffsetStoragePath == null) {
            throw new IllegalArgumentException("The 'baseOffsetStoragePath' cannot be 'null'.");
        }
        this.config = config;
        this.baseOffsetStoragePath = baseOffsetStoragePath;
    }

    @Override
    public Map<TopicPartition, Long> readLatestOffsets() throws IOException {
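        // Persistence times are sorted newest first; an empty list means no offsets have been stored yet.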
        List<Long> storedOffsetPersistenceTimes = getStoredOffsetPersistenceTimes(true);
        if (storedOffsetPersistenceTimes.isEmpty()) {
            return Collections.emptyMap();
        }

        long persistedTime = storedOffsetPersistenceTimes.get(0);

        Map<TopicPartition, Long> offsets = readOffsets(persistedTime);

        return offsets == null ? Collections.<TopicPartition, Long>emptyMap() : offsets;
    }

    @Override
    public Map<TopicPartition, Long> readOffsets(long persistedOffsetTime) throws IOException {
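        // Derive the storage path for the requested persistence time using the writer's naming scheme.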
        Path offsetFilePath = HDFSOffsetWriter.getPersistedTimeStoragePath(baseOffsetStoragePath,
                persistedOffsetTime);

        FileSystem fs = getFileSystem();
        if (fs.isFile(offsetFilePath)) {
            // Deserialize the JSON offsets file and index each offset by its topic and partition.
            try (InputStream inputStream = fs.open(offsetFilePath)) {
                Offsets offsets = MAPPER.readValue(inputStream, Offsets.class);
                Map<TopicPartition, Long> partitionsMap = new HashMap<>();
                for (Offsets.PartitionOffset partitionOffset : offsets.getOffsets()) {
                    partitionsMap.put(
                            new TopicPartition(partitionOffset.getTopic(), partitionOffset.getPartition()),
                            partitionOffset.getOffset());
                }
                return partitionsMap;
            }
        }

        LOG.error("Offset file at {} is not a file or does not exist.", offsetFilePath);
        return null;
    }

    @Override
    public List<Long> getStoredOffsetPersistenceTimes() throws IOException {
        return getStoredOffsetPersistenceTimes(false);
    }

    private List<Long> getStoredOffsetPersistenceTimes(boolean newestFirst) throws IOException {
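        // Collect the persistence time encoded in the name of every well-formed offset file under the base path.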
        List<Long> persistedTimes = new LinkedList<>();
        FileSystem fs = getFileSystem();
        try {
            FileStatus[] fileStatuses = fs.listStatus(baseOffsetStoragePath);
            for (FileStatus status : fileStatuses) {
                if (status.isFile()) {
                    String fileName = status.getPath().getName();
                    try {
                        persistedTimes.add(HDFSOffsetWriter.fileNameToPersistenceTime(fileName));
                    } catch (IllegalArgumentException iae) {
                        LOG.info("Skipping file {} due to filename not being of the correct format.",
                                status.getPath(), iae);
                    }
                } else {
                    LOG.info("Skippping {} because it is not a file.", status.getPath());
                }
            }
        } catch (FileNotFoundException fnfe) {
            LOG.error("Unable to retrieve prior offsets.", fnfe);
        }

        // Natural ordering sorts the oldest (smallest) persistence time first; reverse ordering puts the newest first.
        if (newestFirst) {
            Collections.sort(persistedTimes, Collections.reverseOrder());
        } else {
            Collections.sort(persistedTimes);
        }
        return Collections.unmodifiableList(persistedTimes);
    }

    @Override
    public void close() throws IOException {
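        // Intentionally a no-op: FileSystem instances returned by FileSystem.get() are cached and
        // managed by Hadoop, so there are no resources for this reader to release.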

    }

    /**
     * Returns the {@link FileSystem} instance for writing data.  Callers are not responsible for closing the instance.
     *
     * @return the {@link FileSystem} instance for writing data.
     * @throws IOException error retrieving underlying file system.
     */
    protected FileSystem getFileSystem() throws IOException {
        return FileSystem.get(config);
    }
}
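
For illustration, here is a minimal sketch of how the reader might be used to fetch the most recently persisted offsets. The base path /tmp/kafka/offsets and the wrapper class HDFSOffsetReaderExample are hypothetical; the sketch assumes the offset files were previously written beneath that path by HDFSOffsetWriter.

import org.apache.crunch.kafka.offset.hdfs.HDFSOffsetReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.kafka.common.TopicPartition;

import java.io.IOException;
import java.util.Map;

public class HDFSOffsetReaderExample {

    public static void main(String[] args) throws IOException {
        // Hypothetical base path; it should match the path an HDFSOffsetWriter wrote to.
        Path basePath = new Path("/tmp/kafka/offsets");

        HDFSOffsetReader reader = new HDFSOffsetReader(new Configuration(), basePath);
        try {
            // Returns an empty map when no offset files exist under the base path.
            Map<TopicPartition, Long> latest = reader.readLatestOffsets();
            for (Map.Entry<TopicPartition, Long> entry : latest.entrySet()) {
                System.out.println(entry.getKey() + " -> " + entry.getValue());
            }
        } finally {
            reader.close();
        }
    }
}

Closing the reader here is purely for symmetry with the reader contract: as shown above, close() is a no-op because the cached FileSystem instance is managed by Hadoop.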