com.edwardsit.spark4n6.EWFImageInputFormat.java Source code

Introduction

Here is the source code for com.edwardsit.spark4n6.EWFImageInputFormat.java, a Hadoop FileInputFormat that splits EnCase EWF (.E01) forensic disk images into chunk-aligned input splits.

Source

/*
 * Copyright 2015-2016 Derek Edwards
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.edwardsit.spark4n6;

import edu.nps.jlibewf.EWFFileReader;
import edu.nps.jlibewf.EWFSection;
import edu.nps.jlibewf.EWFSegmentFileReader;
import org.apache.hadoop.fs.*;
import org.apache.log4j.*;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
* A Hadoop FileInputFormat that splits EnCase EWF (.E01) disk images into
* chunk-aligned input splits, using each image's section table to map runs of
* chunks onto the segment files and offsets that store them.
*
* Created by Derek on 9/22/2014.
*/
public class EWFImageInputFormat extends FileInputFormat<BytesWritable, BytesWritable> {
    private static final Logger log = Logger.getLogger(EWFImageInputFormat.class);
    /** Size in bytes of one EWF chunk; populated from the reader's default in getSplits(). */
    private long chunkSize = 0L;

    public EWFImageInputFormat() {
    }

    @Override
    public RecordReader<BytesWritable, BytesWritable> createRecordReader(InputSplit split,
            TaskAttemptContext context) throws IOException, InterruptedException {
        return new EWFRecordReader();
    }

    /** EWF images are splitable because getSplits() aligns every split to a chunk boundary. */
    @Override
    protected boolean isSplitable(JobContext context, Path filename) {
        return true;
    }

    /**
     * Scans the section table of every .E01 input file and emits one split per
     * run of chunks, sized to roughly one HDFS block and located on the hosts
     * that hold the underlying data.
     */
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        log.setLevel(Level.DEBUG); // forces debug output for split planning, overriding the log4j configuration
        List<InputSplit> splits = new ArrayList<InputSplit>();
        List<FileStatus> files = listStatus(job);
        BlockLocation[] blkLocations = null;
        Path path = null;
        FileSystem fs = null;
        EWFFileReader ewf = null;
        ArrayList<EWFSection.SectionPrefix> sections = null;
        Iterator<EWFSection.SectionPrefix> it = null;
        EWFSection.SectionPrefix sp = null;
        Path priorFile = null;
        long priorOffset = 0L;
        FileStatus priorFileStatus = null;
        // NOTE: fs is still null here; this works only because DEFAULT_CHUNK_SIZE
        // is a constant that does not depend on an initialized FileSystem.
        chunkSize = new EWFSegmentFileReader(fs).DEFAULT_CHUNK_SIZE;
        long priorStart = 0L;
        int blkIndex = 0;
        for (FileStatus file : files) {
            path = file.getPath();
            fs = path.getFileSystem(job.getConfiguration());
            if (path.getName().endsWith(".E01")) {

                ewf = new EWFFileReader(fs, path);
                sections = ewf.getSectionPrefixArray();
                it = sections.iterator();
                while (it.hasNext()) {
                    sp = it.next();
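                    // A table section indexes the chunks written since the previous
                    // section; those chunks live in priorFile at priorOffset, so
                    // splits are carved out of priorFile, not the current section's file.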
                    if (sp.sectionType.equals(EWFSection.SectionType.TABLE_TYPE)) {
                        priorFileStatus = fs.getFileStatus(priorFile);
                        for (long i = sp.chunkCount; i > 0L; i = i - getChunksPerSplit(priorFileStatus)) {
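                            // i counts down the chunks not yet assigned to a split; each
                            // pass emits one split of at most getChunksPerSplit() chunks.
                            // This assumes the HDFS block size is at least one chunk wide,
                            // otherwise getChunksPerSplit() returns 0 and the loop never ends.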
                            if (priorFileStatus instanceof LocatedFileStatus) {
                                blkLocations = ((LocatedFileStatus) priorFileStatus).getBlockLocations();
                            } else {
                                blkLocations = fs.getFileBlockLocations(priorFileStatus, priorOffset,
                                        (getChunksPerSplit(priorFileStatus) * chunkSize));
                            }
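                            // Find the HDFS block containing priorOffset so the split can
                            // be scheduled on hosts that store that block.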
                            blkIndex = getBlockIndex(blkLocations, priorOffset);
                            if (i > getChunksPerSplit(priorFileStatus)) {
                                log.debug("splits.add(makeSplit(" + priorFile + ", " + (priorStart * chunkSize)
                                        + ", " + (getChunksPerSplit(priorFileStatus) * chunkSize) + ", "
                                        + listHosts(blkLocations, blkIndex) + ");");
                                splits.add(makeSplit(priorFile, (priorStart * chunkSize),
                                        (getChunksPerSplit(priorFileStatus) * chunkSize),
                                        blkLocations[blkIndex].getHosts()));
                                priorStart += getChunksPerSplit(priorFileStatus);
                            } else {
                                log.debug("splits.add(makeSplit(" + priorFile + ", " + (priorStart * chunkSize)
                                        + ", " + (i * chunkSize) + ", " + listHosts(blkLocations, blkIndex) + ");");
                                splits.add(makeSplit(priorFile, (priorStart * chunkSize), (i * chunkSize),
                                        blkLocations[blkIndex].getHosts()));
                                priorStart += i;
                            }
                        }
                    }
                    priorFile = sp.file;
                    priorOffset = sp.fileOffset;
                }
            }
        }
        return splits;
    }

    /** Renders the host list of one block location for debug logging, e.g. "[host1 host2 ]". */
    protected String listHosts(BlockLocation[] blkLocations, int blkIndex) throws IOException {
        StringBuilder hosts = new StringBuilder();
        hosts.append("[");
        for (String host : blkLocations[blkIndex].getHosts()) {
            hosts.append(host).append(" ");
        }
        hosts.append("]");
        return hosts.toString();
    }

    /** Number of whole EWF chunks that fit in one HDFS block of the given file. */
    protected long getChunksPerSplit(FileStatus file) {
        return file.getBlockSize() / chunkSize;
    }
}
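
Usage

For context, here is a minimal sketch of how this InputFormat might be wired into a map-only MapReduce job. The driver and mapper names are illustrative, not part of spark4n6, and the sketch assumes the companion EWFRecordReader class referenced above is on the classpath.

import com.edwardsit.spark4n6.EWFImageInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

import java.io.IOException;

/** Illustrative driver: run a mapper over every chunk-aligned split of an EWF image. */
public class EWFImageDriver {

    /** Hypothetical mapper; key and value are BytesWritable, as declared by EWFImageInputFormat. */
    public static class ChunkMapper
            extends Mapper<BytesWritable, BytesWritable, BytesWritable, BytesWritable> {
        @Override
        protected void map(BytesWritable key, BytesWritable value, Context context)
                throws IOException, InterruptedException {
            // Process one record from EWFRecordReader here, e.g. hash or scan the bytes.
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "ewf-image-scan");
        job.setJarByClass(EWFImageDriver.class);
        job.setInputFormatClass(EWFImageInputFormat.class); // chunk-aligned splits of the .E01 image
        job.setMapperClass(ChunkMapper.class);
        job.setNumReduceTasks(0); // map-only
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(BytesWritable.class);
        job.setOutputFormatClass(NullOutputFormat.class); // this sketch discards mapper output
        EWFImageInputFormat.addInputPath(job, new Path(args[0])); // path to the first .E01 segment
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}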