com.streamsets.pipeline.lib.io.FileContext.java Source code

Java tutorial

Introduction

Here is the source code for com.streamsets.pipeline.lib.io.FileContext.java

Source

/**
 * Copyright 2015 StreamSets Inc.
 *
 * Licensed under the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.streamsets.pipeline.lib.io;

import com.streamsets.pipeline.api.impl.Utils;
import com.streamsets.pipeline.config.PostProcessingOptions;
import com.streamsets.pipeline.lib.parser.shaded.com.google.code.regexp.Pattern;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * The <code>FileContext</code> encapsulates all live information about a directory being scanned/read.
 * <p>
 * It tracks the file currently being read, the reader opened on it, and the file/offset pair from which the
 * next reader has to start. Most methods are guarded by <code>Utils.checkState(open, ...)</code> and are
 * therefore not usable once the context has been marked as closed.
 */
public class FileContext {
    private static final Logger LOG = LoggerFactory.getLogger(FileContext.class);
    private static final String CLOSED_MESSAGE = "FileContext is closed";

    private final MultiFileInfo multiFileInfo;
    private final LiveDirectoryScanner scanner;
    private final Charset charset;
    private final int maxLineLength;
    private final PostProcessingOptions postProcessing;
    private final String archiveDir;
    private final FileEventPublisher eventPublisher;
    private final RollMode rollMode;
    private final Path dir;
    private LiveFile currentFile;
    private LiveFileReader reader;
    private LiveFile startingCurrentFileName;
    private long startingOffset;
    private boolean open;

    /**
     * Creates a context for the file described by <code>multiFileInfo</code>.
     * <p>
     * The directory to scan is the parent of <code>multiFileInfo.getFileFullPath()</code>; the file name part is
     * handed to the roll mode factory together with <code>multiFileInfo.getPattern()</code>.
     *
     * @param multiFileInfo description of the file(s) to read (full path, roll mode, pattern, first file, tag).
     * @param charset charset handed to the readers created by this context.
     * @param maxLineLength maximum line length handed to the readers created by this context.
     * @param postProcessing action applied to a file once it has been fully read without error.
     * @param archiveDir base directory used by the <code>ARCHIVE</code> post processing action.
     * @param eventPublisher receiver of the file START/END/ERROR events.
     * @throws IOException declared for the collaborators created here.
     */
    public FileContext(MultiFileInfo multiFileInfo, Charset charset, int maxLineLength,
            PostProcessingOptions postProcessing, String archiveDir, FileEventPublisher eventPublisher)
            throws IOException {
        open = true;
        this.multiFileInfo = multiFileInfo;
        this.charset = charset;
        this.maxLineLength = maxLineLength;
        this.postProcessing = postProcessing;
        this.archiveDir = archiveDir;
        this.eventPublisher = eventPublisher;
        Path fullPath = Paths.get(multiFileInfo.getFileFullPath());
        // NOTE(review): Path.getParent() returns null for a path without a parent (e.g. a bare file name);
        // the configured path is presumably always absolute - confirm with the callers.
        dir = fullPath.getParent();
        Path name = fullPath.getFileName();
        rollMode = multiFileInfo.getFileRollMode().createRollMode(name.toString(), multiFileInfo.getPattern());
        // the field is used directly so the constructor does not call an overridable method.
        scanner = new LiveDirectoryScanner(dir.toString(), multiFileInfo.getFirstFile(), rollMode);
    }

    @Override
    public String toString() {
        return Utils.format("FileContext[path={} rollMode={}]", multiFileInfo.getFileFullPath(), rollMode);
    }

    /**
     * Returns what the directory scanner reports as pending files relative to the current file.
     *
     * @return the value of <code>scanner.getPendingFiles(currentFile)</code>.
     * @throws IOException if the scanner fails.
     */
    public long getPendingFiles() throws IOException {
        return scanner.getPendingFiles(currentFile);
    }

    /**
     * Indicates if this context currently holds a reader.
     *
     * @return <code>true</code> if a reader is held, <code>false</code> otherwise.
     */
    public boolean hasReader() {
        Utils.checkState(open, CLOSED_MESSAGE);
        return reader != null;
    }

    // a file context is active while its parent directory exists.
    public boolean isActive() {
        return Files.exists(dir);
    }

    /**
     * Closes the reader held by this context, if any, and marks the context as closed. A failure to close the
     * reader is logged and not propagated.
     */
    public void close() {
        // NOTE(review): when there is no reader the context is NOT marked as closed; kept as is because callers
        // may rely on it - confirm whether 'open' should be reset unconditionally.
        if (open && reader != null) {
            open = false;
            try {
                reader.close();
            } catch (IOException ex) {
                LOG.warn("Could not close '{}' file properly: {}", reader.getLiveFile(), ex.toString(), ex);
            } finally {
                reader = null;
            }
        }
    }

    /**
     * Prepares and gets the reader, if available, before a read.
     * <p>
     * If a reader is already held it is returned as is. Otherwise the starting file and offset are used to
     * create one; when there is no starting file, or the starting offset is <code>Long.MAX_VALUE</code> (the
     * previous file was finished), the scanner is asked for the file to read and reading starts at offset zero.
     * A START event is published when the reader is created at offset zero.
     *
     * @return the reader, or <code>null</code> if there is no file to read.
     * @throws IOException if scanning, refreshing the file or creating the reader fails.
     */
    public LiveFileReader getReader() throws IOException {
        Utils.checkState(open, CLOSED_MESSAGE);
        if (reader != null) {
            return reader;
        }
        currentFile = getStartingCurrentFileName();
        long fileOffset = getStartingOffset();

        boolean needsToScan = currentFile == null || fileOffset == Long.MAX_VALUE;
        if (needsToScan) {
            if (currentFile != null) {
                // we need to refresh the file in case the name changed before scanning as the scanner does not
                // refresh
                currentFile = currentFile.refresh();
            }
            currentFile = scanner.scan(currentFile);
            fileOffset = 0;
        }
        if (currentFile != null) {
            reader = createReader(fileOffset);
            if (fileOffset == 0) {
                // file start event
                eventPublisher.publish(new FileEvent(currentFile, FileEvent.Action.START));
            }
        }
        return reader;
    }

    // creates the reader for the current file, wrapped in a multi-line reader if a main line pattern is set.
    private LiveFileReader createReader(long fileOffset) throws IOException {
        LiveFileReader newReader = new SingleLineLiveFileReader(getRollMode(), getMultiFileInfo().getTag(),
                currentFile, charset, fileOffset, maxLineLength);
        try {
            String mainLinePattern = multiFileInfo.getMultiLineMainLinePatter();
            if (!mainLinePattern.isEmpty()) {
                newReader = new MultiLineLiveFileReader(getMultiFileInfo().getTag(), newReader,
                        Pattern.compile(mainLinePattern));
            }
        } catch (RuntimeException ex) {
            // the wrapping failed (e.g. invalid pattern): do not keep a half configured reader around,
            // 'newReader' still refers to the single line reader at this point.
            IOUtils.closeQuietly(newReader);
            throw ex;
        }
        return newReader;
    }

    /**
     * Updates the reader and the starting file/offset after a read.
     * <p>
     * If the reader has more data and no error was signaled, the reader is kept and its current offset becomes
     * the starting offset. Otherwise the reader is closed and discarded, the starting offset is set to
     * <code>Long.MAX_VALUE</code> and, if the file can be refreshed, either an ERROR event is published (on
     * error, skipping post processing) or an END event is published and the post processing action is applied.
     *
     * @param inErrorDiscardReader <code>true</code> if processing the file produced an error and the reader must
     * be discarded.
     * @throws IOException if refreshing the file fails or the post processing action fails.
     */
    public void releaseReader(boolean inErrorDiscardReader) throws IOException {
        Utils.checkState(open, CLOSED_MESSAGE);
        // update starting offsets for next invocation either cold (no reader) or hot (reader)
        boolean hasNext;
        try {
            hasNext = reader != null && reader.hasNext();
        } catch (IOException ex) {
            // the reader is unusable; from here on the file is handled as if there were no more data.
            LOG.warn("Could not check for more data in '{}', discarding reader: {}", currentFile, ex.toString(),
                    ex);
            IOUtils.closeQuietly(reader);
            reader = null;
            hasNext = false;
        }

        if (hasNext && !inErrorDiscardReader) {
            // hot: keep the reader and remember where it is.
            setStartingCurrentFileName(currentFile);
            setStartingOffset(reader.getOffset());
            return;
        }

        IOUtils.closeQuietly(reader);
        reader = null;
        // Using Long.MAX_VALUE to signal we reach the end of the file and next iteration should get the next file.
        setStartingCurrentFileName(currentFile);
        setStartingOffset(Long.MAX_VALUE);

        // If we failed to open the file in first place, it will be null and hence we won't do anything with it.
        if (currentFile == null) {
            return;
        }

        // File end event
        LiveFile file = currentFile.refresh();
        if (file == null) {
            return;
        }
        if (inErrorDiscardReader) {
            LOG.warn("Processing file '{}' produced an error, skipping '{}' post processing on that file", file,
                    postProcessing);
            eventPublisher.publish(new FileEvent(file, FileEvent.Action.ERROR));
        } else {
            eventPublisher.publish(new FileEvent(file, FileEvent.Action.END));
            applyPostProcessing(file);
        }
    }

    // applies the configured post processing action (none, delete or archive) to a fully processed file.
    private void applyPostProcessing(LiveFile file) throws IOException {
        switch (postProcessing) {
        case NONE:
            LOG.debug("File '{}' processing completed, post processing action 'NONE'", file);
            break;
        case DELETE:
            try {
                Files.delete(file.getPath());
                LOG.debug("File '{}' processing completed, post processing action 'DELETED'", file);
            } catch (IOException ex) {
                throw new IOException(Utils.format("Could not delete '{}': {}", file, ex.toString()), ex);
            }
            break;
        case ARCHIVE:
            // the file path is appended to the archive directory to build the destination.
            Path fileArchive = Paths.get(archiveDir, file.getPath().toString());
            try {
                Files.createDirectories(fileArchive.getParent());
                Files.move(file.getPath(), fileArchive);
                LOG.debug("File '{}' processing completed, post processing action 'ARCHIVED' as '{}'", file,
                        fileArchive);
            } catch (IOException ex) {
                throw new IOException(Utils.format("Could not archive '{}': {}", file, ex.toString()), ex);
            }
            break;
        default:
            // any other option: nothing to do.
            break;
        }
    }

    /**
     * @return the file information this context was created with.
     */
    public MultiFileInfo getMultiFileInfo() {
        return multiFileInfo;
    }

    /**
     * @return the file the next reader has to start from, may be <code>null</code>.
     */
    public LiveFile getStartingCurrentFileName() {
        Utils.checkState(open, CLOSED_MESSAGE);
        return startingCurrentFileName;
    }

    /**
     * @return the offset the next reader has to start from; <code>Long.MAX_VALUE</code> signals that the end of
     * the starting file was reached.
     */
    public long getStartingOffset() {
        Utils.checkState(open, CLOSED_MESSAGE);
        return startingOffset;
    }

    /**
     * @return the roll mode created for this context.
     */
    public RollMode getRollMode() {
        Utils.checkState(open, CLOSED_MESSAGE);
        return rollMode;
    }

    /**
     * @param startingCurrentFileName the file the next reader has to start from.
     */
    public void setStartingCurrentFileName(LiveFile startingCurrentFileName) {
        Utils.checkState(open, CLOSED_MESSAGE);
        this.startingCurrentFileName = startingCurrentFileName;
    }

    /**
     * @param startingOffset the offset the next reader has to start from.
     */
    public void setStartingOffset(long startingOffset) {
        Utils.checkState(open, CLOSED_MESSAGE);
        this.startingOffset = startingOffset;
    }
}