org.apache.tika.batch.fs.FSOutputStreamFactory.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.tika.batch.fs.FSOutputStreamFactory.java

Source

package org.apache.tika.batch.fs;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
import org.apache.tika.batch.OutputStreamFactory;
import org.apache.tika.metadata.Metadata;

/**
 * {@link OutputStreamFactory} that opens output files beneath a root directory on the
 * file system, optionally wrapping the file stream in a compressing stream.
 */
public class FSOutputStreamFactory implements OutputStreamFactory {

    /** Compression wrapper applied to the stream returned by {@link #getOutputStream(Metadata)}. */
    public enum COMPRESSION {
        NONE, BZIP2, GZIP, ZIP
    }

    private final FSUtil.HANDLE_EXISTING handleExisting;
    private final Path outputRoot;
    private final String suffix;
    private final COMPRESSION compression;

    /**
     * Creates a factory from a {@link File} output root by converting it to a {@link Path}
     * and delegating to the {@link Path}-based constructor.
     *
     * @param outputRoot root directory handed to {@link FSUtil#getOutputPath} for each output file
     * @param handleExisting policy handed to {@link FSUtil#getOutputPath} for each output file
     * @param compression compression wrapper to apply to every stream
     * @param suffix suffix handed to {@link FSUtil#getOutputPath} for each output file
     * @deprecated use {@link #FSOutputStreamFactory(Path, FSUtil.HANDLE_EXISTING, COMPRESSION, String)}
     * @see #FSOutputStreamFactory(Path, FSUtil.HANDLE_EXISTING, COMPRESSION, String)
     */
    @Deprecated
    public FSOutputStreamFactory(File outputRoot, FSUtil.HANDLE_EXISTING handleExisting, COMPRESSION compression,
            String suffix) {
        this(Paths.get(outputRoot.toURI()), handleExisting, compression, suffix);
    }

    /**
     * Creates a factory that writes beneath {@code outputRoot}.
     *
     * @param outputRoot root directory handed to {@link FSUtil#getOutputPath} for each output file
     * @param handleExisting policy handed to {@link FSUtil#getOutputPath} for each output file
     * @param compression compression wrapper to apply to every stream
     * @param suffix suffix handed to {@link FSUtil#getOutputPath} for each output file
     */
    public FSOutputStreamFactory(Path outputRoot, FSUtil.HANDLE_EXISTING handleExisting, COMPRESSION compression,
            String suffix) {
        this.handleExisting = handleExisting;
        this.outputRoot = outputRoot;
        this.suffix = suffix;
        this.compression = compression;
    }

    /**
     * Opens an output stream for the file that corresponds to the given metadata.
     * <p>
     * The target path is computed by {@link FSUtil#getOutputPath} from the output root, the
     * relative path stored in the metadata, the
     * {@link org.apache.tika.batch.fs.FSUtil.HANDLE_EXISTING} policy and the suffix that were
     * passed in during initialization. If that call yields no path (for example, per the
     * original documentation, when the policy is "SKIP" and the output file already exists),
     * this method returns {@code null}.
     * <p>
     * If the parent directory of the target does not exist yet, it is created; a failure to
     * create it surfaces as an {@link IOException}.
     * <p>
     * Finally the file is opened and wrapped according to the configured {@link COMPRESSION}.
     * If building the wrapper fails, the underlying file stream is closed before the
     * exception propagates, so no file handle is leaked.
     *
     * @param metadata must have a value set for {@link FSProperties#FS_REL_PATH}; the original
     *                 documentation warns that a NullPointerException results otherwise
     * @return the (possibly compressing) buffered stream, or {@code null} if no output path
     *         was produced
     * @throws IOException if the parent directory cannot be created or the file cannot be opened
     */
    @Override
    public OutputStream getOutputStream(Metadata metadata) throws IOException {
        String initialRelativePath = metadata.get(FSProperties.FS_REL_PATH);
        Path outputPath = FSUtil.getOutputPath(outputRoot, initialRelativePath, handleExisting, suffix);
        if (outputPath == null) {
            return null;
        }

        // getParent() is null for a path without a parent component; nothing to create then.
        Path parent = outputPath.getParent();
        if (parent != null && !Files.isDirectory(parent)) {
            // createDirectories throws an IOException itself when the directory cannot be
            // created, so no follow-up existence check is required.
            Files.createDirectories(parent);
        }

        OutputStream fileStream = Files.newOutputStream(outputPath);
        try {
            return new BufferedOutputStream(wrapWithCompression(fileStream));
        } catch (IOException | RuntimeException e) {
            // A wrapper constructor may fail (e.g. while writing its header); make sure the
            // already-opened file stream is released before propagating the failure.
            try {
                fileStream.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Wraps the raw file stream in the compressing stream selected at construction time.
     *
     * @param raw stream writing directly to the output file
     * @return the compressing stream, or {@code raw} itself for {@link COMPRESSION#NONE}
     * @throws IOException if the compressing stream cannot be initialized
     */
    private OutputStream wrapWithCompression(OutputStream raw) throws IOException {
        switch (compression) {
        case BZIP2:
            return new BZip2CompressorOutputStream(raw);
        case GZIP:
            return new GZIPOutputStream(raw);
        case ZIP:
            // NOTE(review): zip archive streams usually need an entry to be opened before
            // data is written; confirm that users of the ZIP option account for this.
            return new ZipArchiveOutputStream(raw);
        case NONE:
        default:
            return raw;
        }
    }
}