nl.surfsara.newsreader.loader.ReadNewsreaderDocs.java Source code

Java tutorial

Introduction

Here is the source code for nl.surfsara.newsreader.loader.ReadNewsreaderDocs.java

Source

/**
 * Copyright 2014 SURFsara
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package nl.surfsara.newsreader.loader;

import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.security.PrivilegedAction;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile.Reader.Option;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile;

/**
 * Accessing HDFS needs to be performed with privileges for a principal (user)
 * enabled. This is an implementation of a PriviligedAction that, as the logged
 * in user, reads NAF documents from a sequence file and stores these on the
 * local file system.
 * 
 * @author mathijs.kattenberg@surfsara.nl
 */
public class ReadNewsreaderDocs implements PrivilegedAction<Long> {

    private Configuration conf;
    private String source;
    private String dest;

    public ReadNewsreaderDocs(Configuration conf, String source, String dest) {
        this.conf = conf;
        this.source = source;
        this.dest = dest;
    }

    @Override
    public Long run() {
        long numfilesread = 0;
        Path sPath = new Path(source);
        File destDir = new File(dest);
        if (destDir.isDirectory()) {
            destDir.mkdirs();
            try {
                FileSystem fileSystem = FileSystem.get(conf);
                FileStatus[] globStatus = fileSystem.globStatus(sPath);
                for (FileStatus fss : globStatus) {
                    if (fss.isFile()) {
                        Option optPath = SequenceFile.Reader.file(fss.getPath());
                        SequenceFile.Reader r = new SequenceFile.Reader(conf, optPath);

                        Text key = new Text();
                        Text val = new Text();

                        while (r.next(key, val)) {
                            File outputFile = new File(destDir, key.toString());
                            FileOutputStream fos = new FileOutputStream(outputFile);
                            InputStream is = IOUtils.toInputStream(val.toString());
                            IOUtils.copy(is, fos);
                            fos.flush();
                            fos.close();
                            numfilesread++;
                        }
                        r.close();
                    }

                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        } else {
            System.out.println("Destination should be a directory.");
        }
        return numfilesread;
    }

}