org.apache.mahout.classifier.svm.datastore.HDFSReader.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.mahout.classifier.svm.datastore.HDFSReader.java

Source

/*
 * 
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 * 
 *       http://www.apache.org/licenses/LICENSE-2.0
 * 
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.apache.mahout.classifier.svm.datastore;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Reads text lines from files on the file system that the wrapped Hadoop
 * {@link Configuration} resolves to (typically HDFS).
 * 
 * All read methods load the requested lines into memory as a
 * {@code List<String>}; callers are responsible for making sure the data fits.
 */
public class HDFSReader {

    private static final Logger log = LoggerFactory.getLogger(HDFSReader.class);

    /** Hadoop configuration used to obtain the {@link FileSystem} for every call. */
    private final Configuration conf;

    /**
     * Creates a reader backed by a fresh default {@link Configuration}.
     */
    public HDFSReader() {
        this.conf = new Configuration();
    }

    /**
     * Set HDFS server address. Delegates to
     * {@code HDFSConfig.setSeverAddress} on this reader's configuration.
     * 
     * @param serverAdress
     *          address of the server; the expected format is whatever
     *          {@code HDFSConfig.setSeverAddress} accepts
     */
    public void setServerAddress(String serverAdress) {
        HDFSConfig.setSeverAddress(this.conf, serverAdress);
    }

    /**
     * Set user account and password. Delegates to
     * {@code HDFSConfig.setUSerPasswd} on this reader's configuration.
     * 
     * @param userpass
     *          user/password string; the expected format is whatever
     *          {@code HDFSConfig.setUSerPasswd} accepts
     */
    public void setServerUserPasswd(String userpass) {
        HDFSConfig.setUSerPasswd(this.conf, userpass);
    }

    /**
     * Reads every line of every regular file directly inside a directory
     * (sub-directories are skipped, not recursed into).
     * 
     * This method is best-effort: any exception is logged and the lines
     * collected so far are returned instead of propagating the failure.
     * 
     * @param filePath
     *          directory to read; if it is a regular file nothing is read
     * @return the concatenated lines of all files read; never {@code null},
     *         possibly empty
     */
    public List<String> readADirectory(Path filePath) {
        List<String> lines = new ArrayList<String>();
        try {
            FileSystem fs = FileSystem.get(this.conf);
            if (!fs.isFile(filePath)) {
                FileStatus[] fsList = fs.listStatus(filePath);
                if (fsList == null) {
                    // Some FileSystem implementations return null instead of
                    // throwing when the path does not exist.
                    log.warn("Nothing to list under " + filePath);
                    return lines;
                }
                for (FileStatus file : fsList) {
                    if (!file.isDir()) {
                        // Use the path reported by the listing rather than
                        // rebuilding it by string concatenation.
                        lines.addAll(readAllLines(file.getPath()));
                    }
                }
            }
        } catch (Exception e) {
            // Pass the exception itself so the stack trace is not lost.
            log.error("Exception while reading directory " + filePath + ": " + e.getMessage(), e);
        }
        return lines;
    }

    /**
     * Tells whether a path is something other than a regular file.
     * 
     * Note that the result is simply the negation of
     * {@link FileSystem#isFile(Path)}, so a path for which {@code isFile}
     * reports {@code false} for any reason is treated as a directory.
     * 
     * @param filePath
     *          path to test
     * @return {@code true} if the file system does not report the path as a
     *         regular file; {@code false} if it is a file or if the check
     *         failed with an exception (which is logged)
     */
    public boolean isDir(Path filePath) {
        try {
            FileSystem fs = FileSystem.get(this.conf);
            return !fs.isFile(filePath);
        } catch (Exception e) {
            log.error("Exception while checking " + filePath + ": " + e.getMessage(), e);
            return false;
        }
    }

    /**
     * Simple implementation of reading a range of lines from a file.
     * Line numbers are zero-based and both bounds are inclusive. Lines before
     * {@code startLine} are read and discarded.
     * 
     * @param filePath
     *          file path on the server
     * @param startLine
     *          index of the first line to return
     * @param endLine
     *          index of the last line to return
     * @return the lines in {@code [startLine, endLine]} that exist in the
     *         file; empty if the file has fewer than {@code startLine + 1}
     *         lines
     * @throws IOException
     *           if the file cannot be opened or read
     */
    public List<String> readLines(Path filePath, int startLine, int endLine) throws IOException {
        FileSystem fs = FileSystem.get(this.conf);
        LineReader lr = new LineReader(fs.open(filePath));
        List<String> lines = new ArrayList<String>();
        Text line = new Text();
        try {
            for (int i = 0; i <= endLine; i++) {
                // readLine returns the number of bytes consumed (including the
                // line terminator); 0 means end of stream. Testing the text
                // length instead would wrongly stop at the first blank line.
                if (lr.readLine(line) <= 0) {
                    log.info("No more line! " + i);
                    break;
                }
                if (i >= startLine) {
                    lines.add(line.toString());
                }
            }
        } finally {
            lr.close();
        }
        return lines;
    }

    /**
     * Reads every line of a file into memory. Very dangerous for large files:
     * the whole content is held in the returned list.
     * 
     * @param filePath
     *          file to read
     * @return all lines of the file, in order
     * @throws IOException
     *           if the file cannot be opened or read
     */
    public List<String> readAllLines(Path filePath) throws IOException {
        FileSystem fs = FileSystem.get(this.conf);
        LineReader lr = new LineReader(fs.open(filePath));
        List<String> lines = new ArrayList<String>();
        Text line = new Text();
        try {
            while (lr.readLine(line) > 0) {
                lines.add(line.toString());
            }
        } finally {
            lr.close();
        }
        return lines;
    }

    /**
     * Reads at most {@code maxLines} lines from the beginning of a file.
     * 
     * @param filePath
     *          file name.
     * @param maxLines
     *          maximum number of lines to return; values {@code <= 0} yield
     *          an empty list
     * @return the first {@code maxLines} lines of the file, or fewer if the
     *         file is shorter
     * @throws IOException
     *           if the file cannot be opened or read
     */
    public List<String> readAllLines(Path filePath, long maxLines) throws IOException {
        FileSystem fs = FileSystem.get(this.conf);
        LineReader lr = new LineReader(fs.open(filePath));
        List<String> lines = new ArrayList<String>();
        Text line = new Text();
        long count = 0;
        try {
            // Check the limit first so no extra line is consumed once it is
            // reached; readLine returns 0 only at end of stream (a blank line
            // still consumes its terminator and returns 1).
            while (count < maxLines && lr.readLine(line) > 0) {
                lines.add(line.toString());
                count++;
            }
        } finally {
            lr.close();
        }
        return lines;
    }
}