org.punksearch.crawler.adapters.FtpAdapter.java Source code

Java tutorial

Introduction

Here is the source code for org.punksearch.crawler.adapters.FtpAdapter.java

Source

/***************************************************************************
 *                                                                         *
 *   PunkSearch - Searching over LAN                                       *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/
package org.punksearch.crawler.adapters;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.net.ftp.FTPClient;
import org.apache.commons.net.ftp.FTPFile;
import org.punksearch.logic.online.OnlineStatuses;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;

import static org.punksearch.crawler.CrawlerKeys.*;

/**
 * Adapter for crawling FTP hosts. Uses commons-net library.
 * <p>
 * A single instance wraps one {@link FTPClient} and therefore talks to at most one host at a time:
 * {@link #connect(String)} always drops the previous connection first. Items handed out by
 * {@link #getRootDir()} and {@link #list(Object)} are {@code FtpItem} instances whose paths are
 * relative to the working directory the server reported right after login.
 *
 * @author Yury Soldak (ysoldak@gmail.com)
 */
public class FtpAdapter implements ProtocolAdapter {

    private static final Log __log = LogFactory.getLog(FtpAdapter.class);

    private FTPClient ftp = new FTPClient();

    /** Working directory reported by the server right after login; may be null if the server gave none. */
    private String rootPath;

    /**
     * Connects and logs in to the FTP server at the given address.
     *
     * @param ip address of the host to connect to
     * @return true if the host is online, the connection was established and the login was accepted;
     *         false otherwise (any failure is logged, never thrown)
     */
    public boolean connect(String ip) {
        disconnect();

        __log.trace("Check if server has active ftp: " + ip);
        if (!OnlineStatuses.getInstance().isOnline("ftp://" + ip)) {
            return false;
        }

        try {
            __log.trace("Connecting to server: " + ip);
            setupFtpClient(ip);
            ftp.connect(ip);
            // login() signals rejected credentials through its return value, not through an exception
            if (!ftp.login(getUser(), getPassword())) {
                __log.warn("Login rejected (" + ftp.getReplyCode() + ") by the server: " + ip);
                disconnect();
                return false;
            }
            ftp.setFileType(FTPClient.BINARY_FILE_TYPE);
            setRootPath(ftp.printWorkingDirectory());
            return true;
        } catch (Exception e) {
            __log.warn("Exception (" + e.getMessage() + ") during connecting the server: " + ip);
            disconnect();
            return false;
        }
    }

    /**
     * Closes the current connection, if any. Never throws: on failure the problem is logged and the
     * client object is replaced by a fresh one.
     */
    public void disconnect() {
        try {
            if (ftp.isConnected()) {
                __log.trace("Disconnectiong from server: " + ftp.getRemoteAddress().getHostAddress());
                ftp.disconnect();
            }
        } catch (Exception e) {
            __log.warn("Exception (" + e.getMessage() + ") during disconnecting a server");
            ftp = new FTPClient();
        }
    }

    /**
     * test-friendly method
     */
    protected void setRootPath(String path) {
        rootPath = path;
    }

    /**
     * Reads the first bytes of a remote file.
     *
     * @param item   the {@code FtpItem} to read
     * @param length number of bytes to read
     * @return an array of exactly {@code length} bytes (zero-padded when the file is shorter), or
     *         null if the item is not a regular file, the server refused the transfer or an I/O
     *         error occurred
     */
    public byte[] header(Object item, int length) {
        FtpItem file = (FtpItem) item;
        if (!file.isFile()) {
            return null;
        }
        try {
            // address the file itself (not its parent directory), the same way list() addresses directories
            String filePath = toRemotePath(file);
            InputStream is = ftp.retrieveFileStream(filePath);
            if (is == null) {
                __log.debug("Can't read header for the file (" + ftp.getReplyCode() + "): " + filePath);
                return null;
            }
            byte[] buf = new byte[length];
            try {
                // a single read() may deliver fewer bytes than requested, so loop until full or EOF
                int total = 0;
                while (total < length) {
                    int count = is.read(buf, total, length - total);
                    if (count < 0) {
                        break;
                    }
                    total += count;
                }
            } finally {
                try {
                    is.close();
                } finally {
                    // consume the transfer reply so the control connection stays in sync
                    ftp.completePendingCommand();
                }
            }
            return buf;
        } catch (IOException e) {
            __log.debug("Can't read header for the file (i/o error): " + getFullPath(item));
            return null;
        }
    }

    /** @return modification time of the given {@code FtpItem} as reported by the item itself */
    public long getModificationTime(Object item) {
        return ((FtpItem) item).getModificationTime();
    }

    /** @return name (last path element) of the given {@code FtpItem} */
    public String getName(Object item) {
        return ((FtpItem) item).getName();
    }

    /** @return path of the given {@code FtpItem} without its name */
    public String getPath(Object item) {
        return ((FtpItem) item).getPath();
    }

    /** @return path of the given {@code FtpItem} including its name */
    public String getFullPath(Object item) {
        return ((FtpItem) item).getFullPath();
    }

    /** @return the protocol name, always {@code "ftp"} */
    public String getProtocol() {
        return "ftp";
    }

    /**
     * Returns the item that represents the root directory of the connected host.
     *
     * @return a {@code FtpItem} with an empty path
     * @throws IllegalStateException if there is no open connection
     */
    public Object getRootDir() {
        if (ftp == null || !ftp.isConnected()) {
            __log.error("Can't get root dir since not connected to any ftp host");
            throw new IllegalStateException("Can't get root dir since not connected to any ftp host");
        }
        return new FtpItem(null, "");
    }

    /**
     * Not supported by this adapter.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Object resolvePath(String path) {
        throw new UnsupportedOperationException("not yet implemented");
    }

    /** @return size of the given {@code FtpItem} as reported by the item itself */
    public long getSize(Object item) {
        return ((FtpItem) item).getSize();
    }

    public boolean isDirectory(Object item) {
        return ((FtpItem) item).isDirectory();
    }

    public boolean isFile(Object item) {
        //return (!((FtpItem) item).isDirectory() && !((FtpItem) item).isLink());
        return ((FtpItem) item).isFile();
    }

    /** @return always false; this adapter does not distinguish hidden items */
    public boolean isHidden(Object item) {
        return false;
    }

    public boolean isLink(Object item) {
        return ((FtpItem) item).isLink();
    }

    /**
     * Lists the content of a remote directory.
     *
     * @param dir the {@code FtpItem} of the directory to list
     * @return the directory entries as {@code FtpItem}s; empty if the listing could not be processed
     * @throws RuntimeException wrapping the {@link IOException} when communication with the host
     *                          fails, so the crawler gives up crawling this host
     */
    public Object[] list(Object dir) {
        FtpItem item = (FtpItem) dir;
        try {
            FTPFile[] files = ftp.listFiles(toRemotePath(item));
            if (files == null) {
                return new FtpItem[0];
            }
            FtpItem[] result = new FtpItem[files.length];
            int count = 0;
            for (FTPFile file : files) {
                // commons-net may report listing lines it could not parse as null entries
                if (file == null) {
                    continue;
                }
                result[count++] = new FtpItem(file, item.getFullPath() + "/" + file.getName());
            }
            if (count < result.length) {
                FtpItem[] compact = new FtpItem[count];
                System.arraycopy(result, 0, compact, 0, count);
                return compact;
            }
            return result;
        } catch (IOException e) {
            // host communication problem occured, rethrow the exception so crawler will give up crawling this host
            __log.warn("I/O Exception during listing of dir: " + e.getMessage());
            throw new RuntimeException(e);
        } catch (Exception e) {
            __log.info("Exception (" + e.getMessage() + ") during listing directory: " + item.getFullPath());
            return new FtpItem[0];
        }
    }

    /**
     * Parses a per-host encoding specification of the form {@code key:value,key:value}.
     *
     * @param encString the specification, may be null or empty
     * @return a new mutable map of the parsed pairs; entries without a {@code :} are skipped
     */
    public Map<String, String> parseCustomEncodings(String encString) {
        return parsePairs(encString);
    }

    /**
     * Parses a per-host mode specification of the form {@code key:value,key:value}.
     *
     * @param modString the specification, may be null or empty
     * @return a new mutable map of the parsed pairs; entries without a {@code :} are skipped
     */
    public Map<String, String> parseCustomModes(String modString) {
        return parsePairs(modString);
    }

    /** Splits a comma separated list of {@code key:value} entries into a map, skipping malformed entries. */
    private static Map<String, String> parsePairs(String spec) {
        Map<String, String> result = new HashMap<String, String>();
        if (spec == null || spec.length() == 0) {
            return result;
        }

        for (String chunk : spec.split(",")) {
            String[] parts = chunk.split(":");
            if (parts.length < 2) {
                // one broken entry must not invalidate the whole configuration
                __log.warn("Ignoring malformed entry (expected key:value): " + chunk);
                continue;
            }
            result.put(parts[0], parts[1]);
        }
        return result;
    }

    /** Builds the path sent to the server for an item: the login directory followed by the item's full path. */
    private String toRemotePath(FtpItem item) {
        String root = (rootPath == null) ? "" : rootPath;
        String path = item.getFullPath();
        // avoid "//" when the login directory is "/" (or otherwise ends with a slash)
        if (root.endsWith("/") && path.startsWith("/")) {
            root = root.substring(0, root.length() - 1);
        }
        return root + path;
    }

    /** Picks the control connection encoding for a host: the custom one if configured, else the default. */
    private String getFtpEncodingForIp(String ip) {
        String defaultEnc = System.getProperty(FTP_ENCODING_DEFAULT, "UTF-8");
        String customEnc = System.getProperty(FTP_ENCODING_CUSTOM);
        Map<String, String> encMap = parseCustomEncodings(customEnc);
        return (encMap.containsKey(ip)) ? encMap.get(ip) : defaultEnc;
    }

    /*
    private boolean isActiveModeForIp(String ip) {
       String defaultMode = System.getProperty("org.punksearch.crawler.ftp.mode.default");
       String customMode = System.getProperty("org.punksearch.crawler.ftp.mode.custom");
       Map<String, String> customModes = parseCustomModes(customMode);
       String modeStr = (customModes.containsKey(ip)) ? customModes.get(ip) : defaultMode;
       return (modeStr.equals("active"));
    }
    */

    /**
     * Applies encoding and timeout settings to the client before connecting.
     * The timeout system property must hold an integer; otherwise a NumberFormatException is thrown,
     * which connect() reports as a failed connection.
     */
    private void setupFtpClient(String ip) throws IOException {
        if (ftp == null) {
            ftp = new FTPClient();
        }

        ftp.setControlEncoding(getFtpEncodingForIp(ip));
        // TODO
        // if (isActiveModeForIp(ip)) {
        // ftp.setConnectMode(FTPConnectMode.ACTIVE); } else { ftp.setConnectMode(FTPConnectMode.PASV); }
        // ftp.setRemoteHost(ip);
        ftp.setDefaultTimeout(Integer.parseInt(System.getProperty(FTP_TIMEOUT)));
    }

    /** @return the configured user name, or "anonymous" when the property is unset or empty */
    private String getUser() {
        String user = System.getProperty(FTP_USER);
        return (user == null || user.length() == 0) ? "anonymous" : user;
    }

    /** @return the configured password, or a placeholder e-mail when the property is unset or empty */
    private String getPassword() {
        String passwd = System.getProperty(FTP_PASSWORD);
        return (passwd == null || passwd.length() == 0) ? "some@email.com" : passwd;
    }

}

/**
 * A file or directory on an FTP host: the listing entry returned by the server (if any) together
 * with the item's full path. An item without a listing entry is treated as a directory with an
 * empty name, zero size and zero modification time.
 */
class FtpItem {

    private final FTPFile file;
    private final String fullpath;

    /**
     * @param file     listing entry of the item, or null when there is none
     * @param fullpath full path of the item, including its name
     */
    FtpItem(FTPFile file, String fullpath) {
        this.file = file;
        this.fullpath = fullpath;
    }

    // last part of path
    String getName() {
        if (file != null) {
            return file.getName();
        } else {
            return "";
        }
    }

    // full absolute path w/o last part (name)
    String getPath() {
        if (fullpath.length() > 1) { // "/a" -> "/", "/a/b/c" -> "/a/b/"
            return fullpath.substring(0, fullpath.lastIndexOf("/") + 1);
        } else {
            return "";
        }
    }

    // full absolute path w/o trailing "/"
    String getFullPath() {
        return fullpath;
    }

    /**
     * @return the timestamp of the listing entry in milliseconds since the epoch, or 0 when there is
     *         no listing entry or the entry carries no timestamp
     */
    long getModificationTime() {
        // the timestamp is absent when the date of the listing line could not be parsed
        if (file != null && file.getTimestamp() != null) {
            return file.getTimestamp().getTimeInMillis();
        }
        return 0;
    }

    /** @return whether the listing entry is a directory; true when there is no listing entry */
    boolean isDirectory() {
        if (file != null) {
            return file.isDirectory();
        }
        return true;
    }

    /** @return whether the listing entry is a regular file; false when there is no listing entry */
    boolean isFile() {
        if (file != null) {
            return file.isFile();
        }
        return false;
    }

    /** @return whether the listing entry is a symbolic link; false when there is no listing entry */
    boolean isLink() {
        if (file != null) {
            return file.isSymbolicLink();
        } else {
            return false;
        }
    }

    /** @return always false */
    boolean isHidden() {
        return false;
    }

    /** @return the size of the listing entry, or 0 when there is no listing entry */
    long getSize() {
        if (file != null) {
            return file.getSize();
        }
        return 0L;
    }
}