org.apache.nutch.protocol.ftp.Client.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.nutch.protocol.ftp.Client.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.nutch.protocol.ftp;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;

import java.net.InetAddress;
import java.net.Socket;

import java.util.List;

import org.apache.commons.net.MalformedServerReplyException;

import org.apache.commons.net.ftp.FTP;
import org.apache.commons.net.ftp.FTPCommand;
import org.apache.commons.net.ftp.FTPFile;
import org.apache.commons.net.ftp.FTPFileEntryParser;
import org.apache.commons.net.ftp.FTPReply;

import org.apache.commons.net.ftp.FTPConnectionClosedException;

/***********************************************
 * Client.java encapsulates functionalities necessary for nutch to get dir list
 * and retrieve file from an FTP server. This class takes care of all low level
 * details of interacting with an FTP server and provides a convenient higher
 * level interface.
 * 
 * Modified from FtpClient.java in apache commons-net.
 * 
 * Notes by John Xing: ftp server implementations are hardly uniform and none
 * seems to follow RFCs whole-heartedly. We have no choice, but assume common
 * denominator as following: (1) Use stream mode for data transfer. Block mode
 * will be better for multiple file downloading and partial file downloading.
 * However not every ftpd has block mode support. (2) Use passive mode for data
 * connection. So Nutch will work if we run behind firewall. (3) Data connection
 * is opened/closed per ftp command for the reasons listed in (1). There are ftp
 * servers out there, when partial downloading is enforced by closing data
 * channel socket on our client side, the server side immediately closes control
 * channel (socket). Our codes deal with such a bad behavior. (4) LIST is used
 * to obtain remote file attributes if possible. MDTM and SIZE would be nice, but
 * not as ubiquitously implemented as LIST. (5) Avoid using ABOR in single
 * thread? Do not use it at all.
 * 
 * About exceptions: Some specific exceptions are re-thrown as one of the
 * FtpException* classes. In fact, each function either throws one of the
 * FtpException* classes or passes an IOException through.
 * 
 * @author John Xing
 ***********************************************/

public class Client extends FTP {
    // Read timeout in milliseconds for data sockets; only applied when >= 0
    // (the constructor sets it to -1).
    private int __dataTimeout;
    // Port taken from the most recently parsed PASV reply; -1 after a reset.
    private int __passivePort;
    // Dotted host taken from the most recently parsed PASV reply; null after a reset.
    private String __passiveHost;
    // private int __fileType, __fileFormat;
    // When true, a data socket whose peer fails verifyRemote() is rejected.
    private boolean __remoteVerificationEnabled;
    // private FTPFileEntryParser __entryParser;
    // Cached text of the SYST reply; null until fetched and after a reset.
    private String __systemName;

    /**
     * Creates a client with remote-host verification switched on, no data
     * timeout configured (-1) and all per-connection state cleared.
     */
    public Client() {
        __remoteVerificationEnabled = true;
        __dataTimeout = -1;
        __initDefaults();
    }

    /**
     * Resets the per-connection state: forgets the cached system name and the
     * passive-mode host and port.
     */
    private void __initDefaults() {
        __systemName = null;
        __passivePort = -1;
        __passiveHost = null;
    }

    /**
     * Extracts the data-connection endpoint from a PASV reply and stores it in
     * {@code __passiveHost} and {@code __passivePort}.
     * <p>
     * The reply must contain a parenthesised group of exactly six
     * comma-separated numbers, {@code (h1,h2,h3,h4,p1,p2)}, each in the range
     * 0-255. The host becomes {@code h1.h2.h3.h4} and the port
     * {@code (p1 << 8) | p2}. Neither field is modified when parsing fails.
     *
     * @param reply
     *          the reply line the server sent for PASV
     * @throws MalformedServerReplyException
     *           if the reply is null, has no parenthesised group, or the group
     *           is not six numbers in range
     */
    private void __parsePassiveModeReply(String reply) throws MalformedServerReplyException {
        final String failure = "Could not parse passive host information.\nServer Reply: " + reply;

        if (reply == null) {
            throw new MalformedServerReplyException(failure);
        }

        // A reply without "(...)" used to escape as an unchecked
        // StringIndexOutOfBoundsException; report it as a malformed reply instead.
        final int open = reply.indexOf('(');
        final int close = reply.indexOf(')', open + 1);
        if (open < 0 || close < 0) {
            throw new MalformedServerReplyException(failure);
        }

        // limit -1 keeps trailing empty fields so "1,2,3,4,5," is rejected
        final String[] fields = reply.substring(open + 1, close).trim().split(",", -1);
        if (fields.length != 6) {
            throw new MalformedServerReplyException(failure);
        }

        final int[] octets = new int[fields.length];
        for (int i = 0; i < fields.length; i++) {
            try {
                octets[i] = Integer.parseInt(fields[i].trim());
            } catch (NumberFormatException e) {
                throw new MalformedServerReplyException(failure);
            }
            if (octets[i] < 0 || octets[i] > 255) {
                throw new MalformedServerReplyException(failure);
            }
        }

        // assign only after everything validated, so a bad reply leaves no
        // half-updated endpoint behind
        __passiveHost = octets[0] + "." + octets[1] + "." + octets[2] + "." + octets[3];
        __passivePort = (octets[4] << 8) | octets[5];
    }

    /**
     * Opens a passive-mode data connection for one FTP command.
     * <p>
     * Sends PASV, parses the advertised endpoint, connects to it, and then sends
     * {@code command} on the control channel. The socket is handed to the caller
     * only if the server answers with a positive preliminary reply and, when
     * remote verification is enabled, the peer passes {@code verifyRemote}. In
     * every other outcome, including an exception, the socket is closed here.
     * <p>
     * An earlier workaround that re-issued PASV while the server kept handing out
     * the same port (20040317, xing) is intentionally not active.
     *
     * @param command
     *          the FTP command code to send once the data socket is connected
     * @param arg
     *          the argument for {@code command}
     * @return the connected data socket, or {@code null} if the server did not
     *         answer {@code command} with a positive preliminary reply
     * @throws IOException
     *           if an I/O error occurs on the control or data connection
     * @throws FtpExceptionCanNotHaveDataConnection
     *           if PASV is refused, its reply cannot be parsed, or the data
     *           peer fails remote verification
     */
    protected Socket __openPassiveDataConnection(int command, String arg)
            throws IOException, FtpExceptionCanNotHaveDataConnection {

        if (pasv() != FTPReply.ENTERING_PASSIVE_MODE) {
            throw new FtpExceptionCanNotHaveDataConnection("pasv() failed. " + getReplyString());
        }

        try {
            __parsePassiveModeReply(getReplyStrings()[0]);
        } catch (MalformedServerReplyException e) {
            throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
        }

        final Socket socket = _socketFactory_.createSocket(__passiveHost, __passivePort);

        // Ownership passes to the caller only on full success; the finally block
        // closes the socket on every other path so it cannot leak when
        // sendCommand(), verifyRemote() or setSoTimeout() throws.
        boolean handedOver = false;
        try {
            if (!FTPReply.isPositivePreliminary(sendCommand(command, arg))) {
                return null;
            }

            if (__remoteVerificationEnabled && !verifyRemote(socket)) {
                InetAddress host1 = socket.getInetAddress();
                InetAddress host2 = getRemoteAddress();

                // our precaution
                throw new FtpExceptionCanNotHaveDataConnection(
                        "Host attempting data connection " + host1.getHostAddress() + " is not same as server "
                                + host2.getHostAddress() + " So we intentionally close it for security precaution.");
            }

            if (__dataTimeout >= 0) {
                socket.setSoTimeout(__dataTimeout);
            }

            handedOver = true;
            return socket;
        } finally {
            if (!handedOver) {
                try {
                    socket.close();
                } catch (IOException ignored) {
                    // best-effort cleanup: do not let a close failure replace the
                    // outcome (null return or exception) already being reported
                }
            }
        }
    }

    /***
     * Stores the read timeout, in milliseconds, for data connections. The value
     * is applied to each data socket right after it is opened, and only when it
     * is not negative.
     *
     * @param timeout
     *          The timeout in milliseconds; a negative value leaves data sockets
     *          without an explicit timeout.
     ***/
    public void setDataTimeout(int timeout) {
        this.__dataTimeout = timeout;
    }

    /***
     * Closes the connection to the FTP server and restores connection parameters
     * (passive host/port and the cached system name) to their default values.
     * <p>
     *
     * @exception IOException
     *              If an error occurs while disconnecting.
     ***/
    @Override
    public void disconnect() throws IOException {
        __initDefaults();
        super.disconnect();
        // no worry for data connection, since we always close it
        // in every ftp command that involves data connection
    }

    /***
     * Switches on or off the check that the peer of a data connection is the
     * same host as the one the control connection is attached to. Instances
     * start with the check switched on.
     * <p>
     *
     * @param enable
     *          True to verify the data-connection peer, false to skip the check.
     ***/
    public void setRemoteVerificationEnabled(boolean enable) {
        this.__remoteVerificationEnabled = enable;
    }

    /***
     * Reports whether the peer of a data connection is checked against the host
     * of the control connection. Instances start with the check switched on.
     * <p>
     *
     * @return True if the data-connection peer is verified, false if not.
     ***/
    public boolean isRemoteVerificationEnabled() {
        return this.__remoteVerificationEnabled;
    }

    /***
     * Logs in to the FTP server with the given credentials. The username is sent
     * first; the password is sent only if the server answers the username with a
     * positive intermediate reply.
     * <p>
     *
     * @param username
     *          The username to login under.
     * @param password
     *          The password to use.
     * @return True if the username alone, or the username followed by the
     *         password, was answered with a positive completion reply; false
     *         otherwise.
     * @exception FTPConnectionClosedException
     *              If the FTP server prematurely closes the connection as a
     *              result of the client being idle or some other reason causing
     *              the server to send FTP reply code 421. This exception may be
     *              caught either as an IOException or independently as itself.
     * @exception IOException
     *              If an I/O error occurs while either sending a command to the
     *              server or receiving a reply from the server.
     ***/
    public boolean login(String username, String password) throws IOException {
        user(username);
        final int userReply = getReplyCode();

        // the server accepted the username without asking for a password
        if (FTPReply.isPositiveCompletion(userReply)) {
            return true;
        }

        // a positive intermediate reply means the server wants the password
        if (FTPReply.isPositiveIntermediate(userReply)) {
            final int passReply = pass(password);
            return FTPReply.isPositiveCompletion(passReply);
        }

        // anything else is an error reply to USER
        return false;
    }

    /***
     * Ends the session by sending the QUIT command.
     * <p>
     *
     * @return True if the server answered QUIT with a positive completion reply,
     *         false if not.
     * @exception FTPConnectionClosedException
     *              If the FTP server prematurely closes the connection as a
     *              result of the client being idle or some other reason causing
     *              the server to send FTP reply code 421. This exception may be
     *              caught either as an IOException or independently as itself.
     * @exception IOException
     *              If an I/O error occurs while either sending a command to the
     *              server or receiving a reply from the server.
     ***/
    public boolean logout() throws IOException {
        final int quitReply = quit();
        return FTPReply.isPositiveCompletion(quitReply);
    }

    /**
     * Issues LIST for {@code path} over a passive data connection and appends
     * each successfully parsed entry to {@code entries}.
     * <p>
     * Lines the parser cannot turn into an {@link FTPFile} are skipped. When
     * {@code limit} is non-negative, reading stops after the first entry that
     * pushes the accumulated line length over {@code limit}; that entry is still
     * added. The data socket is always closed before the final control-channel
     * reply is read, even if reading or parsing fails.
     *
     * @param path
     *          the remote path to list
     * @param entries
     *          the list that receives the parsed entries
     * @param limit
     *          maximum accumulated line length to accept, or a negative value
     *          for no limit
     * @param parser
     *          the parser used to read and interpret listing lines
     * @throws IOException
     *           if an I/O error occurs on the control or data connection
     * @throws FtpExceptionCanNotHaveDataConnection
     *           if no data connection could be opened for LIST
     * @throws FtpExceptionUnknownForcedDataClose
     *           if the reply after closing the data connection is not one of the
     *           accepted codes
     * @throws FtpExceptionControlClosedByForcedDataClose
     *           if the server closed the control connection after the data
     *           connection was closed
     */
    public void retrieveList(String path, List<FTPFile> entries, int limit, FTPFileEntryParser parser)
            throws IOException, FtpExceptionCanNotHaveDataConnection, FtpExceptionUnknownForcedDataClose,
            FtpExceptionControlClosedByForcedDataClose {
        Socket socket = __openPassiveDataConnection(FTPCommand.LIST, path);

        if (socket == null)
            throw new FtpExceptionCanNotHaveDataConnection("LIST " + ((path == null) ? "" : path));

        try {
            // NOTE(review): the reader decodes with the platform default charset
            BufferedReader reader = new BufferedReader(new InputStreamReader(socket.getInputStream()));

            int count = 0;
            String line;
            while ((line = parser.readNextEntry(reader)) != null) {
                FTPFile ftpFile = parser.parseFTPEntry(line);
                // skip non-formatted lines
                if (ftpFile == null) {
                    continue;
                }
                entries.add(ftpFile);
                count += line.length();
                // impose download limit if limit >= 0, otherwise no limit
                // here, cut off is up to the line when total bytes is just over limit
                if (limit >= 0 && count > limit) {
                    break;
                }
            }
        } finally {
            // Always close the data channel, whether the listing was read to the
            // end, cut off by the limit, or aborted by an exception (previously an
            // exception while reading left the socket open).
            socket.close();
        }

        // Different servers answer differently once the data channel is closed,
        // especially when it was closed before all data had been read.
        try {
            int reply = getReply();
            if (!_notBadReply(reply))
                throw new FtpExceptionUnknownForcedDataClose(getReplyString());
        } catch (FTPConnectionClosedException e) {
            // some ftp servers will close control channel if data channel socket
            // is closed by our end before all data has been read out. Check:
            // tux414.q-tam.hp.com FTP server (hp.com version whp02)
            // so must catch FTPConnectionClosedException thrown by getReply() above
            throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
        }

    }

    /**
     * Issues RETR for {@code path} over a passive data connection and copies the
     * received bytes to {@code os}.
     * <p>
     * When {@code limit} is non-negative, exactly {@code limit} bytes at most
     * are written and the transfer is then cut off by closing the data socket.
     * The output stream is flushed after every write, including the final
     * truncated one, and is not closed. The data socket is always closed before
     * the final control-channel reply is read, even if reading or writing fails.
     * No transfer type is negotiated here; the bytes are copied as received.
     *
     * @param path
     *          the remote file to retrieve
     * @param os
     *          the stream that receives the file content
     * @param limit
     *          maximum number of bytes to write, or a negative value for no
     *          limit
     * @throws IOException
     *           if an I/O error occurs on the control connection, the data
     *           connection or {@code os}
     * @throws FtpExceptionCanNotHaveDataConnection
     *           if no data connection could be opened for RETR
     * @throws FtpExceptionUnknownForcedDataClose
     *           if the reply after closing the data connection is not one of the
     *           accepted codes
     * @throws FtpExceptionControlClosedByForcedDataClose
     *           if the server closed the control connection after the data
     *           connection was closed
     */
    public void retrieveFile(String path, OutputStream os, int limit)
            throws IOException, FtpExceptionCanNotHaveDataConnection, FtpExceptionUnknownForcedDataClose,
            FtpExceptionControlClosedByForcedDataClose {

        Socket socket = __openPassiveDataConnection(FTPCommand.RETR, path);

        if (socket == null)
            throw new FtpExceptionCanNotHaveDataConnection("RETR " + ((path == null) ? "" : path));

        try {
            InputStream input = socket.getInputStream();

            // fixme, should we instruct server here for binary file type?

            int len;
            // long, so the running total cannot wrap around on very large files
            long count = 0;
            byte[] buf = new byte[org.apache.commons.net.io.Util.DEFAULT_COPY_BUFFER_SIZE];
            while ((len = input.read(buf, 0, buf.length)) != -1) {
                count += len;
                // impose download limit if limit >= 0, otherwise no limit
                // here, cut off is exactly of limit bytes
                if (limit >= 0 && count > limit) {
                    // the overshoot is at most len, so the cast cannot truncate
                    os.write(buf, 0, len - (int) (count - limit));
                    os.flush();
                    break;
                }
                os.write(buf, 0, len);
                os.flush();
            }
        } finally {
            // Always close the data channel, whether the file was read to the
            // end, cut off by the limit, or aborted by an exception (previously an
            // exception while copying left the socket open).
            socket.close();
        }

        // Different servers answer differently once the data channel is closed,
        // especially when it was closed before all data had been read.
        // ABOR is deliberately not sent.
        try {
            int reply = getReply();
            if (!_notBadReply(reply))
                throw new FtpExceptionUnknownForcedDataClose(getReplyString());
        } catch (FTPConnectionClosedException e) {
            // some ftp servers will close control channel if data channel socket
            // is closed by our end before all data has been read out. Check:
            // tux414.q-tam.hp.com FTP server (hp.com version whp02)
            // so must catch FTPConnectionClosedException thrown by getReply() above
            throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
        }

    }

    /**
     * Decides whether the control-channel reply received after the data
     * connection was closed is acceptable.
     * <p>
     * Besides any positive completion reply, three codes are tolerated because
     * some servers send them when the client closes the data channel early:
     * <ul>
     * <li>426 (TRANSFER_ABORTED), e.g. foggy FTP server (Version wu-2.6.2(2))</li>
     * <li>450 (FILE_ACTION_NOT_TAKEN), e.g. ProFTPD [ftp.kernel.org]</li>
     * <li>451 (ACTION_ABORTED), e.g. ProFTPD [ftp.kernel.org]</li>
     * </ul>
     * No second reply is read after any of these codes.
     *
     * @param reply
     *          the reply code to check
     * @return true if the reply is a positive completion or one of the tolerated
     *         codes, false for anything else
     */
    private boolean _notBadReply(int reply) {
        return FTPReply.isPositiveCompletion(reply)
                || reply == FTPReply.TRANSFER_ABORTED
                || reply == FTPReply.FILE_ACTION_NOT_TAKEN
                || reply == FTPReply.ACTION_ABORTED;
    }

    /***
     * Asks the server to switch the transfer file type by sending the TYPE
     * command with the given constant, for example
     * <code> FTP.ASCII_FILE_TYPE </code> or <code> FTP.IMAGE_FILE_TYPE </code>.
     * This class does not record the selected type locally.
     * <p>
     *
     * @param fileType
     *          The <code> _FILE_TYPE </code> constant indicating the type of
     *          file.
     * @return True if the server answered with a positive completion reply,
     *         false if not.
     * @exception FTPConnectionClosedException
     *              If the FTP server prematurely closes the connection as a
     *              result of the client being idle or some other reason causing
     *              the server to send FTP reply code 421. This exception may be
     *              caught either as an IOException or independently as itself.
     * @exception IOException
     *              If an I/O error occurs while either sending a command to the
     *              server or receiving a reply from the server.
     ***/
    public boolean setFileType(int fileType) throws IOException {
        final int typeReply = type(fileType);
        return FTPReply.isPositiveCompletion(typeReply);
    }

    /***
     * Fetches the system type name from the server and returns the string. The
     * value is cached after the first successful call, so SYST is sent only
     * until it has succeeded once; later calls return the cached value until
     * {@link #disconnect()} clears it.
     * <p>
     *
     * @return The system type name obtained from the server: the first reply
     *         line with its first four characters (reply code and separator)
     *         removed.
     * @exception FtpExceptionBadSystResponse
     *              If the server does not answer SYST with a positive completion
     *              reply, or the reply line is too short to carry a name.
     * @exception FTPConnectionClosedException
     *              If the FTP server prematurely closes the connection as a
     *              result of the client being idle or some other reason causing
     *              the server to send FTP reply code 421. This exception may be
     *              caught either as an IOException or independently as itself.
     * @exception IOException
     *              If an I/O error occurs while either sending a command to the
     *              server or receiving a reply from the server.
     ***/
    public String getSystemName() throws IOException, FtpExceptionBadSystResponse {
        // Serve the cached value first. The previous combined condition fell
        // through to the error branch whenever a name was already cached, so
        // every call after the first one threw.
        if (__systemName != null) {
            return __systemName;
        }

        // Technically, we should expect a NAME_SYSTEM_TYPE response, but
        // in practice FTP servers deviate, so we soften the condition to
        // a positive completion.
        if (!FTPReply.isPositiveCompletion(syst())) {
            throw new FtpExceptionBadSystResponse("Bad response of SYST: " + getReplyString());
        }

        String firstLine = getReplyStrings()[0];
        // a line shorter than "NNN " cannot be cut at index 4
        if (firstLine.length() < 4) {
            throw new FtpExceptionBadSystResponse("Bad response of SYST: " + getReplyString());
        }

        __systemName = firstLine.substring(4);
        return __systemName;
    }

    /***
     * Sends a NOOP command to the FTP server, which is useful for keeping the
     * server from timing the session out.
     * <p>
     *
     * @return True if the server answered with a positive completion reply,
     *         false if not.
     * @exception FTPConnectionClosedException
     *              If the FTP server prematurely closes the connection as a
     *              result of the client being idle or some other reason causing
     *              the server to send FTP reply code 421. This exception may be
     *              caught either as an IOException or independently as itself.
     * @exception IOException
     *              If an I/O error occurs while either sending a command to the
     *              server or receiving a reply from the server.
     ***/
    public boolean sendNoOp() throws IOException {
        final int noopReply = noop();
        return FTPReply.isPositiveCompletion(noopReply);
    }

    // client.stat(path);
    // client.sendCommand("STAT");
    // client.sendCommand("STAT",path);
    // client.sendCommand("MDTM",path);
    // client.sendCommand("SIZE",path);
    // client.sendCommand("HELP","SITE");
    // client.sendCommand("SYST");
    // client.setRestartOffset(120);

}