org.commoncrawl.service.listcrawler.ListUploadServlet.java Source code

Java tutorial

Introduction

Here is the source code for org.commoncrawl.service.listcrawler.ListUploadServlet.java, a servlet used to upload crawl lists to a crawler server.

Source

/**
 * Copyright 2008 - CommonCrawl Foundation
 * 
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 **/

package org.commoncrawl.service.listcrawler;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Set;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.Shell;
import org.commoncrawl.service.listcrawler.CrawlListDatabaseRecord;
import org.commoncrawl.util.CCStringUtils;

import com.google.common.collect.ImmutableSet;

/**
 * Servlet used to upload crawl lists to a crawler server.
 *
 * <p>
 * Two upload paths are supported:
 * <ul>
 * <li><b>PUT</b> - the request body is the raw list file; <code>customerId</code>,
 * <code>listName</code> and <code>fileName</code> are passed as request parameters.</li>
 * <li><b>POST</b> - a multipart form post (see {@link ListUploadForm}); the uploaded
 * file descriptors are read from the {@link #FILES} request attribute.</li>
 * </ul>
 * In both cases the file ends up in the crawl history data directory of the
 * {@link ProxyServer} singleton, a {@link CrawlListDatabaseRecord} is queued via
 * <code>queueListImportRequest</code>, and the resulting list id is written back
 * to the client as <code>text/plain</code>.
 *
 * <p>
 * The nested servlets are maintenance helpers that re-queue list files that
 * already exist in the data directory.
 *
 * @author rana
 *
 */
@SuppressWarnings("serial")
public class ListUploadServlet extends HttpServlet {

    public static final Log LOG = LogFactory.getLog(ListUploadServlet.class);

    /**
     * Re-queues a single, already uploaded list file.
     *
     * <p>
     * Request parameters: <code>listId</code> (numeric list id) and
     * <code>urlFile</code> (name of a file inside the crawl history data directory).
     */
    public static class ListRequeueServlet extends HttpServlet {

        public static final Log LOG = LogFactory.getLog(ListRequeueServlet.class);

        /**
         * Validates the request parameters and, if the named file exists,
         * hands it to <code>ProxyServer.requeueList</code>.
         *
         * <p>
         * Responds with 400 when <code>listId</code> is missing / not a number or
         * when <code>urlFile</code> is missing or is not a plain file name. A file
         * that does not exist is logged and otherwise ignored.
         */
        @Override
        protected void doGet(HttpServletRequest req, HttpServletResponse resp)
                throws ServletException, IOException {

            String listIdParam = req.getParameter("listId");
            String listFileName = req.getParameter("urlFile");

            LOG.info("###LISTUPLOADER: Requeue Request- ListId:" + listIdParam + " listFileName:" + listFileName);

            long listId;
            try {
                // parseLong(null) also raises NumberFormatException
                listId = Long.parseLong(listIdParam);
            } catch (NumberFormatException e) {
                LOG.error("###LISTUPLOADER: Requeue Request- Invalid ListId:" + listIdParam);
                resp.sendError(HttpServletResponse.SC_BAD_REQUEST, "Invalid List Id");
                return;
            }

            // urlFile is untrusted input: only accept a bare file name so the
            // request cannot escape the data directory (e.g. via "../")
            String safeFileName = toSafeFileName(listFileName);
            if (safeFileName == null || !safeFileName.equals(listFileName)) {
                LOG.error("###LISTUPLOADER: Requeue Request- Invalid listFileName:" + listFileName);
                resp.sendError(HttpServletResponse.SC_BAD_REQUEST, "Invalid Filename");
                return;
            }

            File listFile = new File(ProxyServer.getSingleton().getCrawlHistoryDataDir(), safeFileName);

            if (listFile.exists()) {
                ProxyServer.getSingleton().requeueList(listId, listFile);
            } else {
                LOG.warn("###LISTUPLOADER: Requeue Request- File Not Found:" + listFile.getAbsolutePath());
            }
        }
    }

    /**
     * One-off repair servlet that re-queues a hard-coded set of list files.
     */
    public static class RequeueBrokenListsServlet extends HttpServlet {

        public static final Log LOG = LogFactory.getLog(RequeueBrokenListsServlet.class);

        /**
         * Flattened pairs: element <code>2k</code> is the numeric suffix of the
         * <code>listURLS-*</code> file, element <code>2k+1</code> is the list id
         * passed to <code>requeueList</code> for that file.
         */
        private static final long[] IDS_TO_FIX = { 1286466854056L, 1286733537313L, 1286467182139L,
                1286733537315L, 1286467448576L, 1286733537316L, 1286467734918L, 1286733537318L, 1286468071056L,
                1286733537319L, 1286468376989L, 1286733537321L, 1286468673896L, 1286733537322L, 1286469018206L,
                1286733537324L, 1286469408437L, 1286733537327L, 1286469703877L, 1286733537329L, 1286469965566L,
                1286733537331L, 1286470262212L, 1286733537332L, 1286470558900L, 1286733537334L, 1286470853220L,
                1286733537360L };

        /**
         * Walks {@link #IDS_TO_FIX} pair by pair and re-queues every list whose
         * file is present in the crawl history data directory.
         */
        @Override
        protected void doGet(HttpServletRequest req, HttpServletResponse resp)
                throws ServletException, IOException {

            File dataDir = ProxyServer.getSingleton().getCrawlHistoryDataDir();

            // always advance by a full pair; advancing the index only when the
            // file exists would shift the (file, list id) pairing for every
            // subsequent entry and could run off the end of the array
            for (int i = 0; i + 1 < IDS_TO_FIX.length; i += 2) {
                long fileSuffix = IDS_TO_FIX[i];
                long listId = IDS_TO_FIX[i + 1];

                File listFile = new File(dataDir, "listURLS-" + fileSuffix);
                if (listFile.exists()) {
                    LOG.info("Reloading List File:" + listFile.getAbsolutePath());
                    ProxyServer.getSingleton().requeueList(listId, listFile);
                } else {
                    LOG.warn("List File Not Found, Skipping:" + listFile.getAbsolutePath());
                }
            }
        }
    }

    /**
     * Renders a minimal HTML form that posts a list file (multipart) to
     * <code>/ListUploader</code>.
     */
    public static class ListUploadForm extends HttpServlet {

        public static final Log LOG = LogFactory.getLog(ListUploadForm.class);

        /**
         * Writes the upload form; the field names (<code>customerId</code>,
         * <code>listName</code>, <code>listFile</code>) match what the POST
         * handler of the enclosing servlet reads.
         */
        @Override
        protected void doGet(HttpServletRequest req, HttpServletResponse resp)
                throws ServletException, IOException {

            resp.setContentType("text/html");
            PrintWriter writer = resp.getWriter();

            writer.println("<HTML>");

            writer.println("<form method='post' action='/ListUploader' enctype='multipart/form-data'>");
            writer.println("<table border=0>");
            writer.println("<tr><td>CustomerId:<td><input name='customerId' type='text' width=20 /></tr>");
            writer.println("<tr><td>List Name:<td><input name='listName' type='text' width=100 /></tr>");
            writer.println("<tr><td>List File:<td><input name='listFile' type='file' /></tr>");
            writer.println("<tr><td colspan=2>&nbsp</tr>");
            writer.println("<tr><td colspan=2><input type='submit' /></tr>");
            writer.println("</table>");
            writer.println("</form>");

            writer.println("</HTML>");
            writer.flush();
        }
    }

    /**
     * Request attribute holding the uploaded file descriptors for a multipart
     * POST (presumably populated by MultiPartFilter - confirm against the filter).
     */
    private final static String FILES = "org.mortbay.servlet.MultiPartFilter.files";

    /** Hard-coded set of customer ids that are allowed to upload lists. */
    private static final Set<String> customers = new ImmutableSet.Builder<String>().add("foobar").build();

    /** Total number of times the temp file move is attempted before giving up. */
    private static final int MAX_MOVE_ATTEMPTS = 10;

    /**
     * Handles a raw upload: the request body is streamed into a new file in the
     * crawl history data directory and an import request is queued for it.
     *
     * <p>
     * Required parameters: <code>customerId</code> (must be a known customer),
     * <code>listName</code> and <code>fileName</code>. Responds with 500 on
     * invalid parameters, on I/O failure while storing the body, or if queueing
     * fails; otherwise writes the new list id as <code>text/plain</code>.
     */
    @Override
    protected void doPut(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {

        String customerId = req.getParameter("customerId");
        String listName = req.getParameter("listName");
        String incomingFileName = req.getParameter("fileName");

        LOG.info("###LISTUPLOADER: GOT PUT Customer Id:" + customerId + " ListName:" + listName + " FileName:"
                + incomingFileName);

        if (!isKnownCustomerWithListName(customerId, listName)) {
            LOG.error(
                    "###LISTUPLOADER:No Customer Id or Invalid Customer Id:" + customerId + " ListId:" + listName);
            resp.sendError(500, "Invalid Customer Id or Invalid List Name!" + customerId + ":" + listName);
            return;
        }

        // fileName is untrusted: strip any directory part so the import file
        // always lands directly inside the data directory
        String safeFileName = toSafeFileName(incomingFileName);
        if (safeFileName == null) {
            LOG.error("###LISTUPLOADER:Missing or Invalid IncomingFilename:" + incomingFileName);
            resp.sendError(500, "Invalid Filename");
            return;
        }

        // get the server ...
        ProxyServer server = ProxyServer.getSingleton();
        // get the crawl history data directory ...
        File dataDir = server.getCrawlHistoryDataDir();
        // create import file (timestamp suffix keeps names unique) ...
        File importFile = new File(dataDir, safeFileName + "-" + System.currentTimeMillis());
        LOG.info("###LISTUPLOADER:Filename:" + incomingFileName + " Customer:" + customerId + " List:"
                + listName + " outputFile:" + importFile.getAbsolutePath());

        long listId;
        try {
            long totalBytesRead = copyToFile(req.getInputStream(), importFile, incomingFileName);

            LOG.info("###LISTUPLOADER:List:" + listName + " Finished download filename:" + incomingFileName
                    + " TotalBytesRead:" + totalBytesRead + "-Inserting Record");

            // won't reach here unless write succeeded ...
            listId = server.queueListImportRequest(
                    newDatabaseRecord(listName, customerId, incomingFileName, importFile));
        } catch (IOException e) {
            LOG.error("###LISTUPLOADER: IOException processing List:" + listName);
            LOG.error(CCStringUtils.stringifyException(e));
            importFile.delete();
            // tell the client the upload failed instead of returning an empty 200
            if (!resp.isCommitted()) {
                resp.sendError(500, "Failed to Store Uploaded List!");
            }
            return;
        }

        LOG.info("###LISTUPLOADER:Queueing List:" + listName + " ListID:" + listId);

        // the response is written outside of the try block above on purpose:
        // a failure to answer the client must not delete a file that has
        // already been queued for import
        if (listId == -1) {
            LOG.error("###LISTUPLOADER:Queueing For List:" + listName + " Failed!");
            resp.sendError(500, "Queue Request Failed!");
        } else {
            writeListId(resp, listId);
        }
    }

    /**
     * Handles a multipart form upload. The first uploaded file found under the
     * {@link #FILES} request attribute is moved into the crawl history data
     * directory and an import request is queued for it.
     *
     * <p>
     * Required parameters: <code>customerId</code> (must be a known customer)
     * and <code>listName</code>. Only files with content type
     * <code>text/plain</code> are accepted. Responds with 500 on any validation
     * or processing failure; otherwise writes the new list id as
     * <code>text/plain</code>.
     */
    @Override
    protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {

        String customerId = req.getParameter("customerId");
        String listName = req.getParameter("listName");

        LOG.info("###LISTUPLOADER: GOT POST CustomerId:" + customerId + " ListName:" + listName);

        if (!isKnownCustomerWithListName(customerId, listName)) {
            resp.sendError(500, "Invalid Customer Id or Invalid List Name!" + customerId + ":" + listName);
            return;
        }

        // common prefix of every log line emitted for this request
        String logPrefix = "###LISTUPLOADER: CustomerId:" + customerId + " ListName:" + listName;

        @SuppressWarnings("unchecked")
        ArrayList<MultiPartFilter.UploadFileData> files = (ArrayList<MultiPartFilter.UploadFileData>) req
                .getAttribute(FILES);

        if (files == null || files.size() == 0) {
            LOG.error(logPrefix + " No Files in Mutlipart Body!");
            resp.sendError(500, "No File Selected!");
            return;
        }

        MultiPartFilter.UploadFileData uploadData = files.get(0);

        if (uploadData.incomingContentType == null || !uploadData.incomingContentType.equals("text/plain")) {
            LOG.error(logPrefix + " incoming MimeType:" + uploadData.incomingContentType + " NOT text/plain!");
            resp.sendError(500, "Only Text Files Supported For Now :-(");
            return;
        }

        // the client supplied file name is untrusted: keep only its last path
        // component (some browsers send a full local path)
        String safeFileName = toSafeFileName(uploadData.incomingFilename);
        if (safeFileName == null) {
            LOG.error(logPrefix + " Missing or Invalid Incoming FileName:" + uploadData.incomingFilename);
            resp.sendError(500, "Invalid Filename");
            return;
        }

        // get the server ...
        ProxyServer server = ProxyServer.getSingleton();
        // get the crawl history data directory ...
        File dataDir = server.getCrawlHistoryDataDir();
        LOG.info(logPrefix + "Incoming FileName is:" + uploadData.incomingFile.getAbsolutePath());

        // move the file
        File importFile = new File(dataDir, safeFileName + "-" + System.currentTimeMillis());
        LOG.info(logPrefix + "Renaming Incoming File to:" + importFile.getAbsolutePath());

        if (!moveUploadedFile(uploadData.incomingFile, importFile, logPrefix)) {
            LOG.error(logPrefix + " Gave Up Trying to Move File!");
            resp.sendError(500, "Failed to Copy Temp File!");
            return;
        }

        LOG.info(logPrefix + " Queueing Database Record");

        long listId = server.queueListImportRequest(
                newDatabaseRecord(listName, customerId, uploadData.incomingFilename, importFile));

        if (listId == -1) {
            LOG.error(logPrefix + " List Queueing Failed!");
            resp.sendError(500, "Queue Request Failed!");
        } else {
            LOG.info(logPrefix + " ListId:" + listId);
            writeListId(resp, listId);
        }
    }

    /** Returns true if the customer id is in the allowed set and the list name is non-empty. */
    private static boolean isKnownCustomerWithListName(String customerId, String listName) {
        return customerId != null && customers.contains(customerId) && listName != null
                && listName.length() != 0;
    }

    /**
     * Reduces a client supplied file name to its last path component.
     *
     * @param fileName name as sent by the client (may be null, may contain
     *                 '/' or '\' separated directory parts)
     * @return the bare file name, or null if the input is null, contains a NUL
     *         character, or has no usable last component (empty, "." or "..")
     */
    private static String toSafeFileName(String fileName) {
        if (fileName == null || fileName.indexOf('\0') != -1) {
            return null;
        }
        int lastSeparator = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\'));
        String baseName = fileName.substring(lastSeparator + 1);
        if (baseName.length() == 0 || baseName.equals(".") || baseName.equals("..")) {
            return null;
        }
        return baseName;
    }

    /**
     * Streams everything from <code>input</code> into <code>target</code>.
     *
     * @param sourceName name used in progress log lines only
     * @return number of bytes written
     * @throws IOException if reading, writing, flushing or closing fails; the
     *                     output stream is closed in every case
     */
    private static long copyToFile(InputStream input, File target, String sourceName) throws IOException {
        BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(target), 1 << 20);
        long totalBytesRead = 0;
        try {
            byte incomingBuffer[] = new byte[1 << 19];
            int bytesRead;
            while ((bytesRead = input.read(incomingBuffer)) != -1) {
                LOG.info("Read:" + bytesRead + " bytes from:" + sourceName);
                outputStream.write(incomingBuffer, 0, bytesRead);
                totalBytesRead += bytesRead;
            }
            // flush explicitly so a failed flush is reported rather than
            // depending on how close() treats flush errors
            outputStream.flush();
        } finally {
            outputStream.close();
        }
        return totalBytesRead;
    }

    /**
     * Moves <code>source</code> to <code>target</code> with the external
     * <code>mv</code> command, retrying once per second until the target
     * exists or {@link #MAX_MOVE_ATTEMPTS} attempts have failed.
     *
     * @return true if the target exists afterwards, false if all attempts
     *         failed or the thread was interrupted while waiting
     */
    private static boolean moveUploadedFile(File source, File target, String logPrefix) {
        for (int attempt = 1; !target.exists(); attempt++) {
            LOG.info(logPrefix + " Moving Temp File");

            try {
                Shell.execCommand(
                        new String[] { "mv", source.getAbsolutePath(), target.getAbsolutePath() });
            } catch (IOException e) {
                // a failed mv is just a failed attempt; fall through to the retry logic
                LOG.error(logPrefix + " Move Command Failed:" + CCStringUtils.stringifyException(e));
            }

            if (target.exists()) {
                break;
            }
            if (attempt == MAX_MOVE_ATTEMPTS) {
                LOG.error(logPrefix + " Rename Failed. Bailing!");
                return false;
            }
            LOG.error(logPrefix + " Rename Failed. Retrying");

            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // preserve the interrupt for the container and stop retrying
                Thread.currentThread().interrupt();
                LOG.error(logPrefix + " Interrupted While Waiting To Retry Rename. Bailing!");
                return false;
            }
        }
        return true;
    }

    /** Builds the database record describing an uploaded list file. */
    private static CrawlListDatabaseRecord newDatabaseRecord(String listName, String customerId,
            String sourceFileName, File importFile) {
        CrawlListDatabaseRecord databaseRecord = new CrawlListDatabaseRecord();

        databaseRecord.setListName(listName);
        databaseRecord.setCustomerName(customerId);
        databaseRecord.setSourceFileName(sourceFileName);
        databaseRecord.setTempFileName(importFile.getName());

        return databaseRecord;
    }

    /** Writes the list id to the response as a plain text body. */
    private static void writeListId(HttpServletResponse resp, long listId) throws IOException {
        resp.setContentType("text/plain");
        PrintWriter writer = resp.getWriter();
        writer.print(Long.toString(listId));
        writer.flush();
    }

}