org.theospi.portfolio.presentation.export.PresentationExport.java Source code

Java tutorial

Introduction

Here is the source code for org.theospi.portfolio.presentation.export.PresentationExport.java

Source

/**********************************************************************************
* $URL:https://source.sakaiproject.org/svn/osp/trunk/presentation/api-impl/src/java/org/theospi/portfolio/presentation/export/PresentationExport.java $
* $Id:PresentationExport.java 9134 2006-05-08 20:28:42Z chmaurer@iupui.edu $
***********************************************************************************
*
 * Copyright (c) 2005, 2006, 2008, 2009 The Sakai Foundation
 *
 * Licensed under the Educational Community License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.opensource.org/licenses/ECL-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*
**********************************************************************************/
package org.theospi.portfolio.presentation.export;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.zip.Adler32;
import java.util.zip.CheckedOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import websphinx.Access;
import websphinx.Crawler;
import websphinx.DownloadParameters;
import websphinx.Link;
import websphinx.LinkEvent;
import websphinx.LinkListener;
import websphinx.Page;

public class PresentationExport extends Crawler implements LinkListener {
    protected final transient Log logger = LogFactory.getLog(getClass());

    private PortfolioMirror mirror = null;
    private String hostName = null;
    private String webappName = null;
    private String tempDirectory = null;
    public static final int BUFFER = 1024 * 10;
    private ArrayList errorLinks = new ArrayList();
    private static SessionAccess access = new SessionAccess();

    public PresentationExport(String url, String tempDirectory) throws IOException {
        this.tempDirectory = tempDirectory;

        Access.setAccess(access);

        URL urlObj = new URL(url);
        this.hostName = urlObj.getHost();
        String path = urlObj.getPath();

        StringTokenizer tok = new StringTokenizer(path, "/", false);

        webappName = tok.nextToken();
        if (!tok.hasMoreTokens()) {
            webappName = "";
        } else {
            webappName = "/" + webappName;
        }

        mirror = new PortfolioMirror(tempDirectory, webappName);

        this.setRootHrefs(url);
        this.setLinkType(Crawler.ALL_LINKS);
        this.setSynchronous(true);
        this.setDomain(Crawler.WEB);
        this.addLinkListener(this);

        DownloadParameters dp = getDownloadParameters();
        dp = dp.changeMaxThreads(1);
        setDownloadParameters(dp.changeMaxPageSize(2000));
    }

    public void createZip(OutputStream out) throws IOException {
        File directory = new File(tempDirectory + webappName);

        CheckedOutputStream checksum = null;
        ZipOutputStream zos = null;
        try {
            checksum = new CheckedOutputStream(out, new Adler32());
            zos = new ZipOutputStream(new BufferedOutputStream(checksum));
            recurseDirectory("", directory, zos);

            zos.finish();
            zos.flush();
        } finally {
            if (zos != null) {
                try {
                    zos.close();
                } catch (IOException e) {
                }
            }
            if (checksum != null) {
                try {
                    checksum.close();
                } catch (IOException e) {
                }
            }
        }

    }

    /**
     * places a directory into the zip stream
     * @param parentPath
     * @param directory
     * @param zos
     * @throws IOException
     */
    protected void recurseDirectory(String parentPath, File directory, ZipOutputStream zos) throws IOException {
        // get all files... go through those
        File[] files = directory.listFiles(new DirectoryFileFilter(false));

        if (files == null)
            throw new NullPointerException(
                    "recursing through a directory which is not a directory: " + parentPath + " ---- " + directory);

        addFiles(zos, parentPath, files);

        // get all directories... go through those...
        File[] directories = directory.listFiles(new DirectoryFileFilter(true));
        for (int i = 0; i < directories.length; i++) {
            recurseDirectory(parentPath + directories[i].getName() + "/", directories[i], zos);
        }

    }

    protected void addFiles(ZipOutputStream out, String parentPrefix, File[] files) throws IOException {

        BufferedInputStream origin = null;

        byte data[] = new byte[BUFFER];
        for (int i = 0; i < files.length; i++) {
            String fileName = URLDecoder.decode(parentPrefix + files[i].getName());
            logger.debug("Adding " + fileName);
            InputStream in = null;
            try {
                in = new FileInputStream(files[i]);

                if (in == null)
                    throw new NullPointerException();

                origin = new BufferedInputStream(in, BUFFER);

                if (fileName == null)
                    throw new NullPointerException();

                ZipEntry entry = new ZipEntry(fileName);
                out.putNextEntry(entry);
                int count;
                while ((count = origin.read(data, 0, BUFFER)) != -1) {
                    out.write(data, 0, count);
                }
                out.closeEntry();
            } finally {
                try {
                    if (origin != null) {
                        origin.close();
                    }
                } catch (Exception e) {
                    logger.warn("Error cleaning up resource: ", e);
                }
                try {
                    in.close();
                } catch (Exception e) {
                    logger.warn("Error cleaning up resource: ", e);
                }
            }
        }
    }

    /**
     * Start crawling.  Returns either when the crawl is done, or
     * when pause() or stop() is called.  Because this method implements the
     * java.lang.Runnable interface, a crawler can be run in the
     * background thread.
     */
    public void run() {
        super.run();

        // process error links
        for (Iterator i = errorLinks.iterator(); i.hasNext();) {
            Link link = (Link) i.next();
            visit(link.getPage());
        }
    }

    public synchronized void visit(Page page) {

        try {
            mirror.writePage(page);
            mirror.rewrite();
        } catch (IOException e) {
            logger.info("Error visiting link.  Most likely broken link.", e);
        }

        logger.debug("visiting page");
        super.visit(page);
    }

    public synchronized boolean shouldVisit(Link link) {
        if (link.getMethod() == Link.POST) {
            return false;
        }

        if (!link.getHost().equalsIgnoreCase(hostName)) {
            return false;
        }

        // TODO maybe if (link.getURL().getFile().startsWith(webappName + "/showPublicPortfolio.do")) {
        //   return false;
        //}

        return true;
    }

    public void deleteTemp() {
        File temp = new File(tempDirectory);

        deleteContent(temp);
        temp.delete();
    }

    protected void deleteContent(File directory) {
        File[] files = directory.listFiles(new DirectoryFileFilter(false));

        if (files != null) {
            for (int i = 0; i < files.length; i++) {
                files[i].delete();
            }
        }

        // get all directories... go through those...
        File[] directories = directory.listFiles(new DirectoryFileFilter(true));
        if (directories != null) {
            for (int i = 0; i < directories.length; i++) {
                deleteContent(directories[i]);
                directories[i].delete();
            }
        }
    }

    /**
     * Notify that an event occured on a link.
     */
    public void crawled(LinkEvent event) {
        if (event.getID() == LinkEvent.ERROR) {
            // switch to stream page link

            if (!(event.getLink().getPage() instanceof StreamedPage)) {
                logger.debug("loading file through streamed page.");
                Link newLink = new Link(event.getLink().getURL());
                newLink.setPage(new StreamedPage(event.getLink()));
                addErrorLink(newLink);
            } else {
                logger.error("Link error " + event.getLink().getURL().toExternalForm(), event.getException());
            }
        } else if (event.getID() == LinkEvent.QUEUED) {
            if (event.getLink().getPage() instanceof StreamedPage) {
                event.getLink().setStatus(LinkEvent.DOWNLOADED);
            }
        }
    }

    protected synchronized void addErrorLink(Link newLink) {
        errorLinks.add(newLink);
    }

    /**
     * Implements the FileFilter.  it accepts the switch of whether to accept files or directories
     *
     */
    private static class DirectoryFileFilter implements FileFilter {
        private boolean directories = false;

        public DirectoryFileFilter(boolean directories) {
            this.directories = directories;
        }

        /**
         * Tests whether or not the specified abstract pathname should be
         * included in a pathname list.
         *
         * @param pathname The abstract pathname to be tested
         * @return <code>true</code> if and only if <code>pathname</code>
         *         should be included
         */
        public boolean accept(File pathname) {
            if (directories) {
                return pathname.isDirectory();
            } else {
                return pathname.isFile();
            }
        }

    }

}