org.archive.io.WriterPool.java Source code

Java tutorial

Introduction

Here is the source code for org.archive.io.WriterPool.java

Source

/*
 *  This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual 
 *  contributors. 
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.archive.io;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.LinkedList;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

/**
 * Pool of Writers.
 * 
 * Abstract. Subclasses implement {@link #makeWriter()} to supply the
 * concrete {@link WriterPoolMember} instances managed by the pool.
 * 
 * <p>Writers are handed out with {@link #borrowFile()} and must be given
 * back with {@link #returnFile(WriterPoolMember)} or
 * {@link #invalidateFile(WriterPoolMember)}. The count of live writers
 * ({@link #currentActive}) is guarded by this object's monitor; idle writers
 * sit in a fair {@link BlockingQueue}.
 * 
 * @author stack
 */
public abstract class WriterPool {
    private final Logger logger = Logger.getLogger(this.getClass().getName());

    /**
     * How long (ms) a drain waits, with the pool monitor released, before
     * re-checking whether every live writer has been collected.
     */
    private static final long DRAIN_RECHECK_MILLIS = 50;

    /**
     * Used to generate unique filename sequences.
     */
    final protected AtomicInteger serialNo;

    /**
     * Default maximum active number of files in the pool.
     */
    public static final int DEFAULT_MAX_ACTIVE = 1;

    /** Assumed largest possible value of maxActive; pool will have this
     * maximum capacity, so dynamic changes beyond this number won't work. */
    protected static final int LARGEST_MAX_ACTIVE = 255;

    /**
     * Maximum time to wait on a free file before considering
     * making a new one (if not already at max)
     */
    public static final int DEFAULT_MAX_WAIT_FOR_IDLE = 500;

    /**
     * File settings.
     * Keep in data structure rather than as individual values.
     */
    protected final WriterPoolSettings settings;

    /** maximum number of writers to create at a time*/
    protected int maxActive;
    /** maximum ms to wait before considering creation of a writer */
    protected int maxWait;
    /** current count of active writers; only read/mutated in synchronized blocks */
    protected int currentActive = 0;
    /** round-robin queue of available writers */
    protected BlockingQueue<WriterPoolMember> availableWriters;

    /** system time when writer was last wanted (because one was not ready in time) */
    protected long lastWriterNeededTime;
    /** system time when writer was last 'rolled over' (imminent creation of new file) */
    protected long lastWriterRolloverTime;

    /**
     * True while some thread is inside {@link #drainAllWriters()}; guarded by
     * this object's monitor. Ensures only one drain collects writers at a
     * time, so two drains can never each end up holding part of the pool.
     */
    private boolean draining = false;

    /**
     * Constructor
     * @param serial  Used to generate unique filename sequences
     * @param settings Settings for this pool.
     * @param poolMaximumActive maximum number of writers alive at once
     * @param poolMaximumWait maximum ms to wait for an idle writer before
     * considering creation of a new one
     */
    public WriterPool(final AtomicInteger serial, final WriterPoolSettings settings, final int poolMaximumActive,
            final int poolMaximumWait) {
        logger.info("Initial configuration:" + " prefix=" + settings.getPrefix() + ", template="
                + settings.getTemplate() + ", compress=" + settings.getCompress() + ", maxSize="
                + settings.getMaxFileSizeBytes() + ", maxActive=" + poolMaximumActive + ", maxWait="
                + poolMaximumWait);
        this.settings = settings;
        this.maxActive = poolMaximumActive;
        this.maxWait = poolMaximumWait;
        availableWriters = new ArrayBlockingQueue<WriterPoolMember>(LARGEST_MAX_ACTIVE, true);
        this.serialNo = serial;
    }

    /**
     * Check out a {@link WriterPoolMember}.
     * 
     * This method should be followed by a call to
     * {@link #returnFile(WriterPoolMember)} or 
     * {@link #invalidateFile(WriterPoolMember)} else pool starts leaking.
     * 
     * <p>The call keeps trying until a writer is obtained, even if the
     * calling thread is interrupted; in that case the thread's interrupt
     * status is restored before returning so callers can still observe it.
     * 
     * @return Writer checked out of a pool of files or created
     * @throws IOException Problem getting Writer from pool (Converted
     * from Exception to IOException so this pool can live as a good citizen
     * down in depths of ARCSocketFactory).
     */
    public WriterPoolMember borrowFile() throws IOException {
        WriterPoolMember writer = null;
        boolean interrupted = false;
        try {
            while (writer == null) {
                try {
                    writer = availableWriters.poll(maxWait, TimeUnit.MILLISECONDS);
                } catch (InterruptedException e) {
                    // Proceed as before, but remember the interrupt. It is
                    // re-asserted only on exit: re-asserting here would make
                    // every subsequent poll() throw immediately and spin.
                    interrupted = true;
                }
                if (writer == null) {
                    writer = makeNewWriterIfAppropriate();
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
        return writer;
    }

    /**
     * Create a new writer instance, if still below maxActive count. 
     * Remember times to help make later decision when writer should 
     * be discarded. 
     * 
     * <p>If {@link #makeWriter()} throws or returns null, the slot reserved
     * in {@link #currentActive} is released again so a failed creation does
     * not permanently shrink the pool.
     * 
     * @return WriterPoolMember or null if already at max
     */
    protected synchronized WriterPoolMember makeNewWriterIfAppropriate() {
        long now = System.currentTimeMillis();
        lastWriterNeededTime = now;
        if (currentActive >= maxActive) {
            return null;
        }
        currentActive++;
        lastWriterRolloverTime = now;
        WriterPoolMember writer = null;
        try {
            writer = makeWriter();
            return writer;
        } finally {
            if (writer == null) {
                // nothing was created: give the reserved slot back
                currentActive--;
            }
        }
    }

    /**
     * @return new WriterPoolMember of appropriate type
     */
    protected abstract WriterPoolMember makeWriter();

    /**
     * Discard a previously-used writer, cleanly closing it and leaving it out
     * of the pool. The active count is decremented before the close is
     * attempted, so it drops even when closing fails.
     * @param writer writer to close and forget
     * @throws IOException if closing the writer fails
     */
    public synchronized void destroyWriter(WriterPoolMember writer) throws IOException {
        currentActive--;
        writer.close();
    }

    /**
     * Return a writer, for likely reuse unless (1) writer's current file has 
     * reached its target size; and (2) there's been no demand for additional 
     * writers since the last time a new writer-file was rolled-over. In that
     * case, the possibly-superfluous writer instance is discarded. 
     * @param writer Writer to return to the pool.
     * @throws IOException Problem returning File to pool.
     */
    public void returnFile(WriterPoolMember writer) throws IOException {
        synchronized (this) {
            if (writer.isOversize()) {
                // maybe retire writer rather than recycle
                if (lastWriterNeededTime <= lastWriterRolloverTime) {
                    // no timeouts waiting for recycled writer since last writer rollover
                    destroyWriter(writer);
                    return;
                } else {
                    // reuse writer instance, causing new file to be created
                    lastWriterRolloverTime = System.currentTimeMillis();
                }
            }
        }
        if (!availableWriters.offer(writer)) {
            logger.log(Level.WARNING, "writer unreturnable to available pool; closing early");
            destroyWriter(writer);
        }
    }

    /**
     * Close and discard a writer that experienced a potentially-corrupting
     * error, then rename its file with
     * {@link WriterPoolMember#INVALID_SUFFIX} so it gets attention. If the
     * writer cannot be closed the exception is propagated and no rename is
     * attempted.
     * @param f writer with problem 
     * @throws IOException if closing the writer fails; a non-IO failure is
     * wrapped, with the original exception kept as the cause
     */
    public synchronized void invalidateFile(WriterPoolMember f) throws IOException {
        try {
            destroyWriter(f);
        } catch (IOException e) {
            throw e;
        } catch (Exception e) {
            // Convert exception, keeping the original as the cause.
            throw new IOException(e.getMessage(), e);
        }
        // It'll have been closed.  Rename with an '.invalid' suffix so it
        // gets attention.
        File file = f.getFile();
        File renamed = new File(file.getAbsoluteFile() + WriterPoolMember.INVALID_SUFFIX);
        if (!file.renameTo(renamed)) {
            // renameTo reports failure only through its return value
            logger.log(Level.WARNING, "unable to rename invalidated file " + file + " to " + renamed);
        }
    }

    /**
     * @return Number of {@link WriterPoolMember}s checked out of pool.
     * @throws java.lang.UnsupportedOperationException
     */
    public synchronized int getNumActive() throws UnsupportedOperationException {
        return currentActive - getNumIdle();
    }

    /**
     * @return Number of {@link WriterPoolMember} instances still in the pool.
     * @throws java.lang.UnsupportedOperationException
     */
    public int getNumIdle() throws UnsupportedOperationException {
        return availableWriters.size();
    }

    /**
     * Close all {@link WriterPoolMember}s in pool. A failure to close one
     * writer is logged and does not stop the remaining writers being closed.
     */
    public void close() {
        Collection<WriterPoolMember> writers = drainAllWriters();
        for (WriterPoolMember writer : writers) {
            try {
                destroyWriter(writer);
            } catch (IOException e) {
                logger.log(Level.WARNING, "problem closing writer", e);
            }
        }
    }

    /**
     * @return Returns settings.
     */
    public WriterPoolSettings getSettings() {
        return this.settings;
    }

    /**
     * @return State of the pool string
     */
    protected String getPoolState() {
        StringBuilder buffer = new StringBuilder("Active ");
        buffer.append(getNumActive());
        buffer.append(" of max ");
        buffer.append(maxActive);
        buffer.append(", idle ");
        buffer.append(getNumIdle());
        return buffer.toString();
    }

    /**
     * Returns the atomic integer used to generate serial numbers
     * for files.
     * 
     * @return  the serial number generator
     */
    public AtomicInteger getSerialNo() {
        return serialNo;
    }

    /**
     * Drains all the active writers from {@link #availableWriters}, blocking to
     * wait for any writers currently in use to become available.
     * 
     * <p>
     * While waiting, this method releases the pool's monitor (via
     * {@link Object#wait(long)}) so that threads holding a writer can get
     * through {@link #returnFile(WriterPoolMember)} or
     * {@link #destroyWriter(WriterPoolMember)}; blocking on the queue with the
     * monitor held would prevent exactly the returns being waited for. The
     * wait is timed and the condition re-checked because a writer in use may
     * be destroyed instead of returned, which lowers {@link #currentActive}
     * without anything arriving in the queue. Only one drain runs at a time.
     * 
     * <p>
     * If the thread is interrupted, the writers collected so far are returned
     * (possibly fewer than all) and the interrupt status is restored.
     * 
     * <p>
     * When finished with writers, call availableWriters.addAll(...) to put them
     * back into the rotation.
     * 
     * @return all the active writers
     */
    protected synchronized Collection<WriterPoolMember> drainAllWriters() {
        LinkedList<WriterPoolMember> writers = new LinkedList<WriterPoolMember>();

        // let any drain already in progress finish first
        try {
            while (draining) {
                wait();
            }
        } catch (InterruptedException e) {
            noteDrainInterrupted(e, writers.size());
            return writers;
        }

        draining = true;
        try {
            availableWriters.drainTo(writers);
            while (writers.size() < currentActive) {
                // releases the monitor so in-use writers can be returned or destroyed
                wait(DRAIN_RECHECK_MILLIS);
                availableWriters.drainTo(writers);
            }
        } catch (InterruptedException e) {
            noteDrainInterrupted(e, writers.size());
        } finally {
            draining = false;
            notifyAll();
        }

        return writers;
    }

    /**
     * Log an interrupted drain and restore the thread's interrupt status.
     * Called with the pool monitor held.
     */
    private void noteDrainInterrupted(InterruptedException e, int collected) {
        logger.severe("caught " + e + " while waiting for writers to free up; returning only "
                + collected + " of " + currentActive + " active writers");
        Thread.currentThread().interrupt();
    }

    /**
     * Flush every writer in the pool, waiting for writers currently in use.
     * A failure to flush one writer is logged and the rest are still
     * flushed. The drained writers are always put back into rotation, even
     * if flushing fails unexpectedly.
     */
    public void flush() {
        Collection<WriterPoolMember> writers = drainAllWriters();
        try {
            for (WriterPoolMember writer : writers) {
                try {
                    writer.flush();
                } catch (IOException e) {
                    logger.log(Level.WARNING, "problem flushing writer " + writer, e);
                }
            }
        } finally {
            availableWriters.addAll(writers);
        }
    }

    /**
     * Report the file and position of every writer in the pool, waiting for
     * writers currently in use. The drained writers are always put back into
     * rotation, even if building the report fails.
     * 
     * @return array with one object per writer, holding its "file" and
     * "position"
     * @throws JSONException if a value cannot be stored in the report
     */
    public JSONArray jsonStatus() throws JSONException {
        Collection<WriterPoolMember> writers = drainAllWriters();
        try {
            JSONArray ja = new JSONArray();
            for (WriterPoolMember w : writers) {
                JSONObject jo = new JSONObject();
                jo.put("file", w.getFile());
                jo.put("position", w.getPosition());
                ja.put(jo);
            }
            return ja;
        } finally {
            availableWriters.addAll(writers);
        }
    }
}