com.twitlinks.parser.Parser.java Source code

Introduction

Here is the source code for com.twitlinks.parser.Parser.java
Source

/* 
 * Copyright (c) 2011 Twitlinks
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 */
package com.twitlinks.parser;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Locale;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.twitlinks.TwitDocument;
import com.twitlinks.indexer.Buffer;

/**
 * Thread that reads tab-separated records line by line from a file under
 * <code>data/</code>, converts each line into a {@link TwitDocument} and
 * offers it to <code>Buffer.documentQueue</code>.
 * <p>
 * Reading from the crawler instead of a file is not implemented yet; in that
 * mode the thread only performs a single back-off sleep and finishes.
 * 
 * @author raunak
 * @version 1.0
 */
public class Parser extends Thread {
    /** Logger shared by all parser instances. */
    private static final Log log = LogFactory.getLog(Parser.class);

    /** Minimum length a thread can go to sleep for: 1 second. */
    private static final int MINWAIT = 1000;

    /** Maximum length a thread can go to sleep for: 15 minutes. */
    private static final int MAXWAIT = 15 * 60 * 1000;

    /**
     * Number of tab-separated fields a line must have; the highest index
     * read by {@link #createDocument(String)} is 8.
     */
    private static final int REQUIRED_FIELDS = 9;

    /** Directory prefix prepended to the file name. */
    private static final String DATA_DIR = "data/";

    /**
     * Current thread sleep time in milliseconds. Static, hence shared by all
     * parser instances; access is not synchronized.
     */
    private static int currentWait = MINWAIT;

    /**
     * Date parser for timestamps shaped like
     * "Wed, 16 Feb 2011 13:57:26 +0000". Kept per instance because
     * <code>SimpleDateFormat</code> is not thread-safe, and pinned to
     * English so day and month names parse regardless of the JVM locale.
     */
    private final DateFormat formatter = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z", Locale.ENGLISH);

    /**
     * Count of lines read by the parser. Volatile because it is written by
     * the parser thread and read by callers of {@link #getLinesRead()}.
     */
    private volatile long linesRead = 0;

    /** Read from file (<code>true</code>) or from Crawler (<code>false</code>). */
    private final boolean fromFile;

    /** Name of file to read from, relative to <code>data/</code>. */
    private final String fileName;

    /**
     * Constructs a <code>Parser</code> in file mode without a file name.
     * Running such a parser logs an error and finishes without reading.
     */
    public Parser() {
        this(true, null);
    }

    /**
     * Constructs a <code>Parser</code>.
     * 
     * @param fromFile
     *            <code>true</code> to read from a file, <code>false</code>
     *            to read from the crawler (not implemented yet).
     * @param fileName
     *            name of the file below <code>data/</code>; only used when
     *            <code>fromFile</code> is <code>true</code>.
     */
    public Parser(boolean fromFile, String fileName) {
        this.fromFile = fromFile;
        this.fileName = fileName;
    }

    /**
     * Creates a <code>TwitDocument</code> object from a given input line.
     * Fields 0, 1, 5, 6, 7 and 8 are passed through as strings; field 2 is
     * parsed as a date.
     * 
     * @param input
     *            A tab-separated <code>String</code> representation of the
     *            document.
     * @return A <code>TwitDocument</code> object.
     * 
     * @throws ParseException
     *             if the line has fewer than nine fields or the date in
     *             field 2 cannot be parsed.
     */
    private TwitDocument createDocument(String input) throws ParseException {
        // Limit -1 keeps trailing empty fields, which split("\t") would drop.
        String[] tokens = input.split("\t", -1);
        if (tokens.length < REQUIRED_FIELDS) {
            throw new ParseException("Expected at least " + REQUIRED_FIELDS + " tab-separated fields but found "
                    + tokens.length, 0);
        }
        return new TwitDocument(tokens[0], tokens[1], formatter.parse(tokens[2]), tokens[5], tokens[6], tokens[7],
                tokens[8]);
    }

    /**
     * Parses the configured file, or performs one back-off sleep when the
     * parser is in crawler mode. Logs a message when starting and finishing.
     */
    @Override
    public void run() {
        log.info("Parser Started");
        if (fromFile) {
            parseFile();
        } else {
            // TODO:Read from Crawler
            // Nothing is offered to the queue yet, so treat it as a failure.
            adjustWait(false);
        }
        log.info("Parser Finished");
    }

    /**
     * Reads the input file line by line and enqueues one document per
     * parsable line. The reader is always closed, even on failure.
     */
    private void parseFile() {
        if (fileName == null) {
            log.error("No input file name was supplied; nothing to parse");
            return;
        }
        String path = DATA_DIR + fileName;
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(path));
            String line;
            while ((line = reader.readLine()) != null) {
                linesRead++;
                enqueue(line);
            }
        } catch (IOException e) {
            log.error("Failed to read " + path, e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    log.warn("Failed to close " + path, e);
                }
            }
        }
    }

    /**
     * Parses one line and offers the resulting document to the queue until
     * it is accepted. Unparsable lines are logged and skipped.
     */
    private void enqueue(String line) {
        TwitDocument document;
        try {
            // Parse once; only the offer needs to be retried.
            document = createDocument(line);
        } catch (ParseException pe) {
            log.error("Skipping unparsable line " + linesRead, pe);
            return;
        }
        // NOTE(review): this is a busy-wait while the queue rejects the
        // document; consider a blocking put if the queue type supports it.
        while (!Buffer.documentQueue.offer(document)) {
            // retry until accepted
        }
    }

    /**
     * Adapts the shared sleep time: after a failure the wait is doubled
     * (capped at {@link #MAXWAIT}) and the thread sleeps for that long;
     * after a success the wait is reduced by {@link #MINWAIT} but never
     * below it.
     */
    private void adjustWait(boolean success) {
        if (success) {
            if (currentWait > MINWAIT) {
                currentWait = Math.max(MINWAIT, currentWait - MINWAIT);
            }
            return;
        }
        if (currentWait < MAXWAIT) {
            currentWait = Math.min(MAXWAIT, currentWait * 2);
        }
        try {
            Thread.sleep(currentWait);
        } catch (InterruptedException e) {
            // Restore the flag so code further up can see the interruption.
            Thread.currentThread().interrupt();
            log.warn("Parser interrupted while backing off", e);
        }
    }

    /**
     * Get count of lines read by the parser
     * 
     * @return the linesRead
     */
    public long getLinesRead() {
        return linesRead;
    }
}