// Java tutorial
/*
 * Copyright (c) 2011 Twitlinks
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package com.twitlinks.parser;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Locale;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.twitlinks.TwitDocument;
import com.twitlinks.indexer.Buffer;

/**
 * Thread that reads tab-separated records line by line from a file under
 * <code>data/</code>, converts each line into a <code>TwitDocument</code>
 * and offers it to <code>Buffer.documentQueue</code>.
 * <p>
 * Reading from the crawler is not implemented yet; in that mode the thread
 * only performs a single back-off sleep.
 *
 * @author raunak
 * @version 1.0
 */
public class Parser extends Thread {

    /** Logger shared by all parser instances. */
    private static final Log log = LogFactory.getLog(Parser.class);

    /**
     * Current sleep time in milliseconds. Static, so it is shared (without
     * synchronization) by every <code>Parser</code> in the JVM.
     */
    private static int WAIT = 1000;

    /** Minimum length a thread can go to sleep for: 1 second. */
    private static final int MINWAIT = 1000;

    /**
     * Maximum length a thread can go to sleep for: 15 minutes. This used to
     * be 1000 ms, which equalled MINWAIT and made the back-off unable to grow.
     */
    private static final int MAXWAIT = 15 * 60 * 1000;

    /** Highest token index read by createDocument, plus one. */
    private static final int MIN_FIELDS = 9;

    /** Pattern of the date column, e.g. "Wed, 16 Feb 2011 13:57:26 +0000". */
    private static final String DATE_PATTERN = "EEE, d MMM yyyy HH:mm:ss Z";

    /** Count of lines read by the parser. */
    private long linesRead = 0;

    /** Read from file (<code>true</code>) or from the crawler. */
    private boolean fromFile = false;

    /** Name of the file to read from, resolved against <code>data/</code>. */
    private String fileName;

    /**
     * Date parser for the third column of a record.
     * <p>
     * Kept per instance because <code>SimpleDateFormat</code> is not
     * thread-safe and several parser threads may run at once. The locale is
     * pinned to English so day and month names parse regardless of the
     * platform default.
     * <p>
     * NOTE(review): the previous comment claimed the format
     * "Wed Feb 16 13:57:26 GMT 2011", which this pattern cannot parse.
     * Confirm against the actual data which of the two is intended.
     */
    private final DateFormat formatter = new SimpleDateFormat(DATE_PATTERN, Locale.ENGLISH);

    /**
     * Constructs a <code>Parser</code> in file mode without a file name.
     * With a <code>null</code> name, {@link #run()} will try to open
     * <code>data/null</code>.
     */
    public Parser() {
        this(true, null);
    }

    /**
     * Constructs a <code>Parser</code>.
     *
     * @param fromFile
     *            <code>true</code> to read from <code>fileName</code>,
     *            <code>false</code> for the (unimplemented) crawler mode.
     * @param fileName
     *            name of the input file inside the <code>data/</code>
     *            directory.
     */
    public Parser(boolean fromFile, String fileName) {
        this.fromFile = fromFile;
        this.fileName = fileName;
    }

    /**
     * Creates a <code>TwitDocument</code> from one tab-separated input line.
     * Columns 0, 1, 2 (date), 5, 6, 7 and 8 are used.
     *
     * @param input
     *            A <code>String</code> representation of the document.
     * @return the document built from the line.
     *
     * @throws ParseException
     *             if the line has fewer than nine columns or the date column
     *             does not match the expected pattern.
     */
    private TwitDocument createDocument(String input) throws ParseException {
        String[] tokens = input.split("\t");
        if (tokens.length < MIN_FIELDS) {
            // Report short lines as a parse failure instead of letting an
            // ArrayIndexOutOfBoundsException terminate the thread.
            throw new ParseException("Expected at least " + MIN_FIELDS
                    + " tab-separated fields but found " + tokens.length, 0);
        }
        return new TwitDocument(tokens[0], tokens[1], formatter.parse(tokens[2]),
                tokens[5], tokens[6], tokens[7], tokens[8]);
    }

    /**
     * Runs the parser: processes the input file in file mode, otherwise
     * executes the crawler placeholder.
     */
    @Override
    public void run() {
        log.info("Parser Started");
        if (fromFile) {
            parseFile();
        } else {
            pollCrawler();
        }
        log.info("Parser Finished");
    }

    /**
     * Reads <code>data/&lt;fileName&gt;</code> line by line and queues one
     * document per parsable line. Unparsable lines are logged and skipped;
     * an I/O failure is logged and ends the read.
     */
    private void parseFile() {
        String path = "data/" + fileName;
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(path));
            String line;
            while ((line = reader.readLine()) != null) {
                linesRead++;
                try {
                    // Parse once; only the offer needs to be retried.
                    TwitDocument document = createDocument(line);
                    // NOTE(review): this spins without pausing while the
                    // queue rejects the document.
                    while (!Buffer.documentQueue.offer(document)) {
                        // retry until accepted
                    }
                } catch (ParseException pe) {
                    log.error("Skipping unparsable line " + linesRead + " of " + path, pe);
                }
            }
        } catch (IOException e) {
            log.error("Failed to read " + path, e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    log.warn("Failed to close " + path, e);
                }
            }
        }
    }

    /**
     * Placeholder for crawler input. Nothing is read yet, so
     * <code>success</code> stays <code>false</code> and the method always
     * takes the back-off path.
     */
    private void pollCrawler() {
        boolean success = false;
        // TODO:Read from Crawler
        if (!success) {
            // Blocking Queue is full. Increase the waiting time, capped at
            // MAXWAIT.
            if (Parser.WAIT < Parser.MAXWAIT) {
                Parser.WAIT = Math.min(Parser.WAIT * 2, Parser.MAXWAIT);
            }
            try {
                Thread.sleep(Parser.WAIT);
            } catch (InterruptedException e) {
                // Keep the interrupt visible to whoever owns this thread.
                Thread.currentThread().interrupt();
                log.warn("Parser interrupted while waiting", e);
            }
        } else if (Parser.WAIT > Parser.MINWAIT) {
            Parser.WAIT = Parser.WAIT - Parser.MINWAIT;
        }
    }

    /**
     * Get count of lines read by the parser
     *
     * @return the linesRead
     */
    public long getLinesRead() {
        return linesRead;
    }
}