Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.appeligo.captions; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintStream; import java.lang.management.ManagementFactory; import java.lang.management.MemoryMXBean; import java.lang.management.MemoryUsage; import java.net.InetAddress; import java.net.MalformedURLException; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.TimeZone; import org.apache.commons.configuration.Configuration; import org.apache.log4j.Logger; import org.apache.lucene.document.Field; import org.apache.lucene.index.Term; import com.appeligo.ccdataweb.CaptionStore; import com.appeligo.config.ConfigurationService; import com.appeligo.epg.DefaultEpg; import com.appeligo.lucene.DocumentUtil; import com.appeligo.lucene.LuceneIndexer; import com.appeligo.search.util.ConfigUtils; import com.appeligo.util.Utils; import com.caucho.hessian.client.HessianProxyFactory; import com.knowbout.cc2nlp.CCEventService; import com.knowbout.cc2nlp.CCSentenceEvent; import com.knowbout.cc2nlp.CCXDSEvent; import com.knowbout.cc2nlp.CaptionTypeChangeEvent; import com.knowbout.cc2nlp.ITVLinkEvent; import com.knowbout.cc2nlp.ProgramStartEvent; import com.knowbout.epg.service.EPGProvider; import com.knowbout.epg.service.Network; import com.knowbout.epg.service.ScheduledProgram; /** * This class provides a is an end point for close caption events. It indexes * the sentences using Lucene to make the searchable. * * @author fear */ public class CaptionListener implements CCEventService { private static final Logger log = Logger.getLogger(CaptionListener.class); private CaptionStore captionStore; private static String documentRoot = "/tmp"; private static PrintStream statsFile; private static String currentDay; private static long lastWrite; private static String hostname; private HashMap<String, ProgramCaptions> captions; private String programIndexLocation; private String compositeIndexLocation; private String liveIndexLocation; private String liveLineup; private EPGProvider epg; private List<String> lineupIds; static { documentRoot = ConfigUtils.getSystemConfig().getString("documentRoot[@path]", "/tmp"); } /** * * */ @SuppressWarnings("unchecked") public CaptionListener() throws MalformedURLException { if (log.isInfoEnabled()) { log.info("Instantiated a " + this.getClass().getName()); } Configuration config = ConfigUtils.getSystemConfig(); programIndexLocation = config.getString("luceneIndex"); compositeIndexLocation = config.getString("compositeIndex"); liveIndexLocation = config.getString("luceneLiveIndex"); liveLineup = config.getString("liveLineup"); //Set the optimization duraction of the live index to 30 minutes. int liveIndexOptimization = config.getInt("luceneLiveIndexOptimization", 30); LuceneIndexer liveIndex = LuceneIndexer.getInstance(liveIndexLocation); liveIndex.setOptimizeDuration(liveIndexOptimization); epg = DefaultEpg.getInstance(); captions = new HashMap<String, ProgramCaptions>(); //PENDING(CE): We should probably get this list form the EPG? Seems like we should. Configuration lineupConfiguration = ConfigurationService.getConfiguration("lineups"); lineupIds = (List<String>) lineupConfiguration.getList("lineups.lineup.id"); DeleteOldProgramsThread.startThread(); } private CaptionStore getCaptionStore() throws MalformedURLException { if (captionStore == null) { Configuration config = ConfigUtils.getSystemConfig(); HessianProxyFactory factory = new HessianProxyFactory(); String url = config.getString("captionsEndpoint"); if (log.isInfoEnabled()) { log.info("CaptionStore endpoint is " + url); } captionStore = (CaptionStore) factory.create(CaptionStore.class, url); } return captionStore; } /* (non-Javadoc) * @see com.knowbout.nlp.keywords.service.CCEventService#startCapture(java.lang.String, java.lang.String) */ public String startCapture() { log.info("startCapture()"); return SUCCESS; } /* (non-Javadoc) * @see com.knowbout.nlp.keywords.service.CCEventService#captureCCEvent(java.lang.String, java.lang.String) */ public String captureSentence(CCSentenceEvent event) { try { if (log.isDebugEnabled()) { log.debug("capture sentence(" + event + ")"); } checkStats(); //PENDING(CE) Make sure the timestamp is a real time and not an offset ScheduledProgram scheduledProgram = event.getScheduledProgram(); ProgramCaptions programCaptions = captions.get(event.getCallsign()); if (programCaptions == null) { if (log.isInfoEnabled()) { log.info("Attempting to recover scheduled program, must have been a cold start."); } if (scheduledProgram == null) { scheduledProgram = epg.getScheduledProgramByNetworkCallSign(liveLineup, event.getCallsign(), new Date(event.getTimestamp())); } if (scheduledProgram != null) { if (log.isInfoEnabled()) { log.info("Setting current program to " + scheduledProgram.getProgramId() + ":" + scheduledProgram.getProgramTitle()); } List<ScheduledProgram> schedules = getSchedulePrograms(scheduledProgram); programCaptions = new ProgramCaptions(scheduledProgram.getProgramId(), scheduledProgram.getScheduleId(), scheduledProgram.getNetwork().getStationCallSign(), scheduledProgram, schedules); try { CaptionStore store = getCaptionStore(); String[] pastCaptions = store.getSentences(event.getLineupID(), event.getCallsign(), event.getProgramStartTime(), event.getProgramStartTime(), event.getTimestamp()); for (String caption : pastCaptions) { programCaptions.addSentence(DocumentUtil.prettySentence(caption)); } } catch (MalformedURLException e) { log.error("Unable to locate the caption store to back fill captions.", e); } catch (Exception e) { log.error("Error retreving sentences from caption store for " + event.getLineupID() + " callsign:" + event.getCallsign() + " startTime: " + event.getProgramStartTime() + " eventTime: " + event.getTimestamp(), e); } programCaptions.addSentence(DocumentUtil.prettySentence(event.getSentence())); captions.put(event.getCallsign(), programCaptions); addToLiveIndex(event, programCaptions, true); } else { log.error("We are unable to find a program for callsign " + event.getCallsign() + " on lineup: " + event.getLineupID() + " at time:" + new Date(event.getTimestamp())); return FAILURE; } } else { //Check to see if the program has changed. This is a safty check in case the programStart event is out of synch //PENDING(CE): Can this happen? Need to talk to Rich about it. if (programCaptions.isCurrentProgram(scheduledProgram)) { programCaptions.addSentence(DocumentUtil.prettySentence(event.getSentence())); addToLiveIndex(event, programCaptions, false); } else { //It is not, so we reached the end of a program and now we need to index all of the data //Ok since we changed the behavior of the EPG service (7/14/07) to return the current program if it is not over yet //We need to get ask for the schedule list again. List<ScheduledProgram> nextShowing = getSchedulePrograms(programCaptions.getCurrentProgram()); LuceneIndexer.getInstance(programIndexLocation).addProgram(programCaptions.getCaptions(), programCaptions.getProgramId(), nextShowing, Field.Store.YES, true, new Date()); LuceneIndexer.getCompositeInstance(compositeIndexLocation).addCompositeProgram( programCaptions.getCaptions(), programCaptions.getProgramId(), programCaptions.getSchedule(), Field.Store.YES, true, new Date()); //Now start saving sentences for the new program List<ScheduledProgram> schedules = getSchedulePrograms(scheduledProgram); programCaptions = new ProgramCaptions(scheduledProgram.getProgramId(), scheduledProgram.getStartTime().getTime(), scheduledProgram.getNetwork().getStationCallSign(), scheduledProgram, schedules); programCaptions.addSentence(DocumentUtil.prettySentence(event.getSentence())); captions.put(event.getCallsign(), programCaptions); addToLiveIndex(event, programCaptions, true); } } return SUCCESS; } catch (Throwable e) { log.error(e.getMessage() + event, e); return FAILURE; } } private void addToLiveIndex(CCSentenceEvent event, ProgramCaptions captions, boolean newProgram) { String callsign = event.getCallsign(); LuceneIndexer liveIndex = LuceneIndexer.getInstance(liveIndexLocation); liveIndex.deleteDocuments(new Term("lineup-" + liveLineup + "-stationCallSign", callsign)); liveIndex.addProgram(captions.getCaptions(), captions.getProgramId(), captions.getSchedule(), Field.Store.YES, false, new Date()); } private synchronized static void checkStats() { int interval = 5; // minutes long timestamp = new Date().getTime(); if ((timestamp - lastWrite) > (interval * 60 * 1000)) { lastWrite = timestamp; String day = Utils.getDatePath(timestamp); if (!day.equals(currentDay)) { if (statsFile != null) { statsFile.println("</table></body></html>"); statsFile.close(); statsFile = null; } currentDay = day; } if (hostname == null) { try { hostname = InetAddress.getLocalHost().getHostName(); } catch (UnknownHostException e) { hostname = "UnknownHost"; } } String dirname = documentRoot + "/stats/" + currentDay + "/" + hostname; String statsFileName = dirname + "/searchprocstats.html"; try { if (statsFile == null) { File dir = new File(dirname); if ((!dir.exists()) && (!dir.mkdirs())) { throw new IOException("Error creating directory " + dirname); } File file = new File(statsFileName); if (file.exists()) { statsFile = new PrintStream(new FileOutputStream(statsFileName, true)); statsFile.println("<tr><td colspan='5'>Restart</td></tr>"); } else { statsFile = new PrintStream(new FileOutputStream(statsFileName)); String title = "Search Process (tomcat) status for " + currentDay; statsFile.println("<html><head><title>" + title + "</title></head>"); statsFile.println("<body><h1>" + title + "</h1>"); statsFile.println("<table border='1'>"); statsFile.println("<tr>"); statsFile.println("<th colspan='2'>" + interval + " Minute Intervals</th>" + "<th colspan='3'>Mem Pre GC</th>" + "<th>GC</th>" + "<th colspan='3'>Mem Post GC</th>"); statsFile.println("</tr>"); statsFile.println("<tr>"); statsFile.println("<th>Timestamp</th>"); statsFile.println("<th>Time</th>"); statsFile.println("<th>Used</th>"); statsFile.println("<th>Committed</th>"); statsFile.println("<th>Max</th>"); statsFile.println("<th>Millis</th>"); statsFile.println("<th>Used</th>"); statsFile.println("<th>Committed</th>"); statsFile.println("<th>Max</th>"); statsFile.println("</tr>"); } } Calendar cal = Calendar.getInstance(); cal.setTimeZone(TimeZone.getTimeZone("GMT")); cal.setTimeInMillis(timestamp); String time = String.format("%1$tH:%1$tM:%1$tS", cal); MemoryMXBean memoryBean = ManagementFactory.getMemoryMXBean(); MemoryUsage memory = memoryBean.getHeapMemoryUsage(); statsFile.print("<tr>"); statsFile.print("<td>" + timestamp + "</td>"); statsFile.print("<td>" + time + "</td>"); statsFile.format("<td>%,d</td>", memory.getUsed()); statsFile.format("<td>%,d</td>", memory.getCommitted()); statsFile.format("<td>%,d</td>", memory.getMax()); long beforeGC = System.currentTimeMillis(); System.gc(); long elapsed = System.currentTimeMillis() - beforeGC; statsFile.format("<td>%,d</td>", (int) elapsed); memoryBean = ManagementFactory.getMemoryMXBean(); memory = memoryBean.getHeapMemoryUsage(); statsFile.format("<td>%,d</td>", memory.getUsed()); statsFile.format("<td>%,d</td>", memory.getCommitted()); statsFile.format("<td>%,d</td>", memory.getMax()); statsFile.println("</tr>"); } catch (IOException e) { log.error("Error opening or writing to " + statsFileName, e); } } } /** * */ public String endCapture() { if (log.isDebugEnabled()) { log.debug("endCapture()"); } return SUCCESS; } /** * */ public String captureXDS(CCXDSEvent event) { if (log.isDebugEnabled()) { log.debug("capture xds(" + event + ")"); } return SUCCESS; } public String captureITVLink(ITVLinkEvent itvLinkEvent) { if (log.isDebugEnabled()) { log.debug("capture itvlink(" + itvLinkEvent.getITVLink() + ")"); } return SUCCESS; } public String captionTypeChanged(CaptionTypeChangeEvent captionTypeChangedEvent) { return SUCCESS; } /* (non-Javadoc) * @see com.knowbout.cc2nlp.CCEventService#startProgram(com.knowbout.cc2nlp.CCEvent) */ public String startProgram(ProgramStartEvent programStartEvent) { ScheduledProgram scheduledProgram = programStartEvent.getScheduledProgram(); Network network = scheduledProgram.getNetwork(); if (network != null) { if (log.isInfoEnabled()) { log.info("Due to program start event, purging storage for callsign " + network.getStationName()); } ProgramCaptions programCaptions = captions.remove(programStartEvent.getCallsign()); //We had captured CC for this callsign already. It must be an old program, so lets //Index off that data if (programCaptions != null) { //It is not, so we reached the end of a program and now we need to index all of the data List<ScheduledProgram> schedule = getSchedulePrograms(scheduledProgram); LuceneIndexer.getInstance(programIndexLocation).addProgram(programCaptions.getCaptions(), programCaptions.getProgramId(), programCaptions.getSchedule(), Field.Store.YES, true, new Date()); LuceneIndexer.getCompositeInstance(compositeIndexLocation).addCompositeProgram( programCaptions.getCaptions(), programCaptions.getProgramId(), programCaptions.getSchedule(), Field.Store.YES, true, new Date()); ProgramCaptions newCaptions = new ProgramCaptions(scheduledProgram.getProgramId(), scheduledProgram.getScheduleId(), network.getStationCallSign(), scheduledProgram, schedule); captions.put(programStartEvent.getCallsign(), newCaptions); } } return SUCCESS; } private List<ScheduledProgram> getSchedulePrograms(ScheduledProgram program) { String programId = program.getProgramId(); ArrayList<ScheduledProgram> programs = new ArrayList<ScheduledProgram>(); for (String lineup : lineupIds) { if (program.getLineupId().equals(lineup)) { programs.add(program); } else { ScheduledProgram sked = epg.getNextShowing(lineup, programId, false, true); if (sked == null) { sked = epg.getLastShowing(lineup, programId); } if (sked != null) { programs.add(sked); } } } return programs; } }